OpenProteinAI
diff --git a/‎.gitignore‎
Lines changed: 221 additions & 1 deletion b/‎.gitignore‎
Lines changed: 221 additions & 1 deletion
diff --git a/‎openprotein/fold/api.py‎
Lines changed: 16 additions & 32 deletions b/‎openprotein/fold/api.py‎
Lines changed: 16 additions & 32 deletions
diff --git a/‎openprotein/fold/future.py‎
Lines changed: 53 additions & 0 deletions b/‎openprotein/fold/future.py‎
Lines changed: 53 additions & 0 deletions
@@ -2,7 +2,6 @@
 # SECRETS FILE
 secrets.config
 poetry.lock
-/.github/workflows/s3.yaml
 /htmlcov
 /apidocs/build
 # VS code
@@ -143,3 +142,224 @@ poet_demo.ipynb
 *chorismate_mutase_combined_seqs.fasta
 
 nohup.out
+# pixi environments
+.pixi
+*.egg-info
+
+notebooks
+./data/
+.aider*
+
+build
+# Python
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+#poetry.toml
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+#   https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+#pdm.lock
+#pdm.toml
+.pdm-python
+.pdm-build/
+
+# pixi
+#   Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+#pixi.lock
+#   Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+#   in the .venv directory. It is recommended not to include this directory in version control.
+.pixi
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs
+.abstra/
+
+# Visual Studio Code
+#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore 
+#  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+#  and can be added to the global gitignore or merged into this file. However, if you prefer, 
+#  you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
+
+# Cursor
+#  Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+#  exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+#  refer to https://docs.cursor.com/context/ignore-files
+.cursorignore
+.cursorindexingignore
+
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
+
+# Streamlit
+.streamlit/secrets.toml
+/data/
@@ -1,7 +1,7 @@
 """Fold REST API interface for making HTTP calls to our fold backend."""
 
 import io
-from typing import Literal
+from typing import TYPE_CHECKING, Literal
 
 import numpy as np
 from pydantic import TypeAdapter
@@ -12,6 +12,9 @@
 
 from .schemas import FoldJob, FoldMetadata
 
+if TYPE_CHECKING:
+    import pandas as pd
+
 PATH_PREFIX = "v1/fold"
 
 
@@ -160,8 +163,8 @@ def fold_get_complex_result(
 def fold_get_complex_extra_result(
     session: APISession,
     job_id: str,
-    key: Literal["pae", "pde", "plddt", "confidence", "affinity"],
-) -> np.ndarray | list[dict]:
+    key: Literal["pae", "pde", "plddt", "confidence", "affinity", "score", "metrics"],
+) -> "np.ndarray | list[dict] | pd.DataFrame":
     """
     Get extra result for a complex from the request ID.
 
@@ -183,6 +186,10 @@ def fold_get_complex_extra_result(
         formatter = lambda response: np.load(io.BytesIO(response.content))
     elif key in {"confidence", "affinity"}:
         formatter = lambda response: response.json()
+    elif key in {"score", "metrics"}:
+        import pandas as pd
+
+        formatter = lambda response: pd.read_csv(io.StringIO(response.content.decode()))
     else:
         raise ValueError(f"Unexpected key: {key}")
     endpoint = PATH_PREFIX + f"/{job_id}/complex/{key}"
@@ -194,7 +201,7 @@ def fold_get_complex_extra_result(
         if e.status_code == 400 and key == "affinity":
             raise ValueError("affinity not found for request") from None
         raise e
-    output: np.ndarray | list[dict] = formatter(response)
+    output = formatter(response)
     return output
 
 
@@ -254,34 +261,11 @@ def fold_models_post(
         sequences = kwargs["sequences"]
         # NOTE we are handling the boltz form here too
         sequences = [s.decode() if isinstance(s, bytes) else s for s in sequences]
-        body["sequences"] = sequences
-    if kwargs.get("msa_id"):
-        body["msa_id"] = kwargs["msa_id"]
-    if kwargs.get("num_recycles"):
-        body["num_recycles"] = kwargs["num_recycles"]
-    if kwargs.get("num_models"):
-        body["num_models"] = kwargs["num_models"]
-    if kwargs.get("num_relax"):
-        body["num_relax"] = kwargs["num_relax"]
-    if kwargs.get("use_potentials"):
-        body["use_potentials"] = kwargs["use_potentials"]
-    # boltz
-    if kwargs.get("diffusion_samples"):
-        body["diffusion_samples"] = kwargs["diffusion_samples"]
-    if kwargs.get("recycling_steps"):
-        body["recycling_steps"] = kwargs["recycling_steps"]
-    if kwargs.get("sampling_steps"):
-        body["sampling_steps"] = kwargs["sampling_steps"]
-    if kwargs.get("step_scale"):
-        body["step_scale"] = kwargs["step_scale"]
-    if kwargs.get("constraints"):
-        body["constraints"] = kwargs["constraints"]
-    if kwargs.get("templates"):
-        body["templates"] = kwargs["templates"]
-    if kwargs.get("properties"):
-        body["properties"] = kwargs["properties"]
-    if kwargs.get("method"):
-        body["method"] = kwargs["method"]
+        kwargs["sequences"] = sequences
+    # add non-None args - note this doesn't affect msa_id which is nested
+    for k, v in kwargs.items():
+        if v is not None:
+            body[k] = v
 
     response = session.post(endpoint, json=body)
     return FoldJob.model_validate(response.json())
@@ -3,6 +3,7 @@
 from typing import TYPE_CHECKING, Literal
 
 import numpy as np
+import pandas as pd
 from pydantic.type_adapter import TypeAdapter
 from typing_extensions import Self
 
@@ -246,6 +247,8 @@ def __init__(
         self._pae: np.ndarray | None = None
         self._pde: np.ndarray | None = None
         self._plddt: np.ndarray | None = None
+        self._score: pd.DataFrame | None = None
+        self._metrics: pd.DataFrame | None = None
         self._confidence: list["BoltzConfidence"] | None = None
         self._affinity: "BoltzAffinity | None" = None
 
@@ -439,6 +442,56 @@ def plddt(self) -> np.ndarray:
             self._plddt = plddt
         return self._plddt
 
+    @property
+    def score(self) -> pd.DataFrame:
+        """
+        Get the predicted scores, fetched on first access and cached afterwards.
+
+        Returns
+        -------
+        pd.DataFrame
+            Structure prediction scores.
+
+        Raises
+        ------
+        AttributeError
+            If the model ID is not "rosettafold-3".
+        """
+        if self.model_id not in {"rosettafold-3"}:
+            raise AttributeError("score not supported for non-RosettaFold model")
+        if self._score is None:
+            score = api.fold_get_complex_extra_result(
+                session=self.session, job_id=self.job.job_id, key="score"
+            )
+            assert isinstance(score, pd.DataFrame)
+            self._score = score
+        return self._score
+
+    @property
+    def metrics(self) -> pd.DataFrame:
+        """
+        Get the predicted metrics, fetched on first access and cached afterwards.
+
+        Returns
+        -------
+        pd.DataFrame
+            Structure prediction metrics.
+
+        Raises
+        ------
+        AttributeError
+            If the model ID is not "rosettafold-3".
+        """
+        if self.model_id not in {"rosettafold-3"}:
+            raise AttributeError("metrics not supported for non-RosettaFold model")
+        if self._metrics is None:
+            metrics = api.fold_get_complex_extra_result(
+                session=self.session, job_id=self.job.job_id, key="metrics"
+            )
+            assert isinstance(metrics, pd.DataFrame)
+            self._metrics = metrics
+        return self._metrics
+
     @property
     def confidence(self) -> list["BoltzConfidence"]:
         """