Skip to content

Commit 5616f2a

Browse files
committed
Release v0.8.5
- Fixed Boltz affinity
- Fixed predictor train with PoET prompt
- Add paging to listing assays and jobs
- bfactor for plddt
- Some typing improvements
1 parent b30942c commit 5616f2a

File tree

15 files changed

+225
-121
lines changed

15 files changed

+225
-121
lines changed

anaconda_build/meta.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
package:
22
name: openprotein-python
3-
version: "0.8.4"
3+
version: "0.8.5"
44

55
source:
66
path: ../

openprotein/common/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""Common classes and utilities for OpenProtein."""
22

3-
from .features import FeatureType
3+
from .features import Feature, FeatureType
44
from .model_metadata import ModelDescription, ModelMetadata, TokenInfo
5-
from .reduction import ReductionType
5+
from .reduction import Reduction, ReductionType

openprotein/common/features.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,15 @@
1+
"""Feature types used in OpenProtein."""
2+
13
from enum import Enum
4+
from typing import Literal
25

36

47
class FeatureType(str, Enum):
58

69
PLM = "PLM"
710
SVD = "SVD"
11+
12+
13+
# NOTE: only works with python 3.12+
14+
# Feature = Literal[*tuple([r.value for r in FeatureType])]
15+
Feature = Literal["PLM", "SVD"]

openprotein/common/reduction.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
11
"""Reduction types used in OpenProtein."""
22

33
from enum import Enum
4+
from typing import Literal
45

56

67
class ReductionType(str, Enum):
78
MEAN = "MEAN"
89
SUM = "SUM"
10+
11+
12+
# NOTE: only works with python 3.12+
13+
# Reduction = Literal[*tuple([r.value for r in ReductionType])]
14+
Reduction = Literal["MEAN", "SUM"]

openprotein/data/api.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,14 +64,20 @@ def assaydata_post(
6464
raise APIError(f"Unable to post assay data: {response.text}")
6565

6666

67-
def assaydata_list(session: APISession) -> list[AssayMetadata]:
67+
def assaydata_list(
68+
session: APISession, limit: int | None = None, offset: int | None = None
69+
) -> list[AssayMetadata]:
6870
"""
6971
Get a list of all assay metadata.
7072
7173
Parameters
7274
----------
7375
session : APISession
7476
Session object for API communication.
77+
limit : int, optional
78+
Limit the number of assays to return.
79+
offset : int, optional
80+
Offset of assays to retrieve. Useful with limit.
7581
7682
Returns
7783
-------
@@ -84,7 +90,12 @@ def assaydata_list(session: APISession) -> list[AssayMetadata]:
8490
If an error occurs during the API request.
8591
"""
8692
endpoint = "v1/assaydata"
87-
response = session.get(endpoint)
93+
params = {}
94+
if limit is not None:
95+
params["limit"] = limit
96+
if offset is not None:
97+
params["offset"] = offset
98+
response = session.get(endpoint, params=params)
8899
if response.status_code == 200:
89100
return TypeAdapter(list[AssayMetadata]).validate_python(response.json())
90101
else:

openprotein/data/data.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,23 @@ class DataAPI:
1414
def __init__(self, session: APISession):
1515
self.session = session
1616

17-
def list(self) -> list[AssayDataset]:
17+
def list(
18+
self, limit: int | None = None, offset: int | None = None
19+
) -> list[AssayDataset]:
1820
"""
1921
List all assay datasets.
2022
23+
limit : int, optional
24+
Limit the number of assays to return.
25+
offset : int, optional
26+
Offset of assays to retrieve. Useful with limit.
27+
2128
Returns
2229
-------
2330
List[AssayDataset]
2431
List of all assay datasets.
2532
"""
26-
metadata = api.assaydata_list(self.session)
33+
metadata = api.assaydata_list(session=self.session, limit=limit, offset=offset)
2734
return [AssayDataset(self.session, x) for x in metadata]
2835

2936
def create(

openprotein/embeddings/models.py

Lines changed: 37 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,13 @@
33
from typing import TYPE_CHECKING
44

55
from openprotein.base import APISession
6-
from openprotein.common import FeatureType, ModelMetadata, ReductionType
6+
from openprotein.common import (
7+
Feature,
8+
FeatureType,
9+
ModelMetadata,
10+
Reduction,
11+
ReductionType,
12+
)
713
from openprotein.data import AssayDataset, AssayMetadata, DataAPI
814
from openprotein.errors import InvalidParameterError
915

@@ -199,9 +205,9 @@ def logits(
199205
def fit_svd(
200206
self,
201207
sequences: list[bytes] | list[str] | None = None,
202-
assay: AssayDataset | None = None,
208+
assay: AssayDataset | AssayMetadata | None = None,
203209
n_components: int = 1024,
204-
reduction: ReductionType | None = None,
210+
reduction: Reduction | ReductionType | None = None,
205211
**kwargs,
206212
) -> "SVDModel":
207213
"""
@@ -236,6 +242,11 @@ def fit_svd(
236242
# local import for cyclic dep
237243
from openprotein.svd import SVDAPI
238244

245+
# runtime check on value
246+
if isinstance(reduction, str):
247+
reduction = ReductionType(reduction)
248+
reduction = reduction.value
249+
239250
svd_api = getattr(self.session, "svd", None)
240251
assert isinstance(svd_api, SVDAPI)
241252

@@ -246,9 +257,8 @@ def fit_svd(
246257
raise InvalidParameterError(
247258
"Expected either assay or sequences to fit SVD on!"
248259
)
249-
model_id = self.id
250260
return svd_api.fit_svd(
251-
model_id=model_id,
261+
model=self,
252262
sequences=sequences,
253263
assay=assay,
254264
n_components=n_components,
@@ -259,9 +269,9 @@ def fit_svd(
259269
def fit_umap(
260270
self,
261271
sequences: list[bytes] | list[str] | None = None,
262-
assay: AssayDataset | None = None,
272+
assay: AssayDataset | AssayMetadata | None = None,
263273
n_components: int = 2,
264-
reduction: ReductionType | None = ReductionType.MEAN,
274+
reduction: Reduction | ReductionType = "MEAN",
265275
**kwargs,
266276
) -> "UMAPModel":
267277
"""
@@ -274,11 +284,11 @@ def fit_umap(
274284
----------
275285
sequences : list of bytes or list of str or None, optional
276286
Optional sequences to fit UMAP with. Either use sequences or assay. Sequences is preferred.
277-
assay : AssayDataset or None, optional
287+
assay : AssayDataset or AssayMetadata or None, optional
278288
Optional assay containing sequences to fit UMAP with. Either use sequences or assay. Ignored if sequences are provided.
279289
n_components : int, optional
280290
Number of components in UMAP fit. Determines output shapes. Default is 2.
281-
reduction : ReductionType or None, optional
291+
reduction : Reduction or ReductionType or None, optional
282292
Embeddings reduction to use (e.g. mean). Defaults to MEAN.
283293
kwargs :
284294
Additional keyword arguments to be used from foundational models, e.g. prompt_id for PoET models.
@@ -296,6 +306,16 @@ def fit_umap(
296306
# local import for cyclic dep
297307
from openprotein.umap import UMAPAPI
298308

309+
if reduction is None:
310+
raise InvalidParameterError(
311+
"Expected reduction if using EmbeddingModel to fit UMAP"
312+
)
313+
314+
# runtime check on value
315+
if isinstance(reduction, str):
316+
reduction = ReductionType(reduction)
317+
reduction = reduction.value
318+
299319
umap_api = getattr(self.session, "umap", None)
300320
assert isinstance(umap_api, UMAPAPI)
301321

@@ -306,20 +326,26 @@ def fit_umap(
306326
raise InvalidParameterError(
307327
"Expected either assay or sequences to fit UMAP on!"
308328
)
329+
# get assay_id
330+
assay_id = (
331+
assay.assay_id
332+
if isinstance(assay, AssayMetadata)
333+
else assay.id if isinstance(assay, AssayDataset) else assay
334+
)
309335
model_id = self.id
310336
return umap_api.fit_umap(
311337
model_id=model_id,
312338
feature_type=FeatureType.PLM,
313339
sequences=sequences,
314-
assay_id=assay.id if assay is not None else None,
340+
assay_id=assay_id,
315341
n_components=n_components,
316342
reduction=reduction,
317343
**kwargs,
318344
)
319345

320346
def fit_gp(
321347
self,
322-
assay: AssayMetadata | AssayDataset | str,
348+
assay: AssayDataset | AssayMetadata | str,
323349
properties: list[str],
324350
reduction: ReductionType,
325351
name: str | None = None,
@@ -358,26 +384,9 @@ def fit_gp(
358384
# local import to resolve cyclic
359385
from openprotein.predictor import PredictorAPI
360386

361-
data_api = getattr(self.session, "data", None)
362-
assert isinstance(data_api, DataAPI)
363387
predictor_api = getattr(self.session, "predictor", None)
364388
assert isinstance(predictor_api, PredictorAPI)
365389

366-
# get assay if str
367-
assay = data_api.get(assay_id=assay) if isinstance(assay, str) else assay
368-
# extract assay_id
369-
if len(properties) == 0:
370-
raise InvalidParameterError("Expected (at-least) 1 property to train")
371-
if not set(properties) <= set(assay.measurement_names):
372-
raise InvalidParameterError(
373-
f"Expected all provided properties to be a subset of assay's measurements: {assay.measurement_names}"
374-
)
375-
# TODO - support multitask
376-
if len(properties) > 1:
377-
raise InvalidParameterError(
378-
"Training a multitask GP is not yet supported (i.e. number of properties should only be 1 for now)"
379-
)
380-
381390
# inject into predictor api
382391
return predictor_api.fit_gp(
383392
assay=assay,

openprotein/fold/future.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,8 @@ def affinity(self) -> "BoltzAffinity":
464464
AttributeError
465465
If affinity is not supported for the model.
466466
"""
467+
from .boltz import BoltzAffinity
468+
467469
if self.model_id not in {"boltz-1", "boltz-1x", "boltz-2"}:
468470
raise AttributeError("affinity not supported for non-Boltz model")
469471
if self._affinity is None:

openprotein/predictor/api.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,8 @@ def predictor_fit_gp_post(
162162
body["name"] = name
163163
if description is not None:
164164
body["description"] = description
165-
# add kwargs for embeddings kwargs
166-
body.update(kwargs)
165+
# add kwargs for embeddings kwargs to features
166+
body["features"].update(kwargs)
167167

168168
response = session.post(endpoint, json=body)
169169
return PredictorTrainJob.model_validate(response.json())

openprotein/predictor/predictor.py

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
"""Predictor API providing the interface to train and predict predictors."""
22

33
from openprotein.base import APISession
4-
from openprotein.common import FeatureType, ReductionType
4+
from openprotein.common import Feature, FeatureType, Reduction, ReductionType
55
from openprotein.data import (
66
AssayDataset,
77
AssayMetadata,
8+
DataAPI,
89
)
910
from openprotein.embeddings import EmbeddingModel, EmbeddingsAPI
1011
from openprotein.errors import InvalidParameterError
@@ -120,8 +121,8 @@ def fit_gp(
120121
assay: AssayDataset | AssayMetadata | str,
121122
properties: list[str],
122123
model: EmbeddingModel | SVDModel | str,
123-
feature_type: FeatureType | None = None,
124-
reduction: ReductionType | None = None,
124+
feature_type: Feature | FeatureType | None = None,
125+
reduction: Reduction | ReductionType | None = None,
125126
name: str | None = None,
126127
description: str | None = None,
127128
**kwargs,
@@ -139,10 +140,10 @@ def fit_gp(
139140
Instance of either EmbeddingModel or SVDModel to use depending
140141
on feature type. Can also be a str specifying the model id,
141142
but then feature_type would have to be specified.
142-
feature_type : FeatureType or None
143+
feature_type : Feature or FeatureType or None
143144
Type of features to use for encoding sequences. "SVD" or "PLM".
144145
None would require model to be EmbeddingModel or SVDModel.
145-
reduction : str or None, optional
146+
reduction : Reduction or ReductionType or None, optional
146147
Type of embedding reduction to use for computing features.
147148
E.g. "MEAN" or "SUM". Used only if using EmbeddingModel, and
148149
must be non-nil if using an EmbeddingModel. Defaults to None.
@@ -154,6 +155,29 @@ def fit_gp(
154155
PredictorModel
155156
The GP model being fit.
156157
"""
158+
data_api = getattr(self.session, "data", None)
159+
assert isinstance(data_api, DataAPI)
160+
# 1. Check assay data input
161+
# get assay if str
162+
assay = data_api.get(assay_id=assay) if isinstance(assay, str) else assay
163+
# extract assay_id
164+
assay_id = (
165+
assay.assay_id
166+
if isinstance(assay, AssayMetadata)
167+
else assay.id if isinstance(assay, AssayDataset) else assay
168+
)
169+
if len(properties) == 0:
170+
raise InvalidParameterError("Expected (at-least) 1 property to train")
171+
if not set(properties) <= set(assay.measurement_names):
172+
raise InvalidParameterError(
173+
f"Expected all provided properties to be a subset of assay's measurements: {assay.measurement_names}"
174+
)
175+
# TODO - support multitask
176+
if len(properties) > 1:
177+
raise InvalidParameterError(
178+
"Training a multitask GP is not yet supported (i.e. number of properties should only be 1 for now)"
179+
)
180+
# 2. Check features input
157181
# extract feature type
158182
feature_type = (
159183
FeatureType.PLM
@@ -164,6 +188,15 @@ def fit_gp(
164188
raise InvalidParameterError(
165189
"Expected feature_type to be provided if passing str model_id as model"
166190
)
191+
# runtime check on value
192+
if isinstance(feature_type, str):
193+
feature_type = FeatureType(feature_type)
194+
195+
# 3. Check reduction
196+
if isinstance(reduction, str):
197+
reduction = ReductionType(reduction)
198+
reduction = reduction.value
199+
167200
# get model if model_id
168201
if feature_type == FeatureType.PLM:
169202
if reduction is None:
@@ -183,19 +216,14 @@ def fit_gp(
183216
model = svd_api.get_svd(model)
184217
assert isinstance(model, SVDModel), "Expected SVDModel"
185218
model_id = model.id
186-
# get assay_id
187-
assay_id = (
188-
assay.assay_id
189-
if isinstance(assay, AssayMetadata)
190-
else assay.id if isinstance(assay, AssayDataset) else assay
191-
)
219+
192220
return PredictorModel(
193221
session=self.session,
194222
job=api.predictor_fit_gp_post(
195223
session=self.session,
196224
assay_id=assay_id,
197225
properties=properties,
198-
feature_type=feature_type,
226+
feature_type=feature_type.value,
199227
model_id=model_id,
200228
reduction=reduction,
201229
name=name,

0 commit comments

Comments
 (0)