Commit 6dce35d

OKUA1 and iryna-kondr committed
tunable models
Co-authored-by: Iryna Kondrashchenko <[email protected]>
1 parent 5fbecdc commit 6dce35d

File tree

14 files changed: +221 -37 lines changed


.pre-commit-config.yaml

Lines changed: 0 additions & 15 deletions
@@ -56,18 +56,3 @@ repos:
       - id: docformatter
         additional_dependencies: [tomli]
         args: ["--in-place", "--config", "pyproject.toml"]
-  # Python tool for docstring coverage
-  - repo: https://github.com/econchick/interrogate
-    rev: 1.5.0
-    hooks:
-      - id: interrogate
-        args:
-          [
-            "--config",
-            "pyproject.toml",
-            "--generate-badge",
-            ".github/assets/badges",
-            "--badge-style",
-            "flat",
-          ]
-        pass_filenames: false

skllm/llm/base.py

Lines changed: 4 additions & 0 deletions
@@ -27,3 +27,7 @@ class BaseTunableMixin(ABC):
     @abstractmethod
     def _tune(self, X: Any, y: Any):
         pass
+
+    @abstractmethod
+    def _set_hyperparameters(self, **kwargs):
+        pass
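The diff above only declares the second abstract method on BaseTunableMixin. A minimal sketch of how a backend could satisfy this interface; the DummyTunable class, its kwargs handling, and the training_data_ attribute are illustrative assumptions, not part of the commit:

from abc import ABC, abstractmethod
from typing import Any


class BaseTunableMixin(ABC):
    # mirror of the interface shown in the diff, repeated here so the sketch is self-contained
    @abstractmethod
    def _tune(self, X: Any, y: Any):
        pass

    @abstractmethod
    def _set_hyperparameters(self, **kwargs):
        pass


class DummyTunable(BaseTunableMixin):
    """Illustrative backend: stores hyperparameters and 'tunes' by recording the data."""

    def _set_hyperparameters(self, **kwargs):
        for name, value in kwargs.items():
            setattr(self, name, value)

    def _tune(self, X: Any, y: Any):
        self.training_data_ = list(zip(X, y))
        return self


backend = DummyTunable()
backend._set_hyperparameters(base_model="my-model", n_epochs=1)
backend._tune(["hello"], ["world"])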

skllm/llm/gpt/clients/openai/completion.py

Lines changed: 4 additions & 2 deletions
@@ -14,6 +14,7 @@ def get_chat_completion(
     org: str,
     model: str = "gpt-3.5-turbo",
     api="openai",
+    json_response=False,
 ):
     """Gets a chat completion from the OpenAI API.
 
@@ -38,12 +39,13 @@ def get_chat_completion(
     """
     if api == "openai":
         client = set_credentials(key, org)
-        model_dict = {"model": model, "response_format": {"type": "json_object"}}
     elif api == "azure":
         client = set_azure_credentials(key, org)
-        model_dict = {"model": model}
     else:
         raise ValueError("Invalid API")
+    model_dict = {"model": model}
+    if json_response and model in ["gpt-4-1106-preview", "gpt-3.5-turbo-1106"]:
+        model_dict["response_format"] = {"type": "json_object"}
     completion = client.chat.completions.create(
         temperature=0.0, messages=messages, **model_dict
     )
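After this change, JSON mode is only requested for the two model names listed in the diff; other models fall back to a plain completion. A hedged usage sketch of the updated helper (the placeholder credentials are assumptions and the call is not executed here; the positional order messages, key, org follows the wrapper in skllm/llm/gpt/completion.py):

from skllm.llm.gpt.clients.openai.completion import get_chat_completion

messages = [{"role": "user", "content": "Return the label as JSON."}]

completion = get_chat_completion(
    messages,
    "<OPENAI_API_KEY>",   # placeholder key
    "<OPENAI_ORG_ID>",    # placeholder organization
    model="gpt-3.5-turbo-1106",
    api="openai",
    json_response=True,   # response_format={"type": "json_object"} is added only for supported models
)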

skllm/llm/gpt/clients/openai/credentials.py

Lines changed: 8 additions & 2 deletions
@@ -3,6 +3,7 @@
 from time import sleep
 from openai import OpenAI, AzureOpenAI
 
+
 def set_credentials(key: str, org: str) -> None:
     """Set the OpenAI key and organization.
 
@@ -16,6 +17,7 @@ def set_credentials(key: str, org: str) -> None:
     client = OpenAI(api_key=key, organization=org)
     return client
 
+
 def set_azure_credentials(key: str, org: str) -> None:
     """Sets OpenAI credentials for Azure.
 
@@ -26,6 +28,10 @@ def set_azure_credentials(key: str, org: str) -> None:
     org : str
         The OpenAI (Azure) organization ID to use.
     """
-    client = AzureOpenAI(api_key=key, organization=org, api_version=_Config.get_azure_api_version(), azure_endpoint = _Config.get_azure_api_base())
+    client = AzureOpenAI(
+        api_key=key,
+        organization=org,
+        api_version=_Config.get_azure_api_version(),
+        azure_endpoint=_Config.get_azure_api_base(),
+    )
     return client
-

skllm/llm/gpt/completion.py

Lines changed: 7 additions & 1 deletion
@@ -11,6 +11,7 @@ def get_chat_completion(
     openai_key: str = None,
     openai_org: str = None,
     model: str = "gpt-3.5-turbo",
+    json_response: bool = False,
 ):
     """Gets a chat completion from the OpenAI compatible API."""
     if model.startswith("gpt4all::"):
@@ -20,5 +21,10 @@ def get_chat_completion(
     if api == "azure":
         model = model[7:]
     return _oai_get_chat_completion(
-        messages, openai_key, openai_org, model, api=api
+        messages,
+        openai_key,
+        openai_org,
+        model,
+        api=api,
+        json_response=json_response,
     )

skllm/llm/gpt/mixin.py

Lines changed: 19 additions & 1 deletion
@@ -60,6 +60,8 @@ class GPTMixin:
     A mixin class that provides OpenAI key and organization to other classes.
     """
 
+    _prefer_json_output = False
+
    def _set_keys(self, key: Optional[str] = None, org: Optional[str] = None) -> None:
        """
        Set the OpenAI key and organization.
@@ -132,12 +134,18 @@ def _get_chat_completion(
         for message in messages:
             msgs.append(construct_message(message["role"], message["content"]))
         completion = get_chat_completion(
-            msgs, self._get_openai_key(), self._get_openai_org(), model
+            msgs,
+            self._get_openai_key(),
+            self._get_openai_org(),
+            model,
+            json_response=self._prefer_json_output,
         )
         return completion
 
 
 class GPTClassifierMixin(GPTTextCompletionMixin, BaseClassifierMixin):
+    _prefer_json_output = True
+
     def _extract_out_label(self, completion: Mapping[str, Any], **kwargs) -> Any:
         """Extracts the label from a completion.
 
@@ -205,6 +213,16 @@ class GPTTunableMixin(BaseTunableMixin):
     def _build_label(self, label: str):
         return json.dumps({"label": label})
 
+    def _set_hyperparameters(self, base_model: str, n_epochs: int, custom_suffix: str):
+        self.base_model = base_model
+        self.n_epochs = n_epochs
+        self.custom_suffix = custom_suffix
+        if base_model not in self._supported_tunable_models:
+            raise ValueError(
+                f"Model {base_model} is not supported. Supported models are"
+                f" {self._supported_tunable_models}"
+            )
+
     def _tune(self, X, y):
         if self.base_model.startswith(("azure::", "gpt4all")):
             raise ValueError(
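The _prefer_json_output flag is a class-level switch: GPTMixin defaults it to False and GPTClassifierMixin flips it to True, so classifier completions are requested in JSON mode while plain text-completion mixins are not. A self-contained sketch of that flag-propagation pattern in isolation; the class names below are illustrative stand-ins, not the library's:

class CompletionBase:
    _prefer_json_output = False  # default: plain text completions

    def request_kwargs(self, model: str) -> dict:
        # the flag is read at request time, mirroring _get_chat_completion in the diff
        return {"model": model, "json_response": self._prefer_json_output}


class ClassifierLike(CompletionBase):
    _prefer_json_output = True  # classifiers ask for JSON so the label can be parsed reliably


print(CompletionBase().request_kwargs("gpt-3.5-turbo-1106"))
# {'model': 'gpt-3.5-turbo-1106', 'json_response': False}
print(ClassifierLike().request_kwargs("gpt-3.5-turbo-1106"))
# {'model': 'gpt-3.5-turbo-1106', 'json_response': True}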

skllm/llm/vertex/mixin.py

Lines changed: 22 additions & 3 deletions
@@ -6,11 +6,12 @@
     BaseTextCompletionMixin,
     BaseTunableMixin,
 )
+from skllm.llm.vertex.tuning import tune
 from skllm.llm.vertex.completion import get_completion_chat_mode, get_completion
 from skllm.utils import extract_json_key
 import numpy as np
 from tqdm import tqdm
-import json
+import pandas as pd
 
 
 class VertexMixin:
@@ -68,6 +69,24 @@ def _get_embeddings(self, text: np.ndarray) -> List[List[float]]:
 
 
 class VertexTunableMixin(BaseTunableMixin):
-    # TODO
+    _supported_tunable_models = ["text-bison@002"]
+
+    def _set_hyperparameters(self, base_model: str, n_update_steps: int, **kwargs):
+        self.verify_model_is_supported(base_model)
+        self.base_model = base_model
+        self.n_update_steps = n_update_steps
+
+    def verify_model_is_supported(self, model: str):
+        if model not in self._supported_tunable_models:
+            raise ValueError(
+                f"Model {model} is not supported. Supported models are"
+                f" {self._supported_tunable_models}"
+            )
+
     def _tune(self, X: Any, y: Any):
-        raise NotImplementedError("Tuning is not yet supported for Vertex AI.")
+        df = pd.DataFrame({"input_text": X, "output_text": y})
+        job = tune(self.base_model, df, self.n_update_steps)._job
+        tuned_model = job.result()
+        self.tuned_model_ = tuned_model._model_resource_name
+        self.model = tuned_model
+        return self
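The new _tune converts X and y into the two-column DataFrame that the Vertex tuning helper expects and then blocks on the tuning job. A hedged sketch of just the data-preparation step with toy strings (the actual tune call would start a real Vertex AI tuning job, so it is omitted here):

import pandas as pd

X = ["What is the capital of France?", "What is 2 + 2?"]
y = ["Paris", "4"]

# same structure that _tune builds before handing it to skllm.llm.vertex.tuning.tune
df = pd.DataFrame({"input_text": X, "output_text": y})
print(df)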

skllm/models/_base/text2text.py

Lines changed: 79 additions & 1 deletion
@@ -1 +1,79 @@
-pass
+from typing import Any, Union, List, Optional
+from abc import abstractmethod, ABC
+from numpy import ndarray
+from tqdm import tqdm
+import numpy as np
+import pandas as pd
+from skllm.utils import to_numpy as _to_numpy
+from sklearn.base import (
+    BaseEstimator as _SklBaseEstimator,
+    TransformerMixin as _SklTransformerMixin,
+)
+from skllm.llm.base import BaseTunableMixin as _BaseTunableMixin
+
+
+class BaseText2TextModel(ABC, _SklBaseEstimator, _SklTransformerMixin):
+    def fit(self, X: Any, y: Any):
+        return self
+
+    def predict(self, X: Union[np.ndarray, pd.Series, List[str]]):
+        return self.transform(X)
+
+    def fit_transform(
+        self,
+        X: Union[np.ndarray, pd.Series, List[str]],
+        y: Union[np.ndarray, pd.Series, List[str]],
+    ) -> ndarray:
+        return self.fit(X, y).transform(X)
+
+    def transform(self, X: Union[np.ndarray, pd.Series, List[str]]):
+        """Predicts the class of each input.
+
+        Parameters
+        ----------
+        X : Union[np.ndarray, pd.Series, List[str]]
+            The input data to predict the class of.
+
+        Returns
+        -------
+        List[str]
+        """
+        X = _to_numpy(X)
+        predictions = []
+        for i in tqdm(range(len(X))):
+            predictions.append(self._predict_single(X[i]))
+        return predictions
+
+    def _predict_single(self, x: Any) -> Any:
+        prompt_dict = self._get_prompt(x)
+        # this will be inherited from the LLM
+        prediction = self._get_chat_completion(model=self.model, **prompt_dict)
+        return prediction
+
+    @abstractmethod
+    def _get_prompt(self, x: str) -> dict:
+        """Returns the prompt to use for a single input."""
+        pass
+
+
+class BaseTunableText2TextModel(BaseText2TextModel):
+    def fit(
+        self,
+        X: Union[np.ndarray, pd.Series, List[str]],
+        y: Union[np.ndarray, pd.Series, List[str]],
+    ):
+        if not isinstance(self, _BaseTunableMixin):
+            raise TypeError(
+                "Classifier must be mixed with a skllm.llm.base.BaseTunableMixin class"
+            )
+        self._tune(X, y)
+        return self
+
+    def _get_prompt(self, x: str) -> dict:
+        """Returns the prompt to use for a single input."""
+        return str(x)
+
+    def _predict_single(self, x: str) -> str:
+        if self.model is None:
+            raise RuntimeError("Model has not been tuned yet")
+        return super()._predict_single(x)
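BaseText2TextModel leaves two hooks to concrete subclasses: _get_prompt (abstract here) and _get_chat_completion (normally contributed by an LLM mixin). A hedged sketch that stubs both hooks to show how transform iterates over the inputs; EchoText2Text is illustrative only, has no LLM behind it, and assumes scikit-llm at the state of this commit is importable:

from skllm.models._base.text2text import BaseText2TextModel


class EchoText2Text(BaseText2TextModel):
    """Illustrative subclass: no LLM call, just echoes an upper-cased prompt."""

    model = "echo"  # _predict_single passes this to _get_chat_completion

    def _get_prompt(self, x: str) -> dict:
        # the returned dict is unpacked into _get_chat_completion as keyword arguments
        return {"messages": f"Rewrite politely: {x}"}

    def _get_chat_completion(self, model: str, messages: str):
        # normally provided by an LLM mixin; stubbed out here
        return messages.upper()


print(EchoText2Text().transform(["hello", "world"]))
# ['REWRITE POLITELY: HELLO', 'REWRITE POLITELY: WORLD']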

skllm/models/gpt/classification/tunable.py

Lines changed: 4 additions & 12 deletions
@@ -10,19 +10,11 @@
 from typing import Optional
 
 
-class _Tunable(_BaseTunableClassifier, _GPTClassifierMixin, _GPTTunableMixin):
-    def _set_hyperparameters(self, base_model: str, n_epochs: int, custom_suffix: str):
-        self.base_model = base_model
-        self.n_epochs = n_epochs
-        self.custom_suffix = custom_suffix
-        if base_model not in self._supported_tunable_models:
-            raise ValueError(
-                f"Model {base_model} is not supported. Supported models are"
-                f" {self._supported_tunable_models}"
-            )
+class _TunableClassifier(_BaseTunableClassifier, _GPTClassifierMixin, _GPTTunableMixin):
+    pass
 
 
-class GPTClassifier(_Tunable, _SingleLabelMixin):
+class GPTClassifier(_TunableClassifier, _SingleLabelMixin):
     def __init__(
         self,
         base_model: str = "gpt-3.5-turbo-0613",
@@ -40,7 +32,7 @@ def __init__(
         self._set_hyperparameters(base_model, n_epochs, custom_suffix)
 
 
-class MultiLabelGPTClassifier(_Tunable, _MultiLabelMixin):
+class MultiLabelGPTClassifier(_TunableClassifier, _MultiLabelMixin):
     def __init__(
         self,
         base_model: str = "gpt-3.5-turbo-0613",
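With the hyperparameter logic moved into GPTTunableMixin, the tunable classifiers become thin compositions of mixins. A hedged usage sketch with toy data; it is not executed here because fit would launch a real OpenAI fine-tuning job, it assumes OpenAI credentials are configured beforehand, and the __init__ arguments other than base_model (e.g. n_epochs, custom_suffix) are inferred from the truncated diff rather than shown in full:

from skllm.models.gpt.classification.tunable import GPTClassifier

X = ["I loved this movie", "Worst purchase I ever made"]
y = ["positive", "negative"]

clf = GPTClassifier(base_model="gpt-3.5-turbo-0613")  # n_epochs / custom_suffix also configurable per the diff
clf.fit(X, y)          # triggers _tune, i.e. an OpenAI fine-tuning job
print(clf.predict(X))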

skllm/models/gpt/text2text/__init__.py

Whitespace-only changes.
