
Commit 0837b4a

GPT tuning

OKUA1 and iryna-kondr committed
Co-authored-by: Iryna Kondrashchenko <[email protected]>
1 parent 8cc24b3 commit 0837b4a

File tree: 4 files changed (+107 −3 lines)


skllm/llm/gpt/mixin.py

Lines changed: 9 additions & 2 deletions
```diff
@@ -197,7 +197,7 @@ def _get_embeddings(self, text: np.ndarray) -> List[List[float]]:
 
 # for now this works only with OpenAI
 class GPTTunableMixin(BaseTunableMixin):
-    system_msg = "You are a text classification model."
+    _supported_tunable_models = ["gpt-3.5-turbo-0613", "gpt-3.5-turbo"]
 
     def _build_label(self, label: str):
         return json.dumps({"label": label})
@@ -211,9 +211,16 @@ def _tune(self, X, y):
         filename = f"skllm_{file_uuid}.jsonl"
         with open(filename, "w+") as f:
             for xi, yi in zip(X, y):
+                prompt = self._get_prompt(xi)
+                if not isinstance(prompt["messages"], str):
+                    raise ValueError(
+                        "Incompatible prompt. Use a prompt with a single message."
+                    )
                 f.write(
                     _build_clf_example(
-                        self._get_prompt(xi), self._build_label(yi), self.system_msg
+                        prompt["messages"],
+                        self._build_label(yi),
+                        prompt["system_message"],
                     )
                 )
                 f.write("\n")
```

skllm/models/_base/classifier.py

Lines changed: 26 additions & 1 deletion
```diff
@@ -19,6 +19,8 @@
     ZERO_SHOT_MLCLF_PROMPT_TEMPLATE,
     FEW_SHOT_CLF_PROMPT_TEMPLATE,
     FEW_SHOT_MLCLF_PROMPT_TEMPLATE,
+    ZERO_SHOT_CLF_SHORT_PROMPT_TEMPLATE,
+    ZERO_SHOT_MLCLF_SHORT_PROMPT_TEMPLATE,
 )
 from skllm.prompts.builders import (
     build_zero_shot_prompt_slc,
@@ -134,7 +136,7 @@ def _extract_labels(self, y) -> List[str]:
 class BaseClassifier(ABC, _SklBaseEstimator, _SklClassifierMixin):
     def __init__(
         self,
-        model: str,
+        model: Optional[str],  # model can initially be None for tunable estimators
         default_label: str = "Random",
         max_labels: Optional[int] = 5,
         prompt_template: Optional[str] = None,
@@ -452,3 +454,26 @@ def fit(
         super().fit(X, y)
         self._tune(X, y)
         return self
+
+    def _get_prompt_template(self) -> str:
+        """Returns the prompt template to use for a single input."""
+        if self.prompt_template is not None:
+            return self.prompt_template
+        elif isinstance(self, SingleLabelMixin):
+            return ZERO_SHOT_CLF_SHORT_PROMPT_TEMPLATE
+        return ZERO_SHOT_MLCLF_SHORT_PROMPT_TEMPLATE
+
+    def _get_prompt(self, x: str) -> dict:
+        """Returns the prompt to use for a single input."""
+        if isinstance(self, SingleLabelMixin):
+            prompt = build_zero_shot_prompt_slc(
+                x, repr(self.classes_), template=self._get_prompt_template()
+            )
+        else:
+            prompt = build_zero_shot_prompt_mlc(
+                x,
+                repr(self.classes_),
+                self.max_labels,
+                template=self._get_prompt_template(),
+            )
+        return {"messages": prompt, "system_message": "You are a text classifier."}
```
Lines changed: 62 additions & 0 deletions
```diff
@@ -0,0 +1,62 @@
+from skllm.llm.gpt.mixin import (
+    GPTClassifierMixin as _GPTClassifierMixin,
+    GPTTunableMixin as _GPTTunableMixin,
+)
+from skllm.models._base.classifier import (
+    BaseTunableClassifier as _BaseTunableClassifier,
+    SingleLabelMixin as _SingleLabelMixin,
+    MultiLabelMixin as _MultiLabelMixin,
+)
+from typing import Optional
+
+
+class _Tunable(_BaseTunableClassifier, _GPTClassifierMixin, _GPTTunableMixin):
+    def _set_hyperparameters(self, base_model: str, n_epochs: int, custom_suffix: str):
+        self.base_model = base_model
+        self.n_epochs = n_epochs
+        self.custom_suffix = custom_suffix
+        if base_model not in self._supported_tunable_models:
+            raise ValueError(
+                f"Model {base_model} is not supported. Supported models are"
+                f" {self._supported_tunable_models}"
+            )
+
+
+class GPTClassifier(_Tunable, _SingleLabelMixin):
+    def __init__(
+        self,
+        base_model: str = "gpt-3.5-turbo-0613",
+        default_label: Optional[str] = "Random",
+        key: Optional[str] = None,
+        org: Optional[str] = None,
+        n_epochs: Optional[int] = None,
+        custom_suffix: Optional[str] = "skllm",
+        prompt_template: Optional[str] = None,
+    ):
+        super().__init__(
+            model=None, default_label=default_label, prompt_template=prompt_template
+        )
+        self._set_keys(key, org)
+        self._set_hyperparameters(base_model, n_epochs, custom_suffix)
+
+
+class MultiLabelGPTClassifier(_Tunable, _MultiLabelMixin):
+    def __init__(
+        self,
+        base_model: str = "gpt-3.5-turbo-0613",
+        default_label: Optional[str] = "Random",
+        key: Optional[str] = None,
+        org: Optional[str] = None,
+        n_epochs: Optional[int] = None,
+        custom_suffix: Optional[str] = "skllm",
+        prompt_template: Optional[str] = None,
+        max_labels: Optional[int] = 5,
+    ):
+        super().__init__(
+            model=None,
+            default_label=default_label,
+            prompt_template=prompt_template,
+            max_labels=max_labels,
+        )
+        self._set_keys(key, org)
+        self._set_hyperparameters(base_model, n_epochs, custom_suffix)
```
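A short usage sketch of the new tunable estimators. The toy data and key setup are illustrative, and the import path for `GPTClassifier` depends on where this new module lives (its filename is not shown in this view):

```python
from skllm.config import SKLLMConfig
# from skllm.models... import GPTClassifier  # path of the new module not shown here

SKLLMConfig.set_openai_key("<YOUR_KEY>")  # or pass key=... to the estimator

# Hypothetical toy data; any parallel sequences of texts and labels work.
X = ["I loved this movie", "Terrible service", "Would buy again"]
y = ["positive", "negative", "positive"]

clf = GPTClassifier(base_model="gpt-3.5-turbo-0613", n_epochs=1)
clf.fit(X, y)          # writes the JSONL file and launches a fine-tuning job
print(clf.predict(X))  # uses the tuned model once the job completes
```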

skllm/prompts/templates.py

Lines changed: 10 additions & 0 deletions
```diff
@@ -15,6 +15,16 @@
 Your JSON response:
 """
 
+ZERO_SHOT_CLF_SHORT_PROMPT_TEMPLATE = """
+Classify the following text into one of the following classes: {labels}. Provide your response in a JSON format containing a single key `label`.
+Text: ```{x}```
+"""
+
+ZERO_SHOT_MLCLF_SHORT_PROMPT_TEMPLATE = """
+Classify the following text into at least 1 but up to {max_cats} of the following classes: {labels}. Provide your response in a JSON format containing a single key `label`.
+Text: ```{x}```
+"""
+
 FEW_SHOT_CLF_PROMPT_TEMPLATE = """
 You will be provided with the following information:
 1. An arbitrary text sample. The sample is delimited with triple backticks.
```
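To see how the new short template renders, a quick sketch with placeholder values (the label set and text are illustrative):

```python
# Assumes the template above is importable from skllm.prompts.templates.
from skllm.prompts.templates import ZERO_SHOT_CLF_SHORT_PROMPT_TEMPLATE

filled = ZERO_SHOT_CLF_SHORT_PROMPT_TEMPLATE.format(
    labels="['spam', 'ham']",  # repr(self.classes_) in _get_prompt
    x="Win a free cruise now!",
)
print(filled)
# Classify the following text into one of the following classes: ['spam', 'ham']. ...
# Text: ```Win a free cruise now!```
```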
