Commit 7cd5951

Added retry logic with custom decorators and test cases
1 parent 7df9044 commit 7cd5951

7 files changed: +963 -1 lines changed

COMMUNITY_PROVIDERS.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -13,6 +13,7 @@ Community-developed provider plugins that extend LangExtract with additional mod
 | AWS Bedrock | `langextract-bedrock` | [@andyxhadji](https://github.com/andyxhadji) | [andyxhadji/langextract-bedrock](https://github.com/andyxhadji/langextract-bedrock) | AWS Bedrock provider for LangExtract, supports all models & inference profiles | [#148](https://github.com/google/langextract/issues/148) |
 | LiteLLM | `langextract-litellm` | [@JustStas](https://github.com/JustStas) | [JustStas/langextract-litellm](https://github.com/JustStas/langextract-litellm) | LiteLLM provider for LangExtract, supports all models covered in LiteLLM, including OpenAI, Azure, Anthropic, etc. See [LiteLLM's supported models](https://docs.litellm.ai/docs/providers) | [#187](https://github.com/google/langextract/issues/187) |
 | Llama.cpp | `langextract-llamacpp` | [@fgarnadi](https://github.com/fgarnadi) | [fgarnadi/langextract-llamacpp](https://github.com/fgarnadi/langextract-llamacpp) | Llama.cpp provider for LangExtract, supports GGUF models from HuggingFace and local files | [#199](https://github.com/google/langextract/issues/199) |
+| Outlines | `langextract-outlines` | [@RobinPicard](https://github.com/RobinPicard) | [dottxt-ai/langextract-outlines](https://github.com/dottxt-ai/langextract-outlines) | Outlines provider for LangExtract, supports structured generation for various local and API-based models | [#101](https://github.com/google/langextract/issues/101) |
 | vLLM | `langextract-vllm` | [@wuli666](https://github.com/wuli666) | [wuli666/langextract-vllm](https://github.com/wuli666/langextract-vllm) | vLLM provider for LangExtract, supports local and distributed model serving | [#236](https://github.com/google/langextract/issues/236) |
 <!-- ADD NEW PLUGINS ABOVE THIS LINE -->
```

langextract/annotation.py

Lines changed: 217 additions & 1 deletion

```diff
@@ -35,10 +35,12 @@
 from langextract import progress
 from langextract import prompting
 from langextract import resolver as resolver_lib
+from langextract import retry_utils
 from langextract.core import base_model
 from langextract.core import data
 from langextract.core import exceptions
 from langextract.core import format_handler as fh
+from langextract.core import types as core_types
 
 
 class DocumentRepeatError(exceptions.LangExtractError):
```
```diff
@@ -202,6 +204,173 @@ def __init__(
         "Annotator initialized with format_handler: %s", format_handler
     )
 
+  def _process_batch_with_retry(
+      self,
+      batch_prompts: list[str],
+      batch: list[chunking.TextChunk],
+      retry_transient_errors: bool = True,
+      max_retries: int = 3,
+      retry_initial_delay: float = 1.0,
+      retry_backoff_factor: float = 2.0,
+      retry_max_delay: float = 60.0,
+      **kwargs,
+  ) -> Iterator[list[core_types.ScoredOutput]]:
+    """Process a batch of prompts with individual chunk retry capability.
+
+    This method first attempts normal batch inference. If the batch fails
+    with a transient error (like a 503 "model overloaded"), it falls back
+    to processing each chunk individually with retries, so successful
+    chunks from the same batch are preserved.
+
+    Args:
+      batch_prompts: List of prompts for the batch.
+      batch: List of TextChunk objects corresponding to the prompts.
+      retry_transient_errors: Whether to retry on transient errors.
+      max_retries: Maximum number of retry attempts per chunk.
+      retry_initial_delay: Initial delay in seconds before the first retry.
+      retry_backoff_factor: Backoff multiplier applied between retries.
+      retry_max_delay: Maximum delay in seconds between retries.
+      **kwargs: Additional arguments passed to the language model.
+
+    Yields:
+      Lists of ScoredOutputs, with retries for failed chunks.
+    """
+    try:
+      batch_results = list(
+          self._language_model.infer(
+              batch_prompts=batch_prompts,
+              **kwargs,
+          )
+      )
+
+      for result in batch_results:
+        yield result
+      return
+
+    except Exception as e:
+      if not retry_utils.is_transient_error(e):
+        raise
+
+      logging.warning(
+          "Batch processing failed with transient error: %s. "
+          "Falling back to individual chunk processing with retry.",
+          str(e),
+      )
+
+    individual_results = []
+
+    for i, (prompt, chunk) in enumerate(zip(batch_prompts, batch)):
+      try:
+        chunk_result = self._process_single_chunk_with_retry(
+            prompt=prompt,
+            chunk=chunk,
+            retry_transient_errors=retry_transient_errors,
+            max_retries=max_retries,
+            retry_initial_delay=retry_initial_delay,
+            retry_backoff_factor=retry_backoff_factor,
+            retry_max_delay=retry_max_delay,
+            **kwargs,
+        )
+        individual_results.append(chunk_result)
+
+      except Exception as e:
+        logging.error(
+            "Failed to process chunk %d after retries: %s. "
+            "Chunk info: document_id=%s, text_length=%d. "
+            "Stopping document processing.",
+            i,
+            str(e),
+            chunk.document_id,
+            len(chunk.chunk_text),
+        )
+        raise
+
+    for result in individual_results:
+      yield result
+
+  def _process_single_chunk_with_retry(
+      self,
+      prompt: str,
+      chunk: chunking.TextChunk,
+      retry_transient_errors: bool = True,
+      max_retries: int = 3,
+      retry_initial_delay: float = 1.0,
+      retry_backoff_factor: float = 2.0,
+      retry_max_delay: float = 60.0,
+      **kwargs,
+  ) -> list[core_types.ScoredOutput]:
+    """Process a single chunk with retry logic.
+
+    Args:
+      prompt: The prompt for this chunk.
+      chunk: The TextChunk object.
+      retry_transient_errors: Whether to retry on transient errors.
+      max_retries: Maximum number of retry attempts.
+      retry_initial_delay: Initial delay in seconds before the first retry.
+      retry_backoff_factor: Backoff multiplier applied between retries.
+      retry_max_delay: Maximum delay in seconds between retries.
+      **kwargs: Additional arguments for the language model.
+
+    Returns:
+      A list containing the ScoredOutput(s) for this chunk.
+    """
+    last_exception = None
+    delay = retry_initial_delay
+
+    for attempt in range(max_retries + 1):
+      try:
+        batch_results = list(
+            self._language_model.infer(
+                batch_prompts=[prompt],
+                **kwargs,
+            )
+        )
+
+        if not batch_results:
+          raise exceptions.InferenceOutputError(
+              f"No results returned for chunk in document {chunk.document_id}"
+          )
+
+        return batch_results[0]
+
+      except Exception as e:
+        last_exception = e
+
+        if not retry_transient_errors or not retry_utils.is_transient_error(e):
+          logging.debug(
+              "Not retrying chunk processing: retry_disabled=%s,"
+              " is_transient=%s, error=%s",
+              not retry_transient_errors,
+              retry_utils.is_transient_error(e),
+              str(e),
+          )
+          raise
+
+        if attempt >= max_retries:
+          logging.error(
+              "Chunk processing failed after %d retries: %s",
+              max_retries,
+              str(e),
+          )
+          raise
+
+        # Cap the delay, then add up to 10% random jitter so retries
+        # from concurrent chunks do not synchronize.
+        current_delay = min(delay, retry_max_delay)
+
+        import random  # local import; used only for retry jitter
+
+        jitter_amount = current_delay * 0.1 * random.random()
+        current_delay += jitter_amount
+
+        logging.warning(
+            "Chunk processing failed on attempt %d/%d due to transient error:"
+            " %s. Retrying in %.2f seconds...",
+            attempt + 1,
+            max_retries + 1,
+            str(e),
+            current_delay,
+        )
+
+        time.sleep(current_delay)
+        delay = min(delay * retry_backoff_factor, retry_max_delay)
+
+    if last_exception:
+      raise last_exception
+    raise RuntimeError("Chunk retry logic failed unexpectedly")
+
   def annotate_documents(
       self,
       documents: Iterable[data.Document],
```
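For reference, with the default parameters (initial delay 1.0 s, backoff factor 2.0, cap 60.0 s) the loop above sleeps roughly 1 s, 2 s, then 4 s before the three retries, each inflated by up to 10% jitter. The helper below is a standalone sketch mirroring that committed loop, not part of the commit:

```python
import random

def retry_delays(max_retries=3, initial=1.0, factor=2.0, cap=60.0):
    """Yield the sleep taken before each retry, mirroring the loop above."""
    delay = initial
    for _ in range(max_retries):
        current = min(delay, cap)
        current += current * 0.1 * random.random()  # up to 10% jitter
        yield current
        delay = min(delay * factor, cap)

# With the defaults: roughly [1.0, 2.0, 4.0], each plus its jitter.
print([round(d, 2) for d in retry_delays()])
```

The remaining hunks thread the new retry parameters through `annotate_documents`, the single-pass and sequential-pass helpers, and `annotate_text`: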
```diff
@@ -211,6 +380,11 @@ def annotate_documents(
       debug: bool = True,
       extraction_passes: int = 1,
       show_progress: bool = True,
+      retry_transient_errors: bool = True,
+      max_retries: int = 3,
+      retry_initial_delay: float = 1.0,
+      retry_backoff_factor: float = 2.0,
+      retry_max_delay: float = 60.0,
       **kwargs,
   ) -> Iterator[data.AnnotatedDocument]:
     """Annotates a sequence of documents with NLP extractions.
@@ -253,6 +427,11 @@
           batch_length,
           debug,
           show_progress,
+          retry_transient_errors,
+          max_retries,
+          retry_initial_delay,
+          retry_backoff_factor,
+          retry_max_delay,
           **kwargs,
       )
     else:
@@ -264,6 +443,11 @@
           debug,
           extraction_passes,
           show_progress,
+          retry_transient_errors,
+          max_retries,
+          retry_initial_delay,
+          retry_backoff_factor,
+          retry_max_delay,
           **kwargs,
       )
 
@@ -275,6 +459,11 @@ def _annotate_documents_single_pass(
       batch_length: int,
       debug: bool,
       show_progress: bool = True,
+      retry_transient_errors: bool = True,
+      max_retries: int = 3,
+      retry_initial_delay: float = 1.0,
+      retry_backoff_factor: float = 2.0,
+      retry_max_delay: float = 60.0,
       **kwargs,
   ) -> Iterator[data.AnnotatedDocument]:
     """Single-pass annotation logic (original implementation)."""
@@ -321,8 +510,15 @@ def _annotate_documents_single_pass(
           )
           progress_bar.set_description(desc)
 
-        batch_scored_outputs = self._language_model.infer(
+        # Process batch with individual chunk retry capability
+        batch_scored_outputs = self._process_batch_with_retry(
             batch_prompts=batch_prompts,
+            batch=batch,
+            retry_transient_errors=retry_transient_errors,
+            max_retries=max_retries,
+            retry_initial_delay=retry_initial_delay,
+            retry_backoff_factor=retry_backoff_factor,
+            retry_max_delay=retry_max_delay,
             **kwargs,
         )
 
@@ -419,6 +615,11 @@ def _annotate_documents_sequential_passes(
       debug: bool,
       extraction_passes: int,
       show_progress: bool = True,
+      retry_transient_errors: bool = True,
+      max_retries: int = 3,
+      retry_initial_delay: float = 1.0,
+      retry_backoff_factor: float = 2.0,
+      retry_max_delay: float = 60.0,
       **kwargs,
   ) -> Iterator[data.AnnotatedDocument]:
     """Sequential extraction passes logic for improved recall."""
@@ -446,6 +647,11 @@
           batch_length,
           debug=(debug and pass_num == 0),
           show_progress=show_progress if pass_num == 0 else False,
+          retry_transient_errors=retry_transient_errors,
+          max_retries=max_retries,
+          retry_initial_delay=retry_initial_delay,
+          retry_backoff_factor=retry_backoff_factor,
+          retry_max_delay=retry_max_delay,
           **kwargs,
       ):
         doc_id = annotated_doc.document_id
@@ -494,6 +700,11 @@ def annotate_text(
       debug: bool = True,
       extraction_passes: int = 1,
       show_progress: bool = True,
+      retry_transient_errors: bool = True,
+      max_retries: int = 3,
+      retry_initial_delay: float = 1.0,
+      retry_backoff_factor: float = 2.0,
+      retry_max_delay: float = 60.0,
       **kwargs,
   ) -> data.AnnotatedDocument:
     """Annotates text with NLP extractions for text input.
@@ -540,6 +751,11 @@
             debug,
             extraction_passes,
             show_progress,
+            retry_transient_errors,
+            max_retries,
+            retry_initial_delay,
+            retry_backoff_factor,
+            retry_max_delay,
             **kwargs,
         )
     )
```
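The `retry_utils` module imported above is among the seven changed files but is not shown in this excerpt. As a rough sketch of what its `is_transient_error` classifier presumably does — the marker list and string-matching approach are assumptions, not the committed code:

```python
# Hypothetical sketch of retry_utils.is_transient_error; the committed
# implementation lives in retry_utils.py, which this excerpt does not show.
_TRANSIENT_MARKERS = (
    "503", "429", "overloaded", "unavailable", "timed out", "rate limit",
)

def is_transient_error(error: Exception) -> bool:
    """Heuristically classify an error as transient, i.e. safe to retry."""
    message = str(error).lower()
    return any(marker in message for marker in _TRANSIENT_MARKERS)
```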

langextract/extraction.py

Lines changed: 21 additions & 0 deletions

```diff
@@ -59,6 +59,11 @@ def extract(
     prompt_validation_level: pv.PromptValidationLevel = pv.PromptValidationLevel.WARNING,
     prompt_validation_strict: bool = False,
     show_progress: bool = True,
+    retry_transient_errors: bool = True,
+    max_retries: int = 3,
+    retry_initial_delay: float = 1.0,
+    retry_backoff_factor: float = 2.0,
+    retry_max_delay: float = 60.0,
 ) -> typing.Any:
   """Extracts structured information from text.
 
@@ -150,6 +155,12 @@ def extract(
     prompt_validation_strict: When True and prompt_validation_level is ERROR,
       raises on non-exact matches (MATCH_FUZZY, MATCH_LESSER). Defaults to False.
     show_progress: Whether to show progress bar during extraction. Defaults to True.
+    retry_transient_errors: Whether to automatically retry on transient errors
+      like 503 "model overloaded". Defaults to True.
+    max_retries: Maximum number of retry attempts for transient errors. Defaults to 3.
+    retry_initial_delay: Initial delay in seconds before first retry. Defaults to 1.0.
+    retry_backoff_factor: Multiplier for exponential backoff between retries. Defaults to 2.0.
+    retry_max_delay: Maximum delay between retries in seconds. Defaults to 60.0.
 
   Returns:
     An AnnotatedDocument with the extracted information when input is a
@@ -320,6 +331,16 @@ def extract(
         format_handler=format_handler,
     )
 
+    # Add retry parameters to alignment kwargs
+    retry_kwargs = {
+        "retry_transient_errors": retry_transient_errors,
+        "max_retries": max_retries,
+        "retry_initial_delay": retry_initial_delay,
+        "retry_backoff_factor": retry_backoff_factor,
+        "retry_max_delay": retry_max_delay,
+    }
+    alignment_kwargs.update(retry_kwargs)
+
     if isinstance(text_or_documents, str):
       return annotator.annotate_text(
           text=text_or_documents,
```
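Callers tune the retry behavior directly through `extract`. A minimal usage sketch, assuming the package is imported as `lx` and following the usual LangExtract few-shot example shape; the input text, prompt, and model id are illustrative placeholders:

```python
import langextract as lx

# One few-shot example in the standard LangExtract format (illustrative).
examples = [
    lx.data.ExampleData(
        text="Patient was given 250 mg amoxicillin.",
        extractions=[
            lx.data.Extraction(
                extraction_class="medication",
                extraction_text="amoxicillin",
            ),
        ],
    ),
]

result = lx.extract(
    text_or_documents="Patient was given 400 mg ibuprofen.",
    prompt_description="Extract medication names.",
    examples=examples,
    model_id="gemini-2.5-flash",     # placeholder model id
    retry_transient_errors=True,     # retry 503 "model overloaded" and similar
    max_retries=5,                   # up from the default of 3
    retry_initial_delay=2.0,         # first retry after ~2 s
    retry_backoff_factor=2.0,        # then ~4 s, ~8 s, ...
    retry_max_delay=30.0,            # never wait longer than 30 s
)
```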

langextract/providers/gemini.py

Lines changed: 2 additions & 0 deletions

```diff
@@ -23,6 +23,7 @@
 
 from absl import logging
 
+from langextract import retry_utils
 from langextract.core import base_model
 from langextract.core import data
 from langextract.core import exceptions
@@ -179,6 +180,7 @@ def __init__(
         k: v for k, v in (kwargs or {}).items() if k in _API_CONFIG_KEYS
     }
 
+  @retry_utils.retry_chunk_processing()
   def _process_single_prompt(
       self, prompt: str, config: dict
   ) -> core_types.ScoredOutput:
```
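The `retry_chunk_processing()` decorator also comes from the new `retry_utils` module, whose body is not part of this excerpt. A plausible shape for such a decorator factory — exponential backoff with jitter around transient errors — is sketched below; the parameter names and defaults are assumptions, and the transient-error check is a stand-in for the `is_transient_error` sketch above:

```python
# Hypothetical sketch of retry_utils.retry_chunk_processing;
# the committed decorator may differ.
import functools
import random
import time

def _is_transient(error: Exception) -> bool:
    # Placeholder classifier; see the is_transient_error sketch above.
    return any(m in str(error).lower() for m in ("503", "429", "overloaded"))

def retry_chunk_processing(max_retries: int = 3, initial_delay: float = 1.0,
                           backoff_factor: float = 2.0, max_delay: float = 60.0):
    """Decorator factory: retry the wrapped call on transient errors."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            delay = initial_delay
            for attempt in range(max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    # Give up on non-transient errors or after the last attempt.
                    if attempt >= max_retries or not _is_transient(e):
                        raise
                    sleep_for = min(delay, max_delay)
                    sleep_for += sleep_for * 0.1 * random.random()  # jitter
                    time.sleep(sleep_for)
                    delay = min(delay * backoff_factor, max_delay)
        return wrapper
    return decorator
```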
