Commit 9c9b01a

Adding version for GH Actions.
1 parent 1a4e5fc commit 9c9b01a

7 files changed, 1342 insertions(+), 16 deletions(-)

.dockerignore

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+.git
+.gitignore
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+*.swp
+*.DS_Store
+.venv/
+.env
+# Local artifacts
+*.log

Dockerfile

Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
+# =========================
+# Dockerfile
+# =========================
+# Slim base image
+FROM python:3.12-slim AS runtime
+
+# Avoid interactive prompts and speed up pip
+ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    GRADIO_SERVER_NAME=0.0.0.0 \
+    GRADIO_SERVER_PORT=7860
+
+# System dependencies (ca-certificates + curl for the healthcheck)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create a non-root user
+RUN useradd -m -u 10001 appuser
+WORKDIR /app
+
+# Copy only requirements first (better layer caching)
+COPY requirements.txt /app/requirements.txt
+
+# Install Python dependencies
+RUN pip install --no-cache-dir -r /app/requirements.txt
+
+# Copy the rest of the project (do NOT copy .env; use --env-file with run/compose)
+COPY . /app
+
+# Adjust permissions
+RUN chown -R appuser:appuser /app
+USER appuser
+
+# Expose the Gradio port
+EXPOSE 7860
+
+# Simple healthcheck (Gradio serves HTML at the root)
+HEALTHCHECK --interval=30s --timeout=5s --retries=5 \
+    CMD curl -fsS http://127.0.0.1:7860/ >/dev/null || exit 1
+
+# Entry point: run your main
+ENTRYPOINT ["python", "-u", "main_demo_released.py"]
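
For a quick check from the host, the same probe the HEALTHCHECK runs with curl can be written in Python. A minimal sketch, assuming the container is up and port 7860 (the GRADIO_SERVER_PORT set above) is published; the 5-second timeout mirrors the healthcheck's --timeout=5s:

import urllib.request

# Fetch the Gradio root page, as the Dockerfile healthcheck does.
# HTTPError and connection failures both derive from OSError.
URL = "http://127.0.0.1:7860/"

try:
    with urllib.request.urlopen(URL, timeout=5) as resp:
        print(f"healthy: HTTP {resp.status}")
except OSError as exc:
    print(f"unhealthy: {exc}")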

main.py

Lines changed: 50 additions & 13 deletions
@@ -3,12 +3,14 @@
 from dotenv import load_dotenv
 from openai import OpenAI
 from langcache import LangCache
+from langcache.models import SearchStrategy
 
 # === Load environment variables ===
 load_dotenv()
 
 # === LangCache Configuration ===
-LANGCACHE_SERVICE_KEY = os.getenv("LANGCACHE_SERVICE_KEY")
+# Prefer LANGCACHE_API_KEY; fall back to legacy LANGCACHE_SERVICE_KEY for compatibility
+LANGCACHE_API_KEY = os.getenv("LANGCACHE_API_KEY") or os.getenv("LANGCACHE_SERVICE_KEY")
 LANGCACHE_CACHE_ID = os.getenv("LANGCACHE_CACHE_ID")
 LANGCACHE_BASE_URL = os.getenv("LANGCACHE_BASE_URL", "https://gcp-us-east4.langcache.redis.io")
 
@@ -23,44 +25,79 @@ def call_openai_llm(prompt: str) -> str:
         response = openai_client.chat.completions.create(
             model=OPENAI_MODEL,
             messages=[{"role": "user", "content": prompt}],
-            temperature=0.7
+            temperature=0.7,
         )
         return response.choices[0].message.content.strip()
     except Exception as e:
         return f"[ERROR] OpenAI request failed: {e}"
 
 
 def main():
+    if not LANGCACHE_API_KEY or not LANGCACHE_CACHE_ID:
+        print("[WARN] Missing LangCache config (LANGCACHE_API_KEY and/or LANGCACHE_CACHE_ID). Caching disabled.")
+    if not OPENAI_API_KEY:
+        raise SystemExit("Missing OPENAI_API_KEY in env.")
+
     print("LangCache Semantic Cache Chat - Type 'exit' to quit.\n")
 
-    with LangCache(
-        server_url=LANGCACHE_BASE_URL,
-        cache_id=LANGCACHE_CACHE_ID,
-        service_key=LANGCACHE_SERVICE_KEY
-    ) as lang_cache:
+    # Use a no-op context manager when LangCache isn't configured
+    class _Noop:
+        def __enter__(self): return None
+        def __exit__(self, exc_type, exc, tb): return False
+
+    cache_ctx = (
+        LangCache(server_url=LANGCACHE_BASE_URL, cache_id=LANGCACHE_CACHE_ID, api_key=LANGCACHE_API_KEY)
+        if (LANGCACHE_API_KEY and LANGCACHE_CACHE_ID)
+        else _Noop()
+    )
 
+    with cache_ctx as lang_cache:
         while True:
             query = input("Ask something: ").strip()
             if query.lower() in {"exit", "quit"}:
                 break
 
+            cached_resp = None
             start_time = time.perf_counter()
-            results = lang_cache.search(prompt=query, similarity_threshold=0.7)
+
+            # Try cache only if available
+            if lang_cache:
+                try:
+                    # First: semantic search with threshold
+                    results = lang_cache.search(prompt=query, similarity_threshold=0.7)
+                    # Fallback: exact + semantic if nothing found
+                    if not results or not getattr(results, "data", None):
+                        results = lang_cache.search(
+                            prompt=query,
+                            search_strategies=[SearchStrategy.EXACT, SearchStrategy.SEMANTIC],
+                        )
+                    if results and getattr(results, "data", None):
+                        cached_resp = results.data[0].response
+                except Exception as e:
+                    print(f"[LangCache search error] {e}")
+
             elapsed_time = time.perf_counter() - start_time
 
-            if results and results.data:
+            if cached_resp:
                 print("[CACHE HIT]")
                 print(f"[Latency] Cache hit in {elapsed_time:.3f} seconds")
-                print("Response:", results.data[0].response)
+                print("Response:", cached_resp)
             else:
-                print("[CACHE MISS]")
-                print(f"[Latency] Cache miss search took {elapsed_time:.3f} seconds")
+                print("[CACHE MISS]" if lang_cache else "[CACHE DISABLED]")
+                if lang_cache:
+                    print(f"[Latency] Cache search took {elapsed_time:.3f} seconds")
 
             start_llm = time.perf_counter()
             response = call_openai_llm(query)
-            lang_cache.set(prompt=query, response=response)
             elapsed_llm = time.perf_counter() - start_llm
 
+            # Best-effort: store in cache if available
+            if lang_cache:
+                try:
+                    lang_cache.set(prompt=query, response=response)
+                except Exception as e:
+                    print(f"[LangCache set error] {e}")
+
             print(f"[Latency] OpenAI response took {elapsed_llm:.3f} seconds")
             print("Response:", response)
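
An aside on the fallback path: the hand-rolled _Noop helper duplicates what the standard library already ships. A minimal sketch, using nothing beyond the stdlib, showing that contextlib.nullcontext (Python 3.7+) behaves the same way, entering to None so the `if lang_cache:` guards skip every cache call:

from contextlib import nullcontext

# nullcontext() yields None on entry, exactly like _Noop.__enter__,
# so a `with` target bound to it stays falsy and the cache guards no-op.
with nullcontext() as lang_cache:
    assert lang_cache is None  # caching disabled; guards fall through
print("cache-disabled path exercised")

Either works; the explicit class just keeps the no-op behavior visible at the call site.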
