From eadc79d62a8889dff1026e98796ba4367b0630ba Mon Sep 17 00:00:00 2001 From: Tom Stockton Date: Tue, 22 Jul 2025 22:43:16 +0100 Subject: [PATCH 1/3] Add Ollama provider support for local LLM inference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - Add Ollama as a new LLM provider option for local, private inference - Enables SRE Agent to work with local Kubernetes clusters using local LLMs - No API keys required, fully offline capable ## Features Added - OllamaClient class with HTTP API integration - Support for Ollama's chat completion API - Tool calling support for MCP servers - Configurable Ollama API URL (default: localhost:11434) - Model recommendations for SRE tasks ## Configuration - Added OLLAMA provider to enum - Added OLLAMA_API_URL setting - Updated credential setup script with Ollama options - Enhanced README with Ollama setup guide ## Benefits - Privacy: All data stays local - Cost: No API usage fees - Offline: Works without internet - Local K8s: Perfect for local development clusters 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- README.md | 38 +++++++++- setup_credentials.py | 6 +- sre_agent/llm/main.py | 2 + sre_agent/llm/utils/clients.py | 123 +++++++++++++++++++++++++++++++++ sre_agent/llm/utils/schemas.py | 5 ++ 5 files changed, 170 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 793d861..2d7a53b 100644 --- a/README.md +++ b/README.md @@ -33,19 +33,51 @@ We've been writing blogs and sharing our learnings along the way. Check out our The SRE Agent supports multiple the following LLM providers: ### Anthropic -- **Models**: e.g. "claude-4-0-sonnet-latest" +- **Models**: e.g. "claude-3-5-sonnet-latest" - **Setup**: Requires `ANTHROPIC_API_KEY` ### Google Gemini -- **Models**: e.g, "gemini-2.5-flash" +- **Models**: e.g. "gemini-2.5-flash" - **Setup**: Requires `GEMINI_API_KEY` +### Ollama (Local) +- **Models**: e.g. "llama3.1", "mistral", "codellama" +- **Setup**: Install Ollama locally, no API key needed +- **Benefits**: Privacy, no API costs, offline capable + +
+🦙 Ollama Setup Guide + +### Installing Ollama +1. **Install Ollama**: Visit [ollama.ai](https://ollama.ai) and follow installation instructions +2. **Start Ollama**: Run `ollama serve` in your terminal +3. **Pull a model**: Download a model like `ollama pull llama3.1` + +### Recommended Models for SRE Tasks +- **llama3.1** (8B): Fast, good general reasoning +- **mistral** (7B): Excellent for technical tasks +- **codellama** (7B): Specialized for code analysis +- **llama3.1:70b**: Most capable but requires more resources + +### Configuration +Set these in your `.env` file: +```bash +PROVIDER=ollama +MODEL=llama3.1 +OLLAMA_API_URL=http://localhost:11434 # default +``` + +
+ ## 🛠️ Prerequisites - [Docker](https://docs.docker.com/get-docker/) - A `.env` file in your project root ([see below](#getting-started)) -- An app deployed on AWS EKS (Elastic Kubernetes Service) or GCP GKE (Google Kubernetes Engine) +- A Kubernetes cluster: + - **Cloud**: AWS EKS, GCP GKE + - **Local**: minikube, Docker Desktop, kind, k3s +- For Ollama: Local installation ([see Ollama Setup Guide](#ollama-setup-guide)) ## ⚡ Getting Started diff --git a/setup_credentials.py b/setup_credentials.py index 879068b..03756fa 100644 --- a/setup_credentials.py +++ b/setup_credentials.py @@ -82,13 +82,17 @@ def get_credential_config(platform: str) -> dict[str, dict[str, Any]]: "prompt": "Enter your Github project root directory: ", "mask_value": False, }, - "PROVIDER": {"prompt": "Enter your LLM provider name: ", "mask_value": False}, + "PROVIDER": {"prompt": "Enter your LLM provider name (anthropic/gemini/ollama): ", "mask_value": False}, "MODEL": {"prompt": "Enter your LLM model name: ", "mask_value": False}, "GEMINI_API_KEY": {"prompt": "Enter your Gemini API Key: ", "mask_value": True}, "ANTHROPIC_API_KEY": { "prompt": "Enter your Anthropic API Key: ", "mask_value": True, }, + "OLLAMA_API_URL": { + "prompt": "Enter your Ollama API URL (default: http://localhost:11434): ", + "mask_value": False, + }, "MAX_TOKENS": { "prompt": "Controls the maximum number of tokens the LLM can generate in " "its response e.g. 10000: ", diff --git a/sre_agent/llm/main.py b/sre_agent/llm/main.py index 4aa707d..3e50cd8 100644 --- a/sre_agent/llm/main.py +++ b/sre_agent/llm/main.py @@ -13,6 +13,7 @@ BaseClient, DummyClient, GeminiClient, + OllamaClient, OpenAIClient, SelfHostedClient, ) @@ -32,6 +33,7 @@ Provider.MOCK: DummyClient(), Provider.OPENAI: OpenAIClient(), Provider.GEMINI: GeminiClient(), + Provider.OLLAMA: OllamaClient(), Provider.SELF_HOSTED: SelfHostedClient(), } diff --git a/sre_agent/llm/utils/clients.py b/sre_agent/llm/utils/clients.py index 05d3e9a..bd62f18 100644 --- a/sre_agent/llm/utils/clients.py +++ b/sre_agent/llm/utils/clients.py @@ -1,6 +1,8 @@ """A collection of clients for performing text generation.""" +import json import os +import requests from abc import ABC, abstractmethod from typing import Any, cast @@ -227,6 +229,127 @@ def generate(self, payload: TextGenerationPayload) -> Message: ) +class OllamaClient(BaseClient): + """A client for performing text generation using Ollama.""" + + def __init__(self, settings: LLMSettings = LLMSettings()) -> None: + """The constructor for the Ollama client.""" + super().__init__(settings) + self.api_url = settings.ollama_api_url + + def generate(self, payload: TextGenerationPayload) -> Message: + """A method for generating text using the Ollama API.""" + try: + # Convert the payload to Ollama format + messages = self._convert_messages_to_ollama(payload.messages) + + # Prepare the request data + request_data = { + "model": self.settings.model, + "messages": messages, + "stream": False, + "options": {} + } + + # Add max_tokens if specified + if self.settings.max_tokens: + request_data["options"]["num_predict"] = self.settings.max_tokens + + # Add tools if present + if payload.tools: + request_data["tools"] = self._convert_tools_to_ollama(payload.tools) + + logger.debug(f"Ollama request: {request_data}") + + # Make the request to Ollama + response = requests.post( + f"{self.api_url}/api/chat", + json=request_data, + timeout=120, + headers={"Content-Type": "application/json"} + ) + response.raise_for_status() + + ollama_response = response.json() + logger.debug(f"Ollama response: {ollama_response}") + + # Convert response back to our format + content: Content = [TextBlock( + text=ollama_response.get("message", {}).get("content", ""), + type="text" + )] + + # Extract usage information if available + usage = None + if "usage" in ollama_response: + usage_data = ollama_response["usage"] + usage = Usage( + input_tokens=usage_data.get("prompt_tokens", 0), + output_tokens=usage_data.get("completion_tokens", 0), + cache_creation_input_tokens=None, + cache_read_input_tokens=None, + ) + + logger.info( + f"Ollama token usage - Input: {usage.input_tokens if usage else 'N/A'}, " + f"Output: {usage.output_tokens if usage else 'N/A'}" + ) + + return Message( + id=f"ollama_{hash(str(ollama_response))}", + model=self.settings.model, + content=content, + role="assistant", + stop_reason="end_turn", + usage=usage, + ) + + except requests.RequestException as e: + logger.error(f"Failed to connect to Ollama: {e}") + raise ValueError(f"Ollama API error: {e}") + except Exception as e: + logger.error(f"Unexpected error in Ollama client: {e}") + raise + + def _convert_messages_to_ollama(self, messages: list[Any]) -> list[dict[str, Any]]: + """Convert messages to Ollama format.""" + ollama_messages = [] + + for message in messages: + role = message.get("role", "user") + content = message.get("content", "") + + # Handle different content types + if isinstance(content, list): + # Extract text from content blocks + text_parts = [] + for block in content: + if isinstance(block, dict) and block.get("type") == "text": + text_parts.append(block.get("text", "")) + content = "\n".join(text_parts) + + ollama_messages.append({ + "role": role, + "content": str(content) + }) + + return ollama_messages + + def _convert_tools_to_ollama(self, tools: list[Any]) -> list[dict[str, Any]]: + """Convert MCP tools to Ollama format.""" + ollama_tools = [] + + for tool in tools: + # Convert MCP tool format to Ollama function calling format + if isinstance(tool, dict) and "function" in tool: + ollama_tools.append({ + "type": "function", + "function": tool["function"] + }) + + return ollama_tools + + class SelfHostedClient(BaseClient): """A client for performing text generation using a self-hosted model.""" diff --git a/sre_agent/llm/utils/schemas.py b/sre_agent/llm/utils/schemas.py index d8fccac..a512bbd 100644 --- a/sre_agent/llm/utils/schemas.py +++ b/sre_agent/llm/utils/schemas.py @@ -12,6 +12,7 @@ class Provider(StrEnum): ANTHROPIC = "anthropic" OPENAI = "openai" GEMINI = "gemini" + OLLAMA = "ollama" SELF_HOSTED = "self-hosted" MOCK = "mock" @@ -29,3 +30,7 @@ class LLMSettings(BaseSettings): max_tokens: int | None = Field( description="The maximum number of tokens for generation.", default=10000 ) + ollama_api_url: str = Field( + description="The Ollama API URL for local LLM inference.", + default="http://localhost:11434", + ) From 1ccc723d32e779f8bba44fd85dc022c95ab112b8 Mon Sep 17 00:00:00 2001 From: Tom Stockton Date: Tue, 22 Jul 2025 23:13:29 +0100 Subject: [PATCH 2/3] Fix pre-commit issues: formatting, typos, and line length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix typo: 'Specialized' -> 'Specialised' for UK English - Apply black formatting to clients.py - Fix line length issue in logging statement - Apply ruff import sorting 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- README.md | 2 +- setup_credentials.py | 5 ++- sre_agent/llm/utils/clients.py | 78 +++++++++++++++++----------------- 3 files changed, 44 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index 2d7a53b..e7acceb 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ The SRE Agent supports multiple the following LLM providers: ### Recommended Models for SRE Tasks - **llama3.1** (8B): Fast, good general reasoning - **mistral** (7B): Excellent for technical tasks -- **codellama** (7B): Specialized for code analysis +- **codellama** (7B): Specialised for code analysis - **llama3.1:70b**: Most capable but requires more resources ### Configuration diff --git a/setup_credentials.py b/setup_credentials.py index 03756fa..90a7573 100644 --- a/setup_credentials.py +++ b/setup_credentials.py @@ -82,7 +82,10 @@ def get_credential_config(platform: str) -> dict[str, dict[str, Any]]: "prompt": "Enter your Github project root directory: ", "mask_value": False, }, - "PROVIDER": {"prompt": "Enter your LLM provider name (anthropic/gemini/ollama): ", "mask_value": False}, + "PROVIDER": { + "prompt": "Enter your LLM provider name (anthropic/gemini/ollama): ", + "mask_value": False, + }, "MODEL": {"prompt": "Enter your LLM model name: ", "mask_value": False}, "GEMINI_API_KEY": {"prompt": "Enter your Gemini API Key: ", "mask_value": True}, "ANTHROPIC_API_KEY": { diff --git a/sre_agent/llm/utils/clients.py b/sre_agent/llm/utils/clients.py index bd62f18..0371a95 100644 --- a/sre_agent/llm/utils/clients.py +++ b/sre_agent/llm/utils/clients.py @@ -1,11 +1,10 @@ """A collection of clients for performing text generation.""" -import json import os -import requests from abc import ABC, abstractmethod from typing import Any, cast +import requests from anthropic import Anthropic from anthropic.types import MessageParam as AnthropicMessageBlock from anthropic.types import ToolParam @@ -215,17 +214,21 @@ def generate(self, payload: TextGenerationPayload) -> Message: model=response.model_version, content=content, role="assistant", - stop_reason=response.candidates[0].finish_reason - if response.candidates - else "end_turn", - usage=Usage( - input_tokens=response.usage_metadata.prompt_token_count, - output_tokens=response.usage_metadata.candidates_token_count, - cache_creation_input_tokens=None, - cache_read_input_tokens=response.usage_metadata.cached_content_token_count, - ) - if response.usage_metadata - else None, + stop_reason=( + response.candidates[0].finish_reason + if response.candidates + else "end_turn" + ), + usage=( + Usage( + input_tokens=response.usage_metadata.prompt_token_count, + output_tokens=response.usage_metadata.candidates_token_count, + cache_creation_input_tokens=None, + cache_read_input_tokens=response.usage_metadata.cached_content_token_count, + ) + if response.usage_metadata + else None + ), ) @@ -242,19 +245,19 @@ def generate(self, payload: TextGenerationPayload) -> Message: try: # Convert the payload to Ollama format messages = self._convert_messages_to_ollama(payload.messages) - + # Prepare the request data request_data = { "model": self.settings.model, "messages": messages, "stream": False, - "options": {} + "options": {}, } - + # Add max_tokens if specified if self.settings.max_tokens: request_data["options"]["num_predict"] = self.settings.max_tokens - + # Add tools if present if payload.tools: request_data["tools"] = self._convert_tools_to_ollama(payload.tools) @@ -266,18 +269,20 @@ def generate(self, payload: TextGenerationPayload) -> Message: f"{self.api_url}/api/chat", json=request_data, timeout=120, - headers={"Content-Type": "application/json"} + headers={"Content-Type": "application/json"}, ) response.raise_for_status() - + ollama_response = response.json() logger.debug(f"Ollama response: {ollama_response}") # Convert response back to our format - content: Content = [TextBlock( - text=ollama_response.get("message", {}).get("content", ""), - type="text" - )] + content: Content = [ + TextBlock( + text=ollama_response.get("message", {}).get("content", ""), + type="text", + ) + ] # Extract usage information if available usage = None @@ -290,9 +295,10 @@ def generate(self, payload: TextGenerationPayload) -> Message: cache_read_input_tokens=None, ) + input_tokens = usage.input_tokens if usage else "N/A" + output_tokens = usage.output_tokens if usage else "N/A" logger.info( - f"Ollama token usage - Input: {usage.input_tokens if usage else 'N/A'}, " - f"Output: {usage.output_tokens if usage else 'N/A'}" + f"Ollama token usage - Input: {input_tokens}, Output: {output_tokens}" ) return Message( @@ -314,11 +320,11 @@ def generate(self, payload: TextGenerationPayload) -> Message: def _convert_messages_to_ollama(self, messages: list[Any]) -> list[dict[str, Any]]: """Convert messages to Ollama format.""" ollama_messages = [] - + for message in messages: role = message.get("role", "user") content = message.get("content", "") - + # Handle different content types if isinstance(content, list): # Extract text from content blocks @@ -327,26 +333,20 @@ def _convert_messages_to_ollama(self, messages: list[Any]) -> list[dict[str, Any if isinstance(block, dict) and block.get("type") == "text": text_parts.append(block.get("text", "")) content = "\n".join(text_parts) - - ollama_messages.append({ - "role": role, - "content": str(content) - }) - + + ollama_messages.append({"role": role, "content": str(content)}) + return ollama_messages def _convert_tools_to_ollama(self, tools: list[Any]) -> list[dict[str, Any]]: """Convert MCP tools to Ollama format.""" ollama_tools = [] - + for tool in tools: # Convert MCP tool format to Ollama function calling format if isinstance(tool, dict) and "function" in tool: - ollama_tools.append({ - "type": "function", - "function": tool["function"] - }) - + ollama_tools.append({"type": "function", "function": tool["function"]}) + return ollama_tools From c874860f9fe8e4d6a628dd6102e4078f9ee1ae09 Mon Sep 17 00:00:00 2001 From: Tom Stockton Date: Tue, 22 Jul 2025 23:43:47 +0100 Subject: [PATCH 3/3] Add CLAUDE.md with British English spelling guidelines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - Create comprehensive CLAUDE.md project guidance file - Include explicit British English spelling guidelines and examples - Document Ollama provider support and local LLM setup - Add complete development workflow and architecture details ## British English Guidelines Added - Use -ise endings (organise, recognise, specialise) - Use -our endings (colour, honour, behaviour) - Use -re endings (centre, metre, theatre) - Use -yse endings (analyse, paralyse) - SRE-specific examples included ## Content Coverage - Project overview and microservices architecture - All LLM providers including new Ollama support - Development commands and testing procedures - Security guidelines and .env file warnings - Complete workspace structure documentation - API usage examples and deployment options 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- CLAUDE.md | 223 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 223 insertions(+) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..52d4858 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,223 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +SRE Agent is an AI-powered Site Reliability Engineering assistant that automates debugging, monitors application/infrastructure logs, diagnoses issues, and reports diagnostics. It integrates with Kubernetes clusters, GitHub repositories, and Slack for comprehensive incident response automation. + +## Architecture + +### Microservices Design +The system uses a microservices architecture with the following components: + +- **Orchestrator (Client)**: FastAPI-based MCP client (`sre_agent/client/`) that coordinates all services and handles incoming diagnostic requests +- **LLM Server**: Text generation service (`sre_agent/llm/`) supporting multiple AI providers (Anthropic, OpenAI, Gemini, Ollama) +- **Llama Firewall**: Security layer (`sre_agent/firewall/`) using Meta's Llama Prompt Guard for content validation +- **MCP Servers**: + - Kubernetes MCP (`sre_agent/servers/mcp-server-kubernetes/`) - TypeScript/Node.js K8s operations + - GitHub MCP (`sre_agent/servers/github/`) - TypeScript/Node.js repository operations + - Slack MCP (`sre_agent/servers/slack/`) - TypeScript/Node.js team notifications + - Prompt Server MCP (`sre_agent/servers/prompt_server/`) - Python structured prompts + +### Key Technologies +- **Languages**: Python 3.12+ (core services), TypeScript/Node.js (MCP servers) +- **Communication**: Model Context Protocol (MCP) with Server-Sent Events (SSE) transport +- **Infrastructure**: Docker Compose, AWS EKS deployment, GCP GKE deployment +- **AI/ML**: Multiple LLM providers, Hugging Face transformers + +### LLM Provider Support +- **Anthropic**: Claude models (API key required) +- **Google Gemini**: Gemini models (API key required) +- **Ollama**: Local LLM inference (no API key, privacy-focused) +- **OpenAI**: Placeholder (not yet implemented) +- **Self-hosted**: Placeholder (not yet implemented) + +## Common Development Commands + +### Project Setup +```bash +make project-setup # Install uv, create venv, install pre-commit hooks +``` + +### Code Quality +```bash +make check # Run linting, pre-commit hooks, and lock file check +make tests # Run pytest with coverage +make license-check # Verify dependency licences +``` + +### Service Management +```bash +# Local development - AWS +docker compose -f compose.aws.yaml up --build + +# Local development - GCP +docker compose -f compose.gcp.yaml up --build + +# Production with ECR images +docker compose -f compose.ecr.yaml up + +# Production with GAR images (Google) +docker compose -f compose.gar.yaml up + +# Test environment +docker compose -f compose.tests.yaml up +``` + +### Testing +```bash +# All tests +make tests + +# Specific test file +uv run python -m pytest tests/unit_tests/test_adapters.py + +# Specific test function +uv run python -m pytest tests/unit_tests/test_adapters.py::test_specific_function + +# With coverage +uv run python -m pytest --cov --cov-config=pyproject.toml --cov-report=xml + +# Security tests only +uv run python -m pytest tests/security_tests/ +``` + +## Configuration + +### Environment Variables Required +- `DEV_BEARER_TOKEN`: API authentication for the orchestrator +- `ANTHROPIC_API_KEY`: Claude API access (for Anthropic models) +- `GEMINI_API_KEY`: Google Gemini API access (for Gemini models) +- `OLLAMA_API_URL`: Ollama API endpoint (for local LLM inference, default: http://localhost:11434) +- `GITHUB_PERSONAL_ACCESS_TOKEN`: GitHub integration +- `SLACK_BOT_TOKEN`, `SLACK_TEAM_ID`, `CHANNEL_ID`: Slack notifications +- `AWS_REGION`, `TARGET_EKS_CLUSTER_NAME`: AWS EKS cluster access +- `GCP_PROJECT_ID`, `TARGET_GKE_CLUSTER_NAME`, `GKE_ZONE`: GCP GKE cluster access +- `HF_TOKEN`: Hugging Face model access + +### Cloud Platform Setup +- **AWS**: Credentials must be available at `~/.aws/credentials` for EKS cluster access +- **GCP**: Use `gcloud auth login` and `gcloud config set project YOUR_PROJECT_ID` for GKE access + +### Ollama Setup (Local LLM) +- **Install**: Visit [ollama.ai](https://ollama.ai) and follow installation instructions +- **Start**: Run `ollama serve` in your terminal +- **Models**: Download models like `ollama pull llama3.1` +- **Benefits**: Privacy-focused, no API costs, offline capable + +### Credential Setup Script +Use the interactive setup script for easy configuration: +```bash +python setup_credentials.py +# or with platform selection +python setup_credentials.py --platform aws +python setup_credentials.py --platform gcp +``` + +## Service Architecture Details + +### Communication Flow +1. Orchestrator receives `/diagnose` requests on port 8003 +2. Requests pass through Llama Firewall for security validation +3. LLM Server processes AI reasoning (using Anthropic, Gemini, or Ollama) +4. MCP servers handle tool operations (K8s, GitHub, Slack) +5. Results reported back via Slack notifications + +### Health Checks +All services implement health monitoring accessible via `/health` endpoints. + +## Development Patterns + +### MCP Integration +All external tool interactions use the Model Context Protocol standard. When adding new tools: +- Follow existing MCP server patterns in `sre_agent/servers/` +- Implement SSE transport for real-time communication +- Add health check endpoints + +### Security Considerations +- All requests pass through Llama Firewall validation +- Bearer token authentication required for API access +- Input validation at multiple service layers +- No secrets in code - use environment variables + +**IMPORTANT: Never commit the .env file!** +- The `.env` file contains sensitive credentials (API keys, tokens, secrets) +- It is included in `.gitignore` and should never be committed to the repository +- Use `python setup_credentials.py` to generate the `.env` file locally +- Each developer/environment needs their own `.env` file with appropriate credentials +- For production deployments, use proper secret management (AWS Secrets Manager, K8s secrets, etc.) + +### Code Style +- **Language**: Use British English spelling throughout (e.g., "specialised", "organised", "recognised") +- **Python**: Uses ruff, black, mypy for formatting and type checking +- **TypeScript**: Standard TypeScript/Node.js conventions +- **Line length**: 88 characters +- **Docstrings**: Google-style docstrings for Python +- **Type checking**: Strict type checking enabled + +### British English Spelling Guidelines +The project uses British English spelling. Common differences from American English: +- **-ise/-ize**: Use "-ise" endings (e.g., "organise", "recognise", "specialise") +- **-our/-or**: Use "-our" endings (e.g., "colour", "honour", "behaviour") +- **-re/-er**: Use "-re" endings (e.g., "centre", "metre", "theatre") +- **-ence/-ense**: Use "-ence" endings (e.g., "defence", "licence" as noun) +- **-yse/-yze**: Use "-yse" endings (e.g., "analyse", "paralyse") + +**Examples in SRE context:** +- "optimise" (not "optimize") +- "customise" (not "customize") +- "analyse logs" (not "analyze logs") +- "centralised monitoring" (not "centralized monitoring") +- "behaviour analysis" (not "behavior analysis") + +## Workspace Structure +This is a uv workspace with members: +- `sre_agent/llm`: LLM service with multi-provider support +- `sre_agent/client`: FastAPI orchestrator service +- `sre_agent/servers/prompt_server`: Python MCP server for structured prompts +- `sre_agent/firewall`: Llama Prompt Guard security layer +- `sre_agent/shared`: Shared utilities and schemas + +Each Python service has its own `pyproject.toml`. TypeScript MCP servers use `package.json`: +- `sre_agent/servers/mcp-server-kubernetes/`: Kubernetes operations (Node.js/TypeScript) +- `sre_agent/servers/github/`: GitHub API integration (Node.js/TypeScript) +- `sre_agent/servers/slack/`: Slack notifications (Node.js/TypeScript) + +## API Usage + +### Primary Endpoint +```bash +POST http://localhost:8003/diagnose +Authorization: Bearer +Content-Type: application/json +{"text": ""} +``` + +### Health Check +```bash +GET http://localhost:8003/health +``` + +## Deployment +- **Local**: Docker Compose with local builds (AWS: `compose.aws.yaml`, GCP: `compose.gcp.yaml`) +- **Production AWS**: ECR-based images on AWS EKS (`compose.ecr.yaml`) +- **Production GCP**: GAR-based images on GCP GKE (`compose.gar.yaml`) +- See [EKS Deployment](https://github.com/fuzzylabs/sre-agent-deployment) for cloud deployment examples + +## TypeScript MCP Server Development +For TypeScript MCP servers in `sre_agent/servers/`: + +### Building and Testing +```bash +# Kubernetes MCP server +cd sre_agent/servers/mcp-server-kubernetes +npm run build # Build TypeScript +npm run test # Run vitest tests +npm run dev # Watch mode + +# GitHub/Slack MCP servers +cd sre_agent/servers/github # or /slack +npm run build +npm run watch # Watch mode +``` \ No newline at end of file