.

alexh-scrt · alexh-scrt · commit a9b61b482f62 · 2025-06-26T00:08:38.000Z
diff --git a/dockerize/Caddyfile b/dockerize/Caddyfile
@@ -1,57 +1,78 @@
-# Caddyfile for MONAI Breast Density Classification
-# Simple reverse proxy to auth-gateway service
-
-# Main site configuration
-:23434 {
-    # TLS configuration using provided certificates
-    tls /etc/ssl/certs/fullchain.pem /etc/ssl/private/privkey.pem
-
-    # Reverse proxy all requests to auth-gateway
-    reverse_proxy auth-gateway:8090 {
-        # Health check for upstream
-        health_uri /health
-        health_interval 30s
-        health_timeout 10s
-        
-        # Forward original client information
-        header_up Host {host}
-        header_up X-Real-IP {remote_host}
-        header_up X-Forwarded-For {remote_host}
-        header_up X-Forwarded-Proto {scheme}
-        header_up X-Forwarded-Port {server_port}
-    }
-
-    # Request/response logging for debugging (optional)
-    log {
-        output stdout
-        format console
-        level INFO
-    }
-
-    # Security headers
-    header {
-        # Remove server identification
-        -Server
-        # Basic security headers
-        X-Content-Type-Options nosniff
-        X-Frame-Options DENY
-        X-XSS-Protection "1; mode=block"
-        Referrer-Policy strict-origin-when-cross-origin
-    }
-
-    # Handle specific endpoints with better error pages
-    handle_errors {
-        @502 expression {http.error.status_code} == 502
-        @503 expression {http.error.status_code} == 503
-        @504 expression {http.error.status_code} == 504
-        
-        respond @502 "Service temporarily unavailable - auth-gateway not ready" 502
-        respond @503 "Service temporarily unavailable - please try again" 503
-        respond @504 "Service timeout - request took too long" 504
-    }
+{
+	debug # NOTE(review): debug mode is enabled globally - confirm this is intended outside development
+	order claive_reverse_proxy first # put claive_reverse_proxy first in the HTTP directive order (presumably a plugin directive shipped in the custom Caddy image)
+	log {
+		output stdout
+		format console
+		level DEBUG # most verbose log level; NOTE(review): confirm before production use
+	}
 }
 
-# Optional: Redirect HTTP to HTTPS if needed
-# :80 {
-#     redir https://{host}:23434{uri} permanent
-# }
+# HTTP to HTTPS redirect: requests on port 80 get a permanent redirect to the HTTPS site on :23434 (original note: eliminates TLS handshake errors)
+fetchai.scrtlabs.com:80 {
+	redir https://fetchai.scrtlabs.com:23434{uri} permanent
+}
+
+# Main HTTPS site configuration for fetchai.scrtlabs.com
+fetchai.scrtlabs.com:23434 {
+	# TLS configuration using provided certificates
+	tls /etc/ssl/certs/fullchain.pem /etc/ssl/private/privkey.pem
+
+	@cors_preflight method OPTIONS # matches every OPTIONS request
+	handle @cors_preflight {
+		header {
+			Access-Control-Allow-Origin "{header.origin}" # echoes the request's Origin header; NOTE(review): combined with credentials below this accepts any origin - confirm no allow-list is needed
+			Access-Control-Allow-Methods "GET, POST, PUT, PATCH, DELETE, OPTIONS"
+			Access-Control-Allow-Headers "Content-Type, Authorization, Referrer-Policy, priority, sec-ch-ua, sec-ch-ua-mobile, sec-ch-ua-platform"
+			Access-Control-Allow-Credentials "true"
+			Vary Origin
+			Access-Control-Max-Age "3600"
+		}
+		respond "" 204 # answer directly with an empty 204; OPTIONS requests are not proxied upstream
+	}
+
+	handle {
+		header {
+			Access-Control-Allow-Origin "{header.origin}"
+			Access-Control-Allow-Headers "Content-Type, Authorization, Referrer-Policy, priority, sec-ch-ua, sec-ch-ua-mobile, sec-ch-ua-platform"
+			Access-Control-Allow-Credentials "true"
+			Vary Origin
+		}
+		claive_reverse_proxy {
+			# Secret is substituted from the caddy container's environment when the config is parsed; never commit the key itself
+			API_MASTER_KEY {$API_MASTER_KEY}
+		}
+		reverse_proxy auth-gateway:8090 {
+			# Health check for upstream
+			health_uri /health
+			health_interval 30s
+			health_timeout 10s
+			# Forward original client information
+			header_up Host {host}
+			header_up X-Real-IP {remote_host}
+			header_up X-Forwarded-Port {server_port}
+		}
+	}
+
+	# Security headers, declared at site level (outside the handle blocks above)
+	header {
+		# Remove server identification
+		-Server
+		# Basic security headers sent to browsers
+		X-Content-Type-Options nosniff
+		X-Frame-Options DENY
+		X-XSS-Protection "1; mode=block"
+		Referrer-Policy strict-origin-when-cross-origin
+	}
+
+	# Answer 502/503/504 errors with short plain-text messages, keeping the same status code
+	handle_errors {
+		@502 expression {http.error.status_code} == 502
+		@503 expression {http.error.status_code} == 503
+		@504 expression {http.error.status_code} == 504
+
+		respond @502 "Service temporarily unavailable - auth-gateway not ready" 502
+		respond @503 "Service temporarily unavailable - please try again" 503
+		respond @504 "Service timeout - request took too long" 504
+	}
+}
diff --git a/dockerize/docker-compose.yaml b/dockerize/docker-compose.yaml
@@ -1,7 +1,8 @@
 services:
   # Caddy Reverse Proxy - Single Entry Point
   caddy:
-    image: caddy:2.7-alpine
+    # image: caddy:2.7-alpine 
+    image: secret-ai-caddy:latest
     container_name: caddy-proxy
     
     # Only port exposed to host
@@ -72,10 +73,16 @@ services:
       - NVIDIA_DRIVER_CAPABILITIES=compute,utility
       - LOCAL_EXECUTION=true
       - PYTHONUNBUFFERED=1
+      - OLLAMA_URL=http://ollama:11434
     
     # NO EXTERNAL PORTS - Internal network only
     # ports: removed - all communication goes through Caddy
     
+    # Wait for the ollama-init job (model pull) to complete successfully before starting TorchServe
+    depends_on:
+      ollama-init:
+        condition: service_completed_successfully
+    
     # Volume mounts
     volumes:
       # Mount the specific MAR file directly into the model store
@@ -129,6 +136,7 @@ services:
     
     environment:
       - TORCHSERVE_URL=http://torchserve:8085
+      - OLLAMA_URL=http://ollama:11434
       - PYTHONUNBUFFERED=1
     
     # NO EXTERNAL PORTS - Internal network only
@@ -164,6 +172,94 @@ services:
       - torchserve-tokens:/app/tokens:ro
       - shared-workspace:/app/workspace:rw
 
+  # Ollama Model Initialization (one-shot job; dependents wait for it to exit 0)
+  ollama-init:
+    image: alpine:latest
+    container_name: ollama-init
+
+    # Wait for ollama to be healthy before running
+    depends_on:
+      ollama:
+        condition: service_healthy
+    # Install curl, then pull the model via the Ollama API unless it is already listed.
+    # curl -f plus a non-streaming pull make a failed pull exit non-zero instead of passing silently.
+    command: >
+      sh -c "
+        apk add --no-cache curl &&
+        echo 'Checking if model exists...' &&
+        if ! curl -s http://ollama:11434/api/tags | grep -q 'thewindmom/llama3-med42-70b'; then
+          echo 'Pulling model thewindmom/llama3-med42-70b:latest...' &&
+          curl -f -X POST http://ollama:11434/api/pull -H 'Content-Type: application/json' -d '{\"name\": \"thewindmom/llama3-med42-70b:latest\", \"stream\": false}' &&
+          echo 'Model pull completed' &&
+          echo 'Verifying that the model is listed...' &&
+          curl -sf http://ollama:11434/api/tags | grep -q 'thewindmom/llama3-med42-70b'
+        else
+          echo 'Model already exists, skipping pull'
+        fi
+      "
+
+    # Only run once - don't restart
+    restart: "no"
+
+    networks:
+      - monai-network
+
+  # Ollama Service: LLM server reached by other containers at http://ollama:11434
+  ollama:
+    image: ollama/ollama:latest
+    container_name: ollama
+    privileged: true  # NOTE(review): full host privileges on top of the NVIDIA runtime - confirm this is required
+    
+    # GPU support for Ollama
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+        limits:
+          memory: 32G
+
+    runtime: nvidia  # NOTE(review): GPU access is also requested via deploy.resources above - confirm both are needed
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=all
+      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
+      - OLLAMA_HOST=0.0.0.0
+      - OLLAMA_ORIGINS=*  # NOTE(review): allows requests from any origin - confirm this is intended
+      - OLLAMA_KEEP_ALIVE=-1
+      - OLLAMA_DEBUG=1  # NOTE(review): debug logging enabled - confirm for production
+    
+    # NO EXTERNAL PORTS - Internal network only
+    # ports: removed - all communication goes through Caddy or auth-gateway
+    
+    # Volume mounts for model storage
+    volumes:
+      # Mount host directory for persistent model storage
+      - /mnt/secure/.ollama:/root/.ollama
+      - shared-workspace:/workspace:rw
+    
+    # Health check: healthy once `ollama list` succeeds inside the container
+    healthcheck:
+      test: ["CMD", "ollama", "list"]
+      interval: 30s
+      timeout: 10s
+      start_period: 60s
+      retries: 3
+    
+    # Restart policy
+    restart: unless-stopped
+    
+    # Logging configuration: json-file driver capped at 5 files of 100m each
+    logging:
+      driver: "json-file"
+      options:
+        max-size: "100m"
+        max-file: "5"
+
+    networks:
+      - monai-network
+
 # Networks
 networks:
   monai-network:
@@ -181,4 +277,4 @@ volumes:
   caddy-data:
     driver: local
   caddy-config:
-    driver: local
+    driver: local