Skip to content

Commit a9b61b4

Browse files
committed
.
1 parent b923773 commit a9b61b4

File tree

2 files changed

+174
-57
lines changed

2 files changed

+174
-57
lines changed

dockerize/Caddyfile

Lines changed: 76 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,78 @@
1-
# Caddyfile for MONAI Breast Density Classification
2-
# Simple reverse proxy to auth-gateway service
3-
4-
# Main site configuration
5-
:23434 {
6-
# TLS configuration using provided certificates
7-
tls /etc/ssl/certs/fullchain.pem /etc/ssl/private/privkey.pem
8-
9-
# Reverse proxy all requests to auth-gateway
10-
reverse_proxy auth-gateway:8090 {
11-
# Health check for upstream
12-
health_uri /health
13-
health_interval 30s
14-
health_timeout 10s
15-
16-
# Forward original client information
17-
header_up Host {host}
18-
header_up X-Real-IP {remote_host}
19-
header_up X-Forwarded-For {remote_host}
20-
header_up X-Forwarded-Proto {scheme}
21-
header_up X-Forwarded-Port {server_port}
22-
}
23-
24-
# Request/response logging for debugging (optional)
25-
log {
26-
output stdout
27-
format console
28-
level INFO
29-
}
30-
31-
# Security headers
32-
header {
33-
# Remove server identification
34-
-Server
35-
# Basic security headers
36-
X-Content-Type-Options nosniff
37-
X-Frame-Options DENY
38-
X-XSS-Protection "1; mode=block"
39-
Referrer-Policy strict-origin-when-cross-origin
40-
}
41-
42-
# Handle specific endpoints with better error pages
43-
handle_errors {
44-
@502 expression {http.error.status_code} == 502
45-
@503 expression {http.error.status_code} == 503
46-
@504 expression {http.error.status_code} == 504
47-
48-
respond @502 "Service temporarily unavailable - auth-gateway not ready" 502
49-
respond @503 "Service temporarily unavailable - please try again" 503
50-
respond @504 "Service timeout - request took too long" 504
51-
}
1+
{
2+
debug
3+
order claive_reverse_proxy first
4+
log {
5+
output stdout
6+
format console
7+
level DEBUG
8+
}
529
}
5310

54-
# Optional: Redirect HTTP to HTTPS if needed
55-
# :80 {
56-
# redir https://{host}:23434{uri} permanent
57-
# }
11+
# HTTP to HTTPS redirect (eliminates TLS handshake errors)
12+
fetchai.scrtlabs.com:80 {
13+
redir https://fetchai.scrtlabs.com:23434{uri} permanent
14+
}
15+
16+
# Main HTTPS site configuration for fetchai.scrtlabs.com
17+
fetchai.scrtlabs.com:23434 {
18+
# TLS configuration using provided certificates
19+
tls /etc/ssl/certs/fullchain.pem /etc/ssl/private/privkey.pem
20+
21+
@cors_preflight method OPTIONS
22+
handle @cors_preflight {
23+
header {
24+
Access-Control-Allow-Origin "{header.origin}"
25+
Access-Control-Allow-Methods "GET, POST, PUT, PATCH, DELETE, OPTIONS"
26+
Access-Control-Allow-Headers "Content-Type, Authorization, Referrer-Policy, priority, sec-ch-ua, sec-ch-ua-mobile, sec-ch-ua-platform"
27+
Access-Control-Allow-Credentials "true"
28+
Vary Origin
29+
Access-Control-Max-Age "3600"
30+
}
31+
respond "" 204
32+
}
33+
34+
handle {
35+
header {
36+
Access-Control-Allow-Origin "{header.origin}"
37+
Access-Control-Allow-Headers "Content-Type, Authorization, Referrer-Policy, priority, sec-ch-ua, sec-ch-ua-mobile, sec-ch-ua-platform"
38+
Access-Control-Allow-Credentials "true"
39+
Vary Origin
40+
}
41+
claive_reverse_proxy {
42+
API_MASTER_KEY bWFzdGVyQHNjcnRsYWJzLmNvbTpTZWNyZXROZXR3b3JrTWFzdGVyS2V5X18yMDI1
43+
}
44+
reverse_proxy auth-gateway:8090 {
45+
# Health check for upstream
46+
health_uri /health
47+
health_interval 30s
48+
health_timeout 10s
49+
50+
# Forward original client information
51+
header_up Host {host}
52+
header_up X-Real-IP {remote_host}
53+
header_up X-Forwarded-Port {server_port}
54+
}
55+
}
56+
57+
# Security headers
58+
header {
59+
# Remove server identification
60+
-Server
61+
# Basic security headers
62+
X-Content-Type-Options nosniff
63+
X-Frame-Options DENY
64+
X-XSS-Protection "1; mode=block"
65+
Referrer-Policy strict-origin-when-cross-origin
66+
}
67+
68+
# Return clearer plain-text messages for specific upstream error codes (502/503/504)
69+
handle_errors {
70+
@502 expression {http.error.status_code} == 502
71+
@503 expression {http.error.status_code} == 503
72+
@504 expression {http.error.status_code} == 504
73+
74+
respond @502 "Service temporarily unavailable - auth-gateway not ready" 502
75+
respond @503 "Service temporarily unavailable - please try again" 503
76+
respond @504 "Service timeout - request took too long" 504
77+
}
78+
}

dockerize/docker-compose.yaml

Lines changed: 98 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
services:
22
# Caddy Reverse Proxy - Single Entry Point
33
caddy:
4-
image: caddy:2.7-alpine
4+
# image: caddy:2.7-alpine
5+
image: secret-ai-caddy:latest
56
container_name: caddy-proxy
67

78
# Only port exposed to host
@@ -72,10 +73,16 @@ services:
7273
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
7374
- LOCAL_EXECUTION=true
7475
- PYTHONUNBUFFERED=1
76+
- OLLAMA_URL=http://ollama:11434
7577

7678
# NO EXTERNAL PORTS - Internal network only
7779
# ports: removed - all communication goes through Caddy
7880

81+
# Wait for the one-shot ollama-init job (model pull) to complete successfully before starting TorchServe
82+
depends_on:
83+
ollama-init:
84+
condition: service_completed_successfully
85+
7986
# Volume mounts
8087
volumes:
8188
# Mount the specific MAR file directly into the model store
@@ -129,6 +136,7 @@ services:
129136

130137
environment:
131138
- TORCHSERVE_URL=http://torchserve:8085
139+
- OLLAMA_URL=http://ollama:11434
132140
- PYTHONUNBUFFERED=1
133141

134142
# NO EXTERNAL PORTS - Internal network only
@@ -164,6 +172,94 @@ services:
164172
- torchserve-tokens:/app/tokens:ro
165173
- shared-workspace:/app/workspace:rw
166174

175+
# Ollama Model Initialization
176+
ollama-init:
177+
image: alpine:latest
178+
container_name: ollama-init
179+
180+
# Wait for ollama to be healthy before running
181+
depends_on:
182+
ollama:
183+
condition: service_healthy
184+
185+
# Install curl and pull the model using Ollama API
186+
command: >
187+
sh -c "
188+
apk add --no-cache curl &&
189+
echo 'Checking if model exists...' &&
190+
if ! curl -s http://ollama:11434/api/tags | grep -q 'thewindmom/llama3-med42-70b'; then
191+
echo 'Pulling model thewindmom/llama3-med42-70b:latest...' &&
192+
curl -X POST http://ollama:11434/api/pull -H 'Content-Type: application/json' -d '{\"name\": \"thewindmom/llama3-med42-70b:latest\"}' &&
193+
echo 'Model pull request sent successfully' &&
194+
echo 'Waiting for model to be fully loaded...' &&
195+
sleep 60
196+
else
197+
echo 'Model already exists, skipping pull'
198+
fi
199+
"
200+
201+
# Only run once - don't restart
202+
restart: "no"
203+
204+
networks:
205+
- monai-network
206+
207+
# Ollama Service
208+
ollama:
209+
image: ollama/ollama:latest
210+
container_name: ollama
211+
privileged: true
212+
213+
# GPU support for Ollama
214+
deploy:
215+
resources:
216+
reservations:
217+
devices:
218+
- driver: nvidia
219+
count: all
220+
capabilities: [gpu]
221+
limits:
222+
memory: 32G
223+
224+
runtime: nvidia
225+
environment:
226+
- NVIDIA_VISIBLE_DEVICES=all
227+
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
228+
- OLLAMA_HOST=0.0.0.0
229+
- OLLAMA_ORIGINS=*
230+
- OLLAMA_KEEP_ALIVE=-1
231+
- OLLAMA_DEBUG=1
232+
233+
# NO EXTERNAL PORTS - Internal network only
234+
# ports: removed - all communication goes through Caddy or auth-gateway
235+
236+
# Volume mounts for model storage
237+
volumes:
238+
# Mount host directory for persistent model storage
239+
- /mnt/secure/.ollama:/root/.ollama
240+
- shared-workspace:/workspace:rw
241+
242+
# Health check
243+
healthcheck:
244+
test: ["CMD", "ollama", "list"]
245+
interval: 30s
246+
timeout: 10s
247+
start_period: 60s
248+
retries: 3
249+
250+
# Restart policy
251+
restart: unless-stopped
252+
253+
# Logging configuration
254+
logging:
255+
driver: "json-file"
256+
options:
257+
max-size: "100m"
258+
max-file: "5"
259+
260+
networks:
261+
- monai-network
262+
167263
# Networks
168264
networks:
169265
monai-network:
@@ -181,4 +277,4 @@ volumes:
181277
caddy-data:
182278
driver: local
183279
caddy-config:
184-
driver: local
280+
driver: local

0 commit comments

Comments
 (0)