# syntax=docker/dockerfile:1
FROM python:3.12-slim-bookworm

# System libraries: libgl1 + libglib2.0-0 are required by OpenCV (pulled in by
# docling/easyocr); curl/wget for fetching and probes; git for VCS-based
# dependencies; procps for in-container debugging.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        git \
        libgl1 \
        libglib2.0-0 \
        procps \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Bring in uv from the official image. Pin a version instead of :latest so
# builds are reproducible.
COPY --from=ghcr.io/astral-sh/uv:0.5.11 /uv /uvx /bin/

# Model caches live under /app (baked into the image at build time) rather
# than /tmp, which is volatile and may be masked by a tmpfs at runtime.
ENV UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy \
    UV_SYSTEM_PYTHON=1 \
    HF_HOME=/app/.cache/huggingface \
    TORCH_HOME=/app/.cache/torch \
    OMP_NUM_THREADS=4

WORKDIR /app

# Stub README so dependency resolution does not fail if pyproject.toml
# references a readme file before the source tree is copied in.
RUN echo "# Docling API" > README.md

# Install locked dependencies first, without the project itself, so this
# layer stays cached when only application source changes.
COPY pyproject.toml uv.lock ./
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --no-install-project

# Select CPU or CUDA PyTorch wheels at build time (--build-arg CPU_ONLY=true
# for CPU-only / ARM builds).
ARG CPU_ONLY=false
RUN if [ "$CPU_ONLY" = "true" ]; then \
        uv pip install --system --no-cache-dir torch torchvision \
            --extra-index-url https://download.pytorch.org/whl/cpu; \
    else \
        uv pip install --system --no-cache-dir torch torchvision torchaudio \
            --index-url https://download.pytorch.org/whl/cu121; \
    fi

# NOTE(review): these are installed into the system interpreter while
# `uv sync` maintains a project environment — confirm `uv run` resolves the
# intended copies before relying on version pins here.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system --no-cache-dir docling easyocr

# Pre-download docling and EasyOCR models in a single layer. gpu=False:
# build machines generally have no GPU, the downloaded model files are the
# same either way, and runtime GPU usage is unaffected.
RUN python -c 'from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline; \
from easyocr import Reader; \
StandardPdfPipeline.download_models_hf(force=True); \
Reader(["fr", "de", "es", "en", "it", "pt"], gpu=False); \
print("Models downloaded successfully")'

# Application code last — it changes most often.
COPY . .

# Final sync installs the project itself against the locked dependencies.
RUN --mount=type=cache,target=/root/.cache/uv uv sync --frozen

# The uv cache only ever lives in a build-time cache mount, so there is
# nothing to delete from the image — and an `rm -rf` in its own layer could
# not shrink the image anyway. (Removed the old cleanup step.)

# Run as an unprivileged user; stable UID so orchestrators can verify
# runAsNonRoot. The user owns /app so it can read the baked model caches.
RUN useradd --create-home --uid 10001 app \
    && chown -R app:app /app
USER app

# Documentation only — the port still has to be published at run time.
EXPOSE 8080

CMD ["uv", "run", "uvicorn", "--port", "8080", "--host", "0.0.0.0", "main:app"]