65 changes: 43 additions & 22 deletions docker/Dockerfile.nightly_torch
@@ -228,58 +228,79 @@ COPY --from=base /workspace/torch_build_versions.txt ./torch_build_versions.txt
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system $(cat torch_build_versions.txt | xargs) --index-url https://download.pytorch.org/whl/nightly/cu128
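
For context, torch_build_versions.txt is captured in the base stage so this image installs exactly the same nightly builds. A minimal sketch of what the $(cat torch_build_versions.txt | xargs) expansion produces, assuming one pinned requirement per line (the version strings here are made up):

    # Hypothetical file contents:
    #   torch==2.9.0.dev20250101+cu128
    #   torchvision==0.24.0.dev20250101+cu128
    #   torchaudio==2.9.0.dev20250101+cu128
    # xargs joins the lines, so the RUN step effectively executes:
    uv pip install --system \
        torch==2.9.0.dev20250101+cu128 \
        torchvision==0.24.0.dev20250101+cu128 \
        torchaudio==2.9.0.dev20250101+cu128 \
        --index-url https://download.pytorch.org/whl/nightly/cu128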

-# install the vllm wheel
-RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/vllm-dist \
-    --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system vllm-dist/*.whl --verbose
+# Install apache-tvm-ffi first (required by flashinfer)
+# TODO: remove apache-tvm-ffi once FlashInfer is fixed https://github.com/flashinfer-ai/flashinfer/issues/1962
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install --system --prerelease=allow apache-tvm-ffi==0.1.0b15

-# install xformers again for the new environment
+# install xformers for the new environment
 RUN --mount=type=bind,from=base,src=/workspace/xformers-dist,target=/vllm-workspace/xformers-dist \
     --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system /vllm-workspace/xformers-dist/*.whl --verbose

+# Install common dependencies (needed by flashinfer.aot build process)
+COPY requirements/common.txt requirements/common.txt
+COPY use_existing_torch.py use_existing_torch.py
+COPY pyproject.toml pyproject.toml
+RUN python3 use_existing_torch.py
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install --system -r requirements/common.txt
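
use_existing_torch.py runs before the requirements install so that pinned torch entries don't clobber the preinstalled nightly wheels. Roughly, its effect is equivalent to this sketch (illustrative only, not the actual script):

    # Drop torch-family pins from a requirements file so uv keeps the
    # already-installed nightly builds (hypothetical one-liner equivalent).
    grep -vE '^(torch|torchvision|torchaudio)([=<>!~ ]|$)' requirements/common.txt > /tmp/common.txt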

 ARG torch_cuda_arch_list='8.0;8.6;8.9;9.0'
+# FlashInfer requires space-separated arch list (different from torch's semicolon format)
+# CUDA 12.8 doesn't support sm_120 (cvt.e2m1x2 instruction not available)
+# Excluding only 12.0; sm_100a (Blackwell) compiles successfully
+ARG flashinfer_cuda_arch_list='8.0 8.6 8.9 9.0a 10.0a'
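
The two ARG lines encode overlapping architecture sets in different separator formats. If the base of the FlashInfer list were derived from torch's rather than maintained by hand, a bash sketch could look like this (hypothetical; the 9.0a/10.0a suffixes and the sm_120 exclusion would still need manual handling):

    torch_cuda_arch_list='8.0;8.6;8.9;9.0'
    # bash parameter expansion: replace every ';' with a space
    flashinfer_base="${torch_cuda_arch_list//;/ }"
    echo "$flashinfer_base"   # prints: 8.0 8.6 8.9 9.0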

-# install package for build flashinfer
+# install packages for building flashinfer
+# Versions must match FlashInfer's pyproject.toml build requirements
 # see issue: https://github.com/flashinfer-ai/flashinfer/issues/738
-RUN pip install setuptools==75.6.0 packaging==23.2 ninja==1.11.1.3 build==1.2.2.post1
+RUN pip install 'setuptools>=77' 'packaging>=24' ninja==1.11.1.3 build==1.2.2.post1

 # build flashinfer for torch nightly from source around 10 mins
+# IMPORTANT: Build flashinfer BEFORE installing vllm wheel, so uv doesn't download
+# a prebuilt flashinfer that's incompatible with torch nightly
 # release version: v0.4.1
 # todo(elainewy): cache flashinfer build result for faster build
 ENV CCACHE_DIR=/root/.cache/ccache
+# Build and install flashinfer in one step (wheel must exist when installing)
 RUN --mount=type=cache,target=/root/.cache/ccache \
     --mount=type=cache,target=/root/.cache/uv \
     echo "git clone flashinfer..." \
     && git clone --recursive https://github.com/flashinfer-ai/flashinfer.git \
     && cd flashinfer \
-    && git checkout v0.4.1\
+    && git checkout v0.4.1 \
     && git submodule update --init --recursive \
     && echo "finish git clone flashinfer..." \
     && rm -rf build \
     && export TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list} \
-    && FLASHINFER_ENABLE_AOT=1 python3 setup.py bdist_wheel --dist-dir=../flashinfer-dist --verbose \
+    && export FLASHINFER_CUDA_ARCH_LIST="${flashinfer_cuda_arch_list}" \
+    && export FLASHINFER_EXTRA_CUDAFLAGS="--ptxas-options=-w,--allow-expensive-optimizations=true" \
+    && echo "Building FlashInfer with TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}" \
+    && echo "Building FlashInfer with FLASHINFER_CUDA_ARCH_LIST=${FLASHINFER_CUDA_ARCH_LIST}" \
+    && echo "Building FlashInfer with FLASHINFER_EXTRA_CUDAFLAGS=${FLASHINFER_EXTRA_CUDAFLAGS}" \
+    && echo "Running AOT compilation..." \
+    && python3 -m flashinfer.aot \
+    && echo "AOT compilation completed successfully!" \
+    && echo "Building wheel with python build module..." \
+    && python3 -m build --no-isolation --wheel --outdir /tmp/flashinfer-dist . \
+    && echo "Wheel build completed! Contents:" \
+    && ls -lah /tmp/flashinfer-dist/ \
+    && echo "Installing flashinfer wheel..." \
+    && uv pip install --system --prerelease=allow /tmp/flashinfer-dist/*.whl --verbose \
     && cd .. \
     && rm -rf flashinfer
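
A sanity check one could append after this step (not part of the PR): import the freshly built wheel against the nightly torch so an ABI mismatch surfaces at build time rather than at container runtime. This assumes flashinfer exposes __version__, as recent releases do:

    # Hypothetical verification step after the AOT build.
    python3 -c "import torch, flashinfer; print(torch.__version__, flashinfer.__version__)"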

-# install flashinfer
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system flashinfer-dist/*.whl --verbose
-
-# install common packages
-COPY requirements/common.txt requirements/common.txt
-COPY use_existing_torch.py use_existing_torch.py
-COPY pyproject.toml pyproject.toml
+# install the vllm wheel (flashinfer already installed, so uv won't download it)
+RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/vllm-dist \
+    --mount=type=cache,target=/root/.cache/uv \
+    uv pip install --system --prerelease=allow vllm-dist/*.whl --verbose
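
To confirm that installing the vllm wheel reused the locally built FlashInfer instead of pulling one from an index, the resolved distribution could be inspected (assuming the distribution name flashinfer-python, as in recent FlashInfer releases):

    # Hypothetical check: Version/Location should match the v0.4.1
    # source build from the previous step.
    uv pip show flashinfer-python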

+# Copy examples and benchmarks (common.txt already installed earlier)
 COPY examples examples
 COPY benchmarks benchmarks
 COPY ./vllm/collect_env.py .

-RUN python3 use_existing_torch.py
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system -r requirements/common.txt
-
 ################### VLLM INSTALLED IMAGE ####################

