@@ -1,4 +1,62 @@
-FROM intel/oneapi-basekit:2024.1.1-devel-ubuntu22.04
+# First stage: build the torch-ccl (oneccl_bind_pt) wheel
+FROM intel/oneapi-basekit:2025.0.1-0-devel-ubuntu22.04 AS build
+
+ARG http_proxy
+ARG https_proxy
+
+ENV TZ=Asia/Shanghai
+ENV PYTHONUNBUFFERED=1
+
+ARG PIP_NO_CACHE_DIR=false
+
+ADD ./ccl_torch.patch /tmp/
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends curl wget git libunwind8-dev vim less && \
+    ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \
+    env DEBIAN_FRONTEND=noninteractive apt-get update && \
+    # add-apt-repository requires gnupg, gpg-agent, software-properties-common
+    apt-get install -y --no-install-recommends gnupg gpg-agent software-properties-common && \
+    # Add Python 3.11 PPA repository
+    add-apt-repository ppa:deadsnakes/ppa -y && \
+    apt-get install -y --no-install-recommends python3.11 git curl wget && \
+    rm /usr/bin/python3 && \
+    ln -s /usr/bin/python3.11 /usr/bin/python3 && \
+    ln -s /usr/bin/python3 /usr/bin/python && \
+    apt-get install -y --no-install-recommends python3-pip python3.11-dev python3-wheel python3.11-distutils && \
+    wget https://bootstrap.pypa.io/get-pip.py -O get-pip.py && \
+    # Installing FastChat from source requires PEP 660 support
+    python3 get-pip.py && \
+    rm get-pip.py && \
+    pip install --upgrade requests argparse urllib3 && \
+    apt-get install -y --no-install-recommends libfabric-dev wrk libaio-dev numactl && \
+    # The build fails later if this compute runtime is not installed
+    mkdir -p /tmp/neo && \
+    cd /tmp/neo && \
+    wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.5.6/intel-igc-core-2_2.5.6+18417_amd64.deb && \
+    wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.5.6/intel-igc-opencl-2_2.5.6+18417_amd64.deb && \
+    wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-level-zero-gpu-dbgsym_1.6.32224.5_amd64.ddeb && \
+    wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-level-zero-gpu_1.6.32224.5_amd64.deb && \
+    wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-opencl-icd-dbgsym_24.52.32224.5_amd64.ddeb && \
+    wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-opencl-icd_24.52.32224.5_amd64.deb && \
+    wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/libigdgmm12_22.5.5_amd64.deb && \
+    dpkg -i *.deb && \
+    pip install --pre --upgrade ipex-llm[xpu_2.6] --extra-index-url https://download.pytorch.org/whl/test/xpu && \
+    mkdir /build && \
+    cd /build && \
+    git clone https://github.com/intel/torch-ccl.git && \
+    cd torch-ccl && \
+    git checkout ccl_torch2.5.0+xpu && \
+    git submodule sync && \
+    git submodule update --init --recursive && \
+    # This patch enables building torch-ccl against the PyTorch 2.6 environment
+    git apply /tmp/ccl_torch.patch && \
+    USE_SYSTEM_ONECCL=ON COMPUTE_BACKEND=dpcpp python setup.py bdist_wheel
+# File path: /build/torch-ccl/dist/oneccl_bind_pt-2.5.0+xpu-cp311-cp311-linux_x86_64.whl
+
+FROM intel/oneapi-basekit:2025.0.1-0-devel-ubuntu22.04
+
+COPY --from=build /build/torch-ccl/dist/oneccl_bind_pt-2.5.0+xpu-cp311-cp311-linux_x86_64.whl /opt/oneccl_bind_pt-2.5.0+xpu-cp311-cp311-linux_x86_64.whl
 
 ARG http_proxy
 ARG https_proxy
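
The first hunk introduces a dedicated build stage that compiles the torch-ccl wheel against the PyTorch 2.6 XPU packages, so the final image only needs to copy in the finished `.whl`. The stage can be built and checked on its own via `--target`; a minimal sketch, where the tag `torch-ccl-build` and the build context `.` are assumptions:

    # Build only the first stage to confirm the torch-ccl wheel compiles;
    # the proxy build args are optional and mirror the ARGs in the Dockerfile.
    docker build --target build \
        --build-arg http_proxy=$http_proxy \
        --build-arg https_proxy=$https_proxy \
        -t torch-ccl-build .

    # The wheel should land in /build/torch-ccl/dist/ inside the stage image.
    docker run --rm torch-ccl-build ls /build/torch-ccl/dist/
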
@@ -11,22 +69,12 @@ ENV VLLM_RPC_TIMEOUT=100000
 
 # Disable pip's cache behavior
 ARG PIP_NO_CACHE_DIR=false
-ADD ./gradio_web_server.patch /tmp/gradio_web_server.patch
-ADD ./oneccl-binding.patch /tmp/oneccl-binding.patch
 
-RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/intel-oneapi-archive-keyring.gpg > /dev/null && \
-    echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " | tee /etc/apt/sources.list.d/oneAPI.list && \
-    chmod 644 /usr/share/keyrings/intel-oneapi-archive-keyring.gpg && \
-    rm /etc/apt/sources.list.d/intel-graphics.list && \
-    wget -O- https://repositories.intel.com/graphics/intel-graphics.key | gpg --dearmor | tee /usr/share/keyrings/intel-graphics.gpg > /dev/null && \
-    echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu jammy arc" | tee /etc/apt/sources.list.d/intel.gpu.jammy.list && \
-    chmod 644 /usr/share/keyrings/intel-graphics.gpg && \
-    apt-get update && \
+RUN apt-get update && \
     apt-get install -y --no-install-recommends curl wget git libunwind8-dev vim less && \
     ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \
     env DEBIAN_FRONTEND=noninteractive apt-get update && \
-    # add-apt-repository requires gnupg, gpg-agent, software-properties-common
-    apt-get install -y --no-install-recommends gnupg gpg-agent software-properties-common && \
+    apt-get install -y --no-install-recommends gnupg gpg-agent software-properties-common kmod && \
     # Add Python 3.11 PPA repository
     add-apt-repository ppa:deadsnakes/ppa -y && \
     apt-get install -y --no-install-recommends python3.11 git curl wget && \
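
The second hunk drops the manual Intel APT key and repository setup, since the 2025.0.1 base image already provides what is needed, and later in this stage ipex-llm is pulled from the PyTorch XPU test index instead of the Intel extension index. A quick sanity check once the image builds might look like this (a sketch; the tag `ipex-vllm` is an assumption, and it assumes an Intel GPU is visible at /dev/dri):

    # Confirm PyTorch sees the XPU and the oneCCL bindings import cleanly.
    docker run --rm --device=/dev/dri ipex-vllm \
        python -c "import torch; import oneccl_bindings_for_pytorch; print(torch.xpu.is_available())"
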
@@ -35,81 +83,59 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
     ln -s /usr/bin/python3 /usr/bin/python && \
     apt-get install -y --no-install-recommends python3-pip python3.11-dev python3-wheel python3.11-distutils && \
     wget https://bootstrap.pypa.io/get-pip.py -O get-pip.py && \
-    # Install FastChat from source requires PEP 660 support
     python3 get-pip.py && \
     rm get-pip.py && \
     pip install --upgrade requests argparse urllib3 && \
-    pip install --pre --upgrade ipex-llm[xpu,serving] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ && \
+    pip install --pre --upgrade ipex-llm[xpu_2.6] --extra-index-url https://download.pytorch.org/whl/test/xpu && \
     pip install transformers_stream_generator einops tiktoken && \
     pip install --upgrade colorama && \
-    # Download all-in-one benchmark and examples
-    git clone https://github.com/intel-analytics/ipex-llm && \
-    # The following comment segment is used when building from source...
-    # cd ipex-llm && \
-    # git fetch origin pull/12338/head:local_pr && \
-    # git checkout local_pr && \
-    # pip uninstall -y ipex-llm && \
-    # cd python/llm && \
-    # python setup.py install && \
-    # cd ../../../ && \
+    git clone https://github.com/intel/ipex-llm.git && \
     cp -r ./ipex-llm/python/llm/dev/benchmark/ ./benchmark && \
     cp -r ./ipex-llm/python/llm/example/GPU/HuggingFace/LLM ./examples && \
+    cp -r ./ipex-llm/python/llm/example/GPU/vLLM-Serving/ ./vLLM-Serving && \
+    rm -rf ./ipex-llm && \
     # Install vllm dependencies
     pip install --upgrade fastapi && \
     pip install --upgrade "uvicorn[standard]" && \
-    # Download vLLM-Serving
-    cp -r ./ipex-llm/python/llm/example/GPU/vLLM-Serving/ ./vLLM-Serving && \
-    rm -rf ./ipex-llm && \
     # Install torch-ccl
-    cd /tmp/ && \
-    pip install torch==2.1.0.post2 torchvision==0.16.0.post2 torchaudio==2.1.0.post2 intel-extension-for-pytorch==2.1.30.post0 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ && \
-    # Internal oneccl
-    wget https://sourceforge.net/projects/oneccl-wks/files/2024.0.0.6.5-release/oneccl_wks_installer_2024.0.0.6.5.sh && \
-    bash oneccl_wks_installer_2024.0.0.6.5.sh && \
-    git clone https://github.com/intel/torch-ccl -b v2.1.300+xpu && \
-    cd torch-ccl && \
-    patch -p1 < /tmp/oneccl-binding.patch && \
-    USE_SYSTEM_ONECCL=ON COMPUTE_BACKEND=dpcpp python setup.py install && \
+    pip install /opt/oneccl_bind_pt-2.5.0+xpu-cp311-cp311-linux_x86_64.whl && \
+    # Install internal oneCCL
+    cd /opt && \
+    wget https://sourceforge.net/projects/oneccl-wks/files/2025.0.0.6.6-release/oneccl_wks_installer_2025.0.0.6.6.sh && \
+    bash oneccl_wks_installer_2025.0.0.6.6.sh && \
     apt-get update && \
     apt-get install -y --no-install-recommends libfabric-dev wrk libaio-dev numactl && \
-    # apt-get install -y intel-opencl-icd intel-level-zero-gpu=1.3.26241.33-647~22.04 level-zero level-zero-dev --allow-downgrades && \
+    # Install compute runtime
     mkdir -p /tmp/neo && \
     cd /tmp/neo && \
-    wget https://github.com/oneapi-src/level-zero/releases/download/v1.18.5/level-zero_1.18.5+u22.04_amd64.deb && \
-    wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-core_1.0.17791.9_amd64.deb && \
-    wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-opencl_1.0.17791.9_amd64.deb && \
-    wget https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-level-zero-gpu_1.6.31294.12_amd64.deb && \
-    wget https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-opencl-icd_24.39.31294.12_amd64.deb && \
-    wget https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/libigdgmm12_22.5.2_amd64.deb && \
+    wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.5.6/intel-igc-core-2_2.5.6+18417_amd64.deb && \
+    wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.5.6/intel-igc-opencl-2_2.5.6+18417_amd64.deb && \
+    wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-level-zero-gpu-dbgsym_1.6.32224.5_amd64.ddeb && \
+    wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-level-zero-gpu_1.6.32224.5_amd64.deb && \
+    wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-opencl-icd-dbgsym_24.52.32224.5_amd64.ddeb && \
+    wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-opencl-icd_24.52.32224.5_amd64.deb && \
+    wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/libigdgmm12_22.5.5_amd64.deb && \
     dpkg -i *.deb && \
-    rm -rf /tmp/neo && \
     mkdir -p /llm && \
     cd /llm && \
-    git clone -b 0.6.2 https://github.com/analytics-zoo/vllm.git /llm/vllm && \
+    rm -rf /tmp/neo && \
+    # Install vllm
+    git clone -b 0.6.6-pre https://github.com/analytics-zoo/vllm.git /llm/vllm && \
     cd /llm/vllm && \
     pip install setuptools-scm && \
     pip install --upgrade cmake && \
     VLLM_TARGET_DEVICE=xpu pip install --no-build-isolation -v /llm/vllm && \
-    # pip install -r /llm/vllm/requirements-xpu.txt && \
-    # VLLM_TARGET_DEVICE=xpu python setup.py install && \
     pip install mpi4py fastapi uvicorn openai && \
     pip install gradio==4.43.0 && \
-    # pip install transformers==4.44.2 && \
-    # patch /usr/local/lib/python3.11/dist-packages/fastchat/serve/gradio_web_server.py < /tmp/gradio_web_server.patch && \
-    pip install ray && \
-    patch /usr/local/lib/python3.11/dist-packages/fastchat/serve/gradio_web_server.py < /tmp/gradio_web_server.patch
+    pip install ray
 
-COPY ./vllm_online_benchmark.py /llm/
-COPY ./vllm_offline_inference.py /llm/
+COPY ./vllm_online_benchmark.py /llm/
+COPY ./vllm_offline_inference.py /llm/
 COPY ./vllm_offline_inference_vision_language.py /llm/
-COPY ./payload-1024.lua /llm/
-COPY ./start-vllm-service.sh /llm/
-COPY ./benchmark_vllm_throughput.py /llm/
-COPY ./benchmark_vllm_latency.py /llm/
-COPY ./start-fastchat-service.sh /llm/
-COPY ./start-pp_serving-service.sh /llm/
-COPY ./start-lightweight_serving-service.sh /llm/
-
-ENV LD_LIBRARY_PATH /usr/local/lib/python3.11/dist-packages/intel_extension_for_pytorch/lib/:/opt/intel/oneapi/tbb/2021.12/env/../lib/intel64/gcc4.8:/opt/intel/oneapi/mpi/2021.12/opt/mpi/libfabric/lib:/opt/intel/oneapi/mpi/2021.12/lib:/opt/intel/oneapi/mkl/2024.1/lib:/opt/intel/oneapi/ippcp/2021.11/lib/:/opt/intel/oneapi/ipp/2021.11/lib:/opt/intel/oneapi/dpl/2022.5/lib:/opt/intel/oneapi/dnnl/2024.1/lib:/opt/intel/oneapi/debugger/2024.1/opt/debugger/lib:/opt/intel/oneapi/dal/2024.2/lib:/opt/intel/oneapi/compiler/2024.1/opt/oclfpga/host/linux64/lib:/opt/intel/oneapi/compiler/2024.1/opt/compiler/lib:/opt/intel/oneapi/compiler/2024.1/lib:/opt/intel/oneapi/ccl/2021.12/lib/
+COPY ./payload-1024.lua /llm/
+COPY ./start-vllm-service.sh /llm/
+COPY ./benchmark_vllm_throughput.py /llm/
+COPY ./benchmark_vllm_latency.py /llm/
+COPY ./start-pp_serving-service.sh /llm/
 
 WORKDIR /llm/
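
With the FastChat and lightweight-serving scripts removed, the final stage ships the vLLM scripts under /llm/. A minimal launch sketch, assuming the image is tagged `ipex-vllm` and models live under /path/to/models on the host:

    # Run the serving image with the Intel GPU exposed to the container.
    # Tag, mount path, and shm size are assumptions for illustration.
    docker run -itd \
        --net=host \
        --device=/dev/dri \
        -v /path/to/models:/llm/models \
        --shm-size=16g \
        ipex-vllm \
        bash /llm/start-vllm-service.sh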