Skip to content

Commit b72decb

Browse files
Binyang2014 and chhwang authored
Update docker image for cuda12.4 (#370)
Update docker image for cuda12.4

Image pushed to registry

---------

Co-authored-by: Changho Hwang <[email protected]>
1 parent 582d386 commit b72decb

File tree

4 files changed: 25 additions (+25) and 9 deletions (-9)

4 files changed: 25 additions (+25) and 9 deletions (-9)

docker/base-dev-x.dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ ARG BASE_IMAGE
22
FROM ${BASE_IMAGE}
33

44
LABEL maintainer="MSCCL++"
5-
LABEL org.opencontainers.image.source https://github.com/microsoft/mscclpp
5+
LABEL org.opencontainers.image.source=https://github.com/microsoft/mscclpp
66

77
RUN apt-get update && \
88
apt-get install -y --no-install-recommends \

docker/base-x-rocm.dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ ARG BASE_IMAGE
22
FROM ${BASE_IMAGE}
33

44
LABEL maintainer="MSCCL++"
5-
LABEL org.opencontainers.image.source https://github.com/microsoft/mscclpp
5+
LABEL org.opencontainers.image.source=https://github.com/microsoft/mscclpp
66

77
ENV DEBIAN_FRONTEND=noninteractive
88

docker/base-x.dockerfile

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ ARG BASE_IMAGE
22
FROM ${BASE_IMAGE}
33

44
LABEL maintainer="MSCCL++"
5-
LABEL org.opencontainers.image.source https://github.com/microsoft/mscclpp
5+
LABEL org.opencontainers.image.source=https://github.com/microsoft/mscclpp
66

77
ENV DEBIAN_FRONTEND=noninteractive
88
USER root
@@ -17,6 +17,7 @@ RUN apt-get update && \
1717
git \
1818
libcap2 \
1919
libnuma-dev \
20+
lsb-release \
2021
openssh-client \
2122
openssh-server \
2223
python3-dev \
@@ -31,11 +32,13 @@ RUN apt-get update && \
3132
rm -rf /var/lib/apt/lists/* /tmp/*
3233

3334
# Install OFED
34-
ENV OFED_VERSION=5.2-2.2.3.0
35+
ARG OFED_VERSION=5.2-2.2.3.0
3536
RUN cd /tmp && \
36-
wget -q https://content.mellanox.com/ofed/MLNX_OFED-${OFED_VERSION}/MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu20.04-x86_64.tgz && \
37-
tar xzf MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu20.04-x86_64.tgz && \
38-
MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu20.04-x86_64/mlnxofedinstall --user-space-only --without-fw-update --force --all && \
37+
OS_VERSION=$(lsb_release -rs) && \
38+
OS_VERSION=ubuntu${OS_VERSION} && \
39+
wget -q https://content.mellanox.com/ofed/MLNX_OFED-${OFED_VERSION}/MLNX_OFED_LINUX-${OFED_VERSION}-${OS_VERSION}-x86_64.tgz && \
40+
tar xzf MLNX_OFED_LINUX-${OFED_VERSION}-${OS_VERSION}-x86_64.tgz && \
41+
MLNX_OFED_LINUX-${OFED_VERSION}-${OS_VERSION}-x86_64/mlnxofedinstall --user-space-only --without-fw-update --without-ucx-cuda --force --all && \
3942
rm -rf /tmp/MLNX_OFED_LINUX-${OFED_VERSION}*
4043

4144
# Install OpenMPI

docker/build.sh

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ baseImageTable=(
88
["cuda12.1"]="nvidia/cuda:12.1.1-devel-ubuntu20.04"
99
["cuda12.2"]="nvidia/cuda:12.2.2-devel-ubuntu20.04"
1010
["cuda12.3"]="nvidia/cuda:12.3.2-devel-ubuntu20.04"
11+
["cuda12.4"]="nvidia/cuda:12.4.1-devel-ubuntu22.04"
1112
["rocm6.2"]="rocm/rocm-terminal:6.2"
1213
)
1314

@@ -20,11 +21,16 @@ extraLdPathTable=(
2021
["rocm6.2"]="/opt/rocm/lib"
2122
)
2223

24+
declare -A ofedVersionTable
25+
ofedVersionTable=(
26+
["cuda12.4"]="23.07-0.5.1.2"
27+
)
28+
2329
GHCR="ghcr.io/microsoft/mscclpp/mscclpp"
2430
TARGET=${1}
2531

2632
print_usage() {
27-
echo "Usage: $0 [cuda11.8|cuda12.1|cuda12.2|cuda12.3|rocm6.2]"
33+
echo "Usage: $0 [cuda11.8|cuda12.1|cuda12.2|cuda12.3|cuda12.4|rocm6.2]"
2834
}
2935

3036
if [[ ! -v "baseImageTable[${TARGET}]" ]]; then
@@ -38,11 +44,18 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
3844

3945
cd ${SCRIPT_DIR}/..
4046

47+
DEFAULT_OFED_VERSION="5.2-2.2.3.0"
48+
OFED_VERSION=${ofedVersionTable[${TARGET}]}
49+
if [[ -z ${OFED_VERSION} ]]; then
50+
OFED_VERSION=${DEFAULT_OFED_VERSION}
51+
fi
52+
4153
docker build -t ${GHCR}-common:base-${TARGET} \
4254
-f docker/base-x.dockerfile \
4355
--build-arg BASE_IMAGE=${baseImageTable[${TARGET}]} \
4456
--build-arg EXTRA_LD_PATH=${extraLdPathTable[${TARGET}]} \
45-
--build-arg TARGET=${TARGET} .
57+
--build-arg TARGET=${TARGET} \
58+
--build-arg OFED_VERSION=${OFED_VERSION} .
4659

4760
if [[ ${TARGET} == rocm* ]]; then
4861
echo "Building ROCm base image..."

0 commit comments

Comments
 (0)