@@ -12,6 +12,15 @@ ARG TORCHVISION_VERSION
 FROM gcr.io/kaggle-images/python-lightgbm-whl:${GPU_BASE_IMAGE_NAME}-${BASE_IMAGE_TAG}-${LIGHTGBM_VERSION} AS lightgbm_whl
 FROM gcr.io/kaggle-images/python-torch-whl:${GPU_BASE_IMAGE_NAME}-${BASE_IMAGE_TAG}-${TORCH_VERSION} AS torch_whl
 FROM ${BASE_IMAGE_REPO}/${GPU_BASE_IMAGE_NAME}:${BASE_IMAGE_TAG}
+{{ else }}
+FROM ${BASE_IMAGE_REPO}/${CPU_BASE_IMAGE_NAME}:${BASE_IMAGE_TAG}
+{{ end }}
+
+# Ensures shared libraries installed with conda can be found by the dynamic link loader.
+ENV LIBRARY_PATH="$LIBRARY_PATH:/opt/conda/lib"
+ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib"
+
+{{ if eq .Accelerator "gpu" }}
 ARG CUDA_MAJOR_VERSION
 ARG CUDA_MINOR_VERSION
 ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}
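Note on the new ENV lines: LIBRARY_PATH affects the compile-time linker while LD_LIBRARY_PATH affects the runtime loader, so together they make anything installed under /opt/conda/lib discoverable both when native extensions are built and when they are loaded. A minimal smoke test, with a placeholder image tag standing in for whatever tag the build actually produces:

    # Placeholder tag; substitute the tag produced by the actual build.
    docker run --rm kaggle/python-build-tmp bash -c \
      'echo "$LD_LIBRARY_PATH" | tr ":" "\n" | grep -x /opt/conda/lib && echo "loader search path includes /opt/conda/lib"'
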
@@ -22,11 +31,10 @@ ENV PATH=/opt/bin:${PATH}
 ENV LD_LIBRARY_PATH_NO_STUBS="$LD_LIBRARY_PATH"
 ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64/stubs"
 RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1
-{{ else }}
-FROM ${BASE_IMAGE_REPO}/${CPU_BASE_IMAGE_NAME}:${BASE_IMAGE_TAG}
 {{ end }}
+
 # Keep these variables in sync if base image is updated.
-ENV TENSORFLOW_VERSION=2.6.4
+ENV TENSORFLOW_VERSION=2.9.2
 
 # We need to redefine the ARG here to get the ARG value defined above the FROM instruction.
 # See: https://docs.docker.com/engine/reference/builder/#understand-how-arg-and-from-interact
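The stub lines kept as context above are what allow CUDA-linked packages to be installed and imported on build machines that have no GPU driver: the stub libcuda.so satisfies the dynamic loader, and LD_LIBRARY_PATH_NO_STUBS preserves the original search path, presumably so it can be restored at runtime. A hedged way to confirm the stub resolves during the build:

    # Illustration only: dlopen the stubbed driver library inside the build container.
    python -c "import ctypes; ctypes.CDLL('libcuda.so.1'); print('libcuda stub resolved')"
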
@@ -76,33 +84,42 @@ ENV PROJ_LIB=/opt/conda/share/proj
 # the remaining pip commands: https://www.anaconda.com/using-pip-in-a-conda-environment/
 RUN conda config --add channels nvidia && \
     conda config --add channels rapidsai && \
+    conda install -c conda-forge mamba && \
     # Base image channel order: conda-forge (highest priority), defaults.
     # End state: rapidsai (highest priority), nvidia, conda-forge, defaults.
-    conda install mkl cartopy=0.19 imagemagick=7.1 pyproj==3.1.0 && \
+    mamba install mkl cartopy=0.19 imagemagick=7.1 pyproj==3.1.0 && \
     /tmp/clean-layer.sh
 
 {{ if eq .Accelerator "gpu" }}
 
 # b/232247930: uninstall pyarrow to avoid double installation with the GPU specific version.
-RUN pip uninstall -y pyarrow && \
-    conda install cudf=21.10 cuml=21.10 cudatoolkit=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \
-    /tmp/clean-layer.sh
-{{ end }}
+# b/267180053: RapidsAI (cudf/cuml) are not compatible with the latest tensorflow cudatoolkit version.
+# RUN pip uninstall -y pyarrow && \
+#     mamba install -y cudf cuml && \
+#     /tmp/clean-layer.sh
+# {{ end }}
 
 # Install implicit
 {{ if eq .Accelerator "gpu" }}
-RUN conda install implicit implicit-proc=*=gpu && \
+RUN mamba install implicit implicit-proc=*=gpu && \
     /tmp/clean-layer.sh
 {{ else }}
-RUN conda install implicit && \
+RUN mamba install implicit && \
     /tmp/clean-layer.sh
 {{ end}}
 
 # Install PyTorch
 {{ if eq .Accelerator "gpu" }}
 COPY --from=torch_whl /tmp/whl/*.whl /tmp/torch/
-RUN conda install -c pytorch magma-cuda${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION} && \
+RUN mamba install -c pytorch magma-cuda${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION} && \
     pip install /tmp/torch/*.whl && \
+    # b/255757999 openmp (libomp.so) is a dependency of libtorchtext and libtorchaudio, but
+    # the built-from-source versions don't seem to properly link it in. This forces the dependency,
+    # which makes sure that libomp is loaded whenever these libraries are loaded.
+    mamba install -y openmp && \
+    pip install patchelf && \
+    patchelf --add-needed libomp.so /opt/conda/lib/python3.7/site-packages/torchtext/lib/libtorchtext.so && \
+    patchelf --add-needed libomp.so /opt/conda/lib/python3.7/site-packages/torchaudio/lib/libtorchaudio.so && \
     rm -rf /tmp/torch && \
     /tmp/clean-layer.sh
 {{ else }}
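On the patchelf workaround added here: patchelf --add-needed writes an extra DT_NEEDED entry into the shared object, so the dynamic loader pulls in libomp.so whenever libtorchtext.so or libtorchaudio.so is loaded, regardless of how the wheels were originally linked. A hypothetical post-build check (the library path is taken from the diff itself):

    # Confirm the injected dependency is present in the dynamic section.
    readelf -d /opt/conda/lib/python3.7/site-packages/torchtext/lib/libtorchtext.so | grep NEEDED
    # The output should now include a line like: (NEEDED)  Shared library: [libomp.so]
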
@@ -141,7 +158,8 @@ RUN pip install jax[cpu] && \
 
 # Install mxnet
 {{ if eq .Accelerator "gpu" }}
-RUN pip install mxnet-cu$CUDA_MAJOR_VERSION$CUDA_MINOR_VERSION && \
+# No mxnet package for the 11.3 minor version; using the 11.2 build instead.
+RUN pip install mxnet-cu112 && \
     /tmp/clean-layer.sh
 {{ else }}
 RUN pip install mxnet && \
@@ -160,10 +178,11 @@ RUN pip install spacy && \
 # Install GPU specific packages
 {{ if eq .Accelerator "gpu" }}
 # Install GPU-only packages
+# No specific package for nnabla-ext-cuda 11.x minor versions.
 RUN pip install pycuda \
     pynvrtc \
     pynvml \
-    nnabla-ext-cuda$CUDA_MAJOR_VERSION$CUDA_MINOR_VERSION && \
+    nnabla-ext-cuda${CUDA_MAJOR_VERSION}0 && \
     /tmp/clean-layer.sh
 {{ end }}
 
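The nnabla-ext-cuda package name is now assembled from the CUDA major version plus a literal 0, since, per the new comment, there is no build for each 11.x minor version. Assuming CUDA_MAJOR_VERSION is 11 (an assumption; the value is not stated in this hunk), the specifier expands as follows:

    # Illustration only: how the package name expands.
    CUDA_MAJOR_VERSION=11
    echo "nnabla-ext-cuda${CUDA_MAJOR_VERSION}0"   # -> nnabla-ext-cuda110
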
@@ -176,9 +195,9 @@ RUN pip install pysal \
     # Use `conda install -c h2oai h2o` once Python 3.7 version is released to conda.
     apt-get install -y default-jre-headless && \
     pip install -f https://h2o-release.s3.amazonaws.com/h2o/latest_stable_Py.html h2o \
-        tensorflow-gcs-config==2.6.0 \
-        tensorflow-addons==0.14.0 \
-        tensorflow_decision_forests==0.2.0 && \
+        "tensorflow-gcs-config<=${TENSORFLOW_VERSION}" \
+        tensorflow-addons==0.17.1 \
+        tensorflow_decision_forests==0.2.7 && \
     /tmp/clean-layer.sh
 
 RUN apt-get install -y libfreetype6-dev && \
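The new tensorflow-gcs-config specifier is quoted because an unquoted < would be parsed by the shell as input redirection rather than as part of the version constraint; quoting lets pip receive the whole specifier, which now tracks the TENSORFLOW_VERSION defined earlier (2.9.2). A small illustration of the difference:

    # Quoted: pip sees the full requirement "tensorflow-gcs-config<=2.9.2".
    TENSORFLOW_VERSION=2.9.2
    pip install "tensorflow-gcs-config<=${TENSORFLOW_VERSION}"
    # Unquoted, the shell would treat "<" as a redirection and look for a file named "=2.9.2".
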
@@ -393,6 +412,8 @@ RUN pip install cython \
     mlcrate && \
     /tmp/clean-layer.sh
 
+
+# Fix qgrid by pinning ipywidgets https://github.com/quantopian/qgrid/issues/376
 RUN pip install bleach \
     certifi \
     cycler \
@@ -402,7 +423,7 @@ RUN pip install bleach \
     ipykernel \
     ipython \
     ipython-genutils \
-    ipywidgets \
+    ipywidgets==7.7.1 \
     isoweek \
     jedi \
     jsonschema \
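Since ipywidgets is now pinned to 7.7.1 to keep qgrid working (see the issue linked in the new comment), a reasonable hedged follow-up is to let pip verify that nothing else in the image declares a conflicting requirement:

    # Hypothetical post-install check; exits non-zero if any installed package
    # declares requirements that the ipywidgets==7.7.1 pin no longer satisfies.
    pip check || echo "dependency conflicts found; revisit the ipywidgets pin"
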
@@ -459,6 +480,10 @@ RUN pip install bleach \
 #
 ###########
 
+# dlib has a libmkl incompatibility:
+# test_dlib_face_detector (test_dlib.TestDLib) ... INTEL MKL ERROR: /opt/conda/bin/../lib/libmkl_avx512.so.2: undefined symbol: mkl_sparse_optimize_bsr_trsm_i8.
+# Intel MKL FATAL ERROR: Cannot load libmkl_avx512.so.2 or libmkl_def.so.2.
+# nnabla breaks protobuf compatibility:
 RUN pip install flashtext \
     wandb \
     # b/214080882 blake3 0.3.0 is not compatible with vaex.
@@ -505,10 +530,8 @@ RUN pip install flashtext \
     transformers \
     # b/232247930 >= 2.2.0 requires pyarrow >= 6.0.0 which conflicts with dependencies for rapidsai 0.21.*
     datasets==2.1.0 \
-    dlib \
     kaggle-environments \
     geopandas \
-    nnabla \
     vowpalwabbit \
     pydub \
     pydegensac \
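dlib and nnabla are dropped from this bulk install because of the MKL undefined-symbol failure and the protobuf conflict quoted in the comment block above. If the MKL issue needs to be chased again, one hedged diagnostic (the library glob is an assumption) is to scan the conda MKL libraries for the missing symbol:

    # Illustration only: find which MKL component, if any, defines the symbol
    # named in the test failure above.
    for lib in /opt/conda/lib/libmkl_*.so*; do
      nm -D "$lib" 2>/dev/null | grep -w mkl_sparse_optimize_bsr_trsm_i8 | grep -v ' U ' \
        && echo "defined in $lib"
    done
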
@@ -600,6 +623,9 @@ RUN jupyter-nbextension disable nb_conda --py --sys-prefix && \
     jupyter-serverextension disable nb_conda --py --sys-prefix && \
     python -m nb_conda_kernels.install --disable
 
+# Force only one libcusolver
+RUN rm /opt/conda/bin/../lib/libcusolver.so.11 && ln -s /usr/local/cuda/lib64/libcusolver.so.11 /opt/conda/bin/../lib/libcusolver.so.11
+
 # Set backend for matplotlib
 ENV MPLBACKEND "agg"
 
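The new libcusolver step deletes the copy shipped with conda and points the same path at the CUDA toolkit's copy, so only one build of the library can ever be loaded. A hypothetical check after the step:

    # Both paths should now resolve to the same file.
    readlink -f /opt/conda/lib/libcusolver.so.11
    readlink -f /usr/local/cuda/lib64/libcusolver.so.11
    # Identical output confirms conda and the CUDA toolkit share a single libcusolver.
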