Include the default PyPI index to resolve the missing libucx-cu12 package version. (#1495)

metrizable · web-flow · commit d019ef74b14f · 2025-09-18T16:24:17.000-04:00
The CPU/GPU container image builds are currently broken due to dependency resolution failures: <img width="856" height="242" alt="9MgPMDp9qN8qCqf" src="https://github.com/user-attachments/assets/b30c6024-0391-4c54-a441-502847673d7c" /> It appears that the required `libucx-cu12` version was removed from the NVIDIA index since the last build. <img width="1099" height="281" alt="6NFm9uDQDA5efcV" src="https://github.com/user-attachments/assets/c4756d56-4f21-43fc-8591-3eccfcb74dc0" /> We ensure that a compatible package version, `libucx-cu12==1.18.1`, is available by including the default PyPI index and passing an appropriate [index strategy](https://docs.astral.sh/uv/concepts/indexes/#searching-across-multiple-indexes) to `uv`. In the near future, we may want to consider upgrading `cuml-cu12` and its ecosystem to 25.6 or later.
diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl
@@ -34,7 +34,9 @@ RUN uv pip install --no-build-isolation --system "git+https://github.com/Kaggle/
 
 # b/408281617: Torch is adamant that it can not install cudnn 9.3.x, only 9.1.x, but Tensorflow can only support 9.3.x.
 # This conflict causes a number of package downgrades, which are handled in this command
-RUN uv pip install --system --force-reinstall --extra-index-url https://pypi.nvidia.com "cuml-cu12==25.2.1" \
+RUN uv pip install \
+    --index-url https://pypi.nvidia.com --extra-index-url https://pypi.org/simple/ --index-strategy unsafe-first-match \
+    --system --force-reinstall "cuml-cu12==25.2.1" \
     "nvidia-cudnn-cu12==9.3.0.75" "nvidia-cublas-cu12==12.5.3.2" "nvidia-cusolver-cu12==11.6.3.83" \
     "nvidia-cuda-cupti-cu12==12.5.82" "nvidia-cuda-nvrtc-cu12==12.5.82" "nvidia-cuda-runtime-cu12==12.5.82" \
     "nvidia-cufft-cu12==11.2.3.61" "nvidia-curand-cu12==10.3.6.82" "nvidia-cusparse-cu12==12.5.1.3" \
@@ -171,7 +173,7 @@ ENV PYTHONUSERBASE="/root/.local"
 ADD patches/kaggle_gcp.py \
     patches/kaggle_secrets.py \
     patches/kaggle_session.py \
-    patches/kaggle_web_client.py \ 
+    patches/kaggle_web_client.py \
     patches/kaggle_datasets.py \
     patches/log.py \
     $PACKAGE_PATH/
diff --git a/kaggle_requirements.txt b/kaggle_requirements.txt
@@ -35,7 +35,10 @@ easyocr
 # b/302136621: Fix eli5 import for learntools
 eli5
 emoji
-fastcore>=1.7.20
+fastcore
+# b/445960030: Requires a newer version of fastai than the currently used base image.
+# Remove when relying on a newer base image.
+fastai>=2.8.4
 fasttext
 featuretools
 fiona
@@ -89,7 +92,9 @@ nbconvert==6.4.5
 nbdev
 nilearn
 olefile
-onnx
+# b/445960030: Broken in 1.19.0. See https://github.com/onnx/onnx/issues/7249.
+# Fixed with https://github.com/onnx/onnx/pull/7254. Upgrade when version with fix is published.
+onnx==1.18.0
 openslide-bin
 openslide-python
 optuna
diff --git a/tests/test_fastai.py b/tests/test_fastai.py
@@ -1,35 +1,36 @@
 import unittest
 
 import fastai
-
 from fastai.tabular.all import *
 
+
 class TestFastAI(unittest.TestCase):
-    # Basic import
-    def test_basic(self):
-        import fastai
-        import fastcore
-        import fastprogress
-        import fastdownload
-
-    def test_has_version(self):
-        self.assertGreater(len(fastai.__version__), 2)
-    
-    # based on https://github.com/fastai/fastai/blob/master/tests/test_torch_core.py#L17
-    def test_torch_tensor(self):
-        a = tensor([1, 2, 3])
-        b = torch.tensor([1, 2, 3])
-
-        self.assertTrue(torch.all(a == b))
-
-    def test_tabular(self):
-        dls = TabularDataLoaders.from_csv(
-            "/input/tests/data/train.csv",
-            cont_names=["pixel"+str(i) for i in range(784)],
-            y_names='label',
-            procs=[FillMissing, Categorify, Normalize])
-        learn = tabular_learner(dls, layers=[200, 100])
-        with learn.no_bar():
-            learn.fit_one_cycle(n_epoch=1)
-        
-            self.assertGreater(learn.smooth_loss, 0)
+  # Basic import
+  def test_basic(self):
+    import fastai
+    import fastcore
+    import fastprogress
+    import fastdownload
+
+  def test_has_version(self):
+    self.assertGreater(len(fastai.__version__), 2)
+
+  # based on https://github.com/fastai/fastai/blob/master/tests/test_torch_core.py#L17
+  def test_torch_tensor(self):
+    a = tensor([1, 2, 3])
+    b = torch.tensor([1, 2, 3])
+
+    self.assertTrue(torch.all(a == b))
+
+  def test_tabular(self):
+    dls = TabularDataLoaders.from_csv(
+        "/input/tests/data/train.csv",
+        cont_names=["pixel" + str(i) for i in range(784)],
+        y_names="label",
+        procs=[FillMissing, Categorify, Normalize],
+    )
+    learn = tabular_learner(dls, layers=[200, 100])
+    with learn.no_bar():
+      learn.fit_one_cycle(n_epoch=1)
+
+      self.assertGreater(learn.smooth_loss, 0)