[NCCL]: trigger build for cuda 13 (#12061)

avik-pal · giordano · web-flow · commit 47bd0da31871 · 2025-09-15T17:48:55.000Z
* [NCCL]: trigger build for cuda 13

* fix: try using stuff from CUDA

* fix: dep

* fix: redist

* Apply suggestions from code review

* fix: try source build

* fix: flags

* fix: use redist on some platforms

* fix: missing aarch64 deps

* Apply suggestions from code review

Co-authored-by: Mosè Giordano <765740+giordano@users.noreply.github.com>

* Apply suggestions from code review

* Update N/NCCL/build_tarballs.jl

* fix: build on 13

* Update N/NCCL/build_tarballs.jl

Co-authored-by: Mosè Giordano <765740+giordano@users.noreply.github.com>

---------

Co-authored-by: Mosè Giordano <765740+giordano@users.noreply.github.com>
diff --git a/N/NCCL/build_tarballs.jl b/N/NCCL/build_tarballs.jl
@@ -4,21 +4,23 @@ using BinaryBuilder, Pkg
 using Base.BinaryPlatforms: arch, os, tags
 
 const YGGDRASIL_DIR = "../.."
+include(joinpath(YGGDRASIL_DIR, "C/CUDA/common.jl"))
 include(joinpath(YGGDRASIL_DIR, "fancy_toys.jl"))
 include(joinpath(YGGDRASIL_DIR, "platforms", "cuda.jl"))
 
 name = "NCCL"
-version = v"2.26.5"
+version = v"2.28.3"
 
-MIN_CUDA_VERSION = v"11.8" # doesnt quite match NCCL actual support
-
-sources = [
-    GitSource("https://github.com/NVIDIA/nccl.git", "3000e3c797b4b236221188c07aa09c1f3a0170d4"),
+git_sources = [
+    GitSource("https://github.com/NVIDIA/nccl.git", "f1308997d0420148b1be1c24d63f19d902ae589b"),
+    DirectorySource("./bundled/")
 ]
 
-
-script = raw"""
+build_script = raw"""
 cd $WORKSPACE/srcdir
+for f in ${WORKSPACE}/srcdir/patches/*.patch; do
+    atomic_patch -p1 ${f}
+done
 
 export TMPDIR=${WORKSPACE}/tmpdir # we need a lot of tmp space
 mkdir -p ${TMPDIR}
@@ -28,19 +30,20 @@ if [[ "${target}" == aarch64-linux-* ]]; then
 
    # Add /usr/lib/csl-musl-x86_64 to LD_LIBRARY_PATH to be able to use host nvcc
    export LD_LIBRARY_PATH="/usr/lib/csl-musl-x86_64:/usr/lib/csl-glibc-x86_64:${LD_LIBRARY_PATH}"
-   
+
    # Make sure we use host CUDA executable by copying from the x86_64 CUDA redist
    NVCC_DIR=(/workspace/srcdir/cuda_nvcc-*-archive)
    rm -rf ${prefix}/cuda/bin
    cp -r ${NVCC_DIR}/bin ${prefix}/cuda/bin
-   
+
    rm -rf ${prefix}/cuda/nvvm/bin
    cp -r ${NVCC_DIR}/nvvm/bin ${prefix}/cuda/nvvm/bin
 
    export NVCC_PREPEND_FLAGS="-ccbin='${CXX}'"
 fi
 
-export CXXFLAGS='-D__STDC_FORMAT_MACROS'
+export CXXFLAGS='-D__STDC_FORMAT_MACROS -D_GNU_SOURCE -Wno-unused-parameter -Wno-type-limits -Wno-error -Wno-missing-field-initializers -Wno-implicit-fallthrough'
+export NVCCFLAGS="$NVCCFLAGS -Wno-unused-parameter"
 export CUDARTLIB=cudart # link against dynamic library
 
 export CUDA_HOME=${prefix}/cuda;
@@ -63,10 +66,21 @@ if [[ "${target}" == aarch64-linux-* ]]; then
 fi
 """
 
+redist_script = raw"""
 
-platforms = CUDA.supported_platforms(min_version = MIN_CUDA_VERSION)
-filter!(p -> arch(p) == "x86_64" || arch(p) == "aarch64", platforms)
+cd ${WORKSPACE}/srcdir/nccl*
 
+install_license LICENSE.txt
+
+for file in lib/libnccl*.${dlext}*; do
+    install -Dvm 755 "${file}" -t "${libdir}"
+done
+
+find include -type f -print0 | while IFS= read -r -d '' file; do
+    relpath="${file#include/}"
+    install -Dvm644 "$file" "${includedir}/${relpath}"
+done
+"""
 
 products = [
     LibraryProduct("libnccl", :libnccl),
@@ -77,23 +91,75 @@ dependencies = [
     Dependency(PackageSpec(name="CompilerSupportLibraries_jll", uuid="e66e0078-7015-5450-92f7-15fbd957f2ae")),
 ]
 
-# Build for all supported CUDA toolkits
-for platform in platforms
+builds = []
+
+# redist for sources that are available
+for cuda_version in [v"13.0"]
+    platforms = [
+        Platform("x86_64", "linux"),
+        Platform("aarch64", "linux")
+    ]
+    for platform in platforms
+        augmented_platform = deepcopy(platform)
+        augmented_platform["cuda"] = CUDA.platform(cuda_version)
+        should_build_platform(triplet(augmented_platform)) || continue
+
+        if cuda_version == v"12.9"
+            if arch(platform) == "aarch64"
+                hash = "c51b970bb26a0d3afd676048923fc404ed1d1131441558a7d346940e93d6ab54"
+            elseif arch(platform) == "x86_64"
+                hash = "98f7abd2f505ba49f032052f3f36b14e28798a6e16ca783fe293e351e9376546"
+            end
+        else
+            if arch(platform) == "aarch64"
+                hash = "2b5961c4c4bcbc16148d8431c7b65525d00f386105ab1b9fa82051b7c05f6fd0"
+            elseif arch(platform) == "x86_64"
+                hash = "3117db0efe13e1336dbe32e8b98eab943ad5baa69518189918d4aca9e3ce3270"
+            end
+        end
+
+        sources = [
+            ArchiveSource("https://developer.download.nvidia.com/compute/redist/nccl/v$(version)/nccl_$(version)-1+cuda$(cuda_version.major).$(cuda_version.minor)_$(arch(platform)).txz", hash)
+        ]
+
+        push!(
+            builds,
+            (; platforms=[augmented_platform], sources, script=redist_script, req_deps=false)
+        )
+    end
+end
+
+for platform in CUDA.supported_platforms(; min_version=v"12", max_version=v"12.9.999")
     should_build_platform(triplet(platform)) || continue
 
-    cuda_deps = CUDA.required_dependencies(platform)
+    platform_sources = BinaryBuilder.AbstractSource[git_sources...]
+    if arch(platform) == "aarch64"
+        push!(platform_sources, CUDA.cuda_nvcc_redist_source(platform["cuda"], "x86_64"))
+    end
 
-    cuda_ver = platform["cuda"]
+    push!(
+        builds,
+        (; platforms=[platform], sources=platform_sources, script=build_script, req_deps=true)
+    )
+end
 
-    platform_sources = BinaryBuilder.AbstractSource[sources...]
+# don't allow `build_tarballs` to override platform selection based on ARGS.
+# we handle that ourselves by calling `should_build_platform`
+non_platform_ARGS = filter(arg -> startswith(arg, "--"), ARGS)
 
-    if arch(platform) == "aarch64"
-        push!(platform_sources, CUDA.cuda_nvcc_redist_source(cuda_ver, "x86_64"))
+# `--register` should only be passed to the last `build_tarballs` invocation
+non_reg_ARGS = filter(arg -> arg != "--register", non_platform_ARGS)
+
+for (i, build) in enumerate(builds)
+    if build.req_deps
+        deps = [dependencies; CUDA.required_dependencies(build.platforms[1])]
+    else
+        deps = []
     end
 
-    build_tarballs(ARGS, name, version, platform_sources, script, [platform],
-                   products, [dependencies; cuda_deps]; 
-                   lazy_artifacts=true, julia_compat="1.10", 
-                   preferred_gcc_version = v"10",
-                   augment_platform_block = CUDA.augment)
+    build_tarballs(i == lastindex(builds) ? non_platform_ARGS : non_reg_ARGS,
+        name, version, build.sources, build.script,
+        build.platforms, products, deps;
+        julia_compat="1.10", augment_platform_block=CUDA.augment,
+        preferred_gcc_version=v"10")
 end
diff --git a/N/NCCL/bundled/patches/init.patch b/N/NCCL/bundled/patches/init.patch
@@ -0,0 +1,14 @@
+--- a/nccl/src/init.cc
++++ b/nccl/src/init.cc
+@@ -106,7 +106,11 @@
+       // Change the default pthread stack size (via a nonportable API, which will become necessary if we switch
+       // to C++ threads).
+       PTHREADCHECK(pthread_attr_setstacksize(&attr, SAFE_STACK_SIZE), "pthread_attr_setstacksize");
++#if defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 34))
+      PTHREADCHECK(pthread_setattr_default_np(&attr), "pthread_setattr_default_np");
++#else
++      // pthread_setattr_default_np is only called on glibc >= 2.34; elsewhere the default thread attributes are left unchanged
++#endif
+     }
+ 
+     PTHREADCHECK(pthread_attr_destroy(&attr), "pthread_attr_destroy");