@@ -4,21 +4,23 @@ using BinaryBuilder, Pkg
44using Base. BinaryPlatforms: arch, os, tags
55
66const YGGDRASIL_DIR = " ../.."
7+ include (joinpath (YGGDRASIL_DIR, " C/CUDA/common.jl" ))
78include (joinpath (YGGDRASIL_DIR, " fancy_toys.jl" ))
89include (joinpath (YGGDRASIL_DIR, " platforms" , " cuda.jl" ))
910
1011name = " NCCL"
11- version = v " 2.26.5 "
12+ version = v " 2.28.3 "
1213
13- MIN_CUDA_VERSION = v " 11.8" # doesnt quite match NCCL actual support
14-
15- sources = [
16- GitSource (" https://github.com/NVIDIA/nccl.git" , " 3000e3c797b4b236221188c07aa09c1f3a0170d4" ),
# Sources used when building NCCL from source: the upstream NVIDIA/nccl git
# repository pinned to a specific commit, plus the local `./bundled/` directory.
# NOTE(review): `./bundled/` presumably provides the `patches/*.patch` files that
# `build_script` applies -- confirm against the directory contents.
14+ git_sources = [
15+ GitSource (" https://github.com/NVIDIA/nccl.git" , " f1308997d0420148b1be1c24d63f19d902ae589b" ),
16+ DirectorySource (" ./bundled/" )
1717]
1818
19-
20- script = raw """
19+ build_script = raw """
2120cd $WORKSPACE/srcdir
21+ for f in ${WORKSPACE}/srcdir/patches/*.patch; do
22+ atomic_patch -p1 ${f}
23+ done
2224
2325export TMPDIR=${WORKSPACE}/tmpdir # we need a lot of tmp space
2426mkdir -p ${TMPDIR}
@@ -28,19 +30,20 @@ if [[ "${target}" == aarch64-linux-* ]]; then
2830
2931 # Add /usr/lib/csl-musl-x86_64 to LD_LIBRARY_PATH to be able to use host nvcc
3032 export LD_LIBRARY_PATH="/usr/lib/csl-musl-x86_64:/usr/lib/csl-glibc-x86_64:${LD_LIBRARY_PATH}"
31-
33+
3234 # Make sure we use host CUDA executable by copying from the x86_64 CUDA redist
3335 NVCC_DIR=(/workspace/srcdir/cuda_nvcc-*-archive)
3436 rm -rf ${prefix}/cuda/bin
3537 cp -r ${NVCC_DIR}/bin ${prefix}/cuda/bin
36-
38+
3739 rm -rf ${prefix}/cuda/nvvm/bin
3840 cp -r ${NVCC_DIR}/nvvm/bin ${prefix}/cuda/nvvm/bin
3941
4042 export NVCC_PREPEND_FLAGS="-ccbin='${CXX}'"
4143fi
4244
43- export CXXFLAGS='-D__STDC_FORMAT_MACROS'
45+ export CXXFLAGS='-D__STDC_FORMAT_MACROS -D_GNU_SOURCE -Wno-unused-parameter -Wno-type-limits -Wno-error -Wno-missing-field-initializers -Wno-implicit-fallthrough'
46+ export NVCCFLAGS="$NVCCFLAGS -Wno-unused-parameter"
4447export CUDARTLIB=cudart # link against dynamic library
4548
4649export CUDA_HOME=${prefix}/cuda;
@@ -63,10 +66,21 @@ if [[ "${target}" == aarch64-linux-* ]]; then
6366fi
6467"""
6568
# Shell script for builds that repackage a prebuilt NCCL archive instead of
# compiling. It enters the unpacked `nccl*` directory under `${WORKSPACE}/srcdir`,
# installs `LICENSE.txt`, copies every `lib/libnccl*.${dlext}*` file into
# `${libdir}` with mode 755, and copies each regular file under `include/` into
# `${includedir}` with mode 644, keeping its path relative to `include/`.
69+ redist_script = raw """
6670
67- platforms = CUDA. supported_platforms (min_version = MIN_CUDA_VERSION)
68- filter! (p -> arch (p) == " x86_64" || arch (p) == " aarch64" , platforms)
71+ cd ${WORKSPACE}/srcdir/nccl*
6972
73+ install_license LICENSE.txt
74+
75+ for file in lib/libnccl*.${dlext}*; do
76+ install -Dvm 755 "${file}" -t "${libdir}"
77+ done
78+
79+ find include -type f -print0 | while IFS= read -r -d '' file; do
80+ relpath="${file#include/}"
81+ install -Dvm644 "$file" "${includedir}/${relpath}"
82+ done
83+ """
7084
7185products = [
7286 LibraryProduct (" libnccl" , :libnccl ),
@@ -77,23 +91,75 @@ dependencies = [
7791 Dependency (PackageSpec (name= " CompilerSupportLibraries_jll" , uuid= " e66e0078-7015-5450-92f7-15fbd957f2ae" )),
7892]
7993
80- # Build for all supported CUDA toolkits
81- for platform in platforms
# Queue of pending builds. Each entry is a named tuple
# `(; platforms, sources, script, req_deps)` that is later turned into one
# `build_tarballs` invocation.
builds = []

# Hashes of NVIDIA's prebuilt NCCL redistributable archives, keyed by
# (CUDA version, architecture). An explicit table replaces the previous
# if/else chain, whose catch-all `else` branch would silently apply the
# CUDA 13.0 hashes to any other version. Entries for versions that are not
# currently iterated below are kept so they can be re-enabled easily.
redist_hashes = Dict(
    (v"12.9", "aarch64") => "c51b970bb26a0d3afd676048923fc404ed1d1131441558a7d346940e93d6ab54",
    (v"12.9", "x86_64") => "98f7abd2f505ba49f032052f3f36b14e28798a6e16ca783fe293e351e9376546",
    (v"13.0", "aarch64") => "2b5961c4c4bcbc16148d8431c7b65525d00f386105ab1b9fa82051b7c05f6fd0",
    (v"13.0", "x86_64") => "3117db0efe13e1336dbe32e8b98eab943ad5baa69518189918d4aca9e3ce3270",
)

# redist for sources that are available
for cuda_version in [v"13.0"]
    for platform in [Platform("x86_64", "linux"), Platform("aarch64", "linux")]
        # Tag a copy of the base platform with the CUDA version so that it can be
        # matched against the platforms requested on the command line.
        augmented_platform = deepcopy(platform)
        augmented_platform["cuda"] = CUDA.platform(cuda_version)
        should_build_platform(triplet(augmented_platform)) || continue

        # Fail with a readable message instead of an undefined-variable error
        # when a version/architecture pair has no recorded archive hash.
        hash_key = (cuda_version, arch(platform))
        haskey(redist_hashes, hash_key) || error(
            "no NCCL redist archive hash recorded for CUDA $(cuda_version) on $(arch(platform))",
        )
        archive_hash = redist_hashes[hash_key]

        cuda_tag = "$(cuda_version.major).$(cuda_version.minor)"
        archive_url =
            "https://developer.download.nvidia.com/compute/redist/nccl/v$(version)/" *
            "nccl_$(version)-1+cuda$(cuda_tag)_$(arch(platform)).txz"
        sources = [ArchiveSource(archive_url, archive_hash)]

        # Redist builds only repackage binaries, so they are queued without
        # dependencies (`req_deps = false`).
        push!(
            builds,
            (; platforms=[augmented_platform], sources, script=redist_script, req_deps=false),
        )
    end
end
131+
# Queue a from-source build for every CUDA platform returned by
# `CUDA.supported_platforms` between versions 12 and 12.9.999 that passes
# `should_build_platform`.
132+ for platform in CUDA. supported_platforms (; min_version= v " 12" , max_version= v " 12.9.999" )
82133 should_build_platform (triplet (platform)) || continue
83134
84- cuda_deps = CUDA. required_dependencies (platform)
# Start from the shared git sources; aarch64 targets additionally get the
# x86_64 nvcc redistributable for the platform's CUDA version.
135+ platform_sources = BinaryBuilder. AbstractSource[git_sources... ]
136+ if arch (platform) == " aarch64"
137+ push! (platform_sources, CUDA. cuda_nvcc_redist_source (platform[" cuda" ], " x86_64" ))
138+ end
85139
86- cuda_ver = platform[" cuda" ]
# `req_deps= true` marks this entry as needing the dependency list when it is built.
140+ push! (
141+ builds,
142+ (; platforms= [platform], sources= platform_sources, script= build_script, req_deps= true )
143+ )
144+ end
87145
88- platform_sources = BinaryBuilder. AbstractSource[sources... ]
146+ # don't allow `build_tarballs` to override platform selection based on ARGS.
147+ # we handle that ourselves by calling `should_build_platform`
148+ non_platform_ARGS = filter (arg -> startswith (arg, " --" ), ARGS )
89149
90- if arch (platform) == " aarch64"
91- push! (platform_sources, CUDA. cuda_nvcc_redist_source (cuda_ver, " x86_64" ))
150+ # `--register` should only be passed to the last `build_tarballs` invocation
151+ non_reg_ARGS = filter (arg -> arg != " --register" , non_platform_ARGS)
152+
# Run one `build_tarballs` invocation per queued build.
153+ for (i, build) in enumerate (builds)
# Builds flagged `req_deps` get the shared `dependencies` plus the CUDA
# dependencies required by their (single) platform; all others get none.
154+ if build. req_deps
155+ deps = [dependencies; CUDA. required_dependencies (build. platforms[1 ])]
156+ else
157+ deps = []
92158 end
93159
94- build_tarballs (ARGS , name, version, platform_sources, script, [platform] ,
95- products, [dependencies; cuda_deps];
96- lazy_artifacts = true , julia_compat = " 1.10 " ,
97- preferred_gcc_version = v " 10" ,
98- augment_platform_block = CUDA . augment )
# Only the final invocation receives `non_platform_ARGS` (which may still contain
# `--register`); every earlier one receives `non_reg_ARGS`, with that flag removed.
160+ build_tarballs (i == lastindex (builds) ? non_platform_ARGS : non_reg_ARGS ,
161+ name, version, build . sources, build . script,
162+ build . platforms, products, deps;
163+ julia_compat = " 1. 10" , augment_platform_block = CUDA . augment ,
164+ preferred_gcc_version = v " 10 " )
99165end
0 commit comments