Skip to content

Commit 47bd0da

Browse files
avik-pal and giordano authored
[NCCL]: trigger build for cuda 13 (#12061)
* [NCCL]: trigger build for cuda 13
* fix: try using stuff from CUDA
* fix: dep
* fix: redist
* Apply suggestions from code review
* fix: try source build
* fix: flags
* fix: use redist on some platforms
* fix: missing aarch64 deps
* Apply suggestions from code review

  Co-authored-by: Mosè Giordano <[email protected]>
* Apply suggestions from code review
* Update N/NCCL/build_tarballs.jl
* fix: build on 13
* Update N/NCCL/build_tarballs.jl

  Co-authored-by: Mosè Giordano <[email protected]>

---------

Co-authored-by: Mosè Giordano <[email protected]>
1 parent 9aa34c7 commit 47bd0da

File tree

2 files changed

+104
-24
lines changed

2 files changed

+104
-24
lines changed

N/NCCL/build_tarballs.jl

Lines changed: 90 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,23 @@ using BinaryBuilder, Pkg
44
using Base.BinaryPlatforms: arch, os, tags
55

66
const YGGDRASIL_DIR = "../.."
7+
include(joinpath(YGGDRASIL_DIR, "C/CUDA/common.jl"))
78
include(joinpath(YGGDRASIL_DIR, "fancy_toys.jl"))
89
include(joinpath(YGGDRASIL_DIR, "platforms", "cuda.jl"))
910

1011
name = "NCCL"
11-
version = v"2.26.5"
12+
version = v"2.28.3"
1213

13-
MIN_CUDA_VERSION = v"11.8" # doesn't quite match NCCL's actual support
14-
15-
sources = [
16-
GitSource("https://github.com/NVIDIA/nccl.git", "3000e3c797b4b236221188c07aa09c1f3a0170d4"),
14+
# Sources for the from-source builds: the NCCL git checkout at a pinned commit,
# plus the local `./bundled/` directory (build_script applies its patches/*.patch files).
git_sources = [
15+
GitSource("https://github.com/NVIDIA/nccl.git", "f1308997d0420148b1be1c24d63f19d902ae589b"),
16+
DirectorySource("./bundled/")
1717
]
1818

19-
20-
script = raw"""
19+
build_script = raw"""
2120
cd $WORKSPACE/srcdir
21+
for f in ${WORKSPACE}/srcdir/patches/*.patch; do
22+
atomic_patch -p1 ${f}
23+
done
2224
2325
export TMPDIR=${WORKSPACE}/tmpdir # we need a lot of tmp space
2426
mkdir -p ${TMPDIR}
@@ -28,19 +30,20 @@ if [[ "${target}" == aarch64-linux-* ]]; then
2830
2931
# Add /usr/lib/csl-musl-x86_64 to LD_LIBRARY_PATH to be able to use host nvcc
3032
export LD_LIBRARY_PATH="/usr/lib/csl-musl-x86_64:/usr/lib/csl-glibc-x86_64:${LD_LIBRARY_PATH}"
31-
33+
3234
# Make sure we use host CUDA executable by copying from the x86_64 CUDA redist
3335
NVCC_DIR=(/workspace/srcdir/cuda_nvcc-*-archive)
3436
rm -rf ${prefix}/cuda/bin
3537
cp -r ${NVCC_DIR}/bin ${prefix}/cuda/bin
36-
38+
3739
rm -rf ${prefix}/cuda/nvvm/bin
3840
cp -r ${NVCC_DIR}/nvvm/bin ${prefix}/cuda/nvvm/bin
3941
4042
export NVCC_PREPEND_FLAGS="-ccbin='${CXX}'"
4143
fi
4244
43-
export CXXFLAGS='-D__STDC_FORMAT_MACROS'
45+
export CXXFLAGS='-D__STDC_FORMAT_MACROS -D_GNU_SOURCE -Wno-unused-parameter -Wno-type-limits -Wno-error -Wno-missing-field-initializers -Wno-implicit-fallthrough'
46+
export NVCCFLAGS="$NVCCFLAGS -Wno-unused-parameter"
4447
export CUDARTLIB=cudart # link against dynamic library
4548
4649
export CUDA_HOME=${prefix}/cuda;
@@ -63,10 +66,21 @@ if [[ "${target}" == aarch64-linux-* ]]; then
6366
fi
6467
"""
6568

69+
# Build script for the redist builds: no compilation happens here. It changes into
# the unpacked `nccl*` directory, installs the license, copies every
# `lib/libnccl*.${dlext}*` file into `${libdir}`, and mirrors the `include/` tree
# into `${includedir}`.
redist_script = raw"""
6670
67-
platforms = CUDA.supported_platforms(min_version = MIN_CUDA_VERSION)
68-
filter!(p -> arch(p) == "x86_64" || arch(p) == "aarch64", platforms)
71+
cd ${WORKSPACE}/srcdir/nccl*
6972
73+
install_license LICENSE.txt
74+
75+
for file in lib/libnccl*.${dlext}*; do
76+
install -Dvm 755 "${file}" -t "${libdir}"
77+
done
78+
79+
find include -type f -print0 | while IFS= read -r -d '' file; do
80+
relpath="${file#include/}"
81+
install -Dvm644 "$file" "${includedir}/${relpath}"
82+
done
83+
"""
7084

7185
products = [
7286
LibraryProduct("libnccl", :libnccl),
@@ -77,23 +91,75 @@ dependencies = [
7791
Dependency(PackageSpec(name="CompilerSupportLibraries_jll", uuid="e66e0078-7015-5450-92f7-15fbd957f2ae")),
7892
]
7993

80-
# Build for all supported CUDA toolkits
81-
for platform in platforms
94+
# Queue of pending builds. Each entry is a named tuple with the fields
# `platforms`, `sources`, `script` and `req_deps`; the final loop turns every
# entry into one `build_tarballs` invocation.
builds = []
95+
96+
# Redist builds: for the CUDA versions listed here, repackage the prebuilt NCCL
# archive downloaded from NVIDIA's redist server instead of compiling from source.
97+
for cuda_version in [v"13.0"]
98+
platforms = [
99+
Platform("x86_64", "linux"),
100+
Platform("aarch64", "linux")
101+
]
102+
for platform in platforms
103+
# Work on a copy and set its "cuda" tag from `CUDA.platform(cuda_version)`.
augmented_platform = deepcopy(platform)
104+
augmented_platform["cuda"] = CUDA.platform(cuda_version)
105+
# Skip platforms that `should_build_platform` rejects.
should_build_platform(triplet(augmented_platform)) || continue
106+
107+
# Pick the archive hash (presumably a SHA-256 checksum, confirm against
# ArchiveSource) for this CUDA version and architecture.
# NOTE(review): the enclosing loop only iterates v"13.0", so the v"12.9"
# branch below is currently never taken.
if cuda_version == v"12.9"
108+
if arch(platform) == "aarch64"
109+
hash = "c51b970bb26a0d3afd676048923fc404ed1d1131441558a7d346940e93d6ab54"
110+
elseif arch(platform) == "x86_64"
111+
hash = "98f7abd2f505ba49f032052f3f36b14e28798a6e16ca783fe293e351e9376546"
112+
end
113+
else
114+
if arch(platform) == "aarch64"
115+
hash = "2b5961c4c4bcbc16148d8431c7b65525d00f386105ab1b9fa82051b7c05f6fd0"
116+
elseif arch(platform) == "x86_64"
117+
hash = "3117db0efe13e1336dbe32e8b98eab943ad5baa69518189918d4aca9e3ce3270"
118+
end
119+
end
120+
121+
# The archive URL is assembled from the NCCL `version`, the CUDA
# major.minor version and the target architecture.
sources = [
122+
ArchiveSource("https://developer.download.nvidia.com/compute/redist/nccl/v$(version)/nccl_$(version)-1+cuda$(cuda_version.major).$(cuda_version.minor)_$(arch(platform)).txz", hash)
123+
]
124+
125+
# Queue the build with `req_deps=false`: the final loop passes an empty
# dependency list for such entries.
push!(
126+
builds,
127+
(; platforms=[augmented_platform], sources, script=redist_script, req_deps=false)
128+
)
129+
end
130+
end
131+
132+
# Source builds: every platform returned by `CUDA.supported_platforms` for the
# range v"12" .. v"12.9.999" is compiled from `git_sources` with `build_script`.
for platform in CUDA.supported_platforms(; min_version=v"12", max_version=v"12.9.999")
82133
# Skip platforms that `should_build_platform` rejects.
should_build_platform(triplet(platform)) || continue
83134

84-
cuda_deps = CUDA.required_dependencies(platform)
135+
platform_sources = BinaryBuilder.AbstractSource[git_sources...]
136+
# aarch64 targets additionally get the x86_64 `cuda_nvcc` redist source for
# the platform's CUDA version.
if arch(platform) == "aarch64"
137+
push!(platform_sources, CUDA.cuda_nvcc_redist_source(platform["cuda"], "x86_64"))
138+
end
85139

86-
cuda_ver = platform["cuda"]
140+
# Queue the build with `req_deps=true`: the final loop appends
# `CUDA.required_dependencies` for such entries.
push!(
141+
builds,
142+
(; platforms=[platform], sources=platform_sources, script=build_script, req_deps=true)
143+
)
144+
end
87145

88-
platform_sources = BinaryBuilder.AbstractSource[sources...]
146+
# Don't allow `build_tarballs` to override the platform selection based on ARGS:
147+
# keep only the arguments that start with `--`. Platform selection is handled here by calling `should_build_platform`.
148+
non_platform_ARGS = filter(arg -> startswith(arg, "--"), ARGS)
89149

90-
if arch(platform) == "aarch64"
91-
push!(platform_sources, CUDA.cuda_nvcc_redist_source(cuda_ver, "x86_64"))
150+
# `--register` should only be passed to the last `build_tarballs` invocation
151+
non_reg_ARGS = filter(arg -> arg != "--register", non_platform_ARGS)
152+
153+
# Run one `build_tarballs` invocation per queued entry in `builds`.
for (i, build) in enumerate(builds)
154+
# Entries flagged `req_deps` get the common `dependencies` plus the CUDA
# dependencies of their (single) platform; all other entries get none.
if build.req_deps
155+
deps = [dependencies; CUDA.required_dependencies(build.platforms[1])]
156+
else
157+
deps = []
92158
end
93159

94-
build_tarballs(ARGS, name, version, platform_sources, script, [platform],
95-
products, [dependencies; cuda_deps];
96-
lazy_artifacts=true, julia_compat="1.10",
97-
preferred_gcc_version = v"10",
98-
augment_platform_block = CUDA.augment)
160+
# Only the last invocation receives the argument list that may still contain
# `--register`; every earlier one gets the list with `--register` removed.
build_tarballs(i == lastindex(builds) ? non_platform_ARGS : non_reg_ARGS,
161+
name, version, build.sources, build.script,
162+
build.platforms, products, deps;
163+
julia_compat="1.10", augment_platform_block=CUDA.augment,
164+
preferred_gcc_version=v"10")
99165
end

N/NCCL/bundled/patches/init.patch

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
--- a/nccl/src/init.cc
2+
+++ b/nccl/src/init.cc
3+
@@ -106,7 +106,11 @@
4+
// Change the default pthread stack size (via a nonportable API, which will become necessary if we switch
5+
// to C++ threads).
6+
PTHREADCHECK(pthread_attr_setstacksize(&attr, SAFE_STACK_SIZE), "pthread_attr_setstacksize");
7+
+#if defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 34))
8+
PTHREADCHECK(pthread_setattr_default_np(&attr), "pthread_setattr_default_np");
9+
+#else
10+
+ // Not glibc >= 2.34: skip pthread_setattr_default_np and leave the default thread attributes unchanged
11+
+#endif
12+
}
13+
14+
PTHREADCHECK(pthread_attr_destroy(&attr), "pthread_attr_destroy");

0 commit comments

Comments
 (0)