Skip to content

Commit f721fed

Browse files
committed
[build][cmake]: Bundle ACL dynlibs and torch libgomp for CPU extension builds
Signed-off-by: Radu Salavat <[email protected]>
1 parent 938772a commit f721fed

File tree

2 files changed

+130
-33
lines changed

2 files changed

+130
-33
lines changed

cmake/cpu_extension.cmake

Lines changed: 47 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -140,11 +140,11 @@ if (AVX512_FOUND AND NOT AVX512_DISABLED)
140140
set(ENABLE_AVX512VNNI OFF)
141141
message(WARNING "Disable AVX512-VNNI ISA support, no avx512_vnni found in local CPU flags." " If cross-compilation is required, please set env VLLM_CPU_AVX512VNNI=1.")
142142
endif()
143-
143+
144144
elseif (AVX2_FOUND)
145145
list(APPEND CXX_COMPILE_FLAGS "-mavx2")
146146
message(WARNING "vLLM CPU backend using AVX2 ISA")
147-
147+
148148
elseif (POWER9_FOUND OR POWER10_FOUND OR POWER11_FOUND)
149149
message(STATUS "PowerPC detected")
150150
if (POWER9_FOUND)
@@ -167,9 +167,9 @@ elseif (ASIMD_FOUND)
167167
add_compile_definitions(ARM_BF16_SUPPORT)
168168
else()
169169
message(WARNING "BF16 functionality is not available")
170-
set(MARCH_FLAGS "-march=armv8.2-a+dotprod+fp16")
170+
set(MARCH_FLAGS "-march=armv8.2-a+dotprod+fp16")
171171
endif()
172-
list(APPEND CXX_COMPILE_FLAGS ${MARCH_FLAGS})
172+
list(APPEND CXX_COMPILE_FLAGS ${MARCH_FLAGS})
173173
elseif (S390_FOUND)
174174
message(STATUS "S390 detected")
175175
# Check for S390 VXE support
@@ -191,9 +191,33 @@ endif()
191191

192192
# Build oneDNN for GEMM kernels (only for x86-AVX512 / ARM / PowerPC platforms)
193193
if ((AVX512_FOUND AND NOT AVX512_DISABLED) OR (ASIMD_FOUND AND NOT APPLE_SILICON_FOUND) OR POWER9_FOUND OR POWER10_FOUND OR POWER11_FOUND)
194+
# Set number of parallel build processes
195+
include(ProcessorCount)
196+
ProcessorCount(NPROC)
197+
if(NOT NPROC)
198+
set(NPROC 4)
199+
endif()
200+
201+
# locate PyTorch's libgomp (e.g. site-packages/torch.libs/libgomp-947d5fa1.so.1.0.0)
202+
# and create a local shim dir with it
203+
vllm_prepare_torch_gomp_shim(VLLM_TORCH_GOMP_SHIM_DIR)
204+
205+
find_library(OPEN_MP
206+
NAMES gomp
207+
PATHS ${VLLM_TORCH_GOMP_SHIM_DIR}
208+
NO_DEFAULT_PATH
209+
REQUIRED
210+
)
211+
# Prepend the shim dir to LD_LIBRARY_PATH for this CMake configure run (and the processes it spawns, e.g. the ACL scons build) so they use the same libgomp as PyTorch
212+
if (OPEN_MP)
213+
set(ENV{LD_LIBRARY_PATH} "${VLLM_TORCH_GOMP_SHIM_DIR}:$ENV{LD_LIBRARY_PATH}")
214+
endif()
215+
194216
# Fetch and build Arm Compute Library (ACL) as oneDNN's backend for AArch64
195217
# TODO [fadara01]: remove this once ACL can be fetched and built automatically as a dependency of oneDNN
218+
set(ONEDNN_AARCH64_USE_ACL OFF CACHE BOOL "")
196219
if(ASIMD_FOUND)
220+
# Fetch and populate ACL
197221
if(DEFINED ENV{ACL_ROOT_DIR} AND IS_DIRECTORY "$ENV{ACL_ROOT_DIR}")
198222
message(STATUS "Using ACL from specified source directory: $ENV{ACL_ROOT_DIR}")
199223
else()
@@ -207,38 +231,37 @@ if ((AVX512_FOUND AND NOT AVX512_DISABLED) OR (ASIMD_FOUND AND NOT APPLE_SILICON
207231
GIT_PROGRESS TRUE
208232
)
209233
set(ENV{ACL_ROOT_DIR} "${arm_compute_SOURCE_DIR}")
234+
set(ACL_LIB_DIR "$ENV{ACL_ROOT_DIR}/build")
210235
endif()
211236

212237
# Build ACL with scons
213-
include(ProcessorCount)
214-
ProcessorCount(_NPROC)
238+
find_program(SCONS scons REQUIRED)
215239
set(_scons_cmd
216-
scons -j${_NPROC}
217-
Werror=0 debug=0 neon=1 examples=0 embed_kernels=0 os=linux
218-
arch=armv8.2-a build=native benchmark_examples=0 fixed_format_kernels=1
219-
multi_isa=1 openmp=1 cppthreads=0
240+
${SCONS} -j${NPROC}
241+
Werror=0 debug=0 neon=1 examples=0 embed_kernels=0 os=linux
242+
arch=armv8.2-a build=native benchmark_examples=0 fixed_format_kernels=1
243+
multi_isa=1 openmp=1 cppthreads=0
220244
)
221245

222-
# locate PyTorch's libgomp (e.g. site-packages/torch.libs/libgomp-947d5fa1.so.1.0.0)
223-
# and create a local shim dir with it
224-
include("${CMAKE_CURRENT_LIST_DIR}/utils.cmake")
225-
vllm_prepare_torch_gomp_shim(VLLM_TORCH_GOMP_SHIM_DIR)
226-
227-
if(NOT VLLM_TORCH_GOMP_SHIM_DIR STREQUAL "")
228-
list(APPEND _scons_cmd extra_link_flags=-L${VLLM_TORCH_GOMP_SHIM_DIR})
229-
endif()
230-
231246
execute_process(
232247
COMMAND ${_scons_cmd}
233248
WORKING_DIRECTORY "$ENV{ACL_ROOT_DIR}"
234249
RESULT_VARIABLE _acl_rc
235250
)
251+
236252
if(NOT _acl_rc EQUAL 0)
237253
message(FATAL_ERROR "ACL SCons build failed (exit ${_acl_rc}).")
238254
endif()
239255

240-
set(ONEDNN_AARCH64_USE_ACL "ON")
241-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-rpath,$ENV{ACL_ROOT_DIR}/build/")
256+
# Add ACL libraries to be linked as dynamic libraries
257+
find_library(ACL_COMPUTE arm_compute PATHS "${ACL_LIB_DIR}" REQUIRED NO_DEFAULT_PATH)
258+
find_library(ACL_COMPUTE_GRAPH arm_compute_graph PATHS "${ACL_LIB_DIR}" REQUIRED NO_DEFAULT_PATH)
259+
260+
list(APPEND DYNLIBS "${ACL_COMPUTE}" "${ACL_COMPUTE_GRAPH}")
261+
list(APPEND DYNLIB_DIRS "${ACL_LIB_DIR}")
262+
263+
# VLLM/oneDNN settings for ACL
264+
set(ONEDNN_AARCH64_USE_ACL ON CACHE BOOL "" FORCE)
242265
add_compile_definitions(VLLM_USE_ACL)
243266
endif()
244267

@@ -349,9 +372,11 @@ define_gpu_extension_target(
349372
LANGUAGE CXX
350373
SOURCES ${VLLM_EXT_SRC}
351374
LIBRARIES ${LIBS}
375+
DYNLIBS ${DYNLIBS}
376+
DYNLIB_DIRS ${DYNLIB_DIRS}
352377
COMPILE_FLAGS ${CXX_COMPILE_FLAGS}
353378
USE_SABI 3
354379
WITH_SOABI
355380
)
356381

357-
message(STATUS "Enabling C extension.")
382+
message(STATUS "Enabling C extension.")

cmake/utils.cmake

Lines changed: 83 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -194,13 +194,13 @@ macro(clear_cuda_arches CUDA_ARCH_FLAGS)
194194
endmacro()
195195

196196
#
197-
# Extract unique CUDA architectures from a list of compute capabilities codes in
198-
# the form `<major><minor>[<letter>]`, convert them to the form sort
199-
# `<major>.<minor>`, dedupes them and then sorts them in ascending order and
197+
# Extract unique CUDA architectures from a list of compute capabilities codes in
198+
# the form `<major><minor>[<letter>]`, convert them to the form
199+
# `<major>.<minor>`, dedupes them and then sorts them in ascending order and
200200
# stores them in `OUT_ARCHES`.
201201
#
202202
# Example:
203-
# CUDA_ARCH_FLAGS="-gencode arch=compute_75,code=sm_75;...;-gencode arch=compute_90a,code=sm_90a"
203+
# CUDA_ARCH_FLAGS="-gencode arch=compute_75,code=sm_75;...;-gencode arch=compute_90a,code=sm_90a"
204204
# extract_unique_cuda_archs_ascending(OUT_ARCHES CUDA_ARCH_FLAGS)
205205
# OUT_ARCHES="7.5;...;9.0"
206206
function(extract_unique_cuda_archs_ascending OUT_ARCHES CUDA_ARCH_FLAGS)
@@ -221,15 +221,15 @@ function(extract_unique_cuda_archs_ascending OUT_ARCHES CUDA_ARCH_FLAGS)
221221
endfunction()
222222

223223
#
224-
# For a specific file set the `-gencode` flag in compile options conditionally
225-
# for the CUDA language.
224+
# For a specific file set the `-gencode` flag in compile options conditionally
225+
# for the CUDA language.
226226
#
227227
# Example:
228228
# set_gencode_flag_for_srcs(
229229
# SRCS "foo.cu"
230230
# ARCH "compute_75"
231231
# CODE "sm_75")
232-
# adds: "-gencode arch=compute_75,code=sm_75" to the compile options for
232+
# adds: "-gencode arch=compute_75,code=sm_75" to the compile options for
233233
# `foo.cu` (only for the CUDA language).
234234
#
235235
macro(set_gencode_flag_for_srcs)
@@ -249,14 +249,14 @@ macro(set_gencode_flag_for_srcs)
249249
endmacro(set_gencode_flag_for_srcs)
250250

251251
#
252-
# For a list of source files set the `-gencode` flags in the files specific
252+
# For a list of source files set the `-gencode` flags in the files specific
253253
# compile options (specifically for the CUDA language).
254254
#
255255
# arguments are:
256256
# SRCS: list of source files
257257
# CUDA_ARCHS: list of CUDA architectures in the form `<major>.<minor>[letter]`
258258
# BUILD_PTX_FOR_ARCH: if set to true, then the PTX code will be built
259-
# for architecture `BUILD_PTX_FOR_ARCH` if there is a CUDA_ARCH in CUDA_ARCHS
259+
# for architecture `BUILD_PTX_FOR_ARCH` if there is a CUDA_ARCH in CUDA_ARCHS
260260
# that is larger than BUILD_PTX_FOR_ARCH.
261261
#
262262
macro(set_gencode_flags_for_srcs)
@@ -410,7 +410,7 @@ endfunction()
410410
#
411411
# Override the GPU architectures detected by cmake/torch and filter them by
412412
# `GPU_SUPPORTED_ARCHES`. Sets the final set of architectures in
413-
# `GPU_ARCHES`. This only applies to the HIP language since for CUDA we set
413+
# `GPU_ARCHES`. This only applies to the HIP language since for CUDA we set
414414
# the architectures on a per file basis.
415415
#
416416
# Note: this is defined as a macro since it updates `CMAKE_CUDA_FLAGS`.
@@ -473,6 +473,8 @@ endmacro()
473473
# COMPILE_FLAGS <flags> - Extra compiler flags passed to NVCC/hip.
474474
# INCLUDE_DIRECTORIES <dirs> - Extra include directories.
475475
# LIBRARIES <libraries> - Extra link libraries.
476+
# DYNLIBS <dyn_libs> - Extra dynamic link libraries.
477+
# DYNLIB_DIRS <dyn_lib_dirs> - Extra dynamic link directories.
476478
# WITH_SOABI - Generate library with python SOABI suffix name.
477479
# USE_SABI <version> - Use python stable api <version>
478480
#
@@ -483,7 +485,7 @@ function (define_gpu_extension_target GPU_MOD_NAME)
483485
GPU
484486
"WITH_SOABI"
485487
"DESTINATION;LANGUAGE;USE_SABI"
486-
"SOURCES;ARCHITECTURES;COMPILE_FLAGS;INCLUDE_DIRECTORIES;LIBRARIES")
488+
"SOURCES;ARCHITECTURES;COMPILE_FLAGS;INCLUDE_DIRECTORIES;LIBRARIES;DYNLIBS;DYNLIB_DIRS")
487489

488490
# Add hipify preprocessing step when building with HIP/ROCm.
489491
if (GPU_LANGUAGE STREQUAL "HIP")
@@ -536,5 +538,75 @@ function (define_gpu_extension_target GPU_MOD_NAME)
536538
target_link_libraries(${GPU_MOD_NAME} PRIVATE ${TORCH_LIBRARIES})
537539
endif()
538540

541+
# Resolve, link and install any dynamic libraries
542+
set(_dyn_link_items)
543+
set(_dyn_install_paths)
544+
545+
foreach(_dyn_lib ${GPU_DYNLIBS})
  # Resolve each DYNLIBS entry to something linkable. Entries resolved to a
  # file path are appended to both _dyn_link_items and _dyn_install_paths;
  # unresolved names are appended to _dyn_link_items only.
  if (IS_ABSOLUTE "${_dyn_lib}")
    # Absolute paths are used exactly as given.
    list(APPEND _dyn_link_items "${_dyn_lib}")
    list(APPEND _dyn_install_paths "${_dyn_lib}")
  else()
    # find_library() stores its result in a cache entry and skips the search
    # when that entry already holds a path. Clear the normal variable AND the
    # cache entry so every library name is resolved on its own instead of
    # silently reusing the result of a previous iteration.
    unset(_found_lib)
    unset(_found_lib CACHE)
    if (GPU_DYNLIB_DIRS)
      # First look only in the caller-provided directories.
      find_library(_found_lib NAMES "${_dyn_lib}" PATHS ${GPU_DYNLIB_DIRS} NO_DEFAULT_PATH)
    endif()
    if (NOT _found_lib)
      # Not found in DYNLIB_DIRS (or none were given): fall back to CMake's
      # default library search locations.
      find_library(_found_lib NAMES "${_dyn_lib}")
    endif()

    if (_found_lib)
      list(APPEND _dyn_link_items "${_found_lib}")
      list(APPEND _dyn_install_paths "${_found_lib}")
    else()
      # Leave resolution of the bare name to the linker; without a file path
      # there is nothing to install.
      message(WARNING "DYNLIBS: could not resolve '${_dyn_lib}'
in DYNLIB_DIRS or system paths, we are
linking by name only, will not install.")
      list(APPEND _dyn_link_items "${_dyn_lib}")
    endif()
  endif()
endforeach()
572+
573+
list(REMOVE_DUPLICATES _dyn_link_items)
574+
list(REMOVE_DUPLICATES _dyn_install_paths)
575+
576+
if (_dyn_link_items)
577+
if (UNIX AND NOT APPLE)
578+
# Prevent linker from discarding unused dynamic libraries as
579+
# they may be added through auditing with auditwheel or are part
580+
# of the system's dynamic library search path.
581+
# (e.g.:/usr/lib)
582+
target_link_options(${GPU_MOD_NAME} PRIVATE "LINKER:--no-as-needed")
583+
endif()
584+
target_link_libraries(${GPU_MOD_NAME} PRIVATE ${_dyn_link_items})
585+
endif()
586+
587+
# Add rpath settings to find dynamic libraries at runtime and after install
588+
if(UNIX AND NOT APPLE)
589+
set_target_properties(${GPU_MOD_NAME} PROPERTIES
590+
BUILD_RPATH "\$ORIGIN;\$ORIGIN/.libs"
591+
INSTALL_RPATH "\$ORIGIN;\$ORIGIN/.libs;"
592+
INSTALL_RPATH_USE_LINK_PATH TRUE)
593+
elseif(APPLE)
594+
set_target_properties(${GPU_MOD_NAME} PROPERTIES
595+
MACOSX_RPATH ON
596+
BUILD_RPATH "@loader_path;@loader_path/.dylibs"
597+
INSTALL_RPATH "@loader_path;@loader_path/.dylibs")
598+
endif()
599+
539600
install(TARGETS ${GPU_MOD_NAME} LIBRARY DESTINATION ${GPU_DESTINATION} COMPONENT ${GPU_MOD_NAME})
601+
602+
foreach(_dyn_lib_path ${_dyn_install_paths})
603+
get_filename_component(_dir "${_dyn_lib_path}" DIRECTORY)
604+
get_filename_component(_base "${_dyn_lib_path}" NAME)
605+
file(GLOB _selected LIST_DIRECTORIES FALSE "${_dir}/${_base}*")
606+
list(REMOVE_DUPLICATES _selected)
607+
install(FILES "${_selected}"
608+
DESTINATION "${GPU_DESTINATION}/.libs"
609+
COMPONENT ${GPU_MOD_NAME})
610+
endforeach()
611+
540612
endfunction()

0 commit comments

Comments
 (0)