Add Python bindings for CUDA cluster finder
Build on RHEL8 / build (push) Successful in 2m50s
Build on RHEL9 / build (push) Successful in 2m57s
Run tests using data on local RHEL8 / build (push) Successful in 3m38s

- Add bind_ClusterFinderCUDA.hpp with pybind11 bindings for
  ClusterFinderCUDA
- Build CUDA bindings as separate _aare_cuda.so to avoid
  segfaults from mixing nvcc and gcc compiled code in the
  same shared object
- Re-export CUDA classes onto _aare in __init__.py so user
  code uses `from aare import ClusterFinderCUDA` regardless
  of which .so hosts the class
- Factory in ClusterFinder.py selects the backend and raises
  RuntimeError if the GPU backend is requested on a CPU-only build
- Update python/CMakeLists.txt: _aare_cuda module gated
  behind AARE_CUDA and AARE_PYTHON_BINDINGS
- Add validation notebook: ~20x speedup vs sequential ClusterFinder
This commit is contained in:
kferjaoui
2026-04-23 11:43:40 +02:00
parent 3ed773e520
commit e894bdac9b
7 changed files with 766 additions and 29 deletions
+50 -15
View File
@@ -15,20 +15,50 @@ else()
find_package(pybind11 2.13 REQUIRED)
endif()
# Add the compiled python extension
pybind11_add_module(
_aare # name of the module
src/module.cpp # source file
)
set_target_properties(_aare PROPERTIES
LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}
)
# ---- Main CPU module --------------------------------------------------------
# _aare is built from src/module.cpp alone. With AARE_CUDA=ON the CUDA
# bindings are not compiled into this target; they go into a second Python
# extension (_aare_cuda.so) that is loaded on its own at runtime. Keeping the
# nvcc-compiled translation unit in a separate ELF image is meant to stop
# weak-symbol collisions between gcc-emitted and nvcc-emitted template
# instantiations from corrupting pybind11's type registry.
pybind11_add_module(_aare NO_EXTRAS src/module.cpp)

# Place the extension inside the aare package directory of the build tree and
# keep interprocedural optimization switched off for this target.
set_property(TARGET _aare PROPERTY
  LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/aare
)
set_property(TARGET _aare PROPERTY
  INTERPROCEDURAL_OPTIMIZATION FALSE
)

# Minuit2 headers are added as SYSTEM includes, taken from the imported
# target's interface include directories.
target_include_directories(_aare SYSTEM PRIVATE
  $<TARGET_PROPERTY:Minuit2::Minuit2,INTERFACE_INCLUDE_DIRECTORIES>
)

target_link_libraries(_aare PRIVATE aare_core aare_compiler_flags)
# ---- CUDA module (separate .so) --------------------------------------------
# Built only when AARE_CUDA is enabled. src/cuda_bindings.cu is compiled into
# its own Python extension, _aare_cuda, which is written to the same
# ${CMAKE_BINARY_DIR}/aare directory as _aare.
if(AARE_CUDA)
  pybind11_add_module(_aare_cuda NO_EXTRAS src/cuda_bindings.cu)

  target_link_libraries(_aare_cuda PRIVATE aare_cuda aare_compiler_flags)

  # Minuit2 headers are added as SYSTEM includes, taken from the imported
  # target's interface include directories.
  target_include_directories(_aare_cuda SYSTEM PRIVATE
    $<TARGET_PROPERTY:Minuit2::Minuit2,INTERFACE_INCLUDE_DIRECTORIES>
  )

  # Position-independent code and hidden symbol visibility are requested via
  # target properties instead of raw -Xcompiler flags, so CMake generates the
  # matching host-compiler options for the CUDA sources itself.
  set_target_properties(_aare_cuda PROPERTIES
    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/aare"
    INTERPROCEDURAL_OPTIMIZATION FALSE
    CUDA_RESOLVE_DEVICE_SYMBOLS ON
    CUDA_SEPARABLE_COMPILATION ON
    POSITION_INDEPENDENT_CODE ON
    CUDA_VISIBILITY_PRESET hidden
  )

  # nvcc-only front-end switches; the generator expressions restrict them to
  # CUDA translation units.
  target_compile_options(_aare_cuda PRIVATE
    $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
    $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>
  )
endif()
# List of python files to be copied to the build directory
set( PYTHON_FILES
@@ -51,9 +81,9 @@ foreach(FILE ${PYTHON_FILES})
configure_file(${FILE} ${CMAKE_BINARY_DIR}/${FILE} )
endforeach(FILE ${PYTHON_FILES})
set_target_properties(_aare PROPERTIES
LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/aare
)
# set_target_properties(_aare PROPERTIES
# LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/aare
# )
set(PYTHON_EXAMPLES
examples/play.py
@@ -69,8 +99,13 @@ endforeach(FILE ${PYTHON_EXAMPLES})
if(AARE_INSTALL_PYTHONEXT)
set(AARE_PY_INSTALL_TARGETS _aare)
if(AARE_CUDA)
list(APPEND AARE_PY_INSTALL_TARGETS _aare_cuda)
endif()
install(
TARGETS _aare
TARGETS ${AARE_PY_INSTALL_TARGETS}
EXPORT "${TARGETS_EXPORT_NAME}"
LIBRARY DESTINATION aare
COMPONENT python
@@ -80,5 +115,5 @@ if(AARE_INSTALL_PYTHONEXT)
FILES ${PYTHON_FILES}
DESTINATION aare
COMPONENT python
)
)
endif()