mirror of
https://github.com/slsdetectorgroup/aare.git
synced 2026-06-05 09:58:40 +02:00
Add Python bindings for CUDA cluster finder
- Add bind_ClusterFinderCUDA.hpp with pybind11 bindings for ClusterFinderCUDA
- Build CUDA bindings as separate _aare_cuda.so to avoid segfaults from mixing nvcc and gcc compiled code in the same shared object
- Re-export CUDA classes onto _aare in __init__.py so user code uses `from aare import ClusterFinderCUDA` regardless of which .so hosts the class
- Factory in ClusterFinder.py selects backend; RuntimeError if GPU requested on CPU-only build
- Update python/CMakeLists.txt: _aare_cuda module gated behind AARE_CUDA and AARE_PYTHON_BINDINGS
- Add validation notebook: ~20x speedup vs sequential ClusterFinder
This commit is contained in:
@@ -49,6 +49,44 @@ def ClusterFinderMT(image_size, cluster_size = (3,3), dtype=np.int32, n_sigma=5,
|
||||
return cls(image_size, n_sigma=n_sigma, capacity=capacity, n_threads=n_threads)
|
||||
|
||||
|
||||
def _cuda_available():
    """Report whether the CUDA cluster finder bindings are reachable.

    The check probes ``_aare`` for the attribute
    ``ClusterFinderCUDA_Cluster3x3i`` and uses its presence as the marker
    that this build of aare was compiled with -DAARE_CUDA=ON.

    Returns:
        bool: True when the attribute lookup succeeds, False when it raises
        AttributeError.
    """
    try:
        getattr(_aare, "ClusterFinderCUDA_Cluster3x3i")
    except AttributeError:
        return False
    return True
|
||||
|
||||
|
||||
def ClusterFinderCUDA(image_size, cluster_size=(3,3), n_sigma=5, dtype=np.int32,
                      capacity=1024, n_streams=1):
    """
    Create a ClusterFinderCUDA object for the given cluster size and dtype.

    This factory hides the templated C++ ClusterFinderCUDA behind a single
    Python callable: the concrete binding class is resolved by ``_get_class``
    from ``cluster_size`` and ``dtype`` and then instantiated with the
    remaining arguments.

    Args:
        image_size: passed positionally as the first constructor argument.
        cluster_size: used together with ``dtype`` to select the binding
            class.
        n_sigma: forwarded to the constructor as a keyword argument.
        dtype: used together with ``cluster_size`` to select the binding
            class.
        capacity: forwarded to the constructor as a keyword argument.
        n_streams: forwarded to the constructor as a keyword argument
            (the CUDA-specific knob of this factory).

    Returns:
        An instance of the binding class selected by ``_get_class``.

    Raises:
        RuntimeError: if ``_cuda_available()`` reports that the CUDA
            bindings are not present in this build.

    Note:
        ``n_sigma`` comes before ``dtype`` in this signature, whereas
        ``ClusterFinderMT`` takes ``dtype`` before ``n_sigma``. Prefer
        keyword arguments when switching between the factories.

    .. code-block:: python

        from aare import ClusterFinderCUDA

        cf = ClusterFinderCUDA(image_size=(512, 1024),
                               cluster_size=(3, 3),
                               n_sigma=5,
                               n_streams=4)
        for frame in pedestal_frames:
            cf.push_pedestal_frame(frame)
        for i, frame in enumerate(data_frames):
            cf.find_clusters(frame, frame_number=i)
        clusters = cf.steal_clusters()
    """
    if _cuda_available():
        finder_cls = _get_class("ClusterFinderCUDA", cluster_size, dtype)
        ctor_kwargs = {
            "n_sigma": n_sigma,
            "capacity": capacity,
            "n_streams": n_streams,
        }
        return finder_cls(image_size, **ctor_kwargs)

    # CPU-only build: fail loudly and tell the user how to get the GPU backend.
    raise RuntimeError(
        "ClusterFinderCUDA is not available in this build of aare. "
        "Rebuild with -DAARE_CUDA=ON (and -DAARE_PYTHON_BINDINGS=ON)."
    )
|
||||
|
||||
def ClusterCollector(clusterfindermt, dtype=np.int32):
|
||||
"""
|
||||
Factory function to create a ClusterCollector object. Provides a cleaner syntax for
|
||||
|
||||
@@ -2,6 +2,23 @@
|
||||
# Make the compiled classes that live in _aare available from aare.
|
||||
from . import _aare
|
||||
|
||||
# ---- CUDA module (optional) ------------------------------------------------
|
||||
# When the package was built with AARE_CUDA=ON, a sibling extension
|
||||
# _aare_cuda contains the ClusterFinderCUDA_* classes. We re-export them
|
||||
# onto _aare so user code can do `from aare import ClusterFinderCUDA_*`
|
||||
# regardless of which .so physically hosts the class. On a CPU-only build
|
||||
# the import fails silently and ClusterFinderCUDA_* classes simply aren't
|
||||
# present; the factory in ClusterFinder.py handles that case with a clear
|
||||
# RuntimeError.
|
||||
try:
    from . import _aare_cuda as _aare_cuda_mod

    # Copy every ClusterFinderCUDA* attribute of the CUDA extension onto
    # _aare so lookups against _aare find them.
    for _name in dir(_aare_cuda_mod):
        if not _name.startswith("ClusterFinderCUDA"):
            continue
        setattr(_aare, _name, getattr(_aare_cuda_mod, _name))
    # Do not leave the loop variable behind in the package namespace.
    del _name
except ImportError:
    # The optional CUDA extension could not be imported: nothing to re-export.
    pass
|
||||
|
||||
from . import transform
|
||||
|
||||
from ._aare import File, RawMasterFile, RawSubFile, JungfrauDataFile
|
||||
@@ -14,6 +31,7 @@ from ._aare import corner
|
||||
# from ._aare import ClusterFinderMT, ClusterCollector, ClusterFileSink, ClusterVector_i
|
||||
|
||||
from .ClusterFinder import ClusterFinder, ClusterCollector, ClusterFinderMT, ClusterFileSink, ClusterFile
|
||||
from .ClusterFinder import ClusterFinderCUDA, _cuda_available
|
||||
from .ClusterVector import ClusterVector
|
||||
from .Cluster import Cluster
|
||||
|
||||
|
||||
Reference in New Issue
Block a user