mirror of
https://github.com/slsdetectorgroup/aare.git
synced 2026-06-04 14:58:46 +02:00
Add Python bindings for CUDA cluster finder
- Add bind_ClusterFinderCUDA.hpp with pybind11 bindings for ClusterFinderCUDA - Build CUDA bindings as separate _aare_cuda.so to avoid segfaults from mixing nvcc and gcc compiled code in the same shared object - Re-export CUDA classes onto _aare in __init__.py so user code uses `from aare import ClusterFinderCUDA` regardless of which .so hosts the class - Factory in ClusterFinder.py selects backend; RuntimeError if GPU requested on CPU-only build - Update python/CMakeLists.txt: _aare_cuda module gated behind AARE_CUDA and AARE_PYTHON_BINDINGS - Add validation notebook: ~20x speedup vs sequential ClusterFinder
This commit is contained in:
+70
-14
@@ -56,6 +56,10 @@ option(AARE_CUSTOM_ASSERT "Use custom assert" OFF)
|
||||
option(AARE_INSTALL_PYTHONEXT "Install the python extension in the install tree under CMAKE_INSTALL_PREFIX/aare/" OFF)
|
||||
option(AARE_ASAN "Enable AddressSanitizer" OFF)
|
||||
|
||||
# Opt-in switch for the CUDA cluster finder backend (off by default).
option(
    AARE_CUDA
    "Build CUDA cluster finder backend"
    OFF
)

# Architecture list used as the value of CMAKE_CUDA_ARCHITECTURES when
# AARE_CUDA is ON and nothing else has set that variable.
set(
    AARE_CUDA_ARCHITECTURES
    "native"
    CACHE STRING "CUDA architectures to compile for (used when AARE_CUDA=ON)"
)
|
||||
|
||||
# Configure which of the dependencies to use FetchContent for
|
||||
option(AARE_FETCH_FMT "Use FetchContent to download fmt" ON)
|
||||
option(AARE_FETCH_PYBIND11 "Use FetchContent to download pybind11" ON)
|
||||
@@ -79,6 +83,25 @@ if(AARE_SYSTEM_LIBRARIES)
|
||||
# since these are not available on conda-forge
|
||||
endif()
|
||||
|
||||
# Optional CUDA toolchain setup. Enables the CUDA language, locates the CUDA
# toolkit and sets the project-wide CUDA language defaults.
if(AARE_CUDA)
    # Pick the architecture list *before* enabling the language. With policy
    # CMP0104 set to NEW, enable_language(CUDA) itself initialises
    # CMAKE_CUDA_ARCHITECTURES (from the CUDAARCHS environment variable or the
    # compiler default), so a "not set" test placed after it can never succeed
    # and AARE_CUDA_ARCHITECTURES would be silently ignored.
    # An explicit -DCMAKE_CUDA_ARCHITECTURES=... or a CUDAARCHS environment
    # variable still takes precedence over the project default.
    if(NOT CMAKE_CUDA_ARCHITECTURES AND NOT DEFINED ENV{CUDAARCHS})
        set(CMAKE_CUDA_ARCHITECTURES "${AARE_CUDA_ARCHITECTURES}")
    endif()

    enable_language(CUDA)
    find_package(CUDAToolkit REQUIRED)

    # Language defaults inherited by every CUDA target created below.
    set(CMAKE_CUDA_STANDARD 17)
    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
    set(CMAKE_CUDA_EXTENSIONS OFF)
    set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)

    message(STATUS "AARE: CUDA support ENABLED (archs=${CMAKE_CUDA_ARCHITECTURES}, "
                   "toolkit=${CUDAToolkit_VERSION})")
else()
    message(STATUS "AARE: CUDA support DISABLED (CPU-only build)")
endif()
|
||||
|
||||
if(AARE_BENCHMARKS)
|
||||
add_subdirectory(benchmarks)
|
||||
endif()
|
||||
@@ -307,22 +330,24 @@ else()
|
||||
endif()
|
||||
|
||||
# Common flags for GCC and Clang
|
||||
# The strict host warnings (especially -Wold-style-cast and -Wdouble-promotion)
|
||||
# trip on NVIDIA's CUDA runtime headers when they get preprocessed by nvcc's
|
||||
# host pass, so we gate them behind $<COMPILE_LANGUAGE:CXX>.
|
||||
# Warning set propagated to every consumer of aare_compiler_flags.
# Each flag is gated on the C++ compile language, so none of them reaches
# CUDA translation units; only the gated form of each flag is listed, because
# an ungated copy would apply to every language and defeat the gating.
target_compile_options(
    aare_compiler_flags
    INTERFACE
        $<$<COMPILE_LANGUAGE:CXX>:-Wall>
        $<$<COMPILE_LANGUAGE:CXX>:-Wextra>
        $<$<COMPILE_LANGUAGE:CXX>:-pedantic>
        $<$<COMPILE_LANGUAGE:CXX>:-Wshadow>
        $<$<COMPILE_LANGUAGE:CXX>:-Wold-style-cast>
        $<$<COMPILE_LANGUAGE:CXX>:-Wnon-virtual-dtor>
        $<$<COMPILE_LANGUAGE:CXX>:-Woverloaded-virtual>
        $<$<COMPILE_LANGUAGE:CXX>:-Wdouble-promotion>
        $<$<COMPILE_LANGUAGE:CXX>:-Wformat=2>
        $<$<COMPILE_LANGUAGE:CXX>:-Wredundant-decls>
        $<$<COMPILE_LANGUAGE:CXX>:-Wvla>
        # Important: a missing return can cause segfaults in optimized builds.
        $<$<COMPILE_LANGUAGE:CXX>:-Werror=return-type>
)
|
||||
|
||||
endif() #GCC/Clang specific
|
||||
@@ -392,6 +417,14 @@ set(PUBLICHEADERS
|
||||
include/aare/VarClusterFinder.hpp
|
||||
include/aare/utils/task.hpp
|
||||
)
|
||||
|
||||
# Headers that are added to the public header list only for CUDA builds.
if(AARE_CUDA)
    foreach(
        aare_cuda_header IN ITEMS
        ClusterFinderCUDA.hpp
        clusterfinder_kernel.cuh
        utils/cuda_check.cuh
    )
        list(APPEND PUBLICHEADERS "include/aare/${aare_cuda_header}")
    endforeach()
endif()
|
||||
|
||||
|
||||
set(SourceFiles
|
||||
@@ -466,6 +499,22 @@ set_target_properties(aare_core PROPERTIES
|
||||
PUBLIC_HEADER "${PUBLICHEADERS}"
|
||||
)
|
||||
|
||||
# aare_cuda: INTERFACE target bundling the usage requirements of the CUDA
# backend. It has no sources of its own; linking against it brings in
# aare_core, the CUDA runtime, the CUDA 17 language level and the
# AARE_HAS_CUDA / _GLIBCXX_USE_CXX11_ABI=1 definitions.
#
# Usage example downstream:
#   set_source_files_properties(bind_ClusterFinderCUDA.cpp PROPERTIES LANGUAGE CUDA)
#   target_link_libraries(my_pymodule PRIVATE aare_cuda)
if(AARE_CUDA)
    add_library(aare_cuda INTERFACE)

    target_compile_features(aare_cuda INTERFACE cuda_std_17)

    target_compile_definitions(
        aare_cuda
        INTERFACE AARE_HAS_CUDA _GLIBCXX_USE_CXX11_ABI=1
    )

    target_link_libraries(
        aare_cuda
        INTERFACE aare_core CUDA::cudart
    )
endif()
|
||||
|
||||
if(AARE_TESTS)
|
||||
set(TestSources
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/algorithm.test.cpp
|
||||
@@ -500,7 +549,11 @@ endif()
|
||||
|
||||
|
||||
if(AARE_MASTER_PROJECT)
|
||||
install(TARGETS aare_core aare_compiler_flags
|
||||
set(AARE_INSTALL_TARGETS aare_core aare_compiler_flags)
|
||||
if(AARE_CUDA)
|
||||
list(APPEND AARE_INSTALL_TARGETS aare_cuda)
|
||||
endif()
|
||||
install(TARGETS ${AARE_INSTALL_TARGETS}
|
||||
EXPORT "${TARGETS_EXPORT_NAME}"
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
@@ -563,5 +616,8 @@ add_custom_target(
|
||||
# Package configuration, generated only when aare is the top-level project.
# CMAKE_INSTALL_DIR and PROJECT_LIBRARIES are set right before including
# cmake/package_config.cmake (presumably consumed there - verify against that
# file).
if(AARE_MASTER_PROJECT)
    set(PROJECT_LIBRARIES aare-core aare-compiler-flags)
    if(AARE_CUDA)
        # The CUDA library name is only listed for CUDA-enabled builds.
        list(APPEND PROJECT_LIBRARIES aare-cuda)
    endif()

    set(CMAKE_INSTALL_DIR "share/cmake/${PROJECT_NAME}")

    include(cmake/package_config.cmake)
endif()
|
||||
|
||||
+50
-15
@@ -15,20 +15,50 @@ else()
|
||||
find_package(pybind11 2.13 REQUIRED)
|
||||
endif()
|
||||
|
||||
# Add the compiled python extension
|
||||
# ---- Main CPU module --------------------------------------------------------
# module.cpp is the only source for the main module. When AARE_CUDA=ON, the
# CUDA bindings live in a *separate* Python extension (_aare_cuda.so) loaded
# independently at runtime. This isolates the nvcc-compiled translation unit
# into its own ELF image so pybind11's type registry cannot be corrupted by
# weak-symbol collisions between gcc-emitted and nvcc-emitted template
# instantiations.
#
# The target is created exactly once: a second pybind11_add_module(_aare ...)
# call would fail at configure time with a duplicate-target error.
pybind11_add_module(_aare NO_EXTRAS src/module.cpp)

target_link_libraries(_aare PRIVATE aare_core aare_compiler_flags)

# Minuit2 headers are added as SYSTEM includes.
target_include_directories(
    _aare SYSTEM PRIVATE
    $<TARGET_PROPERTY:Minuit2::Minuit2,INTERFACE_INCLUDE_DIRECTORIES>
)

# The extension is emitted into <build>/aare, with interprocedural
# optimization switched off for this target.
set_target_properties(
    _aare
    PROPERTIES
        LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/aare
        INTERPROCEDURAL_OPTIMIZATION FALSE
)
|
||||
|
||||
# ---- CUDA module (separate .so) --------------------------------------------
|
||||
# Python extension holding the CUDA bindings; only built for CUDA builds.
if(AARE_CUDA)
    pybind11_add_module(_aare_cuda NO_EXTRAS src/cuda_bindings.cu)

    # Output location and build behaviour, one property per call.
    set_property(TARGET _aare_cuda PROPERTY LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/aare)
    set_property(TARGET _aare_cuda PROPERTY INTERPROCEDURAL_OPTIMIZATION FALSE)
    set_property(TARGET _aare_cuda PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
    set_property(TARGET _aare_cuda PROPERTY CUDA_SEPARABLE_COMPILATION ON)

    # Minuit2 headers are added as SYSTEM includes.
    target_include_directories(
        _aare_cuda SYSTEM PRIVATE
        $<TARGET_PROPERTY:Minuit2::Minuit2,INTERFACE_INCLUDE_DIRECTORIES>
    )

    # Options applied to CUDA sources only; the quoted generator expression
    # expands to the four individual options, in this order.
    target_compile_options(
        _aare_cuda PRIVATE
        "$<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr;--extended-lambda;-Xcompiler=-fvisibility=hidden;-Xcompiler=-fPIC>"
    )

    target_link_libraries(_aare_cuda PRIVATE aare_cuda aare_compiler_flags)
endif()
|
||||
|
||||
# List of python files to be copied to the build directory
|
||||
set( PYTHON_FILES
|
||||
@@ -51,9 +81,9 @@ foreach(FILE ${PYTHON_FILES})
|
||||
configure_file(${FILE} ${CMAKE_BINARY_DIR}/${FILE} )
|
||||
endforeach(FILE ${PYTHON_FILES})
|
||||
|
||||
set_target_properties(_aare PROPERTIES
|
||||
LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/aare
|
||||
)
|
||||
# set_target_properties(_aare PROPERTIES
|
||||
# LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/aare
|
||||
# )
|
||||
|
||||
set(PYTHON_EXAMPLES
|
||||
examples/play.py
|
||||
@@ -69,8 +99,13 @@ endforeach(FILE ${PYTHON_EXAMPLES})
|
||||
|
||||
|
||||
if(AARE_INSTALL_PYTHONEXT)
|
||||
set(AARE_PY_INSTALL_TARGETS _aare)
|
||||
if(AARE_CUDA)
|
||||
list(APPEND AARE_PY_INSTALL_TARGETS _aare_cuda)
|
||||
endif()
|
||||
|
||||
install(
|
||||
TARGETS _aare
|
||||
TARGETS ${AARE_PY_INSTALL_TARGETS}
|
||||
EXPORT "${TARGETS_EXPORT_NAME}"
|
||||
LIBRARY DESTINATION aare
|
||||
COMPONENT python
|
||||
@@ -80,5 +115,5 @@ if(AARE_INSTALL_PYTHONEXT)
|
||||
FILES ${PYTHON_FILES}
|
||||
DESTINATION aare
|
||||
COMPONENT python
|
||||
)
|
||||
)
|
||||
endif()
|
||||
@@ -49,6 +49,44 @@ def ClusterFinderMT(image_size, cluster_size = (3,3), dtype=np.int32, n_sigma=5,
|
||||
return cls(image_size, n_sigma=n_sigma, capacity=capacity, n_threads=n_threads)
|
||||
|
||||
|
||||
def _cuda_available():
    """Tell whether the CUDA cluster finder classes are exposed on ``_aare``.

    The check probes for a single representative class,
    ``ClusterFinderCUDA_Cluster3x3i``, which is expected to be present when
    aare was compiled with -DAARE_CUDA=ON.

    Returns:
        bool: True if ``_aare`` has that attribute, False otherwise.
    """
    probe_class = "ClusterFinderCUDA_Cluster3x3i"
    return hasattr(_aare, probe_class)
|
||||
|
||||
|
||||
def ClusterFinderCUDA(image_size, cluster_size=(3,3), n_sigma=5, dtype=np.int32,
                      capacity=1024, n_streams=1):
    """
    Factory function to create a ClusterFinderCUDA object. It hides the
    templated C++ ClusterFinderCUDA behind one Python call: the concrete
    class is looked up from ``cluster_size`` and ``dtype`` and constructed
    with the remaining arguments. The API mirrors ClusterFinder() plus the
    CUDA-specific knob ``n_streams``.

    Args:
        image_size: forwarded as the first constructor argument.
        cluster_size: cluster dimensions used to select the class.
        n_sigma: forwarded to the constructor as ``n_sigma``.
        dtype: cluster data type used to select the class.
        capacity: forwarded to the constructor as ``capacity``.
        n_streams: forwarded to the constructor as ``n_streams``.

    Raises:
        RuntimeError: if the CUDA classes are not available in this build.

    .. code-block:: python

        from aare import ClusterFinderCUDA

        cf = ClusterFinderCUDA(image_size=(512, 1024),
                               cluster_size=(3, 3),
                               n_sigma=5,
                               n_streams=4)
        for frame in pedestal_frames:
            cf.push_pedestal_frame(frame)
        for i, frame in enumerate(data_frames):
            cf.find_clusters(frame, frame_number=i)
        clusters = cf.steal_clusters()
    """
    if _cuda_available():
        finder_type = _get_class("ClusterFinderCUDA", cluster_size, dtype)
        ctor_kwargs = {
            "n_sigma": n_sigma,
            "capacity": capacity,
            "n_streams": n_streams,
        }
        return finder_type(image_size, **ctor_kwargs)

    # No CUDA classes registered: fail with an actionable message.
    raise RuntimeError(
        "ClusterFinderCUDA is not available in this build of aare. "
        "Rebuild with -DAARE_CUDA=ON (and -DAARE_PYTHON_BINDINGS=ON)."
    )
|
||||
|
||||
def ClusterCollector(clusterfindermt, dtype=np.int32):
|
||||
"""
|
||||
Factory function to create a ClusterCollector object. Provides a cleaner syntax for
|
||||
|
||||
@@ -2,6 +2,23 @@
|
||||
# Make the compiled classes that live in _aare available from aare.
|
||||
from . import _aare
|
||||
|
||||
# ---- CUDA module (optional) ------------------------------------------------
|
||||
# When the package was built with AARE_CUDA=ON, a sibling extension
|
||||
# _aare_cuda contains the ClusterFinderCUDA_* classes. We re-export them
|
||||
# onto _aare so user code can do `from aare import ClusterFinderCUDA_*`
|
||||
# regardless of which .so physically hosts the class. On a CPU-only build
|
||||
# the import fails silently and ClusterFinderCUDA_* classes simply aren't
|
||||
# present; the factory in ClusterFinder.py handles that case with a clear
|
||||
# RuntimeError.
|
||||
# Load the optional CUDA extension and copy its ClusterFinderCUDA_* classes
# onto _aare.
#
# Two failure modes are told apart:
#   * the extension does not exist (CPU-only build) -> stay silent;
#   * the extension exists but cannot be loaded -> emit a RuntimeWarning with
#     the underlying reason, so the problem is not mistaken for a CPU-only
#     build.
import importlib as _importlib
import warnings as _warnings

try:
    _aare_cuda_mod = _importlib.import_module("._aare_cuda", __name__)
except ImportError as _exc:
    # ModuleNotFoundError naming exactly our submodule means "not built".
    _cuda_ext_absent = (
        isinstance(_exc, ModuleNotFoundError)
        and _exc.name == __name__ + "._aare_cuda"
    )
    if not _cuda_ext_absent:
        _warnings.warn(
            "aare: the CUDA extension _aare_cuda is present but could not be "
            f"loaded ({_exc}); ClusterFinderCUDA will be unavailable.",
            RuntimeWarning,
        )
    del _cuda_ext_absent
else:
    for _name in dir(_aare_cuda_mod):
        if _name.startswith("ClusterFinderCUDA"):
            setattr(_aare, _name, getattr(_aare_cuda_mod, _name))
    # dir() of a module is never empty, so _name is always bound here.
    del _name

# Keep the package namespace free of the helper aliases.
del _importlib, _warnings
|
||||
|
||||
from . import transform
|
||||
|
||||
from ._aare import File, RawMasterFile, RawSubFile, JungfrauDataFile
|
||||
@@ -14,6 +31,7 @@ from ._aare import corner
|
||||
# from ._aare import ClusterFinderMT, ClusterCollector, ClusterFileSink, ClusterVector_i
|
||||
|
||||
from .ClusterFinder import ClusterFinder, ClusterCollector, ClusterFinderMT, ClusterFileSink, ClusterFile
|
||||
from .ClusterFinder import ClusterFinderCUDA, _cuda_available
|
||||
from .ClusterVector import ClusterVector
|
||||
from .Cluster import Cluster
|
||||
|
||||
|
||||
@@ -0,0 +1,101 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
#pragma once
|
||||
#include "aare/ClusterFinderCUDA.hpp"
|
||||
#include "aare/ClusterVector.hpp"
|
||||
#include "aare/NDView.hpp"
|
||||
#include "aare/Pedestal.hpp"
|
||||
#include "np_helper.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <pybind11/pybind11.h>
|
||||
// #include <pybind11/stl.h>
|
||||
#include <pybind11/stl_bind.h>
|
||||
|
||||
namespace py = pybind11;
|
||||
using pd_type = double;
|
||||
|
||||
using namespace aare;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wunused-parameter"
|
||||
|
||||
namespace aare {
|
||||
|
||||
template <typename T, uint8_t ClusterSizeX, uint8_t ClusterSizeY,
|
||||
typename CoordType = uint16_t>
|
||||
void define_ClusterFinderCUDA(py::module &m, const std::string &typestr) {
|
||||
auto class_name = fmt::format("ClusterFinderCUDA_{}", typestr);
|
||||
|
||||
using ClusterType = Cluster<T, ClusterSizeX, ClusterSizeY, CoordType>;
|
||||
using CF = ClusterFinderCUDA<ClusterType, uint16_t, pd_type>;
|
||||
|
||||
py::class_<CF>(m, class_name.c_str())
|
||||
.def(py::init<Shape<2>, pd_type, size_t, int>(),
|
||||
py::arg("image_size"),
|
||||
py::arg("n_sigma") = 5.0,
|
||||
py::arg("capacity") = 1'000'000,
|
||||
py::arg("n_streams") = 1)
|
||||
|
||||
.def_property(
|
||||
"nSigma",
|
||||
&CF::get_nSigma,
|
||||
&CF::set_nSigma,
|
||||
R"(Number of sigma above the pedestal to consider a photon during cluster finding.)")
|
||||
|
||||
.def("push_pedestal_frame",
|
||||
[](CF &self, py::array_t<uint16_t> frame) {
|
||||
auto view = make_view_2d(frame);
|
||||
self.push_pedestal_frame(view);
|
||||
})
|
||||
|
||||
.def("clear_pedestal", &CF::clear_pedestal)
|
||||
|
||||
.def_property_readonly(
|
||||
"pedestal",
|
||||
[](CF &self) {
|
||||
auto pd = new NDArray<pd_type, 2>{};
|
||||
*pd = self.pedestal();
|
||||
return return_image_data(pd);
|
||||
})
|
||||
|
||||
.def_property_readonly(
|
||||
"noise",
|
||||
[](CF &self) {
|
||||
auto arr = new NDArray<pd_type, 2>{};
|
||||
*arr = self.noise();
|
||||
return return_image_data(arr);
|
||||
})
|
||||
|
||||
.def(
|
||||
"steal_clusters",
|
||||
[](CF &self, bool realloc_same_capacity) {
|
||||
ClusterVector<ClusterType> clusters =
|
||||
self.steal_clusters(realloc_same_capacity);
|
||||
return clusters;
|
||||
},
|
||||
py::arg("realloc_same_capacity") = false)
|
||||
|
||||
.def(
|
||||
"find_clusters",
|
||||
[](CF &self, py::array_t<uint16_t> frame, uint64_t frame_number) {
|
||||
auto view = make_view_2d(frame);
|
||||
self.find_clusters(view, frame_number);
|
||||
},
|
||||
py::arg("frame"), py::arg("frame_number") = 0)
|
||||
|
||||
.def(
|
||||
"find_clusters_batched",
|
||||
[](CF &self, py::array_t<uint16_t> frames, uint64_t first_frame) {
|
||||
// frames is expected as a 3D numpy array (n_frames, nrows, ncols)
|
||||
auto view = make_view_3d(frames);
|
||||
return self.find_clusters_batched(view, first_frame);
|
||||
},
|
||||
py::arg("frames"), py::arg("first_frame") = 0,
|
||||
R"(Process a 3D array of frames (n_frames, nrows, ncols) in parallel
|
||||
across the configured CUDA streams. Returns a list of ClusterVector, one per
|
||||
input frame.)");
|
||||
}
|
||||
|
||||
} // namespace aare
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
@@ -0,0 +1,67 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
//
|
||||
// CUDA-only Python extension module. Registers ClusterFinderCUDA along with
|
||||
// the ClusterVector and Cluster types it exposes in its return values, so
|
||||
// the module is self-contained — users can call steal_clusters() and get
|
||||
// back a usable ClusterVector without _aare needing to be imported first.
|
||||
|
||||
#include "bind_Cluster.hpp"
|
||||
#include "bind_ClusterVector.hpp"
|
||||
#include "bind_ClusterFinderCUDA.hpp"
|
||||
|
||||
#include <pybind11/pybind11.h>
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
// Register the Cluster + ClusterVector pair for one (T, N, M) combination.
|
||||
// Subset of DEFINE_CLUSTER_BINDINGS from module.cpp: we register what
|
||||
// ClusterFinderCUDA actually returns, nothing more. File I/O, eta and
|
||||
// reduce_to_2x2 stay on the CPU side.
|
||||
#define DEFINE_CUDA_CLUSTER_TYPES(T, N, M, U, TYPE_CODE) \
|
||||
define_ClusterVector<T, N, M, U>(m, "Cluster" #N "x" #M #TYPE_CODE); \
|
||||
define_Cluster<T, N, M, U>(m, #N "x" #M #TYPE_CODE);
|
||||
|
||||
#define DEFINE_BINDINGS_CLUSTERFINDER_CUDA(T, N, M, U, TYPE_CODE) \
|
||||
aare::define_ClusterFinderCUDA<T, N, M, U>( \
|
||||
m, "Cluster" #N "x" #M #TYPE_CODE);
|
||||
|
||||
PYBIND11_MODULE(_aare_cuda, m) {
|
||||
|
||||
// Types first — finders reference them in their signatures.
|
||||
// SFINAE excludes 2x2 on ClusterFinderCUDA, so we skip it here too.
|
||||
DEFINE_CUDA_CLUSTER_TYPES(int, 3, 3, uint16_t, i);
|
||||
DEFINE_CUDA_CLUSTER_TYPES(double, 3, 3, uint16_t, d);
|
||||
DEFINE_CUDA_CLUSTER_TYPES(float, 3, 3, uint16_t, f);
|
||||
|
||||
DEFINE_CUDA_CLUSTER_TYPES(int, 5, 5, uint16_t, i);
|
||||
DEFINE_CUDA_CLUSTER_TYPES(double, 5, 5, uint16_t, d);
|
||||
DEFINE_CUDA_CLUSTER_TYPES(float, 5, 5, uint16_t, f);
|
||||
|
||||
DEFINE_CUDA_CLUSTER_TYPES(int, 7, 7, uint16_t, i);
|
||||
DEFINE_CUDA_CLUSTER_TYPES(double, 7, 7, uint16_t, d);
|
||||
DEFINE_CUDA_CLUSTER_TYPES(float, 7, 7, uint16_t, f);
|
||||
|
||||
DEFINE_CUDA_CLUSTER_TYPES(int, 9, 9, uint16_t, i);
|
||||
DEFINE_CUDA_CLUSTER_TYPES(double, 9, 9, uint16_t, d);
|
||||
DEFINE_CUDA_CLUSTER_TYPES(float, 9, 9, uint16_t, f);
|
||||
|
||||
// Finders
|
||||
DEFINE_BINDINGS_CLUSTERFINDER_CUDA(int, 3, 3, uint16_t, i);
|
||||
DEFINE_BINDINGS_CLUSTERFINDER_CUDA(double, 3, 3, uint16_t, d);
|
||||
DEFINE_BINDINGS_CLUSTERFINDER_CUDA(float, 3, 3, uint16_t, f);
|
||||
|
||||
DEFINE_BINDINGS_CLUSTERFINDER_CUDA(int, 5, 5, uint16_t, i);
|
||||
DEFINE_BINDINGS_CLUSTERFINDER_CUDA(double, 5, 5, uint16_t, d);
|
||||
DEFINE_BINDINGS_CLUSTERFINDER_CUDA(float, 5, 5, uint16_t, f);
|
||||
|
||||
DEFINE_BINDINGS_CLUSTERFINDER_CUDA(int, 7, 7, uint16_t, i);
|
||||
DEFINE_BINDINGS_CLUSTERFINDER_CUDA(double, 7, 7, uint16_t, d);
|
||||
DEFINE_BINDINGS_CLUSTERFINDER_CUDA(float, 7, 7, uint16_t, f);
|
||||
|
||||
DEFINE_BINDINGS_CLUSTERFINDER_CUDA(int, 9, 9, uint16_t, i);
|
||||
DEFINE_BINDINGS_CLUSTERFINDER_CUDA(double, 9, 9, uint16_t, d);
|
||||
DEFINE_BINDINGS_CLUSTERFINDER_CUDA(float, 9, 9, uint16_t, f);
|
||||
}
|
||||
|
||||
#undef DEFINE_CUDA_CLUSTER_TYPES
|
||||
#undef DEFINE_BINDINGS_CLUSTERFINDER_CUDA
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user