mirror of
https://github.com/slsdetectorgroup/aare.git
synced 2026-06-06 12:48:40 +02:00
69151de3c7
- Non-photon pixels now update pedestal (push_fast equivalent) directly in the kernel, no atomics needed - Commented out quadrant significance test (c2): absent from sequential CPU code, was producing GPU-only clusters. - Added d_pd_sum to device allocations and host upload Build (sm_89): 46 registers, 0 spills, 100% occupancy. Verified on 256x256 Jungfrau data, 5000 frames, nSigma=5.0: CPU 8428 vs GPU 8471 clusters, 99.8% match 0.63 ms/frame CPU vs 0.04 ms/frame GPU (~16x)
21 lines
584 B
Makefile
21 lines
584 B
Makefile
CXX := /usr/bin/c++
|
|
NVCC := nvcc
|
|
ARCH := -arch=sm_89
|
|
CXXFLAGS := -std=c++17 -O3 --extended-lambda -ccbin $(CXX)
|
|
INCLUDES := -I../include -I../build/_deps/fmt-src/include
|
|
LDFLAGS := -L../build -L../build/_deps/fmt-build
|
|
LIBS := -laare_core -lfmt -lstdc++fs
|
|
DEFINES := -DAARE_LOG_LEVEL=logERROR
|
|
|
|
TARGET := test_cf_cuda
|
|
SRC := ClusterFinderCUDA.test.cu
|
|
|
|
all: $(TARGET)
|
|
|
|
$(TARGET): $(SRC) ../include/aare/ClusterFinderCUDA.cuh
|
|
$(NVCC) -Xptxas=-v $(ARCH) $(CXXFLAGS) $(DEFINES) $(INCLUDES) $(LDFLAGS) $< -o $@ $(LIBS)
|
|
|
|
clean:
|
|
rm -f $(TARGET)
|
|
|
|
.PHONY: all clean |