diff --git a/jungfrau/CMakeLists.txt b/jungfrau/CMakeLists.txt index bc556467..9e39eb35 100644 --- a/jungfrau/CMakeLists.txt +++ b/jungfrau/CMakeLists.txt @@ -6,14 +6,8 @@ ADD_LIBRARY(JFCalibration STATIC JFModulePedestal.cpp JFModulePedestal.h JFModuleGainCalibration.cpp JFModuleGainCalibration.h JFPedestalCalc.cpp JFPedestalCalc.h - ProcessJFPacket.cpp ProcessJFPacket.h - JFConversionGPU.h JFConversionGPU.cpp) + ProcessJFPacket.cpp ProcessJFPacket.h) SET_SOURCE_FILES_PROPERTIES(JFPedestalCalc.cpp JFConversionFloatingPoint.cpp JFConversionFixedPoint.cpp PROPERTIES COMPILE_FLAGS -Ofast) TARGET_LINK_LIBRARIES(JFCalibration JFJochProtoBuf Compression) - -IF (CMAKE_CUDA_COMPILER) - TARGET_SOURCES(JFCalibration PRIVATE JFConversionGPU.cu ) - TARGET_LINK_LIBRARIES(JFCalibration ${CUDART_LIBRARY} ${CMAKE_DL_LIBS} rt) -ENDIF() diff --git a/jungfrau/JFConversionGPU.cpp b/jungfrau/JFConversionGPU.cpp deleted file mode 100644 index c7fecc5e..00000000 --- a/jungfrau/JFConversionGPU.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright (2019-2023) Paul Scherrer Institute - -#ifndef JFJOCH_USE_CUDA - -#include "JFConversionGPU.h" - -JFConversionGPU::JFConversionGPU() {} - -JFConversionGPU::~JFConversionGPU() {} - -void JFConversionGPU::Setup(const JFModuleGainCalibration &gain_calibration, const JFModulePedestal &pedestal_g0, - const JFModulePedestal &pedestal_g1, const JFModulePedestal &pedestal_g2, double energy) { - alt_conv.Setup(gain_calibration, pedestal_g0, pedestal_g1, pedestal_g2, energy); -} - -void JFConversionGPU::ConvertModule(int16_t *dest, const uint16_t *source) { - alt_conv.ConvertModule(dest, source); -} - -void JFConversionGPU::Sync() {} - -#endif diff --git a/jungfrau/JFConversionGPU.cu b/jungfrau/JFConversionGPU.cu deleted file mode 100644 index b079c8c9..00000000 --- a/jungfrau/JFConversionGPU.cu +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright (2019-2023) Paul Scherrer Institute - -#include "JFConversionGPU.h" -#include "../common/JFJochException.h" - -inline void cuda_err(cudaError_t val) { - if (val != cudaSuccess) - throw JFJochException(JFJochExceptionCategory::GPUCUDAError, cudaGetErrorString(val)); -} - -struct CudaStreamWrapper { - cudaStream_t v; -}; - -inline float one_over_gain_energy(double gain_factor, double energy) { - double tmp = gain_factor * energy; - if (!std::isfinite(tmp) || (tmp == 0.0)) - return std::numeric_limits::infinity(); - else - return static_cast(1.0 / (gain_factor * energy)); -} - -__global__ void gpu_jf_convert(int16_t *output, const uint16_t* input, - const uint16_t *pedestal_g0, - const uint16_t *pedestal_g1, - const uint16_t *pedestal_g2, - const float *gain_g0, - const float *gain_g1, - const float *gain_g2) { - uint32_t idx = blockDim.x*blockIdx.x + threadIdx.x; - - uint16_t gainbits = input[idx] & 0xc000; - uint16_t adc = input[idx] & 0x3fff; - int16_t pedestal_subtracted_adu; - - float expected = PIXEL_OUT_LOST; - - switch (gainbits) { - case 0: - pedestal_subtracted_adu = adc - pedestal_g0[idx]; - expected = static_cast(pedestal_subtracted_adu) * gain_g0[idx]; - break; - case 0x4000: - pedestal_subtracted_adu = adc - pedestal_g1[idx]; - expected = static_cast(pedestal_subtracted_adu) * gain_g1[idx]; - if (adc == 0) [[unlikely]] expected = PIXEL_OUT_G1_SATURATION; - break; - case 0xc000: - pedestal_subtracted_adu = adc - pedestal_g2[idx]; - expected = static_cast(pedestal_subtracted_adu) * gain_g2[idx]; - if (adc == 0) [[unlikely]] expected = PIXEL_OUT_SATURATION; - else if (adc == 0x3fff) [[unlikely]] expected = PIXEL_OUT_0xFFFF; - break; - default: - expected = PIXEL_OUT_GAINBIT_2; - break; - } - - output[idx] = std::round(expected); - - if (expected <= INT16_MIN) - output[idx] = PIXEL_OUT_LOST; - else if (expected >= INT16_MAX) - output[idx] = INT16_MAX; -} - -JFConversionGPU::JFConversionGPU() { - cudastream = new(CudaStreamWrapper); - - cuda_err(cudaStreamCreate(&cudastream->v)); - cuda_err(cudaMalloc(&gpu_pedestal_g0, RAW_MODULE_SIZE * sizeof(uint16_t))); - cuda_err(cudaMalloc(&gpu_pedestal_g1, RAW_MODULE_SIZE * sizeof(uint16_t))); - cuda_err(cudaMalloc(&gpu_pedestal_g2, RAW_MODULE_SIZE * sizeof(uint16_t))); - - cuda_err(cudaMalloc(&gpu_gain_g0, RAW_MODULE_SIZE * sizeof(float))); - cuda_err(cudaMalloc(&gpu_gain_g1, RAW_MODULE_SIZE * sizeof(float))); - cuda_err(cudaMalloc(&gpu_gain_g2, RAW_MODULE_SIZE * sizeof(float))); - - cuda_err(cudaMallocHost(&host_gain_g0, RAW_MODULE_SIZE * sizeof(float))); - cuda_err(cudaMallocHost(&host_gain_g1, RAW_MODULE_SIZE * sizeof(float))); - cuda_err(cudaMallocHost(&host_gain_g2, RAW_MODULE_SIZE * sizeof(float))); - - cuda_err(cudaMalloc(&gpu_input, RAW_MODULE_SIZE * sizeof(uint16_t))); - cuda_err(cudaMalloc(&gpu_output, RAW_MODULE_SIZE * sizeof(int16_t))); -} - -JFConversionGPU::~JFConversionGPU() { - cudaStreamSynchronize(cudastream->v); - cudaStreamDestroy(cudastream->v); - delete cudastream; - - cudaFree(gpu_pedestal_g0); - cudaFree(gpu_pedestal_g1); - cudaFree(gpu_pedestal_g2); - - cudaFree(gpu_gain_g0); - cudaFree(gpu_gain_g1); - cudaFree(gpu_gain_g2); - - cudaFreeHost(host_gain_g0); - cudaFreeHost(host_gain_g1); - cudaFreeHost(host_gain_g2); - - cudaFree(gpu_input); - cudaFree(gpu_output); -} - -void JFConversionGPU::Setup(const JFModuleGainCalibration &gain_calibration, const JFModulePedestal &pedestal_g0, - const JFModulePedestal &pedestal_g1, const JFModulePedestal &pedestal_g2, double energy) { - auto &gain_arr = gain_calibration.GetGainCalibration(); - - for (int i = 0; i < RAW_MODULE_SIZE; i++) { - host_gain_g0[i] = one_over_gain_energy(gain_arr[i], energy); - host_gain_g1[i] = one_over_gain_energy(gain_arr[i + RAW_MODULE_SIZE], energy); - host_gain_g2[i] = one_over_gain_energy(gain_arr[i + 2 * RAW_MODULE_SIZE], energy); - } - cudaMemcpy(gpu_pedestal_g0, pedestal_g0.GetPedestal(), RAW_MODULE_SIZE * sizeof(uint16_t), cudaMemcpyHostToDevice); - cudaMemcpy(gpu_pedestal_g1, pedestal_g1.GetPedestal(), RAW_MODULE_SIZE * sizeof(uint16_t), cudaMemcpyHostToDevice); - cudaMemcpy(gpu_pedestal_g2, pedestal_g2.GetPedestal(), RAW_MODULE_SIZE * sizeof(uint16_t), cudaMemcpyHostToDevice); - cudaMemcpy(gpu_gain_g0, host_gain_g0, RAW_MODULE_SIZE * sizeof(float), cudaMemcpyHostToDevice); - cudaMemcpy(gpu_gain_g1, host_gain_g1, RAW_MODULE_SIZE * sizeof(float), cudaMemcpyHostToDevice); - cudaMemcpy(gpu_gain_g2, host_gain_g2, RAW_MODULE_SIZE * sizeof(float), cudaMemcpyHostToDevice); -} - -void JFConversionGPU::ConvertModule(int16_t *dest, const uint16_t *source) { - cudaMemcpy(gpu_input, source, RAW_MODULE_SIZE * sizeof(uint16_t), cudaMemcpyHostToDevice); - gpu_jf_convert<<>>(gpu_output, gpu_input, - gpu_pedestal_g0, gpu_pedestal_g1, gpu_pedestal_g2, - gpu_gain_g0, gpu_gain_g1, gpu_gain_g2); - cudaMemcpy(dest, gpu_output, RAW_MODULE_SIZE * sizeof(uint16_t), cudaMemcpyDeviceToHost); -} - -void JFConversionGPU::Sync() { - cudaStreamSynchronize(cudastream->v); -} \ No newline at end of file diff --git a/jungfrau/JFConversionGPU.h b/jungfrau/JFConversionGPU.h deleted file mode 100644 index 23fa8187..00000000 --- a/jungfrau/JFConversionGPU.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (2019-2023) Paul Scherrer Institute - -#ifndef JUNGFRAUJOCH_JFCONVERSIONGPU_CUH -#define JUNGFRAUJOCH_JFCONVERSIONGPU_CUH - -#include "JFConversion.h" -#include "JFConversionFixedPoint.h" - -struct CudaStreamWrapper; - -class JFConversionGPU : public JFConversion { -#ifdef JFJOCH_USE_CUDA - CudaStreamWrapper *cudastream = nullptr; - - uint16_t *gpu_pedestal_g0 = nullptr; - uint16_t *gpu_pedestal_g1 = nullptr; - uint16_t *gpu_pedestal_g2 = nullptr; - float *gpu_gain_g0 = nullptr; - float *gpu_gain_g1 = nullptr; - float *gpu_gain_g2 = nullptr; - - float *host_gain_g0 = nullptr; - float *host_gain_g1 = nullptr; - float *host_gain_g2 = nullptr; - - - uint16_t *gpu_input = nullptr; - int16_t *gpu_output = nullptr; -#else - JFConversionFixedPoint alt_conv; -#endif -public: - JFConversionGPU(); - JFConversionGPU(JFConversionGPU& other) = delete; - JFConversionGPU& operator=(JFConversionGPU& other) = delete; - ~JFConversionGPU(); - void Setup(const JFModuleGainCalibration &gain_calibration, - const JFModulePedestal &pedestal_g0, - const JFModulePedestal &pedestal_g1, - const JFModulePedestal &pedestal_g2, - double energy) override; - - void ConvertModule(int16_t *dest, const uint16_t *source) override; - void Sync() override; -}; - - -#endif //JUNGFRAUJOCH_JFCONVERSIONGPU_CUH diff --git a/tests/JFConversionTest.cpp b/tests/JFConversionTest.cpp index 4beaf223..34a9134d 100644 --- a/tests/JFConversionTest.cpp +++ b/tests/JFConversionTest.cpp @@ -4,7 +4,6 @@ #include "../tests/FPGAUnitTest.h" #include "../jungfrau/JFConversionFloatingPoint.h" -#include "../jungfrau/JFConversionGPU.h" #include "../jungfrau/JFConversionFixedPoint.h" void SetupPedestal( JFModulePedestal &pedestal_g0, JFModulePedestal &pedestal_g1, JFModulePedestal &pedestal_g2) { @@ -88,42 +87,6 @@ TEST_CASE("JFConversionFixedPoint_G0","[JFConversion]") { } } -TEST_CASE("JFConversionGPU_G0","[JFConversion]") { - JFConversionGPU conv; - JFConversionFloatingPoint conv_fp; - - JFModulePedestal pedestal_g0; - JFModulePedestal pedestal_g1; - JFModulePedestal pedestal_g2; - - SetupPedestal(pedestal_g0, pedestal_g1, pedestal_g2); - - JFModuleGainCalibration gain; - - std::vector energy{4.0, 6.0, 12.4, 25.0}; - for (auto &e: energy) { - conv.Setup(gain, pedestal_g0, pedestal_g1, pedestal_g2, e); - conv_fp.Setup(gain, pedestal_g0, pedestal_g1, pedestal_g2, e); - - std::vector input(RAW_MODULE_SIZE); - std::vector output_fp(RAW_MODULE_SIZE); - std::vector output_16bit(RAW_MODULE_SIZE); - - for (int i = 0; i < RAW_MODULE_SIZE; i++) - input[i] = i % 16384; - - conv.ConvertModule(output_16bit.data(), input.data()); - conv_fp.ConvertFP(output_fp.data(), input.data()); - conv.Sync(); - - auto err = Compare(output_16bit.data(), output_fp, RAW_MODULE_SIZE); - auto max_err = MaxErrorOnConversion(output_16bit.data(), output_fp, RAW_MODULE_SIZE); - std::cout << "Error on conversion " << err << " max error " << max_err << std::endl; - REQUIRE(err < 0.5); - REQUIRE(max_err <= 1.0); - } -} - TEST_CASE("JFConversionFixedPoint_G0_TestFile","[JFConversion]") { JFConversionFixedPoint conv; JFConversionFloatingPoint conv_fp; @@ -227,43 +190,6 @@ TEST_CASE("JFConversionFixedPoint_G1","[JFConversion]") { } } -TEST_CASE("JFConversionGPU_G1","[JFConversion]") { - JFConversionGPU conv; - JFConversionFloatingPoint conv_fp; - - JFModulePedestal pedestal_g0; - JFModulePedestal pedestal_g1; - JFModulePedestal pedestal_g2; - - SetupPedestal(pedestal_g0, pedestal_g1, pedestal_g2); - - JFModuleGainCalibration gain; - - std::vector energy{4.0, 6.0, 12.4, 25.0}; - for (auto &e: energy) { - conv.Setup(gain, pedestal_g0, pedestal_g1, pedestal_g2, e); - conv_fp.Setup(gain, pedestal_g0, pedestal_g1, pedestal_g2, e); - - std::vector input(RAW_MODULE_SIZE); - std::vector output_fp(RAW_MODULE_SIZE); - std::vector output_16bit(RAW_MODULE_SIZE); - - for (int i = 0; i < RAW_MODULE_SIZE; i++) - input[i] = (i % 16384) | 0x4000; - - conv.ConvertModule(output_16bit.data(), input.data()); - conv_fp.ConvertFP(output_fp.data(), input.data()); - conv.Sync(); - - auto err = Compare(output_16bit.data(), output_fp, RAW_MODULE_SIZE); - auto max_err = MaxErrorOnConversion(output_16bit.data(), output_fp, RAW_MODULE_SIZE); - - std::cout << "Error on conversion " << err << " max error " << max_err << std::endl; - REQUIRE(err < 0.5); - REQUIRE(max_err <= 1.0); - } -} - TEST_CASE("JFConversionFloatingPoint_G2","[JFConversion]") { JFConversionFloatingPoint conv; @@ -331,43 +257,6 @@ TEST_CASE("JFConversionFixedPoint_G2","[JFConversion]") { } } -TEST_CASE("JFConversionGPU_G2","[JFConversion]") { - JFConversionGPU conv; - JFConversionFloatingPoint conv_fp; - - JFModulePedestal pedestal_g0; - JFModulePedestal pedestal_g1; - JFModulePedestal pedestal_g2; - - SetupPedestal(pedestal_g0, pedestal_g1, pedestal_g2); - - JFModuleGainCalibration gain; - - std::vector energy{4.0, 6.0, 12.4, 25.0}; - for (auto &e: energy) { - conv.Setup(gain, pedestal_g0, pedestal_g1, pedestal_g2, e); - conv_fp.Setup(gain, pedestal_g0, pedestal_g1, pedestal_g2, e); - - std::vector input(RAW_MODULE_SIZE); - std::vector output_fp(RAW_MODULE_SIZE); - std::vector output_16bit(RAW_MODULE_SIZE); - - for (int i = 0; i < RAW_MODULE_SIZE; i++) - input[i] = (i % 16384) | 0xC000; - - conv.ConvertModule(output_16bit.data(), input.data()); - conv_fp.ConvertFP(output_fp.data(), input.data()); - conv.Sync(); - - auto err = Compare(output_16bit.data(), output_fp, RAW_MODULE_SIZE); - auto max_err = MaxErrorOnConversion(output_16bit.data(), output_fp, RAW_MODULE_SIZE); - - std::cout << "Error on conversion " << err << " max error " << max_err << std::endl; - REQUIRE(err < 0.5); - REQUIRE(max_err <= 1.0); - } -} - TEST_CASE("JFConversionFixedPoint_G1_TestFile","[JFConversion]") { JFConversionFixedPoint conv; JFConversionFloatingPoint conv_fp; diff --git a/tools/JFCalibrationPerfTest.cpp b/tools/JFCalibrationPerfTest.cpp index 6405a9b3..e6919e34 100644 --- a/tools/JFCalibrationPerfTest.cpp +++ b/tools/JFCalibrationPerfTest.cpp @@ -12,7 +12,6 @@ #include "../jungfrau/JFCalibration.h" #include "../jungfrau/JFConversionFloatingPoint.h" #include "../jungfrau/JFConversionFixedPoint.h" -#include "../jungfrau/JFConversionGPU.h" #include "../tests/FPGAUnitTest.h" #include "../jungfrau/jf_packet.h" #include "../jungfrau/ProcessJFPacket.h" @@ -147,9 +146,6 @@ int main () { logger.Info("Fixed point conversion"); test_conversion(logger); - logger.Info("Floating point conversion (GPU)"); - test_conversion(logger); - logger.Info("Packet processing"); test_packet_processing(logger); }