JFConversionGPU: Remove
This commit is contained in:
@@ -6,14 +6,8 @@ ADD_LIBRARY(JFCalibration STATIC
|
||||
JFModulePedestal.cpp JFModulePedestal.h
|
||||
JFModuleGainCalibration.cpp JFModuleGainCalibration.h
|
||||
JFPedestalCalc.cpp JFPedestalCalc.h
|
||||
ProcessJFPacket.cpp ProcessJFPacket.h
|
||||
JFConversionGPU.h JFConversionGPU.cpp)
|
||||
ProcessJFPacket.cpp ProcessJFPacket.h)
|
||||
|
||||
SET_SOURCE_FILES_PROPERTIES(JFPedestalCalc.cpp JFConversionFloatingPoint.cpp JFConversionFixedPoint.cpp PROPERTIES COMPILE_FLAGS -Ofast)
|
||||
|
||||
TARGET_LINK_LIBRARIES(JFCalibration JFJochProtoBuf Compression)
|
||||
|
||||
IF (CMAKE_CUDA_COMPILER)
|
||||
TARGET_SOURCES(JFCalibration PRIVATE JFConversionGPU.cu )
|
||||
TARGET_LINK_LIBRARIES(JFCalibration ${CUDART_LIBRARY} ${CMAKE_DL_LIBS} rt)
|
||||
ENDIF()
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
// Copyright (2019-2023) Paul Scherrer Institute
|
||||
|
||||
#ifndef JFJOCH_USE_CUDA
|
||||
|
||||
#include "JFConversionGPU.h"
|
||||
|
||||
JFConversionGPU::JFConversionGPU() {}
|
||||
|
||||
JFConversionGPU::~JFConversionGPU() {}
|
||||
|
||||
void JFConversionGPU::Setup(const JFModuleGainCalibration &gain_calibration, const JFModulePedestal &pedestal_g0,
|
||||
const JFModulePedestal &pedestal_g1, const JFModulePedestal &pedestal_g2, double energy) {
|
||||
alt_conv.Setup(gain_calibration, pedestal_g0, pedestal_g1, pedestal_g2, energy);
|
||||
}
|
||||
|
||||
void JFConversionGPU::ConvertModule(int16_t *dest, const uint16_t *source) {
|
||||
alt_conv.ConvertModule(dest, source);
|
||||
}
|
||||
|
||||
void JFConversionGPU::Sync() {}
|
||||
|
||||
#endif
|
||||
@@ -1,135 +0,0 @@
|
||||
// Copyright (2019-2023) Paul Scherrer Institute
|
||||
|
||||
#include "JFConversionGPU.h"
|
||||
#include "../common/JFJochException.h"
|
||||
|
||||
inline void cuda_err(cudaError_t val) {
|
||||
if (val != cudaSuccess)
|
||||
throw JFJochException(JFJochExceptionCategory::GPUCUDAError, cudaGetErrorString(val));
|
||||
}
|
||||
|
||||
struct CudaStreamWrapper {
|
||||
cudaStream_t v;
|
||||
};
|
||||
|
||||
inline float one_over_gain_energy(double gain_factor, double energy) {
|
||||
double tmp = gain_factor * energy;
|
||||
if (!std::isfinite(tmp) || (tmp == 0.0))
|
||||
return std::numeric_limits<float>::infinity();
|
||||
else
|
||||
return static_cast<float>(1.0 / (gain_factor * energy));
|
||||
}
|
||||
|
||||
__global__ void gpu_jf_convert(int16_t *output, const uint16_t* input,
|
||||
const uint16_t *pedestal_g0,
|
||||
const uint16_t *pedestal_g1,
|
||||
const uint16_t *pedestal_g2,
|
||||
const float *gain_g0,
|
||||
const float *gain_g1,
|
||||
const float *gain_g2) {
|
||||
uint32_t idx = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
|
||||
uint16_t gainbits = input[idx] & 0xc000;
|
||||
uint16_t adc = input[idx] & 0x3fff;
|
||||
int16_t pedestal_subtracted_adu;
|
||||
|
||||
float expected = PIXEL_OUT_LOST;
|
||||
|
||||
switch (gainbits) {
|
||||
case 0:
|
||||
pedestal_subtracted_adu = adc - pedestal_g0[idx];
|
||||
expected = static_cast<float>(pedestal_subtracted_adu) * gain_g0[idx];
|
||||
break;
|
||||
case 0x4000:
|
||||
pedestal_subtracted_adu = adc - pedestal_g1[idx];
|
||||
expected = static_cast<float>(pedestal_subtracted_adu) * gain_g1[idx];
|
||||
if (adc == 0) [[unlikely]] expected = PIXEL_OUT_G1_SATURATION;
|
||||
break;
|
||||
case 0xc000:
|
||||
pedestal_subtracted_adu = adc - pedestal_g2[idx];
|
||||
expected = static_cast<float>(pedestal_subtracted_adu) * gain_g2[idx];
|
||||
if (adc == 0) [[unlikely]] expected = PIXEL_OUT_SATURATION;
|
||||
else if (adc == 0x3fff) [[unlikely]] expected = PIXEL_OUT_0xFFFF;
|
||||
break;
|
||||
default:
|
||||
expected = PIXEL_OUT_GAINBIT_2;
|
||||
break;
|
||||
}
|
||||
|
||||
output[idx] = std::round(expected);
|
||||
|
||||
if (expected <= INT16_MIN)
|
||||
output[idx] = PIXEL_OUT_LOST;
|
||||
else if (expected >= INT16_MAX)
|
||||
output[idx] = INT16_MAX;
|
||||
}
|
||||
|
||||
JFConversionGPU::JFConversionGPU() {
|
||||
cudastream = new(CudaStreamWrapper);
|
||||
|
||||
cuda_err(cudaStreamCreate(&cudastream->v));
|
||||
cuda_err(cudaMalloc(&gpu_pedestal_g0, RAW_MODULE_SIZE * sizeof(uint16_t)));
|
||||
cuda_err(cudaMalloc(&gpu_pedestal_g1, RAW_MODULE_SIZE * sizeof(uint16_t)));
|
||||
cuda_err(cudaMalloc(&gpu_pedestal_g2, RAW_MODULE_SIZE * sizeof(uint16_t)));
|
||||
|
||||
cuda_err(cudaMalloc(&gpu_gain_g0, RAW_MODULE_SIZE * sizeof(float)));
|
||||
cuda_err(cudaMalloc(&gpu_gain_g1, RAW_MODULE_SIZE * sizeof(float)));
|
||||
cuda_err(cudaMalloc(&gpu_gain_g2, RAW_MODULE_SIZE * sizeof(float)));
|
||||
|
||||
cuda_err(cudaMallocHost(&host_gain_g0, RAW_MODULE_SIZE * sizeof(float)));
|
||||
cuda_err(cudaMallocHost(&host_gain_g1, RAW_MODULE_SIZE * sizeof(float)));
|
||||
cuda_err(cudaMallocHost(&host_gain_g2, RAW_MODULE_SIZE * sizeof(float)));
|
||||
|
||||
cuda_err(cudaMalloc(&gpu_input, RAW_MODULE_SIZE * sizeof(uint16_t)));
|
||||
cuda_err(cudaMalloc(&gpu_output, RAW_MODULE_SIZE * sizeof(int16_t)));
|
||||
}
|
||||
|
||||
JFConversionGPU::~JFConversionGPU() {
|
||||
cudaStreamSynchronize(cudastream->v);
|
||||
cudaStreamDestroy(cudastream->v);
|
||||
delete cudastream;
|
||||
|
||||
cudaFree(gpu_pedestal_g0);
|
||||
cudaFree(gpu_pedestal_g1);
|
||||
cudaFree(gpu_pedestal_g2);
|
||||
|
||||
cudaFree(gpu_gain_g0);
|
||||
cudaFree(gpu_gain_g1);
|
||||
cudaFree(gpu_gain_g2);
|
||||
|
||||
cudaFreeHost(host_gain_g0);
|
||||
cudaFreeHost(host_gain_g1);
|
||||
cudaFreeHost(host_gain_g2);
|
||||
|
||||
cudaFree(gpu_input);
|
||||
cudaFree(gpu_output);
|
||||
}
|
||||
|
||||
void JFConversionGPU::Setup(const JFModuleGainCalibration &gain_calibration, const JFModulePedestal &pedestal_g0,
|
||||
const JFModulePedestal &pedestal_g1, const JFModulePedestal &pedestal_g2, double energy) {
|
||||
auto &gain_arr = gain_calibration.GetGainCalibration();
|
||||
|
||||
for (int i = 0; i < RAW_MODULE_SIZE; i++) {
|
||||
host_gain_g0[i] = one_over_gain_energy(gain_arr[i], energy);
|
||||
host_gain_g1[i] = one_over_gain_energy(gain_arr[i + RAW_MODULE_SIZE], energy);
|
||||
host_gain_g2[i] = one_over_gain_energy(gain_arr[i + 2 * RAW_MODULE_SIZE], energy);
|
||||
}
|
||||
cudaMemcpy(gpu_pedestal_g0, pedestal_g0.GetPedestal(), RAW_MODULE_SIZE * sizeof(uint16_t), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(gpu_pedestal_g1, pedestal_g1.GetPedestal(), RAW_MODULE_SIZE * sizeof(uint16_t), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(gpu_pedestal_g2, pedestal_g2.GetPedestal(), RAW_MODULE_SIZE * sizeof(uint16_t), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(gpu_gain_g0, host_gain_g0, RAW_MODULE_SIZE * sizeof(float), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(gpu_gain_g1, host_gain_g1, RAW_MODULE_SIZE * sizeof(float), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(gpu_gain_g2, host_gain_g2, RAW_MODULE_SIZE * sizeof(float), cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
void JFConversionGPU::ConvertModule(int16_t *dest, const uint16_t *source) {
|
||||
cudaMemcpy(gpu_input, source, RAW_MODULE_SIZE * sizeof(uint16_t), cudaMemcpyHostToDevice);
|
||||
gpu_jf_convert<<<RAW_MODULE_SIZE/128, 128>>>(gpu_output, gpu_input,
|
||||
gpu_pedestal_g0, gpu_pedestal_g1, gpu_pedestal_g2,
|
||||
gpu_gain_g0, gpu_gain_g1, gpu_gain_g2);
|
||||
cudaMemcpy(dest, gpu_output, RAW_MODULE_SIZE * sizeof(uint16_t), cudaMemcpyDeviceToHost);
|
||||
}
|
||||
|
||||
void JFConversionGPU::Sync() {
|
||||
cudaStreamSynchronize(cudastream->v);
|
||||
}
|
||||
@@ -1,48 +0,0 @@
|
||||
// Copyright (2019-2023) Paul Scherrer Institute
|
||||
|
||||
#ifndef JUNGFRAUJOCH_JFCONVERSIONGPU_CUH
|
||||
#define JUNGFRAUJOCH_JFCONVERSIONGPU_CUH
|
||||
|
||||
#include "JFConversion.h"
|
||||
#include "JFConversionFixedPoint.h"
|
||||
|
||||
struct CudaStreamWrapper;
|
||||
|
||||
class JFConversionGPU : public JFConversion {
|
||||
#ifdef JFJOCH_USE_CUDA
|
||||
CudaStreamWrapper *cudastream = nullptr;
|
||||
|
||||
uint16_t *gpu_pedestal_g0 = nullptr;
|
||||
uint16_t *gpu_pedestal_g1 = nullptr;
|
||||
uint16_t *gpu_pedestal_g2 = nullptr;
|
||||
float *gpu_gain_g0 = nullptr;
|
||||
float *gpu_gain_g1 = nullptr;
|
||||
float *gpu_gain_g2 = nullptr;
|
||||
|
||||
float *host_gain_g0 = nullptr;
|
||||
float *host_gain_g1 = nullptr;
|
||||
float *host_gain_g2 = nullptr;
|
||||
|
||||
|
||||
uint16_t *gpu_input = nullptr;
|
||||
int16_t *gpu_output = nullptr;
|
||||
#else
|
||||
JFConversionFixedPoint alt_conv;
|
||||
#endif
|
||||
public:
|
||||
JFConversionGPU();
|
||||
JFConversionGPU(JFConversionGPU& other) = delete;
|
||||
JFConversionGPU& operator=(JFConversionGPU& other) = delete;
|
||||
~JFConversionGPU();
|
||||
void Setup(const JFModuleGainCalibration &gain_calibration,
|
||||
const JFModulePedestal &pedestal_g0,
|
||||
const JFModulePedestal &pedestal_g1,
|
||||
const JFModulePedestal &pedestal_g2,
|
||||
double energy) override;
|
||||
|
||||
void ConvertModule(int16_t *dest, const uint16_t *source) override;
|
||||
void Sync() override;
|
||||
};
|
||||
|
||||
|
||||
#endif //JUNGFRAUJOCH_JFCONVERSIONGPU_CUH
|
||||
@@ -4,7 +4,6 @@
|
||||
|
||||
#include "../tests/FPGAUnitTest.h"
|
||||
#include "../jungfrau/JFConversionFloatingPoint.h"
|
||||
#include "../jungfrau/JFConversionGPU.h"
|
||||
#include "../jungfrau/JFConversionFixedPoint.h"
|
||||
|
||||
void SetupPedestal( JFModulePedestal &pedestal_g0, JFModulePedestal &pedestal_g1, JFModulePedestal &pedestal_g2) {
|
||||
@@ -88,42 +87,6 @@ TEST_CASE("JFConversionFixedPoint_G0","[JFConversion]") {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("JFConversionGPU_G0","[JFConversion]") {
|
||||
JFConversionGPU conv;
|
||||
JFConversionFloatingPoint conv_fp;
|
||||
|
||||
JFModulePedestal pedestal_g0;
|
||||
JFModulePedestal pedestal_g1;
|
||||
JFModulePedestal pedestal_g2;
|
||||
|
||||
SetupPedestal(pedestal_g0, pedestal_g1, pedestal_g2);
|
||||
|
||||
JFModuleGainCalibration gain;
|
||||
|
||||
std::vector<double> energy{4.0, 6.0, 12.4, 25.0};
|
||||
for (auto &e: energy) {
|
||||
conv.Setup(gain, pedestal_g0, pedestal_g1, pedestal_g2, e);
|
||||
conv_fp.Setup(gain, pedestal_g0, pedestal_g1, pedestal_g2, e);
|
||||
|
||||
std::vector<uint16_t> input(RAW_MODULE_SIZE);
|
||||
std::vector<float> output_fp(RAW_MODULE_SIZE);
|
||||
std::vector<int16_t> output_16bit(RAW_MODULE_SIZE);
|
||||
|
||||
for (int i = 0; i < RAW_MODULE_SIZE; i++)
|
||||
input[i] = i % 16384;
|
||||
|
||||
conv.ConvertModule(output_16bit.data(), input.data());
|
||||
conv_fp.ConvertFP(output_fp.data(), input.data());
|
||||
conv.Sync();
|
||||
|
||||
auto err = Compare(output_16bit.data(), output_fp, RAW_MODULE_SIZE);
|
||||
auto max_err = MaxErrorOnConversion(output_16bit.data(), output_fp, RAW_MODULE_SIZE);
|
||||
std::cout << "Error on conversion " << err << " max error " << max_err << std::endl;
|
||||
REQUIRE(err < 0.5);
|
||||
REQUIRE(max_err <= 1.0);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("JFConversionFixedPoint_G0_TestFile","[JFConversion]") {
|
||||
JFConversionFixedPoint conv;
|
||||
JFConversionFloatingPoint conv_fp;
|
||||
@@ -227,43 +190,6 @@ TEST_CASE("JFConversionFixedPoint_G1","[JFConversion]") {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("JFConversionGPU_G1","[JFConversion]") {
|
||||
JFConversionGPU conv;
|
||||
JFConversionFloatingPoint conv_fp;
|
||||
|
||||
JFModulePedestal pedestal_g0;
|
||||
JFModulePedestal pedestal_g1;
|
||||
JFModulePedestal pedestal_g2;
|
||||
|
||||
SetupPedestal(pedestal_g0, pedestal_g1, pedestal_g2);
|
||||
|
||||
JFModuleGainCalibration gain;
|
||||
|
||||
std::vector<double> energy{4.0, 6.0, 12.4, 25.0};
|
||||
for (auto &e: energy) {
|
||||
conv.Setup(gain, pedestal_g0, pedestal_g1, pedestal_g2, e);
|
||||
conv_fp.Setup(gain, pedestal_g0, pedestal_g1, pedestal_g2, e);
|
||||
|
||||
std::vector<uint16_t> input(RAW_MODULE_SIZE);
|
||||
std::vector<float> output_fp(RAW_MODULE_SIZE);
|
||||
std::vector<int16_t> output_16bit(RAW_MODULE_SIZE);
|
||||
|
||||
for (int i = 0; i < RAW_MODULE_SIZE; i++)
|
||||
input[i] = (i % 16384) | 0x4000;
|
||||
|
||||
conv.ConvertModule(output_16bit.data(), input.data());
|
||||
conv_fp.ConvertFP(output_fp.data(), input.data());
|
||||
conv.Sync();
|
||||
|
||||
auto err = Compare(output_16bit.data(), output_fp, RAW_MODULE_SIZE);
|
||||
auto max_err = MaxErrorOnConversion(output_16bit.data(), output_fp, RAW_MODULE_SIZE);
|
||||
|
||||
std::cout << "Error on conversion " << err << " max error " << max_err << std::endl;
|
||||
REQUIRE(err < 0.5);
|
||||
REQUIRE(max_err <= 1.0);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("JFConversionFloatingPoint_G2","[JFConversion]") {
|
||||
JFConversionFloatingPoint conv;
|
||||
|
||||
@@ -331,43 +257,6 @@ TEST_CASE("JFConversionFixedPoint_G2","[JFConversion]") {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("JFConversionGPU_G2","[JFConversion]") {
|
||||
JFConversionGPU conv;
|
||||
JFConversionFloatingPoint conv_fp;
|
||||
|
||||
JFModulePedestal pedestal_g0;
|
||||
JFModulePedestal pedestal_g1;
|
||||
JFModulePedestal pedestal_g2;
|
||||
|
||||
SetupPedestal(pedestal_g0, pedestal_g1, pedestal_g2);
|
||||
|
||||
JFModuleGainCalibration gain;
|
||||
|
||||
std::vector<double> energy{4.0, 6.0, 12.4, 25.0};
|
||||
for (auto &e: energy) {
|
||||
conv.Setup(gain, pedestal_g0, pedestal_g1, pedestal_g2, e);
|
||||
conv_fp.Setup(gain, pedestal_g0, pedestal_g1, pedestal_g2, e);
|
||||
|
||||
std::vector<uint16_t> input(RAW_MODULE_SIZE);
|
||||
std::vector<float> output_fp(RAW_MODULE_SIZE);
|
||||
std::vector<int16_t> output_16bit(RAW_MODULE_SIZE);
|
||||
|
||||
for (int i = 0; i < RAW_MODULE_SIZE; i++)
|
||||
input[i] = (i % 16384) | 0xC000;
|
||||
|
||||
conv.ConvertModule(output_16bit.data(), input.data());
|
||||
conv_fp.ConvertFP(output_fp.data(), input.data());
|
||||
conv.Sync();
|
||||
|
||||
auto err = Compare(output_16bit.data(), output_fp, RAW_MODULE_SIZE);
|
||||
auto max_err = MaxErrorOnConversion(output_16bit.data(), output_fp, RAW_MODULE_SIZE);
|
||||
|
||||
std::cout << "Error on conversion " << err << " max error " << max_err << std::endl;
|
||||
REQUIRE(err < 0.5);
|
||||
REQUIRE(max_err <= 1.0);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("JFConversionFixedPoint_G1_TestFile","[JFConversion]") {
|
||||
JFConversionFixedPoint conv;
|
||||
JFConversionFloatingPoint conv_fp;
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
#include "../jungfrau/JFCalibration.h"
|
||||
#include "../jungfrau/JFConversionFloatingPoint.h"
|
||||
#include "../jungfrau/JFConversionFixedPoint.h"
|
||||
#include "../jungfrau/JFConversionGPU.h"
|
||||
#include "../tests/FPGAUnitTest.h"
|
||||
#include "../jungfrau/jf_packet.h"
|
||||
#include "../jungfrau/ProcessJFPacket.h"
|
||||
@@ -147,9 +146,6 @@ int main () {
|
||||
logger.Info("Fixed point conversion");
|
||||
test_conversion<JFConversionFixedPoint>(logger);
|
||||
|
||||
logger.Info("Floating point conversion (GPU)");
|
||||
test_conversion<JFConversionGPU>(logger);
|
||||
|
||||
logger.Info("Packet processing");
|
||||
test_packet_processing(logger);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user