From 032543e69a2594c319e8a08acbd10b1dcc61a5f8 Mon Sep 17 00:00:00 2001 From: Filip Leonarski Date: Mon, 19 Jun 2023 09:26:10 +0200 Subject: [PATCH] GPUImageAnalysis: Add option to include corrections in radial integration + keep rad.int. sum and count as floats --- image_analysis/GPUImageAnalysis.cpp | 4 +- image_analysis/GPUImageAnalysis.cu | 47 ++++++++++++--------- image_analysis/GPUImageAnalysis.h | 14 +++--- image_analysis/RadialIntegrationProfile.cpp | 2 +- image_analysis/RadialIntegrationProfile.h | 6 +-- tests/RadialIntegrationTest.cpp | 6 +-- 6 files changed, 45 insertions(+), 34 deletions(-) diff --git a/image_analysis/GPUImageAnalysis.cpp b/image_analysis/GPUImageAnalysis.cpp index b2b12019..1052469e 100644 --- a/image_analysis/GPUImageAnalysis.cpp +++ b/image_analysis/GPUImageAnalysis.cpp @@ -50,11 +50,11 @@ void GPUImageAnalysis::RunRadialIntegration() {} void GPUImageAnalysis::GetRadialIntegrationProfile(std::vector &result) {} -std::vector GPUImageAnalysis::GetRadialIntegrationSum() const { +std::vector GPUImageAnalysis::GetRadialIntegrationSum() const { return {}; } -std::vector GPUImageAnalysis::GetRadialIntegrationCount() const { +std::vector GPUImageAnalysis::GetRadialIntegrationCount() const { return {}; } diff --git a/image_analysis/GPUImageAnalysis.cu b/image_analysis/GPUImageAnalysis.cu index f42e9a01..19ff8aa9 100644 --- a/image_analysis/GPUImageAnalysis.cu +++ b/image_analysis/GPUImageAnalysis.cu @@ -177,25 +177,25 @@ __global__ void analyze_pixel(const int16_t *in, uint8_t *out, const spot_parame } __global__ void gpu_radial_integration(const int16_t *image, const uint16_t *rad_integration_mapping, - int32_t *bin_sum, int32_t *bin_count, uint32_t npixel, + float *corr, float *bin_sum, float *bin_count, uint32_t npixel, uint16_t nbins) { - extern __shared__ int32_t shared_mem[]; - int32_t* shared_sum = shared_mem; // shared buffer [nbins] - int32_t* shared_count = &shared_sum[nbins]; // shared buffer [nbins] + extern __shared__ float shared_mem_fp[]; + float* shared_sum = shared_mem_fp; // shared buffer [nbins] + float* shared_count = &shared_sum[nbins]; // shared buffer [nbins] uint32_t idx = blockDim.x*blockIdx.x + threadIdx.x; for (uint32_t i = threadIdx.x; i < 2 * nbins; i += blockDim.x) - shared_mem[i] = 0; + shared_mem_fp[i] = 0; __syncthreads(); for (uint32_t i = idx; i < npixel; i += blockDim.x * gridDim.x) { uint16_t bin = rad_integration_mapping[i]; - int32_t value = image[i]; + float value = static_cast(image[i]) * corr[i]; if ((value > INT16_MIN + 4) && (value < INT16_MAX - 4) && (bin < nbins)) { atomicAdd(&shared_sum[bin], value); - atomicAdd(&shared_count[bin], 1); + atomicAdd(&shared_count[bin], 1.0f); } } __syncthreads(); @@ -228,7 +228,7 @@ __global__ void apply_pixel_mask(int16_t *image, const uint8_t *mask, uint32_t n GPUImageAnalysis::GPUImageAnalysis(int32_t in_xpixels, int32_t in_ypixels, const std::vector &mask, int32_t gpu_device) : - xpixels(in_xpixels), ypixels(in_ypixels), gpu_out(nullptr), rad_integration_nbins(0) { + xpixels(in_xpixels), ypixels(in_ypixels), gpu_out(nullptr), rad_integration_nbins(0), numberOfSMs(1) { int device_count; cuda_err(cudaGetDeviceCount(&device_count)); @@ -278,14 +278,19 @@ GPUImageAnalysis::GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std:: if (rad_integration_nbins > 0) { cuda_err(cudaMalloc(&gpu_rad_integration_bin_map, xpixels * ypixels * sizeof(int16_t))); - cuda_err(cudaMalloc(&gpu_rad_integration_count, rad_integration_nbins * sizeof(int32_t))); - cuda_err(cudaMalloc(&gpu_rad_integration_sum, rad_integration_nbins * sizeof(int32_t))); + cuda_err(cudaMalloc(&gpu_rad_integration_corr, xpixels * ypixels * sizeof(float))); - cuda_err(cudaHostAlloc(&host_rad_integration_count, rad_integration_nbins * sizeof(int32_t), cudaHostAllocPortable)); - cuda_err(cudaHostAlloc(&host_rad_integration_sum, rad_integration_nbins * sizeof(int32_t), cudaHostAllocPortable)); + cuda_err(cudaMalloc(&gpu_rad_integration_count, rad_integration_nbins * sizeof(float))); + cuda_err(cudaMalloc(&gpu_rad_integration_sum, rad_integration_nbins * sizeof(float))); + cuda_err(cudaHostAlloc(&host_rad_integration_count, rad_integration_nbins * sizeof(float), cudaHostAllocPortable)); + cuda_err(cudaHostAlloc(&host_rad_integration_sum, rad_integration_nbins * sizeof(float), cudaHostAllocPortable)); cudaMemcpy(gpu_rad_integration_bin_map, rad_int_mapping.data(), xpixels*ypixels * sizeof(uint16_t), cudaMemcpyHostToDevice); + + std::vector corr(xpixels * ypixels, 1.0f); + cudaMemcpy(gpu_rad_integration_corr, corr.data(), + xpixels * ypixels * sizeof(float), cudaMemcpyHostToDevice); } } @@ -303,6 +308,7 @@ GPUImageAnalysis::~GPUImageAnalysis() { cudaFree(gpu_rad_integration_bin_map); cudaFree(gpu_rad_integration_count); cudaFree(gpu_rad_integration_sum); + cudaFree(gpu_rad_integration_corr); cudaFreeHost(host_rad_integration_count); cudaFreeHost(host_rad_integration_sum); } @@ -431,7 +437,8 @@ void GPUImageAnalysis::RunRadialIntegration() { cuda_err(cudaMemsetAsync(gpu_rad_integration_count, 0, rad_integration_nbins * sizeof(int32_t), cudastream->v)); gpu_radial_integration<<<40, numberOfCudaThreads, rad_integration_nbins * sizeof(uint32_t) * 2, cudastream->v>>>( - gpu_in, gpu_rad_integration_bin_map, gpu_rad_integration_sum, gpu_rad_integration_count, xpixels*ypixels, + gpu_in, gpu_rad_integration_bin_map, gpu_rad_integration_corr, + gpu_rad_integration_sum, gpu_rad_integration_count, xpixels*ypixels, rad_integration_nbins); cuda_err(cudaMemcpyAsync(host_rad_integration_count, gpu_rad_integration_count, @@ -459,24 +466,24 @@ void GPUImageAnalysis::GetRadialIntegrationProfile(std::vector &result) { } } -std::vector GPUImageAnalysis::GetRadialIntegrationSum() const { +std::vector GPUImageAnalysis::GetRadialIntegrationSum() const { if (rad_integration_nbins == 0) throw JFJochException(JFJochExceptionCategory::SpotFinderError, "Radial integration not initialized"); cuda_err(cudaStreamSynchronize(cudastream->v)); - std::vector out(rad_integration_nbins); + std::vector out(rad_integration_nbins); memcpy(out.data(), host_rad_integration_sum, rad_integration_nbins * sizeof(int32_t)); return out; } -std::vector GPUImageAnalysis::GetRadialIntegrationCount() const { +std::vector GPUImageAnalysis::GetRadialIntegrationCount() const { if (rad_integration_nbins == 0) throw JFJochException(JFJochExceptionCategory::SpotFinderError, "Radial integration not initialized"); cuda_err(cudaStreamSynchronize(cudastream->v)); - std::vector out(rad_integration_nbins); + std::vector out(rad_integration_nbins); memcpy(out.data(), host_rad_integration_count, rad_integration_nbins * sizeof(int32_t)); return out; } @@ -487,8 +494,8 @@ float GPUImageAnalysis::GetRadialIntegrationRangeValue(uint16_t min_bin, uint16_ cuda_err(cudaStreamSynchronize(cudastream->v)); - int64_t ret_sum = 0; - int64_t ret_count = 0; + float ret_sum = 0; + float ret_count = 0; for (int i = std::min(rad_integration_nbins,min_bin); i <= std::min((uint16_t)(rad_integration_nbins-1),max_bin); @@ -500,7 +507,7 @@ float GPUImageAnalysis::GetRadialIntegrationRangeValue(uint16_t min_bin, uint16_ if (ret_count == 0) return 0; else - return static_cast(ret_sum) / static_cast(ret_count); + return ret_sum / ret_count; } std::atomic GPUImageAnalysis::threadid{0}; diff --git a/image_analysis/GPUImageAnalysis.h b/image_analysis/GPUImageAnalysis.h index b5af0929..be15a5d0 100644 --- a/image_analysis/GPUImageAnalysis.h +++ b/image_analysis/GPUImageAnalysis.h @@ -41,9 +41,12 @@ class GPUImageAnalysis { uint16_t rad_integration_nbins; uint16_t *gpu_rad_integration_bin_map = nullptr; - int32_t *gpu_rad_integration_sum = nullptr; - int32_t *gpu_rad_integration_count = nullptr; - int32_t *host_rad_integration_sum = nullptr, *host_rad_integration_count = nullptr; + float *gpu_rad_integration_sum = nullptr; + float *gpu_rad_integration_count = nullptr; + float *gpu_rad_integration_corr = nullptr; + + float *host_rad_integration_sum = nullptr, *host_rad_integration_count = nullptr; + int numberOfSMs; const int numberOfCudaThreads = 128; // #threads per block that works well for Nvidia T4 const int numberOfWaves = 40; // #waves that works well for Nvidia T4 @@ -68,10 +71,11 @@ public: void GetSpotFinderResults(const DiffractionExperiment &experiment, const JFJochProtoBuf::DataProcessingSettings &settings, std::vector &vec); + void CalcRadialIntegrationCorr(const DiffractionExperiment& experiment); void RunRadialIntegration(); void GetRadialIntegrationProfile(std::vector &result); - [[nodiscard]] std::vector GetRadialIntegrationSum() const; - [[nodiscard]] std::vector GetRadialIntegrationCount() const; + [[nodiscard]] std::vector GetRadialIntegrationSum() const; + [[nodiscard]] std::vector GetRadialIntegrationCount() const; [[nodiscard]] float GetRadialIntegrationRangeValue(uint16_t min_bin, uint16_t max_bin); static bool GPUPresent(); diff --git a/image_analysis/RadialIntegrationProfile.cpp b/image_analysis/RadialIntegrationProfile.cpp index cbd66737..32705020 100644 --- a/image_analysis/RadialIntegrationProfile.cpp +++ b/image_analysis/RadialIntegrationProfile.cpp @@ -16,7 +16,7 @@ const std::vector &RadialIntegrationProfile::GetSolidAngleCorr() const { return corrections; } -void RadialIntegrationProfile::Add(const std::vector &in_sum, const std::vector &in_count) { +void RadialIntegrationProfile::Add(const std::vector &in_sum, const std::vector &in_count) { std::unique_lock ul(m); if ((in_sum.size() == sum.size()) && (in_count.size() == count.size())) { diff --git a/image_analysis/RadialIntegrationProfile.h b/image_analysis/RadialIntegrationProfile.h index f8b00c62..f02e823f 100644 --- a/image_analysis/RadialIntegrationProfile.h +++ b/image_analysis/RadialIntegrationProfile.h @@ -12,13 +12,13 @@ class RadialIntegrationProfile { mutable std::mutex m; - std::vector sum; - std::vector count; + std::vector sum; + std::vector count; std::vector bin_to_q; std::vector corrections; public: explicit RadialIntegrationProfile(RadialIntegrationMapping &mapping, const DiffractionExperiment &experiment); - void Add(const std::vector &sum, const std::vector &count); + void Add(const std::vector &sum, const std::vector &count); std::vector GetResult(bool solid_angle_correction) const; void GetPlot(JFJochProtoBuf::Plot &plot, bool solid_angle_correction) const; const std::vector &GetSolidAngleCorr() const; diff --git a/tests/RadialIntegrationTest.cpp b/tests/RadialIntegrationTest.cpp index 4f83a1a1..506803fa 100644 --- a/tests/RadialIntegrationTest.cpp +++ b/tests/RadialIntegrationTest.cpp @@ -87,8 +87,8 @@ TEST_CASE("RadialIntegrationProfile","[RadialIntegration]") { RadialIntegrationProfile profile(mapping, x); - std::vector sum(mapping.GetBinNumber()); - std::vector count(mapping.GetBinNumber()); + std::vector sum(mapping.GetBinNumber()); + std::vector count(mapping.GetBinNumber()); for (int i = 0; i < mapping.GetBinNumber(); i++) { sum[i] = i * i * 4; @@ -96,7 +96,7 @@ TEST_CASE("RadialIntegrationProfile","[RadialIntegration]") { } REQUIRE_NOTHROW(profile.Add(sum, count)); - std::vector sum_wr(mapping.GetBinNumber() - 1); + std::vector sum_wr(mapping.GetBinNumber() - 1); REQUIRE_THROWS(profile.Add(sum_wr, count)); JFJochProtoBuf::Plot plot;