diff --git a/image_analysis/MXAnalysisWithoutFPGA.cpp b/image_analysis/MXAnalysisWithoutFPGA.cpp index ceb02df2..3d8a6c63 100644 --- a/image_analysis/MXAnalysisWithoutFPGA.cpp +++ b/image_analysis/MXAnalysisWithoutFPGA.cpp @@ -16,6 +16,7 @@ #include "azint/AzIntEngineGPU.h" #include "spot_finding/ImageSpotFinderGPU.h" #include "image_preprocessing/ImagePreprocessorGPU.h" +#include "image_preprocessing/ImagePreprocessorBufferGPU.h" #include "../common/CUDAWrapper.h" #endif @@ -39,12 +40,14 @@ MXAnalysisWithoutFPGA::MXAnalysisWithoutFPGA(const DiffractionExperiment &in_exp #ifdef JFJOCH_USE_CUDA if (get_gpu_count() == 0) { #endif + preprocessor_buffer = std::make_unique(experiment.GetPixelsNum()); spotFinder = std::make_unique(experiment.GetXPixelsNum(), experiment.GetYPixelsNum()); azint = std::make_unique(integration); preprocessor = std::make_unique(in_experiment, in_mask); #ifdef JFJOCH_USE_CUDA } else { auto stream = std::make_shared(); + preprocessor_buffer = std::make_unique(experiment.GetPixelsNum()); preprocessor = std::make_unique(in_experiment, in_mask, stream); spotFinder = std::make_unique(experiment.GetXPixelsNum(), experiment.GetYPixelsNum(), stream); azint = std::make_unique(integration, stream); @@ -67,12 +70,12 @@ void MXAnalysisWithoutFPGA::Analyze(DataMessage &output, output.compression_time_s = std::chrono::duration(compression_end_time - compression_start_time).count(); const auto preprocessing_start_time = std::chrono::steady_clock::now(); - auto ret = preprocessor->Analyze(spotFinder->GetInputBuffer(), image_ptr, output.image.GetMode()); + auto ret = preprocessor->Analyze(*preprocessor_buffer, image_ptr, output.image.GetMode()); const auto preprocessing_end_time = std::chrono::steady_clock::now(); output.preprocessing_time_s = std::chrono::duration(preprocessing_end_time - preprocessing_start_time).count(); const auto azint_start_time = std::chrono::steady_clock::now(); - azint->Run(spotFinder->GetInputBuffer(), profile); + azint->Run(*preprocessor_buffer, profile); const auto azint_end_time = std::chrono::steady_clock::now(); output.azint_time_s = std::chrono::duration(azint_end_time - azint_start_time).count(); @@ -83,14 +86,14 @@ void MXAnalysisWithoutFPGA::Analyze(DataMessage &output, UpdateMaskResolution(spot_finding_settings); const auto spot_finding_start_time = std::chrono::steady_clock::now(); - const std::vector spots = spotFinder->Run(spot_finding_settings, mask_resolution); + const std::vector spots = spotFinder->Run(*preprocessor_buffer, spot_finding_settings, mask_resolution); SpotAnalyze(experiment, spot_finding_settings, spots, output); const auto spot_finding_end_time = std::chrono::steady_clock::now(); output.spot_finding_time_s = std::chrono::duration(spot_finding_end_time - spot_finding_start_time).count(); if (spot_finding_settings.indexing) indexer.ProcessImage(output, spot_finding_settings, - CompressedImage(spotFinder->GetInputBuffer(), experiment.GetXPixelsNum(), experiment.GetYPixelsNum()), + CompressedImage(preprocessor_buffer->getBuffer(), experiment.GetXPixelsNum(), experiment.GetYPixelsNum()), *prediction); } diff --git a/image_analysis/MXAnalysisWithoutFPGA.h b/image_analysis/MXAnalysisWithoutFPGA.h index 4f0f10ea..9deca52d 100644 --- a/image_analysis/MXAnalysisWithoutFPGA.h +++ b/image_analysis/MXAnalysisWithoutFPGA.h @@ -17,6 +17,7 @@ #include "azint/AzIntEngine.h" #include "IndexAndRefine.h" #include "image_preprocessing/ImagePreprocessor.h" +#include "image_preprocessing/ImagePreprocessorBuffer.h" // MXAnalysisWithoutFPGA is not thread safe - it has to owned by a single thread class MXAnalysisWithoutFPGA { @@ -34,6 +35,7 @@ class MXAnalysisWithoutFPGA { std::unique_ptr spotFinder; IndexAndRefine &indexer; std::unique_ptr prediction; + std::unique_ptr preprocessor_buffer; const PixelMask &mask; std::vector mask_resolution; diff --git a/image_analysis/azint/AzIntEngine.h b/image_analysis/azint/AzIntEngine.h index 97761aa3..adedb96f 100644 --- a/image_analysis/azint/AzIntEngine.h +++ b/image_analysis/azint/AzIntEngine.h @@ -5,6 +5,7 @@ #include "../../common/AzimuthalIntegration.h" #include "../../common/AzimuthalIntegrationProfile.h" +#include "../image_preprocessing/ImagePreprocessorBuffer.h" class AzIntEngine { protected: @@ -17,5 +18,5 @@ protected: public: AzIntEngine(const AzimuthalIntegration& integration); virtual ~AzIntEngine() = default; - virtual void Run(const std::vector &image, AzimuthalIntegrationProfile &profile) = 0; + virtual void Run(const ImagePreprocessorBuffer &image, AzimuthalIntegrationProfile &profile) = 0; }; diff --git a/image_analysis/azint/AzIntEngineCPU.cpp b/image_analysis/azint/AzIntEngineCPU.cpp index 7c3ede74..ab706bb8 100644 --- a/image_analysis/azint/AzIntEngineCPU.cpp +++ b/image_analysis/azint/AzIntEngineCPU.cpp @@ -6,7 +6,7 @@ AzIntEngineCPU::AzIntEngineCPU(const AzimuthalIntegration &integration) : AzIntEngine(integration) {} -void AzIntEngineCPU::Run(const std::vector &image, AzimuthalIntegrationProfile &profile){ +void AzIntEngineCPU::Run(const ImagePreprocessorBuffer &image, AzimuthalIntegrationProfile &profile){ for (int i = 0; i < azint_count.size(); i++) { azint_sum[i] = 0.0f; diff --git a/image_analysis/azint/AzIntEngineCPU.h b/image_analysis/azint/AzIntEngineCPU.h index f05ca5d5..9eeec271 100644 --- a/image_analysis/azint/AzIntEngineCPU.h +++ b/image_analysis/azint/AzIntEngineCPU.h @@ -8,5 +8,5 @@ class AzIntEngineCPU : public AzIntEngine { public: AzIntEngineCPU(const AzimuthalIntegration& integration); - void Run(const std::vector &image, AzimuthalIntegrationProfile &profile) override; + void Run(const ImagePreprocessorBuffer &image, AzimuthalIntegrationProfile &profile) override; }; \ No newline at end of file diff --git a/image_analysis/azint/AzIntEngineGPU.cu b/image_analysis/azint/AzIntEngineGPU.cu index 0fda4812..9700777f 100644 --- a/image_analysis/azint/AzIntEngineGPU.cu +++ b/image_analysis/azint/AzIntEngineGPU.cu @@ -98,8 +98,7 @@ AzIntEngineGPU::AzIntEngineGPU(const AzimuthalIntegration &integration, std::sha gpu_count(azint_bins), cpu_sum_reg(azint_sum), cpu_sum2_reg(azint_sum2), - cpu_count_reg(azint_count), - gpu_image(npixel) { + cpu_count_reg(azint_count) { cudaDeviceProp prop{}; cudaGetDeviceProperties(&prop, 0); @@ -115,22 +114,21 @@ AzIntEngineGPU::AzIntEngineGPU(const AzimuthalIntegration &integration, std::sha cudaMemcpyHostToDevice); } -void AzIntEngineGPU::Run(const std::vector &image, AzimuthalIntegrationProfile &profile) { +void AzIntEngineGPU::Run(const ImagePreprocessorBuffer &image, AzimuthalIntegrationProfile &profile) { if (image.size() != integration.GetPixelToBin().size()) throw std::runtime_error("ImageSpotFinder::AzimIntegration: Mismatch in size"); cuda_err(cudaMemsetAsync(gpu_sum, 0, sizeof(float) * azint_bins, *stream)); cuda_err(cudaMemsetAsync(gpu_sum2, 0, sizeof(float) * azint_bins, *stream)); cuda_err(cudaMemsetAsync(gpu_count, 0, sizeof(uint32_t) * azint_bins, *stream)); - cudaMemcpyAsync(gpu_image, image.data(), sizeof(int32_t) * npixel, cudaMemcpyHostToDevice, *stream); if (shared_needed < shared_size) { gpu_azim_shared<<>>( - gpu_pixel_to_bin,gpu_azint_correction,gpu_image, gpu_sum, gpu_sum2, + gpu_pixel_to_bin,gpu_azint_correction,image.getGPUBuffer(), gpu_sum, gpu_sum2, gpu_count, npixel, azint_bins ); } else { gpu_azim<<>>( - gpu_pixel_to_bin,gpu_azint_correction,gpu_image, gpu_sum, gpu_sum2, + gpu_pixel_to_bin,gpu_azint_correction,image.getGPUBuffer(), gpu_sum, gpu_sum2, gpu_count, npixel, azint_bins ); } diff --git a/image_analysis/azint/AzIntEngineGPU.h b/image_analysis/azint/AzIntEngineGPU.h index 2ecf7d75..ad01b9fa 100644 --- a/image_analysis/azint/AzIntEngineGPU.h +++ b/image_analysis/azint/AzIntEngineGPU.h @@ -22,9 +22,7 @@ class AzIntEngineGPU : public AzIntEngine { CudaRegisteredVector cpu_sum_reg; CudaRegisteredVector cpu_sum2_reg; CudaRegisteredVector cpu_count_reg; - - CudaDevicePtr gpu_image; public: AzIntEngineGPU(const AzimuthalIntegration& integration, std::shared_ptr stream); - void Run(const std::vector &image, AzimuthalIntegrationProfile &profile) override; + void Run(const ImagePreprocessorBuffer &image, AzimuthalIntegrationProfile &profile) override; }; diff --git a/image_analysis/image_preprocessing/CMakeLists.txt b/image_analysis/image_preprocessing/CMakeLists.txt index 75a5515a..4c5a5b25 100644 --- a/image_analysis/image_preprocessing/CMakeLists.txt +++ b/image_analysis/image_preprocessing/CMakeLists.txt @@ -1,11 +1,15 @@ ADD_LIBRARY(JFJochImagePreprocessing STATIC ImagePreprocessor.cpp ImagePreprocessor.h ImagePreprocessorCPU.cpp - ImagePreprocessorCPU.h) + ImagePreprocessorCPU.h + ImagePreprocessorBuffer.cpp + ImagePreprocessorBuffer.h) TARGET_LINK_LIBRARIES(JFJochImagePreprocessing JFJochCommon) IF (JFJOCH_CUDA_AVAILABLE) TARGET_SOURCES(JFJochImagePreprocessing PRIVATE ../indexing/CUDAMemHelpers.h - ImagePreprocessorGPU.cu ImagePreprocessorGPU.h) + ImagePreprocessorGPU.cu ImagePreprocessorGPU.h + ImagePreprocessorBufferGPU.cu + ImagePreprocessorBufferGPU.h) ENDIF() \ No newline at end of file diff --git a/image_analysis/image_preprocessing/ImagePreprocessor.h b/image_analysis/image_preprocessing/ImagePreprocessor.h index 7c2bbeb5..e105bf86 100644 --- a/image_analysis/image_preprocessing/ImagePreprocessor.h +++ b/image_analysis/image_preprocessing/ImagePreprocessor.h @@ -8,6 +8,7 @@ #include "../common/CompressedImage.h" #include "../common/DiffractionExperiment.h" #include "../common/PixelMask.h" +#include "ImagePreprocessorBuffer.h" struct alignas(8) ImageStatistics { unsigned long long error_pixel_count = 0; @@ -25,5 +26,5 @@ protected: public: ImagePreprocessor(const DiffractionExperiment &experiment); virtual ~ImagePreprocessor() = default; - virtual ImageStatistics Analyze(std::vector &processed_image, const uint8_t *decompressed_image, CompressedImageMode image_mode) = 0; + virtual ImageStatistics Analyze(ImagePreprocessorBuffer &processed_image, const uint8_t *decompressed_image, CompressedImageMode image_mode) = 0; }; diff --git a/image_analysis/image_preprocessing/ImagePreprocessorBuffer.cpp b/image_analysis/image_preprocessing/ImagePreprocessorBuffer.cpp new file mode 100644 index 00000000..421259be --- /dev/null +++ b/image_analysis/image_preprocessing/ImagePreprocessorBuffer.cpp @@ -0,0 +1,43 @@ +// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#include "ImagePreprocessorBuffer.h" + +ImagePreprocessorBuffer::ImagePreprocessorBuffer(size_t npixels) : buffer(npixels) {} + +// Standard CPU operation +std::vector &ImagePreprocessorBuffer::getBuffer() { + return buffer; +} + +const std::vector &ImagePreprocessorBuffer::getBuffer() const { + return buffer; +} + +int32_t &ImagePreprocessorBuffer::operator[](size_t i) { + return buffer[i]; +} + +const int32_t &ImagePreprocessorBuffer::operator[](size_t i) const { + return buffer[i]; +} + +size_t ImagePreprocessorBuffer::size() const { + return buffer.size(); +} + +int32_t *ImagePreprocessorBuffer::data() { + return buffer.data(); +} + +const int32_t *ImagePreprocessorBuffer::data() const { + return buffer.data(); +} + +int32_t *ImagePreprocessorBuffer::getGPUBuffer() { + return nullptr; +} + +const int32_t *ImagePreprocessorBuffer::getGPUBuffer() const { + return nullptr; +} diff --git a/image_analysis/image_preprocessing/ImagePreprocessorBuffer.h b/image_analysis/image_preprocessing/ImagePreprocessorBuffer.h new file mode 100644 index 00000000..5048571d --- /dev/null +++ b/image_analysis/image_preprocessing/ImagePreprocessorBuffer.h @@ -0,0 +1,30 @@ +// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + + +#pragma once + +#include +#include + +class ImagePreprocessorBuffer { +protected: + std::vector buffer; +public: + explicit ImagePreprocessorBuffer(size_t npixels); + virtual ~ImagePreprocessorBuffer() = default; + + // Standard CPU operation + std::vector &getBuffer(); + const std::vector &getBuffer() const; + int32_t &operator[](size_t i); + const int32_t &operator[](size_t i) const; + size_t size() const; + + int32_t *data(); + const int32_t *data() const; + + // GPU operations (overriden in ImagePreprocessorBufferGPU + virtual int32_t *getGPUBuffer(); + virtual const int32_t *getGPUBuffer() const; +}; diff --git a/image_analysis/image_preprocessing/ImagePreprocessorBufferGPU.cu b/image_analysis/image_preprocessing/ImagePreprocessorBufferGPU.cu new file mode 100644 index 00000000..465e4be3 --- /dev/null +++ b/image_analysis/image_preprocessing/ImagePreprocessorBufferGPU.cu @@ -0,0 +1,18 @@ +// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#include "ImagePreprocessorBufferGPU.h" + +ImagePreprocessorBufferGPU::ImagePreprocessorBufferGPU(size_t npixel) + : ImagePreprocessorBuffer(npixel), + gpu_image(npixel), + buffer_reg(buffer) { +} + +int32_t *ImagePreprocessorBufferGPU::getGPUBuffer() { + return gpu_image; +} + +const int32_t *ImagePreprocessorBufferGPU::getGPUBuffer() const { + return gpu_image; +} diff --git a/image_analysis/image_preprocessing/ImagePreprocessorBufferGPU.h b/image_analysis/image_preprocessing/ImagePreprocessorBufferGPU.h new file mode 100644 index 00000000..6db2c780 --- /dev/null +++ b/image_analysis/image_preprocessing/ImagePreprocessorBufferGPU.h @@ -0,0 +1,20 @@ +// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#pragma once + +#include + +#include "ImagePreprocessorBuffer.h" +#include "../indexing/CUDAMemHelpers.h" + +class ImagePreprocessorBufferGPU : public ImagePreprocessorBuffer { + CudaDevicePtr gpu_image; + CudaRegisteredVector buffer_reg; + +public: + explicit ImagePreprocessorBufferGPU(size_t npixel); + + int32_t *getGPUBuffer() override; + const int32_t *getGPUBuffer() const override; +}; diff --git a/image_analysis/image_preprocessing/ImagePreprocessorCPU.cpp b/image_analysis/image_preprocessing/ImagePreprocessorCPU.cpp index 28ad9fe8..dd71c20d 100644 --- a/image_analysis/image_preprocessing/ImagePreprocessorCPU.cpp +++ b/image_analysis/image_preprocessing/ImagePreprocessorCPU.cpp @@ -10,7 +10,7 @@ ImagePreprocessorCPU::ImagePreprocessorCPU(const DiffractionExperiment &experime mask_1bit[i] = (mask.GetMask().at(i) != 0); } -ImageStatistics ImagePreprocessorCPU::Analyze(std::vector &processed_image, const uint8_t *image_ptr, CompressedImageMode image_mode) { +ImageStatistics ImagePreprocessorCPU::Analyze(ImagePreprocessorBuffer &processed_image, const uint8_t *image_ptr, CompressedImageMode image_mode) { switch (image_mode) { case CompressedImageMode::Int8: return Analyze(processed_image, image_ptr, INT8_MIN, INT8_MAX); @@ -30,7 +30,7 @@ ImageStatistics ImagePreprocessorCPU::Analyze(std::vector &processed_im } template -ImageStatistics ImagePreprocessorCPU::Analyze(std::vector &processed_image, const uint8_t *input, T err_pixel_val, T sat_pixel_val) { +ImageStatistics ImagePreprocessorCPU::Analyze(ImagePreprocessorBuffer &processed_image, const uint8_t *input, T err_pixel_val, T sat_pixel_val) { if (processed_image.size() != npixels) throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Processed image size mismatch"); diff --git a/image_analysis/image_preprocessing/ImagePreprocessorCPU.h b/image_analysis/image_preprocessing/ImagePreprocessorCPU.h index 9f68b9b0..aa326d77 100644 --- a/image_analysis/image_preprocessing/ImagePreprocessorCPU.h +++ b/image_analysis/image_preprocessing/ImagePreprocessorCPU.h @@ -9,8 +9,8 @@ class ImagePreprocessorCPU : public ImagePreprocessor { std::vector mask_1bit; - template ImageStatistics Analyze(std::vector &processed_image, const uint8_t *input, T err_value, T sat_value); + template ImageStatistics Analyze(ImagePreprocessorBuffer &processed_image, const uint8_t *input, T err_value, T sat_value); public: ImagePreprocessorCPU(const DiffractionExperiment &experiment, const PixelMask &mask); - ImageStatistics Analyze(std::vector &processed_image, const uint8_t *decompressed_image, CompressedImageMode image_mode) override; + ImageStatistics Analyze(ImagePreprocessorBuffer &processed_image, const uint8_t *decompressed_image, CompressedImageMode image_mode) override; }; diff --git a/image_analysis/image_preprocessing/ImagePreprocessorGPU.cu b/image_analysis/image_preprocessing/ImagePreprocessorGPU.cu index d253270d..5f13339a 100644 --- a/image_analysis/image_preprocessing/ImagePreprocessorGPU.cu +++ b/image_analysis/image_preprocessing/ImagePreprocessorGPU.cu @@ -94,7 +94,6 @@ ImagePreprocessorGPU::ImagePreprocessorGPU(const DiffractionExperiment &experime stream(stream), gpu_mask(npixels), gpu_decompressed_image(npixels * sizeof(uint32_t)), // Overshoot - if input image is 1- or 2-byte, then it is still fine, while memory loss is minimal - gpu_image(npixels), gpu_stats(1), cpu_stats(1), cpu_stats_reg(cpu_stats) { @@ -113,7 +112,7 @@ ImagePreprocessorGPU::ImagePreprocessorGPU(const DiffractionExperiment &experime blocks = 4 * prop.multiProcessorCount; } -ImageStatistics ImagePreprocessorGPU::Analyze(std::vector &processed_image, const uint8_t *image_ptr, +ImageStatistics ImagePreprocessorGPU::Analyze(ImagePreprocessorBuffer &processed_image, const uint8_t *image_ptr, CompressedImageMode image_mode) { switch (image_mode) { case CompressedImageMode::Int8: @@ -134,7 +133,7 @@ ImageStatistics ImagePreprocessorGPU::Analyze(std::vector &processed_im } template -ImageStatistics ImagePreprocessorGPU::Analyze(std::vector &processed_image, +ImageStatistics ImagePreprocessorGPU::Analyze(ImagePreprocessorBuffer &processed_image, const uint8_t *input, T err_value, T sat_value) { @@ -148,23 +147,15 @@ ImageStatistics ImagePreprocessorGPU::Analyze(std::vector &processed_im preprocess_kernel <<< blocks, threads, 0, *stream >>>( reinterpret_cast(gpu_decompressed_image.get()), gpu_mask, - gpu_image, + processed_image.getGPUBuffer(), gpu_stats, sat_value, err_value, npixels); - cudaMemcpyAsync(processed_image.data(), gpu_image, npixels * sizeof(int32_t), cudaMemcpyDeviceToHost, *stream); + cudaMemcpyAsync(processed_image.data(), processed_image.getGPUBuffer(), npixels * sizeof(int32_t), cudaMemcpyDeviceToHost, *stream); cudaMemcpyAsync(cpu_stats.data(), gpu_stats, sizeof(ImageStatistics), cudaMemcpyDeviceToHost, *stream); cudaStreamSynchronize(*stream); return cpu_stats[0]; } - -const int32_t *ImagePreprocessorGPU::GetImageDevicePtr() const { - return gpu_image; -} - -std::shared_ptr ImagePreprocessorGPU::GetStream() const { - return stream; -} diff --git a/image_analysis/image_preprocessing/ImagePreprocessorGPU.h b/image_analysis/image_preprocessing/ImagePreprocessorGPU.h index ed8f0284..652c429d 100644 --- a/image_analysis/image_preprocessing/ImagePreprocessorGPU.h +++ b/image_analysis/image_preprocessing/ImagePreprocessorGPU.h @@ -12,7 +12,6 @@ class ImagePreprocessorGPU : public ImagePreprocessor { int blocks; CudaDevicePtr gpu_mask; CudaDevicePtr gpu_decompressed_image; - CudaDevicePtr gpu_image; CudaDevicePtr gpu_stats; std::vector cpu_stats; @@ -20,12 +19,9 @@ class ImagePreprocessorGPU : public ImagePreprocessor { std::vector cpu_image; - template ImageStatistics Analyze(std::vector &processed_image, const uint8_t *input, T err_value, T sat_value); + template ImageStatistics Analyze(ImagePreprocessorBuffer &processed_image, const uint8_t *input, T err_value, T sat_value); public: ImagePreprocessorGPU(const DiffractionExperiment &experiment, const PixelMask &mask, std::shared_ptr stream); - ImageStatistics Analyze(std::vector &processed_image, const uint8_t *decompressed_image, CompressedImageMode image_mode) override; - - const int32_t* GetImageDevicePtr() const; - std::shared_ptr GetStream() const; + ImageStatistics Analyze(ImagePreprocessorBuffer &processed_image, const uint8_t *decompressed_image, CompressedImageMode image_mode) override; }; diff --git a/image_analysis/spot_finding/ImageSpotFinder.cpp b/image_analysis/spot_finding/ImageSpotFinder.cpp index 92719aac..895b8dba 100644 --- a/image_analysis/spot_finding/ImageSpotFinder.cpp +++ b/image_analysis/spot_finding/ImageSpotFinder.cpp @@ -7,7 +7,10 @@ #include "StrongPixelSet.h" ImageSpotFinder::ImageSpotFinder(int32_t width, int32_t height) -: width(width), height(height), input_buffer(width * height), output_buffer(width * height / 32 + 1) {} + : width(width), + height(height), + output_buffer(width * height / 32 + 1) { +} size_t ImageSpotFinder::OutputSize() const { return (width * height) / 32 + ((width * height % 32 != 0) ? 1 : 0); @@ -17,11 +20,9 @@ size_t ImageSpotFinder::OutputByteSize() const { return OutputSize() * sizeof(uint32_t); } -std::vector &ImageSpotFinder::GetInputBuffer() { - return input_buffer; -} - -std::vector ImageSpotFinder::ExtractSpots(const SpotFindingSettings &settings, const std::vector &res_mask) { +std::vector ImageSpotFinder::ExtractSpots(const ImagePreprocessorBuffer &image, + const SpotFindingSettings &settings, + const std::vector &res_mask) { StrongPixelSet pixel_set; for (int i = 0; i < OutputSize(); i++) { @@ -33,7 +34,7 @@ std::vector ImageSpotFinder::ExtractSpots(const SpotFindingSett size_t col = npixel % width; size_t line = npixel / width; if (line < height && res_mask[npixel] == 0) - pixel_set.AddStrongPixel(col, line, input_buffer[npixel]); + pixel_set.AddStrongPixel(col, line, image[npixel]); } } } diff --git a/image_analysis/spot_finding/ImageSpotFinder.h b/image_analysis/spot_finding/ImageSpotFinder.h index 3c0d3005..bf44245b 100644 --- a/image_analysis/spot_finding/ImageSpotFinder.h +++ b/image_analysis/spot_finding/ImageSpotFinder.h @@ -9,27 +9,23 @@ #include "../../common/DiffractionSpot.h" -#include "../common/AzimuthalIntegration.h" -#include "../common/AzimuthalIntegrationProfile.h" +#include "../image_preprocessing/ImagePreprocessorBuffer.h" class ImageSpotFinder { protected: const int32_t width, height; - std::vector input_buffer; std::vector output_buffer; ImageSpotFinder(int32_t width, int32_t height); size_t OutputSize() const; size_t OutputByteSize() const; - std::vector ExtractSpots(const SpotFindingSettings &settings, const std::vector &res_mask); + std::vector ExtractSpots(const ImagePreprocessorBuffer &image, const SpotFindingSettings &settings, const std::vector &res_mask); public: constexpr static int32_t MIN_VALID_PIXELS = 100; constexpr static int NBX = 15; - std::vector &GetInputBuffer(); - virtual ~ImageSpotFinder() = default; - virtual std::vector Run(const SpotFindingSettings &settings, const std::vector &res_mask) = 0; + virtual std::vector Run(const ImagePreprocessorBuffer &image, const SpotFindingSettings &settings, const std::vector &res_mask) = 0; }; diff --git a/image_analysis/spot_finding/ImageSpotFinderCPU.cpp b/image_analysis/spot_finding/ImageSpotFinderCPU.cpp index ab7611fe..97188897 100644 --- a/image_analysis/spot_finding/ImageSpotFinderCPU.cpp +++ b/image_analysis/spot_finding/ImageSpotFinderCPU.cpp @@ -9,7 +9,8 @@ ImageSpotFinderCPU::ImageSpotFinderCPU(int32_t in_width, int32_t in_height) : ImageSpotFinder(in_width, in_height) {} -std::vector ImageSpotFinderCPU::Run(const SpotFindingSettings &settings, +std::vector ImageSpotFinderCPU::Run(const ImagePreprocessorBuffer &image, + const SpotFindingSettings &settings, const std::vector &res_mask) { for (int i = 0; i < OutputSize(); i++) output_buffer[i] = 0; @@ -20,8 +21,7 @@ std::vector ImageSpotFinderCPU::Run(const SpotFindingSettings & if (settings.photon_count_threshold > 0) { for (int pxl = 0; pxl < height * width; pxl++) { int32_t bit = pxl % 32; - - int32_t pxl_val = input_buffer[pxl]; + int32_t pxl_val = image[pxl]; if (pxl_val == INT32_MAX || (pxl_val > settings.photon_count_threshold && pxl_val != INT32_MIN)) out.set(bit); @@ -45,8 +45,8 @@ std::vector ImageSpotFinderCPU::Run(const SpotFindingSettings & for (int col = 0; col < width; col++) { auto pxl = line * width + col; - if (input_buffer[pxl] != INT32_MAX && input_buffer[pxl] != INT32_MIN) { - int64_t tmp = input_buffer[pxl]; + if (image[pxl] != INT32_MAX && image[pxl] != INT32_MIN) { + int64_t tmp = image[pxl]; sum_vert[col] += tmp; sum2_vert[col] += tmp * tmp; valid_vert[col] += 1; @@ -59,8 +59,8 @@ std::vector ImageSpotFinderCPU::Run(const SpotFindingSettings & if (line < height - NBX) { auto pxl = (line + NBX) * width + col; - if (input_buffer[pxl] != INT32_MAX && input_buffer[pxl] != INT32_MIN) { - int64_t tmp = input_buffer[pxl]; + if (image[pxl] != INT32_MAX && image[pxl] != INT32_MIN) { + int64_t tmp = image[pxl]; sum_vert[col] += tmp; sum2_vert[col] += tmp * tmp; valid_vert[col] += 1; @@ -69,8 +69,8 @@ std::vector ImageSpotFinderCPU::Run(const SpotFindingSettings & if (line >= NBX + 1) { auto pxl = (line - (NBX + 1)) * width + col; - if (input_buffer[pxl] != INT32_MAX && input_buffer[pxl] != INT32_MIN) { - int64_t tmp = input_buffer[pxl]; + if (image[pxl] != INT32_MAX && image[pxl] != INT32_MIN) { + int64_t tmp = image[pxl]; sum_vert[col] -= tmp; sum2_vert[col] -= tmp * tmp; valid_vert[col] -= 1; @@ -102,7 +102,7 @@ std::vector ImageSpotFinderCPU::Run(const SpotFindingSettings & } const int32_t pxl = line * width + col; - int32_t pxl_val = input_buffer[pxl]; + int32_t pxl_val = image[pxl]; int64_t sum_local = sum - pxl_val; int64_t sum2_local = sum2 - pxl_val * pxl_val; int64_t valid_local = valid - 1; @@ -132,5 +132,5 @@ std::vector ImageSpotFinderCPU::Run(const SpotFindingSettings & if (height * width % 32 != 0) output_buffer[OutputSize() - 1] = out.to_ulong(); - return ExtractSpots(settings, res_mask); + return ExtractSpots(image, settings, res_mask); } diff --git a/image_analysis/spot_finding/ImageSpotFinderCPU.h b/image_analysis/spot_finding/ImageSpotFinderCPU.h index cc59b763..422222fa 100644 --- a/image_analysis/spot_finding/ImageSpotFinderCPU.h +++ b/image_analysis/spot_finding/ImageSpotFinderCPU.h @@ -21,7 +21,7 @@ class ImageSpotFinderCPU : public ImageSpotFinder { public: ImageSpotFinderCPU(int32_t width, int32_t height); - std::vector Run(const SpotFindingSettings &settings, const std::vector &res_mask); + std::vector Run(const ImagePreprocessorBuffer &image, const SpotFindingSettings &settings, const std::vector &res_mask); }; #endif //JFJOCH_IMAGESPOTFINDER_H diff --git a/image_analysis/spot_finding/ImageSpotFinderGPU.cu b/image_analysis/spot_finding/ImageSpotFinderGPU.cu index 199cc3ce..f1afc258 100644 --- a/image_analysis/spot_finding/ImageSpotFinderGPU.cu +++ b/image_analysis/spot_finding/ImageSpotFinderGPU.cu @@ -229,16 +229,15 @@ __global__ void analyze_pixel(const int32_t *in, uint32_t *prev_out, uint32_t *o } ImageSpotFinderGPU::ImageSpotFinderGPU(int32_t in_width, int32_t in_height, - std::shared_ptr stream) : stream(stream), - ImageSpotFinder(in_width, in_height), - input_buffer_reg(input_buffer), - output_buffer_reg(output_buffer) { - gpu_in = CudaDevicePtr(width * height); + std::shared_ptr stream) + : ImageSpotFinder(in_width, in_height), + stream(std::move(stream)), + output_buffer_reg(output_buffer) { gpu_out_0 = CudaDevicePtr(OutputSize()); gpu_out_1 = CudaDevicePtr(OutputSize()); } -std::vector ImageSpotFinderGPU::Run(const SpotFindingSettings &settings, const std::vector &res_mask) { +std::vector ImageSpotFinderGPU::Run(const ImagePreprocessorBuffer &image, const SpotFindingSettings &settings, const std::vector &res_mask) { spot_parameters spot_params{}; spot_params.height = height; spot_params.width = width; @@ -265,16 +264,15 @@ std::vector ImageSpotFinderGPU::Run(const SpotFindingSettings & ) * numberOfCudaThreads; const dim3 blocks(nBlocks, numberOfWaves); - cuda_err(cudaMemcpyAsync(gpu_in, input_buffer.data(), width * height * sizeof(int32_t), cudaMemcpyHostToDevice,* stream)); cuda_err(cudaMemsetAsync(gpu_out_0, 0, OutputSize() * sizeof(uint32_t), *stream)); cuda_err(cudaMemsetAsync(gpu_out_1, 0, OutputSize() * sizeof(uint32_t), *stream)); analyze_pixel<<>> - (gpu_in, gpu_out_1, gpu_out_0, spot_params); + (image.getGPUBuffer(), gpu_out_1, gpu_out_0, spot_params); analyze_pixel<<>> - (gpu_in, gpu_out_0, gpu_out_1, spot_params); + (image.getGPUBuffer(), gpu_out_0, gpu_out_1, spot_params); cuda_err(cudaMemcpyAsync(output_buffer.data(), gpu_out_1, OutputSize() * sizeof(uint32_t), cudaMemcpyDeviceToHost, *stream)); cuda_err(cudaStreamSynchronize(*stream)); - return ExtractSpots(settings, res_mask); + return ExtractSpots(image, settings, res_mask); } diff --git a/image_analysis/spot_finding/ImageSpotFinderGPU.h b/image_analysis/spot_finding/ImageSpotFinderGPU.h index 9e703464..92213a9d 100644 --- a/image_analysis/spot_finding/ImageSpotFinderGPU.h +++ b/image_analysis/spot_finding/ImageSpotFinderGPU.h @@ -13,11 +13,8 @@ class ImageSpotFinderGPU : public ImageSpotFinder { std::shared_ptr stream; - CudaDevicePtr gpu_in; CudaDevicePtr gpu_out_0; CudaDevicePtr gpu_out_1; - - CudaRegisteredVector input_buffer_reg; CudaRegisteredVector output_buffer_reg; const int numberOfCudaThreads = 128; // #threads per block that should work well for Nvidia L4 @@ -27,7 +24,7 @@ public: ImageSpotFinderGPU(int32_t width, int32_t height, std::shared_ptr stream); ~ImageSpotFinderGPU() override = default; - std::vector Run(const SpotFindingSettings &settings, const std::vector &res_mask) override; + std::vector Run(const ImagePreprocessorBuffer &image, const SpotFindingSettings &settings, const std::vector &res_mask) override; }; #endif //JFJOCH_IMAGEANALYSISGPU_H diff --git a/tests/ImageSpotFinderCPUTest.cpp b/tests/ImageSpotFinderCPUTest.cpp index dd173991..5d1ff417 100644 --- a/tests/ImageSpotFinderCPUTest.cpp +++ b/tests/ImageSpotFinderCPUTest.cpp @@ -9,8 +9,9 @@ TEST_CASE("ImageSpotFinderCPU_SignalToNoise") { size_t height = 100; ImageSpotFinderCPU s(width, height); - - auto &input = s.GetInputBuffer(); + ImagePreprocessorBuffer buffer(width * height); + + auto &input = buffer.getBuffer(); for (int i = 0; i < width * height; i++) input[i] = (i % 2) * 5 + 5; @@ -29,7 +30,7 @@ TEST_CASE("ImageSpotFinderCPU_SignalToNoise") { }; std::vector mask_resolution(width * height, false); - auto spots = s.Run(settings, mask_resolution); + auto spots = s.Run(buffer, settings, mask_resolution); REQUIRE(spots.size() == 2); REQUIRE(spots[0].RawCoord().y == 25); @@ -42,7 +43,8 @@ TEST_CASE("ImageSpotFinderCPU_SignalToNoise_Resolution") { ImageSpotFinderCPU s(width, height); - auto &input = s.GetInputBuffer(); + ImagePreprocessorBuffer buffer(width * height); + auto &input = buffer.getBuffer(); for (int i = 0; i < width * height; i++) input[i] = (i % 2) * 5 + 5; @@ -62,7 +64,7 @@ TEST_CASE("ImageSpotFinderCPU_SignalToNoise_Resolution") { std::vector mask_resolution(width * height, false); mask_resolution[width * 50 + 50] = true; - auto spots = s.Run(settings, mask_resolution); + auto spots = s.Run(buffer, settings, mask_resolution); REQUIRE(spots.size() == 1); REQUIRE(spots[0].RawCoord().x == 26); @@ -75,7 +77,8 @@ TEST_CASE("ImageSpotFinderCPU_CountThreshold_Resolution") { ImageSpotFinderCPU s(width, height); - auto &input = s.GetInputBuffer(); + ImagePreprocessorBuffer buffer(width * height); + auto &input = buffer.getBuffer(); for (int i = 0; i < width * height; i++) input[i] = (i % 2) * 5 + 5; @@ -95,7 +98,7 @@ TEST_CASE("ImageSpotFinderCPU_CountThreshold_Resolution") { std::vector mask_resolution(width * height, false); mask_resolution[width * 50 + 50] = true; - auto spots = s.Run(settings, mask_resolution); + auto spots = s.Run(buffer, settings, mask_resolution); REQUIRE(spots.size() == 2); REQUIRE(spots[0].RawCoord().y == 25); @@ -108,7 +111,8 @@ TEST_CASE("ImageSpotFinderCPU_CountThreshold_Mask") { ImageSpotFinderCPU s( width, height); - auto &input = s.GetInputBuffer(); + ImagePreprocessorBuffer buffer(width * height); + auto &input = buffer.getBuffer(); for (int i = 0; i < width * height; i++) input[i] = (i % 2) * 5 + 5; @@ -128,7 +132,7 @@ TEST_CASE("ImageSpotFinderCPU_CountThreshold_Mask") { }; std::vector mask_resolution(width * height, false); - auto spots = s.Run(settings, mask_resolution); + auto spots = s.Run(buffer, settings, mask_resolution); REQUIRE(spots.size() == 3); REQUIRE(spots[0].RawCoord().x == 26); @@ -144,7 +148,8 @@ TEST_CASE("ImageSpotFinderCPU_SignalToNoise_Mask") { size_t height = 100; ImageSpotFinderCPU s(width, height); - auto &input = s.GetInputBuffer(); + ImagePreprocessorBuffer buffer(width * height); + auto &input = buffer.getBuffer(); for (int i = 0; i < width * height; i++) input[i] = (i % 2) * 5 + 5; @@ -166,7 +171,7 @@ TEST_CASE("ImageSpotFinderCPU_SignalToNoise_Mask") { }; std::vector mask_resolution(width * height, false); - auto spots = s.Run(settings, mask_resolution); + auto spots = s.Run(buffer, settings, mask_resolution); REQUIRE(spots.size() == 3); REQUIRE(spots[0].RawCoord().x == 26); diff --git a/tests/ImageSpotFinderGPUTest.cpp b/tests/ImageSpotFinderGPUTest.cpp index 1860b7e1..447d59fc 100644 --- a/tests/ImageSpotFinderGPUTest.cpp +++ b/tests/ImageSpotFinderGPUTest.cpp @@ -7,27 +7,32 @@ #ifdef JFJOCH_USE_CUDA #include "../image_analysis/spot_finding/ImageSpotFinderGPU.h" +#include "../image_analysis/image_preprocessing/ImagePreprocessorBufferGPU.h" -static void fill_test_image(std::vector& input, size_t width, size_t height) { - input.resize(width * height); +static void fill_test_image(ImagePreprocessorBuffer& buffer, size_t width, size_t height) { for (size_t i = 0; i < width * height; i++) - input[i] = (i % 2) * 5 + 5; - input[width * 50 + 50] = 20; - input[width * 25 + 26] = 16; - input[width * 75 + 25] = 12; + buffer[i] = (i % 2) * 5 + 5; + buffer[width * 50 + 50] = 20; + buffer[width * 25 + 26] = 16; + buffer[width * 75 + 25] = 12; } // Helper to run GPU and get DiffractionSpot list via StrongPixelSet -> FindSpotsImage -static std::vector run_gpu_and_collect_spots(const std::vector& input, +static std::vector run_gpu_and_collect_spots(ImagePreprocessorBufferGPU &buffer, size_t width, size_t height, - const SpotFindingSettings& settings, - const std::vector& res_mask) -{ - ImageSpotFinderGPU gpu(static_cast(width), static_cast(height), std::make_shared()); + const SpotFindingSettings &settings, + const std::vector &res_mask) { + auto stream = std::make_shared(); + ImageSpotFinderGPU gpu(static_cast(width), static_cast(height), stream); REQUIRE(get_gpu_count() > 0); - memcpy(gpu.GetInputBuffer().data(), input.data(), width * height * sizeof(int32_t)); - return gpu.Run(settings, res_mask); + REQUIRE(cudaMemcpyAsync(buffer.getGPUBuffer(), + buffer.getBuffer().data(), + width * height * sizeof(int32_t), + cudaMemcpyHostToDevice, + *stream) == cudaSuccess); + + return gpu.Run(buffer, settings, res_mask); } // Mirror of ImageSpotFinder_SignalToNoise @@ -41,8 +46,8 @@ TEST_CASE("ImageSpotFinderGPU_SignalToNoise") { std::vector res_mask(width * height, false); std::vector mask(width * height, false); - std::vector input; - fill_test_image(input, width, height); + ImagePreprocessorBufferGPU buffer(width * height); + fill_test_image(buffer, width, height); SpotFindingSettings settings{ .signal_to_noise_threshold = 3.0, @@ -56,7 +61,7 @@ TEST_CASE("ImageSpotFinderGPU_SignalToNoise") { // GPU produces strong pixels; FindSpotsImage uses mask/resolution implicit in StrongPixelSet. // StrongPixelSet doesn't carry resolution/mask by itself, but FindSpotsImage(settings, vec) // matches CPU ImageSpotFinder test behavior for these synthetic inputs. - auto spots = run_gpu_and_collect_spots(input, width, height, settings, res_mask); + auto spots = run_gpu_and_collect_spots(buffer, width, height, settings, res_mask); REQUIRE(spots.size() == 2); REQUIRE(spots[0].RawCoord().y == 25); @@ -73,8 +78,8 @@ TEST_CASE("ImageSpotFinderGPU_CountThreshold") { std::vector res_mask(width * height, false); std::vector mask(width * height, false); - std::vector input; - fill_test_image(input, width, height); + ImagePreprocessorBufferGPU buffer(width * height); + fill_test_image(buffer, width, height); SpotFindingSettings settings{ .signal_to_noise_threshold = 0.0, @@ -88,7 +93,7 @@ TEST_CASE("ImageSpotFinderGPU_CountThreshold") { // GPU produces strong pixels; FindSpotsImage uses mask/resolution implicit in StrongPixelSet. // StrongPixelSet doesn't carry resolution/mask by itself, but FindSpotsImage(settings, vec) // matches CPU ImageSpotFinder test behavior for these synthetic inputs. - auto spots = run_gpu_and_collect_spots(input, width, height, settings, res_mask); + auto spots = run_gpu_and_collect_spots(buffer, width, height, settings, res_mask); REQUIRE(spots.size() == 3); REQUIRE(spots[0].RawCoord().y == 25); @@ -106,9 +111,8 @@ TEST_CASE("ImageSpotFinderGPU_20M") { std::vector res_mask(width * height, false); std::vector mask(width * height, false); - - std::vector input; - fill_test_image(input, width, height); + ImagePreprocessorBufferGPU buffer(width * height); + fill_test_image(buffer, width, height); SpotFindingSettings settings{ .signal_to_noise_threshold = 3.0, @@ -122,7 +126,7 @@ TEST_CASE("ImageSpotFinderGPU_20M") { // GPU produces strong pixels; FindSpotsImage uses mask/resolution implicit in StrongPixelSet. // StrongPixelSet doesn't carry resolution/mask by itself, but FindSpotsImage(settings, vec) // matches CPU ImageSpotFinder test behavior for these synthetic inputs. - auto spots = run_gpu_and_collect_spots(input, width, height, settings, res_mask); + auto spots = run_gpu_and_collect_spots(buffer, width, height, settings, res_mask); REQUIRE(spots.size() == 2); REQUIRE(spots[0].RawCoord().y == 25); diff --git a/tests/JFJochReceiverLiteTest.cpp b/tests/JFJochReceiverLiteTest.cpp index 3a9f119c..f64b81de 100644 --- a/tests/JFJochReceiverLiteTest.cpp +++ b/tests/JFJochReceiverLiteTest.cpp @@ -12,7 +12,7 @@ #include "../image_puller/TestImagePuller.h" TEST_CASE("JFJochReceiverLite", "[JFJochReceiver]") { - Logger logger("FJochReceiverLite"); + Logger logger("JFJochReceiverLite"); RegisterHDF5Filter();