From 310196d65095bb5d3b368cda414aa31249485d36 Mon Sep 17 00:00:00 2001 From: Filip Leonarski Date: Thu, 27 Apr 2023 22:01:55 +0200 Subject: [PATCH] CUDA is no longer compulsory --- README.md | 23 +++++---- image_analysis/CMakeLists.txt | 54 ++++++++++++-------- image_analysis/GPUImageAnalysis_Alt.cpp | 67 +++++++++++++++++++++++++ image_analysis/IndexerWrapper.cpp | 6 +++ image_analysis/IndexerWrapper.h | 4 ++ receiver/JFJochReceiver.cpp | 5 +- receiver/jfjoch_action_test.cpp | 3 +- 7 files changed, 129 insertions(+), 33 deletions(-) create mode 100644 image_analysis/GPUImageAnalysis_Alt.cpp diff --git a/README.md b/README.md index 3f38fc2f..1d372252 100644 --- a/README.md +++ b/README.md @@ -26,17 +26,20 @@ Instructions see [here](receiver/README.md) ## Software ### Dependencies -1. C++20 compiler and C++20 standard library (NOT provided by default RHEL 7 installation, need to install Developer Tools, tested with `devtools-11`) -2. CMake version 3.21 or newer + GNU make tool -3. HDF5 library version 1.10 or newer -4. ZeroMQ library -5. Google Remote Procedure Call (gRPC) - see notes below -6. CUDA compiler version 11 or newer (compulsory for receiver/tests) -7. Mellanox OFED - Infinibands Verbs (optional) -8. NUMA library (optional) -9. Node.js (optional) - to make frontend +Required: +* C++20 compiler and C++20 standard library; recommended GCC 11+ or clang 14+ (Intel OneAPI, AMD AOCC) +* CMake version 3.21 or newer + GNU make tool +* HDF5 library version 1.10 or newer +* ZeroMQ library +* Google Remote Procedure Call (gRPC) - see notes below -Additional dependencies: SLS Detector Package, tinycbor (Intel) and Zstandard (Facebook) are provided as GIT submodules. +Optional: +* CUDA compiler version 11 or newer - image analysis features won't work without it +* Mellanox OFED - InfiniBand Verbs +* NUMA library +* Node.js - to make frontend + +Provided as Git submodules: SLS Detector Package, tinycbor (Intel) and Zstandard (Facebook). 
Directly included in the repository: * JSON parser/writer from N. Lohmann - see [github.com/nlohmann/json](https://github.com/nlohmann/json) diff --git a/image_analysis/CMakeLists.txt b/image_analysis/CMakeLists.txt index d3beb89d..6282ba8f 100644 --- a/image_analysis/CMakeLists.txt +++ b/image_analysis/CMakeLists.txt @@ -1,36 +1,48 @@ INCLUDE(CheckLanguage) CHECK_LANGUAGE(CUDA) -IF (CMAKE_CUDA_COMPILER) - ENABLE_LANGUAGE(CUDA) - MESSAGE(STATUS "CUDA VERSION: ${CMAKE_CUDA_COMPILER_VERSION}") -ELSE() - MESSAGE( FATAL_ERROR "CUDA is compulsory for image analysis." ) -ENDIF() SET(CMAKE_CUDA_ARCHITECTURES 70 75 80 86) # V100, T4, A100, RTX A4000 SET(CMAKE_CUDA_STANDARD 17) SET(CMAKE_CUDA_FLAGS_RELEASE "-O3") + ADD_LIBRARY(ImageAnalysis STATIC CrystalLattice.cpp CrystalLattice.h IndexerWrapper.cpp IndexerWrapper.h - GPUImageAnalysis.cu GPUImageAnalysis.h + GPUImageAnalysis.h RadialIntegration.cpp RadialIntegration.h RadialIntegrationMapping.cpp RadialIntegrationMapping.h - StrongPixelSet.cpp StrongPixelSet.h - fast-feedback-indexer/indexer/src/indexer.cpp - fast-feedback-indexer/indexer/src/ffbidx/indexer.h - fast-feedback-indexer/indexer/src/indexer_gpu.cu - fast-feedback-indexer/indexer/src/ffbidx/indexer_gpu.h - fast-feedback-indexer/indexer/src/log.cpp - fast-feedback-indexer/indexer/src/ffbidx/refine.h - fast-feedback-indexer/indexer/src/ffbidx/log.h - fast-feedback-indexer/indexer/src/ffbidx/exception.h) + StrongPixelSet.cpp StrongPixelSet.h GPUImageAnalysis_Alt.cpp) -TARGET_INCLUDE_DIRECTORIES(ImageAnalysis PUBLIC - fast-feedback-indexer/indexer/src/ - fast-feedback-indexer/eigen) +TARGET_LINK_LIBRARIES(ImageAnalysis CommonFunctions) -FIND_LIBRARY(CUDART_LIBRARY cudart_static PATHS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED) -TARGET_LINK_LIBRARIES(ImageAnalysis CommonFunctions ${CUDART_LIBRARY} ${CMAKE_DL_LIBS} rt) +TARGET_INCLUDE_DIRECTORIES(ImageAnalysis PUBLIC fast-feedback-indexer/eigen) + +IF (CMAKE_CUDA_COMPILER) + ENABLE_LANGUAGE(CUDA) + 
MESSAGE(STATUS "CUDA VERSION: ${CMAKE_CUDA_COMPILER_VERSION}") + + TARGET_SOURCES(ImageAnalysis PRIVATE GPUImageAnalysis.cu ) + + TARGET_COMPILE_DEFINITIONS(ImageAnalysis PUBLIC -DJFJOCH_USE_CUDA) + + TARGET_SOURCES(ImageAnalysis PRIVATE + fast-feedback-indexer/indexer/src/indexer.cpp + fast-feedback-indexer/indexer/src/indexer_gpu.cu + fast-feedback-indexer/indexer/src/log.cpp) + + TARGET_SOURCES(ImageAnalysis PUBLIC + fast-feedback-indexer/indexer/src/ffbidx/indexer.h + fast-feedback-indexer/indexer/src/ffbidx/indexer_gpu.h + fast-feedback-indexer/indexer/src/ffbidx/refine.h + fast-feedback-indexer/indexer/src/ffbidx/log.h + fast-feedback-indexer/indexer/src/ffbidx/exception.h) + + TARGET_INCLUDE_DIRECTORIES(ImageAnalysis PUBLIC fast-feedback-indexer/indexer/src/) + + FIND_LIBRARY(CUDART_LIBRARY cudart_static PATHS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED) + TARGET_LINK_LIBRARIES(ImageAnalysis ${CUDART_LIBRARY} ${CMAKE_DL_LIBS} rt) +ELSE() + MESSAGE(WARNING "CUDA is strongly recommended for image analysis." 
) +ENDIF() diff --git a/image_analysis/GPUImageAnalysis_Alt.cpp b/image_analysis/GPUImageAnalysis_Alt.cpp new file mode 100644 index 00000000..b2b12019 --- /dev/null +++ b/image_analysis/GPUImageAnalysis_Alt.cpp @@ -0,0 +1,67 @@ +// Copyright (2019-2023) Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef JFJOCH_USE_CUDA + +#include "GPUImageAnalysis.h" + +struct CudaStreamWrapper { + int32_t just_anything_as_this_wont_be_used; +}; + +GPUImageAnalysis::GPUImageAnalysis(int32_t in_xpixels, int32_t in_ypixels, const std::vector &mask, + int32_t gpu_device) : + xpixels(in_xpixels), ypixels(in_ypixels), gpu_out(nullptr), rad_integration_nbins(0) { +} + +GPUImageAnalysis::GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector &mask, + const std::vector &rad_int_mapping, uint16_t rad_int_nbins, + int32_t gpu_device) : GPUImageAnalysis(xpixels, ypixels, mask, gpu_device) {} + +GPUImageAnalysis::GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector &mask, + const RadialIntegrationMapping& mapping, + int32_t gpu_device) + : GPUImageAnalysis(xpixels, ypixels, mask, mapping.GetPixelToBinMapping(), + mapping.GetBinNumber(), gpu_device) {} + +GPUImageAnalysis::~GPUImageAnalysis() {} + +void GPUImageAnalysis::SetInputBuffer(void *ptr) {} + +bool GPUImageAnalysis::GPUPresent() { + return false; +} + +void GPUImageAnalysis::RunSpotFinder(const JFJochProtoBuf::DataProcessingSettings &settings) {} + +void GPUImageAnalysis::GetSpotFinderResults(StrongPixelSet &pixel_set) {} + +void GPUImageAnalysis::GetSpotFinderResults(const DiffractionExperiment &experiment, + const JFJochProtoBuf::DataProcessingSettings &settings, + std::vector &vec) {} + +void GPUImageAnalysis::RegisterBuffer() {} + +void GPUImageAnalysis::UnregisterBuffer() {} + +void GPUImageAnalysis::LoadDataToGPU(bool apply_pixel_mask_on_gpu) {} + +void GPUImageAnalysis::RunRadialIntegration() {} + +void GPUImageAnalysis::GetRadialIntegrationProfile(std::vector 
&result) {} + +std::vector GPUImageAnalysis::GetRadialIntegrationSum() const { + return {}; +} + +std::vector GPUImageAnalysis::GetRadialIntegrationCount() const { + return {}; +} + +float GPUImageAnalysis::GetRadialIntegrationRangeValue(uint16_t min_bin, uint16_t max_bin) { + return 0; +} + +std::atomic GPUImageAnalysis::threadid{0}; + +#endif diff --git a/image_analysis/IndexerWrapper.cpp b/image_analysis/IndexerWrapper.cpp index 033622b8..131e2f59 100644 --- a/image_analysis/IndexerWrapper.cpp +++ b/image_analysis/IndexerWrapper.cpp @@ -4,10 +4,13 @@ #include "IndexerWrapper.h" void IndexerWrapper::Setup(const UnitCell &cell) { +#ifdef JFJOCH_USE_CUDA indexer.iCellM() = CrystalLattice(cell).GetEigenMatrix(); +#endif } std::vector IndexerWrapper::Run(const std::vector &coord) { +#ifdef JFJOCH_USE_CUDA std::vector ret; if (coord.size() < MIN_SPOTS_TO_INDEX) @@ -32,4 +35,7 @@ std::vector IndexerWrapper::Run(const std::vector &coord) ret.emplace_back(indexer.oCell(id)); return ret; +#else + return {}; +#endif } \ No newline at end of file diff --git a/image_analysis/IndexerWrapper.h b/image_analysis/IndexerWrapper.h index 304ae7fd..2d1dc84f 100644 --- a/image_analysis/IndexerWrapper.h +++ b/image_analysis/IndexerWrapper.h @@ -9,12 +9,15 @@ #include "../common/Coord.h" #include "CrystalLattice.h" +#ifdef JFJOCH_USE_CUDA #include +#endif #define MIN_SPOTS_TO_INDEX (10) #define MAX_SPOTS_TO_INDEX (100) class IndexerWrapper { +#ifdef JFJOCH_USE_CUDA fast_feedback::config_runtime crt{ .num_sample_points = 32768 }; @@ -25,6 +28,7 @@ class IndexerWrapper { }; // default persistent config fast_feedback::refine::config_ifss conf_ifss{}; fast_feedback::refine::indexer_ifss indexer{cpers, crt, conf_ifss}; +#endif public: void Setup(const UnitCell &cell); std::vector Run(const std::vector &coord); diff --git a/receiver/JFJochReceiver.cpp b/receiver/JFJochReceiver.cpp index 1396204d..88c4d658 100644 --- a/receiver/JFJochReceiver.cpp +++ b/receiver/JFJochReceiver.cpp @@ -70,6 
+70,9 @@ JFJochReceiver::JFJochReceiver(const JFJochProtoBuf::ReceiverInput &settings, if (preview_publisher != nullptr) preview_publisher->Start(experiment, calib.value()); + if (!GPUImageAnalysis::GPUPresent()) + logger.Info("GPU support missing"); + rad_int_mapping = std::make_unique(experiment, one_byte_mask.data()); spot_finder_mask = calib->CalculateOneByteMask(experiment); } @@ -308,7 +311,7 @@ int64_t JFJochReceiver::FrameTransformationThread() { bool send_image = false; // We send image if at least one module was collected in full - if ((spotfinder_stride > 0) && (image_number % spotfinder_stride == 0)) { + if (GPUImageAnalysis::GPUPresent() && (spotfinder_stride > 0) && (image_number % spotfinder_stride == 0)) { calculate_spots = true; if (rad_int_mapping) send_bkg_estimate = true; diff --git a/receiver/jfjoch_action_test.cpp b/receiver/jfjoch_action_test.cpp index 2244a575..cf020532 100644 --- a/receiver/jfjoch_action_test.cpp +++ b/receiver/jfjoch_action_test.cpp @@ -32,7 +32,8 @@ int main(int argc, char **argv) { for (int i = 0; i < nstreams; i++) detector_geom.push_back(nmodules); - DiffractionExperiment x(1, detector_geom); + DiffractionExperiment x(DetectorGeometry(nmodules, 2, 8, 36, true)); + x.Mode(DetectorMode::Conversion); x.ImagesPerTrigger(nimages).PedestalG0Frames(0).UseInternalPacketGenerator(true).PhotonEnergy_keV(12.4).NumTriggers(1); x.SpotFindingPeriod(std::chrono::milliseconds(processing_period)).MaskModuleEdges(false).MaskChipEdges(false);