From bb87ed2df46601ea7bec15cd33ee65b948cab4ae Mon Sep 17 00:00:00 2001 From: Filip Leonarski Date: Thu, 22 Jan 2026 21:20:20 +0100 Subject: [PATCH] BraggPredictionRot: Work in progress --- .../bragg_prediction/BraggPrediction.h | 1 + .../bragg_prediction/BraggPredictionRot.cpp | 140 +++++++++ .../bragg_prediction/BraggPredictionRot.h | 12 + .../bragg_prediction/BraggPredictionRotGPU.cu | 272 ++++++++++++++++++ .../bragg_prediction/BraggPredictionRotGPU.h | 48 ++++ .../bragg_prediction/CMakeLists.txt | 5 + 6 files changed, 478 insertions(+) create mode 100644 image_analysis/bragg_prediction/BraggPredictionRot.cpp create mode 100644 image_analysis/bragg_prediction/BraggPredictionRot.h create mode 100644 image_analysis/bragg_prediction/BraggPredictionRotGPU.cu create mode 100644 image_analysis/bragg_prediction/BraggPredictionRotGPU.h diff --git a/image_analysis/bragg_prediction/BraggPrediction.h b/image_analysis/bragg_prediction/BraggPrediction.h index 75eb7116..e6d54deb 100644 --- a/image_analysis/bragg_prediction/BraggPrediction.h +++ b/image_analysis/bragg_prediction/BraggPrediction.h @@ -15,6 +15,7 @@ struct BraggPredictionSettings { float ewald_dist_cutoff = 0.0005; int max_hkl = 100; char centering = 'P'; + float max_angle = 0.2f; }; class BraggPrediction { diff --git a/image_analysis/bragg_prediction/BraggPredictionRot.cpp b/image_analysis/bragg_prediction/BraggPredictionRot.cpp new file mode 100644 index 00000000..f8982c9d --- /dev/null +++ b/image_analysis/bragg_prediction/BraggPredictionRot.cpp @@ -0,0 +1,140 @@ +// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#include "BraggPredictionRot.h" +#include "../bragg_integration/SystematicAbsence.h" + + +int BraggPredictionRot::Calc(const DiffractionExperiment &experiment, const CrystalLattice &lattice, + const BraggPredictionSettings &settings) { + + const auto geom = experiment.GetDiffractionGeometry(); + const auto det_width_pxl = static_cast(experiment.GetXPixelsNum()); + const auto det_height_pxl = static_cast(experiment.GetYPixelsNum()); + + const float one_over_dmax = 1.0f / settings.high_res_A; + const float one_over_dmax_sq = one_over_dmax * one_over_dmax; + + float one_over_wavelength = 1.0f / geom.GetWavelength_A(); + + const Coord Astar = lattice.Astar(); + const Coord Bstar = lattice.Bstar(); + const Coord Cstar = lattice.Cstar(); + const Coord S0 = geom.GetScatteringVector(); + + std::vector rot = geom.GetPoniRotMatrix().transpose().arr(); + + // Precompute detector geometry constants + float beam_x = geom.GetBeamX_pxl(); + float beam_y = geom.GetBeamY_pxl(); + float det_distance = geom.GetDetectorDistance_mm(); + float pixel_size = geom.GetPixelSize_mm(); + float F = det_distance / pixel_size; + + const auto gon_opt = experiment.GetGoniometer(); + if (!gon_opt.has_value()) + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "BraggPredictionRotationCPU requires a goniometer axis"); + const GoniometerAxis& gon = *gon_opt; + + const Coord m2 = gon.GetAxis().Normalize(); + const Coord m1 = (m2 % S0).Normalize(); + const Coord m3 = (m1 % m2).Normalize(); + + const float m2_S0 = m2 * S0; + const float m3_S0 = m3 * S0; + + int i = 0; + const float max_angle_rad = settings.max_angle * static_cast(M_PI) / 180.f; + + for (int h = -settings.max_hkl; h <= settings.max_hkl; h++) { + // Precompute A* h contribution + + for (int k = -settings.max_hkl; k <= settings.max_hkl; k++) { + // Accumulate B* k contribution + + for (int l = -settings.max_hkl; l <= settings.max_hkl; l++) { + if (systematic_absence(h, k, l, settings.centering)) + continue; + + if (i >= max_reflections) + continue; + Coord p0 = Astar * h + Bstar * k + Cstar * l; + + float p0_sq = p0 * p0; + if (p0_sq <= 0.0f || p0_sq > one_over_dmax_sq) + continue; + + const float p0_m1 = p0 * m1; + const float p0_m2 = p0 * m2; + const float p0_m3 = p0 * m3; + + const float rho_sq = p0_sq - (p0_m2 * p0_m2); + + const float p_m3 = (- p0_sq / 2 - p0_m2 * m2_S0) / m3_S0; + const float p_m2 = p0_m2; + const float p_m1_opt[2] = { + std::sqrt(rho_sq - p_m3 * p_m3), + -std::sqrt(rho_sq - p_m3 * p_m3) + }; + + // No solution for Laue equations + if ((rho_sq < p_m3 * p_m3) || (p0_sq > 4 * S0 * S0)) + continue; + + for (const auto& p_m1 : p_m1_opt) { + if (i >= max_reflections) + continue; + + const float cosphi = (p_m1 * p0_m1 + p_m3 * p0_m3) / rho_sq; + const float sinphi = (p_m1 * p0_m3 - p_m3 * p0_m1) / rho_sq; + Coord p = m1 * p_m1 + m2 * p_m2 + m3 * p_m3; // p0 vector "rotated" to diffracting condition + Coord S = S0 + p; + + float phi = -1.0f * std::atan2(sinphi, cosphi) * 180.0f / static_cast(M_PI); + + if (phi > max_angle_rad || phi < -max_angle_rad) + continue; + + const float lorentz_reciprocal = std::fabs(m2 * (S % S0))/(S*S0); + + Coord S_local = S0 + p0; + + // Inlined RecipToDector with rot1 and rot2 (rot3 = 0) + // Apply rotation matrix transpose + float S_rot_x = rot[0] * S_local.x + rot[1] * S_local.y + rot[2] * S_local.z; + float S_rot_y = rot[3] * S_local.x + rot[4] * S_local.y + rot[5] * S_local.z; + float S_rot_z = rot[6] * S_local.x + rot[7] * S_local.y + rot[8] * S_local.z; + + if (S_rot_z <= 0) + continue; + + float x = beam_x + F * S_rot_x / S_rot_z; + float y = beam_y + F * S_rot_y / S_rot_z; + + if ((x < 0) || (x >= det_width_pxl) || (y < 0) || (y >= det_height_pxl)) + continue; + + float dist_ewald_sphere = std::fabs(p0_sq - one_over_wavelength); + + float d = 1.0f / sqrtf(p0_sq); + reflections[i] = Reflection{ + .h = h, + .k = k, + .l = l, + .predicted_x = x, + .predicted_y = y, + .d = d, + .dist_ewald = dist_ewald_sphere, + .lp = lorentz_reciprocal, + .S_x = S.x, + .S_y = S.y, + .S_z = S.z + }; + i++; + } + } + } + } + return i; +} diff --git a/image_analysis/bragg_prediction/BraggPredictionRot.h b/image_analysis/bragg_prediction/BraggPredictionRot.h new file mode 100644 index 00000000..2973e76c --- /dev/null +++ b/image_analysis/bragg_prediction/BraggPredictionRot.h @@ -0,0 +1,12 @@ +// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#pragma once + +#include "BraggPrediction.h" + +class BraggPredictionRot : public BraggPrediction { + int Calc(const DiffractionExperiment &experiment, + const CrystalLattice &lattice, + const BraggPredictionSettings &settings) override; +}; diff --git a/image_analysis/bragg_prediction/BraggPredictionRotGPU.cu b/image_analysis/bragg_prediction/BraggPredictionRotGPU.cu new file mode 100644 index 00000000..25cfec18 --- /dev/null +++ b/image_analysis/bragg_prediction/BraggPredictionRotGPU.cu @@ -0,0 +1,272 @@ +// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#include "BraggPredictionRotGPU.h" + +#ifdef JFJOCH_USE_CUDA +#include "../indexing/CUDAMemHelpers.h" +#include +#include + +namespace { + __device__ inline bool is_odd(int v) { return (v & 1) != 0; } + + __device__ inline void cross3(float ax, float ay, float az, + float bx, float by, float bz, + float &cx, float &cy, float &cz) { + cx = ay * bz - az * by; + cy = az * bx - ax * bz; + cz = ax * by - ay * bx; + } + + __device__ inline float dot3(float ax, float ay, float az, + float bx, float by, float bz) { + return ax * bx + ay * by + az * bz; + } + + __device__ inline void normalize3(float &x, float &y, float &z) { + float len = sqrtf(x * x + y * y + z * z); + if (len < 1e-12f) { x = 0.0f; y = 0.0f; z = 0.0f; return; } + float inv = 1.0f / len; + x *= inv; y *= inv; z *= inv; + } + + __device__ inline bool compute_reflection_rot(const KernelConstsRot &C, int h, int k, int l, Reflection &out) { + if (h == 0 && k == 0 && l == 0) + return false; + + switch (C.centering) { + case 'I': + if (is_odd(h + k + l)) return false; + break; + case 'A': + if (is_odd(k + l)) return false; + break; + case 'B': + if (is_odd(h + l)) return false; + break; + case 'C': + if (is_odd(h + k)) return false; + break; + case 'F': + if (is_odd(h + k) || is_odd(h + l) || is_odd(k + l)) return false; + break; + case 'R': { + int mod = (-h + k + l) % 3; + if (mod < 0) mod += 3; + if (mod != 0) return false; + break; + } + default: + break; + } + + // p0 = A* h + B* k + C* l + float p0x = C.Astar.x * h + C.Bstar.x * k + C.Cstar.x * l; + float p0y = C.Astar.y * h + C.Bstar.y * k + C.Cstar.y * l; + float p0z = C.Astar.z * h + C.Bstar.z * k + C.Cstar.z * l; + + float p0_sq = p0x * p0x + p0y * p0y + p0z * p0z; + if (p0_sq <= 0.0f || p0_sq > C.one_over_dmax_sq) + return false; + + float p0_m1 = p0x * C.m1.x + p0y * C.m1.y + p0z * C.m1.z; + float p0_m2 = p0x * C.m2.x + p0y * C.m2.y + p0z * C.m2.z; + float p0_m3 = p0x * C.m3.x + p0y * C.m3.y + p0z * C.m3.z; + + float rho_sq = p0_sq - (p0_m2 * p0_m2); + float p_m3 = (-p0_sq / 2.0f - p0_m2 * C.m2_S0) / C.m3_S0; + float p_m2 = p0_m2; + + if (rho_sq < p_m3 * p_m3) return false; + if (p0_sq > 4.0f * dot3(C.S0.x, C.S0.y, C.S0.z, C.S0.x, C.S0.y, C.S0.z)) return false; + + float p_m1_pos = sqrtf(rho_sq - p_m3 * p_m3); + float p_m1_neg = -p_m1_pos; + + float p_m1_arr[2] = {p_m1_pos, p_m1_neg}; + + for (int idx = 0; idx < 2; ++idx) { + float p_m1 = p_m1_arr[idx]; + + float cosphi = (p_m1 * p0_m1 + p_m3 * p0_m3) / rho_sq; + float sinphi = (p_m1 * p0_m3 - p_m3 * p0_m1) / rho_sq; + + // p = m1*p_m1 + m2*p_m2 + m3*p_m3 + float px = C.m1.x * p_m1 + C.m2.x * p_m2 + C.m3.x * p_m3; + float py = C.m1.y * p_m1 + C.m2.y * p_m2 + C.m3.y * p_m3; + float pz = C.m1.z * p_m1 + C.m2.z * p_m2 + C.m3.z * p_m3; + + float Sx = C.S0.x + px; + float Sy = C.S0.y + py; + float Sz = C.S0.z + pz; + + float phi = -1.0f * atan2f(sinphi, cosphi) * 180.0f / static_cast(M_PI); + if (phi > C.max_angle_rad || phi < -C.max_angle_rad) + continue; + + // lorentz = |m2 . (S x S0)| / (S . S0) + float cx, cy, cz; + cross3(Sx, Sy, Sz, C.S0.x, C.S0.y, C.S0.z, cx, cy, cz); + float lorentz = fabsf(dot3(C.m2.x, C.m2.y, C.m2.z, cx, cy, cz)) / + dot3(Sx, Sy, Sz, C.S0.x, C.S0.y, C.S0.z); + + // S_local = S0 + p0 (note: p0, not p) + float Slx = C.S0.x + p0x; + float Sly = C.S0.y + p0y; + float Slz = C.S0.z + p0z; + + // Rotate to detector + float Srx = C.rot[0] * Slx + C.rot[1] * Sly + C.rot[2] * Slz; + float Sry = C.rot[3] * Slx + C.rot[4] * Sly + C.rot[5] * Slz; + float Srz = C.rot[6] * Slx + C.rot[7] * Sly + C.rot[8] * Slz; + + if (Srz <= 0.0f) continue; + + float coeff = C.coeff_const / Srz; + float x = C.beam_x + Srx * coeff; + float y = C.beam_y + Sry * coeff; + + if (x < 0.0f || x >= C.det_width_pxl || y < 0.0f || y >= C.det_height_pxl) + continue; + + float dist_ewald = fabsf(p0_sq - C.one_over_wavelength); + + out.h = h; + out.k = k; + out.l = l; + out.predicted_x = x; + out.predicted_y = y; + out.d = 1.0f / sqrtf(p0_sq); + out.dist_ewald = dist_ewald; + out.lp = lorentz; + out.S_x = Sx; + out.S_y = Sy; + out.S_z = Sz; + return true; + } + + return false; + } + + __global__ void bragg_rot_kernel_3d(const KernelConstsRot *__restrict__ kc, + int max_hkl, + int max_reflections, + Reflection *__restrict__ out, + int *__restrict__ counter) { + int range = 2 * max_hkl + 1; + int hi = blockIdx.x * blockDim.x + threadIdx.x; + int ki = blockIdx.y * blockDim.y + threadIdx.y; + int li = blockIdx.z * blockDim.z + threadIdx.z; + if (hi >= range || ki >= range || li >= range) return; + int h = hi - max_hkl; + int k = ki - max_hkl; + int l = li - max_hkl; + + Reflection r{}; + if (!compute_reflection_rot(*kc, h, k, l, r)) return; + + int pos = atomicAdd(counter, 1); + if (pos < max_reflections) out[pos] = r; + else atomicSub(counter, 1); + } + + inline KernelConstsRot BuildKernelConstsRot(const DiffractionExperiment &experiment, + const CrystalLattice &lattice, + float high_res_A, + float max_angle_deg, + char centering) { + KernelConstsRot kc{}; + auto geom = experiment.GetDiffractionGeometry(); + + kc.det_width_pxl = static_cast(experiment.GetXPixelsNum()); + kc.det_height_pxl = static_cast(experiment.GetYPixelsNum()); + kc.beam_x = geom.GetBeamX_pxl(); + kc.beam_y = geom.GetBeamY_pxl(); + kc.coeff_const = geom.GetDetectorDistance_mm() / geom.GetPixelSize_mm(); + + float one_over_dmax = 1.0f / high_res_A; + kc.one_over_dmax_sq = one_over_dmax * one_over_dmax; + kc.one_over_wavelength = 1.0f / geom.GetWavelength_A(); + kc.max_angle_rad = max_angle_deg * static_cast(M_PI) / 180.0f; + + kc.Astar = lattice.Astar(); + kc.Bstar = lattice.Bstar(); + kc.Cstar = lattice.Cstar(); + kc.S0 = geom.GetScatteringVector(); + + auto rotT = geom.GetPoniRotMatrix().transpose().arr(); + for (int i = 0; i < 9; ++i) kc.rot[i] = rotT[i]; + + kc.centering = centering; + return kc; + } + + inline void BuildGoniometerBasis(const DiffractionExperiment &experiment, KernelConstsRot &kc) { + const auto gon_opt = experiment.GetGoniometer(); + if (!gon_opt.has_value()) + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "BraggPredictionRotationGPU requires a goniometer axis"); + const GoniometerAxis &gon = *gon_opt; + + // m2 = normalize(axis) + float m2x = gon.GetAxis().x; + float m2y = gon.GetAxis().y; + float m2z = gon.GetAxis().z; + normalize3(m2x, m2y, m2z); + + // m1 = normalize(m2 x S0) + float m1x, m1y, m1z; + cross3(m2x, m2y, m2z, kc.S0.x, kc.S0.y, kc.S0.z, m1x, m1y, m1z); + normalize3(m1x, m1y, m1z); + + // m3 = normalize(m1 x m2) + float m3x, m3y, m3z; + cross3(m1x, m1y, m1z, m2x, m2y, m2z, m3x, m3y, m3z); + normalize3(m3x, m3y, m3z); + + kc.m1 = Coord(m1x, m1y, m1z); + kc.m2 = Coord(m2x, m2y, m2z); + kc.m3 = Coord(m3x, m3y, m3z); + kc.m2_S0 = dot3(m2x, m2y, m2z, kc.S0.x, kc.S0.y, kc.S0.z); + kc.m3_S0 = dot3(m3x, m3y, m3z, kc.S0.x, kc.S0.y, kc.S0.z); + } +} // namespace + +BraggPredictionRotGPU::BraggPredictionRotGPU(int max_reflections) + : BraggPrediction(max_reflections), + reg_out(reflections), d_out(max_reflections), + dK(1), d_count(1), h_count(1) { +} + +int BraggPredictionRotGPU::Calc(const DiffractionExperiment &experiment, + const CrystalLattice &lattice, + const BraggPredictionSettings &settings) { + KernelConstsRot hK = BuildKernelConstsRot(experiment, lattice, settings.high_res_A, settings.max_angle, settings.centering); + BuildGoniometerBasis(experiment, hK); + + cudaMemcpyAsync(dK, &hK, sizeof(KernelConstsRot), cudaMemcpyHostToDevice, stream); + cudaMemsetAsync(d_count, 0, sizeof(int), stream); + + const int range = 2 * settings.max_hkl; + dim3 block(8, 8, 8); + dim3 grid((range + block.x - 1) / block.x, + (range + block.y - 1) / block.y, + (range + block.z - 1) / block.z); + + bragg_rot_kernel_3d<<>>(dK, settings.max_hkl, max_reflections, d_out, d_count); + + cudaMemcpyAsync(h_count, d_count, sizeof(int), cudaMemcpyDeviceToHost, stream); + cudaStreamSynchronize(stream); + + int count = *h_count.get(); + if (count > max_reflections) count = max_reflections; + if (count == 0) return 0; + + cudaMemcpyAsync(reflections.data(), d_out, sizeof(Reflection) * count, cudaMemcpyDeviceToHost, stream); + cudaStreamSynchronize(stream); + + return count; +} + +#endif \ No newline at end of file diff --git a/image_analysis/bragg_prediction/BraggPredictionRotGPU.h b/image_analysis/bragg_prediction/BraggPredictionRotGPU.h new file mode 100644 index 00000000..179a7869 --- /dev/null +++ b/image_analysis/bragg_prediction/BraggPredictionRotGPU.h @@ -0,0 +1,48 @@ +// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + + +#pragma once + +#include +#include "BraggPrediction.h" +#include "../indexing/CUDAMemHelpers.h" + +struct KernelConstsRot { + float det_width_pxl; + float det_height_pxl; + float beam_x; + float beam_y; + float coeff_const; + float one_over_wavelength; + float one_over_dmax_sq; + float max_angle_rad; + Coord Astar, Bstar, Cstar, S0; + Coord m1, m2, m3; + float m2_S0; + float m3_S0; + float rot[9]; + char centering; +}; + +class BraggPredictionRotGPU : public BraggPrediction { + CudaRegisteredVector reg_out; // requires stable storage + CudaDevicePtr d_out; + + // Dedicated stream + CudaStream stream; + + // Device allocations via helpers + CudaDevicePtr dK; + CudaDevicePtr d_count; + + // Host pinned buffer for async download (optional but faster) + CudaHostPtr h_count; + +public: + explicit BraggPredictionRotGPU(int max_reflections = 10000); + + int Calc(const DiffractionExperiment &experiment, + const CrystalLattice &lattice, + const BraggPredictionSettings &settings) override; +}; diff --git a/image_analysis/bragg_prediction/CMakeLists.txt b/image_analysis/bragg_prediction/CMakeLists.txt index 656dc783..7528ed73 100644 --- a/image_analysis/bragg_prediction/CMakeLists.txt +++ b/image_analysis/bragg_prediction/CMakeLists.txt @@ -5,6 +5,9 @@ ADD_LIBRARY(JFJochBraggPrediction STATIC BraggPredictionFactory.h BraggPredictionRotation.cpp BraggPredictionRotation.h + BraggPredictionRot.cpp + BraggPredictionRot.h + ) TARGET_LINK_LIBRARIES(JFJochBraggPrediction JFJochCommon) @@ -12,5 +15,7 @@ TARGET_LINK_LIBRARIES(JFJochBraggPrediction JFJochCommon) IF (JFJOCH_CUDA_AVAILABLE) TARGET_SOURCES(JFJochBraggPrediction PRIVATE ../indexing/CUDAMemHelpers.h + BraggPredictionRotGPU.cu + BraggPredictionRotGPU.h BraggPredictionGPU.cu BraggPredictionGPU.h) ENDIF()