BraggPredictionRot: Work in progress

This commit is contained in:
2026-01-22 21:20:20 +01:00
parent af3fea3e66
commit bb87ed2df4
6 changed files with 478 additions and 0 deletions

View File

@@ -15,6 +15,7 @@ struct BraggPredictionSettings {
float ewald_dist_cutoff = 0.0005;
int max_hkl = 100;
char centering = 'P';
float max_angle = 0.2f;
};
class BraggPrediction {

View File

@@ -0,0 +1,140 @@
// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#include "BraggPredictionRot.h"
#include "../bragg_integration/SystematicAbsence.h"
int BraggPredictionRot::Calc(const DiffractionExperiment &experiment, const CrystalLattice &lattice,
const BraggPredictionSettings &settings) {
const auto geom = experiment.GetDiffractionGeometry();
const auto det_width_pxl = static_cast<float>(experiment.GetXPixelsNum());
const auto det_height_pxl = static_cast<float>(experiment.GetYPixelsNum());
const float one_over_dmax = 1.0f / settings.high_res_A;
const float one_over_dmax_sq = one_over_dmax * one_over_dmax;
float one_over_wavelength = 1.0f / geom.GetWavelength_A();
const Coord Astar = lattice.Astar();
const Coord Bstar = lattice.Bstar();
const Coord Cstar = lattice.Cstar();
const Coord S0 = geom.GetScatteringVector();
std::vector<float> rot = geom.GetPoniRotMatrix().transpose().arr();
// Precompute detector geometry constants
float beam_x = geom.GetBeamX_pxl();
float beam_y = geom.GetBeamY_pxl();
float det_distance = geom.GetDetectorDistance_mm();
float pixel_size = geom.GetPixelSize_mm();
float F = det_distance / pixel_size;
const auto gon_opt = experiment.GetGoniometer();
if (!gon_opt.has_value())
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
"BraggPredictionRotationCPU requires a goniometer axis");
const GoniometerAxis& gon = *gon_opt;
const Coord m2 = gon.GetAxis().Normalize();
const Coord m1 = (m2 % S0).Normalize();
const Coord m3 = (m1 % m2).Normalize();
const float m2_S0 = m2 * S0;
const float m3_S0 = m3 * S0;
int i = 0;
const float max_angle_rad = settings.max_angle * static_cast<float>(M_PI) / 180.f;
for (int h = -settings.max_hkl; h <= settings.max_hkl; h++) {
// Precompute A* h contribution
for (int k = -settings.max_hkl; k <= settings.max_hkl; k++) {
// Accumulate B* k contribution
for (int l = -settings.max_hkl; l <= settings.max_hkl; l++) {
if (systematic_absence(h, k, l, settings.centering))
continue;
if (i >= max_reflections)
continue;
Coord p0 = Astar * h + Bstar * k + Cstar * l;
float p0_sq = p0 * p0;
if (p0_sq <= 0.0f || p0_sq > one_over_dmax_sq)
continue;
const float p0_m1 = p0 * m1;
const float p0_m2 = p0 * m2;
const float p0_m3 = p0 * m3;
const float rho_sq = p0_sq - (p0_m2 * p0_m2);
const float p_m3 = (- p0_sq / 2 - p0_m2 * m2_S0) / m3_S0;
const float p_m2 = p0_m2;
const float p_m1_opt[2] = {
std::sqrt(rho_sq - p_m3 * p_m3),
-std::sqrt(rho_sq - p_m3 * p_m3)
};
// No solution for Laue equations
if ((rho_sq < p_m3 * p_m3) || (p0_sq > 4 * S0 * S0))
continue;
for (const auto& p_m1 : p_m1_opt) {
if (i >= max_reflections)
continue;
const float cosphi = (p_m1 * p0_m1 + p_m3 * p0_m3) / rho_sq;
const float sinphi = (p_m1 * p0_m3 - p_m3 * p0_m1) / rho_sq;
Coord p = m1 * p_m1 + m2 * p_m2 + m3 * p_m3; // p0 vector "rotated" to diffracting condition
Coord S = S0 + p;
float phi = -1.0f * std::atan2(sinphi, cosphi) * 180.0f / static_cast<float>(M_PI);
if (phi > max_angle_rad || phi < -max_angle_rad)
continue;
const float lorentz_reciprocal = std::fabs(m2 * (S % S0))/(S*S0);
Coord S_local = S0 + p0;
// Inlined RecipToDector with rot1 and rot2 (rot3 = 0)
// Apply rotation matrix transpose
float S_rot_x = rot[0] * S_local.x + rot[1] * S_local.y + rot[2] * S_local.z;
float S_rot_y = rot[3] * S_local.x + rot[4] * S_local.y + rot[5] * S_local.z;
float S_rot_z = rot[6] * S_local.x + rot[7] * S_local.y + rot[8] * S_local.z;
if (S_rot_z <= 0)
continue;
float x = beam_x + F * S_rot_x / S_rot_z;
float y = beam_y + F * S_rot_y / S_rot_z;
if ((x < 0) || (x >= det_width_pxl) || (y < 0) || (y >= det_height_pxl))
continue;
float dist_ewald_sphere = std::fabs(p0_sq - one_over_wavelength);
float d = 1.0f / sqrtf(p0_sq);
reflections[i] = Reflection{
.h = h,
.k = k,
.l = l,
.predicted_x = x,
.predicted_y = y,
.d = d,
.dist_ewald = dist_ewald_sphere,
.lp = lorentz_reciprocal,
.S_x = S.x,
.S_y = S.y,
.S_z = S.z
};
i++;
}
}
}
}
return i;
}

View File

@@ -0,0 +1,12 @@
// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#pragma once
#include "BraggPrediction.h"
class BraggPredictionRot : public BraggPrediction {
int Calc(const DiffractionExperiment &experiment,
const CrystalLattice &lattice,
const BraggPredictionSettings &settings) override;
};

View File

@@ -0,0 +1,272 @@
// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#include "BraggPredictionRotGPU.h"
#ifdef JFJOCH_USE_CUDA
#include "../indexing/CUDAMemHelpers.h"
#include <cuda_runtime.h>
#include <cmath>
namespace {
__device__ inline bool is_odd(int v) { return (v & 1) != 0; }
__device__ inline void cross3(float ax, float ay, float az,
float bx, float by, float bz,
float &cx, float &cy, float &cz) {
cx = ay * bz - az * by;
cy = az * bx - ax * bz;
cz = ax * by - ay * bx;
}
__device__ inline float dot3(float ax, float ay, float az,
float bx, float by, float bz) {
return ax * bx + ay * by + az * bz;
}
__device__ inline void normalize3(float &x, float &y, float &z) {
float len = sqrtf(x * x + y * y + z * z);
if (len < 1e-12f) { x = 0.0f; y = 0.0f; z = 0.0f; return; }
float inv = 1.0f / len;
x *= inv; y *= inv; z *= inv;
}
__device__ inline bool compute_reflection_rot(const KernelConstsRot &C, int h, int k, int l, Reflection &out) {
if (h == 0 && k == 0 && l == 0)
return false;
switch (C.centering) {
case 'I':
if (is_odd(h + k + l)) return false;
break;
case 'A':
if (is_odd(k + l)) return false;
break;
case 'B':
if (is_odd(h + l)) return false;
break;
case 'C':
if (is_odd(h + k)) return false;
break;
case 'F':
if (is_odd(h + k) || is_odd(h + l) || is_odd(k + l)) return false;
break;
case 'R': {
int mod = (-h + k + l) % 3;
if (mod < 0) mod += 3;
if (mod != 0) return false;
break;
}
default:
break;
}
// p0 = A* h + B* k + C* l
float p0x = C.Astar.x * h + C.Bstar.x * k + C.Cstar.x * l;
float p0y = C.Astar.y * h + C.Bstar.y * k + C.Cstar.y * l;
float p0z = C.Astar.z * h + C.Bstar.z * k + C.Cstar.z * l;
float p0_sq = p0x * p0x + p0y * p0y + p0z * p0z;
if (p0_sq <= 0.0f || p0_sq > C.one_over_dmax_sq)
return false;
float p0_m1 = p0x * C.m1.x + p0y * C.m1.y + p0z * C.m1.z;
float p0_m2 = p0x * C.m2.x + p0y * C.m2.y + p0z * C.m2.z;
float p0_m3 = p0x * C.m3.x + p0y * C.m3.y + p0z * C.m3.z;
float rho_sq = p0_sq - (p0_m2 * p0_m2);
float p_m3 = (-p0_sq / 2.0f - p0_m2 * C.m2_S0) / C.m3_S0;
float p_m2 = p0_m2;
if (rho_sq < p_m3 * p_m3) return false;
if (p0_sq > 4.0f * dot3(C.S0.x, C.S0.y, C.S0.z, C.S0.x, C.S0.y, C.S0.z)) return false;
float p_m1_pos = sqrtf(rho_sq - p_m3 * p_m3);
float p_m1_neg = -p_m1_pos;
float p_m1_arr[2] = {p_m1_pos, p_m1_neg};
for (int idx = 0; idx < 2; ++idx) {
float p_m1 = p_m1_arr[idx];
float cosphi = (p_m1 * p0_m1 + p_m3 * p0_m3) / rho_sq;
float sinphi = (p_m1 * p0_m3 - p_m3 * p0_m1) / rho_sq;
// p = m1*p_m1 + m2*p_m2 + m3*p_m3
float px = C.m1.x * p_m1 + C.m2.x * p_m2 + C.m3.x * p_m3;
float py = C.m1.y * p_m1 + C.m2.y * p_m2 + C.m3.y * p_m3;
float pz = C.m1.z * p_m1 + C.m2.z * p_m2 + C.m3.z * p_m3;
float Sx = C.S0.x + px;
float Sy = C.S0.y + py;
float Sz = C.S0.z + pz;
float phi = -1.0f * atan2f(sinphi, cosphi) * 180.0f / static_cast<float>(M_PI);
if (phi > C.max_angle_rad || phi < -C.max_angle_rad)
continue;
// lorentz = |m2 . (S x S0)| / (S . S0)
float cx, cy, cz;
cross3(Sx, Sy, Sz, C.S0.x, C.S0.y, C.S0.z, cx, cy, cz);
float lorentz = fabsf(dot3(C.m2.x, C.m2.y, C.m2.z, cx, cy, cz)) /
dot3(Sx, Sy, Sz, C.S0.x, C.S0.y, C.S0.z);
// S_local = S0 + p0 (note: p0, not p)
float Slx = C.S0.x + p0x;
float Sly = C.S0.y + p0y;
float Slz = C.S0.z + p0z;
// Rotate to detector
float Srx = C.rot[0] * Slx + C.rot[1] * Sly + C.rot[2] * Slz;
float Sry = C.rot[3] * Slx + C.rot[4] * Sly + C.rot[5] * Slz;
float Srz = C.rot[6] * Slx + C.rot[7] * Sly + C.rot[8] * Slz;
if (Srz <= 0.0f) continue;
float coeff = C.coeff_const / Srz;
float x = C.beam_x + Srx * coeff;
float y = C.beam_y + Sry * coeff;
if (x < 0.0f || x >= C.det_width_pxl || y < 0.0f || y >= C.det_height_pxl)
continue;
float dist_ewald = fabsf(p0_sq - C.one_over_wavelength);
out.h = h;
out.k = k;
out.l = l;
out.predicted_x = x;
out.predicted_y = y;
out.d = 1.0f / sqrtf(p0_sq);
out.dist_ewald = dist_ewald;
out.lp = lorentz;
out.S_x = Sx;
out.S_y = Sy;
out.S_z = Sz;
return true;
}
return false;
}
__global__ void bragg_rot_kernel_3d(const KernelConstsRot *__restrict__ kc,
int max_hkl,
int max_reflections,
Reflection *__restrict__ out,
int *__restrict__ counter) {
int range = 2 * max_hkl + 1;
int hi = blockIdx.x * blockDim.x + threadIdx.x;
int ki = blockIdx.y * blockDim.y + threadIdx.y;
int li = blockIdx.z * blockDim.z + threadIdx.z;
if (hi >= range || ki >= range || li >= range) return;
int h = hi - max_hkl;
int k = ki - max_hkl;
int l = li - max_hkl;
Reflection r{};
if (!compute_reflection_rot(*kc, h, k, l, r)) return;
int pos = atomicAdd(counter, 1);
if (pos < max_reflections) out[pos] = r;
else atomicSub(counter, 1);
}
inline KernelConstsRot BuildKernelConstsRot(const DiffractionExperiment &experiment,
const CrystalLattice &lattice,
float high_res_A,
float max_angle_deg,
char centering) {
KernelConstsRot kc{};
auto geom = experiment.GetDiffractionGeometry();
kc.det_width_pxl = static_cast<float>(experiment.GetXPixelsNum());
kc.det_height_pxl = static_cast<float>(experiment.GetYPixelsNum());
kc.beam_x = geom.GetBeamX_pxl();
kc.beam_y = geom.GetBeamY_pxl();
kc.coeff_const = geom.GetDetectorDistance_mm() / geom.GetPixelSize_mm();
float one_over_dmax = 1.0f / high_res_A;
kc.one_over_dmax_sq = one_over_dmax * one_over_dmax;
kc.one_over_wavelength = 1.0f / geom.GetWavelength_A();
kc.max_angle_rad = max_angle_deg * static_cast<float>(M_PI) / 180.0f;
kc.Astar = lattice.Astar();
kc.Bstar = lattice.Bstar();
kc.Cstar = lattice.Cstar();
kc.S0 = geom.GetScatteringVector();
auto rotT = geom.GetPoniRotMatrix().transpose().arr();
for (int i = 0; i < 9; ++i) kc.rot[i] = rotT[i];
kc.centering = centering;
return kc;
}
inline void BuildGoniometerBasis(const DiffractionExperiment &experiment, KernelConstsRot &kc) {
const auto gon_opt = experiment.GetGoniometer();
if (!gon_opt.has_value())
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
"BraggPredictionRotationGPU requires a goniometer axis");
const GoniometerAxis &gon = *gon_opt;
// m2 = normalize(axis)
float m2x = gon.GetAxis().x;
float m2y = gon.GetAxis().y;
float m2z = gon.GetAxis().z;
normalize3(m2x, m2y, m2z);
// m1 = normalize(m2 x S0)
float m1x, m1y, m1z;
cross3(m2x, m2y, m2z, kc.S0.x, kc.S0.y, kc.S0.z, m1x, m1y, m1z);
normalize3(m1x, m1y, m1z);
// m3 = normalize(m1 x m2)
float m3x, m3y, m3z;
cross3(m1x, m1y, m1z, m2x, m2y, m2z, m3x, m3y, m3z);
normalize3(m3x, m3y, m3z);
kc.m1 = Coord(m1x, m1y, m1z);
kc.m2 = Coord(m2x, m2y, m2z);
kc.m3 = Coord(m3x, m3y, m3z);
kc.m2_S0 = dot3(m2x, m2y, m2z, kc.S0.x, kc.S0.y, kc.S0.z);
kc.m3_S0 = dot3(m3x, m3y, m3z, kc.S0.x, kc.S0.y, kc.S0.z);
}
} // namespace
BraggPredictionRotGPU::BraggPredictionRotGPU(int max_reflections)
: BraggPrediction(max_reflections),
reg_out(reflections), d_out(max_reflections),
dK(1), d_count(1), h_count(1) {
}
int BraggPredictionRotGPU::Calc(const DiffractionExperiment &experiment,
const CrystalLattice &lattice,
const BraggPredictionSettings &settings) {
KernelConstsRot hK = BuildKernelConstsRot(experiment, lattice, settings.high_res_A, settings.max_angle, settings.centering);
BuildGoniometerBasis(experiment, hK);
cudaMemcpyAsync(dK, &hK, sizeof(KernelConstsRot), cudaMemcpyHostToDevice, stream);
cudaMemsetAsync(d_count, 0, sizeof(int), stream);
const int range = 2 * settings.max_hkl;
dim3 block(8, 8, 8);
dim3 grid((range + block.x - 1) / block.x,
(range + block.y - 1) / block.y,
(range + block.z - 1) / block.z);
bragg_rot_kernel_3d<<<grid, block, 0, stream>>>(dK, settings.max_hkl, max_reflections, d_out, d_count);
cudaMemcpyAsync(h_count, d_count, sizeof(int), cudaMemcpyDeviceToHost, stream);
cudaStreamSynchronize(stream);
int count = *h_count.get();
if (count > max_reflections) count = max_reflections;
if (count == 0) return 0;
cudaMemcpyAsync(reflections.data(), d_out, sizeof(Reflection) * count, cudaMemcpyDeviceToHost, stream);
cudaStreamSynchronize(stream);
return count;
}
#endif

View File

@@ -0,0 +1,48 @@
// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#pragma once
#include <vector>
#include "BraggPrediction.h"
#include "../indexing/CUDAMemHelpers.h"
struct KernelConstsRot {
float det_width_pxl;
float det_height_pxl;
float beam_x;
float beam_y;
float coeff_const;
float one_over_wavelength;
float one_over_dmax_sq;
float max_angle_rad;
Coord Astar, Bstar, Cstar, S0;
Coord m1, m2, m3;
float m2_S0;
float m3_S0;
float rot[9];
char centering;
};
class BraggPredictionRotGPU : public BraggPrediction {
CudaRegisteredVector<Reflection> reg_out; // requires stable storage
CudaDevicePtr<Reflection> d_out;
// Dedicated stream
CudaStream stream;
// Device allocations via helpers
CudaDevicePtr<KernelConstsRot> dK;
CudaDevicePtr<int> d_count;
// Host pinned buffer for async download (optional but faster)
CudaHostPtr<int> h_count;
public:
explicit BraggPredictionRotGPU(int max_reflections = 10000);
int Calc(const DiffractionExperiment &experiment,
const CrystalLattice &lattice,
const BraggPredictionSettings &settings) override;
};

View File

@@ -5,6 +5,9 @@ ADD_LIBRARY(JFJochBraggPrediction STATIC
BraggPredictionFactory.h
BraggPredictionRotation.cpp
BraggPredictionRotation.h
BraggPredictionRot.cpp
BraggPredictionRot.h
)
TARGET_LINK_LIBRARIES(JFJochBraggPrediction JFJochCommon)
@@ -12,5 +15,7 @@ TARGET_LINK_LIBRARIES(JFJochBraggPrediction JFJochCommon)
IF (JFJOCH_CUDA_AVAILABLE)
TARGET_SOURCES(JFJochBraggPrediction PRIVATE
../indexing/CUDAMemHelpers.h
BraggPredictionRotGPU.cu
BraggPredictionRotGPU.h
BraggPredictionGPU.cu BraggPredictionGPU.h)
ENDIF()