Files
Jungfraujoch/image_analysis/bragg_prediction/BraggPredictionGPU.cu
Filip Leonarski 1ab257af6c
All checks were successful
Build Packages / build:rpm (ubuntu2404_nocuda) (push) Successful in 12m8s
Build Packages / build:rpm (rocky8_nocuda) (push) Successful in 12m57s
Build Packages / build:rpm (ubuntu2204_nocuda) (push) Successful in 12m55s
Build Packages / build:rpm (rocky8_sls9) (push) Successful in 12m0s
Build Packages / build:rpm (rocky9_nocuda) (push) Successful in 13m30s
Build Packages / Generate python client (push) Successful in 20s
Build Packages / Unit tests (push) Has been skipped
Build Packages / Create release (push) Has been skipped
Build Packages / Build documentation (push) Successful in 39s
Build Packages / build:rpm (rocky8) (push) Successful in 9m23s
Build Packages / build:rpm (rocky9_sls9) (push) Successful in 10m33s
Build Packages / build:rpm (ubuntu2404) (push) Successful in 8m2s
Build Packages / build:rpm (ubuntu2204) (push) Successful in 8m42s
Build Packages / build:rpm (rocky9) (push) Successful in 9m38s
v1.0.0-rc.125 (#32)
This is an UNSTABLE release. This version adds scaling and merging. These are experimental at the moment, and should not be used for production analysis.
If things go wrong with analysis, it is better to revert to 1.0.0-rc.124.

* jfjoch_broker: Improve logic on switching on/off spot finding
* jfjoch_broker: Increase maximum spot count for FFBIDX to 65536
* jfjoch_broker: Increase default maximum unit cell for FFT to 500 A (could have performance impact, TBD)
* jfjoch_process: Add scaling and merging functionality - program is experimental at the moment and should not be used for production analysis
* jfjoch_viewer: Display partiality and reciprocal Lorentz-polarization correction for each reflection
* jfjoch_writer: Save more information about each reflection

Reviewed-on: #32
Co-authored-by: Filip Leonarski <filip.leonarski@psi.ch>
Co-committed-by: Filip Leonarski <filip.leonarski@psi.ch>
2026-02-18 16:17:21 +01:00

177 lines
6.8 KiB
Plaintext

// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#include "BraggPredictionGPU.h"
#ifdef JFJOCH_USE_CUDA
#include "../indexing/CUDAMemHelpers.h"
#include <cuda_runtime.h>
namespace {
// Parity test: true when the least-significant bit of v is set
// (valid for negative two's-complement values as well).
__device__ inline bool is_odd(int v) {
    return (v & 1) == 1;
}
// Decide whether Miller index (h,k,l) produces an observable reflection for
// the current experiment, and if so fill `out` with its prediction.
// Returns false when the reflection is systematically absent, outside the
// resolution limit, too far from the Ewald sphere, behind the detector plane,
// or predicted outside the detector area.
__device__ inline bool compute_reflection(const KernelConsts &C, int h, int k, int l, Reflection &out) {
    // The direct beam (0,0,0) is not a reflection.
    if (h == 0 && k == 0 && l == 0)
        return false;

    // Lattice-centering systematic absences. P (and anything unrecognized)
    // passes everything through; I/A/B/C/F/R apply the standard conditions.
    if (C.centering == 'I') {
        if (is_odd(h + k + l)) return false;
    } else if (C.centering == 'A') {
        if (is_odd(k + l)) return false;
    } else if (C.centering == 'B') {
        if (is_odd(h + l)) return false;
    } else if (C.centering == 'C') {
        if (is_odd(h + k)) return false;
    } else if (C.centering == 'F') {
        // F-centering: h,k,l must be all even or all odd (pairwise same parity).
        if (is_odd(h + k) || is_odd(h + l) || is_odd(k + l)) return false;
    } else if (C.centering == 'R') {
        // Rhombohedral in the hexagonal (obverse) setting: -h + k + l = 3n.
        int residue = (-h + k + l) % 3;
        if (residue < 0) residue += 3;
        if (residue != 0) return false;
    }

    // Reciprocal-lattice vector: h*A* + k*B* + l*C* (componentwise, same
    // left-to-right accumulation order as before so results match bit-for-bit).
    float rx = C.Astar.x * h + C.Bstar.x * k + C.Cstar.x * l;
    float ry = C.Astar.y * h + C.Bstar.y * k + C.Cstar.y * l;
    float rz = C.Astar.z * h + C.Bstar.z * k + C.Cstar.z * l;

    // Resolution cutoff: |r|^2 must not exceed 1/d_max^2.
    float r_sq = rx * rx + ry * ry + rz * rz;
    if (r_sq > C.one_over_dmax_sq) return false;

    // Scattered beam direction S = r + S0; distance of |S| from the Ewald
    // sphere radius (1/lambda) decides whether the reflection is excited.
    float sx = rx + C.S0.x;
    float sy = ry + C.S0.y;
    float sz = rz + C.S0.z;
    float s_norm = sqrtf(sx * sx + sy * sy + sz * sz);
    float ewald_dist = fabsf(s_norm - C.one_over_wavelength);
    if (ewald_dist > C.ewald_cutoff) return false;

    // Rotate S into the detector frame (row-major 3x3 matrix).
    float srx = C.rot[0] * sx + C.rot[1] * sy + C.rot[2] * sz;
    float sry = C.rot[3] * sx + C.rot[4] * sy + C.rot[5] * sz;
    float srz = C.rot[6] * sx + C.rot[7] * sy + C.rot[8] * sz;
    // Reflections travelling away from the detector cannot be recorded.
    if (srz <= 0.0f) return false;

    // Project onto the detector plane (gnomonic projection around the beam center).
    float scale = C.coeff_const / srz;
    float px = C.beam_x + srx * scale;
    float py = C.beam_y + sry * scale;
    if (px < 0.0f || px >= C.det_width_pxl || py < 0.0f || py >= C.det_height_pxl) return false;

    out.h = h;
    out.k = k;
    out.l = l;
    out.delta_phi_deg = NAN;        // not computed by this still-image predictor
    out.predicted_x = px;
    out.predicted_y = py;
    out.d = 1.0f / sqrtf(r_sq);     // resolution of the reflection in A
    out.dist_ewald = ewald_dist;
    out.rlp = 1.0f;                 // corrections filled in elsewhere
    out.partiality = 1.0f;
    out.zeta = 1.0f;
    return true;
}
// One thread per candidate Miller index triple. The 3D grid must cover
// (2*max_hkl + 1)^3 threads; out-of-range threads exit immediately.
// Accepted reflections are appended to `out` via an atomic counter; `out`
// holds at most `max_reflections` entries and the counter never ends above
// that capacity (overshoot is compensated with an atomicSub).
__global__ void bragg_kernel_3d(const KernelConsts *__restrict__ kc,
int max_hkl,
int max_reflections,
Reflection *__restrict__ out,
int *__restrict__ counter) {
    const int span = 2 * max_hkl + 1;
    const int ix = blockIdx.x * blockDim.x + threadIdx.x;
    const int iy = blockIdx.y * blockDim.y + threadIdx.y;
    const int iz = blockIdx.z * blockDim.z + threadIdx.z;
    // Guard the grid tail: launch config may overshoot the index cube.
    if (ix >= span || iy >= span || iz >= span)
        return;

    Reflection refl{};
    // Shift the 0-based thread index into the symmetric range [-max_hkl, +max_hkl].
    if (!compute_reflection(*kc, ix - max_hkl, iy - max_hkl, iz - max_hkl, refl))
        return;

    // Claim an output slot; if over capacity, give the claim back so the
    // final counter value stays <= max_reflections.
    int slot = atomicAdd(counter, 1);
    if (slot < max_reflections)
        out[slot] = refl;
    else
        atomicSub(counter, 1);
}
// Assemble the constant parameter block consumed by the prediction kernel
// from the experiment geometry and the refined crystal lattice.
//   high_res_A        - high-resolution cutoff in Angstrom
//   ewald_dist_cutoff - max allowed distance from the Ewald sphere (1/A)
//   centering         - Bravais centering letter (P/I/A/B/C/F/R)
inline KernelConsts BuildKernelConsts(const DiffractionExperiment &experiment,
const CrystalLattice &lattice,
float high_res_A,
float ewald_dist_cutoff,
char centering) {
    KernelConsts consts{};
    auto geometry = experiment.GetDiffractionGeometry();

    // Detector extent and beam center, all in pixel units.
    consts.det_width_pxl = static_cast<float>(experiment.GetXPixelsNum());
    consts.det_height_pxl = static_cast<float>(experiment.GetYPixelsNum());
    consts.beam_x = geometry.GetBeamX_pxl();
    consts.beam_y = geometry.GetBeamY_pxl();
    // Detector distance expressed in pixels (mm / mm-per-pixel).
    consts.coeff_const = geometry.GetDetectorDistance_mm() / geometry.GetPixelSize_mm();

    // Resolution cutoff stored squared so the kernel avoids a sqrt per HKL.
    const float inv_dmax = 1.0f / high_res_A;
    consts.one_over_dmax_sq = inv_dmax * inv_dmax;
    consts.one_over_wavelength = 1.0f / geometry.GetWavelength_A();
    consts.ewald_cutoff = ewald_dist_cutoff;

    // Reciprocal basis vectors, incident beam vector and centering type.
    consts.Astar = lattice.Astar();
    consts.Bstar = lattice.Bstar();
    consts.Cstar = lattice.Cstar();
    consts.S0 = geometry.GetScatteringVector();
    consts.centering = centering;

    // Kernel multiplies row-major, so store the transposed PONI rotation.
    auto transposed = geometry.GetPoniRotMatrix().transpose().arr();
    for (int i = 0; i < 9; ++i)
        consts.rot[i] = transposed[i];
    return consts;
}
} // namespace
// Pre-allocates all host/device buffers once so Calc() never allocates:
// reg_out registers (presumably pins) the base-class `reflections` host
// buffer, d_out holds up to max_reflections device-side results, and
// dK/d_count/h_count are single-element device/host staging slots for the
// kernel constants and the reflection counter.
// NOTE(review): member types come from CUDAMemHelpers.h (not visible here) —
// assumed to be RAII device/pinned-host wrappers; verify in that header.
BraggPredictionGPU::BraggPredictionGPU(int max_reflections)
: BraggPrediction(max_reflections),
reg_out(reflections), d_out(max_reflections),
dK(1), d_count(1), h_count(1) {
}
// Predict Bragg reflections on the GPU for the given experiment/lattice.
// Fills the inherited `reflections` host buffer with up to max_reflections
// entries and returns the number of valid entries (0 when none predicted).
// All work is enqueued on `stream`; the call blocks until results are on the host.
int BraggPredictionGPU::Calc(const DiffractionExperiment &experiment,
const CrystalLattice &lattice,
const BraggPredictionSettings &settings) {
    // Build constants on host and stage them plus a zeroed counter on device.
    KernelConsts hK = BuildKernelConsts(experiment, lattice, settings.high_res_A, settings.ewald_dist_cutoff, settings.centering);
    cudaMemcpyAsync(dK, &hK, sizeof(KernelConsts), cudaMemcpyHostToDevice, stream);
    cudaMemsetAsync(d_count, 0, sizeof(int), stream);

    // The kernel enumerates indices in [-max_hkl, +max_hkl] per axis, i.e.
    // 2*max_hkl + 1 values. The previous "2 * max_hkl" undercounted by one and
    // silently dropped the +max_hkl plane whenever 2*max_hkl was a multiple of
    // the block dimension (e.g. max_hkl = 4 with 8-wide blocks).
    const int range = 2 * settings.max_hkl + 1;
    dim3 block(8, 8, 8);
    dim3 grid((range + block.x - 1) / block.x,
    (range + block.y - 1) / block.y,
    (range + block.z - 1) / block.z);
    bragg_kernel_3d<<<grid, block, 0, stream>>>(dK, settings.max_hkl, max_reflections, d_out, d_count);
    // NOTE(review): no cudaGetLastError()/status propagation here — a launch
    // failure currently surfaces as count == 0; consider reporting errors.

    // Async D2H of the counter, then block until it is valid on the host.
    cudaMemcpyAsync(h_count, d_count, sizeof(int), cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);
    int count = *h_count.get();
    // Defensive clamp: the kernel keeps the final counter within capacity,
    // but never copy more than the output buffer can hold.
    if (count > max_reflections)
        count = max_reflections;
    if (count == 0)
        return 0;

    // Copy only the populated prefix of the device buffer back to the host.
    cudaMemcpyAsync(reflections.data(), d_out, sizeof(Reflection) * count, cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);
    return count;
}
#endif