Files
Jungfraujoch/image_analysis/indexing/FFTIndexer.cpp
T
leonarski_f f2f95c44f6 FFTIndexer: name the histogram axis one_over_d (1/d), not q/Q
The FFT histogram extent was written as maxQ = 2*pi/HighRes (the powder Q = 2*pi/d
convention) while the bin width and spot coordinates use the internal 1/d convention -
a unit conflation. It is benign: len_coeff = 2*max_length/histogram_size carries the
same factor, so recovered cell lengths are correct; the 2*pi only acts as a ~6.28x
zero-padding of the histogram.

Rewrite it transparently: one_over_d_max = 1/HighRes (1/d), with the 2*pi kept as an
explicitly-named oversampling factor. In this codebase Q always denotes 2*pi/d, so 1/d
is named one_over_d (never q). histogram_size is numerically identical to before, so
behaviour is unchanged (FFT de-novo crystal 2 95.44%, bit-identical merge). Documented
that the exact padding amount is load-bearing at the marginal-frame level (rounding 2*pi
to a nearby integer shifts indexing ~0.5-1%).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-16 07:24:59 +02:00

259 lines
10 KiB
C++

// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#include "FFTIndexer.h"
#include <Eigen/Eigen>
#include "PostIndexingRefinement.h"
FFTIndexer::FFTIndexer(const IndexingSettings &settings)
: max_length_A(settings.GetFFT_MaxUnitCell_A()),
min_length_A(settings.GetFFT_MinUnitCell_A()),
min_angle_deg(settings.GetFFT_MinAngle_deg()),
max_angle_deg(settings.GetFFT_MaxAngle_deg()),
nDirections(settings.GetFFT_NumVectors()),
result_fft(nDirections) {
// Reciprocal-magnitude histogram in one_over_d = 1/d units (the internal convention -
// the spot coordinates and histogram_spacing are 1/d too; the resolution limit converts
// as 1/HighRes). NB in this code Q always means the powder 2*pi/d, so 1/d is named
// one_over_d, never q. The histogram covers the data range [0, one_over_d_max] and is
// zero-padded by OVERSAMPLING for sub-bin peak localisation (finer cell lengths than the
// raw bin width gives). len_coeff (= 2*max_length/histogram_size) cancels the factor, so
// recovered lengths are independent of it. The padding factor is 2*pi: a historical value
// from when the extent was mistakenly written as 2*pi/d (the Q convention); it is kept
// because the exact amount sets which marginal frames index - rounding it to a nearby
// integer shifts the indexing rate ~0.5-1% (validated on the lyso datasets).
const float oversampling = 2.0f * static_cast<float>(M_PI);
const float one_over_d_max = 1.0f / settings.GetFFT_HighResolution_A();
histogram_spacing = 1.0f / (2.0f * max_length_A);
histogram_size = std::ceil(oversampling * one_over_d_max / histogram_spacing);
input_size = histogram_size * nDirections;
output_size = (histogram_size / 2 + 1) * nDirections;
if (max_length_A <= settings.GetFFT_HighResolution_A())
throw std::invalid_argument("Largest unit cell cannot be smaller than resolution");
if (nDirections <= 1)
throw std::invalid_argument("FFTWIndexer: number of directions must be > 1");
if (!(max_length_A > 0.f))
throw std::invalid_argument("FFTWIndexer: max_length_A must be > 0");
if (!(settings.GetFFT_HighResolution_A() > 0.f))
throw std::invalid_argument("FFTWIndexer: high resolution must be > 0");
if (histogram_size < 1)
throw std::invalid_argument("FFTWIndexer: histogram_size must be >= 1");
if (histogram_size > 1000000)
throw std::invalid_argument("FFTWIndexer: histogram_size too large");
SetupDirectionVectors();
}
void FFTIndexer::SetupDirectionVectors() {
direction_vectors.reserve(static_cast<size_t>(nDirections));
const double phi = (1.0 + std::sqrt(5.0)) / 2.0; // Golden ratio
const double golden_angle = 2.0 * M_PI / phi;
for (int i = 0; i < nDirections; i++) {
// Half-sphere distribution (z in [0,1])
double z = static_cast<double>(i) / static_cast<double>(nDirections - 1);
double theta = golden_angle * static_cast<double>(i);
double radius = std::sqrt(std::max(0.0, 1.0 - z * z));
double x = radius * std::cos(theta);
double y = radius * std::sin(theta);
// Add unit vector to the list
direction_vectors.emplace_back(static_cast<float>(x),
static_cast<float>(y),
static_cast<float>(z));
}
}
void FFTIndexer::SetupUnitCell(const std::optional<UnitCell> &cell) {
reference_unit_cell = cell;
}
void Sort(Coord &A, Coord &B, Coord &C) {
// A is the smallest, C is the largest
if (A.Length() > B.Length())
std::swap(A, B);
if (B.Length() > C.Length())
std::swap(B, C);
if (A.Length() > B.Length())
std::swap(A, B);
}
std::vector<CrystalLattice> FFTIndexer::ReduceResults(const std::vector<Coord> &results) const {
if (results.size() < 3)
return {};
std::vector<CrystalLattice> candidates;
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
for (int k = 0; k < 3; k++) {
if (i + j + k + 2 >= results.size())
break;
Coord A = results[i];
Coord B = results[(i + j + 1)];
Coord C = results[(i + j + 1) + k + 1];
// sort vectors by length for reduction
Sort(A,B,C);
CrystalLattice raw(A, B, C);
CrystalLattice reduced = raw.NiggliReduce(); // Reduce cell
const auto uc = reduced.GetUnitCell();
if (uc.a < min_length_A || uc.b < min_length_A || uc.c < min_length_A)
continue;
float alpha = uc.alpha, beta = uc.beta, gamma = uc.gamma;
if (alpha < min_angle_deg || alpha > max_angle_deg ||
beta < min_angle_deg || beta > max_angle_deg ||
gamma < min_angle_deg || gamma > max_angle_deg)
continue;
candidates.emplace_back(std::move(reduced));
}
}
}
return candidates;
}
std::vector<Coord> FFTIndexer::FilterFFTResults() const {
std::multimap<float, FFTResult> fft_result_map;
// Order vectors by max FFT magnitude and select the 30 strongest ones
size_t max_vectors = 30;
for (int i = 0; i < direction_vectors.size(); i++)
fft_result_map.insert(std::make_pair(result_fft[i].magnitude, result_fft[i]));
std::vector<FFTResult> fft_result_filtered;
int count = 0;
for (auto it = fft_result_map.rbegin();
it != fft_result_map.rend() && count < max_vectors;
++it, ++count) {
fft_result_filtered.emplace_back(it->second);
}
std::vector<Coord> ret;
// Remove vectors less than 5 deg apart, as most likely these are colinear
const float COS_5_DEG = std::cos(5.0f * M_PI / 180.0f);
// Minimum relative amplitude to accept a shorter vector (fundamental)
// over a longer one (harmonic).
// 0.25 means the fundamental frequency must have at least 25% of the
// intensity of the harmonic. If less, the odd-indexed spots are likely
// systematic absences or noise, and the longer vector is the true cell.
constexpr float MIN_FUNDAMENTAL_PEAK_RATIO = 0.25f;
std::vector<bool> ignore(fft_result_filtered.size(), false);
for (int i = 0; i < fft_result_filtered.size(); i++) {
if (ignore[i])
continue;
Coord dir_i = direction_vectors.at(fft_result_filtered[i].direction);
float len_i = fft_result_filtered[i].length;
int best_idx = i; // Index of the vector we currently plan to keep
for (int j = i + 1; j < fft_result_filtered.size(); j++) {
if (ignore[j]) continue;
Coord dir_j = direction_vectors.at(fft_result_filtered[j].direction);
// If vectors are colinear (angle < 5 deg)
if (std::fabs(dir_i * dir_j) > COS_5_DEG) {
ignore[j] = true;
// CHECK: Is the new candidate (j) shorter than current best (best_idx)?
// We prefer shorter vectors (fundamental periodicity) over longer ones (harmonics)
// BUT only if the shorter vector has "enough" amplitude to be real.
if (fft_result_filtered[j].length < len_i * 0.9f) {
// Compare against 'i' (the strongest in the cluster) to define the noise floor.
// Using 'best_idx' could allow stepping down into noise if we already swapped to a weak peak.
float magnitude_ratio = fft_result_filtered[j].magnitude / fft_result_filtered[i].magnitude;
// Heuristic: If the shorter vector has at least 25% of the amplitude of the
// stronger (longer) vector, assume the shorter one is the true unit cell.
// If it's less than 25%, the shorter peak is likely noise/aliasing,
// and the longer vector is the true primitive cell.
if (magnitude_ratio > MIN_FUNDAMENTAL_PEAK_RATIO) {
dir_i = dir_j;
len_i = fft_result_filtered[j].length;
best_idx = j;
}
}
}
}
Coord best_dir = direction_vectors.at(fft_result_filtered[best_idx].direction);
ret.push_back(best_dir * fft_result_filtered[best_idx].length);
}
// Sort filtered vectors by magnitude
std::sort(ret.begin(), ret.end(), [](const Coord &A, const Coord &B) {
return A.Length() < B.Length();
});
return ret;
}
std::vector<CrystalLattice> FFTIndexer::RunInternal(const std::vector<Coord> &coord, size_t nspots) {
if (nspots > coord.size())
nspots = coord.size();
if (nspots < viable_cell_min_spots)
return {};
assert(nspots <= FFT_MAX_SPOTS);
assert(coord.size() <= FFT_MAX_SPOTS);
ExecuteFFT(coord, nspots);
const auto f = FilterFFTResults();
const auto r = ReduceResults(f);
Eigen::MatrixX3<float> oCell(r.size() * 3u, 3u);
Eigen::VectorX<float> scores(r.size());
for (int i = 0; i < r.size(); i++) {
oCell(i * 3u, 0u) = r[i].Vec0().x;
oCell(i * 3u, 1u) = r[i].Vec0().y;
oCell(i * 3u, 2u) = r[i].Vec0().z;
oCell(i * 3u + 1, 0u) = r[i].Vec1().x;
oCell(i * 3u + 1, 1u) = r[i].Vec1().y;
oCell(i * 3u + 1, 2u) = r[i].Vec1().z;
oCell(i * 3u + 2, 0u) = r[i].Vec2().x;
oCell(i * 3u + 2, 1u) = r[i].Vec2().y;
oCell(i * 3u + 2, 2u) = r[i].Vec2().z;
// Bootstrap score
scores(i) = 0.2;
}
RefineParameters parameters{
.viable_cell_min_spots = viable_cell_min_spots,
.dist_tolerance_vs_reference = dist_tolerance_vs_reference,
.reference_unit_cell = reference_unit_cell,
.min_length_A = min_length_A,
.max_length_A = max_length_A,
.min_angle_deg = min_angle_deg,
.max_angle_deg = max_angle_deg,
.indexing_tolerance = indexing_tolerance
};
return Refine(coord, nspots, oCell, scores, parameters);
}