Jungfraujoch/image_analysis/indexing/FFTIndexer.cpp

// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only

#include "FFTIndexer.h"
#include <Eigen/Eigen>
#include "PostIndexingRefinement.h"

FFTIndexer::FFTIndexer(const IndexingSettings &settings)
    : max_length_A(settings.GetFFT_MaxUnitCell_A()),
      min_length_A(settings.GetFFT_MinUnitCell_A()),
      min_angle_deg(settings.GetFFT_MinAngle_deg()),
      max_angle_deg(settings.GetFFT_MaxAngle_deg()),
      nDirections(settings.GetFFT_NumVectors()),
      result_fft(nDirections) {

    // Reciprocal-magnitude histogram in one_over_d = 1/d units (the internal convention -
    // the spot coordinates and histogram_spacing are 1/d too; the resolution limit converts
    // as 1/HighRes). NB in this code Q always means the powder 2*pi/d, so 1/d is named
    // one_over_d, never q. The histogram covers the data range [0, one_over_d_max] and is
    // zero-padded by OVERSAMPLING for sub-bin peak localisation (finer cell lengths than the
    // raw bin width gives). len_coeff (= 2*max_length/histogram_size) cancels the factor, so
    // recovered lengths are independent of it. The padding factor is 2*pi: a historical value
    // from when the extent was mistakenly written as 2*pi/d (the Q convention); it is kept
    // because the exact amount sets which marginal frames index - rounding it to a nearby
    // integer shifts the indexing rate ~0.5-1% (validated on the lyso datasets).
    const float oversampling = 2.0f * static_cast<float>(M_PI);
    const float one_over_d_max = 1.0f / settings.GetFFT_HighResolution_A();

    histogram_spacing = 1.0f / (2.0f * max_length_A);
    histogram_size = std::ceil(oversampling * one_over_d_max / histogram_spacing);

    input_size = histogram_size * nDirections;
    output_size = (histogram_size / 2 + 1) * nDirections;

    if (max_length_A <= settings.GetFFT_HighResolution_A())
        throw std::invalid_argument("Largest unit cell cannot be smaller than resolution");
    if (nDirections <= 1)
        throw std::invalid_argument("FFTWIndexer: number of directions must be > 1");
    if (!(max_length_A > 0.f))
        throw std::invalid_argument("FFTWIndexer: max_length_A must be > 0");
    if (!(settings.GetFFT_HighResolution_A() > 0.f))
        throw std::invalid_argument("FFTWIndexer: high resolution must be > 0");
    if (histogram_size < 1)
        throw std::invalid_argument("FFTWIndexer: histogram_size must be >= 1");
    if (histogram_size > 1000000)
        throw std::invalid_argument("FFTWIndexer: histogram_size too large");

    SetupDirectionVectors();
}

void FFTIndexer::SetupDirectionVectors() {
    direction_vectors.reserve(static_cast<size_t>(nDirections));

    const double phi = (1.0 + std::sqrt(5.0)) / 2.0; // Golden ratio
    const double golden_angle = 2.0 * M_PI / phi;

    for (int i = 0; i < nDirections; i++) {
        // Half-sphere distribution (z in [0,1])
        double z = static_cast<double>(i) / static_cast<double>(nDirections - 1);
        double theta = golden_angle * static_cast<double>(i);
        double radius = std::sqrt(std::max(0.0, 1.0 - z * z));

        double x = radius * std::cos(theta);
        double y = radius * std::sin(theta);

        // Add unit vector to the list
        direction_vectors.emplace_back(static_cast<float>(x),
                                       static_cast<float>(y),
                                       static_cast<float>(z));
    }
}


void FFTIndexer::SetupUnitCell(const std::optional<UnitCell> &cell) {
    reference_unit_cell = cell;
}


void Sort(Coord &A, Coord &B, Coord &C) {
    // A is the smallest, C is the largest

    if (A.Length() > B.Length())
        std::swap(A, B);

    if (B.Length() > C.Length())
        std::swap(B, C);

    if (A.Length() > B.Length())
        std::swap(A, B);
}

std::vector<CrystalLattice> FFTIndexer::ReduceResults(const std::vector<Coord> &results) const {
    if (results.size() < 3)
        return {};

    std::vector<CrystalLattice> candidates;

    for (int i = 0; i < 3; i++) {
        for (int j = 0; j < 3; j++) {
            for (int k = 0; k < 3; k++) {
                if (i + j + k + 2 >= results.size())
                    break;
                Coord A = results[i];
                Coord B = results[(i + j + 1)];
                Coord C = results[(i + j + 1) + k + 1];

                // sort vectors by length for reduction
                Sort(A,B,C);

                CrystalLattice raw(A, B, C);
                CrystalLattice reduced = raw.NiggliReduce(); // Reduce cell
                const auto uc = reduced.GetUnitCell();

                if (uc.a < min_length_A || uc.b < min_length_A || uc.c < min_length_A)
                    continue;

                float alpha = uc.alpha, beta = uc.beta, gamma = uc.gamma;
                if (alpha < min_angle_deg || alpha > max_angle_deg ||
                    beta  < min_angle_deg || beta  > max_angle_deg ||
                    gamma < min_angle_deg || gamma > max_angle_deg)
                    continue;

                candidates.emplace_back(std::move(reduced));
            }
        }
    }

    return candidates;
}

std::vector<Coord> FFTIndexer::FilterFFTResults() const {
    std::multimap<float, FFTResult> fft_result_map;

    // Order vectors by max FFT magnitude and select the 30 strongest ones
    size_t max_vectors = 30;

    for (int i = 0; i < direction_vectors.size(); i++)
        fft_result_map.insert(std::make_pair(result_fft[i].magnitude, result_fft[i]));

    std::vector<FFTResult> fft_result_filtered;
    int count = 0;
    for (auto it = fft_result_map.rbegin();
         it != fft_result_map.rend() && count < max_vectors;
         ++it, ++count) {
        fft_result_filtered.emplace_back(it->second);
    }

    std::vector<Coord> ret;

    // Remove vectors less than 5 deg apart, as most likely these are colinear
    const float COS_5_DEG = std::cos(5.0f * M_PI / 180.0f);

    // Minimum relative amplitude to accept a shorter vector (fundamental)
    // over a longer one (harmonic).
    // 0.25 means the fundamental frequency must have at least 25% of the
    // intensity of the harmonic. If less, the odd-indexed spots are likely
    // systematic absences or noise, and the longer vector is the true cell.
    constexpr float MIN_FUNDAMENTAL_PEAK_RATIO = 0.25f;

    std::vector<bool> ignore(fft_result_filtered.size(), false);

    for (int i = 0; i < fft_result_filtered.size(); i++) {
        if (ignore[i])
            continue;

        Coord dir_i = direction_vectors.at(fft_result_filtered[i].direction);
        float len_i = fft_result_filtered[i].length;
        int best_idx = i; // Index of the vector we currently plan to keep

        for (int j = i + 1; j < fft_result_filtered.size(); j++) {
            if (ignore[j]) continue;

            Coord dir_j = direction_vectors.at(fft_result_filtered[j].direction);

            // If vectors are colinear (angle < 5 deg)
            if (std::fabs(dir_i * dir_j) > COS_5_DEG) {
                ignore[j] = true;

                // CHECK: Is the new candidate (j) shorter than current best (best_idx)?
                // We prefer shorter vectors (fundamental periodicity) over longer ones (harmonics)
                // BUT only if the shorter vector has "enough" amplitude to be real.

                if (fft_result_filtered[j].length < len_i * 0.9f) {
                    // Compare against 'i' (the strongest in the cluster) to define the noise floor.
                    // Using 'best_idx' could allow stepping down into noise if we already swapped to a weak peak.
                    float magnitude_ratio = fft_result_filtered[j].magnitude / fft_result_filtered[i].magnitude;

                    // Heuristic: If the shorter vector has at least 25% of the amplitude of the
                    // stronger (longer) vector, assume the shorter one is the true unit cell.
                    // If it's less than 25%, the shorter peak is likely noise/aliasing,
                    // and the longer vector is the true primitive cell.
                    if (magnitude_ratio > MIN_FUNDAMENTAL_PEAK_RATIO) {
                        dir_i = dir_j;
                        len_i = fft_result_filtered[j].length;
                        best_idx = j;
                    }
                }
            }
        }
        Coord best_dir = direction_vectors.at(fft_result_filtered[best_idx].direction);
        ret.push_back(best_dir * fft_result_filtered[best_idx].length);
    }

    // Sort filtered vectors by magnitude
    std::sort(ret.begin(), ret.end(), [](const Coord &A, const Coord &B) {
       return A.Length() < B.Length();
    });

    return ret;
}


std::vector<CrystalLattice> FFTIndexer::RunInternal(const std::vector<Coord> &coord, size_t nspots) {
    if (nspots > coord.size())
        nspots = coord.size();

    if (nspots < viable_cell_min_spots)
        return {};

    assert(nspots <= FFT_MAX_SPOTS);
    assert(coord.size() <= FFT_MAX_SPOTS);

    ExecuteFFT(coord, nspots);
    const auto f = FilterFFTResults();
    const auto r = ReduceResults(f);

    Eigen::MatrixX3<float> oCell(r.size() * 3u, 3u);
    Eigen::VectorX<float> scores(r.size());

    for (int i = 0; i < r.size(); i++) {
        oCell(i * 3u, 0u) = r[i].Vec0().x;
        oCell(i * 3u, 1u) = r[i].Vec0().y;
        oCell(i * 3u, 2u) = r[i].Vec0().z;

        oCell(i * 3u + 1, 0u) = r[i].Vec1().x;
        oCell(i * 3u + 1, 1u) = r[i].Vec1().y;
        oCell(i * 3u + 1, 2u) = r[i].Vec1().z;

        oCell(i * 3u + 2, 0u) = r[i].Vec2().x;
        oCell(i * 3u + 2, 1u) = r[i].Vec2().y;
        oCell(i * 3u + 2, 2u) = r[i].Vec2().z;

        // Bootstrap score
        scores(i) = 0.2;
    }

    RefineParameters parameters{
        .viable_cell_min_spots = viable_cell_min_spots,
        .dist_tolerance_vs_reference = dist_tolerance_vs_reference,
        .reference_unit_cell = reference_unit_cell,
        .min_length_A = min_length_A,
        .max_length_A = max_length_A,
        .min_angle_deg = min_angle_deg,
        .max_angle_deg = max_angle_deg,
        .indexing_tolerance = indexing_tolerance
    };

    return Refine(coord, nspots, oCell, scores, parameters);
}