jfjoch_process: Add French-Wilson (completely written by Opus 4.6...need to understand it properly first :) )

2026-02-08 19:02:48 +01:00
parent 6e28ad3523
commit 69ebbed8fb
6 changed files with 285 additions and 1 deletions
@@ -1,2 +1,2 @@
-ADD_LIBRARY(JFJochScaleMerge ScaleAndMerge.cpp ScaleAndMerge.h)
+ADD_LIBRARY(JFJochScaleMerge ScaleAndMerge.cpp ScaleAndMerge.h FrenchWilson.cpp FrenchWilson.h)
 TARGET_LINK_LIBRARIES(JFJochScaleMerge Ceres::ceres Eigen3::Eigen JFJochCommon)
@@ -0,0 +1,190 @@
+// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
+// SPDX-License-Identifier: GPL-3.0-only
+
+
+#include "FrenchWilson.h"
+#include "../../common/ResolutionShells.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <vector>
+
+namespace {
+
+struct PosteriorMoments {
+    double mean_I;    // <I_true>
+    double mean_F;    // <|F|>
+};
+
+/// Numerically stable posterior integration via log-shift
+PosteriorMoments IntegratePosteriorStable(double I_obs, double sigma_obs, double mean_I_bin,
+                                           bool acentric, int npts, double z_max) {
+    if (mean_I_bin <= 0.0 || !std::isfinite(mean_I_bin))
+        mean_I_bin = 1.0;
+    if (sigma_obs <= 0.0 || !std::isfinite(sigma_obs))
+        sigma_obs = std::max(std::abs(I_obs) * 0.1, 1e-6);
+
+    const double inv_2sig2 = 1.0 / (2.0 * sigma_obs * sigma_obs);
+    const double dz = z_max / static_cast<double>(npts);
+
+    // First pass: compute all log_w and find max
+    std::vector<double> log_w_arr(npts);
+    double max_log_w = -std::numeric_limits<double>::infinity();
+
+    for (int i = 0; i < npts; ++i) {
+        const double z = (static_cast<double>(i) + 0.5) * dz;
+        const double I_true = z * mean_I_bin;
+        const double diff = I_obs - I_true;
+
+        double log_prior;
+        if (acentric) {
+            log_prior = -z;
+        } else {
+            if (z <= 1e-30) {
+                log_w_arr[i] = -std::numeric_limits<double>::infinity();
+                continue;
+            }
+            log_prior = -0.5 * std::log(2.0 * M_PI * z) - z / 2.0;
+        }
+
+        log_w_arr[i] = log_prior + (-diff * diff * inv_2sig2);
+        if (log_w_arr[i] > max_log_w)
+            max_log_w = log_w_arr[i];
+    }
+
+    // Second pass: accumulate with shift
+    double sum_w = 0.0;
+    double sum_wI = 0.0;
+    double sum_wF = 0.0;
+
+    for (int i = 0; i < npts; ++i) {
+        const double z = (static_cast<double>(i) + 0.5) * dz;
+        const double I_true = z * mean_I_bin;
+        const double w = std::exp(log_w_arr[i] - max_log_w);
+        if (!std::isfinite(w)) continue;
+
+        sum_w  += w;
+        sum_wI += w * I_true;
+        sum_wF += w * std::sqrt(I_true);
+    }
+
+    PosteriorMoments m{};
+    if (sum_w > 0.0) {
+        m.mean_I = sum_wI / sum_w;
+        m.mean_F = sum_wF / sum_w;
+    } else {
+        const double I_pos = std::max(I_obs, 0.0);
+        m.mean_I = I_pos;
+        m.mean_F = std::sqrt(I_pos);
+    }
+
+    return m;
+}
+
+} // namespace
+
+std::vector<FrenchWilsonReflection>
+FrenchWilson(const std::vector<MergedReflection>& merged,
+             const FrenchWilsonOptions& opts) {
+
+    const size_t n = merged.size();
+    std::vector<FrenchWilsonReflection> out(n);
+
+    if (n == 0)
+        return out;
+
+    // --- Step 1: determine d-range and build ResolutionShells ---
+    float d_min = std::numeric_limits<float>::max();
+    float d_max = 0.0f;
+
+    for (const auto& r : merged) {
+        const auto d = static_cast<float>(r.d);
+        if (!std::isfinite(d) || d <= 0.0f) continue;
+        if (d < d_min) d_min = d;
+        if (d > d_max) d_max = d;
+    }
+
+    // Guard: if we couldn't determine a range, fall back to naive sqrt
+    if (d_min >= d_max || d_min <= 0.0f) {
+        for (size_t i = 0; i < n; ++i) {
+            out[i].h = merged[i].h;
+            out[i].k = merged[i].k;
+            out[i].l = merged[i].l;
+            out[i].sigmaI = merged[i].sigma;
+            const double I_pos = std::max(merged[i].I, 0.0);
+            out[i].I = I_pos;
+            out[i].F = std::sqrt(I_pos);
+            out[i].sigmaF = 0.0;
+        }
+        return out;
+    }
+
+    // Slight padding so that reflections exactly at d_min / d_max are included
+    const float d_min_padded = d_min * 0.999f;
+    const float d_max_padded = d_max * 1.001f;
+    const int nshells = std::max(1, opts.num_shells);
+
+    ResolutionShells shells(d_min_padded, d_max_padded, nshells);
+
+    // --- Step 2: assign each reflection to a shell and compute per-shell <I> ---
+    std::vector<int32_t> shell_id(n, -1);
+    std::vector<double> shell_sum_I(nshells, 0.0);
+    std::vector<int>    shell_count(nshells, 0);
+
+    for (size_t i = 0; i < n; ++i) {
+        const auto d = static_cast<float>(merged[i].d);
+        if (!std::isfinite(d) || d <= 0.0f) continue;
+        auto s = shells.GetShell(d);
+        if (!s.has_value()) continue;
+        shell_id[i] = s.value();
+
+        if (std::isfinite(merged[i].I) && std::isfinite(merged[i].sigma)) {
+            shell_sum_I[s.value()] += merged[i].I;
+            shell_count[s.value()] += 1;
+        }
+    }
+
+    std::vector<double> shell_mean_I(nshells, 1.0);
+    for (int s = 0; s < nshells; ++s) {
+        if (shell_count[s] >= opts.min_reflections_per_shell)
+            shell_mean_I[s] = std::max(shell_sum_I[s] / static_cast<double>(shell_count[s]), 1e-10);
+    }
+
+    // --- Step 3: apply French-Wilson to each reflection ---
+    for (size_t i = 0; i < n; ++i) {
+        out[i].h = merged[i].h;
+        out[i].k = merged[i].k;
+        out[i].l = merged[i].l;
+        out[i].sigmaI = merged[i].sigma;
+
+        const double I_obs = merged[i].I;
+        const double sigma = merged[i].sigma;
+
+        // If no valid shell or bad data, naive fallback
+        if (shell_id[i] < 0 || !std::isfinite(I_obs) || !std::isfinite(sigma) || sigma <= 0.0) {
+            const double I_pos = std::max(I_obs, 0.0);
+            out[i].I = I_pos;
+            out[i].F = std::sqrt(std::max(I_pos, 0.0));
+            out[i].sigmaF = 0.0;
+            continue;
+        }
+
+        const double meanI = shell_mean_I[shell_id[i]];
+
+        auto moments = IntegratePosteriorStable(
+            I_obs, sigma, meanI,
+            opts.acentric,
+            opts.num_quadrature_points,
+            opts.z_max);
+
+        out[i].I = moments.mean_I;
+        out[i].F = moments.mean_F;
+
+        // sigma(F) = sqrt( <F²> - <F>² ) = sqrt( <I> - <F>² )
+        const double var_F = moments.mean_I - moments.mean_F * moments.mean_F;
+        out[i].sigmaF = (var_F > 0.0) ? std::sqrt(var_F) : 0.0;
+    }
+
+    return out;
+}
@@ -0,0 +1,45 @@
+// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
+// SPDX-License-Identifier: GPL-3.0-only
+
+#pragma once
+
+#include <vector>
+#include <cstdint>
+
+#include "ScaleAndMerge.h"
+
+/// Result of the French-Wilson procedure for a single reflection
+struct FrenchWilsonReflection {
+    int h;
+    int k;
+    int l;
+    double F;          // <|F|>   posterior mean amplitude
+    double sigmaF;     // sigma(|F|) from posterior variance
+    double I;          // <I>     posterior mean intensity (always >= 0)
+    double sigmaI;     // original sigma(I) from merging
+};
+
+struct FrenchWilsonOptions {
+    /// Number of resolution shells (bins in 1/d² space)
+    int num_shells = 20;
+
+    /// Minimum number of reflections per shell to compute <I>
+    int min_reflections_per_shell = 10;
+
+    /// Whether the crystal is acentric (true) or centric (false).
+    bool acentric = true;
+
+    /// Number of quadrature points for numerical integration of the posterior
+    int num_quadrature_points = 200;
+
+    /// Upper integration limit in units of the Wilson scale (z_max = I / <I>)
+    double z_max = 20.0;
+};
+
+/// Apply the French-Wilson procedure to merged reflections.
+/// Requires that each MergedReflection has a valid d-spacing in its `d` field.
+///
+/// Returns one FrenchWilsonReflection per input reflection (same order).
+std::vector<FrenchWilsonReflection>
+FrenchWilson(const std::vector<MergedReflection>& merged,
+             const FrenchWilsonOptions& opts = {});
@@ -429,6 +429,24 @@ ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector<Reflection>& ob
        out.merged[h].l = slotToHKL[h].l;
        out.merged[h].I = std::exp(log_Itrue[h]);
        out.merged[h].sigma = std::numeric_limits<double>::quiet_NaN();
+        out.merged[h].d = 0.0;
+    }
+
+    // Populate d from median of observations per HKL
+    {
+        std::vector<std::vector<double>> per_hkl_d(nhkl);
+        for (const auto& o : obs) {
+            const double d_val = static_cast<double>(o.r->d);
+            if (std::isfinite(d_val) && d_val > 0.0)
+                per_hkl_d[o.hkl_slot].push_back(d_val);
+        }
+        for (int h = 0; h < nhkl; ++h) {
+            auto& v = per_hkl_d[h];
+            if (!v.empty()) {
+                std::nth_element(v.begin(), v.begin() + static_cast<long>(v.size() / 2), v.end());
+                out.merged[h].d = v[v.size() / 2];
+            }
+        }
    }

    ceres::Covariance::Options cov_options;
@@ -55,6 +55,7 @@ struct MergedReflection {
    int l;
    double I;
    double sigma;
+    double d = 0.0;
 };

 struct ScaleMergeResult {
@@ -24,6 +24,7 @@
 #include "../image_analysis/IndexAndRefine.h"
 #include "../receiver/JFJochReceiverPlots.h"
 #include "../compression/JFJochCompressor.h"
+#include "../image_analysis/scale_merge/FrenchWilson.h"

 void print_usage(Logger &logger) {
    logger.Info("Usage ./jfjoch_analysis {<options>} <input.h5>");
@@ -424,6 +425,35 @@ int main(int argc, char **argv) {
                    logger.Info("Wrote {} image records to {}", scale_result->image_ids.size(), img_path);
                }
            }
+
+            // --- French-Wilson: convert I → F ---
+            {
+                // Build d-spacings vector parallel to merged
+                std::vector<double> d_spacings(scale_result->merged.size());
+                for (size_t i = 0; i < scale_result->merged.size(); ++i)
+                    d_spacings[i] = scale_result->merged[i].d;
+
+                FrenchWilsonOptions fw_opts;
+                fw_opts.acentric = true;   // typical for MX
+                fw_opts.num_bins = 20;
+
+                auto fw = FrenchWilson(scale_result->merged, d_spacings, fw_opts);
+
+                const std::string fw_path = output_prefix + "_amplitudes.hkl";
+                std::ofstream fw_file(fw_path);
+                if (!fw_file) {
+                    logger.Error("Cannot open {} for writing", fw_path);
+                } else {
+                    fw_file << "# h  k  l  F  sigmaF  I_fw  sigmaI\n";
+                    for (const auto& r : fw) {
+                        fw_file << r.h << " " << r.k << " " << r.l << " "
+                                << r.F << " " << r.sigmaF << " "
+                                << r.I << " " << r.sigmaI << "\n";
+                    }
+                    fw_file.close();
+                    logger.Info("French-Wilson: wrote {} amplitudes to {}", fw.size(), fw_path);
+                }
+            }
        } else {
            logger.Warning("Scaling skipped — too few reflections accumulated (need >= 20)");
            logger.Info("Scaling wall-clock time: {:.2f} s", scale_time);