jfjoch_process: Add French-Wilson (completely written by Opus 4.6...need to understand it properly first :) )

This commit is contained in:
2026-02-08 19:02:48 +01:00
parent 6e28ad3523
commit 69ebbed8fb
6 changed files with 285 additions and 1 deletions
+1 -1
View File
@@ -1,2 +1,2 @@
ADD_LIBRARY(JFJochScaleMerge ScaleAndMerge.cpp ScaleAndMerge.h)
ADD_LIBRARY(JFJochScaleMerge ScaleAndMerge.cpp ScaleAndMerge.h FrenchWilson.cpp FrenchWilson.h)
TARGET_LINK_LIBRARIES(JFJochScaleMerge Ceres::ceres Eigen3::Eigen JFJochCommon)
+190
View File
@@ -0,0 +1,190 @@
// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#include "FrenchWilson.h"
#include "../../common/ResolutionShells.h"
#include <algorithm>
#include <cmath>
#include <limits>
#include <vector>
namespace {
struct PosteriorMoments {
double mean_I; // <I_true>
double mean_F; // <|F|>
};
/// Numerically stable posterior integration via log-shift
PosteriorMoments IntegratePosteriorStable(double I_obs, double sigma_obs, double mean_I_bin,
bool acentric, int npts, double z_max) {
if (mean_I_bin <= 0.0 || !std::isfinite(mean_I_bin))
mean_I_bin = 1.0;
if (sigma_obs <= 0.0 || !std::isfinite(sigma_obs))
sigma_obs = std::max(std::abs(I_obs) * 0.1, 1e-6);
const double inv_2sig2 = 1.0 / (2.0 * sigma_obs * sigma_obs);
const double dz = z_max / static_cast<double>(npts);
// First pass: compute all log_w and find max
std::vector<double> log_w_arr(npts);
double max_log_w = -std::numeric_limits<double>::infinity();
for (int i = 0; i < npts; ++i) {
const double z = (static_cast<double>(i) + 0.5) * dz;
const double I_true = z * mean_I_bin;
const double diff = I_obs - I_true;
double log_prior;
if (acentric) {
log_prior = -z;
} else {
if (z <= 1e-30) {
log_w_arr[i] = -std::numeric_limits<double>::infinity();
continue;
}
log_prior = -0.5 * std::log(2.0 * M_PI * z) - z / 2.0;
}
log_w_arr[i] = log_prior + (-diff * diff * inv_2sig2);
if (log_w_arr[i] > max_log_w)
max_log_w = log_w_arr[i];
}
// Second pass: accumulate with shift
double sum_w = 0.0;
double sum_wI = 0.0;
double sum_wF = 0.0;
for (int i = 0; i < npts; ++i) {
const double z = (static_cast<double>(i) + 0.5) * dz;
const double I_true = z * mean_I_bin;
const double w = std::exp(log_w_arr[i] - max_log_w);
if (!std::isfinite(w)) continue;
sum_w += w;
sum_wI += w * I_true;
sum_wF += w * std::sqrt(I_true);
}
PosteriorMoments m{};
if (sum_w > 0.0) {
m.mean_I = sum_wI / sum_w;
m.mean_F = sum_wF / sum_w;
} else {
const double I_pos = std::max(I_obs, 0.0);
m.mean_I = I_pos;
m.mean_F = std::sqrt(I_pos);
}
return m;
}
} // namespace
std::vector<FrenchWilsonReflection>
FrenchWilson(const std::vector<MergedReflection>& merged,
const FrenchWilsonOptions& opts) {
const size_t n = merged.size();
std::vector<FrenchWilsonReflection> out(n);
if (n == 0)
return out;
// --- Step 1: determine d-range and build ResolutionShells ---
float d_min = std::numeric_limits<float>::max();
float d_max = 0.0f;
for (const auto& r : merged) {
const auto d = static_cast<float>(r.d);
if (!std::isfinite(d) || d <= 0.0f) continue;
if (d < d_min) d_min = d;
if (d > d_max) d_max = d;
}
// Guard: if we couldn't determine a range, fall back to naive sqrt
if (d_min >= d_max || d_min <= 0.0f) {
for (size_t i = 0; i < n; ++i) {
out[i].h = merged[i].h;
out[i].k = merged[i].k;
out[i].l = merged[i].l;
out[i].sigmaI = merged[i].sigma;
const double I_pos = std::max(merged[i].I, 0.0);
out[i].I = I_pos;
out[i].F = std::sqrt(I_pos);
out[i].sigmaF = 0.0;
}
return out;
}
// Slight padding so that reflections exactly at d_min / d_max are included
const float d_min_padded = d_min * 0.999f;
const float d_max_padded = d_max * 1.001f;
const int nshells = std::max(1, opts.num_shells);
ResolutionShells shells(d_min_padded, d_max_padded, nshells);
// --- Step 2: assign each reflection to a shell and compute per-shell <I> ---
std::vector<int32_t> shell_id(n, -1);
std::vector<double> shell_sum_I(nshells, 0.0);
std::vector<int> shell_count(nshells, 0);
for (size_t i = 0; i < n; ++i) {
const auto d = static_cast<float>(merged[i].d);
if (!std::isfinite(d) || d <= 0.0f) continue;
auto s = shells.GetShell(d);
if (!s.has_value()) continue;
shell_id[i] = s.value();
if (std::isfinite(merged[i].I) && std::isfinite(merged[i].sigma)) {
shell_sum_I[s.value()] += merged[i].I;
shell_count[s.value()] += 1;
}
}
std::vector<double> shell_mean_I(nshells, 1.0);
for (int s = 0; s < nshells; ++s) {
if (shell_count[s] >= opts.min_reflections_per_shell)
shell_mean_I[s] = std::max(shell_sum_I[s] / static_cast<double>(shell_count[s]), 1e-10);
}
// --- Step 3: apply French-Wilson to each reflection ---
for (size_t i = 0; i < n; ++i) {
out[i].h = merged[i].h;
out[i].k = merged[i].k;
out[i].l = merged[i].l;
out[i].sigmaI = merged[i].sigma;
const double I_obs = merged[i].I;
const double sigma = merged[i].sigma;
// If no valid shell or bad data, naive fallback
if (shell_id[i] < 0 || !std::isfinite(I_obs) || !std::isfinite(sigma) || sigma <= 0.0) {
const double I_pos = std::max(I_obs, 0.0);
out[i].I = I_pos;
out[i].F = std::sqrt(std::max(I_pos, 0.0));
out[i].sigmaF = 0.0;
continue;
}
const double meanI = shell_mean_I[shell_id[i]];
auto moments = IntegratePosteriorStable(
I_obs, sigma, meanI,
opts.acentric,
opts.num_quadrature_points,
opts.z_max);
out[i].I = moments.mean_I;
out[i].F = moments.mean_F;
// sigma(F) = sqrt( <F²> - <F>² ) = sqrt( <I> - <F>² )
const double var_F = moments.mean_I - moments.mean_F * moments.mean_F;
out[i].sigmaF = (var_F > 0.0) ? std::sqrt(var_F) : 0.0;
}
return out;
}
+45
View File
@@ -0,0 +1,45 @@
// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#pragma once
#include <vector>
#include <cstdint>
#include "ScaleAndMerge.h"
/// Result of the French-Wilson procedure for a single reflection
struct FrenchWilsonReflection {
int h;
int k;
int l;
double F; // <|F|> posterior mean amplitude
double sigmaF; // sigma(|F|) from posterior variance
double I; // <I> posterior mean intensity (always >= 0)
double sigmaI; // original sigma(I) from merging
};
struct FrenchWilsonOptions {
/// Number of resolution shells (bins in 1/d² space)
int num_shells = 20;
/// Minimum number of reflections per shell to compute <I>
int min_reflections_per_shell = 10;
/// Whether the crystal is acentric (true) or centric (false).
bool acentric = true;
/// Number of quadrature points for numerical integration of the posterior
int num_quadrature_points = 200;
/// Upper integration limit in units of the Wilson scale (z_max = I / <I>)
double z_max = 20.0;
};
/// Apply the French-Wilson procedure to merged reflections.
/// Requires that each MergedReflection has a valid d-spacing in its `d` field.
///
/// Returns one FrenchWilsonReflection per input reflection (same order).
std::vector<FrenchWilsonReflection>
FrenchWilson(const std::vector<MergedReflection>& merged,
const FrenchWilsonOptions& opts = {});
@@ -429,6 +429,24 @@ ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector<Reflection>& ob
out.merged[h].l = slotToHKL[h].l;
out.merged[h].I = std::exp(log_Itrue[h]);
out.merged[h].sigma = std::numeric_limits<double>::quiet_NaN();
out.merged[h].d = 0.0;
}
// Populate d from median of observations per HKL
{
std::vector<std::vector<double>> per_hkl_d(nhkl);
for (const auto& o : obs) {
const double d_val = static_cast<double>(o.r->d);
if (std::isfinite(d_val) && d_val > 0.0)
per_hkl_d[o.hkl_slot].push_back(d_val);
}
for (int h = 0; h < nhkl; ++h) {
auto& v = per_hkl_d[h];
if (!v.empty()) {
std::nth_element(v.begin(), v.begin() + static_cast<long>(v.size() / 2), v.end());
out.merged[h].d = v[v.size() / 2];
}
}
}
ceres::Covariance::Options cov_options;
@@ -55,6 +55,7 @@ struct MergedReflection {
int l;
double I;
double sigma;
double d = 0.0;
};
struct ScaleMergeResult {
+30
View File
@@ -24,6 +24,7 @@
#include "../image_analysis/IndexAndRefine.h"
#include "../receiver/JFJochReceiverPlots.h"
#include "../compression/JFJochCompressor.h"
#include "../image_analysis/scale_merge/FrenchWilson.h"
void print_usage(Logger &logger) {
logger.Info("Usage ./jfjoch_analysis {<options>} <input.h5>");
@@ -424,6 +425,35 @@ int main(int argc, char **argv) {
logger.Info("Wrote {} image records to {}", scale_result->image_ids.size(), img_path);
}
}
// --- French-Wilson: convert I → F ---
{
// Build d-spacings vector parallel to merged
std::vector<double> d_spacings(scale_result->merged.size());
for (size_t i = 0; i < scale_result->merged.size(); ++i)
d_spacings[i] = scale_result->merged[i].d;
FrenchWilsonOptions fw_opts;
fw_opts.acentric = true; // typical for MX
fw_opts.num_bins = 20;
auto fw = FrenchWilson(scale_result->merged, d_spacings, fw_opts);
const std::string fw_path = output_prefix + "_amplitudes.hkl";
std::ofstream fw_file(fw_path);
if (!fw_file) {
logger.Error("Cannot open {} for writing", fw_path);
} else {
fw_file << "# h k l F sigmaF I_fw sigmaI\n";
for (const auto& r : fw) {
fw_file << r.h << " " << r.k << " " << r.l << " "
<< r.F << " " << r.sigmaF << " "
<< r.I << " " << r.sigmaI << "\n";
}
fw_file.close();
logger.Info("French-Wilson: wrote {} amplitudes to {}", fw.size(), fw_path);
}
}
} else {
logger.Warning("Scaling skipped — too few reflections accumulated (need >= 20)");
logger.Info("Scaling wall-clock time: {:.2f} s", scale_time);