diff --git a/image_analysis/scale_merge/Merge.cpp b/image_analysis/scale_merge/Merge.cpp index d604e092..5661d66e 100644 --- a/image_analysis/scale_merge/Merge.cpp +++ b/image_analysis/scale_merge/Merge.cpp @@ -260,6 +260,108 @@ void MergeOnTheFly::RefineErrorModel(const std::vector &outc error_model_active = true; } +// Per-crystal CC1/2-delta rejection (CrystFEL deltaCChalf style). Each image is assigned +// to one CC1/2 half, so removing an image only perturbs that half's per-reflection means. +// deltaCChalf_i = CC1/2(all) - CC1/2(without image i): a NEGATIVE value means removing the +// image RAISES CC1/2, i.e. it is inconsistent with the consensus. We flag images whose +// deltaCChalf is a low-side statistical outlier (< mean - nsigma*stddev). Reference-free. +// Two passes over the (retained) outcomes; per-image contributions are re-derived, not +// stored, so memory stays O(unique reflections + images) for full 200k-frame datasets. +std::vector MergeOnTheFly::DeltaCChalfReject(const std::vector &outcomes, + double nsigma) const { + struct Acc { double swI[2] = {0, 0}; double sw[2] = {0, 0}; size_t n[2] = {0, 0}; }; + std::unordered_map acc; + std::vector img_half(outcomes.size(), 0); + + std::mt19937 lrng{2026061600u}; + std::bernoulli_distribution hd{0.5}; + + // ---- pass 1: accumulate half-set sums, record each image's half ---- + auto contribution = [&](const Reflection &r, uint64_t &key, double &wI, double &w) -> bool { + if (generator.IsSystematicallyAbsent(r)) return false; + if (r.image_scale_corr <= 0.0 || !std::isfinite(r.image_scale_corr)) return false; + if (!AcceptReflection(r, high_resolution_limit)) return false; + if (r.partiality < min_partiality) return false; + const double I = static_cast(r.I) * r.image_scale_corr; + const double s = static_cast(r.sigma) * r.image_scale_corr; + if (!std::isfinite(I) || !std::isfinite(s) || s <= 0.0) return false; + w = 1.0 / (s * s); + wI = w * I; + key = generator(r).pack(); + return true; + }; + + for (size_t i = 0; i < outcomes.size(); ++i) { + const int half = hd(lrng) ? 1 : 0; + img_half[i] = half; + for (const auto &r : outcomes[i].reflections) { + uint64_t key; double wI, w; + if (!contribution(r, key, wI, w)) continue; + auto &a = acc[key]; + a.swI[half] += wI; a.sw[half] += w; a.n[half]++; + } + } + + // ---- baseline Pearson over reflections present in BOTH halves (x=half0, y=half1) ---- + auto pearson = [](double N, double Sx, double Sy, double Sxx, double Syy, double Sxy) -> double { + const double cov = N * Sxy - Sx * Sy; + const double vx = N * Sxx - Sx * Sx, vy = N * Syy - Sy * Sy; + const double den = std::sqrt(vx * vy); + return den > 0.0 ? cov / den : 0.0; + }; + double N = 0, Sx = 0, Sy = 0, Sxx = 0, Syy = 0, Sxy = 0; + for (const auto &kv : acc) { + const auto &a = kv.second; + if (a.n[0] == 0 || a.n[1] == 0) continue; + const double x = a.swI[0] / a.sw[0], y = a.swI[1] / a.sw[1]; + N += 1; Sx += x; Sy += y; Sxx += x * x; Syy += y * y; Sxy += x * y; + } + const double cc_base = pearson(N, Sx, Sy, Sxx, Syy, Sxy); + + // ---- pass 2: leave-one-out deltaCChalf per image ---- + std::vector delta(outcomes.size(), 0.0); + for (size_t i = 0; i < outcomes.size(); ++i) { + const int h = img_half[i]; + // aggregate this image's contributions per reflection key (an image may, rarely, + // touch the same ASU reflection twice) + std::unordered_map> mine; // key -> (sum wI, sum w), count via .first + std::unordered_map mine_n; + for (const auto &r : outcomes[i].reflections) { + uint64_t key; double wI, w; + if (!contribution(r, key, wI, w)) continue; + auto &p = mine[key]; p.first += wI; p.second += w; mine_n[key]++; + } + double n = N, sx = Sx, sy = Sy, sxx = Sxx, syy = Syy, sxy = Sxy; + for (const auto &m : mine) { + const auto &a = acc.at(m.first); + if (a.n[0] == 0 || a.n[1] == 0) continue; // reflection not in CC1/2 + const double x0 = a.swI[0] / a.sw[0], y0 = a.swI[1] / a.sw[1]; + n -= 1; sx -= x0; sy -= y0; sxx -= x0 * x0; syy -= y0 * y0; sxy -= x0 * y0; + const double swI_h = a.swI[h] - m.second.first; + const double sw_h = a.sw[h] - m.second.second; + if (a.n[h] - mine_n[m.first] == 0 || sw_h <= 0.0) continue; // reflection drops half h + const double mean_h = swI_h / sw_h; + const double xnew = (h == 0) ? mean_h : x0; + const double ynew = (h == 1) ? mean_h : y0; + n += 1; sx += xnew; sy += ynew; sxx += xnew * xnew; syy += ynew * ynew; sxy += xnew * ynew; + } + delta[i] = cc_base - pearson(n, sx, sy, sxx, syy, sxy); + } + + // ---- reject low-side outliers: delta < mean - nsigma*stddev ---- + double dm = 0, dv = 0; + for (double d : delta) dm += d; + dm /= std::max(1, delta.size()); + for (double d : delta) dv += (d - dm) * (d - dm); + const double dstd = std::sqrt(dv / std::max(1, delta.size())); + const double cut = dm - nsigma * dstd; + + std::vector reject(outcomes.size(), 0); + for (size_t i = 0; i < outcomes.size(); ++i) + reject[i] = (outcomes[i].reflections.empty() ? 0 : (delta[i] < cut ? 1 : 0)); + return reject; +} + bool MergeOnTheFly::Mask(const IntegrationOutcome &outcome, bool cc_mask) { if (reference_cell) { auto cell = outcome.latt.GetUnitCell(); diff --git a/image_analysis/scale_merge/Merge.h b/image_analysis/scale_merge/Merge.h index ab9877f4..458d9dab 100644 --- a/image_analysis/scale_merge/Merge.h +++ b/image_analysis/scale_merge/Merge.h @@ -114,6 +114,12 @@ public: void AddImage(const IntegrationOutcome& outcome, bool cc_mask = false); + // Per-crystal CC1/2-delta rejection (CrystFEL deltaCChalf): returns a per-image flag + // marking images whose removal would raise CC1/2 by a low-side outlier amount + // (deltaCChalf < mean - nsigma*stddev). Skip the flagged images when merging. + [[nodiscard]] std::vector DeltaCChalfReject(const std::vector &outcomes, + double nsigma) const; + MergeStatistics MergeStats(const std::vector &merged, const std::vector &reflections, const std::vector &reference = {}); diff --git a/tools/jfjoch_process.cpp b/tools/jfjoch_process.cpp index 7748ccdd..acadd7ba 100644 --- a/tools/jfjoch_process.cpp +++ b/tools/jfjoch_process.cpp @@ -73,6 +73,7 @@ void print_usage() { std::cout << " --scaling-high-resolution High resolution limit for spot finding (default: no limit)" << std::endl; std::cout << " --min-partiality Minimum partiality to accept reflection (default: 0.02)" << std::endl; std::cout << " --reject-outliers Per-observation merge outlier rejection, N sigma from the per-reflection median (default: off; e.g. 6, XDS/DIALS-style)" << std::endl; + std::cout << " --reject-delta-cchalf Per-crystal CC1/2-delta rejection: drop images with deltaCChalf below mean - N*stddev (default: off; e.g. 2.5)" << std::endl; std::cout << " --min-image-cc Per-image CC limit in percent (default: no limit)" << std::endl; std::cout << " --scaling-iterations Number of scaling iterations with no reference data (default: 3)" << std::endl; std::cout << " --scaling-output Output format for scaling results mtz|cif|txt (default: mtz)" << std::endl; @@ -101,7 +102,8 @@ enum { OPT_BANDWIDTH, OPT_INTEGRATION_RADIUS, OPT_REJECT_OUTLIERS, - OPT_PROFILE_MULTIPLIER + OPT_PROFILE_MULTIPLIER, + OPT_REJECT_DELTA_CCHALF }; static option long_options[] = { @@ -140,6 +142,7 @@ static option long_options[] = { {"bandwidth", required_argument, nullptr, OPT_BANDWIDTH}, {"integration-radius", required_argument, nullptr, OPT_INTEGRATION_RADIUS}, {"reject-outliers", required_argument, nullptr, OPT_REJECT_OUTLIERS}, + {"reject-delta-cchalf", required_argument, nullptr, OPT_REJECT_DELTA_CCHALF}, {"profile-multiplier", required_argument, nullptr, OPT_PROFILE_MULTIPLIER}, {nullptr, 0, nullptr, 0} }; @@ -323,6 +326,7 @@ int main(int argc, char **argv) { std::optional d_min_scale_merge; std::optional integration_radius_arg; // "r1" or "r1,r2,r3" std::optional outlier_reject_nsigma; // merge per-observation outlier rejection + std::optional delta_cchalf_nsigma; // per-crystal CC1/2-delta rejection std::optional profile_multiplier; // PixelRefine Term-2 profile-width multiplier if (argc == 1) { @@ -519,6 +523,9 @@ int main(int argc, char **argv) { case OPT_REJECT_OUTLIERS: outlier_reject_nsigma = std::stod(optarg); break; + case OPT_REJECT_DELTA_CCHALF: + delta_cchalf_nsigma = std::stod(optarg); + break; case OPT_PROFILE_MULTIPLIER: profile_multiplier = std::stof(optarg); break; @@ -1057,8 +1064,19 @@ int main(int argc, char **argv) { const double isa = (b > 0.0) ? 1.0 / b : std::numeric_limits::infinity(); logger.Info("Error model: sigma'^2 = {:.3f} sigma^2 + ({:.4f} I)^2 ISa = {:.1f}", a, b, isa); } - for (auto &i : indexer.GetIntegrationOutcome()) - merge_engine.AddImage(i); + const auto &merge_outcomes = indexer.GetIntegrationOutcome(); + std::vector dcch_reject; + if (delta_cchalf_nsigma) { + dcch_reject = merge_engine.DeltaCChalfReject(merge_outcomes, *delta_cchalf_nsigma); + const size_t nrej = std::count(dcch_reject.begin(), dcch_reject.end(), static_cast(1)); + logger.Info("CC1/2-delta rejection (deltaCChalf < mean - {:.1f} stddev) removed {} / {} images", + *delta_cchalf_nsigma, nrej, dcch_reject.size()); + } + for (size_t i = 0; i < merge_outcomes.size(); ++i) { + if (!dcch_reject.empty() && dcch_reject[i]) + continue; + merge_engine.AddImage(merge_outcomes[i]); + } if (merge_engine.RejectedCount() > 0) logger.Info("Outlier rejection (>{:.1f} sigma from the per-reflection median) removed {} observations", experiment.GetScalingSettings().GetOutlierRejectNsigma(), merge_engine.RejectedCount());