Remove the global (joint) scaling path

Profiling showed the joint Ceres solve took ~120 s versus ~3.5 s for the alternating per-image scaling loop (~35x) for no quality gain (HEWL anomalous 0.54x vs 0.53x), so it is not worth keeping. Drop GlobalScale.{h,cpp}, the jfjoch_process --global-scale flag, and ScalingSettings::GlobalScaling. While here, in the same scaling/process area: fold scale_fulls into ScalingSettings (alongside combine_3d) so the CLI and experiment carry it uniformly, add per-substep [timing] logging to the scaling/merge post-pass (including the serial MergeAll vs parallel ScaleAllImages split), and carry structured MergeStatistics + ISa in ProcessResult. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-25 14:22:10 +02:00
parent a37613690e
commit c84d581d3e
8 changed files with 71 additions and 641 deletions
@@ -133,6 +133,15 @@ bool ScalingSettings::GetCombine3D() const {
    return combine_3d;
 }

+ScalingSettings &ScalingSettings::ScaleFulls(bool input) {
+    scale_fulls = input;
+    return *this;
+}
+
+bool ScalingSettings::GetScaleFulls() const {
+    return scale_fulls;
+}
+
 double ScalingSettings::GetMinPartiality() const {
    return min_partiality;
 }
@@ -30,6 +30,10 @@ class ScalingSettings {
    // model, which stays Rotation - see Combine3D).
    bool combine_3d = false;

+    // Scale fulls: after the rot3d combine, refit a per-frame scale on the combined fulls (XDS
+    // order). A no-op without rot3d.
+    bool scale_fulls = false;
+
    double rfree_fraction = 0.05;
    IntensityFormat intensity_format = IntensityFormat::MTZ;

@@ -45,6 +49,7 @@ public:
    ScalingSettings& MinCCForImage(double min_cc_for_image);
    ScalingSettings& OutlierRejectNsigma(double input);
    ScalingSettings& Combine3D(bool input);
+    ScalingSettings& ScaleFulls(bool input);

    ScalingSettings& RfreeFraction(double input);
    ScalingSettings& FileFormat(IntensityFormat input);
@@ -73,6 +78,7 @@ public:
    [[nodiscard]] double GetMinCCForImage() const;
    [[nodiscard]] double GetOutlierRejectNsigma() const;
    [[nodiscard]] bool GetCombine3D() const;
+    [[nodiscard]] bool GetScaleFulls() const;

    [[nodiscard]] double GetRfreeFraction() const;
    [[nodiscard]] IntensityFormat GetFileFormat() const;
@@ -5,8 +5,6 @@ ADD_LIBRARY(JFJochScaleMerge
        Merge.h
        ScaleOnTheFly.cpp
        ScaleOnTheFly.h
-        GlobalScale.cpp
-        GlobalScale.h
        Combine3D.cpp
        Combine3D.h
        HKLKey.cpp
@@ -1,489 +0,0 @@
-// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
-// SPDX-License-Identifier: GPL-3.0-only
-
-#include "GlobalScale.h"
-
-#include <ceres/ceres.h>
-
-#include <thread>
-#include <iostream>
-#include <algorithm>
-#include <cmath>
-#include <limits>
-#include <stdexcept>
-#include <tuple>
-#include <unordered_map>
-#include <vector>
-
-namespace {
-    struct HKLKey {
-        int64_t h = 0;
-        int64_t k = 0;
-        int64_t l = 0;
-        bool is_positive = true; // only relevant if opt.merge_friedel == false
-
-        bool operator==(const HKLKey &o) const noexcept {
-            return h == o.h && k == o.k && l == o.l && is_positive == o.is_positive;
-        }
-    };
-
-    struct HKLKeyHash {
-        size_t operator()(const HKLKey &key) const noexcept {
-            auto mix = [](uint64_t x) {
-                x ^= x >> 33;
-                x *= 0xff51afd7ed558ccdULL;
-                x ^= x >> 33;
-                x *= 0xc4ceb9fe1a85ec53ULL;
-                x ^= x >> 33;
-                return x;
-            };
-            const uint64_t a = static_cast<uint64_t>(key.h);
-            const uint64_t b = static_cast<uint64_t>(key.k);
-            const uint64_t c = static_cast<uint64_t>(key.l);
-            const uint64_t d = static_cast<uint64_t>(key.is_positive ? 1 : 0);
-            return static_cast<size_t>(mix(a) ^ (mix(b) << 1) ^ (mix(c) << 2) ^ (mix(d) << 3));
-        }
-    };
-
-    inline double SafeSigma(double s, double min_sigma) {
-        if (!std::isfinite(s) || s <= 0.0)
-            return min_sigma;
-        return std::max(s, min_sigma);
-    }
-
-    inline double SafeD(double d) {
-        if (!std::isfinite(d) || d <= 0.0)
-            return std::numeric_limits<double>::quiet_NaN();
-        return d;
-    }
-
-    inline int SafeToInt(int64_t x) {
-        if (x < std::numeric_limits<int>::min() || x > std::numeric_limits<int>::max())
-            throw std::out_of_range("HKL index out of int range for Gemmi");
-        return static_cast<int>(x);
-    }
-
-    inline double SafeInv(double x, double fallback) {
-        if (!std::isfinite(x) || x == 0.0)
-            return fallback;
-        return 1.0 / x;
-    }
-
-    inline HKLKey CanonicalizeHKLKey(const Reflection &r, const GlobalScaleOptions &opt) {
-        HKLKey key{};
-        key.h = r.h;
-        key.k = r.k;
-        key.l = r.l;
-        key.is_positive = true;
-
-        if (!opt.space_group.has_value()) {
-            if (!opt.merge_friedel) {
-                const HKLKey neg{-r.h, -r.k, -r.l, true};
-                const bool pos = std::tie(key.h, key.k, key.l) >= std::tie(neg.h, neg.k, neg.l);
-                if (!pos) {
-                    key.h = -key.h;
-                    key.k = -key.k;
-                    key.l = -key.l;
-                    key.is_positive = false;
-                }
-            }
-            return key;
-        }
-
-        const gemmi::SpaceGroup &sg = *opt.space_group;
-        const gemmi::GroupOps gops = sg.operations();
-        const gemmi::ReciprocalAsu rasu(&sg);
-
-        const gemmi::Op::Miller in{{SafeToInt(r.h), SafeToInt(r.k), SafeToInt(r.l)}};
-        const auto [asu_hkl, sign_plus] = rasu.to_asu_sign(in, gops);
-
-        key.h = asu_hkl[0];
-        key.k = asu_hkl[1];
-        key.l = asu_hkl[2];
-        key.is_positive = opt.merge_friedel ? true : sign_plus;
-        return key;
-    }
-
-    struct IntensityRotResidual {
-        IntensityRotResidual(const Reflection &r, double sigma_obs, double wedge_deg)
-            : Iobs_(static_cast<double>(r.I)),
-              weight_(SafeInv(sigma_obs, 1.0)),
-              delta_phi_(r.delta_phi_deg),
-              lp_(SafeInv(r.rlp, 1.0)),
-              c1_(r.zeta / std::sqrt(2.0)) {
-        }
-
-        template<typename T>
-        bool operator()(const T *const G,
-                        const T *const mosaicity,
-                        const T *const Itrue,
-                        const T *const wedge,
-                        T *residual) const {
-            T partiality;
-            if (mosaicity[0] >= 0.0) {
-                const T half_wedge = wedge[0] / T(2.0);
-                const T arg_plus = T(delta_phi_ + half_wedge) * T(c1_) / mosaicity[0];
-                const T arg_minus = T(delta_phi_ - half_wedge) * T(c1_) / mosaicity[0];
-                partiality = (ceres::erf(arg_plus) - ceres::erf(arg_minus)) / T(2.0);
-            } else
-                partiality = T(1.0);
-
-            const T Ipred = G[0] * partiality * T(lp_) * Itrue[0];
-            residual[0] = (Ipred - T(Iobs_)) * T(weight_);
-            return true;
-        }
-
-        double Iobs_;
-        double weight_;
-        double delta_phi_;
-        double lp_;
-        double c1_;
-    };
-
-    struct IntensityStillResidual {
-        IntensityStillResidual(const Reflection &r, double sigma_obs)
-            : Iobs_(static_cast<double>(r.I)),
-              weight_(SafeInv(sigma_obs, 1.0)),
-              lp_(SafeInv(r.rlp, 1.0)),
-              dist_ewald_sq_(r.dist_ewald * r.dist_ewald) {
-        }
-
-        template<typename T>
-        bool operator()(const T *const G,
-                        const T *const R,
-                        const T *const Itrue,
-                        T *residual) const {
-            const T partiality = ceres::exp(-T(dist_ewald_sq_)/R[0]);
-            const T Ipred = G[0] * partiality * T(lp_) * Itrue[0];
-            residual[0] = (Ipred - T(Iobs_)) * T(weight_);
-            return true;
-        }
-
-        double Iobs_;
-        double weight_;
-        double lp_;
-        double dist_ewald_sq_;
-    };
-
-    struct IntensityFixedResidual {
-        IntensityFixedResidual(const Reflection &r, double sigma_obs, double partiality)
-            : Iobs_(static_cast<double>(r.I)),
-              weight_(SafeInv(sigma_obs, 1.0)),
-              corr_(partiality * SafeInv(r.rlp, 1.0))
-        {}
-
-        template<typename T>
-        bool operator()(const T *const G,
-                        const T *const Itrue,
-                        T *residual) const {
-            const T Ipred = T(corr_) * G[0] * Itrue[0];
-            residual[0] = (Ipred - T(Iobs_)) * T(weight_);
-            return true;
-        }
-
-        double Iobs_;
-        double weight_;
-        double corr_;
-    };
-
-    struct ScaleRegularizationResidual {
-        explicit ScaleRegularizationResidual(double sigma_k)
-            : inv_sigma_(SafeInv(sigma_k, 1.0)) {
-        }
-
-        template<typename T>
-        bool operator()(const T *const k, T *residual) const {
-            residual[0] = (k[0] - T(1.0)) * T(inv_sigma_);
-            return true;
-        }
-
-        double inv_sigma_;
-    };
-
-    struct SmoothnessRegularizationResidual {
-        explicit SmoothnessRegularizationResidual(double sigma)
-            : inv_sigma_(SafeInv(sigma, 1.0)) {
-        }
-
-        template<typename T>
-        bool operator()(const T *const k0,
-                        const T *const k1,
-                        const T *const k2,
-                        T *residual) const {
-            residual[0] = (ceres::log(k0[0]) + ceres::log(k2[0]) - T(2.0) * ceres::log(k1[0])) * T(inv_sigma_);
-            return true;
-        }
-
-        double inv_sigma_;
-    };
-
-    struct ObsRef {
-        const Reflection *r = nullptr;
-        int img_id = 0;
-        int hkl_slot = -1;
-        double sigma = 0.0;
-    };
-
-    void scale(const GlobalScaleOptions &opt,
-               std::vector<double> &g,
-               std::vector<double> &mosaicity,
-               std::vector<double> &R_sq,
-               const std::vector<uint8_t> &image_slot_used,
-               bool rotation_crystallography,
-               size_t nhkl,
-               const std::vector<ObsRef> &obs) {
-        ceres::Problem problem;
-
-        std::vector<double> Itrue(nhkl, 0.0);
-
-        // Initialize Itrue from per-HKL median of observed intensities
-        {
-            std::vector<std::vector<double> > per_hkl_I(nhkl);
-            for (const auto &o: obs) {
-                per_hkl_I[o.hkl_slot].push_back(static_cast<double>(o.r->I));
-            }
-            for (int h = 0; h < nhkl; ++h) {
-                auto &v = per_hkl_I[h];
-                if (v.empty()) {
-                    Itrue[h] = std::max(opt.min_sigma, 1e-6);
-                    continue;
-                }
-                std::nth_element(v.begin(), v.begin() + static_cast<long>(v.size() / 2), v.end());
-                double med = v[v.size() / 2];
-                if (!std::isfinite(med) || med <= opt.min_sigma)
-                    med = opt.min_sigma;
-                Itrue[h] = med;
-            }
-        }
-
-        double wedge = opt.wedge_deg.value_or(0.0);
-
-        for (const auto &o: obs) {
-            switch (opt.partiality_model) {
-                case GlobalScaleOptions::PartialityModel::Rotation: {
-                    auto *cost = new ceres::AutoDiffCostFunction<IntensityRotResidual, 1, 1, 1, 1, 1>(
-                        new IntensityRotResidual(*o.r, o.sigma, opt.wedge_deg.value_or(0.0)));
-                    problem.AddResidualBlock(cost,
-                                             nullptr,
-                                             &g[o.img_id],
-                                             &mosaicity[o.img_id],
-                                             &Itrue[o.hkl_slot],
-                                             &wedge);
-                }
-                break;
-                case GlobalScaleOptions::PartialityModel::Still: {
-                    auto *cost = new ceres::AutoDiffCostFunction<IntensityStillResidual, 1, 1, 1, 1>(
-                        new IntensityStillResidual(*o.r, o.sigma));
-                    problem.AddResidualBlock(cost,
-                                             nullptr,
-                                             &g[o.img_id],
-                                             &R_sq[o.img_id],
-                                             &Itrue[o.hkl_slot]);
-                }
-                break;
-                case GlobalScaleOptions::PartialityModel::Unity: {
-                    auto *cost = new ceres::AutoDiffCostFunction<IntensityFixedResidual, 1, 1, 1>(
-                        new IntensityFixedResidual(*o.r, o.sigma, 1.0));
-                    problem.AddResidualBlock(cost,
-                                             nullptr,
-                                             &g[o.img_id],
-                                             &Itrue[o.hkl_slot]);
-                }
-                break;
-                case GlobalScaleOptions::PartialityModel::Fixed: {
-                    auto *cost = new ceres::AutoDiffCostFunction<IntensityFixedResidual, 1, 1, 1>(
-                        new IntensityFixedResidual(*o.r, o.sigma, o.r->partiality));
-                    problem.AddResidualBlock(cost,
-                                             nullptr,
-                                             &g[o.img_id],
-                                             &Itrue[o.hkl_slot]);
-                }
-                break;
-            }
-        }
-
-        for (int i = 0; i < g.size(); ++i) {
-            if (image_slot_used[i]) {
-                auto *cost = new ceres::AutoDiffCostFunction<ScaleRegularizationResidual, 1, 1>(
-                    new ScaleRegularizationResidual(0.05));
-                problem.AddResidualBlock(cost, nullptr, &g[i]);
-            }
-        }
-
-        if (rotation_crystallography) {
-            if (opt.smoothen_g) {
-                for (int i = 0; i < g.size() - 2; ++i) {
-                    if (image_slot_used[i] && image_slot_used[i + 1] && image_slot_used[i + 2]) {
-                        auto *cost = new ceres::AutoDiffCostFunction<SmoothnessRegularizationResidual, 1, 1, 1, 1>(
-                            new SmoothnessRegularizationResidual(0.05));
-
-                        problem.AddResidualBlock(cost, nullptr, &g[i], &g[i + 1], &g[i + 2]);
-                    }
-                }
-            }
-
-            if (opt.smoothen_mos && opt.partiality_model == GlobalScaleOptions::PartialityModel::Rotation) {
-                for (int i = 0; i < mosaicity.size() - 2; ++i) {
-                    if (image_slot_used[i] && image_slot_used[i + 1] && image_slot_used[i + 2]) {
-                        auto *cost = new ceres::AutoDiffCostFunction<SmoothnessRegularizationResidual, 1, 1, 1, 1>(
-                            new SmoothnessRegularizationResidual(0.05));
-                        problem.AddResidualBlock(cost, nullptr, &mosaicity[i], &mosaicity[i + 1], &mosaicity[i + 2]);
-                    }
-                }
-            }
-        }
-
-        if (opt.partiality_model == GlobalScaleOptions::PartialityModel::Still) {
-            for (int i = 0; i < R_sq.size(); ++i) {
-                if (image_slot_used[i]) {
-                    problem.SetParameterLowerBound(&R_sq[i], 0, 1e-9);
-                    problem.SetParameterUpperBound(&R_sq[i], 0, 1.0);
-                }
-            }
-        }
-
-        // Scaling factors must be always positive
-        for (int i = 0; i < g.size(); i++) {
-            if (image_slot_used[i])
-                problem.SetParameterLowerBound(&g[i], 0, 1e-12);
-        }
-
-        // Mosaicity refinement + bounds
-        if (opt.partiality_model == GlobalScaleOptions::PartialityModel::Rotation) {
-            for (int i = 0; i < mosaicity.size(); ++i) {
-                if (image_slot_used[i]) {
-                    problem.SetParameterLowerBound(&mosaicity[i], 0, opt.mosaicity_min_deg);
-                    problem.SetParameterUpperBound(&mosaicity[i], 0, opt.mosaicity_max_deg);
-                }
-            }
-            if (!opt.refine_wedge)
-                problem.SetParameterBlockConstant(&wedge);
-            else
-                problem.SetParameterLowerBound(&wedge, 0, 0.0);
-        }
-
-        // use all available threads
-        unsigned int hw = std::thread::hardware_concurrency();
-        if (hw == 0)
-            hw = 1; // fallback
-
-        ceres::Solver::Options options;
-
-        options.linear_solver_type = ceres::SPARSE_NORMAL_CHOLESKY;
-        options.minimizer_progress_to_stdout = false;
-        options.max_num_iterations = opt.max_num_iterations;
-        options.max_solver_time_in_seconds = opt.max_solver_time_s;
-        options.num_threads = static_cast<int>(hw);
-        options.function_tolerance = 1e-4;
-
-        ceres::Solver::Summary summary;
-        ceres::Solve(options, &problem, &summary);
-        std::cout << summary.BriefReport() << std::endl;
-    }
-
-    void proc_obs(const std::vector<std::vector<Reflection> > &observations,
-        const GlobalScaleOptions &opt,
-        std::vector<uint8_t> &image_slot_used,
-        std::vector<ObsRef> &obs,
-        std::unordered_map<HKLKey, int, HKLKeyHash> &hklToSlot
-        ) {
-        for (int i = 0; i < observations.size(); i++) {
-            for (const auto &r: observations[i]) {
-                const double d = SafeD(r.d);
-                if (!std::isfinite(d))
-                    continue;
-                if (!std::isfinite(r.I))
-                    continue;
-
-                if (opt.d_min_limit_A > 0.0 && d < opt.d_min_limit_A)
-                    continue;
-
-                if (!std::isfinite(r.zeta) || r.zeta <= 0.0f)
-                    continue;
-                if (!std::isfinite(r.rlp) || r.rlp == 0.0f)
-                    continue;
-
-                const double sigma = SafeSigma(r.sigma, opt.min_sigma);
-
-                const int img_id = i / opt.image_cluster;
-                image_slot_used[img_id] = 1;
-
-                int hkl_slot;
-                try {
-                    const HKLKey key = CanonicalizeHKLKey(r, opt);
-                    auto it = hklToSlot.find(key);
-                    if (it == hklToSlot.end()) {
-                        hkl_slot = static_cast<int>(hklToSlot.size());
-                        hklToSlot.emplace(key, hkl_slot);
-                    } else {
-                        hkl_slot = it->second;
-                    }
-                } catch (...) {
-                    continue;
-                }
-
-                ObsRef o;
-                o.r = &r;
-                o.img_id = img_id;
-                o.hkl_slot = hkl_slot;
-                o.sigma = sigma;
-                obs.push_back(o);
-            }
-        }
-    }
-} // namespace
-
-GlobalScaleResult GlobalScaleCeres(const std::vector<std::vector<Reflection> > &observations,
-                                   const GlobalScaleOptions &opt) {
-    if (opt.image_cluster <= 0)
-        throw std::invalid_argument("image_cluster must be positive");
-
-    const bool rotation_crystallography = opt.wedge_deg.has_value();
-
-    size_t nrefl = 0;
-    for (const auto &i: observations)
-        nrefl += i.size();
-
-    std::vector<ObsRef> obs;
-    obs.reserve(nrefl);
-
-    std::unordered_map<HKLKey, int, HKLKeyHash> hklToSlot;
-    hklToSlot.reserve(nrefl);
-
-    size_t n_image_slots = observations.size() / opt.image_cluster +
-        (observations.size() % opt.image_cluster > 0 ? 1 : 0);
-
-    std::vector<uint8_t> image_slot_used(n_image_slots, 0);
-
-    proc_obs(observations, opt, image_slot_used, obs, hklToSlot);
-
-    const int nhkl = static_cast<int>(hklToSlot.size());
-
-    std::vector<double> g(n_image_slots, 1.0);
-    std::vector<double> mosaicity(n_image_slots, opt.mosaicity_init_deg);
-    std::vector<double> R_sq(n_image_slots, 0.001 * 0.001);
-
-    for (int i = 0; i < n_image_slots; i++) {
-        if (!image_slot_used[i]) {
-            mosaicity[i] = NAN;
-            g[i] = NAN;
-            R_sq[i] = NAN;
-        } else if (opt.mosaicity_init_deg_vec.size() > i && std::isfinite(opt.mosaicity_init_deg_vec[i])) {
-            mosaicity[i] = opt.mosaicity_init_deg_vec[i];
-        }
-    }
-
-    scale(opt, g, mosaicity, R_sq, image_slot_used, rotation_crystallography, nhkl, obs);
-
-    GlobalScaleResult out;
-    out.image_scale_g.resize(observations.size(), NAN);
-    out.mosaicity_deg.resize(observations.size(), NAN);
-    for (int i = 0; i < observations.size(); i++) {
-        size_t img_slot = i / opt.image_cluster;
-        if (image_slot_used[img_slot]) {
-            out.image_scale_g[i] = static_cast<float>(g[img_slot]);
-            out.mosaicity_deg[i] = static_cast<float>(mosaicity[img_slot]);
-        }
-    }
-    return out;
-}
@@ -1,54 +0,0 @@
-// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
-// SPDX-License-Identifier: GPL-3.0-only
-
-#pragma once
-
-#include <vector>
-#include <cstdint>
-#include <optional>
-
-#include "../../common/Reflection.h"
-#include "gemmi/symmetry.hpp"
-
-// Global (joint) scaling: instead of the alternating merge-then-scale-each-image loop, refine all
-// per-image scales G_i, per-image mosaicities, the shared per-HKL true intensities Itrue and the
-// global wedge together in a single Ceres problem. Returns only the per-image scale factors; the
-// merge afterwards is done by MergeOnTheFly (Merge.h), the same as the alternating path.
-struct GlobalScaleOptions {
-    int max_num_iterations = 100;
-    double max_solver_time_s = 60.0;
-    double min_sigma = 1e-3;
-
-    // Symmetry canonicalization of HKL prior to scaling. If not set, raw HKL are used as-is.
-    std::optional<gemmi::SpaceGroup> space_group;
-
-    // If true, treat Friedel mates as equivalent. If false, keep them separate (anomalous).
-    bool merge_friedel = true;
-
-    // Rotation (wedge) partiality model. Set the wedge to enable it; leave unset for stills.
-    std::optional<double> wedge_deg;
-
-    double mosaicity_init_deg = 0.17;
-    double mosaicity_min_deg = 1e-3;
-    double mosaicity_max_deg = 2.0;
-    std::vector<double> mosaicity_init_deg_vec;
-
-    bool smoothen_g = true;
-    bool smoothen_mos = true;
-
-    double d_min_limit_A = 0.0;
-
-    int64_t image_cluster = 1;
-
-    bool refine_wedge = false;
-
-    enum class PartialityModel {Fixed, Rotation, Unity, Still} partiality_model = PartialityModel::Fixed;
-};
-
-struct GlobalScaleResult {
-    std::vector<float> image_scale_g;   ///< per input image (NaN if the image had no usable reflections)
-    std::vector<float> mosaicity_deg;   ///< per input image (NaN unless the rotation model is used)
-};
-
-GlobalScaleResult GlobalScaleCeres(const std::vector<std::vector<Reflection>>& observations,
-                                   const GlobalScaleOptions& opt = {});
@@ -27,7 +27,6 @@
 #include "../image_analysis/image_preprocessing/ImagePreprocessorCPU.h"
 #include "../image_analysis/image_preprocessing/ImagePreprocessorBuffer.h"
 #include "../image_analysis/scale_merge/Merge.h"
-#include "../image_analysis/scale_merge/GlobalScale.h"
 #include "../image_analysis/scale_merge/ScaleOnTheFly.h"
 #include "../image_analysis/scale_merge/SearchSpaceGroup.h"
 #include "../image_analysis/scale_merge/Combine3D.h"
@@ -57,69 +56,6 @@ namespace {
        return ret;
    }

-    // Global (joint) scaling: refine all per-image scales/mosaicities and the shared per-HKL
-    // intensities in one Ceres problem (GlobalScaleCeres), then write the result back into each
-    // reflection's image_scale_corr exactly as ScaleOnTheFly does - recompute the rotation
-    // partiality from the refined per-image mosaicity, then image_scale_corr = rlp/(partiality*G).
-    // The downstream rot3d combine and MergeOnTheFly are unchanged, so every metric stays
-    // comparable with the alternating path; only how G and mosaicity are found differs.
-    void RunGlobalScaling(const DiffractionExperiment &experiment,
-                          std::vector<IntegrationOutcome> &outcomes, Logger &logger) {
-        const auto &s = experiment.GetScalingSettings();
-        const bool rotation = experiment.GetPartialityModel() == PartialityModel::Rotation;
-
-        std::vector<std::vector<Reflection>> observations;
-        observations.reserve(outcomes.size());
-        for (const auto &o: outcomes)
-            observations.push_back(o.reflections);
-
-        GlobalScaleOptions opt;
-        opt.merge_friedel = s.GetMergeFriedel();
-        if (const auto sg = experiment.GetGemmiSpaceGroup())
-            opt.space_group = *sg;
-        opt.d_min_limit_A = s.GetHighResolutionLimit_A().value_or(0.0);
-        opt.mosaicity_init_deg = s.GetDefaultMosaicity();
-        // The joint problem is large (per-image G + mosaicity + all Itrue + wedge); give the solver
-        // enough room to converge rather than the per-image default.
-        opt.max_solver_time_s = 300.0;
-        opt.max_num_iterations = 50;
-
-        double wedge = 0.0;
-        if (rotation) {
-            opt.partiality_model = GlobalScaleOptions::PartialityModel::Rotation;
-            wedge = experiment.GetRotationWedgeForScaling().value_or(0.0);
-            opt.wedge_deg = wedge;
-            opt.mosaicity_min_deg = s.GetMinMosaicity();
-            opt.mosaicity_max_deg = s.GetMaxMosaicity();
-        } else {
-            opt.partiality_model = GlobalScaleOptions::PartialityModel::Fixed;
-        }
-
-        const auto result = GlobalScaleCeres(observations, opt);
-
-        const double half_wedge = wedge / 2.0;
-        for (size_t i = 0; i < outcomes.size(); ++i) {
-            const double G = result.image_scale_g[i];
-            const double mos = result.mosaicity_deg[i];
-            for (auto &r: outcomes[i].reflections) {
-                if (rotation && std::isfinite(r.delta_phi_deg) && std::isfinite(r.zeta) && mos > 1e-6) {
-                    const double c1 = r.zeta / std::sqrt(2.0);
-                    r.partiality = static_cast<float>(
-                        (std::erf((r.delta_phi_deg + half_wedge) * c1 / mos)
-                         - std::erf((r.delta_phi_deg - half_wedge) * c1 / mos)) / 2.0);
-                }
-                const double denom = r.partiality * G;
-                r.image_scale_corr = (std::isfinite(r.rlp) && std::isfinite(denom) && denom > 0.0)
-                                         ? static_cast<float>(r.rlp / denom) : NAN;
-            }
-            if (std::isfinite(G))
-                outcomes[i].image_scale_g = static_cast<float>(G);
-            if (rotation && std::isfinite(mos))
-                outcomes[i].mosaicity_deg = static_cast<float>(mos);
-        }
-        logger.Info("Global scaling complete ({} images)", outcomes.size());
-    }
-
    // XDS-order scaling. The rot3d combine emits fulls with partiality == 1 (image_scale_corr == 1),
    // so they were only ever scaled as per-frame *partials* upstream - their per-frame scale is
    // entangled with the rocking-curve/partiality model. This refits a per-frame scale directly on
@@ -457,8 +393,21 @@ ProcessResult JFJochProcess::Run(JFJochProcessObserver *observer) {
    // Scaling and merging (full analysis only).
    if (full && !cancelled_ && result.indexing_rate.has_value() && result.indexing_rate > 0
        && (config_.run_scaling || !config_.reference_data.empty())) {
-        if (observer)
-            observer->OnPhase("Scaling and merging");
+        // Scaling/merging is a long post-pass; report each sub-step as a phase so the GUI progress
+        // bar reflects what is happening instead of freezing on one label. Also time each phase
+        // (logged on transition) so the bottlenecks are visible.
+        auto t_phase = std::chrono::steady_clock::now();
+        std::string prev_phase;
+        auto phase = [&](const std::string &p) {
+            const auto now = std::chrono::steady_clock::now();
+            if (!prev_phase.empty())
+                logger.Info("[timing] {}: {:.2f} s", prev_phase,
+                            std::chrono::duration<double>(now - t_phase).count());
+            t_phase = now;
+            prev_phase = p;
+            if (observer) observer->OnPhase(p);
+        };
+        phase("Scaling and merging");

        const bool pixel_refine_path =
            experiment_.GetIndexingSettings().GetGeomRefinementAlgorithm() == GeomRefinementAlgorithmEnum::PixelRefine;
@@ -466,31 +415,41 @@ ProcessResult JFJochProcess::Run(JFJochProcessObserver *observer) {
        // ScaleOnTheFly is only for the classical, no-reference path; with a reference (or
        // PixelRefine) each image is already scaled, so we merge directly.
        if (config_.reference_data.empty() && !pixel_refine_path) {
-            if (config_.global_scaling) {
-                logger.Info("Running global (joint) scaling ...");
-                RunGlobalScaling(experiment_, indexer->GetIntegrationOutcome(), logger);
-            } else {
-                logger.Info("Running scaling ...");
-                ScalingResult scale_result(0);
-                for (int i = 0; i < config_.scaling_iter; i++) {
-                    auto merge_result = MergeAll(experiment_, indexer->GetIntegrationOutcome(), false);
-                    scale_result = indexer->ScaleAllImages(merge_result);
-                }
+            logger.Info("Running scaling ...");
+            ScalingResult scale_result(0);
+            double t_merge_all = 0.0, t_scale = 0.0;
+            for (int i = 0; i < config_.scaling_iter; i++) {
+                phase("Scaling images (iteration " + std::to_string(i + 1) + "/"
+                      + std::to_string(config_.scaling_iter) + ")");
+                const auto a = std::chrono::steady_clock::now();
+                auto merge_result = MergeAll(experiment_, indexer->GetIntegrationOutcome(), false);
+                const auto b = std::chrono::steady_clock::now();
+                scale_result = indexer->ScaleAllImages(merge_result);
+                const auto c = std::chrono::steady_clock::now();
+                t_merge_all += std::chrono::duration<double>(b - a).count();
+                t_scale += std::chrono::duration<double>(c - b).count();
            }
+            logger.Info("[timing] scaling loop ({} iter): MergeAll(serial) {:.2f} s, ScaleAllImages(parallel) {:.2f} s",
+                        config_.scaling_iter, t_merge_all, t_scale);
        }

        // -P rot3d: weight-sum each reflection's per-frame partials into one full before merging, so
        // the error model sees counting statistics (high ISa) instead of rocking-curve slicing scatter.
        const bool rot3d = experiment_.GetScalingSettings().GetCombine3D();
        std::vector<IntegrationOutcome> combined;
-        if (rot3d)
+        if (rot3d) {
+            phase("Combining 3D partials");
            combined = CombineRotationObservations(indexer->GetIntegrationOutcome(), experiment_, &logger,
                                                   config_.observation_dump_path);
-        if (rot3d && config_.scale_fulls)
+        }
+        if (rot3d && experiment_.GetScalingSettings().GetScaleFulls()) {
+            phase("Scaling fulls (XDS order)");
            ScaleFulls(experiment_, combined, static_cast<int>(config_.scaling_iter), config_.nthreads, logger);
+        }
        const std::vector<IntegrationOutcome> &merge_input =
            rot3d ? combined : indexer->GetIntegrationOutcome();

+        phase("Merging");
        MergeOnTheFly merge_engine(experiment_);
        if (result.consensus_cell.has_value())
            merge_engine.ReferenceCell(*result.consensus_cell);
@@ -503,6 +462,7 @@ ProcessResult JFJochProcess::Run(JFJochProcessObserver *observer) {
            merge_engine.AddImage(outcome);

        auto merged_reflections = merge_engine.ExportReflections();
+        phase("Computing statistics");
        auto merged_statistics = merge_engine.MergeStats(merged_reflections, merge_input,
                                                         config_.reference_data);
        logger.Info("Merge complete ({} unique reflections)", merged_reflections.size());
@@ -523,8 +483,16 @@ ProcessResult JFJochProcess::Run(JFJochProcessObserver *observer) {
        stats_text << merged_statistics;
        result.merge_statistics_text = stats_text.str();

-        if (result.consensus_cell && write_files)
+        result.has_merge_statistics = true;
+        result.merge_statistics = merged_statistics;
+        result.error_model_isa = merge_engine.ErrorModelB() > 0 ? 1.0 / merge_engine.ErrorModelB() : 0.0;
+        result.has_reference = !config_.reference_data.empty();
+
+        if (result.consensus_cell && write_files) {
+            phase("Writing reflections");
            WriteReflections(merged_reflections, *result.consensus_cell, experiment_, config_.output_prefix);
+        }
+        phase("");   // flush the last phase's timing
    }

    if (writer) {
@@ -15,6 +15,7 @@
 #include "../common/JFJochMessages.h"        // DataMessage
 #include "../common/JFJochReceiverPlots.h"   // MeanProcessingTime
 #include "../image_analysis/spot_finding/SpotFindingSettings.h"
+#include "../image_analysis/scale_merge/Merge.h"   // MergeStatistics

 class JFJochHDF5Reader;

@@ -53,13 +54,6 @@ struct ProcessConfig {
    // per-image live scaling path when present.
    bool run_scaling = false;
    int64_t scaling_iter = 3;
-    // Global (joint) scaling instead of the alternating merge/scale loop: refine all per-image
-    // scales, mosaicities and the shared per-HKL intensities together in one Ceres problem.
-    bool global_scaling = false;
-    // XDS-order scaling: after the rot3d combine, refit a per-frame scale on the combined fulls
-    // (Unity model, no partiality term) so the per-frame scale is decoupled from rocking-curve
-    // model error. Only meaningful with -P rot3d.
-    bool scale_fulls = false;
    std::vector<MergedReflection> reference_data;

    // Diagnostic: if set, the -P rot3d combine writes the unmerged fulls here (for comparison vs XDS).
@@ -78,6 +72,13 @@ struct ProcessResult {
    MeanProcessingTime mean_processing_time{};
    std::optional<std::string> written_master_path;
    std::string merge_statistics_text;   // populated when scaling/merging ran
+
+    // Structured merge statistics (per-shell + overall), populated when scaling/merging ran. ISa is
+    // the error-model asymptotic I/sigma (1/b); has_reference is true when a reference MTZ drove CCref.
+    bool has_merge_statistics = false;
+    MergeStatistics merge_statistics;
+    double error_model_isa = 0.0;
+    bool has_reference = false;
 };

 // Callbacks for progress and live results. Methods may be called from worker threads, so an
@@ -49,7 +49,6 @@ void print_usage() {

    std::cout << "  Scaling and merging" << std::endl;
    std::cout << "   -M, --scale-merge               Scale and merge (refine mosaicity) and write scaled.hkl + image.dat" << std::endl;
-    std::cout << "   --global-scale                  Joint global scaling (all images + Itrue in one fit) instead of the alternating loop; implies -M" << std::endl;
    std::cout << "   --scale-fulls                   After -P rot3d combine, refit a per-frame scale on the fulls (XDS order, Unity model); implies -M" << std::endl;
    std::cout << "   -P, --partiality <txt>          Partiality model fixed|rot|rot3d|unity (default: fixed). rot3d = rot + 3D combine of per-frame partials" << std::endl;
    std::cout << "   -A, --anomalous                 Anomalous mode (don't merge Friedel pairs)" << std::endl;
@@ -94,7 +93,6 @@ enum {
    OPT_REFERENCE_COLUMN,
    OPT_DUMP_OBSERVATIONS,
    OPT_INTEGRATOR,
-    OPT_GLOBAL_SCALE,
    OPT_SCALE_FULLS
 };

@@ -116,7 +114,6 @@ static option long_options[] = {
    {"refine-bfactor", no_argument, nullptr, 'B'},
    {"wedge", optional_argument, nullptr, 'w'},
    {"scale-merge", no_argument, nullptr, 'M'},
-    {"global-scale", no_argument, nullptr, OPT_GLOBAL_SCALE},
    {"scale-fulls", no_argument, nullptr, OPT_SCALE_FULLS},
    {"refine", required_argument, nullptr, 'r'},

@@ -277,7 +274,6 @@ int main(int argc, char **argv) {
    int rotation_indexing_image_count = 100;
    std::optional<float> rotation_indexing_range;
    bool run_scaling = false;
-    bool global_scaling = false;
    bool scale_fulls = false;
    bool anomalous_mode = false;
    std::optional<int64_t> space_group_number;
@@ -508,10 +504,6 @@ int main(int argc, char **argv) {
        case 'M':
            run_scaling = true;
            break;
-        case OPT_GLOBAL_SCALE:
-            run_scaling = true;
-            global_scaling = true;
-            break;
        case OPT_SCALE_FULLS:
            run_scaling = true;
            scale_fulls = true;
@@ -722,6 +714,7 @@ int main(int argc, char **argv) {
    ScalingSettings scaling_settings;
    scaling_settings.SetPartialityModel(partiality_model);
    scaling_settings.Combine3D(combine_3d);
+    scaling_settings.ScaleFulls(scale_fulls);
    if (d_min_scale_merge)
        scaling_settings.HighResolutionLimit_A(d_min_scale_merge.value());
    scaling_settings.MergeFriedel(!anomalous_mode);
@@ -798,8 +791,6 @@ int main(int argc, char **argv) {
    config.rotation_indexing_image_count = rotation_indexing_image_count;
    config.forced_rotation_lattice = forced_rotation_lattice;
    config.run_scaling = run_scaling;
-    config.global_scaling = global_scaling;
-    config.scale_fulls = scale_fulls;
    config.scaling_iter = scaling_iter;
    config.reference_data = reference_data;
    config.observation_dump_path = dump_observations;