Files
Jungfraujoch/image_analysis/LoadFCalcFromMtz.cpp
T
leonarski_f e4b3064254 Scaling: reference dataset, 3D combine (-P rot3d), R-meas, profile-fit integrator
Reference dataset (a): LoadReferenceMtz adds column selection + cell/SG/resolution +
a data-vs-reference consistency check; jfjoch_process/jfjoch_scale gain
--reference-column; the viewer gets a Reference section in the MX settings dock
(worker-owned, independent of the loaded dataset) that flows into reprocessing jobs.

3D combine (-P rot3d): Combine3D weight-sums a reflection's per-frame partials into one
counting-limited full before merging (orthogonal ScalingSettings::combine_3d flag, not a
partiality model), with a de-biased Poisson variance. Crystal 2: ISa 1.7->8.4, R-meas
~67%->18.9%, intensities unchanged (CCref held).

Quality metrics (b): R-meas (Diederichs-Karplus) + redundancy columns in MergeStats; ISa
logged. jfjoch fulls 18.9% vs XDS 4.5% (same ASU/run).

Profile-fit integrator (experimental): ProfileIntegrate2D (--integrator gaussian|empirical)
is a reference-free, rot3d-compatible profile-fit extraction (the decomposed PixelRefine
intensity step). Gaussian: R-meas 18.9->14.6%, ISa ->9.5. Anisotropy/per-region add nothing
(the discriminating info is in the discarded rocking direction). See NEXTGEN_INTEGRATOR.md.
--dump-observations exports the unmerged fulls for XDS comparison.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-25 20:43:04 +02:00

161 lines
6.0 KiB
C++

// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#include "LoadFCalcFromMtz.h"
#include <cmath>
#include <cstdint>
#include <limits>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include <gemmi/mtz.hpp>
#include <gemmi/symmetry.hpp>
namespace {
// Reference intensities can come from a calculated structure factor F-model (squared to an
// intensity), or from a merged/observed mean-intensity column (used directly - this is what lets
// PixelRefine be self-seeded from the data's own previous merge). column_with_one_of_labels would
// hide which label matched, so the priority list is walked explicitly to record the choice.
const gemmi::Mtz::Column* SelectDefaultColumn(const gemmi::Mtz& mtz, bool& square) {
if (const auto* col = mtz.column_with_label("F-model", nullptr, 'F')) {
square = true;
return col;
}
for (const char* label : {"IMEAN", "I", "IOBS", "Iobs", "I-obs"}) {
if (const auto* col = mtz.column_with_label(label, nullptr, 'J')) {
square = false;
return col;
}
}
for (const auto& c : mtz.columns)
if (c.type == 'J') {
square = false;
return &c;
}
return nullptr;
}
} // namespace
ReferenceMtzData LoadReferenceMtz(const std::string& path,
const std::optional<std::string>& column) {
gemmi::Mtz mtz;
mtz.read_file_gz(path, true);
ReferenceMtzData out;
// Columns the caller may pick as reference intensities/structure factors.
for (const auto& c : mtz.columns)
if (c.type == 'J' || c.type == 'F')
out.candidate_columns.push_back({c.label, c.type});
const gemmi::Mtz::Column* col = nullptr;
if (column) {
col = mtz.column_with_label(*column, nullptr);
if (col == nullptr)
throw std::runtime_error("Reference MTZ has no column '" + *column + "'");
out.squared = (col->type == 'F');
out.default_column = false;
} else {
col = SelectDefaultColumn(mtz, out.squared);
if (col == nullptr)
throw std::runtime_error("MTZ has no F-model or intensity (J) column to use as reference");
out.default_column = true;
}
out.used_column = col->label;
out.used_column_type = col->type;
// Cell and space group the reference was recorded in, for display and the consistency check.
const gemmi::UnitCell& gc = mtz.get_cell();
const bool cell_valid = gc.a > 0 && gc.b > 0 && gc.c > 0;
if (cell_valid)
out.cell = UnitCell{static_cast<float>(gc.a), static_cast<float>(gc.b), static_cast<float>(gc.c),
static_cast<float>(gc.alpha), static_cast<float>(gc.beta),
static_cast<float>(gc.gamma)};
if (mtz.spacegroup != nullptr) {
out.space_group_number = mtz.spacegroup->number;
out.space_group_name = mtz.spacegroup->short_name();
out.point_group = mtz.spacegroup->point_group_hm();
}
out.reflections.reserve(static_cast<std::size_t>(mtz.nreflections));
const std::size_t stride = mtz.columns.size();
double d_min = std::numeric_limits<double>::max();
double d_max = 0.0;
for (int i = 0; i < mtz.nreflections; ++i) {
const float v = (*col)[static_cast<std::size_t>(i)];
if (std::isnan(v))
continue;
const std::size_t row = static_cast<std::size_t>(i) * stride;
MergedReflection r;
r.h = static_cast<int32_t>(mtz.data[row + 0]);
r.k = static_cast<int32_t>(mtz.data[row + 1]);
r.l = static_cast<int32_t>(mtz.data[row + 2]);
r.I = out.squared ? v * v : v;
r.sigma = NAN;
if (cell_valid) {
r.d = static_cast<float>(gc.calculate_d({{r.h, r.k, r.l}}));
if (std::isfinite(r.d) && r.d > 0.0f) {
d_min = std::min<double>(d_min, r.d);
d_max = std::max<double>(d_max, r.d);
}
}
out.reflections.emplace_back(r);
}
if (d_max > 0.0) {
out.d_min = d_min;
out.d_max = d_max;
}
return out;
}
std::string ReferenceConsistencyWarning(const ReferenceMtzData& reference,
const std::optional<UnitCell>& data_cell,
std::optional<int> data_space_group_number) {
std::vector<std::string> issues;
if (reference.cell && data_cell) {
const auto& r = *reference.cell;
const auto& d = *data_cell;
auto rel = [](float x, float y) { return y != 0.0f ? std::abs(x - y) / std::abs(y) : 0.0f; };
const bool len_off = rel(r.a, d.a) > 0.02f || rel(r.b, d.b) > 0.02f || rel(r.c, d.c) > 0.02f;
const bool ang_off = std::abs(r.alpha - d.alpha) > 1.5f || std::abs(r.beta - d.beta) > 1.5f
|| std::abs(r.gamma - d.gamma) > 1.5f;
if (len_off || ang_off) {
std::ostringstream ss;
ss.setf(std::ios::fixed);
ss.precision(2);
ss << "unit cell differs (reference " << r.a << " " << r.b << " " << r.c << " "
<< r.alpha << " " << r.beta << " " << r.gamma << ", data " << d.a << " " << d.b
<< " " << d.c << " " << d.alpha << " " << d.beta << " " << d.gamma << ")";
issues.push_back(ss.str());
}
}
if (!reference.point_group.empty() && data_space_group_number) {
const auto* sg = gemmi::find_spacegroup_by_number(*data_space_group_number);
if (sg != nullptr && reference.point_group != sg->point_group_hm())
issues.push_back("point group differs (reference " + reference.point_group + ", data "
+ std::string(sg->point_group_hm()) + ")");
}
if (issues.empty())
return {};
std::string out = "reference may not match the data: ";
for (std::size_t i = 0; i < issues.size(); ++i)
out += (i ? "; " : "") + issues[i];
return out;
}