Files
Jungfraujoch/image_analysis/LoadFCalcFromMtz.cpp
leonarski_f 38ea0ec237 Remove the experimental PixelRefine integrator
On the LysozymeJet5 serial stills the default Gaussian profile-fit
integrator (ProfileIntegrate2D) + reference scaling matched or beat
whole-PixelRefine on every per-shell CC1/2 (overall 95.7% vs 91.9%), ISa
(1.6 vs 1.2) and R-meas (98.5% vs 175%), with CCref a tie -- so PixelRefine
has no remaining advantage. Reference-based per-image scaling is
integrator-agnostic (IndexAndRefine::ReferenceIntensities builds a
ScaleOnTheFly(experiment, reference) applied to any integrator's output),
so the reference-dataset feature (CCref + reference scaling) is kept.

Delete image_analysis/pixel_refinement/, GeomRefinementAlgorithmEnum::
PixelRefine and its gates, BraggIntegrationSettings::ProfileMultiplier
(PixelRefine-only; R1 is shared and kept), and the -r pixelrefine /
--profile-multiplier CLI. The inherited lessons (mean background, de-biased
variance, tight-profile-loses / centroid floor, R-refinement futile) are
folded into NEXTGEN_INTEGRATOR.md.

NOTE: this transiently breaks the viewer build -- the committed viewer
still references the removed enum and ProfileMultiplier. It is fixed in the
next commit (the viewer feature work), held separate while the viewer UI is
being tested.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-25 20:43:04 +02:00

161 lines
6.0 KiB
C++

// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#include "LoadFCalcFromMtz.h"
#include <cmath>
#include <cstdint>
#include <limits>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include <gemmi/mtz.hpp>
#include <gemmi/symmetry.hpp>
namespace {
// Reference intensities can come from a calculated structure factor F-model (squared to an
// intensity), or from a merged/observed mean-intensity column (used directly - this is what lets
// reference-based scaling be self-seeded from the data's own previous merge). column_with_one_of_labels would
// hide which label matched, so the priority list is walked explicitly to record the choice.
const gemmi::Mtz::Column* SelectDefaultColumn(const gemmi::Mtz& mtz, bool& square) {
if (const auto* col = mtz.column_with_label("F-model", nullptr, 'F')) {
square = true;
return col;
}
for (const char* label : {"IMEAN", "I", "IOBS", "Iobs", "I-obs"}) {
if (const auto* col = mtz.column_with_label(label, nullptr, 'J')) {
square = false;
return col;
}
}
for (const auto& c : mtz.columns)
if (c.type == 'J') {
square = false;
return &c;
}
return nullptr;
}
} // namespace
ReferenceMtzData LoadReferenceMtz(const std::string& path,
const std::optional<std::string>& column) {
gemmi::Mtz mtz;
mtz.read_file_gz(path, true);
ReferenceMtzData out;
// Columns the caller may pick as reference intensities/structure factors.
for (const auto& c : mtz.columns)
if (c.type == 'J' || c.type == 'F')
out.candidate_columns.push_back({c.label, c.type});
const gemmi::Mtz::Column* col = nullptr;
if (column) {
col = mtz.column_with_label(*column, nullptr);
if (col == nullptr)
throw std::runtime_error("Reference MTZ has no column '" + *column + "'");
out.squared = (col->type == 'F');
out.default_column = false;
} else {
col = SelectDefaultColumn(mtz, out.squared);
if (col == nullptr)
throw std::runtime_error("MTZ has no F-model or intensity (J) column to use as reference");
out.default_column = true;
}
out.used_column = col->label;
out.used_column_type = col->type;
// Cell and space group the reference was recorded in, for display and the consistency check.
const gemmi::UnitCell& gc = mtz.get_cell();
const bool cell_valid = gc.a > 0 && gc.b > 0 && gc.c > 0;
if (cell_valid)
out.cell = UnitCell{static_cast<float>(gc.a), static_cast<float>(gc.b), static_cast<float>(gc.c),
static_cast<float>(gc.alpha), static_cast<float>(gc.beta),
static_cast<float>(gc.gamma)};
if (mtz.spacegroup != nullptr) {
out.space_group_number = mtz.spacegroup->number;
out.space_group_name = mtz.spacegroup->short_name();
out.point_group = mtz.spacegroup->point_group_hm();
}
out.reflections.reserve(static_cast<std::size_t>(mtz.nreflections));
const std::size_t stride = mtz.columns.size();
double d_min = std::numeric_limits<double>::max();
double d_max = 0.0;
for (int i = 0; i < mtz.nreflections; ++i) {
const float v = (*col)[static_cast<std::size_t>(i)];
if (std::isnan(v))
continue;
const std::size_t row = static_cast<std::size_t>(i) * stride;
MergedReflection r;
r.h = static_cast<int32_t>(mtz.data[row + 0]);
r.k = static_cast<int32_t>(mtz.data[row + 1]);
r.l = static_cast<int32_t>(mtz.data[row + 2]);
r.I = out.squared ? v * v : v;
r.sigma = NAN;
if (cell_valid) {
r.d = static_cast<float>(gc.calculate_d({{r.h, r.k, r.l}}));
if (std::isfinite(r.d) && r.d > 0.0f) {
d_min = std::min<double>(d_min, r.d);
d_max = std::max<double>(d_max, r.d);
}
}
out.reflections.emplace_back(r);
}
if (d_max > 0.0) {
out.d_min = d_min;
out.d_max = d_max;
}
return out;
}
std::string ReferenceConsistencyWarning(const ReferenceMtzData& reference,
const std::optional<UnitCell>& data_cell,
std::optional<int> data_space_group_number) {
std::vector<std::string> issues;
if (reference.cell && data_cell) {
const auto& r = *reference.cell;
const auto& d = *data_cell;
auto rel = [](float x, float y) { return y != 0.0f ? std::abs(x - y) / std::abs(y) : 0.0f; };
const bool len_off = rel(r.a, d.a) > 0.02f || rel(r.b, d.b) > 0.02f || rel(r.c, d.c) > 0.02f;
const bool ang_off = std::abs(r.alpha - d.alpha) > 1.5f || std::abs(r.beta - d.beta) > 1.5f
|| std::abs(r.gamma - d.gamma) > 1.5f;
if (len_off || ang_off) {
std::ostringstream ss;
ss.setf(std::ios::fixed);
ss.precision(2);
ss << "unit cell differs (reference " << r.a << " " << r.b << " " << r.c << " "
<< r.alpha << " " << r.beta << " " << r.gamma << ", data " << d.a << " " << d.b
<< " " << d.c << " " << d.alpha << " " << d.beta << " " << d.gamma << ")";
issues.push_back(ss.str());
}
}
if (!reference.point_group.empty() && data_space_group_number) {
const auto* sg = gemmi::find_spacegroup_by_number(*data_space_group_number);
if (sg != nullptr && reference.point_group != sg->point_group_hm())
issues.push_back("point group differs (reference " + reference.point_group + ", data "
+ std::string(sg->point_group_hm()) + ")");
}
if (issues.empty())
return {};
std::string out = "reference may not match the data: ";
for (std::size_t i = 0; i < issues.size(); ++i)
out += (i ? "; " : "") + issues[i];
return out;
}