38ea0ec237
On the LysozymeJet5 serial stills the default Gaussian profile-fit integrator (ProfileIntegrate2D) + reference scaling matched or beat whole-PixelRefine on every per-shell CC1/2 (overall 95.7% vs 91.9%), ISa (1.6 vs 1.2) and R-meas (98.5% vs 175%), with CCref a tie -- so PixelRefine has no remaining advantage. Reference-based per-image scaling is integrator-agnostic (IndexAndRefine::ReferenceIntensities builds a ScaleOnTheFly(experiment, reference) applied to any integrator's output), so the reference-dataset feature (CCref + reference scaling) is kept. Delete image_analysis/pixel_refinement/, GeomRefinementAlgorithmEnum:: PixelRefine and its gates, BraggIntegrationSettings::ProfileMultiplier (PixelRefine-only; R1 is shared and kept), and the -r pixelrefine / --profile-multiplier CLI. The inherited lessons (mean background, de-biased variance, tight-profile-loses / centroid floor, R-refinement futile) are folded into NEXTGEN_INTEGRATOR.md. NOTE: this transiently breaks the viewer build -- the committed viewer still references the removed enum and ProfileMultiplier. It is fixed in the next commit (the viewer feature work), held separate while the viewer UI is being tested. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
161 lines
6.0 KiB
C++
161 lines
6.0 KiB
C++
// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
|
|
// SPDX-License-Identifier: GPL-3.0-only
|
|
|
|
#include "LoadFCalcFromMtz.h"
|
|
|
|
#include <cmath>
|
|
#include <cstdint>
|
|
#include <limits>
|
|
#include <sstream>
|
|
#include <stdexcept>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#include <gemmi/mtz.hpp>
|
|
#include <gemmi/symmetry.hpp>
|
|
|
|
namespace {
|
|
|
|
// Reference intensities can come from a calculated structure factor F-model (squared to an
|
|
// intensity), or from a merged/observed mean-intensity column (used directly - this is what lets
|
|
// reference-based scaling be self-seeded from the data's own previous merge). column_with_one_of_labels would
|
|
// hide which label matched, so the priority list is walked explicitly to record the choice.
|
|
const gemmi::Mtz::Column* SelectDefaultColumn(const gemmi::Mtz& mtz, bool& square) {
|
|
if (const auto* col = mtz.column_with_label("F-model", nullptr, 'F')) {
|
|
square = true;
|
|
return col;
|
|
}
|
|
for (const char* label : {"IMEAN", "I", "IOBS", "Iobs", "I-obs"}) {
|
|
if (const auto* col = mtz.column_with_label(label, nullptr, 'J')) {
|
|
square = false;
|
|
return col;
|
|
}
|
|
}
|
|
for (const auto& c : mtz.columns)
|
|
if (c.type == 'J') {
|
|
square = false;
|
|
return &c;
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
ReferenceMtzData LoadReferenceMtz(const std::string& path,
|
|
const std::optional<std::string>& column) {
|
|
gemmi::Mtz mtz;
|
|
mtz.read_file_gz(path, true);
|
|
|
|
ReferenceMtzData out;
|
|
|
|
// Columns the caller may pick as reference intensities/structure factors.
|
|
for (const auto& c : mtz.columns)
|
|
if (c.type == 'J' || c.type == 'F')
|
|
out.candidate_columns.push_back({c.label, c.type});
|
|
|
|
const gemmi::Mtz::Column* col = nullptr;
|
|
if (column) {
|
|
col = mtz.column_with_label(*column, nullptr);
|
|
if (col == nullptr)
|
|
throw std::runtime_error("Reference MTZ has no column '" + *column + "'");
|
|
out.squared = (col->type == 'F');
|
|
out.default_column = false;
|
|
} else {
|
|
col = SelectDefaultColumn(mtz, out.squared);
|
|
if (col == nullptr)
|
|
throw std::runtime_error("MTZ has no F-model or intensity (J) column to use as reference");
|
|
out.default_column = true;
|
|
}
|
|
out.used_column = col->label;
|
|
out.used_column_type = col->type;
|
|
|
|
// Cell and space group the reference was recorded in, for display and the consistency check.
|
|
const gemmi::UnitCell& gc = mtz.get_cell();
|
|
const bool cell_valid = gc.a > 0 && gc.b > 0 && gc.c > 0;
|
|
if (cell_valid)
|
|
out.cell = UnitCell{static_cast<float>(gc.a), static_cast<float>(gc.b), static_cast<float>(gc.c),
|
|
static_cast<float>(gc.alpha), static_cast<float>(gc.beta),
|
|
static_cast<float>(gc.gamma)};
|
|
if (mtz.spacegroup != nullptr) {
|
|
out.space_group_number = mtz.spacegroup->number;
|
|
out.space_group_name = mtz.spacegroup->short_name();
|
|
out.point_group = mtz.spacegroup->point_group_hm();
|
|
}
|
|
|
|
out.reflections.reserve(static_cast<std::size_t>(mtz.nreflections));
|
|
|
|
const std::size_t stride = mtz.columns.size();
|
|
double d_min = std::numeric_limits<double>::max();
|
|
double d_max = 0.0;
|
|
|
|
for (int i = 0; i < mtz.nreflections; ++i) {
|
|
const float v = (*col)[static_cast<std::size_t>(i)];
|
|
if (std::isnan(v))
|
|
continue;
|
|
|
|
const std::size_t row = static_cast<std::size_t>(i) * stride;
|
|
MergedReflection r;
|
|
r.h = static_cast<int32_t>(mtz.data[row + 0]);
|
|
r.k = static_cast<int32_t>(mtz.data[row + 1]);
|
|
r.l = static_cast<int32_t>(mtz.data[row + 2]);
|
|
r.I = out.squared ? v * v : v;
|
|
r.sigma = NAN;
|
|
|
|
if (cell_valid) {
|
|
r.d = static_cast<float>(gc.calculate_d({{r.h, r.k, r.l}}));
|
|
if (std::isfinite(r.d) && r.d > 0.0f) {
|
|
d_min = std::min<double>(d_min, r.d);
|
|
d_max = std::max<double>(d_max, r.d);
|
|
}
|
|
}
|
|
|
|
out.reflections.emplace_back(r);
|
|
}
|
|
|
|
if (d_max > 0.0) {
|
|
out.d_min = d_min;
|
|
out.d_max = d_max;
|
|
}
|
|
|
|
return out;
|
|
}
|
|
|
|
std::string ReferenceConsistencyWarning(const ReferenceMtzData& reference,
|
|
const std::optional<UnitCell>& data_cell,
|
|
std::optional<int> data_space_group_number) {
|
|
std::vector<std::string> issues;
|
|
|
|
if (reference.cell && data_cell) {
|
|
const auto& r = *reference.cell;
|
|
const auto& d = *data_cell;
|
|
auto rel = [](float x, float y) { return y != 0.0f ? std::abs(x - y) / std::abs(y) : 0.0f; };
|
|
const bool len_off = rel(r.a, d.a) > 0.02f || rel(r.b, d.b) > 0.02f || rel(r.c, d.c) > 0.02f;
|
|
const bool ang_off = std::abs(r.alpha - d.alpha) > 1.5f || std::abs(r.beta - d.beta) > 1.5f
|
|
|| std::abs(r.gamma - d.gamma) > 1.5f;
|
|
if (len_off || ang_off) {
|
|
std::ostringstream ss;
|
|
ss.setf(std::ios::fixed);
|
|
ss.precision(2);
|
|
ss << "unit cell differs (reference " << r.a << " " << r.b << " " << r.c << " "
|
|
<< r.alpha << " " << r.beta << " " << r.gamma << ", data " << d.a << " " << d.b
|
|
<< " " << d.c << " " << d.alpha << " " << d.beta << " " << d.gamma << ")";
|
|
issues.push_back(ss.str());
|
|
}
|
|
}
|
|
|
|
if (!reference.point_group.empty() && data_space_group_number) {
|
|
const auto* sg = gemmi::find_spacegroup_by_number(*data_space_group_number);
|
|
if (sg != nullptr && reference.point_group != sg->point_group_hm())
|
|
issues.push_back("point group differs (reference " + reference.point_group + ", data "
|
|
+ std::string(sg->point_group_hm()) + ")");
|
|
}
|
|
|
|
if (issues.empty())
|
|
return {};
|
|
|
|
std::string out = "reference may not match the data: ";
|
|
for (std::size_t i = 0; i < issues.size(); ++i)
|
|
out += (i ? "; " : "") + issues[i];
|
|
return out;
|
|
}
|