// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-only #include "LoadFCalcFromMtz.h" #include #include #include #include #include #include #include #include #include namespace { // Reference intensities can come from a calculated structure factor F-model (squared to an // intensity), or from a merged/observed mean-intensity column (used directly - this is what lets // reference-based scaling be self-seeded from the data's own previous merge). column_with_one_of_labels would // hide which label matched, so the priority list is walked explicitly to record the choice. const gemmi::Mtz::Column* SelectDefaultColumn(const gemmi::Mtz& mtz, bool& square) { if (const auto* col = mtz.column_with_label("F-model", nullptr, 'F')) { square = true; return col; } for (const char* label : {"IMEAN", "I", "IOBS", "Iobs", "I-obs"}) { if (const auto* col = mtz.column_with_label(label, nullptr, 'J')) { square = false; return col; } } for (const auto& c : mtz.columns) if (c.type == 'J') { square = false; return &c; } return nullptr; } } // namespace ReferenceMtzData LoadReferenceMtz(const std::string& path, const std::optional& column) { gemmi::Mtz mtz; mtz.read_file_gz(path, true); ReferenceMtzData out; // Columns the caller may pick as reference intensities/structure factors. for (const auto& c : mtz.columns) if (c.type == 'J' || c.type == 'F') out.candidate_columns.push_back({c.label, c.type}); const gemmi::Mtz::Column* col = nullptr; if (column) { col = mtz.column_with_label(*column, nullptr); if (col == nullptr) throw std::runtime_error("Reference MTZ has no column '" + *column + "'"); out.squared = (col->type == 'F'); out.default_column = false; } else { col = SelectDefaultColumn(mtz, out.squared); if (col == nullptr) throw std::runtime_error("MTZ has no F-model or intensity (J) column to use as reference"); out.default_column = true; } out.used_column = col->label; out.used_column_type = col->type; // Cell and space group the reference was recorded in, for display and the consistency check. const gemmi::UnitCell& gc = mtz.get_cell(); const bool cell_valid = gc.a > 0 && gc.b > 0 && gc.c > 0; if (cell_valid) out.cell = UnitCell{static_cast(gc.a), static_cast(gc.b), static_cast(gc.c), static_cast(gc.alpha), static_cast(gc.beta), static_cast(gc.gamma)}; if (mtz.spacegroup != nullptr) { out.space_group_number = mtz.spacegroup->number; out.space_group_name = mtz.spacegroup->short_name(); out.point_group = mtz.spacegroup->point_group_hm(); } out.reflections.reserve(static_cast(mtz.nreflections)); const std::size_t stride = mtz.columns.size(); double d_min = std::numeric_limits::max(); double d_max = 0.0; for (int i = 0; i < mtz.nreflections; ++i) { const float v = (*col)[static_cast(i)]; if (std::isnan(v)) continue; const std::size_t row = static_cast(i) * stride; MergedReflection r; r.h = static_cast(mtz.data[row + 0]); r.k = static_cast(mtz.data[row + 1]); r.l = static_cast(mtz.data[row + 2]); r.I = out.squared ? v * v : v; r.sigma = NAN; if (cell_valid) { r.d = static_cast(gc.calculate_d({{r.h, r.k, r.l}})); if (std::isfinite(r.d) && r.d > 0.0f) { d_min = std::min(d_min, r.d); d_max = std::max(d_max, r.d); } } out.reflections.emplace_back(r); } if (d_max > 0.0) { out.d_min = d_min; out.d_max = d_max; } return out; } std::string ReferenceConsistencyWarning(const ReferenceMtzData& reference, const std::optional& data_cell, std::optional data_space_group_number) { std::vector issues; if (reference.cell && data_cell) { const auto& r = *reference.cell; const auto& d = *data_cell; auto rel = [](float x, float y) { return y != 0.0f ? std::abs(x - y) / std::abs(y) : 0.0f; }; const bool len_off = rel(r.a, d.a) > 0.02f || rel(r.b, d.b) > 0.02f || rel(r.c, d.c) > 0.02f; const bool ang_off = std::abs(r.alpha - d.alpha) > 1.5f || std::abs(r.beta - d.beta) > 1.5f || std::abs(r.gamma - d.gamma) > 1.5f; if (len_off || ang_off) { std::ostringstream ss; ss.setf(std::ios::fixed); ss.precision(2); ss << "unit cell differs (reference " << r.a << " " << r.b << " " << r.c << " " << r.alpha << " " << r.beta << " " << r.gamma << ", data " << d.a << " " << d.b << " " << d.c << " " << d.alpha << " " << d.beta << " " << d.gamma << ")"; issues.push_back(ss.str()); } } if (!reference.point_group.empty() && data_space_group_number) { const auto* sg = gemmi::find_spacegroup_by_number(*data_space_group_number); if (sg != nullptr && reference.point_group != sg->point_group_hm()) issues.push_back("point group differs (reference " + reference.point_group + ", data " + std::string(sg->point_group_hm()) + ")"); } if (issues.empty()) return {}; std::string out = "reference may not match the data: "; for (std::size_t i = 0; i < issues.size(); ++i) out += (i ? "; " : "") + issues[i]; return out; }