e4b3064254
Reference dataset (a): LoadReferenceMtz adds column selection + cell/SG/resolution + a data-vs-reference consistency check; jfjoch_process/jfjoch_scale gain --reference-column; the viewer gets a Reference section in the MX settings dock (worker-owned, independent of the loaded dataset) that flows into reprocessing jobs. 3D combine (-P rot3d): Combine3D weight-sums a reflection's per-frame partials into one counting-limited full before merging (orthogonal ScalingSettings::combine_3d flag, not a partiality model), with a de-biased Poisson variance. Crystal 2: ISa 1.7->8.4, R-meas ~67%->18.9%, intensities unchanged (CCref held). Quality metrics (b): R-meas (Diederichs-Karplus) + redundancy columns in MergeStats; ISa logged. jfjoch fulls 18.9% vs XDS 4.5% (same ASU/run). Profile-fit integrator (experimental): ProfileIntegrate2D (--integrator gaussian|empirical) is a reference-free, rot3d-compatible profile-fit extraction (the decomposed PixelRefine intensity step). Gaussian: R-meas 18.9->14.6%, ISa ->9.5. Anisotropy/per-region add nothing (the discriminating info is in the discarded rocking direction). See NEXTGEN_INTEGRATOR.md. --dump-observations exports the unmerged fulls for XDS comparison. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
414 lines
18 KiB
C++
414 lines
18 KiB
C++
// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
|
|
// SPDX-License-Identifier: GPL-3.0-only
|
|
|
|
#include <iostream>
|
|
#include <vector>
|
|
#include <string>
|
|
#include <future>
|
|
#include <chrono>
|
|
#include <fstream>
|
|
#include <getopt.h>
|
|
|
|
#include "../reader/JFJochHDF5Reader.h"
|
|
#include "../common/Logger.h"
|
|
#include "../common/DiffractionExperiment.h"
|
|
#include "../common/time_utc.h"
|
|
#include "../common/print_license.h"
|
|
#include "../image_analysis/MXAnalysisWithoutFPGA.h"
|
|
#include "../writer/FileWriter.h"
|
|
#include "../image_analysis/IndexAndRefine.h"
|
|
#include "../common/JFJochReceiverPlots.h"
|
|
#include "../compression/JFJochCompressor.h"
|
|
#include "../image_analysis/LoadFCalcFromMtz.h"
|
|
#include "../image_analysis/scale_merge/Merge.h"
|
|
#include "../image_analysis/scale_merge/SearchSpaceGroup.h"
|
|
#include "../image_analysis/scale_merge/Combine3D.h"
|
|
#include "../image_analysis/WriteReflections.h"
|
|
#include "../image_analysis/UpdateReflectionResolution.h"
|
|
|
|
void print_usage() {
|
|
std::cout << "Usage ./jfjoch_scale {<options>} <input.h5>" << std::endl;
|
|
std::cout << "Options:" << std::endl;
|
|
std::cout << " -o, --output-prefix <txt> Output file prefix (default: output)" << std::endl;
|
|
std::cout << " -N, --threads <num> Number of threads (default: 1)" << std::endl;
|
|
std::cout << " -s, --start-image <num> Start image number (default: 0)" << std::endl;
|
|
std::cout << " -e, --end-image <num> End image number (default: all)" << std::endl;
|
|
std::cout << " -v, --verbose Verbose output" << std::endl;
|
|
std::cout << "" << std::endl;
|
|
std::cout << " Scaling and merging" << std::endl;
|
|
std::cout << " -S, --space-group <num> Space group number" << std::endl;
|
|
std::cout << " --scaling-high-resolution <num> High resolution limit for scaling/merging (default: 0.0; no limit)"
|
|
<< std::endl;
|
|
std::cout << " -P, --partiality <txt> Partiality model fixed|rot|rot3d|unity (default: fixed); rot3d = rot + 3D combine" <<
|
|
std::endl;
|
|
std::cout << " -A, --anomalous Anomalous mode (don't merge Friedel pairs)" << std::endl;
|
|
std::cout << " -B, --refine-bfactor Refine per image B-factor" << std::endl;
|
|
std::cout << " -w, --wedge[=num] Refine image wedge during scaling with starting wedge value"
|
|
<< std::endl;
|
|
std::cout << " --min-partiality <num> Minimum partiality to accept reflection (default: 0.02)" <<
|
|
std::endl;
|
|
std::cout << " --min-image-cc <num> Per-image CC limit in percent (default: no limit)" << std::endl;
|
|
std::cout << " --scaling-iterations <num> Number of scaling iterations with no reference data (default: 3)"
|
|
<< std::endl;
|
|
std::cout << " --scaling-output <txt> Output format for scaling results mtz|cif|txt (default: mtz)" << std::endl;
|
|
std::cout << " -z, --reference-mtz <file> Reference MTZ file" << std::endl;
|
|
std::cout << " --reference-column <label> Reference MTZ column to use (default: auto - F-model, else IMEAN/I/...)" << std::endl;
|
|
}
|
|
|
|
enum {
|
|
OPT_MIN_PARTIALITY = 1000,
|
|
OPT_MIN_IMAGE_CC,
|
|
OPT_SCALING_ITERATIONS,
|
|
OPT_SCALING_OUTPUT,
|
|
OPT_SCALING_HIGH_RESOLUTION,
|
|
OPT_REFERENCE_COLUMN
|
|
};
|
|
|
|
|
|
static option long_options[] = {
|
|
{"verbose", no_argument, nullptr, 'v'},
|
|
{"output-prefix", required_argument, nullptr, 'o'},
|
|
{"threads", required_argument, nullptr, 'N'},
|
|
{"start-image", required_argument, nullptr, 's'},
|
|
{"end-image", required_argument, nullptr, 'e'},
|
|
{"reference-mtz", required_argument, nullptr, 'z'},
|
|
{"reference-column", required_argument, nullptr, OPT_REFERENCE_COLUMN},
|
|
{"space-group", required_argument, nullptr, 'S'},
|
|
{"partiality", required_argument, nullptr, 'P'},
|
|
{"anomalous", no_argument, nullptr, 'A'},
|
|
{"refine-bfactor", no_argument, nullptr, 'B'},
|
|
{"wedge", optional_argument, nullptr, 'w'},
|
|
{"scaling-high-resolution", required_argument, nullptr, OPT_SCALING_HIGH_RESOLUTION},
|
|
{"min-partiality", required_argument, nullptr, OPT_MIN_PARTIALITY},
|
|
{"min-image-cc", required_argument, nullptr, OPT_MIN_IMAGE_CC},
|
|
{"scaling-iterations", required_argument, nullptr, OPT_SCALING_ITERATIONS},
|
|
{"scaling-output", required_argument, nullptr, OPT_SCALING_OUTPUT},
|
|
{nullptr, 0, nullptr, 0}
|
|
};
|
|
|
|
int main(int argc, char **argv) {
|
|
RegisterHDF5Filter();
|
|
|
|
print_license("jfjoch_scale");
|
|
|
|
Logger logger("jfjoch_scale");
|
|
|
|
std::string input_file;
|
|
std::string output_prefix = "output";
|
|
int nthreads = 0;
|
|
int start_image = 0;
|
|
int end_image = -1; // -1 indicates process until end
|
|
bool verbose = false;
|
|
bool anomalous_mode = false;
|
|
std::optional<int64_t> space_group_number;
|
|
bool refine_bfactor = false;
|
|
bool refine_wedge = false;
|
|
std::optional<double> wedge_for_scaling;
|
|
std::string ref_mtz;
|
|
std::string ref_column;
|
|
double min_partiality = 0.02;
|
|
double min_image_cc = 0.0;
|
|
int64_t scaling_iter = 3;
|
|
|
|
IntensityFormat intensity_format = IntensityFormat::MTZ;
|
|
PartialityModel partiality_model = PartialityModel::Fixed;
|
|
bool combine_3d = false; // -P rot3d: weight-sum per-frame partials into fulls before merging
|
|
|
|
std::optional<float> d_min_scale_merge;
|
|
|
|
if (argc == 1) {
|
|
print_usage();
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
int opt;
|
|
int option_index = 0;
|
|
const char *short_opts = "vo:N:s:e:z:S:P:ABw::";
|
|
|
|
while ((opt = getopt_long(argc, argv, short_opts, long_options, &option_index)) != -1) {
|
|
switch (opt) {
|
|
case 'o':
|
|
output_prefix = optarg;
|
|
break;
|
|
case 'v':
|
|
verbose = true;
|
|
break;
|
|
case 'N':
|
|
nthreads = atoi(optarg);
|
|
break;
|
|
case 's':
|
|
start_image = atoi(optarg);
|
|
break;
|
|
case 'e':
|
|
end_image = atoi(optarg);
|
|
break;
|
|
case 'z':
|
|
ref_mtz = optarg;
|
|
break;
|
|
case OPT_REFERENCE_COLUMN:
|
|
ref_column = optarg;
|
|
break;
|
|
case 'S':
|
|
space_group_number = atoi(optarg);
|
|
break;
|
|
case 'P':
|
|
if (strcmp(optarg, "unity") == 0)
|
|
partiality_model = PartialityModel::Unity;
|
|
else if (strcmp(optarg, "fixed") == 0)
|
|
partiality_model = PartialityModel::Fixed;
|
|
else if (strcmp(optarg, "rot") == 0)
|
|
partiality_model = PartialityModel::Rotation;
|
|
else if (strcmp(optarg, "rot3d") == 0) {
|
|
partiality_model = PartialityModel::Rotation;
|
|
combine_3d = true;
|
|
}
|
|
else {
|
|
logger.Error("Invalid partiality mode: {}", optarg);
|
|
print_usage();
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
break;
|
|
case 'A':
|
|
anomalous_mode = true;
|
|
break;
|
|
case 'B':
|
|
refine_bfactor = true;
|
|
break;
|
|
case 'w':
|
|
refine_wedge = true;
|
|
if (optarg)
|
|
wedge_for_scaling = std::stod(optarg);
|
|
break;
|
|
case OPT_SCALING_HIGH_RESOLUTION:
|
|
d_min_scale_merge = atof(optarg);
|
|
logger.Info("High resolution limit for scaling/merging set to {:.2f} A", d_min_scale_merge.value());
|
|
break;
|
|
case OPT_MIN_PARTIALITY:
|
|
min_partiality = std::stod(optarg);
|
|
break;
|
|
case OPT_MIN_IMAGE_CC:
|
|
min_image_cc = std::stod(optarg);
|
|
break;
|
|
case OPT_SCALING_OUTPUT:
|
|
if (strcmp(optarg, "mtz") == 0) {
|
|
intensity_format = IntensityFormat::MTZ;
|
|
} else if (strcmp(optarg, "cif") == 0) {
|
|
intensity_format = IntensityFormat::mmCIF;
|
|
} else if (strcmp(optarg, "txt") == 0) {
|
|
intensity_format = IntensityFormat::Text;
|
|
} else {
|
|
logger.Error("Invalid intensity format: {}", optarg);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
break;
|
|
case OPT_SCALING_ITERATIONS:
|
|
scaling_iter = atoi(optarg);
|
|
if (scaling_iter <= 0) {
|
|
logger.Error("Invalid scaling iteration count: {}", scaling_iter);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Validate space group number early
|
|
const gemmi::SpaceGroup *space_group = nullptr;
|
|
if (space_group_number.has_value()) {
|
|
space_group = gemmi::find_spacegroup_by_number(space_group_number.value());
|
|
if (!space_group) {
|
|
logger.Error("Unknown space group number {}", space_group_number.value());
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
logger.Info("Using space group {} (number {})", space_group->hm, space_group_number.value());
|
|
}
|
|
|
|
std::vector<IntegrationOutcome> reflections;
|
|
std::vector<float> mosaicity;
|
|
|
|
if (optind >= argc) {
|
|
logger.Error("Input file not specified");
|
|
print_usage();
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
logger.Verbose(verbose);
|
|
logger.Info("Loading reflections");
|
|
|
|
// 1. Read Input files
|
|
JFJochHDF5Reader reader;
|
|
std::shared_ptr<const JFJochReaderDataset> dataset;
|
|
for (int i = optind; i < argc; i++) {
|
|
input_file = argv[optind];
|
|
try {
|
|
reader.ReadFile(input_file);
|
|
auto tmp_dataset = reader.GetDataset();
|
|
if (i == optind)
|
|
dataset = tmp_dataset;
|
|
|
|
uint64_t total_images_in_file = reader.GetNumberOfImages();
|
|
if (end_image < 0 || end_image >= total_images_in_file)
|
|
end_image = total_images_in_file - 1;
|
|
|
|
size_t mosaicity_size = mosaicity.size();
|
|
mosaicity.resize(mosaicity_size + end_image - start_image + 1);
|
|
for (int i = 0; i < end_image - start_image + 1; i++)
|
|
mosaicity[mosaicity_size + i] = dataset->mosaicity_deg[start_image + i];
|
|
|
|
reflections = reader.ReadReflections(start_image, end_image);
|
|
logger.Info("Loaded dataset from {}", input_file);
|
|
} catch (const std::exception &e) {
|
|
logger.Error("Error reading input file: {}", e.what());
|
|
// Hard exit is only for the first file
|
|
if (i == optind)
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
}
|
|
|
|
std::vector<MergedReflection> reference_data;
|
|
if (!ref_mtz.empty()) {
|
|
try {
|
|
const auto reference = LoadReferenceMtz(
|
|
ref_mtz, ref_column.empty() ? std::nullopt : std::optional<std::string>(ref_column));
|
|
reference_data = reference.reflections;
|
|
|
|
logger.Info("Loaded {} reference reflections from {} (column {}{}{})",
|
|
reference_data.size(), ref_mtz, reference.used_column,
|
|
reference.squared ? ", squared to intensity" : "",
|
|
reference.default_column ? ", auto-selected" : ", user-specified");
|
|
if (reference.d_max > 0.0)
|
|
logger.Info("Reference resolution range {:.2f} - {:.2f} A", reference.d_max, reference.d_min);
|
|
if (reference.cell.has_value())
|
|
logger.Info("Reference unit cell: a={:.3f} b={:.3f} c={:.3f} alpha={:.2f} beta={:.2f} gamma={:.2f}",
|
|
reference.cell->a, reference.cell->b, reference.cell->c,
|
|
reference.cell->alpha, reference.cell->beta, reference.cell->gamma);
|
|
if (!reference.space_group_name.empty())
|
|
logger.Info("Reference space group: {} (number {})",
|
|
reference.space_group_name, reference.space_group_number.value_or(0));
|
|
|
|
const auto data_sg = space_group_number.has_value()
|
|
? std::optional<int>(static_cast<int>(*space_group_number))
|
|
: (dataset->experiment.GetSpaceGroupNumber().has_value()
|
|
? std::optional<int>(static_cast<int>(*dataset->experiment.GetSpaceGroupNumber()))
|
|
: std::nullopt);
|
|
const auto warning = ReferenceConsistencyWarning(reference, dataset->experiment.GetUnitCell(), data_sg);
|
|
if (!warning.empty())
|
|
logger.Warning("{}", warning);
|
|
} catch (const std::exception &e) {
|
|
logger.Error("Error reading reference MTZ {}: {}", ref_mtz, e.what());
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
}
|
|
|
|
logger.Info("Starting analysis of {} images", reflections.size());
|
|
|
|
// 2. Setup Experiment & Components
|
|
DiffractionExperiment experiment(dataset->experiment);
|
|
experiment.BitDepthImage(32).Compression(CompressionAlgorithm::BSHUF_LZ4);
|
|
experiment.FilePrefix(output_prefix);
|
|
experiment.Mode(DetectorMode::Standard); // Ensure full image analysis
|
|
experiment.PixelSigned(true);
|
|
experiment.OverwriteExistingFiles(true);
|
|
experiment.PolarizationFactor(0.99);
|
|
experiment.SetFileWriterFormat(FileWriterFormat::NXmxLegacy);
|
|
experiment.SpaceGroupNumber(space_group_number);
|
|
experiment.NumTriggers(1);
|
|
|
|
ScalingSettings scaling_settings;
|
|
scaling_settings.SetPartialityModel(partiality_model);
|
|
scaling_settings.Combine3D(combine_3d);
|
|
if (d_min_scale_merge)
|
|
scaling_settings.HighResolutionLimit_A(d_min_scale_merge.value());
|
|
scaling_settings.MergeFriedel(!anomalous_mode);
|
|
scaling_settings.RefineB(refine_bfactor);
|
|
scaling_settings.RefineRotationWedge(refine_wedge);
|
|
if (wedge_for_scaling.has_value())
|
|
scaling_settings.RotationWedgeForScaling(wedge_for_scaling);
|
|
scaling_settings.MinPartiality(min_partiality);
|
|
scaling_settings.MinCCForImage(min_image_cc);
|
|
scaling_settings.FileFormat(intensity_format);
|
|
|
|
experiment.ImportScalingSettings(scaling_settings);
|
|
|
|
if (!experiment.GetUnitCell()) {
|
|
logger.Error("Experiment unit cell not found, cannot update reflection resolution");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
auto refl_stats = UpdateReflectionResolution(experiment.GetUnitCell().value(), reflections);
|
|
|
|
logger.Info("Read {} reflections from {} images", refl_stats.n_reflections, refl_stats.n_images);
|
|
|
|
experiment.ImagesPerTrigger(refl_stats.n_images);
|
|
|
|
auto scale_start = std::chrono::steady_clock::now();
|
|
for (int i = 0; i < scaling_iter; i++) {
|
|
auto iter_start = std::chrono::steady_clock::now();
|
|
|
|
if (reference_data.empty()) {
|
|
auto merged = MergeAll(experiment, reflections);
|
|
ScaleOnTheFly scaling(experiment, merged);
|
|
scaling.Scale(reflections, nthreads);
|
|
} else {
|
|
ScaleOnTheFly scaling(experiment, reference_data);
|
|
scaling.Scale(reflections, nthreads);
|
|
}
|
|
|
|
// scale_result.SaveToFile(output_prefix + "_iter" + std::to_string(i) + "_scale.dat");
|
|
auto iter_end = std::chrono::steady_clock::now();
|
|
double iter_time = std::chrono::duration<double>(iter_end - iter_start).count();
|
|
logger.Info("Scaling iteration {} took {:.3f} seconds", i, iter_time);
|
|
}
|
|
|
|
auto scale_end = std::chrono::steady_clock::now();
|
|
double scale_time = std::chrono::duration<double>(scale_end - scale_start).count();
|
|
logger.Info("Scaling completed in {:.2f} s", scale_time);
|
|
|
|
auto merge_start = std::chrono::steady_clock::now();
|
|
|
|
// -P rot3d: weight-sum each reflection's per-frame partials into one full before merging.
|
|
const bool rot3d = combine_3d;
|
|
std::vector<IntegrationOutcome> combined;
|
|
if (rot3d)
|
|
combined = CombineRotationObservations(reflections, experiment, &logger);
|
|
const std::vector<IntegrationOutcome> &merge_input = rot3d ? combined : reflections;
|
|
|
|
MergeOnTheFly merge_engine(experiment);
|
|
merge_engine.ReferenceCell(experiment.GetUnitCell());
|
|
for (auto &i : merge_input)
|
|
merge_engine.AddImage(i);
|
|
|
|
auto merged_reflections = merge_engine.ExportReflections();
|
|
auto merged_statistics = merge_engine.MergeStats(merged_reflections, merge_input, reference_data);
|
|
|
|
auto merge_end = std::chrono::steady_clock::now();
|
|
double merge_time = std::chrono::duration<double>(merge_end - merge_start).count();
|
|
|
|
logger.Info("Merge completed in {:.2f} s ({} unique reflections)", merge_time, merged_reflections.size());
|
|
|
|
// Print resolution-shell statistics table
|
|
std::cout << merged_statistics;
|
|
|
|
const bool fixed_space_group = space_group || experiment.GetGemmiSpaceGroup().has_value();
|
|
|
|
if (!fixed_space_group) {
|
|
logger.Info("Searching for space group from P1-merged reflections ...");
|
|
|
|
SearchSpaceGroupOptions sg_opts;
|
|
sg_opts.crystal_system.reset();
|
|
sg_opts.centering = '\0';
|
|
sg_opts.merge_friedel = experiment.GetScalingSettings().GetMergeFriedel();
|
|
sg_opts.d_min_limit_A = experiment.GetScalingSettings().GetHighResolutionLimit_A().value_or(0.0);
|
|
sg_opts.min_i_over_sigma = 0.0;
|
|
sg_opts.min_operator_cc = 0.80;
|
|
sg_opts.min_pairs_per_operator = 20;
|
|
sg_opts.min_total_compared = 100;
|
|
sg_opts.test_systematic_absences = true;
|
|
|
|
const auto sg_search = SearchSpaceGroup(merged_reflections, sg_opts);
|
|
std::cout << std::endl << SearchSpaceGroupResultToText(sg_search) << std::endl;
|
|
}
|
|
|
|
if (!output_prefix.empty())
|
|
WriteReflections(merged_reflections, *experiment.GetUnitCell(), experiment, output_prefix);
|
|
}
|