Files
Jungfraujoch/tools/jfjoch_process.cpp
T
leonarski_f c84d581d3e Remove the global (joint) scaling path
Profiling showed the joint Ceres solve took ~120 s versus ~3.5 s for the
alternating per-image scaling loop (~35x) for no quality gain (HEWL
anomalous 0.54x vs 0.53x), so it is not worth keeping. Drop
GlobalScale.{h,cpp}, the jfjoch_process --global-scale flag, and
ScalingSettings::GlobalScaling.

While here, in the same scaling/process area: fold scale_fulls into
ScalingSettings (alongside combine_3d) so the CLI and experiment carry it
uniformly, add per-substep [timing] logging to the scaling/merge post-pass
(including the serial MergeAll vs parallel ScaleAllImages split), and carry
structured MergeStatistics + ISa in ProcessResult.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-25 20:43:04 +02:00

839 lines
35 KiB
C++

// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#include <algorithm>
#include <atomic>
#include <csignal>
#include <getopt.h>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include "../reader/JFJochHDF5Reader.h"
#include "../common/Logger.h"
#include "../common/DiffractionExperiment.h"
#include "../common/PixelMask.h"
#include "../common/print_license.h"
#include "../image_analysis/LoadFCalcFromMtz.h"
#include "../process/JFJochProcess.h"
void print_usage() {
std::cout << "Usage jfjoch_process {<options>} <input.h5>" << std::endl;
std::cout << "Options:" << std::endl;
std::cout << " -o, --output-prefix <txt> Output file prefix (default: output)" << std::endl;
std::cout << " -N, --threads <num> Number of threads (default: 1)" << std::endl;
std::cout << " -s, --start-image <num> Start image number (default: 0)" << std::endl;
std::cout << " -e, --end-image <num> End image number (default: all)" << std::endl;
std::cout << " -t, --stride <num> Image stride (default: 1)" << std::endl;
std::cout << " -v, --verbose Verbose output" << std::endl;
std::cout << std::endl;
std::cout << " Spot finding" << std::endl;
std::cout << " --spot-sigma <num> Noise sigma level for spot finding (default: 3.0)" << std::endl;
std::cout << " --spot-threshold <num> Photon count threshold for spot finding (default: 10)" << std::endl;
std::cout << " --spot-high-resolution <num> High resolution limit for spot finding (default: 1.5)" << std::endl;
std::cout << " --max-spots <num> Max spot count (default: 250)" << std::endl;
std::cout << std::endl;
std::cout << " Indexing" << std::endl;
std::cout << " -R, --two-pass-rotation[=num] Two-pass offline rotation indexing (optional: number of images, default: 100)" << std::endl;
std::cout << " --single-pass-rotation[=num] Use online-like single-pass rotation indexing (optional: min angular range deg)" << std::endl;
std::cout << " --redo-rotation-spots Redo spot finding for two-pass rotation indexing" << std::endl;
std::cout << " --force-rotation-lattice <vec> Force rotation indexer with external lattice (in Angstrom) : \"a0x,a0y,a0z,a1x,a1y,a1z,a2x,a2y,a2z\" (9 floats, skips first pass)" << std::endl;
std::cout << " -X, --indexing-algorithm <txt> Indexing algorithm (FFBIDX|FFT|FFTW|Auto|None)" << std::endl;
std::cout << " -S, --space-group <num> Space group number - used for both indexing and scaling" << std::endl;
std::cout << " -C, --unit-cell <cell> Fix reference unit cell: \"a,b,c,alpha,beta,gamma\"" << std::endl;
std::cout << " -r, --refine <txt> Geometry refinement algorithm (none|orientation|beam_and_lattice|pixelrefine)" << std::endl;
std::cout << std::endl;
std::cout << " Scaling and merging" << std::endl;
std::cout << " -M, --scale-merge Scale and merge (refine mosaicity) and write scaled.hkl + image.dat" << std::endl;
std::cout << " --scale-fulls After -P rot3d combine, refit a per-frame scale on the fulls (XDS order, Unity model); implies -M" << std::endl;
std::cout << " -P, --partiality <txt> Partiality model fixed|rot|rot3d|unity (default: fixed). rot3d = rot + 3D combine of per-frame partials" << std::endl;
std::cout << " -A, --anomalous Anomalous mode (don't merge Friedel pairs)" << std::endl;
std::cout << " -B, --refine-bfactor Refine per image B-factor" << std::endl;
std::cout << " -w, --wedge[=num] Refine image wedge during scaling with starting wedge value" << std::endl;
std::cout << " --scaling-high-resolution <num> High resolution limit for spot finding (default: no limit)" << std::endl;
std::cout << " --min-partiality <num> Minimum partiality to accept reflection (default: 0.02)" << std::endl;
std::cout << " --reject-outliers <num> Per-observation merge outlier rejection, N sigma from the per-reflection median (default: off; e.g. 6, XDS/DIALS-style)" << std::endl;
std::cout << " --reject-delta-cchalf <num> Per-crystal CC1/2-delta rejection: drop images with deltaCChalf below mean - N*stddev (default: off; e.g. 2.5)" << std::endl;
std::cout << " --min-image-cc <num> Per-image CC limit in percent (default: no limit)" << std::endl;
std::cout << " --scaling-iterations <num> Number of scaling iterations with no reference data (default: 3)" << std::endl;
std::cout << " --scaling-output <txt> Output format for scaling results mtz|cif|txt (default: mtz)" << std::endl;
std::cout << " -z, --reference-mtz <file> Reference MTZ file" << std::endl;
std::cout << " --reference-column <label> Reference MTZ column to use (default: auto - F-model, else IMEAN/I/...)" << std::endl;
std::cout << std::endl;
std::cout << " Pixel refinement (experimental, select via -r pixelrefine, needs --reference-mtz)" << std::endl;
std::cout << " --bandwidth <num> Relative X-ray bandwidth FWHM (e.g. 0.01 for 1% DMM); default from file or 0" << std::endl;
std::cout << " --integration-radius <r> Signal-box radius r1, or r1,r2,r3 (px). One value => r2=r1+2, r3=r1+4" << std::endl;
std::cout << " --integrator <txt> Spot integrator boxsum|gaussian|empirical (default: boxsum; experimental profile-fit)" << std::endl;
std::cout << " --profile-multiplier <num> PixelRefine: scale the measured tangential profile width R1 (default: 6; XDS-style generous aperture)" << std::endl;
}
enum {
OPT_SPOT_SIGMA = 1000,
OPT_SPOT_THRESHOLD,
OPT_SPOT_RESOLUTION,
OPT_MAX_SPOTS,
OPT_MIN_PARTIALITY,
OPT_MIN_IMAGE_CC,
OPT_SCALING_ITERATIONS,
OPT_SCALING_HIGH_RESOLUTION,
OPT_SCALING_OUTPUT,
OPT_SINGLE_PASS_ROTATION,
OPT_REDO_ROTATION_SPOTS,
OPT_FORCE_ROTATION_LATTICE,
OPT_BANDWIDTH,
OPT_INTEGRATION_RADIUS,
OPT_REJECT_OUTLIERS,
OPT_PROFILE_MULTIPLIER,
OPT_REJECT_DELTA_CCHALF,
OPT_REFERENCE_COLUMN,
OPT_DUMP_OBSERVATIONS,
OPT_INTEGRATOR,
OPT_SCALE_FULLS
};
static option long_options[] = {
{"verbose", no_argument, nullptr, 'v'},
{"output-prefix", required_argument, nullptr, 'o'},
{"threads", required_argument, nullptr, 'N'},
{"start-image", required_argument, nullptr, 's'},
{"end-image", required_argument, nullptr, 'e'},
{"stride", required_argument, nullptr, 't'},
{"indexing-algorithm", required_argument, nullptr, 'X'},
{"unit-cell", required_argument, nullptr, 'C'},
{"reference-mtz", required_argument, nullptr, 'z'},
{"reference-column", required_argument, nullptr, OPT_REFERENCE_COLUMN},
{"dump-observations", required_argument, nullptr, OPT_DUMP_OBSERVATIONS},
{"space-group", required_argument, nullptr, 'S'},
{"partiality", required_argument, nullptr, 'P'},
{"anomalous", no_argument, nullptr, 'A'},
{"refine-bfactor", no_argument, nullptr, 'B'},
{"wedge", optional_argument, nullptr, 'w'},
{"scale-merge", no_argument, nullptr, 'M'},
{"scale-fulls", no_argument, nullptr, OPT_SCALE_FULLS},
{"refine", required_argument, nullptr, 'r'},
{"two-pass-rotation", optional_argument, nullptr, 'R'},
{"single-pass-rotation", optional_argument, nullptr, OPT_SINGLE_PASS_ROTATION},
{"redo-rotation-spots", no_argument, nullptr, OPT_REDO_ROTATION_SPOTS},
{"force-rotation-lattice", required_argument, nullptr, OPT_FORCE_ROTATION_LATTICE},
{"spot-sigma", required_argument, nullptr, OPT_SPOT_SIGMA},
{"spot-threshold", required_argument, nullptr, OPT_SPOT_THRESHOLD},
{"spot-high-resolution", required_argument, nullptr, OPT_SPOT_RESOLUTION},
{"max-spots", required_argument, nullptr, OPT_MAX_SPOTS},
{"min-partiality", required_argument, nullptr, OPT_MIN_PARTIALITY},
{"min-image-cc", required_argument, nullptr, OPT_MIN_IMAGE_CC},
{"scaling-iterations", required_argument, nullptr, OPT_SCALING_ITERATIONS},
{"scaling-high-resolution", required_argument, nullptr, OPT_SCALING_HIGH_RESOLUTION},
{"scaling-output", required_argument, nullptr, OPT_SCALING_OUTPUT},
{"bandwidth", required_argument, nullptr, OPT_BANDWIDTH},
{"integration-radius", required_argument, nullptr, OPT_INTEGRATION_RADIUS},
{"integrator", required_argument, nullptr, OPT_INTEGRATOR},
{"reject-outliers", required_argument, nullptr, OPT_REJECT_OUTLIERS},
{"reject-delta-cchalf", required_argument, nullptr, OPT_REJECT_DELTA_CCHALF},
{"profile-multiplier", required_argument, nullptr, OPT_PROFILE_MULTIPLIER},
{nullptr, 0, nullptr, 0}
};
void trim_in_place(std::string &t) {
size_t b = 0;
while (b < t.size() && std::isspace(static_cast<unsigned char>(t[b]))) b++;
size_t e = t.size();
while (e > b && std::isspace(static_cast<unsigned char>(t[e - 1]))) e--;
t = t.substr(b, e - b);
};
bool parse_float_strict(const std::string &t, float &out) {
try {
size_t idx = 0;
out = std::stof(t, &idx);
return idx == t.size();
} catch (...) {
return false;
}
};
std::optional<UnitCell> parse_unit_cell_arg(const char *arg) {
if (!arg)
return std::nullopt;
std::string s(arg);
trim_in_place(s);
if (s.size() >= 2 && ((s.front() == '"' && s.back() == '"') || (s.front() == '\'' && s.back() == '\''))) {
s = s.substr(1, s.size() - 2);
trim_in_place(s);
}
std::vector<std::string> parts;
parts.reserve(6);
size_t start = 0;
while (true) {
size_t pos = s.find(',', start);
if (pos == std::string::npos) {
parts.push_back(s.substr(start));
break;
}
parts.push_back(s.substr(start, pos - start));
start = pos + 1;
}
if (parts.size() != 6)
return std::nullopt;
UnitCell uc{};
if (!parse_float_strict(parts[0], uc.a)) return std::nullopt;
if (!parse_float_strict(parts[1], uc.b)) return std::nullopt;
if (!parse_float_strict(parts[2], uc.c)) return std::nullopt;
if (!parse_float_strict(parts[3], uc.alpha)) return std::nullopt;
if (!parse_float_strict(parts[4], uc.beta)) return std::nullopt;
if (!parse_float_strict(parts[5], uc.gamma)) return std::nullopt;
return uc;
}
std::optional<CrystalLattice> parse_lattice_arg(const char *arg) {
if (!arg)
return std::nullopt;
std::string s(arg);
trim_in_place(s);
if (s.size() >= 2 && ((s.front() == '"' && s.back() == '"') || (s.front() == '\'' && s.back() == '\''))) {
s = s.substr(1, s.size() - 2);
trim_in_place(s);
}
std::vector<std::string> parts;
parts.reserve(9);
size_t start = 0;
while (true) {
size_t pos = s.find(',', start);
if (pos == std::string::npos) {
parts.push_back(s.substr(start));
break;
}
parts.push_back(s.substr(start, pos - start));
start = pos + 1;
}
if (parts.size() != 9)
return std::nullopt;
std::vector<float> vals(9);
for (int i = 0; i < 9; i++) {
if (!parse_float_strict(parts[i], vals[i]))
return std::nullopt;
}
return CrystalLattice(vals);
}
namespace {
std::atomic<JFJochProcess *> g_active_process{nullptr};
void handle_sigint(int) {
if (auto *p = g_active_process.load())
p->Cancel();
}
}
int main(int argc, char **argv) {
for (int i = 0; i < argc; i++) {
std::cout << argv[i] << " ";
}
std::cout << std::endl << std::endl;
RegisterHDF5Filter();
print_license("jfjoch_process");
Logger logger("jfjoch_process");
std::string input_file;
std::string output_prefix = "output";
int nthreads = 1;
int start_image = 0;
int end_image = -1; // -1 indicates process until end
int image_stride = 1;
bool verbose = false;
bool rotation_indexing = false;
bool two_pass_rotation = true;
bool reuse_rotation_spots = true;
int rotation_indexing_image_count = 100;
std::optional<float> rotation_indexing_range;
bool run_scaling = false;
bool scale_fulls = false;
bool anomalous_mode = false;
std::optional<int64_t> space_group_number;
std::optional<UnitCell> fixed_reference_unit_cell;
std::optional<int64_t> max_spot_count_override;
float sigma_spot_finding = 3.0;
int64_t photon_count_threshold_spot_finding = 10;
bool refine_bfactor = false;
bool refine_wedge = false;
std::optional<double> wedge_for_scaling;
std::string ref_mtz;
std::string ref_column;
std::string dump_observations; // diagnostic: dump unmerged -P rot3d fulls to this path
double min_partiality = 0.02;
double min_image_cc = 0.0;
int64_t scaling_iter = 3;
std::optional<CrystalLattice> forced_rotation_lattice;
std::optional<float> bandwidth_fwhm; // relative FWHM of dlambda/lambda
IndexingAlgorithmEnum indexing_algorithm = IndexingAlgorithmEnum::Auto;
GeomRefinementAlgorithmEnum refinement_algorithm = GeomRefinementAlgorithmEnum::BeamCenter;
IntensityFormat intensity_format = IntensityFormat::MTZ;
PartialityModel partiality_model = PartialityModel::Fixed;
bool combine_3d = false; // -P rot3d: weight-sum per-frame partials into fulls before merging
float d_min_spot_finding = 1.5;
std::optional<float> d_min_scale_merge;
std::optional<std::string> integration_radius_arg; // "r1" or "r1,r2,r3"
std::optional<IntegratorMode> integrator_mode; // --integrator boxsum|gaussian|empirical
std::optional<double> outlier_reject_nsigma; // merge per-observation outlier rejection
std::optional<double> delta_cchalf_nsigma; // per-crystal CC1/2-delta rejection
std::optional<float> profile_multiplier; // PixelRefine Term-2 profile-width multiplier
if (argc == 1) {
print_usage();
exit(EXIT_FAILURE);
}
int opt;
int option_index = 0;
const char *short_opts = "vo:N:s:e:t:R::X:C:z:FABw::S:MP:r:";
while ((opt = getopt_long(argc, argv, short_opts, long_options, &option_index)) != -1) {
switch (opt) {
case 'o':
output_prefix = optarg;
break;
case 'v':
verbose = true;
break;
case 'N':
nthreads = atoi(optarg);
break;
case 's':
start_image = atoi(optarg);
break;
case 'e':
end_image = atoi(optarg);
break;
case 't':
image_stride = atoi(optarg);
break;
case 'R':
if (rotation_indexing) {
logger.Error("Rotation indexing already enabled");
exit(EXIT_FAILURE);
}
rotation_indexing = true;
two_pass_rotation = true;
if (optarg)
rotation_indexing_image_count = atoi(optarg);
break;
case OPT_SINGLE_PASS_ROTATION:
if (rotation_indexing) {
logger.Error("Rotation indexing already enabled");
exit(EXIT_FAILURE);
}
rotation_indexing = true;
two_pass_rotation = false;
if (optarg)
rotation_indexing_range = atof(optarg);
break;
case OPT_REDO_ROTATION_SPOTS:
reuse_rotation_spots = false;
break;
case OPT_FORCE_ROTATION_LATTICE: {
if (rotation_indexing) {
logger.Error("Rotation indexing already enabled");
exit(EXIT_FAILURE);
}
rotation_indexing = true;
auto latt = parse_lattice_arg(optarg);
if (!latt.has_value()) {
logger.Error(
"Invalid rotation lattice. Expected: \"a0x,a0y,a0z,a1x,a1y,a1z,a2x,a2y,a2z\" (9 floats, comma-separated). Got: {}",
optarg ? optarg : "<null>");
print_usage();
exit(EXIT_FAILURE);
}
forced_rotation_lattice = latt;
auto uc = latt->GetUnitCell();
logger.Info(
"Forced rotation lattice set: a={:.3f} b={:.3f} c={:.3f} alpha={:.3f} beta={:.3f} gamma={:.3f}",
uc.a, uc.b, uc.c, uc.alpha, uc.beta, uc.gamma);
break;
}
case 'X': {
std::string alg = optarg ? optarg : "";
std::transform(alg.begin(), alg.end(), alg.begin(),
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
if (alg == "ffbidx")
indexing_algorithm = IndexingAlgorithmEnum::FFBIDX;
else if (alg == "fft")
indexing_algorithm = IndexingAlgorithmEnum::FFT;
else if (alg == "fftw")
indexing_algorithm = IndexingAlgorithmEnum::FFTW;
else if (alg == "auto")
indexing_algorithm = IndexingAlgorithmEnum::Auto;
else if (alg == "none")
indexing_algorithm = IndexingAlgorithmEnum::None;
else {
logger.Error("Invalid indexing algorithm: {}", alg);
print_usage();
exit(EXIT_FAILURE);
}
break;
}
case 'r': {
std::string alg = optarg ? optarg : "";
std::transform(alg.begin(), alg.end(), alg.begin(),
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
if (alg == "none")
refinement_algorithm = GeomRefinementAlgorithmEnum::None;
else if (alg == "beam_and_lattice")
refinement_algorithm = GeomRefinementAlgorithmEnum::BeamCenter;
else if (alg == "orientation")
refinement_algorithm = GeomRefinementAlgorithmEnum::OrientationOnly;
else if (alg == "pixelrefine")
refinement_algorithm = GeomRefinementAlgorithmEnum::PixelRefine;
else {
logger.Error("Invalid geom refinement algorithm: {}", alg);
print_usage();
exit(EXIT_FAILURE);
}
break;
}
case 'C': {
auto uc = parse_unit_cell_arg(optarg);
if (!uc.has_value()) {
logger.Error(
"Invalid unit cell. Expected: \"a,b,c,alpha,beta,gamma\" (6 floats, comma-separated, no spaces). Got: {}",
optarg ? optarg : "<null>");
print_usage();
exit(EXIT_FAILURE);
}
fixed_reference_unit_cell = uc;
logger.Info(
"Fixed reference unit cell set: a={:.3f} b={:.3f} c={:.3f} alpha={:.3f} beta={:.3f} gamma={:.3f}",
uc->a, uc->b, uc->c, uc->alpha, uc->beta, uc->gamma);
break;
}
case 'z':
ref_mtz = optarg;
break;
case OPT_REFERENCE_COLUMN:
ref_column = optarg;
break;
case OPT_DUMP_OBSERVATIONS:
dump_observations = optarg;
break;
case 'F':
indexing_algorithm = IndexingAlgorithmEnum::FFT;
break;
case 'A':
anomalous_mode = true;
break;
case 'B':
refine_bfactor = true;
break;
case 'w':
refine_wedge = true;
if (optarg)
wedge_for_scaling = std::stod(optarg);
break;
case 'S':
space_group_number = atoi(optarg);
break;
case 'P':
if (strcmp(optarg, "unity") == 0)
partiality_model = PartialityModel::Unity;
else if (strcmp(optarg, "fixed") == 0)
partiality_model = PartialityModel::Fixed;
else if (strcmp(optarg, "rot") == 0)
partiality_model = PartialityModel::Rotation;
else if (strcmp(optarg, "rot3d") == 0) {
partiality_model = PartialityModel::Rotation;
combine_3d = true;
}
else {
logger.Error("Invalid partiality mode: {}", optarg);
print_usage();
exit(EXIT_FAILURE);
}
break;
case OPT_SPOT_SIGMA:
sigma_spot_finding = atof(optarg);
logger.Info("Noise threshold level for spot finding set to {:.2f} sigma", sigma_spot_finding);
break;
case OPT_SPOT_THRESHOLD:
photon_count_threshold_spot_finding = atoi(optarg);
logger.Info("Photon-count threshold level for spot finding set to {:d}",
photon_count_threshold_spot_finding);
break;
case OPT_SPOT_RESOLUTION:
d_min_spot_finding = atof(optarg);
logger.Info("High resolution limit for spot finding set to {:.2f} A", d_min_spot_finding);
break;
case OPT_MAX_SPOTS:
max_spot_count_override = atoll(optarg);
logger.Info("Max spot count overridden to {}", max_spot_count_override.value());
break;
case 'M':
run_scaling = true;
break;
case OPT_SCALE_FULLS:
run_scaling = true;
scale_fulls = true;
break;
case OPT_MIN_PARTIALITY:
min_partiality = std::stod(optarg);
break;
case OPT_INTEGRATION_RADIUS:
integration_radius_arg = optarg;
break;
case OPT_INTEGRATOR:
if (strcmp(optarg, "boxsum") == 0) integrator_mode = IntegratorMode::BoxSum;
else if (strcmp(optarg, "gaussian") == 0) integrator_mode = IntegratorMode::ProfileGaussian;
else if (strcmp(optarg, "empirical") == 0) integrator_mode = IntegratorMode::ProfileEmpirical;
else { logger.Error("--integrator expects boxsum|gaussian|empirical"); return 1; }
break;
case OPT_REJECT_OUTLIERS:
outlier_reject_nsigma = std::stod(optarg);
break;
case OPT_REJECT_DELTA_CCHALF:
delta_cchalf_nsigma = std::stod(optarg);
break;
case OPT_PROFILE_MULTIPLIER:
profile_multiplier = std::stof(optarg);
break;
case OPT_MIN_IMAGE_CC:
min_image_cc = std::stod(optarg);
break;
case OPT_SCALING_HIGH_RESOLUTION:
d_min_scale_merge = atof(optarg);
break;
case OPT_SCALING_OUTPUT:
if (strcmp(optarg, "mtz") == 0) {
intensity_format = IntensityFormat::MTZ;
} else if (strcmp(optarg, "cif") == 0) {
intensity_format = IntensityFormat::mmCIF;
} else if (strcmp(optarg, "txt") == 0) {
intensity_format = IntensityFormat::Text;
} else {
logger.Error("Invalid intensity format: {}", optarg);
exit(EXIT_FAILURE);
}
break;
case OPT_SCALING_ITERATIONS:
scaling_iter = atoi(optarg);
if (scaling_iter <= 0) {
logger.Error("Invalid scaling iteration count: {}", scaling_iter);
exit(EXIT_FAILURE);
}
break;
case OPT_BANDWIDTH:
bandwidth_fwhm = atof(optarg);
if (!(bandwidth_fwhm.value() >= 0.0f)) {
logger.Error("Invalid bandwidth: {}", optarg);
exit(EXIT_FAILURE);
}
break;
default:
print_usage();
exit(EXIT_FAILURE);
}
}
if (optind != argc - 1) {
logger.Error("Input file not specified");
print_usage();
exit(EXIT_FAILURE);
}
input_file = argv[optind];
logger.Verbose(verbose);
// Validate space group number early
const gemmi::SpaceGroup *space_group = nullptr;
if (space_group_number.has_value()) {
space_group = gemmi::find_spacegroup_by_number(space_group_number.value());
if (!space_group) {
logger.Error("Unknown space group number {}", space_group_number.value());
exit(EXIT_FAILURE);
}
logger.Info("Using space group {} (number {})", space_group->hm, space_group_number.value());
}
// 1. Read Input File
JFJochHDF5Reader reader;
try {
reader.ReadFile(input_file);
} catch (const std::exception &e) {
logger.Error("Error reading input file: {}", e.what());
exit(EXIT_FAILURE);
}
const auto dataset = reader.GetDataset();
if (!dataset) {
logger.Error("No experiment dataset found in the input file");
exit(EXIT_FAILURE);
}
if (rotation_indexing_image_count <= 0) {
logger.Error("Invalid number of rotation indexing images: {}", rotation_indexing_image_count);
exit(EXIT_FAILURE);
}
logger.Info("Loaded dataset from {}", input_file);
std::vector<MergedReflection> reference_data;
if (!ref_mtz.empty()) {
try {
const auto reference = LoadReferenceMtz(
ref_mtz, ref_column.empty() ? std::nullopt : std::optional<std::string>(ref_column));
reference_data = reference.reflections;
logger.Info("Loaded {} reference reflections from {} (column {}{}{})",
reference_data.size(), ref_mtz, reference.used_column,
reference.squared ? ", squared to intensity" : "",
reference.default_column ? ", auto-selected" : ", user-specified");
if (reference.d_max > 0.0)
logger.Info("Reference resolution range {:.2f} - {:.2f} A", reference.d_max, reference.d_min);
if (reference.cell.has_value())
logger.Info("Reference unit cell: a={:.3f} b={:.3f} c={:.3f} alpha={:.2f} beta={:.2f} gamma={:.2f}",
reference.cell->a, reference.cell->b, reference.cell->c,
reference.cell->alpha, reference.cell->beta, reference.cell->gamma);
if (!reference.space_group_name.empty())
logger.Info("Reference space group: {} (number {})",
reference.space_group_name, reference.space_group_number.value_or(0));
const auto warning = ReferenceConsistencyWarning(
reference, fixed_reference_unit_cell,
space_group_number.has_value() ? std::optional<int>(static_cast<int>(*space_group_number))
: std::nullopt);
if (!warning.empty())
logger.Warning("{}", warning);
} catch (const std::exception &e) {
logger.Error("Error reading reference MTZ {}: {}", ref_mtz, e.what());
exit(EXIT_FAILURE);
}
}
uint64_t total_images_in_file = reader.GetNumberOfImages();
if (end_image < 0 || end_image > total_images_in_file)
end_image = total_images_in_file;
if (image_stride < 0) {
logger.Error("Image stride cannot be negative");
exit(EXIT_FAILURE);
}
if (image_stride == 0) {
logger.Error("Image stride cannot be zero");
exit(EXIT_FAILURE);
}
int images_to_process = (end_image - start_image) / image_stride;
if (images_to_process <= 0) {
logger.Warning("No images to process (Start: {}, End: {} Stride: {}, Total: {})", start_image, end_image,
image_stride, total_images_in_file);
return 0;
}
logger.Info("Starting analysis of {} images (range {}-{}) using {} threads",
images_to_process, start_image, end_image, nthreads);
// 2. Setup Experiment & Components
DiffractionExperiment experiment(dataset->experiment);
experiment.BitDepthImage(32).Compression(CompressionAlgorithm::BSHUF_LZ4);
experiment.FilePrefix(output_prefix);
experiment.Mode(DetectorMode::Standard); // Ensure full image analysis
experiment.PixelSigned(true);
experiment.OverwriteExistingFiles(true);
experiment.PolarizationFactor(0.99);
experiment.SetFileWriterFormat(FileWriterFormat::NXmxLegacy);
experiment.SpaceGroupNumber(space_group_number);
experiment.ImagesPerTrigger(images_to_process);
experiment.NumTriggers(1);
if (fixed_reference_unit_cell.has_value())
experiment.SetUnitCell(*fixed_reference_unit_cell);
if (max_spot_count_override.has_value()) {
experiment.MaxSpotCount(max_spot_count_override.value());
logger.Info("Max spot count overridden to {}", max_spot_count_override.value());
}
// X-ray bandwidth: CLI overrides the value carried in the dataset; otherwise
// keep whatever the dataset provided (0 / none -> monochromatic).
if (bandwidth_fwhm)
experiment.BandwidthFWHM(bandwidth_fwhm);
if (experiment.GetBandwidthFWHM())
logger.Info("X-ray bandwidth FWHM set to {:.4f}", experiment.GetBandwidthFWHM().value());
// PixelRefine integration needs reference intensities (the I_true hypothesis).
if (refinement_algorithm == GeomRefinementAlgorithmEnum::PixelRefine && reference_data.empty()) {
logger.Warning("-r pixelrefine needs --reference-mtz; falling back to beam_and_lattice");
refinement_algorithm = GeomRefinementAlgorithmEnum::BeamCenter;
}
// Configure Indexing
IndexingSettings indexing_settings;
indexing_settings.Algorithm(indexing_algorithm);
indexing_settings.RotationIndexing(rotation_indexing);
if (rotation_indexing_range.has_value())
indexing_settings.RotationIndexingMinAngularRange_deg(rotation_indexing_range.value());
indexing_settings.GeomRefinementAlgorithm(refinement_algorithm);
experiment.ImportIndexingSettings(indexing_settings);
ScalingSettings scaling_settings;
scaling_settings.SetPartialityModel(partiality_model);
scaling_settings.Combine3D(combine_3d);
scaling_settings.ScaleFulls(scale_fulls);
if (d_min_scale_merge)
scaling_settings.HighResolutionLimit_A(d_min_scale_merge.value());
scaling_settings.MergeFriedel(!anomalous_mode);
scaling_settings.RefineB(refine_bfactor);
scaling_settings.RefineRotationWedge(refine_wedge);
if (wedge_for_scaling.has_value())
scaling_settings.RotationWedgeForScaling(wedge_for_scaling);
scaling_settings.MinPartiality(min_partiality);
scaling_settings.MinCCForImage(min_image_cc);
if (outlier_reject_nsigma)
scaling_settings.OutlierRejectNsigma(*outlier_reject_nsigma);
scaling_settings.FileFormat(intensity_format);
experiment.ImportScalingSettings(scaling_settings);
// Integration radii: r1 (signal box), r2/r3 (background annulus). PixelRefine reads
// r1 as its shoebox radius; the classical integrator uses all three.
if (integration_radius_arg) {
std::vector<float> rr;
std::stringstream ss(*integration_radius_arg);
std::string tok;
while (std::getline(ss, tok, ',')) {
trim_in_place(tok);
if (!tok.empty()) rr.push_back(std::stof(tok));
}
float r1, r2, r3;
if (rr.size() == 1) { r1 = rr[0]; r2 = r1 + 2.0f; r3 = r1 + 4.0f; }
else if (rr.size() == 3) { r1 = rr[0]; r2 = rr[1]; r3 = rr[2]; }
else { logger.Error("--integration-radius expects r1 or r1,r2,r3"); return 1; }
BraggIntegrationSettings bis = experiment.GetBraggIntegrationSettings();
bis.R1(r1).R2(r2).R3(r3);
experiment.ImportBraggIntegrationSettings(bis);
logger.Info("Integration radii set to r1={:.1f} r2={:.1f} r3={:.1f}", r1, r2, r3);
}
if (profile_multiplier) {
BraggIntegrationSettings bis = experiment.GetBraggIntegrationSettings();
bis.ProfileMultiplier(*profile_multiplier);
experiment.ImportBraggIntegrationSettings(bis);
logger.Info("PixelRefine profile-width multiplier set to {:.1f}", *profile_multiplier);
}
if (integrator_mode) {
BraggIntegrationSettings bis = experiment.GetBraggIntegrationSettings();
bis.Integrator(*integrator_mode);
experiment.ImportBraggIntegrationSettings(bis);
logger.Info("Integrator set to {}", *integrator_mode == IntegratorMode::BoxSum ? "box-sum"
: *integrator_mode == IntegratorMode::ProfileGaussian ? "profile (gaussian)"
: "profile (empirical)");
}
SpotFindingSettings spot_settings;
spot_settings.enable = true;
spot_settings.indexing = true;
spot_settings.high_resolution_limit = d_min_spot_finding;
spot_settings.signal_to_noise_threshold = sigma_spot_finding;
spot_settings.photon_count_threshold = photon_count_threshold_spot_finding;
if (d_min_spot_finding > 0.0f)
spot_settings.high_resolution_limit = d_min_spot_finding;
// Run the shared full-analysis workflow (rotation indexing + scaling/merging live in
// JFJochProcess; the experiment above carries all algorithm settings).
ProcessConfig config;
config.mode = ProcessMode::FullAnalysis;
config.start_image = start_image;
config.end_image = end_image;
config.stride = image_stride;
config.nthreads = nthreads;
config.output_prefix = output_prefix;
config.spot_finding = spot_settings;
config.rotation_indexing = rotation_indexing;
config.two_pass_rotation = two_pass_rotation;
config.reuse_rotation_spots = reuse_rotation_spots;
config.rotation_indexing_image_count = rotation_indexing_image_count;
config.forced_rotation_lattice = forced_rotation_lattice;
config.run_scaling = run_scaling;
config.scaling_iter = scaling_iter;
config.reference_data = reference_data;
config.observation_dump_path = dump_observations;
JFJochProcess process(reader, experiment, dataset->pixel_mask, config);
g_active_process = &process;
std::signal(SIGINT, handle_sigint);
ProcessResult result;
try {
result = process.Run();
} catch (const std::exception &e) {
logger.Error("Processing failed: {}", e.what());
exit(EXIT_FAILURE);
}
g_active_process = nullptr;
if (!result.merge_statistics_text.empty())
std::cout << std::endl << result.merge_statistics_text << std::endl;
// Report statistics
std::cout << fmt::format("Processing time: {:.2f} s", result.processing_time_s) << std::endl;
std::cout << fmt::format("Frame rate: {:.2f} Hz", result.frame_rate_hz) << std::endl;
std::cout << fmt::format("Total throughput:{:.2f} MB/s", result.throughput_MBs) << std::endl;
if (result.indexing_rate.has_value())
std::cout << fmt::format("Indexing rate: {:.2f}%", result.indexing_rate.value() * 100.0) << std::endl;
if (result.consensus_cell.has_value()) {
const auto &c = result.consensus_cell.value();
std::cout << fmt::format("Unit cell: a={:.2f} b={:.2f} c={:.2f} alpha={:.2f} beta={:.2f} gamma={:.2f}",
c.a, c.b, c.c, c.alpha, c.beta, c.gamma) << std::endl;
}
const auto &t = result.mean_processing_time;
std::cout << fmt::format(
"Per-image time (mean; ms): decompress {:.2f} preprocess {:.2f} azint {:.2f} spot finding {:.2f} "
"indexing {:.2f} refinement {:.2f} indexing analysis {:.2f} prediction {:.2f} integration {:.2f} "
"scaling {:.2f} total {:.2f}",
t.compression * 1e3, t.preprocessing * 1e3, t.azint * 1e3, t.spot_finding * 1e3,
t.indexing * 1e3, t.refinement * 1e3, t.indexing_analysis * 1e3, t.bragg_prediction * 1e3,
t.integration * 1e3, t.image_scale * 1e3, t.processing * 1e3) << std::endl;
if (result.cancelled)
logger.Warning("Processing was cancelled after {} images", result.images_processed);
}