Files
Jungfraujoch/tools/jfjoch_process.cpp
leonarski_f 38ea0ec237 Remove the experimental PixelRefine integrator
On the LysozymeJet5 serial stills the default Gaussian profile-fit
integrator (ProfileIntegrate2D) + reference scaling matched or beat
whole-PixelRefine on every per-shell CC1/2 (overall 95.7% vs 91.9%), ISa
(1.6 vs 1.2) and R-meas (98.5% vs 175%), with CCref a tie -- so PixelRefine
has no remaining advantage. Reference-based per-image scaling is
integrator-agnostic (IndexAndRefine::ReferenceIntensities builds a
ScaleOnTheFly(experiment, reference) applied to any integrator's output),
so the reference-dataset feature (CCref + reference scaling) is kept.

Delete image_analysis/pixel_refinement/, GeomRefinementAlgorithmEnum::
PixelRefine and its gates, BraggIntegrationSettings::ProfileMultiplier
(PixelRefine-only; R1 is shared and kept), and the -r pixelrefine /
--profile-multiplier CLI. The inherited lessons (mean background, de-biased
variance, tight-profile-loses / centroid floor, R-refinement futile) are
folded into NEXTGEN_INTEGRATOR.md.

NOTE: this transiently breaks the viewer build -- the committed viewer
still references the removed enum and ProfileMultiplier. It is fixed in the
next commit (the viewer feature work), held separate while the viewer UI is
being tested.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-25 20:43:04 +02:00

816 lines
34 KiB
C++

// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#include <algorithm>
#include <atomic>
#include <csignal>
#include <getopt.h>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include "../reader/JFJochHDF5Reader.h"
#include "../common/Logger.h"
#include "../common/DiffractionExperiment.h"
#include "../common/PixelMask.h"
#include "../common/print_license.h"
#include "../image_analysis/LoadFCalcFromMtz.h"
#include "../process/JFJochProcess.h"
void print_usage() {
std::cout << "Usage jfjoch_process {<options>} <input.h5>" << std::endl;
std::cout << "Options:" << std::endl;
std::cout << " -o, --output-prefix <txt> Output file prefix (default: output)" << std::endl;
std::cout << " -N, --threads <num> Number of threads (default: 1)" << std::endl;
std::cout << " -s, --start-image <num> Start image number (default: 0)" << std::endl;
std::cout << " -e, --end-image <num> End image number (default: all)" << std::endl;
std::cout << " -t, --stride <num> Image stride (default: 1)" << std::endl;
std::cout << " -v, --verbose Verbose output" << std::endl;
std::cout << std::endl;
std::cout << " Spot finding" << std::endl;
std::cout << " --spot-sigma <num> Noise sigma level for spot finding (default: 3.0)" << std::endl;
std::cout << " --spot-threshold <num> Photon count threshold for spot finding (default: 10)" << std::endl;
std::cout << " --spot-high-resolution <num> High resolution limit for spot finding (default: 1.5)" << std::endl;
std::cout << " --max-spots <num> Max spot count (default: 250)" << std::endl;
std::cout << std::endl;
std::cout << " Indexing" << std::endl;
std::cout << " -R, --two-pass-rotation[=num] Two-pass offline rotation indexing (optional: number of images, default: 100)" << std::endl;
std::cout << " --single-pass-rotation[=num] Use online-like single-pass rotation indexing (optional: min angular range deg)" << std::endl;
std::cout << " --redo-rotation-spots Redo spot finding for two-pass rotation indexing" << std::endl;
std::cout << " --force-rotation-lattice <vec> Force rotation indexer with external lattice (in Angstrom) : \"a0x,a0y,a0z,a1x,a1y,a1z,a2x,a2y,a2z\" (9 floats, skips first pass)" << std::endl;
std::cout << " -X, --indexing-algorithm <txt> Indexing algorithm (FFBIDX|FFT|FFTW|Auto|None)" << std::endl;
std::cout << " -S, --space-group <num> Space group number - used for both indexing and scaling" << std::endl;
std::cout << " -C, --unit-cell <cell> Fix reference unit cell: \"a,b,c,alpha,beta,gamma\"" << std::endl;
std::cout << " -r, --refine <txt> Geometry refinement algorithm (none|orientation|beam_and_lattice)" << std::endl;
std::cout << std::endl;
std::cout << " Scaling and merging" << std::endl;
std::cout << " -M, --scale-merge Scale and merge (refine mosaicity) and write scaled.hkl + image.dat" << std::endl;
std::cout << " --scale-fulls After -P rot3d combine, refit a per-frame scale on the fulls (XDS order, Unity model); implies -M" << std::endl;
std::cout << " -P, --partiality <txt> Partiality model fixed|rot|rot3d|unity (default: fixed). rot3d = rot + 3D combine of per-frame partials" << std::endl;
std::cout << " -A, --anomalous Anomalous mode (don't merge Friedel pairs)" << std::endl;
std::cout << " -B, --refine-bfactor Refine per image B-factor" << std::endl;
std::cout << " -w, --wedge[=num] Refine image wedge during scaling with starting wedge value" << std::endl;
std::cout << " --scaling-high-resolution <num> High resolution limit for spot finding (default: no limit)" << std::endl;
std::cout << " --min-partiality <num> Minimum partiality to accept reflection (default: 0.02)" << std::endl;
std::cout << " --reject-outliers <num> Per-observation merge outlier rejection, N sigma from the per-reflection median (default: off; e.g. 6, XDS/DIALS-style)" << std::endl;
std::cout << " --reject-delta-cchalf <num> Per-crystal CC1/2-delta rejection: drop images with deltaCChalf below mean - N*stddev (default: off; e.g. 2.5)" << std::endl;
std::cout << " --min-image-cc <num> Per-image CC limit in percent (default: no limit)" << std::endl;
std::cout << " --scaling-iterations <num> Number of scaling iterations with no reference data (default: 3)" << std::endl;
std::cout << " --scaling-output <txt> Output format for scaling results mtz|cif|txt (default: mtz)" << std::endl;
std::cout << " -z, --reference-mtz <file> Reference MTZ file" << std::endl;
std::cout << " --reference-column <label> Reference MTZ column to use (default: auto - F-model, else IMEAN/I/...)" << std::endl;
std::cout << std::endl;
std::cout << " Integration" << std::endl;
std::cout << " --bandwidth <num> Relative X-ray bandwidth FWHM (e.g. 0.01 for 1% DMM); default from file or 0" << std::endl;
std::cout << " --integration-radius <r> Signal-box radius r1, or r1,r2,r3 (px). One value => r2=r1+2, r3=r1+4" << std::endl;
std::cout << " --integrator <txt> Spot integrator boxsum|gaussian|empirical (default: gaussian profile-fit; boxsum is the classical fallback)" << std::endl;
}
enum {
OPT_SPOT_SIGMA = 1000,
OPT_SPOT_THRESHOLD,
OPT_SPOT_RESOLUTION,
OPT_MAX_SPOTS,
OPT_MIN_PARTIALITY,
OPT_MIN_IMAGE_CC,
OPT_SCALING_ITERATIONS,
OPT_SCALING_HIGH_RESOLUTION,
OPT_SCALING_OUTPUT,
OPT_SINGLE_PASS_ROTATION,
OPT_REDO_ROTATION_SPOTS,
OPT_FORCE_ROTATION_LATTICE,
OPT_BANDWIDTH,
OPT_INTEGRATION_RADIUS,
OPT_REJECT_OUTLIERS,
OPT_REJECT_DELTA_CCHALF,
OPT_REFERENCE_COLUMN,
OPT_DUMP_OBSERVATIONS,
OPT_INTEGRATOR,
OPT_SCALE_FULLS
};
static option long_options[] = {
{"verbose", no_argument, nullptr, 'v'},
{"output-prefix", required_argument, nullptr, 'o'},
{"threads", required_argument, nullptr, 'N'},
{"start-image", required_argument, nullptr, 's'},
{"end-image", required_argument, nullptr, 'e'},
{"stride", required_argument, nullptr, 't'},
{"indexing-algorithm", required_argument, nullptr, 'X'},
{"unit-cell", required_argument, nullptr, 'C'},
{"reference-mtz", required_argument, nullptr, 'z'},
{"reference-column", required_argument, nullptr, OPT_REFERENCE_COLUMN},
{"dump-observations", required_argument, nullptr, OPT_DUMP_OBSERVATIONS},
{"space-group", required_argument, nullptr, 'S'},
{"partiality", required_argument, nullptr, 'P'},
{"anomalous", no_argument, nullptr, 'A'},
{"refine-bfactor", no_argument, nullptr, 'B'},
{"wedge", optional_argument, nullptr, 'w'},
{"scale-merge", no_argument, nullptr, 'M'},
{"scale-fulls", no_argument, nullptr, OPT_SCALE_FULLS},
{"refine", required_argument, nullptr, 'r'},
{"two-pass-rotation", optional_argument, nullptr, 'R'},
{"single-pass-rotation", optional_argument, nullptr, OPT_SINGLE_PASS_ROTATION},
{"redo-rotation-spots", no_argument, nullptr, OPT_REDO_ROTATION_SPOTS},
{"force-rotation-lattice", required_argument, nullptr, OPT_FORCE_ROTATION_LATTICE},
{"spot-sigma", required_argument, nullptr, OPT_SPOT_SIGMA},
{"spot-threshold", required_argument, nullptr, OPT_SPOT_THRESHOLD},
{"spot-high-resolution", required_argument, nullptr, OPT_SPOT_RESOLUTION},
{"max-spots", required_argument, nullptr, OPT_MAX_SPOTS},
{"min-partiality", required_argument, nullptr, OPT_MIN_PARTIALITY},
{"min-image-cc", required_argument, nullptr, OPT_MIN_IMAGE_CC},
{"scaling-iterations", required_argument, nullptr, OPT_SCALING_ITERATIONS},
{"scaling-high-resolution", required_argument, nullptr, OPT_SCALING_HIGH_RESOLUTION},
{"scaling-output", required_argument, nullptr, OPT_SCALING_OUTPUT},
{"bandwidth", required_argument, nullptr, OPT_BANDWIDTH},
{"integration-radius", required_argument, nullptr, OPT_INTEGRATION_RADIUS},
{"integrator", required_argument, nullptr, OPT_INTEGRATOR},
{"reject-outliers", required_argument, nullptr, OPT_REJECT_OUTLIERS},
{"reject-delta-cchalf", required_argument, nullptr, OPT_REJECT_DELTA_CCHALF},
{nullptr, 0, nullptr, 0}
};
void trim_in_place(std::string &t) {
size_t b = 0;
while (b < t.size() && std::isspace(static_cast<unsigned char>(t[b]))) b++;
size_t e = t.size();
while (e > b && std::isspace(static_cast<unsigned char>(t[e - 1]))) e--;
t = t.substr(b, e - b);
};
bool parse_float_strict(const std::string &t, float &out) {
try {
size_t idx = 0;
out = std::stof(t, &idx);
return idx == t.size();
} catch (...) {
return false;
}
};
std::optional<UnitCell> parse_unit_cell_arg(const char *arg) {
if (!arg)
return std::nullopt;
std::string s(arg);
trim_in_place(s);
if (s.size() >= 2 && ((s.front() == '"' && s.back() == '"') || (s.front() == '\'' && s.back() == '\''))) {
s = s.substr(1, s.size() - 2);
trim_in_place(s);
}
std::vector<std::string> parts;
parts.reserve(6);
size_t start = 0;
while (true) {
size_t pos = s.find(',', start);
if (pos == std::string::npos) {
parts.push_back(s.substr(start));
break;
}
parts.push_back(s.substr(start, pos - start));
start = pos + 1;
}
if (parts.size() != 6)
return std::nullopt;
UnitCell uc{};
if (!parse_float_strict(parts[0], uc.a)) return std::nullopt;
if (!parse_float_strict(parts[1], uc.b)) return std::nullopt;
if (!parse_float_strict(parts[2], uc.c)) return std::nullopt;
if (!parse_float_strict(parts[3], uc.alpha)) return std::nullopt;
if (!parse_float_strict(parts[4], uc.beta)) return std::nullopt;
if (!parse_float_strict(parts[5], uc.gamma)) return std::nullopt;
return uc;
}
std::optional<CrystalLattice> parse_lattice_arg(const char *arg) {
if (!arg)
return std::nullopt;
std::string s(arg);
trim_in_place(s);
if (s.size() >= 2 && ((s.front() == '"' && s.back() == '"') || (s.front() == '\'' && s.back() == '\''))) {
s = s.substr(1, s.size() - 2);
trim_in_place(s);
}
std::vector<std::string> parts;
parts.reserve(9);
size_t start = 0;
while (true) {
size_t pos = s.find(',', start);
if (pos == std::string::npos) {
parts.push_back(s.substr(start));
break;
}
parts.push_back(s.substr(start, pos - start));
start = pos + 1;
}
if (parts.size() != 9)
return std::nullopt;
std::vector<float> vals(9);
for (int i = 0; i < 9; i++) {
if (!parse_float_strict(parts[i], vals[i]))
return std::nullopt;
}
return CrystalLattice(vals);
}
namespace {
std::atomic<JFJochProcess *> g_active_process{nullptr};
void handle_sigint(int) {
if (auto *p = g_active_process.load())
p->Cancel();
}
}
int main(int argc, char **argv) {
for (int i = 0; i < argc; i++) {
std::cout << argv[i] << " ";
}
std::cout << std::endl << std::endl;
RegisterHDF5Filter();
print_license("jfjoch_process");
Logger logger("jfjoch_process");
std::string input_file;
std::string output_prefix = "output";
int nthreads = 1;
int start_image = 0;
int end_image = -1; // -1 indicates process until end
int image_stride = 1;
bool verbose = false;
bool rotation_indexing = false;
bool two_pass_rotation = true;
bool reuse_rotation_spots = true;
int rotation_indexing_image_count = 100;
std::optional<float> rotation_indexing_range;
bool run_scaling = false;
bool scale_fulls = false;
bool anomalous_mode = false;
std::optional<int64_t> space_group_number;
std::optional<UnitCell> fixed_reference_unit_cell;
std::optional<int64_t> max_spot_count_override;
float sigma_spot_finding = 3.0;
int64_t photon_count_threshold_spot_finding = 10;
bool refine_bfactor = false;
bool refine_wedge = false;
std::optional<double> wedge_for_scaling;
std::string ref_mtz;
std::string ref_column;
std::string dump_observations; // diagnostic: dump unmerged -P rot3d fulls to this path
double min_partiality = 0.02;
double min_image_cc = 0.0;
int64_t scaling_iter = 3;
std::optional<CrystalLattice> forced_rotation_lattice;
std::optional<float> bandwidth_fwhm; // relative FWHM of dlambda/lambda
IndexingAlgorithmEnum indexing_algorithm = IndexingAlgorithmEnum::Auto;
GeomRefinementAlgorithmEnum refinement_algorithm = GeomRefinementAlgorithmEnum::BeamCenter;
IntensityFormat intensity_format = IntensityFormat::MTZ;
PartialityModel partiality_model = PartialityModel::Fixed;
bool combine_3d = false; // -P rot3d: weight-sum per-frame partials into fulls before merging
float d_min_spot_finding = 1.5;
std::optional<float> d_min_scale_merge;
std::optional<std::string> integration_radius_arg; // "r1" or "r1,r2,r3"
std::optional<IntegratorMode> integrator_mode; // --integrator boxsum|gaussian|empirical
std::optional<double> outlier_reject_nsigma; // merge per-observation outlier rejection
std::optional<double> delta_cchalf_nsigma; // per-crystal CC1/2-delta rejection
if (argc == 1) {
print_usage();
exit(EXIT_FAILURE);
}
int opt;
int option_index = 0;
const char *short_opts = "vo:N:s:e:t:R::X:C:z:FABw::S:MP:r:";
while ((opt = getopt_long(argc, argv, short_opts, long_options, &option_index)) != -1) {
switch (opt) {
case 'o':
output_prefix = optarg;
break;
case 'v':
verbose = true;
break;
case 'N':
nthreads = atoi(optarg);
break;
case 's':
start_image = atoi(optarg);
break;
case 'e':
end_image = atoi(optarg);
break;
case 't':
image_stride = atoi(optarg);
break;
case 'R':
if (rotation_indexing) {
logger.Error("Rotation indexing already enabled");
exit(EXIT_FAILURE);
}
rotation_indexing = true;
two_pass_rotation = true;
if (optarg)
rotation_indexing_image_count = atoi(optarg);
break;
case OPT_SINGLE_PASS_ROTATION:
if (rotation_indexing) {
logger.Error("Rotation indexing already enabled");
exit(EXIT_FAILURE);
}
rotation_indexing = true;
two_pass_rotation = false;
if (optarg)
rotation_indexing_range = atof(optarg);
break;
case OPT_REDO_ROTATION_SPOTS:
reuse_rotation_spots = false;
break;
case OPT_FORCE_ROTATION_LATTICE: {
if (rotation_indexing) {
logger.Error("Rotation indexing already enabled");
exit(EXIT_FAILURE);
}
rotation_indexing = true;
auto latt = parse_lattice_arg(optarg);
if (!latt.has_value()) {
logger.Error(
"Invalid rotation lattice. Expected: \"a0x,a0y,a0z,a1x,a1y,a1z,a2x,a2y,a2z\" (9 floats, comma-separated). Got: {}",
optarg ? optarg : "<null>");
print_usage();
exit(EXIT_FAILURE);
}
forced_rotation_lattice = latt;
auto uc = latt->GetUnitCell();
logger.Info(
"Forced rotation lattice set: a={:.3f} b={:.3f} c={:.3f} alpha={:.3f} beta={:.3f} gamma={:.3f}",
uc.a, uc.b, uc.c, uc.alpha, uc.beta, uc.gamma);
break;
}
case 'X': {
std::string alg = optarg ? optarg : "";
std::transform(alg.begin(), alg.end(), alg.begin(),
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
if (alg == "ffbidx")
indexing_algorithm = IndexingAlgorithmEnum::FFBIDX;
else if (alg == "fft")
indexing_algorithm = IndexingAlgorithmEnum::FFT;
else if (alg == "fftw")
indexing_algorithm = IndexingAlgorithmEnum::FFTW;
else if (alg == "auto")
indexing_algorithm = IndexingAlgorithmEnum::Auto;
else if (alg == "none")
indexing_algorithm = IndexingAlgorithmEnum::None;
else {
logger.Error("Invalid indexing algorithm: {}", alg);
print_usage();
exit(EXIT_FAILURE);
}
break;
}
case 'r': {
std::string alg = optarg ? optarg : "";
std::transform(alg.begin(), alg.end(), alg.begin(),
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
if (alg == "none")
refinement_algorithm = GeomRefinementAlgorithmEnum::None;
else if (alg == "beam_and_lattice")
refinement_algorithm = GeomRefinementAlgorithmEnum::BeamCenter;
else if (alg == "orientation")
refinement_algorithm = GeomRefinementAlgorithmEnum::OrientationOnly;
else {
logger.Error("Invalid geom refinement algorithm: {}", alg);
print_usage();
exit(EXIT_FAILURE);
}
break;
}
case 'C': {
auto uc = parse_unit_cell_arg(optarg);
if (!uc.has_value()) {
logger.Error(
"Invalid unit cell. Expected: \"a,b,c,alpha,beta,gamma\" (6 floats, comma-separated, no spaces). Got: {}",
optarg ? optarg : "<null>");
print_usage();
exit(EXIT_FAILURE);
}
fixed_reference_unit_cell = uc;
logger.Info(
"Fixed reference unit cell set: a={:.3f} b={:.3f} c={:.3f} alpha={:.3f} beta={:.3f} gamma={:.3f}",
uc->a, uc->b, uc->c, uc->alpha, uc->beta, uc->gamma);
break;
}
case 'z':
ref_mtz = optarg;
break;
case OPT_REFERENCE_COLUMN:
ref_column = optarg;
break;
case OPT_DUMP_OBSERVATIONS:
dump_observations = optarg;
break;
case 'F':
indexing_algorithm = IndexingAlgorithmEnum::FFT;
break;
case 'A':
anomalous_mode = true;
break;
case 'B':
refine_bfactor = true;
break;
case 'w':
refine_wedge = true;
if (optarg)
wedge_for_scaling = std::stod(optarg);
break;
case 'S':
space_group_number = atoi(optarg);
break;
case 'P':
if (strcmp(optarg, "unity") == 0)
partiality_model = PartialityModel::Unity;
else if (strcmp(optarg, "fixed") == 0)
partiality_model = PartialityModel::Fixed;
else if (strcmp(optarg, "rot") == 0)
partiality_model = PartialityModel::Rotation;
else if (strcmp(optarg, "rot3d") == 0) {
partiality_model = PartialityModel::Rotation;
combine_3d = true;
}
else {
logger.Error("Invalid partiality mode: {}", optarg);
print_usage();
exit(EXIT_FAILURE);
}
break;
case OPT_SPOT_SIGMA:
sigma_spot_finding = atof(optarg);
logger.Info("Noise threshold level for spot finding set to {:.2f} sigma", sigma_spot_finding);
break;
case OPT_SPOT_THRESHOLD:
photon_count_threshold_spot_finding = atoi(optarg);
logger.Info("Photon-count threshold level for spot finding set to {:d}",
photon_count_threshold_spot_finding);
break;
case OPT_SPOT_RESOLUTION:
d_min_spot_finding = atof(optarg);
logger.Info("High resolution limit for spot finding set to {:.2f} A", d_min_spot_finding);
break;
case OPT_MAX_SPOTS:
max_spot_count_override = atoll(optarg);
logger.Info("Max spot count overridden to {}", max_spot_count_override.value());
break;
case 'M':
run_scaling = true;
break;
case OPT_SCALE_FULLS:
run_scaling = true;
scale_fulls = true;
break;
case OPT_MIN_PARTIALITY:
min_partiality = std::stod(optarg);
break;
case OPT_INTEGRATION_RADIUS:
integration_radius_arg = optarg;
break;
case OPT_INTEGRATOR:
if (strcmp(optarg, "boxsum") == 0) integrator_mode = IntegratorMode::BoxSum;
else if (strcmp(optarg, "gaussian") == 0) integrator_mode = IntegratorMode::ProfileGaussian;
else if (strcmp(optarg, "empirical") == 0) integrator_mode = IntegratorMode::ProfileEmpirical;
else { logger.Error("--integrator expects boxsum|gaussian|empirical"); return 1; }
break;
case OPT_REJECT_OUTLIERS:
outlier_reject_nsigma = std::stod(optarg);
break;
case OPT_REJECT_DELTA_CCHALF:
delta_cchalf_nsigma = std::stod(optarg);
break;
case OPT_MIN_IMAGE_CC:
min_image_cc = std::stod(optarg);
break;
case OPT_SCALING_HIGH_RESOLUTION:
d_min_scale_merge = atof(optarg);
break;
case OPT_SCALING_OUTPUT:
if (strcmp(optarg, "mtz") == 0) {
intensity_format = IntensityFormat::MTZ;
} else if (strcmp(optarg, "cif") == 0) {
intensity_format = IntensityFormat::mmCIF;
} else if (strcmp(optarg, "txt") == 0) {
intensity_format = IntensityFormat::Text;
} else {
logger.Error("Invalid intensity format: {}", optarg);
exit(EXIT_FAILURE);
}
break;
case OPT_SCALING_ITERATIONS:
scaling_iter = atoi(optarg);
if (scaling_iter <= 0) {
logger.Error("Invalid scaling iteration count: {}", scaling_iter);
exit(EXIT_FAILURE);
}
break;
case OPT_BANDWIDTH:
bandwidth_fwhm = atof(optarg);
if (!(bandwidth_fwhm.value() >= 0.0f)) {
logger.Error("Invalid bandwidth: {}", optarg);
exit(EXIT_FAILURE);
}
break;
default:
print_usage();
exit(EXIT_FAILURE);
}
}
if (optind != argc - 1) {
logger.Error("Input file not specified");
print_usage();
exit(EXIT_FAILURE);
}
input_file = argv[optind];
logger.Verbose(verbose);
// Validate space group number early
const gemmi::SpaceGroup *space_group = nullptr;
if (space_group_number.has_value()) {
space_group = gemmi::find_spacegroup_by_number(space_group_number.value());
if (!space_group) {
logger.Error("Unknown space group number {}", space_group_number.value());
exit(EXIT_FAILURE);
}
logger.Info("Using space group {} (number {})", space_group->hm, space_group_number.value());
}
// 1. Read Input File
JFJochHDF5Reader reader;
try {
reader.ReadFile(input_file);
} catch (const std::exception &e) {
logger.Error("Error reading input file: {}", e.what());
exit(EXIT_FAILURE);
}
const auto dataset = reader.GetDataset();
if (!dataset) {
logger.Error("No experiment dataset found in the input file");
exit(EXIT_FAILURE);
}
if (rotation_indexing_image_count <= 0) {
logger.Error("Invalid number of rotation indexing images: {}", rotation_indexing_image_count);
exit(EXIT_FAILURE);
}
logger.Info("Loaded dataset from {}", input_file);
std::vector<MergedReflection> reference_data;
if (!ref_mtz.empty()) {
try {
const auto reference = LoadReferenceMtz(
ref_mtz, ref_column.empty() ? std::nullopt : std::optional<std::string>(ref_column));
reference_data = reference.reflections;
logger.Info("Loaded {} reference reflections from {} (column {}{}{})",
reference_data.size(), ref_mtz, reference.used_column,
reference.squared ? ", squared to intensity" : "",
reference.default_column ? ", auto-selected" : ", user-specified");
if (reference.d_max > 0.0)
logger.Info("Reference resolution range {:.2f} - {:.2f} A", reference.d_max, reference.d_min);
if (reference.cell.has_value())
logger.Info("Reference unit cell: a={:.3f} b={:.3f} c={:.3f} alpha={:.2f} beta={:.2f} gamma={:.2f}",
reference.cell->a, reference.cell->b, reference.cell->c,
reference.cell->alpha, reference.cell->beta, reference.cell->gamma);
if (!reference.space_group_name.empty())
logger.Info("Reference space group: {} (number {})",
reference.space_group_name, reference.space_group_number.value_or(0));
const auto warning = ReferenceConsistencyWarning(
reference, fixed_reference_unit_cell,
space_group_number.has_value() ? std::optional<int>(static_cast<int>(*space_group_number))
: std::nullopt);
if (!warning.empty())
logger.Warning("{}", warning);
} catch (const std::exception &e) {
logger.Error("Error reading reference MTZ {}: {}", ref_mtz, e.what());
exit(EXIT_FAILURE);
}
}
uint64_t total_images_in_file = reader.GetNumberOfImages();
if (end_image < 0 || end_image > total_images_in_file)
end_image = total_images_in_file;
if (image_stride < 0) {
logger.Error("Image stride cannot be negative");
exit(EXIT_FAILURE);
}
if (image_stride == 0) {
logger.Error("Image stride cannot be zero");
exit(EXIT_FAILURE);
}
int images_to_process = (end_image - start_image) / image_stride;
if (images_to_process <= 0) {
logger.Warning("No images to process (Start: {}, End: {} Stride: {}, Total: {})", start_image, end_image,
image_stride, total_images_in_file);
return 0;
}
logger.Info("Starting analysis of {} images (range {}-{}) using {} threads",
images_to_process, start_image, end_image, nthreads);
// 2. Setup Experiment & Components
DiffractionExperiment experiment(dataset->experiment);
experiment.BitDepthImage(32).Compression(CompressionAlgorithm::BSHUF_LZ4);
experiment.FilePrefix(output_prefix);
experiment.Mode(DetectorMode::Standard); // Ensure full image analysis
experiment.PixelSigned(true);
experiment.OverwriteExistingFiles(true);
experiment.PolarizationFactor(0.99);
experiment.SetFileWriterFormat(FileWriterFormat::NXmxLegacy);
experiment.SpaceGroupNumber(space_group_number);
experiment.ImagesPerTrigger(images_to_process);
experiment.NumTriggers(1);
if (fixed_reference_unit_cell.has_value())
experiment.SetUnitCell(*fixed_reference_unit_cell);
if (max_spot_count_override.has_value()) {
experiment.MaxSpotCount(max_spot_count_override.value());
logger.Info("Max spot count overridden to {}", max_spot_count_override.value());
}
// X-ray bandwidth: CLI overrides the value carried in the dataset; otherwise
// keep whatever the dataset provided (0 / none -> monochromatic).
if (bandwidth_fwhm)
experiment.BandwidthFWHM(bandwidth_fwhm);
if (experiment.GetBandwidthFWHM())
logger.Info("X-ray bandwidth FWHM set to {:.4f}", experiment.GetBandwidthFWHM().value());
// Configure Indexing
IndexingSettings indexing_settings;
indexing_settings.Algorithm(indexing_algorithm);
indexing_settings.RotationIndexing(rotation_indexing);
if (rotation_indexing_range.has_value())
indexing_settings.RotationIndexingMinAngularRange_deg(rotation_indexing_range.value());
indexing_settings.GeomRefinementAlgorithm(refinement_algorithm);
experiment.ImportIndexingSettings(indexing_settings);
ScalingSettings scaling_settings;
scaling_settings.SetPartialityModel(partiality_model);
scaling_settings.Combine3D(combine_3d);
scaling_settings.ScaleFulls(scale_fulls);
if (d_min_scale_merge)
scaling_settings.HighResolutionLimit_A(d_min_scale_merge.value());
scaling_settings.MergeFriedel(!anomalous_mode);
scaling_settings.RefineB(refine_bfactor);
scaling_settings.RefineRotationWedge(refine_wedge);
if (wedge_for_scaling.has_value())
scaling_settings.RotationWedgeForScaling(wedge_for_scaling);
scaling_settings.MinPartiality(min_partiality);
scaling_settings.MinCCForImage(min_image_cc);
if (outlier_reject_nsigma)
scaling_settings.OutlierRejectNsigma(*outlier_reject_nsigma);
scaling_settings.FileFormat(intensity_format);
experiment.ImportScalingSettings(scaling_settings);
// Integration radii: r1 (signal box), r2/r3 (background annulus).
if (integration_radius_arg) {
std::vector<float> rr;
std::stringstream ss(*integration_radius_arg);
std::string tok;
while (std::getline(ss, tok, ',')) {
trim_in_place(tok);
if (!tok.empty()) rr.push_back(std::stof(tok));
}
float r1, r2, r3;
if (rr.size() == 1) { r1 = rr[0]; r2 = r1 + 2.0f; r3 = r1 + 4.0f; }
else if (rr.size() == 3) { r1 = rr[0]; r2 = rr[1]; r3 = rr[2]; }
else { logger.Error("--integration-radius expects r1 or r1,r2,r3"); return 1; }
BraggIntegrationSettings bis = experiment.GetBraggIntegrationSettings();
bis.R1(r1).R2(r2).R3(r3);
experiment.ImportBraggIntegrationSettings(bis);
logger.Info("Integration radii set to r1={:.1f} r2={:.1f} r3={:.1f}", r1, r2, r3);
}
if (integrator_mode) {
BraggIntegrationSettings bis = experiment.GetBraggIntegrationSettings();
bis.Integrator(*integrator_mode);
experiment.ImportBraggIntegrationSettings(bis);
logger.Info("Integrator set to {}", *integrator_mode == IntegratorMode::BoxSum ? "box-sum"
: *integrator_mode == IntegratorMode::ProfileGaussian ? "profile (gaussian)"
: "profile (empirical)");
}
SpotFindingSettings spot_settings;
spot_settings.enable = true;
spot_settings.indexing = true;
spot_settings.high_resolution_limit = d_min_spot_finding;
spot_settings.signal_to_noise_threshold = sigma_spot_finding;
spot_settings.photon_count_threshold = photon_count_threshold_spot_finding;
if (d_min_spot_finding > 0.0f)
spot_settings.high_resolution_limit = d_min_spot_finding;
// Run the shared full-analysis workflow (rotation indexing + scaling/merging live in
// JFJochProcess; the experiment above carries all algorithm settings).
ProcessConfig config;
config.mode = ProcessMode::FullAnalysis;
config.start_image = start_image;
config.end_image = end_image;
config.stride = image_stride;
config.nthreads = nthreads;
config.output_prefix = output_prefix;
config.spot_finding = spot_settings;
config.rotation_indexing = rotation_indexing;
config.two_pass_rotation = two_pass_rotation;
config.reuse_rotation_spots = reuse_rotation_spots;
config.rotation_indexing_image_count = rotation_indexing_image_count;
config.forced_rotation_lattice = forced_rotation_lattice;
config.run_scaling = run_scaling;
config.scaling_iter = scaling_iter;
config.reference_data = reference_data;
config.observation_dump_path = dump_observations;
JFJochProcess process(reader, experiment, dataset->pixel_mask, config);
g_active_process = &process;
std::signal(SIGINT, handle_sigint);
ProcessResult result;
try {
result = process.Run();
} catch (const std::exception &e) {
logger.Error("Processing failed: {}", e.what());
exit(EXIT_FAILURE);
}
g_active_process = nullptr;
if (!result.merge_statistics_text.empty())
std::cout << std::endl << result.merge_statistics_text << std::endl;
// Report statistics
std::cout << fmt::format("Processing time: {:.2f} s", result.processing_time_s) << std::endl;
std::cout << fmt::format("Frame rate: {:.2f} Hz", result.frame_rate_hz) << std::endl;
std::cout << fmt::format("Total throughput:{:.2f} MB/s", result.throughput_MBs) << std::endl;
if (result.indexing_rate.has_value())
std::cout << fmt::format("Indexing rate: {:.2f}%", result.indexing_rate.value() * 100.0) << std::endl;
if (result.consensus_cell.has_value()) {
const auto &c = result.consensus_cell.value();
std::cout << fmt::format("Unit cell: a={:.2f} b={:.2f} c={:.2f} alpha={:.2f} beta={:.2f} gamma={:.2f}",
c.a, c.b, c.c, c.alpha, c.beta, c.gamma) << std::endl;
}
const auto &t = result.mean_processing_time;
std::cout << fmt::format(
"Per-image time (mean; ms): decompress {:.2f} preprocess {:.2f} azint {:.2f} spot finding {:.2f} "
"indexing {:.2f} refinement {:.2f} indexing analysis {:.2f} prediction {:.2f} integration {:.2f} "
"scaling {:.2f} total {:.2f}",
t.compression * 1e3, t.preprocessing * 1e3, t.azint * 1e3, t.spot_finding * 1e3,
t.indexing * 1e3, t.refinement * 1e3, t.indexing_analysis * 1e3, t.bragg_prediction * 1e3,
t.integration * 1e3, t.image_scale * 1e3, t.processing * 1e3) << std::endl;
if (result.cancelled)
logger.Warning("Processing was cancelled after {} images", result.images_processed);
}