From 930cfb0b354924768361867cdc36a267b79401f1 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Fri, 8 May 2026 12:13:13 +0200 Subject: [PATCH 001/132] HDF5DataFile: Saving images is a configurable option --- writer/FileWriter.cpp | 2 +- writer/HDF5DataFile.cpp | 73 +++++++++++++++++++++-------------------- writer/HDF5DataFile.h | 6 ++-- 3 files changed, 42 insertions(+), 39 deletions(-) diff --git a/writer/FileWriter.cpp b/writer/FileWriter.cpp index f46c4a32..c57119a7 100644 --- a/writer/FileWriter.cpp +++ b/writer/FileWriter.cpp @@ -81,7 +81,7 @@ void FileWriter::WriteHDF5(const DataMessage& msg) { files.resize(file_number + 1); if (!files[file_number]) { - files[file_number] = std::make_unique(start_message, file_number); + files[file_number] = std::make_unique(start_message, file_number, true); if (format == FileWriterFormat::NXmxIntegrated && master_file) files[file_number]->CreateFile(msg, master_file->GetFile()); } diff --git a/writer/HDF5DataFile.cpp b/writer/HDF5DataFile.cpp index 49626a9f..811b4675 100644 --- a/writer/HDF5DataFile.cpp +++ b/writer/HDF5DataFile.cpp @@ -20,7 +20,8 @@ #include "HDF5NXmx.h" #include "../common/time_utc.h" -HDF5DataFile::HDF5DataFile(const StartMessage &msg, uint64_t in_file_number) { +HDF5DataFile::HDF5DataFile(const StartMessage &msg, uint64_t in_file_number, bool write_images) : +write_images(write_images){ file_number = in_file_number; if (msg.overwrite.has_value()) @@ -110,7 +111,6 @@ HDF5DataFile::~HDF5DataFile() { if (data_file) { try { data_set.reset(); - data_set_image_number.reset(); data_file.reset(); if (manage_file) { std::error_code ec; @@ -124,42 +124,44 @@ HDF5DataFile::~HDF5DataFile() { } } -void HDF5DataFile::CreateFile(const DataMessage& msg, std::shared_ptr in_data_file, bool integrated) { - HDF5Dcpl dcpl; - - HDF5DataType data_type(msg.image.GetMode()); - - xpixel = msg.image.GetWidth(); - ypixel = msg.image.GetHeight(); - - dcpl.SetCompression(msg.image.GetCompressionAlgorithm(), 
JFJochBitShuffleCompressor::DefaultBlockSize); - dcpl.SetChunking( {1, ypixel, xpixel}); - - H5Pset_fill_time(dcpl.GetID(), H5D_FILL_TIME_NEVER); - H5Pset_alloc_time(dcpl.GetID(), H5D_ALLOC_TIME_INCR); - - switch (msg.image.GetMode()) { - case CompressedImageMode::Int8: - dcpl.SetFillValue8(INT8_MIN); - break; - case CompressedImageMode::Int16: - dcpl.SetFillValue16(INT16_MIN); - break; - case CompressedImageMode::Int32: - dcpl.SetFillValue32(INT32_MIN); - break; - default: - break; - } - +void HDF5DataFile::CreateFile(const DataMessage& msg, std::shared_ptr in_data_file) { data_file = in_data_file; HDF5Group(*data_file, "/entry").NXClass("NXentry"); - HDF5Group(*data_file, "/entry/data").NXClass("NXdata"); + if (write_images) { + HDF5Dcpl dcpl; + + HDF5DataType data_type(msg.image.GetMode()); + + xpixel = msg.image.GetWidth(); + ypixel = msg.image.GetHeight(); + + dcpl.SetCompression(msg.image.GetCompressionAlgorithm(), JFJochBitShuffleCompressor::DefaultBlockSize); + dcpl.SetChunking( {1, ypixel, xpixel}); + + H5Pset_fill_time(dcpl.GetID(), H5D_FILL_TIME_NEVER); + H5Pset_alloc_time(dcpl.GetID(), H5D_ALLOC_TIME_INCR); + + switch (msg.image.GetMode()) { + case CompressedImageMode::Int8: + dcpl.SetFillValue8(INT8_MIN); + break; + case CompressedImageMode::Int16: + dcpl.SetFillValue16(INT16_MIN); + break; + case CompressedImageMode::Int32: + dcpl.SetFillValue32(INT32_MIN); + break; + default: + break; + } + + HDF5Group(*data_file, "/entry/data").NXClass("NXdata"); + HDF5DataSpace data_space({1, ypixel, xpixel}, {H5S_UNLIMITED, ypixel, xpixel}); + data_set = std::make_unique(*data_file, "/entry/data/data", data_type, data_space, dcpl); + data_set->SetExtent({images_per_file, ypixel, xpixel}); + } - HDF5DataSpace data_space({1, ypixel, xpixel}, {H5S_UNLIMITED, ypixel, xpixel}); - data_set = std::make_unique(*data_file, "/entry/data/data", data_type, data_space, dcpl); - data_set->SetExtent({images_per_file, ypixel, xpixel}); for (auto &p: plugins) 
p->OpenFile(*data_file, msg, images_per_file); } @@ -186,7 +188,8 @@ void HDF5DataFile::Write(const DataMessage &msg, uint64_t image_number) { } nimages++; - data_set->WriteDirectChunk(msg.image.GetCompressed(), msg.image.GetCompressedSize(), {image_number, 0, 0}); + if (data_set) + data_set->WriteDirectChunk(msg.image.GetCompressed(), msg.image.GetCompressedSize(), {image_number, 0, 0}); for (auto &p: plugins) p->Write(msg, image_number); diff --git a/writer/HDF5DataFile.h b/writer/HDF5DataFile.h index 1346a333..334208c2 100644 --- a/writer/HDF5DataFile.h +++ b/writer/HDF5DataFile.h @@ -27,7 +27,6 @@ class HDF5DataFile { std::shared_ptr data_file = nullptr; std::unique_ptr data_set = nullptr; - std::unique_ptr data_set_image_number = nullptr; std::vector> plugins; size_t images_per_file; size_t xpixel; @@ -49,14 +48,15 @@ class HDF5DataFile { int64_t file_number; bool new_file = true; bool manage_file = false; + const bool write_images; public: - HDF5DataFile(const StartMessage &msg, uint64_t file_number); + HDF5DataFile(const StartMessage &msg, uint64_t file_number, bool write_images); ~HDF5DataFile(); std::optional Close(); void Write(const DataMessage& msg, uint64_t image_number); size_t GetNumImages() const; - void CreateFile(const DataMessage& msg, std::shared_ptr data_file, bool integrated = false); + void CreateFile(const DataMessage& msg, std::shared_ptr data_file); }; #endif //HDF5DATAFILE_H -- 2.52.0 From 173198be40c8f31d2a72aabd9388e0615772fec9 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Fri, 8 May 2026 12:41:46 +0200 Subject: [PATCH 002/132] HDF5DataFile: Include File name explicitly + FileWriter: Handle NXmxIntegrated in a smarter way --- common/JFJochMessages.h | 1 + docs/CBOR.md | 1 + frame_serialize/CBORStream2Deserializer.cpp | 2 + frame_serialize/CBORStream2Serializer.cpp | 2 + writer/FileWriter.cpp | 58 ++++++++++++++------- writer/HDF5DataFile.cpp | 19 ++++--- writer/HDF5DataFile.h | 9 ++-- writer/HDF5NXmx.cpp | 8 ++- writer/HDF5NXmx.h | 1 
+ 9 files changed, 66 insertions(+), 35 deletions(-) diff --git a/common/JFJochMessages.h b/common/JFJochMessages.h index 259da9ff..e5abba6e 100644 --- a/common/JFJochMessages.h +++ b/common/JFJochMessages.h @@ -251,6 +251,7 @@ struct StartMessage { std::optional attenuator_transmission; std::optional write_master_file; + std::optional write_images; nlohmann::json user_data; diff --git a/docs/CBOR.md b/docs/CBOR.md index bb2b18e2..b8a785a6 100644 --- a/docs/CBOR.md +++ b/docs/CBOR.md @@ -96,6 +96,7 @@ There are minor differences at the moment: | | | type "azim": qmin, qmax (numbers) | | | - gain_file_names | Array(string) | Names of JUNGFRAU gain files used for the current detector | | | - write_master_file | bool | With multiple sockets, it selects which socket will provide master file | | +| - write_images | bool | Write images in the HDF5 file (if false, will only write metadata) | | | - data_reduction_factor_serialmx | uint64 | Data reduction factor for serial MX | | | - experiment_group | string | ID of instrument user, e.g., p-group (SLS/SwissFEL) or proposal number | | | - jfjoch_release | string | Jungfraujoch release number | | diff --git a/frame_serialize/CBORStream2Deserializer.cpp b/frame_serialize/CBORStream2Deserializer.cpp index 5013c556..e60188d4 100644 --- a/frame_serialize/CBORStream2Deserializer.cpp +++ b/frame_serialize/CBORStream2Deserializer.cpp @@ -1012,6 +1012,8 @@ namespace { ProcessROIConfig(message, j["roi"]); if (j.contains("gain_file_names")) message.gain_file_names = j["gain_file_names"]; + if (j.contains("write_images")) + message.write_images = j["write_images"]; if (j.contains("write_master_file")) message.write_master_file = j["write_master_file"]; if (j.contains("data_reduction_factor_serialmx")) diff --git a/frame_serialize/CBORStream2Serializer.cpp b/frame_serialize/CBORStream2Serializer.cpp index f2dc1a87..195e5e05 100644 --- a/frame_serialize/CBORStream2Serializer.cpp +++ b/frame_serialize/CBORStream2Serializer.cpp @@ -493,6 
+493,8 @@ inline void CBOR_ENC_START_USER_DATA(CborEncoder& encoder, const char* key, j["gain_file_names"] = message.gain_file_names; if (message.write_master_file) j["write_master_file"] = message.write_master_file.value(); + if (message.write_images) + j["write_images"] = message.write_images.value(); if (message.data_reduction_factor_serialmx) j["data_reduction_factor_serialmx"] = message.data_reduction_factor_serialmx.value(); j["experiment_group"] = message.experiment_group; diff --git a/writer/FileWriter.cpp b/writer/FileWriter.cpp index c57119a7..7537bf76 100644 --- a/writer/FileWriter.cpp +++ b/writer/FileWriter.cpp @@ -71,26 +71,37 @@ void FileWriter::WriteHDF5(const DataMessage& msg) { if (msg.number < 0) throw JFJochException(JFJochExceptionCategory::ArrayOutOfBounds, "No support for negative images"); - const uint64_t file_number = (start_message.images_per_file == 0) ? 0 : msg.number / start_message.images_per_file; - const uint64_t image_number = (start_message.images_per_file == 0) ? 
msg.number : msg.number % start_message.images_per_file; - - if (closed_files.contains(file_number)) - return; - - if (files.size() <= file_number) - files.resize(file_number + 1); - - if (!files[file_number]) { - files[file_number] = std::make_unique(start_message, file_number, true); - if (format == FileWriterFormat::NXmxIntegrated && master_file) - files[file_number]->CreateFile(msg, master_file->GetFile()); - } - files[file_number]->Write(msg, image_number); - - if (files[file_number]->GetNumImages() == start_message.images_per_file) { - CloseFile(file_number); + if (format == FileWriterFormat::NXmxIntegrated && master_file) { + if (files.empty() ) + files.resize(1); + if (!files[0]) { + files[0] = std::make_unique(start_message, + 0, + HDF5Metadata::MasterFileName(start_message), + true); + files[0]->CreateFile(msg, master_file->GetFile()); + } + files[0]->Write(msg, msg.number); } else { - CloseOldFiles(static_cast(msg.number)); + const uint64_t file_number = (start_message.images_per_file == 0) ? 0 : msg.number / start_message.images_per_file; + const uint64_t image_number = (start_message.images_per_file == 0) ? msg.number : msg.number % start_message.images_per_file; + + if (closed_files.contains(file_number)) + return; + + if (files.size() <= file_number) + files.resize(file_number + 1); + + if (!files[file_number]) + files[file_number] = std::make_unique(start_message, file_number, + HDF5Metadata::DataFileName(start_message, file_number)); + files[file_number]->Write(msg, image_number); + + if (files[file_number]->GetNumImages() == start_message.images_per_file) { + CloseFile(file_number); + } else { + CloseOldFiles(static_cast(msg.number)); + } } } @@ -230,6 +241,15 @@ void FileWriter::WriteHDF5(const CompressedImage &msg) { void FileWriter::WriteHDF5(const EndMessage &msg) { if (master_file) { std::lock_guard lock(hdf5_mutex); + + if (format == FileWriterFormat::NXmxIntegrated) { + try { + CloseFile(0); + } catch (...) 
{ + throw; + } + } + master_file->Finalize(msg); } } diff --git a/writer/HDF5DataFile.cpp b/writer/HDF5DataFile.cpp index 811b4675..bee8b278 100644 --- a/writer/HDF5DataFile.cpp +++ b/writer/HDF5DataFile.cpp @@ -20,10 +20,10 @@ #include "HDF5NXmx.h" #include "../common/time_utc.h" -HDF5DataFile::HDF5DataFile(const StartMessage &msg, uint64_t in_file_number, bool write_images) : -write_images(write_images){ - file_number = in_file_number; - +HDF5DataFile::HDF5DataFile(const StartMessage &msg, uint64_t file_number, const std::string &filename) : +filename(filename), +file_number(file_number), +write_images(msg.write_images.value_or(true)) { if (msg.overwrite.has_value()) overwrite = msg.overwrite.value(); @@ -31,9 +31,14 @@ write_images(write_images){ ypixel = 0; max_image_number = 0; nimages = 0; - filename = HDF5Metadata::DataFileName(msg, file_number); - image_low = file_number * msg.images_per_file; - images_per_file = msg.images_per_file; + + if (msg.file_format == FileWriterFormat::NXmxIntegrated) { + image_low = 0; + images_per_file = msg.number_of_images; + } else { + image_low = file_number * msg.images_per_file; + images_per_file = msg.images_per_file; + } timestamp.reserve(images_per_file); exptime.reserve(images_per_file); diff --git a/writer/HDF5DataFile.h b/writer/HDF5DataFile.h index 334208c2..aac77fb0 100644 --- a/writer/HDF5DataFile.h +++ b/writer/HDF5DataFile.h @@ -22,7 +22,10 @@ struct HDF5DataFileStatistics { }; class HDF5DataFile { - std::string filename; + const std::string filename; + const uint64_t file_number; + const bool write_images; + std::string tmp_filename; std::shared_ptr data_file = nullptr; @@ -45,12 +48,10 @@ class HDF5DataFile { bool closed = false; bool overwrite = false; - int64_t file_number; bool new_file = true; bool manage_file = false; - const bool write_images; public: - HDF5DataFile(const StartMessage &msg, uint64_t file_number, bool write_images); + HDF5DataFile(const StartMessage &msg, uint64_t file_number, const 
std::string &filename); ~HDF5DataFile(); std::optional Close(); void Write(const DataMessage& msg, uint64_t image_number); diff --git a/writer/HDF5NXmx.cpp b/writer/HDF5NXmx.cpp index b368808b..29f2a757 100644 --- a/writer/HDF5NXmx.cpp +++ b/writer/HDF5NXmx.cpp @@ -11,15 +11,13 @@ #include "../common/time_utc.h" #include "gemmi/symmetry.hpp" -namespace { - std::string GenFilename(const StartMessage &start) { - return fmt::format("{:s}_master.h5", start.file_prefix); - } +std::string HDF5Metadata::MasterFileName(const StartMessage &start) { + return fmt::format("{:s}_master.h5", start.file_prefix); } NXmx::NXmx(const StartMessage &start) : start_message(start), - filename(GenFilename(start)) { + filename(HDF5Metadata::MasterFileName(start)) { uint64_t tmp_suffix; try { if (!start.arm_date.empty()) diff --git a/writer/HDF5NXmx.h b/writer/HDF5NXmx.h index a02bfe2b..8bc0b36d 100644 --- a/writer/HDF5NXmx.h +++ b/writer/HDF5NXmx.h @@ -9,6 +9,7 @@ #include "HDF5Objects.h" namespace HDF5Metadata { + std::string MasterFileName(const StartMessage &msg); std::string DataFileName(const StartMessage &msg, int64_t file_number); } -- 2.52.0 From 6c8c953c92156c663060485db5204f8032eb3e5d Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Fri, 8 May 2026 13:31:03 +0200 Subject: [PATCH 003/132] jfjoch_process: Cleanup help + by default save HDF5, but without analysis results --- tools/jfjoch_process.cpp | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/tools/jfjoch_process.cpp b/tools/jfjoch_process.cpp index 01ee4432..a54d93a5 100644 --- a/tools/jfjoch_process.cpp +++ b/tools/jfjoch_process.cpp @@ -37,21 +37,30 @@ void print_usage(Logger &logger) { logger.Info(" -s Start image number (default: 0)"); logger.Info(" -e End image number (default: all)"); logger.Info(" -v Verbose output"); - logger.Info(" -R[num] Rotation indexing (optional: min angular range deg)"); - logger.Info(" -F Use FFT indexing algorithm (shortcut for -XFFT)"); - 
logger.Info(" -X Indexing algorithm (FFBIDX|FFT|FFTW|Auto|None)"); - logger.Info(" -x No least-square beam center refinement"); + logger.Info(" -W Include images in the written HDF5 file (otherwise only analysis results are saved)"); + logger.Info(" -U Unmerged intensities are written to a text file"); + logger.Info(""); + + logger.Info(" Spot finding"); + logger.Info(" -T Noise sigma level for spot finding (default: 3.0)"); + logger.Info(" -t Photon count threshold for spot finding (default: 10)"); logger.Info(" -d High resolution limit for spot finding (default: 1.5)"); + logger.Info(" -c Max spot count (default: 250)"); + logger.Info(""); + logger.Info(" Indexing"); + logger.Info(" -R[num] Rotation indexing (optional: min angular range deg)"); + logger.Info(" -X Indexing algorithm (FFBIDX|FFT|FFTW|Auto|None)"); + logger.Info(" -F Use FFT indexing algorithm (shortcut for -XFFT)"); + logger.Info(" -S Space group number - used for both indexing and scaling"); + logger.Info(" -C Fix reference unit cell: -C\"a,b,c,alpha,beta,gamma\" (comma-separated, no spaces; quotes optional)"); + logger.Info(" -x No least-square beam center refinement"); + + logger.Info(""); + logger.Info(" Scaling and merging"); logger.Info(" -D High resolution limit for scaling/merging (default: 0.0; no limit)"); - logger.Info(" -S Space group number"); logger.Info(" -M Scale and merge (refine mosaicity) and write scaled.hkl + image.dat"); logger.Info(" -P Partiality refinement fixed|rot|unity (default: fixed)"); logger.Info(" -A Anomalous mode (don't merge Friedel pairs)"); - logger.Info(" -C Fix reference unit cell: -C\"a,b,c,alpha,beta,gamma\" (comma-separated, no spaces; quotes optional)"); - logger.Info(" -c Max spot count (default: 250)"); - logger.Info(" -W HDF5 file with analysis results is written"); - logger.Info(" -T Noise sigma level for spot finding (default: 3.0)"); - logger.Info(" -t Photon count threshold for spot finding (default: 10)"); } void trim_in_place(std::string& t) { @@ 
-364,10 +373,12 @@ int main(int argc, char **argv) { start_message.pixel_mask["default"] = pixel_mask.GetMask(experiment); start_message.max_spot_count = experiment.GetMaxSpotCount(); + start_message.write_images = write_output; + start_message.file_format = FileWriterFormat::NXmxIntegrated; std::unique_ptr writer; try { - if (write_output) + if (!output_prefix.empty()) writer = std::make_unique(start_message); } catch (const std::exception &e) { logger.Error("Failed to initialize file writer: {}", e.what()); @@ -536,7 +547,6 @@ int main(int argc, char **argv) { logger.Info("Rotation Indexing found lattice"); } - // --- Optional: run scaling (mosaicity refinement) on accumulated reflections --- // --- Optional: run scaling (mosaicity refinement) on accumulated reflections --- if (run_scaling) { logger.Info("Running scaling (mosaicity refinement) ..."); -- 2.52.0 From cd5d97aa55522d4c189a37e23cc4c83d102abf36 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Fri, 8 May 2026 13:34:12 +0200 Subject: [PATCH 004/132] FileWriter: Fix --- writer/FileWriter.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/writer/FileWriter.cpp b/writer/FileWriter.cpp index 7537bf76..77a6aa70 100644 --- a/writer/FileWriter.cpp +++ b/writer/FileWriter.cpp @@ -75,10 +75,7 @@ void FileWriter::WriteHDF5(const DataMessage& msg) { if (files.empty() ) files.resize(1); if (!files[0]) { - files[0] = std::make_unique(start_message, - 0, - HDF5Metadata::MasterFileName(start_message), - true); + files[0] = std::make_unique(start_message, 0, HDF5Metadata::MasterFileName(start_message)); files[0]->CreateFile(msg, master_file->GetFile()); } files[0]->Write(msg, msg.number); -- 2.52.0 From f5176b56a91c2010fb1af6639728662789b665b2 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Fri, 8 May 2026 17:45:42 +0200 Subject: [PATCH 005/132] DiffractionExperiment: Max image number is 2 million + 1; it is unsafe to collect more --- common/Definitions.h | 2 ++ common/DiffractionExperiment.cpp | 
6 ++++++ 2 files changed, 8 insertions(+) diff --git a/common/Definitions.h b/common/Definitions.h index 7ab85585..8fd9eb83 100644 --- a/common/Definitions.h +++ b/common/Definitions.h @@ -17,6 +17,8 @@ constexpr size_t CONVERTED_MODULE_COLS = 1030; constexpr size_t CONVERTED_MODULE_SIZE = CONVERTED_MODULE_LINES * CONVERTED_MODULE_COLS; constexpr size_t JUNGFRAU_PACKET_SIZE_BYTES = 8192; +constexpr int MAX_IMAGE_NUMBER = 2*1024*1024; + constexpr std::chrono::nanoseconds MIN_COUNT_TIME = std::chrono::microseconds(3); constexpr std::chrono::nanoseconds MIN_STORAGE_CELL_DELAY = std::chrono::nanoseconds(2100); constexpr std::chrono::nanoseconds MIN_FRAME_TIME_JUNGFRAU_HALF_SPEED = std::chrono::microseconds(1000); diff --git a/common/DiffractionExperiment.cpp b/common/DiffractionExperiment.cpp index d9cf91be..e3b6cd5b 100644 --- a/common/DiffractionExperiment.cpp +++ b/common/DiffractionExperiment.cpp @@ -1060,6 +1060,12 @@ DiffractionExperiment &DiffractionExperiment::ImportDatasetSettings(const Datase + std::to_string(MAX_FRAMES)); } + if (GetImageNum() > MAX_IMAGE_NUMBER) { + dataset = tmp; + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "Number of images cannot exceed " + std::to_string(MAX_IMAGE_NUMBER)); + } + return *this; } -- 2.52.0 From 27bcb19328b36f99dcc8bc1b4db3c270232d38fc Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Fri, 8 May 2026 17:47:01 +0200 Subject: [PATCH 006/132] ImagePusher: Serializer defined inside ImagePusher class, not in subclasses --- image_pusher/HDF5FilePusher.cpp | 9 --------- image_pusher/ImagePusher.cpp | 4 ++++ image_pusher/ImagePusher.h | 5 +++++ image_pusher/TCPStreamPusher.cpp | 4 +--- image_pusher/TCPStreamPusher.h | 3 --- image_pusher/ZMQStream2Pusher.cpp | 2 -- image_pusher/ZMQStream2Pusher.h | 4 ---- 7 files changed, 10 insertions(+), 21 deletions(-) diff --git a/image_pusher/HDF5FilePusher.cpp b/image_pusher/HDF5FilePusher.cpp index d6c00c74..a5f9138c 100644 --- a/image_pusher/HDF5FilePusher.cpp 
+++ b/image_pusher/HDF5FilePusher.cpp @@ -27,12 +27,6 @@ void HDF5FilePusher::StartDataCollection(StartMessage &message) { StartMessage repub_message = message; repub_message.writer_notification_zmq_addr = ""; - size_t approx_size = 1024 * 1024; - for (const auto &[key, value] : repub_message.pixel_mask) - approx_size += value.size() * sizeof(uint32_t); - - std::vector serialization_buffer(approx_size); - CBORStream2Serializer serializer(serialization_buffer.data(), serialization_buffer.size()); serializer.SerializeSequenceStart(repub_message); repub_active = repub_socket->Send(serialization_buffer.data(), serializer.GetBufferSize(), true); @@ -59,9 +53,6 @@ bool HDF5FilePusher::EndDataCollection(const EndMessage &message) { if (repub_socket) { try { - size_t approx_size = 1024 * 1024; - std::vector serialization_buffer(approx_size); - CBORStream2Serializer serializer(serialization_buffer.data(), serialization_buffer.size()); serializer.SerializeSequenceEnd(message); if (repub_active) diff --git a/image_pusher/ImagePusher.cpp b/image_pusher/ImagePusher.cpp index 8f5ca4d3..cca67c54 100644 --- a/image_pusher/ImagePusher.cpp +++ b/image_pusher/ImagePusher.cpp @@ -13,6 +13,10 @@ void PrepareCBORImage(DataMessage& message, experiment.GetCompressionAlgorithm()); } +ImagePusher::ImagePusher() +: serialization_buffer(MESSAGE_SIZE_FOR_START_END), +serializer(serialization_buffer.data(), serialization_buffer.size()) {} + std::string ImagePusher::Finalize() { return ""; } diff --git a/image_pusher/ImagePusher.h b/image_pusher/ImagePusher.h index cb5e64d1..ac41375b 100644 --- a/image_pusher/ImagePusher.h +++ b/image_pusher/ImagePusher.h @@ -10,6 +10,7 @@ #include "../common/JFJochMessages.h" #include "../common/ZeroCopyReturnValue.h" #include "../common/Logger.h" +#include "../frame_serialize/CBORStream2Serializer.h" enum class ImagePusherType {HDF5, CBOR, TCP, ZMQ, Test, None}; @@ -33,6 +34,10 @@ void PrepareCBORImage(DataMessage& message, void *image, size_t image_size); 
class ImagePusher { +protected: + std::vector serialization_buffer; + CBORStream2Serializer serializer; + ImagePusher(); public: virtual void StartDataCollection(StartMessage& message) = 0; virtual bool EndDataCollection(const EndMessage& message) = 0; // Non-blocking diff --git a/image_pusher/TCPStreamPusher.cpp b/image_pusher/TCPStreamPusher.cpp index 0f777b68..d3184d2c 100644 --- a/image_pusher/TCPStreamPusher.cpp +++ b/image_pusher/TCPStreamPusher.cpp @@ -108,9 +108,7 @@ void TCPStreamPusher::CloseFd(std::atomic& fd) { TCPStreamPusher::TCPStreamPusher(const std::string& addr, size_t in_max_connections, std::optional in_send_buffer_size) - : serialization_buffer(256 * 1024 * 1024), - serializer(serialization_buffer.data(), serialization_buffer.size()), - endpoint(addr), + : endpoint(addr), max_connections(in_max_connections), send_buffer_size(in_send_buffer_size) { if (endpoint.empty()) diff --git a/image_pusher/TCPStreamPusher.h b/image_pusher/TCPStreamPusher.h index 69b4d59c..1b6eb0ef 100644 --- a/image_pusher/TCPStreamPusher.h +++ b/image_pusher/TCPStreamPusher.h @@ -97,9 +97,6 @@ class TCPStreamPusher : public ImagePusher { std::chrono::steady_clock::time_point last_keepalive_recv{}; }; - std::vector serialization_buffer; - CBORStream2Serializer serializer; - std::string endpoint; size_t max_connections; std::optional send_buffer_size; diff --git a/image_pusher/ZMQStream2Pusher.cpp b/image_pusher/ZMQStream2Pusher.cpp index ab4ce8f7..f115ce94 100644 --- a/image_pusher/ZMQStream2Pusher.cpp +++ b/image_pusher/ZMQStream2Pusher.cpp @@ -7,8 +7,6 @@ ZMQStream2Pusher::ZMQStream2Pusher(const std::vector &addr, std::optional send_buffer_high_watermark, std::optional send_buffer_size) - : serialization_buffer(256*1024*1024), - serializer(serialization_buffer.data(), serialization_buffer.size()) { if (addr.empty()) throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "No writer ZMQ address provided"); diff --git a/image_pusher/ZMQStream2Pusher.h 
b/image_pusher/ZMQStream2Pusher.h index 5f922363..aa5dbd36 100644 --- a/image_pusher/ZMQStream2Pusher.h +++ b/image_pusher/ZMQStream2Pusher.h @@ -9,12 +9,8 @@ #include "../preview/PreviewCounter.h" #include "ZMQWriterNotificationPuller.h" #include "ZMQStream2PusherSocket.h" -#include "../frame_serialize/CBORStream2Serializer.h" class ZMQStream2Pusher : public ImagePusher { - std::vector serialization_buffer; - CBORStream2Serializer serializer; - std::vector> socket; std::unique_ptr writer_notification_socket; -- 2.52.0 From 025c9b3aee266454c39a0ffb5f3826690c97e1b5 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Fri, 8 May 2026 19:19:34 +0200 Subject: [PATCH 007/132] FileWriter: Master HDF5 contains scalar per-image processing results, reader is also made to prioritize reading from master file --- common/JFJochMessages.h | 34 +- common/ScanResult.h | 5 + common/ScanResultGenerator.cpp | 79 ++- common/ScanResultGenerator.h | 2 + docs/CBOR.md | 279 +++++---- frame_serialize/CBORStream2Deserializer.cpp | 74 ++- frame_serialize/CBORStream2Serializer.cpp | 39 +- frame_serialize/CborUtil.h | 1 + image_pusher/CBORFilePusher.cpp | 4 +- reader/JFJochHDF5Reader.cpp | 599 ++++++++++++++------ reader/JFJochHDF5Reader.h | 7 + reader/JFJochHttpReader.cpp | 1 + reader/JFJochReaderDataset.h | 1 + receiver/JFJochReceiver.cpp | 2 + tests/HDF5WritingTest.cpp | 4 +- tests/JFJochReaderTest.cpp | 274 +++++++++ writer/HDF5DataFilePluginDetector.cpp | 6 - writer/HDF5DataFilePluginDetector.h | 2 - writer/HDF5DataFilePluginImageStats.cpp | 5 + writer/HDF5DataFilePluginImageStats.h | 1 + writer/HDF5DataFilePluginMX.cpp | 4 +- writer/HDF5NXmx.cpp | 213 ++++--- writer/HDF5NXmx.h | 10 +- 23 files changed, 1261 insertions(+), 385 deletions(-) diff --git a/common/JFJochMessages.h b/common/JFJochMessages.h index e5abba6e..da24ebe8 100644 --- a/common/JFJochMessages.h +++ b/common/JFJochMessages.h @@ -24,7 +24,7 @@ #include "XrayFluorescenceSpectrum.h" #include "../symmetry/gemmi/symmetry.hpp" 
-constexpr const uint64_t user_data_release = 5; +constexpr const uint64_t user_data_release = 6; constexpr const uint64_t user_data_magic_number = 0x52320000UL | user_data_release; enum class CBORImageType {START, END, IMAGE, CALIBRATION, METADATA, NONE}; @@ -168,6 +168,15 @@ struct DataMessage { std::optional lattice_type; }; +struct HDF5DataSourceMessage { + std::string filename; + std::string dataset = "/entry/data/data"; + + uint64_t source_first_image = 0; + uint64_t virtual_first_image = 0; + uint64_t image_count = 0; +}; + struct StartMessage { float detector_distance; float beam_center_x; @@ -283,6 +292,8 @@ struct StartMessage { XrayFluorescenceSpectrum fluorescence_spectrum; std::optional detect_ice_rings; + + std::vector hdf5_source_data; }; struct EndMessage { @@ -306,7 +317,26 @@ struct EndMessage { std::optional rotation_lattice_type; std::optional rotation_lattice; - std::vector scale_factor; + // Vectors with end result: + std::vector data_collection_efficiency; + std::vector spot_count; + std::vector spot_count_ice_ring; + std::vector spot_count_low_res; + std::vector spot_count_indexed; + std::vector image_indexed; + std::vector v_bkg_estimate; + std::vector profile_radius; + std::vector mosaicity; + std::vector bFactor; + std::vector resolution_estimate; + std::vector min_viable_pixel_value; + std::vector max_viable_pixel_value; + std::vector saturated_pixel_count; + std::vector error_pixel_count; + std::vector image_scale_factor; + std::vector integrated_reflections; + std::vector niggli_class; + std::vector pixel_sum; }; struct MetadataMessage { diff --git a/common/ScanResult.h b/common/ScanResult.h index 62453123..28686df9 100644 --- a/common/ScanResult.h +++ b/common/ScanResult.h @@ -22,6 +22,7 @@ struct ScanResultElem { std::optional angle_deg; std::optional pixel_sum; + std::optional min_viable_pixel; std::optional max_viable_pixel; std::optional err_pixels; std::optional sat_pixels; @@ -36,6 +37,10 @@ struct ScanResultElem { std::optional 
res; std::optional uc; std::optional xfel_pulse_id; + std::optional mosaicity; + std::optional image_scale_factor; + std::optional niggli_class; + std::optional integrated_reflections; }; struct ScanResult { diff --git a/common/ScanResultGenerator.cpp b/common/ScanResultGenerator.cpp index 5c2c9df9..202bf068 100644 --- a/common/ScanResultGenerator.cpp +++ b/common/ScanResultGenerator.cpp @@ -1,8 +1,18 @@ // SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-only +#include +#include + #include "ScanResultGenerator.h" +namespace { + template + T value_or_zero(const std::optional& v) { + return v.value_or(static_cast(0)); + } +} + ScanResultGenerator::ScanResultGenerator(const DiffractionExperiment &experiment) { grid_scan = experiment.GetGridScan(); goniometer_axis = experiment.GetGoniometer(); @@ -21,7 +31,7 @@ void ScanResultGenerator::Add(const DataMessage &message) { if (grid_scan) image_number = grid_scan->Rearrange(image_number); - if (image_number < v.size()) { + if (image_number >= 0 && static_cast(image_number) < v.size()) { if (grid_scan) { v[image_number].x = grid_scan->GetElementPosX_step(message.number); v[image_number].y = grid_scan->GetElementPosY_step(message.number); @@ -36,16 +46,22 @@ void ScanResultGenerator::Add(const DataMessage &message) { v[image_number].spot_count = message.spot_count; v[image_number].indexing_solution = message.indexing_result; v[image_number].profile_radius = message.profile_radius; + v[image_number].mosaicity = message.mosaicity_deg; v[image_number].b_factor = message.b_factor; v[image_number].uc = message.indexing_unit_cell; v[image_number].xfel_pulse_id = message.xfel_pulse_id; v[image_number].err_pixels = message.error_pixel_count; + v[image_number].min_viable_pixel = message.min_viable_pixel_value; v[image_number].max_viable_pixel = message.max_viable_pixel_value; v[image_number].sat_pixels = message.saturated_pixel_count; v[image_number].spot_count_ice = 
message.spot_count_ice_rings; v[image_number].spot_count_low_res = message.spot_count_low_res; v[image_number].spot_count_indexed = message.spot_count_indexed; v[image_number].res = message.resolution_estimate; + v[image_number].integrated_reflections = message.integrated_reflections; + + if (message.lattice_type) + v[image_number].niggli_class = message.lattice_type->niggli_class; } } @@ -56,3 +72,64 @@ ScanResult ScanResultGenerator::GetResult() const { ret.images = v; return ret; } + +void ScanResultGenerator::FillEndMessage(EndMessage &message) const { + std::unique_lock ul(m); + + size_t n = 0; + for (const auto &e: v) { + if (e.number >= 0) + n = std::max(n, static_cast(e.number) + 1); + } + if (n == 0) + return; + + message.data_collection_efficiency.resize(n); + message.spot_count.resize(n); + message.spot_count_ice_ring.resize(n); + message.spot_count_low_res.resize(n); + message.spot_count_indexed.resize(n); + message.image_indexed.resize(n); + message.v_bkg_estimate.resize(n); + message.profile_radius.resize(n); + message.mosaicity.resize(n); + message.bFactor.resize(n); + message.resolution_estimate.resize(n); + message.min_viable_pixel_value.resize(n); + message.max_viable_pixel_value.resize(n); + message.saturated_pixel_count.resize(n); + message.error_pixel_count.resize(n); + message.image_scale_factor.resize(n); + message.integrated_reflections.resize(n); + message.niggli_class.resize(n); + message.pixel_sum.resize(n); + + for (const auto &e: v) { + if (e.number < 0) + continue; + + const auto number = static_cast(e.number); + if (number >= n) + continue; + + message.data_collection_efficiency[number] = e.collection_efficiency; + message.spot_count[number] = static_cast(value_or_zero(e.spot_count)); + message.spot_count_ice_ring[number] = static_cast(value_or_zero(e.spot_count_ice)); + message.spot_count_low_res[number] = static_cast(value_or_zero(e.spot_count_low_res)); + message.spot_count_indexed[number] = 
static_cast(value_or_zero(e.spot_count_indexed)); + message.image_indexed[number] = static_cast(e.indexing_solution.value_or(0)); + message.v_bkg_estimate[number] = e.bkg.value_or(NAN); + message.profile_radius[number] = e.profile_radius.value_or(NAN); + message.mosaicity[number] = e.mosaicity.value_or(NAN); + message.bFactor[number] = e.b_factor.value_or(NAN); + message.resolution_estimate[number] = e.res.value_or(NAN); + message.min_viable_pixel_value[number] = value_or_zero(e.min_viable_pixel); + message.max_viable_pixel_value[number] = value_or_zero(e.max_viable_pixel); + message.saturated_pixel_count[number] = static_cast(value_or_zero(e.sat_pixels)); + message.error_pixel_count[number] = static_cast(value_or_zero(e.err_pixels)); + message.image_scale_factor[number] = e.image_scale_factor.value_or(NAN); + message.integrated_reflections[number] = static_cast(value_or_zero(e.integrated_reflections)); + message.niggli_class[number] = static_cast(value_or_zero(e.niggli_class)); + message.pixel_sum[number] = static_cast(value_or_zero(e.pixel_sum)); + } +} diff --git a/common/ScanResultGenerator.h b/common/ScanResultGenerator.h index 76be34e0..c7e59c99 100644 --- a/common/ScanResultGenerator.h +++ b/common/ScanResultGenerator.h @@ -25,6 +25,8 @@ public: explicit ScanResultGenerator(const DiffractionExperiment& experiment); void Add(const DataMessage& message); ScanResult GetResult() const; + + void FillEndMessage(EndMessage &message) const; }; diff --git a/docs/CBOR.md b/docs/CBOR.md index b8a785a6..cafb7292 100644 --- a/docs/CBOR.md +++ b/docs/CBOR.md @@ -12,106 +12,106 @@ There are minor differences at the moment: ## Start message -| Field name | Type | Description | Present in DECTRIS format | -|----------------------------------|----------------------|-----------------------------------------------------------------------------------------------------------------------------------|:-------------------------:| -| type | String | value "start" | X | -| 
magic_number | uint64 | Number used to describe version of the Jungfraujoch data interface - to allow to detect inconsistency between sender and receiver | | -| detector_distance | float | Detector distance \[m\] | | -| detector_translation | Array(float) | Detector translation vector \[m\] | X | -| beam_center_x | float | Beam center in X direction \[pixels\] | X | -| beam_center_y | float | Beam center in Y direction \[pixels\] | X | -| countrate_correction_enabled | bool | Countrate correction enabled | X | -| flatfield_enabled | bool | Flatfield enabled | X | -| number_of_images | uint64 | Number of images in the series | X | -| image_size_x | uint64 | Image width \[pixels\] | X | -| image_size_y | uint64 | Image height \[pixels\] | X | -| incident_energy | float | X-ray energy \[eV\] | X | -| incident_wavelength | float | X-ray wavelength \[Angstrom\] | X | -| frame_time | float | Frame time, if multiple frames per trigger \[s\] | X | -| count_time | float | Exposure time \[s\] | X | -| saturation_value | int64 | Maximum valid sample value | X | -| error_value | int64 (optional) | Value used in images to describe pixels that are in error state or missing | | -| pixel_size_x | float | Pixel width \[m\] | X | -| pixel_size_y | float | Pixel height \[m\] | X | -| sensor_thickness | float | Sensor thickness \[m\] | X | -| sensor_material | string | Sensor material | X | -| arm_date | date | Approximate date of arming | X | -| pixel_mask_enabled | bool | Pixel mask applied on images | X | -| detector_description | string | Name of the detector | X | -| detector_serial_number | string | Detector serial number | X | -| series_unique_id | string | Unique text ID of the series (run_name parameter) | X | -| series_id | uint64 | Unique numeric ID of the series (run_number parameter) | X | -| fluorescence | object (optional) | X-ray fluorescence spectrum collected at start | | -| - energy | Array(float) | Energy of measuring point \[eV\] | | -| - data | Array(float) | 
Fluorescence scan result `data` \[arbitrary units\]; must be strictly the same length as energy | | -| goniometer | Map | Definition of rotation axis (optional) | X | -| - `AXIS` | string | Rotation axis name (e.g. omega) - only one axis is supported in Jungfraujoch | X | -| - - increment | float | Rotation axis increment (per image) in degree \[deg\] | X | -| - - start | float | Rotation axis start angle \[deg\] | X | -| - - axis | Array(float) | Vector for the rotation axis | | -| - - helical_step | Array(float) | Translation for helical scan for 1 image \[m\] | | -| - - screening_wedge | Array(float) | Wedge for screening \[deg\] (increment would correspond to difference between screening points) | | -| grid_scan | object | Grid scan definition (optional and exclusive with rotation axis) | | -| - n_fast | uint64 | Number of elements along fast axis | | -| - n_slow | uint64 | Number of elements along slow axis | | -| - step_x_axis | float | Step along X axis, can be negative \[m\] | | -| - step_y_axis | float | Step along Y axis, can be negative \[m\] | | -| - snake_scan | bool | Snake scan (rows alternate direction) | | -| - vertical_scan | bool | Vertical scan (enabled: fast direction = Y, disabled: fast direction = X) | | -| jungfrau_conversion_enabled | bool (optional) | Applying JUNGFRAU pixel conversion (to photons or keV) | | -| jungfrau_conversion_factor | float (optional) | Factor used for JUNGFRAU conversion \[eV\] | | -| geometry_transformation_enabled | bool (optional) | Transformation from detector module geometry (512x1024) to full detector geometry | | -| pixel_mask | Map(string -> Image) | Pixel mask - multiple in case of storage cells | X | -| channels | Array(string) | List of image channels | X | -| max_spot_count | uint64 | Maximum number of spots identified in spot finding | | -| storage_cell_number | uint64 (optional) | Number of storage cells used by JUNGFRAU | | -| storage_cell_delay | Rational | Delay of storage cells in JUNGFRAU | | -| 
threshold_energy | float | Threshold energy for EIGER detector \[eV\] | | -| image_dtype | string | Pixel bit type (e.g. uint16) | X | -| unit_cell | object (optional) | Unit cell of the system: a, b, c \[angstrom\] and alpha, beta, gamma \[degree\] | | -| az_int_q_bin_count | uint64 | Number of azimuthal integration bins in the radial direction | | -| az_int_phi_bin_count | uint64 | Number of azimuthal integration bins in the phi angle direction | | -| az_int_bin_to_q | Array(float) | Q value for each azimuthal integration bin \[angstrom^-1\] | | -| az_int_bin_to_two_theta | Array(float) | Two theta angle value for each azimuthal integration bin \[deg\] | | -| az_int_bin_to_phi | Array(float) | Phi value for each azimuthal integration bin \[deg\] | | -| summation | uint64 | Factor of frame summation | | -| user_data | string | JSON serialized to string that can contain the following fields (all fields are optional): | X | -| - file_prefix | string | File prefix | | -| - images_per_file | uint64 | Number of images written per file | | -| - images_per_trigger | uint64 | Number of images collected per trigger | | -| - source_name | string | Facility name | | -| - source_type | string | Type of X-ray source (use NXsource/type values, for example "Synchrotron X-ray Source" or "Free-Electron Laser") | | -| - instrument_name | string | Instrument name | | -| - sample_name | string | Name of the sample | | -| - user | any valid JSON | Value of header_appendix provided at collection start to Jungfraujoch | | -| - attenuator_transmission | float | Attenuator transmission \[\] | | -| - total_flux | float | Total flux \[ph/s\] | | -| - space_group_number | uint64 | Space group number | | -| - summation_mode | string | Summation mode (internal\|fpga\|cpu) | | -| - overwrite | bool | Overwrite existing HDF5 files | | -| - file_format | int | File writer format: 0 = no master file, 1 = soft links, 2 = virtual dataset, 3 = CBF, 4 = TIFF | | -| - roi | Array(object) | ROI 
configurations; each element is one of: | | -| | | type "box": xmin, xmax, ymin, ymax (numbers) | | -| | | type "circle": r, x, y (numbers) | | -| | | type "azim": qmin, qmax (numbers) | | -| - gain_file_names | Array(string) | Names of JUNGFRAU gain files used for the current detector | | -| - write_master_file | bool | With multiple sockets, it selects which socket will provide master file | | -| - write_images | bool | Write images in the HDF5 file (if false, will only write metadata) | | -| - data_reduction_factor_serialmx | uint64 | Data reduction factor for serial MX | | -| - experiment_group | string | ID of instrument user, e.g., p-group (SLS/SwissFEL) or proposal number | | -| - jfjoch_release | string | Jungfraujoch release number | | -| - socket_number | uint64 | Number of ZeroMQ socket (on `jfjoch_broker` side) used for transmission | | -| - bit_depth_readout | uint64 | Bit depth of the detector readout | | -| - writer_notification_zmq_addr | string | ZeroMQ address to inform `jfjoch_broker` about writers that finished operation | | -| - xfel_pulse_id | uint64 | Pulse IDs are recorded for images | | -| - ring_current_mA | float | Ring current at the start of the measurement | | -| - sample_temperature_K | float | Sample temperature \[K\] | | -| - detect_ice_rings | bool | Ice ring detection feature is enabled | | -| - indexing_algorithm | string | Indexing algorithm used on-the-fly; allowed values: ffbidx, fft, fftw, none | | -| - geom_refinement_algorithm | string | Post-indexing detector geometry refinement algorithm; allowed values: none, beam_center, beam_center_tetragonal | | -| - poni_rot1 | float | Tilt of the detector rot1 according to PyFAI PONI convention \[rad\] | | -| - poni_rot2 | float | Tilt of the detector rot2 according to PyFAI PONI convention \[rad\] | | -| - poni_rot3 | float | Tilt of the detector rot3 according to PyFAI PONI convention \[rad\] | | +| Field name | Type | Description | Present in DECTRIS format | 
+|----------------------------------|----------------------|------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------:| +| type | String | value "start" | X | +| magic_number | uint64 | Number used to describe version of the Jungfraujoch data interface - to allow to detect inconsistency between sender and receiver | | +| detector_distance | float | Detector distance \[m\] | | +| detector_translation | Array(float) | Detector translation vector \[m\] | X | +| beam_center_x | float | Beam center in X direction \[pixels\] | X | +| beam_center_y | float | Beam center in Y direction \[pixels\] | X | +| countrate_correction_enabled | bool | Countrate correction enabled | X | +| flatfield_enabled | bool | Flatfield enabled | X | +| number_of_images | uint64 | Number of images in the series | X | +| image_size_x | uint64 | Image width \[pixels\] | X | +| image_size_y | uint64 | Image height \[pixels\] | X | +| incident_energy | float | X-ray energy \[eV\] | X | +| incident_wavelength | float | X-ray wavelength \[Angstrom\] | X | +| frame_time | float | Frame time, if multiple frames per trigger \[s\] | X | +| count_time | float | Exposure time \[s\] | X | +| saturation_value | int64 | Maximum valid sample value | X | +| error_value | int64 (optional) | Value used in images to describe pixels that are in error state or missing | | +| pixel_size_x | float | Pixel width \[m\] | X | +| pixel_size_y | float | Pixel height \[m\] | X | +| sensor_thickness | float | Sensor thickness \[m\] | X | +| sensor_material | string | Sensor material | X | +| arm_date | date | Approximate date of arming | X | +| pixel_mask_enabled | bool | Pixel mask applied on images | X | +| detector_description | string | Name of the detector | X | +| detector_serial_number | string | Detector serial number | X | +| series_unique_id | string | Unique text ID of the series (run_name parameter) 
| X | +| series_id | uint64 | Unique numeric ID of the series (run_number parameter) | X | +| fluorescence | object (optional) | X-ray fluorescence spectrum collected at start | | +| - energy | Array(float) | Energy of measuring point \[eV\] | | +| - data | Array(float) | Fluorescence scan result `data` \[arbitrary units\]; must be strictly the same length as energy | | +| goniometer | Map | Definition of rotation axis (optional) | X | +| - `AXIS` | string | Rotation axis name (e.g. omega) - only one axis is supported in Jungfraujoch | X | +| - - increment | float | Rotation axis increment (per image) in degree \[deg\] | X | +| - - start | float | Rotation axis start angle \[deg\] | X | +| - - axis | Array(float) | Vector for the rotation axis | | +| - - helical_step | Array(float) | Translation for helical scan for 1 image \[m\] | | +| - - screening_wedge | Array(float) | Wedge for screening \[deg\] (increment would correspond to difference between screening points) | | +| grid_scan | object | Grid scan definition (optional and exclusive with rotation axis) | | +| - n_fast | uint64 | Number of elements along fast axis | | +| - n_slow | uint64 | Number of elements along slow axis | | +| - step_x_axis | float | Step along X axis, can be negative \[m\] | | +| - step_y_axis | float | Step along Y axis, can be negative \[m\] | | +| - snake_scan | bool | Snake scan (rows alternate direction) | | +| - vertical_scan | bool | Vertical scan (enabled: fast direction = Y, disabled: fast direction = X) | | +| jungfrau_conversion_enabled | bool (optional) | Applying JUNGFRAU pixel conversion (to photons or keV) | | +| jungfrau_conversion_factor | float (optional) | Factor used for JUNGFRAU conversion \[eV\] | | +| geometry_transformation_enabled | bool (optional) | Transformation from detector module geometry (512x1024) to full detector geometry | | +| pixel_mask | Map(string -> Image) | Pixel mask - multiple in case of storage cells | X | +| channels | Array(string) | List of 
image channels | X | +| max_spot_count | uint64 | Maximum number of spots identified in spot finding | | +| storage_cell_number | uint64 (optional) | Number of storage cells used by JUNGFRAU | | +| storage_cell_delay | Rational | Delay of storage cells in JUNGFRAU | | +| threshold_energy | float | Threshold energy for EIGER detector \[eV\] | | +| image_dtype | string | Pixel bit type (e.g. uint16) | X | +| unit_cell | object (optional) | Unit cell of the system: a, b, c \[angstrom\] and alpha, beta, gamma \[degree\] | | +| az_int_q_bin_count | uint64 | Number of azimuthal integration bins in the radial direction | | +| az_int_phi_bin_count | uint64 | Number of azimuthal integration bins in the phi angle direction | | +| az_int_bin_to_q | Array(float) | Q value for each azimuthal integration bin \[angstrom^-1\] | | +| az_int_bin_to_two_theta | Array(float) | Two theta angle value for each azimuthal integration bin \[deg\] | | +| az_int_bin_to_phi | Array(float) | Phi value for each azimuthal integration bin \[deg\] | | +| summation | uint64 | Factor of frame summation | | +| user_data | string | JSON serialized to string that can contain the following fields (all fields are optional): | X | +| - file_prefix | string | File prefix | | +| - images_per_file | uint64 | Number of images written per file | | +| - images_per_trigger | uint64 | Number of images collected per trigger | | +| - source_name | string | Facility name | | +| - source_type | string | Type of X-ray source (use NXsource/type values, for example "Synchrotron X-ray Source" or "Free-Electron Laser") | | +| - instrument_name | string | Instrument name | | +| - sample_name | string | Name of the sample | | +| - user | any valid JSON | Value of header_appendix provided at collection start to Jungfraujoch | | +| - attenuator_transmission | float | Attenuator transmission \[\] | | +| - total_flux | float | Total flux \[ph/s\] | | +| - space_group_number | uint64 | Space group number | | +| - summation_mode | 
string | Summation mode (internal\|fpga\|cpu) | | +| - overwrite | bool | Overwrite existing HDF5 files | | +| - file_format | int | File writer format: 0 = only data files, 1 = NXmx legacy soft links, 2 = NXmx VDS, 3 = NXmx integrated, 4 = CBF, 5 = TIFF, 6 = no file written | | +| - roi | Array(object) | ROI configurations; each element is one of: | | +| | | type "box": xmin, xmax, ymin, ymax (numbers) | | +| | | type "circle": r, x, y (numbers) | | +| | | type "azim": qmin, qmax (numbers) | | +| - gain_file_names | Array(string) | Names of JUNGFRAU gain files used for the current detector | | +| - write_master_file | bool | With multiple sockets, it selects which socket will provide master file | | +| - write_images | bool | Write images in the HDF5 file (if false, will only write metadata) | | +| - data_reduction_factor_serialmx | uint64 | Data reduction factor for serial MX | | +| - experiment_group | string | ID of instrument user, e.g., p-group (SLS/SwissFEL) or proposal number | | +| - jfjoch_release | string | Jungfraujoch release number | | +| - socket_number | uint64 | Number of ZeroMQ socket (on `jfjoch_broker` side) used for transmission | | +| - bit_depth_readout | uint64 | Bit depth of the detector readout | | +| - writer_notification_zmq_addr | string | ZeroMQ address to inform `jfjoch_broker` about writers that finished operation | | +| - xfel_pulse_id | uint64 | Pulse IDs are recorded for images | | +| - ring_current_mA | float | Ring current at the start of the measurement | | +| - sample_temperature_K | float | Sample temperature \[K\] | | +| - detect_ice_rings | bool | Ice ring detection feature is enabled | | +| - indexing_algorithm | string | Indexing algorithm used on-the-fly; allowed values: ffbidx, fft, fftw, none | | +| - geom_refinement_algorithm | string | Post-indexing detector geometry refinement algorithm; allowed values: none, beam_center, beam_center_tetragonal | | +| - poni_rot1 | float | Tilt of the detector rot1 according to 
PyFAI PONI convention \[rad\] | | +| - poni_rot2 | float | Tilt of the detector rot2 according to PyFAI PONI convention \[rad\] | | +| - poni_rot3 | float | Tilt of the detector rot3 according to PyFAI PONI convention \[rad\] | | See [DECTRIS documentation](https://github.com/dectris/documentation/tree/main/stream_v2) for definition of Image as MultiDimArray with optional compression. @@ -253,28 +253,49 @@ See [DECTRIS documentation](https://github.com/dectris/documentation/tree/main/s ## End message -| Field name | Type | Description | Present in DECTRIS format | -|----------------------------|--------------------------|-----------------------------------------------------------------------------------------------------------------------------------|:-------------------------:| -| type | String | value "end" | X | -| magic_number | uint64 | Number used to describe version of the Jungfraujoch data interface - to allow to detect inconsistency between sender and receiver | | -| series_unique_id | string | Unique text ID of the series (run_name parameter) | X | -| series_id | uint64 | Unique numeric ID of the series (run_number parameter) | X | -| end_date | date | Approximate end date | | -| max_image_number | uint64 | Number of image with the highest number (this is counted from 1 - to distinguish zero images and one image) | | -| images_collected | uint64 | Number of image collected | | -| images_sent_to_write | uint64 | Number of image sent to writer; if writer queues were full, it is possible this is less than images collected | | -| data_collection_efficiency | float | Network packets collected / Network packets expected \[\] | | -| az_int_result | Map(text->Array(float)) | Azimuthal integration results, use az_int_bin_to_q from start message for legend | | -| adu_histogram | Map(text->Array(uint64)) | ADU values histogram | | -| adu_histogram_bin_width | uint64 | Width of bins in the above histogram \[ADU\] | | -| max_receiver_delay | uint64 | Internal 
performance of Jungfraujoch | | -| bkg_estimate | float | Mean background estimate for the whole run | | -| indexing_rate | float | Mean indexing rate for the whole run | | -| rotation_lattice_type | object | Bravais lattice classification of the total rotation solution over the run (if available); same schema as `lattice_type` | | -| - centering | string | One-letter centering code: P, A, B, C, I, F, or R | | -| - niggli_class | int64 | Integer identifier for the Niggli-reduced Bravais class | | -| - system | string | Crystal system: triclinic, monoclinic, orthorhombic, tetragonal, trigonal, hexagonal, cubic | | -| rotation_lattice | Array(9 * float) | Real-space lattice basis (flattened 3x3 in row-major), corresponding to the rotation indexing result | | +| Field name | Type | Description | Present in DECTRIS format | +|------------------------------------|--------------------------|-----------------------------------------------------------------------------------------------------------------------------------|:-------------------------:| +| type | String | value "end" | X | +| magic_number | uint64 | Number used to describe version of the Jungfraujoch data interface - to allow to detect inconsistency between sender and receiver | | +| series_unique_id | string | Unique text ID of the series (run_name parameter) | X | +| series_id | uint64 | Unique numeric ID of the series (run_number parameter) | X | +| end_date | date | Approximate end date | | +| max_image_number | uint64 | Number of image with the highest number; counted from 1 to distinguish zero images and one image | | +| images_collected | uint64 | Number of images collected | | +| images_sent_to_write | uint64 | Number of images sent to writer; if writer queues were full, it is possible this is less than images collected | | +| data_collection_efficiency | float | Overall network packets collected / network packets expected | | +| az_int_result | Map(text->Array(float)) | Azimuthal integration results, 
use az_int_bin_to_q from start message for legend | | +| adu_histogram | Map(text->Array(uint64)) | ADU values histogram | | +| adu_histogram_bin_width | uint64 | Width of bins in the above histogram \[ADU\] | | +| max_receiver_delay | uint64 | Internal performance of Jungfraujoch | | +| bkg_estimate | float | Mean background estimate for the whole run | | +| indexing_rate | float | Mean indexing rate for the whole run | | +| rotation_lattice_type | object | Bravais lattice classification of the total rotation solution over the run, if available; same schema as `lattice_type` | | +| - centering | string | One-letter centering code: P, A, B, C, I, F, or R | | +| - niggli_class | int64 | Integer identifier for the Niggli-reduced Bravais class | | +| - system | string | Crystal system: triclinic, monoclinic, orthorhombic, tetragonal, trigonal, hexagonal, cubic | | +| rotation_lattice | Array(9 * float) | Real-space lattice basis, flattened 3x3 in row-major order | | +| data_collection_efficiency_image | Array(float) | Per-image data collection efficiency. 
Missing values are encoded as 0 or 1 depending on producer context | | +| spot_count | Array(int32) | Per-image spot count | | +| spot_count_ice_ring | Array(int32) | Per-image number of spots within identified ice-ring resolution ranges | | +| spot_count_low_res | Array(int32) | Per-image number of low-resolution spots | | +| spot_count_indexed | Array(int32) | Per-image number of spots fitting indexing solution | | +| image_indexed | Array(uint8) | Per-image indexing result; 0 = not indexed, nonzero = indexed | | +| v_bkg_estimate | Array(float) | Per-image background estimate | | +| profile_radius | Array(float) | Per-image profile radius \[Angstrom^-1\] | | +| mosaicity | Array(float) | Per-image mosaicity \[degree\] | | +| bFactor | Array(float) | Per-image estimated B-factor \[Angstrom^2\] | | +| resolution_estimate | Array(float) | Per-image diffraction resolution estimate \[Angstrom\] | | +| min_viable_pixel_value | Array(int64) | Per-image minimum valid pixel value, excluding error/saturated pixels | | +| max_viable_pixel_value | Array(int64) | Per-image maximum valid pixel value, excluding error/saturated pixels | | +| saturated_pixel_count | Array(int32) | Per-image saturated pixel count | | +| error_pixel_count | Array(int32) | Per-image error pixel count | | +| image_scale_factor | Array(float) | Per-image scale factor, if scaling/merging was performed | | +| integrated_reflections | Array(int32) | Per-image count of integrated reflections | | +| niggli_class | Array(uint8) | Per-image Niggli class identifier for indexed images; 0 if unavailable | | +| pixel_sum | Array(int64) | Per-image sum of all valid pixels, excluding error/saturated pixels | | + +End-message vector fields are optional. When present, they provide master-file summary data so readers can inspect scan-level and per-image analysis results without opening every linked data file. Missing optional per-image values are encoded by the producer as zero for integer arrays and as NaN for floating-point arrays, unless otherwise noted.
## Calibration message @@ -301,4 +322,16 @@ Therefore `user_data` is serialized by Jungfraujoch as CBOR object. There is mem - Compression: - Uncompressed: raw CBOR byte string - Bitshuffle+LZ4: tag with \["bslz4", elem_size, bytes\] - - Bitshuffle+Zstandard: tag with \["bszstd", elem_size, bytes\] \ No newline at end of file + - Bitshuffle+Zstandard: tag with \["bszstd", elem_size, bytes\] + +### Notes on typed arrays + +Jungfraujoch uses RFC 8746-style typed byte-string tags for compact numeric arrays. + +Common tags used in this protocol include: + +- float32 little-endian arrays for `Array(float)` +- uint8 arrays for compact boolean/integer flags such as `image_indexed` +- int32 little-endian arrays for per-image counts +- int64 little-endian arrays for large per-image integer values +- uint64 little-endian arrays for histograms \ No newline at end of file diff --git a/frame_serialize/CBORStream2Deserializer.cpp b/frame_serialize/CBORStream2Deserializer.cpp index e60188d4..6b4d9fd6 100644 --- a/frame_serialize/CBORStream2Deserializer.cpp +++ b/frame_serialize/CBORStream2Deserializer.cpp @@ -228,6 +228,42 @@ namespace { return {ptr, len}; } + void GetCBORUInt8Array(CborValue &value, std::vector &v) { + if (GetCBORTag(value) != TagUnsignedInt8Bit) + throw JFJochException(JFJochExceptionCategory::CBORError, "Incorrect array type tag"); + + auto [ptr, len] = GetCBORByteString(value); + + v.resize(len); + memcpy(v.data(), ptr, len); + } + + void GetCBORInt32Array(CborValue &value, std::vector &v) { + if (GetCBORTag(value) != TagSignedInt32BitLE) + throw JFJochException(JFJochExceptionCategory::CBORError, "Incorrect array type tag"); + + auto [ptr, len] = GetCBORByteString(value); + + if (len % sizeof(int32_t)) + throw JFJochException(JFJochExceptionCategory::CBORError, "Size mismatch"); + + v.resize(len / sizeof(int32_t)); + memcpy(v.data(), ptr, len); + } + + void GetCBORInt64Array(CborValue &value, std::vector &v) { + if (GetCBORTag(value) != TagSignedInt64BitLE) 
+ throw JFJochException(JFJochExceptionCategory::CBORError, "Incorrect array type tag"); + + auto [ptr, len] = GetCBORByteString(value); + + if (len % sizeof(int64_t)) + throw JFJochException(JFJochExceptionCategory::CBORError, "Size mismatch"); + + v.resize(len / sizeof(int64_t)); + memcpy(v.data(), ptr, len); + } + void GetCBORFloatArray(CborValue &value, std::vector &v) { if (GetCBORTag(value) != TagFloatLE) throw JFJochException(JFJochExceptionCategory::CBORError, "Incorrect array type tag"); @@ -1284,8 +1320,44 @@ namespace { message.bkg_estimate = GetCBORFloat(value); else if (key == "indexing_rate") message.indexing_rate = GetCBORFloat(value); + else if (key == "data_collection_efficiency_image") + GetCBORFloatArray(value, message.data_collection_efficiency); + else if (key == "spot_count") + GetCBORInt32Array(value, message.spot_count); + else if (key == "spot_count_ice_ring") + GetCBORInt32Array(value, message.spot_count_ice_ring); + else if (key == "spot_count_low_res") + GetCBORInt32Array(value, message.spot_count_low_res); + else if (key == "spot_count_indexed") + GetCBORInt32Array(value, message.spot_count_indexed); + else if (key == "image_indexed") + GetCBORUInt8Array(value, message.image_indexed); + else if (key == "v_bkg_estimate") + GetCBORFloatArray(value, message.v_bkg_estimate); + else if (key == "profile_radius") + GetCBORFloatArray(value, message.profile_radius); + else if (key == "mosaicity") + GetCBORFloatArray(value, message.mosaicity); + else if (key == "bFactor") + GetCBORFloatArray(value, message.bFactor); + else if (key == "resolution_estimate") + GetCBORFloatArray(value, message.resolution_estimate); + else if (key == "min_viable_pixel_value") + GetCBORInt64Array(value, message.min_viable_pixel_value); + else if (key == "max_viable_pixel_value") + GetCBORInt64Array(value, message.max_viable_pixel_value); + else if (key == "saturated_pixel_count") + GetCBORInt32Array(value, message.saturated_pixel_count); + else if (key == 
"error_pixel_count") + GetCBORInt32Array(value, message.error_pixel_count); else if (key == "image_scale_factor") - GetCBORFloatArray(value, message.scale_factor); + GetCBORFloatArray(value, message.image_scale_factor); + else if (key == "integrated_reflections") + GetCBORInt32Array(value, message.integrated_reflections); + else if (key == "niggli_class") + GetCBORUInt8Array(value, message.niggli_class); + else if (key == "pixel_sum") + GetCBORInt64Array(value, message.pixel_sum); else if (key == "rotation_lattice") { std::vector tmp; GetCBORFloatArray(value, tmp); diff --git a/frame_serialize/CBORStream2Serializer.cpp b/frame_serialize/CBORStream2Serializer.cpp index 195e5e05..fcd39f18 100644 --- a/frame_serialize/CBORStream2Serializer.cpp +++ b/frame_serialize/CBORStream2Serializer.cpp @@ -153,6 +153,23 @@ inline void CBOR_ENC(CborEncoder &encoder, const char* key, const std::vector& v) { + cborErr(cbor_encode_text_stringz(&encoder, key)); + cborErr(cbor_encode_tag(&encoder, TagUnsignedInt8Bit)); + cborErr(cbor_encode_byte_string(&encoder, v.data(), v.size() * sizeof(uint8_t))); +} + +inline void CBOR_ENC(CborEncoder &encoder, const char* key, const std::vector& v) { + cborErr(cbor_encode_text_stringz(&encoder, key)); + cborErr(cbor_encode_tag(&encoder, TagSignedInt32BitLE)); + cborErr(cbor_encode_byte_string(&encoder, reinterpret_cast(v.data()), v.size() * sizeof(int32_t))); +} + +inline void CBOR_ENC(CborEncoder &encoder, const char* key, const std::vector& v) { + cborErr(cbor_encode_text_stringz(&encoder, key)); + cborErr(cbor_encode_tag(&encoder, TagSignedInt64BitLE)); + cborErr(cbor_encode_byte_string(&encoder, reinterpret_cast(v.data()), v.size() * sizeof(int64_t))); +} inline void CBOR_ENC(CborEncoder &encoder, const char* key, const std::vector& v) { cborErr(cbor_encode_text_stringz(&encoder, key)); @@ -679,7 +696,27 @@ void CBORStream2Serializer::SerializeSequenceEnd(const EndMessage& message) { CBOR_ENC(mapEncoder, "rotation_lattice_type", 
message.rotation_lattice_type); if (message.rotation_lattice.has_value()) CBOR_ENC(mapEncoder, "rotation_lattice", message.rotation_lattice->GetVector()); - CBOR_ENC(mapEncoder, "image_scale_factor", message.scale_factor); + + CBOR_ENC(mapEncoder, "data_collection_efficiency_image", message.data_collection_efficiency); + CBOR_ENC(mapEncoder, "spot_count", message.spot_count); + CBOR_ENC(mapEncoder, "spot_count_ice_ring", message.spot_count_ice_ring); + CBOR_ENC(mapEncoder, "spot_count_low_res", message.spot_count_low_res); + CBOR_ENC(mapEncoder, "spot_count_indexed", message.spot_count_indexed); + CBOR_ENC(mapEncoder, "image_indexed", message.image_indexed); + CBOR_ENC(mapEncoder, "v_bkg_estimate", message.v_bkg_estimate); + CBOR_ENC(mapEncoder, "profile_radius", message.profile_radius); + CBOR_ENC(mapEncoder, "mosaicity", message.mosaicity); + CBOR_ENC(mapEncoder, "bFactor", message.bFactor); + CBOR_ENC(mapEncoder, "resolution_estimate", message.resolution_estimate); + CBOR_ENC(mapEncoder, "min_viable_pixel_value", message.min_viable_pixel_value); + CBOR_ENC(mapEncoder, "max_viable_pixel_value", message.max_viable_pixel_value); + CBOR_ENC(mapEncoder, "saturated_pixel_count", message.saturated_pixel_count); + CBOR_ENC(mapEncoder, "error_pixel_count", message.error_pixel_count); + CBOR_ENC(mapEncoder, "image_scale_factor", message.image_scale_factor); + CBOR_ENC(mapEncoder, "integrated_reflections", message.integrated_reflections); + CBOR_ENC(mapEncoder, "niggli_class", message.niggli_class); + CBOR_ENC(mapEncoder, "pixel_sum", message.pixel_sum); + cborErr(cbor_encoder_close_container(&encoder, &mapEncoder)); curr_size = cbor_encoder_get_buffer_size(&encoder, buffer); diff --git a/frame_serialize/CborUtil.h b/frame_serialize/CborUtil.h index 8d56bec7..2f2f3ea6 100644 --- a/frame_serialize/CborUtil.h +++ b/frame_serialize/CborUtil.h @@ -24,5 +24,6 @@ constexpr const CborTag TagUnsignedInt32BitLE = 0b01000110; constexpr const CborTag TagSignedInt32BitLE = 0b01001110; 
constexpr const CborTag TagUnsignedInt64BitLE = 0b01000111; +constexpr const CborTag TagSignedInt64BitLE = 0b01001111; #endif //JUNGFRAUJOCH_CBORUTIL_H diff --git a/image_pusher/CBORFilePusher.cpp b/image_pusher/CBORFilePusher.cpp index 5e39c481..f09a761e 100644 --- a/image_pusher/CBORFilePusher.cpp +++ b/image_pusher/CBORFilePusher.cpp @@ -11,7 +11,7 @@ void CBORFilePusher::StartDataCollection(StartMessage &message) { dataset_number++; img_number = 0; - size_t approx_size = 1024*1024; + size_t approx_size = 256*1024*1024; for (const auto &[key, value] : message.pixel_mask) approx_size += value.size() * sizeof(uint32_t); @@ -25,7 +25,7 @@ void CBORFilePusher::StartDataCollection(StartMessage &message) { bool CBORFilePusher::EndDataCollection(const EndMessage &message) { std::unique_lock ul(m); - size_t approx_size = 1024*1024; + size_t approx_size = 256*1024*1024; std::vector serialization_buffer(approx_size); CBORStream2Serializer serializer(serialization_buffer.data(), serialization_buffer.size()); diff --git a/reader/JFJochHDF5Reader.cpp b/reader/JFJochHDF5Reader.cpp index 46bace0b..4f4d3209 100644 --- a/reader/JFJochHDF5Reader.cpp +++ b/reader/JFJochHDF5Reader.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-only +#include #include "JFJochHDF5Reader.h" #include "spdlog/fmt/fmt.h" #include "../image_analysis/bragg_integration/CalcISigma.h" @@ -55,7 +56,7 @@ inline std::pair parse_niggli_class(int64_t val) { } } -std::vector GetDimension(HDF5Object& object, const std::string& path) { +std::vector GetDimension(HDF5Object &object, const std::string &path) { const auto dim = object.GetDimension(path); if (dim.size() != 3) throw JFJochException(JFJochExceptionCategory::HDF5, "Wrong dimension of /entry/data/data"); @@ -93,7 +94,7 @@ std::string ResolveRelativeToMaster(const std::string &directory, return (std::filesystem::path(directory) / path).string(); } -template +template void 
JFJochHDF5Reader::ReadVector(std::vector &v, HDF5Object &file, const std::string &dataset_name, @@ -106,33 +107,125 @@ void JFJochHDF5Reader::ReadVector(std::vector &v, for (int i = 0; i < tmp.size(); i++) v[image0 + i] = tmp[i]; } - } catch (JFJochException &e) {} + } catch (JFJochException &e) { + } } -std::string removeSuffix(const std::string& s, const std::string& suffix) -{ +std::string removeSuffix(const std::string &s, const std::string &suffix) { if (s.ends_with(suffix)) return s.substr(0, s.size() - suffix.size()); return s; } -std::string dataset_name(const std::string& path) { +std::string dataset_name(const std::string &path) { std::string file = path; int pos = file.rfind('/'); if (pos != std::string::npos) - file = file.substr(pos+1); + file = file.substr(pos + 1); file = removeSuffix(file, "_master.h5"); -// If previous suffix was not found, try removing this one + // If previous suffix was not found, try removing this one file = removeSuffix(file, ".h5"); return file; } -void JFJochHDF5Reader::ReadFile(const std::string& filename) { +bool ReadReflectionsFromGroup(HDF5Object &file, + const std::string &image_group_name, + DataMessage &message) { + if (!file.Exists("/entry/reflections") || !file.Exists(image_group_name)) + return false; + + auto h = file.ReadOptVector(image_group_name + "/h"); + auto k = file.ReadOptVector(image_group_name + "/k"); + auto l = file.ReadOptVector(image_group_name + "/l"); + auto predicted_x = file.ReadOptVector(image_group_name + "/predicted_x"); + auto predicted_y = file.ReadOptVector(image_group_name + "/predicted_y"); + + auto d = file.ReadOptVector(image_group_name + "/d"); + auto int_sum = file.ReadOptVector(image_group_name + "/int_sum"); + auto int_err = file.ReadOptVector(image_group_name + "/int_err"); + auto bkg = file.ReadOptVector(image_group_name + "/background_mean"); + auto lp = file.ReadOptVector(image_group_name + "/lp"); + auto partiality = file.ReadOptVector(image_group_name + "/partiality"); + auto 
phi = file.ReadOptVector(image_group_name + "/delta_phi"); + auto zeta = file.ReadOptVector(image_group_name + "/zeta"); + + if (h.size() != l.size() || h.size() != k.size() || h.size() != d.size() + || h.size() != predicted_x.size() || h.size() != predicted_y.size() + || h.size() != int_sum.size() || h.size() != int_err.size() || h.size() != bkg.size()) + throw JFJochException(JFJochExceptionCategory::HDF5, "Wrong size of reflections dataset"); + + for (size_t i = 0; i < h.size(); i++) { + float lp_val = 0.0; + if (lp.size() > i && lp[i] != 0.0f) + lp_val = 1.0f / lp[i]; + + float partiality_val = -1.0f; + if (partiality.size() > i && partiality[i] >= 0.0f) + partiality_val = partiality[i]; + float delta_phi_val = NAN; + if (phi.size() > i) + delta_phi_val = phi[i]; + float zeta_val = NAN; + if (zeta.size() > i) + zeta_val = zeta[i]; + + Reflection r{ + .h = h.at(i), + .k = k.at(i), + .l = l.at(i), + .delta_phi_deg = delta_phi_val, + .predicted_x = predicted_x.at(i), + .predicted_y = predicted_y.at(i), + .d = d.at(i), + .I = int_sum.at(i), + .bkg = bkg.at(i), + .sigma = int_err.at(i), + .rlp = lp_val, + .partiality = partiality_val, + .zeta = zeta_val + }; + message.reflections.emplace_back(r); + } + + CalcISigma(message); + CalcWilsonBFactor(message, !message.b_factor.has_value()); + return true; +} + +template +std::optional ReadElementMasterFirst(HDF5Object &master_file, + HDF5Object &source_file, + const std::string &path, + hsize_t master_image, + hsize_t source_image) { + if (master_file.Exists(path)) + return master_file.ReadElement(path, master_image); + if (source_file.Exists(path)) + return source_file.ReadElement(path, source_image); + return {}; +} + +template +std::vector ReadVectorMasterFirst(HDF5Object &master_file, + HDF5Object &source_file, + const std::string &path, + const std::vector &master_start, + const std::vector &source_start, + const std::vector &size) { + if (master_file.Exists(path)) + return master_file.ReadOptVector(path, 
master_start, size); + if (source_file.Exists(path)) + return source_file.ReadOptVector(path, source_start, size); + return {}; +} + +void JFJochHDF5Reader::ReadFile(const std::string &filename) { std::unique_lock ul(hdf5_mutex); try { auto dataset = std::make_shared(); master_file = std::make_shared(filename); + master_filename = filename; dataset->experiment = default_experiment; std::filesystem::path master_path(filename); @@ -165,7 +258,7 @@ void JFJochHDF5Reader::ReadFile(const std::string& filename) { vds_data_mappings = ReadVDSImageMappings(*master_file, "/entry/data/data"); if (master_file->Exists("/entry/instrument/detector/detectorSpecific/data_collection_efficiency_image")) - dataset->efficiency = master_file->ReadVector( + dataset->efficiency = master_file->ReadVector( "/entry/instrument/detector/detectorSpecific/data_collection_efficiency_image"); else dataset->efficiency = std::vector(number_of_images, 1.0); @@ -198,6 +291,8 @@ void JFJochHDF5Reader::ReadFile(const std::string& filename) { dataset->profile_radius = master_file->ReadOptVector("/entry/MX/profileRadius"); dataset->mosaicity_deg = master_file->ReadOptVector("/entry/MX/mosaicity"); dataset->b_factor = master_file->ReadOptVector("/entry/MX/bFactor"); + dataset->scale_factor = master_file->ReadOptVector("/entry/MX/imageScaleFactor"); + dataset->integrated_reflections = master_file->ReadOptVector("/entry/MX/integratedReflections"); } if (master_file->Exists("/entry/image")) dataset->max_value = master_file->ReadOptVector("/entry/image/max_value"); @@ -317,8 +412,8 @@ void JFJochHDF5Reader::ReadFile(const std::string& filename) { number_of_images, fimages); ReadVector(dataset->mosaicity_deg, - data_file, "/entry/MX/mosaicity", - number_of_images, fimages); + data_file, "/entry/MX/mosaicity", + number_of_images, fimages); ReadVector(dataset->b_factor, data_file, "/entry/MX/bFactor", @@ -334,7 +429,8 @@ void JFJochHDF5Reader::ReadFile(const std::string& filename) { data_file, 
"/entry/image/max_value", number_of_images, fimages); } - } catch (JFJochException &e) {} + } catch (JFJochException &e) { + } if (nfiles == 0) images_per_file = fimages; @@ -353,17 +449,19 @@ void JFJochHDF5Reader::ReadFile(const std::string& filename) { dataset->experiment.IndexingAlgorithm(IndexingAlgorithmEnum::FFT); else if (indexing == "ffbidx") dataset->experiment.IndexingAlgorithm(IndexingAlgorithmEnum::FFBIDX); - - dataset->scale_factor = master_file->ReadOptVector("/entry/MX/imageScaleFactor"); } auto ring_current_A = master_file->GetOptFloat("/entry/source/current"); if (ring_current_A) dataset->experiment.RingCurrent_mA(ring_current_A.value() * 1000.0); - dataset->experiment.DetectIceRings(master_file->GetOptBool("/entry/instrument/detector/detectorSpecific/detect_ice_rings").value_or(false)); - dataset->experiment.PoniRot1_rad(master_file->GetOptFloat("/entry/instrument/detector/transformations/rot1").value_or(0.0)); - dataset->experiment.PoniRot2_rad(master_file->GetOptFloat("/entry/instrument/detector/transformations/rot2").value_or(0.0)); - dataset->experiment.PoniRot3_rad(master_file->GetOptFloat("/entry/instrument/detector/transformations/rot3").value_or(0.0)); + dataset->experiment.DetectIceRings( + master_file->GetOptBool("/entry/instrument/detector/detectorSpecific/detect_ice_rings").value_or(false)); + dataset->experiment.PoniRot1_rad( + master_file->GetOptFloat("/entry/instrument/detector/transformations/rot1").value_or(0.0)); + dataset->experiment.PoniRot2_rad( + master_file->GetOptFloat("/entry/instrument/detector/transformations/rot2").value_or(0.0)); + dataset->experiment.PoniRot3_rad( + master_file->GetOptFloat("/entry/instrument/detector/transformations/rot3").value_or(0.0)); dataset->experiment.SampleTemperature_K(master_file->GetOptFloat("/entry/sample/temperature")); dataset->experiment.BeamX_pxl(master_file->GetFloat("/entry/instrument/detector/beam_center_x")); @@ -374,7 +472,8 @@ void JFJochHDF5Reader::ReadFile(const std::string& 
filename) { det_distance = 0.1; // Set to 100 mm, if det distance is less than 1 mm dataset->experiment.DetectorDistance_mm(det_distance * 1000.0); - dataset->experiment.IncidentEnergy_keV(WVL_1A_IN_KEV / master_file->GetFloat("/entry/instrument/beam/incident_wavelength")); + dataset->experiment.IncidentEnergy_keV( + WVL_1A_IN_KEV / master_file->GetFloat("/entry/instrument/beam/incident_wavelength")); dataset->error_value = master_file->GetOptInt("/entry/instrument/detector/error_value"); @@ -391,12 +490,12 @@ void JFJochHDF5Reader::ReadFile(const std::string& filename) { dataset->experiment.Goniometer(omega); } else if (master_file->Exists("/entry/sample/grid_scan")) { GridScanSettings grid( - master_file->GetInt("/entry/sample/grid_scan/n_fast"), - master_file->GetFloat("/entry/sample/grid_scan/step_x") * 1e6f, - master_file->GetFloat("/entry/sample/grid_scan/step_y") * 1e6f, - master_file->GetOptBool("/entry/sample/grid_scan/snake_scan").value_or(false), - master_file->GetOptBool("/entry/sample/grid_scan/vertical_scan").value_or(false) - ); + master_file->GetInt("/entry/sample/grid_scan/n_fast"), + master_file->GetFloat("/entry/sample/grid_scan/step_x") * 1e6f, + master_file->GetFloat("/entry/sample/grid_scan/step_y") * 1e6f, + master_file->GetOptBool("/entry/sample/grid_scan/snake_scan").value_or(false), + master_file->GetOptBool("/entry/sample/grid_scan/vertical_scan").value_or(false) + ); grid.ImageNum(number_of_images); dataset->experiment.GridScan(grid); } @@ -405,18 +504,20 @@ void JFJochHDF5Reader::ReadFile(const std::string& filename) { auto tmp = master_file->ReadOptVector("/entry/sample/unit_cell"); if (tmp.size() == 6) dataset->experiment.SetUnitCell(UnitCell{ - .a = tmp[0], - .b = tmp[1], - .c = tmp[2], - .alpha = tmp[3], - .beta = tmp[4], - .gamma = tmp[5]}); + .a = tmp[0], + .b = tmp[1], + .c = tmp[2], + .alpha = tmp[3], + .beta = tmp[4], + .gamma = tmp[5] + }); 
dataset->experiment.SpaceGroupNumber(master_file->GetOptInt("/entry/sample/space_group_number")); dataset->experiment.SampleName(master_file->GetString("/entry/sample/name")); if (master_file->Exists("/entry/instrument/attenuator")) - dataset->experiment.AttenuatorTransmission(master_file->GetOptFloat("/entry/instrument/attenuator/attenuator_transmission")); + dataset->experiment.AttenuatorTransmission( + master_file->GetOptFloat("/entry/instrument/attenuator/attenuator_transmission")); dataset->experiment.TotalFlux(master_file->GetOptFloat("/entry/instrument/beam/total_flux")); if (master_file->Exists("/entry/azint") && master_file->Exists("/entry/azint/bin_to_q")) { @@ -432,14 +533,14 @@ void JFJochHDF5Reader::ReadFile(const std::string& filename) { dataset->azimuthal_bins = dim[0]; dataset->q_bins = dim[1]; dataset->az_int_bin_to_q.resize(dim[0] * dim[1]); - bin_to_q_dataset.ReadVector(dataset->az_int_bin_to_q, {0,0}, dim); + bin_to_q_dataset.ReadVector(dataset->az_int_bin_to_q, {0, 0}, dim); } else throw JFJochException(JFJochExceptionCategory::HDF5, "Wrong dimension of /entry/azint/image dataset"); if (master_file->Exists("/entry/azint/bin_to_phi")) { HDF5DataSet bin_to_phi_dataset(*master_file, "/entry/azint/bin_to_phi"); if (dataset->q_bins > 0) { dataset->az_int_bin_to_phi.resize(dim[0] * dim[1]); - bin_to_phi_dataset.ReadVector(dataset->az_int_bin_to_phi, {0,0}, dim); + bin_to_phi_dataset.ReadVector(dataset->az_int_bin_to_phi, {0, 0}, dim); } else { bin_to_phi_dataset.ReadVector(dataset->az_int_bin_to_phi); } @@ -461,7 +562,7 @@ void JFJochHDF5Reader::ReadFile(const std::string& filename) { detector.SaturationLimit(master_file->GetInt("/entry/instrument/detector/saturation_value")); detector.MinFrameTime(std::chrono::microseconds(0)); detector.MinCountTime(std::chrono::microseconds(0)); - detector.ReadOutTime(std::chrono::nanoseconds (0)); + detector.ReadOutTime(std::chrono::nanoseconds(0)); dataset->experiment.Detector(detector); 
dataset->experiment.FrameTime( @@ -480,9 +581,9 @@ void JFJochHDF5Reader::ReadFile(const std::string& filename) { if (image_size_x * image_size_y > 0) { auto mask_tmp = master_file->ReadOptVector( - "/entry/instrument/detector/pixel_mask", - {0, 0}, - {image_size_y, image_size_x} + "/entry/instrument/detector/pixel_mask", + {0, 0}, + {image_size_y, image_size_x} ); if (mask_tmp.empty()) mask_tmp = std::vector(image_size_x * image_size_y); @@ -490,7 +591,7 @@ void JFJochHDF5Reader::ReadFile(const std::string& filename) { } dataset->experiment.ImagesPerTrigger(number_of_images); SetStartMessage(dataset); - } catch (const std::exception& e) { + } catch (const std::exception &e) { master_file = {}; number_of_images = 0; SetStartMessage({}); @@ -531,11 +632,14 @@ CompressedImage JFJochHDF5Reader::LoadImageDataset(std::vector &tmp, HD } if (datatype.IsFloat()) - throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,"Float datasets not supported at this time"); + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "Float datasets not supported at this time"); - return {tmp, dim[2], dim[1], - CalcImageMode(datatype.GetElemSize(), datatype.IsFloat(), datatype.IsSigned()), - algorithm}; + return { + tmp, dim[2], dim[1], + CalcImageMode(datatype.GetElemSize(), datatype.IsFloat(), datatype.IsSigned()), + algorithm + }; } std::pair, uint32_t> JFJochHDF5Reader::GetImageLocation(int64_t image_number) { @@ -558,7 +662,7 @@ std::pair, uint32_t> JFJochHDF5Reader::GetImag image_id = static_cast(mapping.SourceImage(image)); data_file = std::make_shared( - ResolveRelativeToMaster(master_file_directory, mapping.filename) + ResolveRelativeToMaster(master_file_directory, mapping.filename) ); return {std::move(data_file), image_id}; } @@ -601,69 +705,106 @@ bool JFJochHDF5Reader::LoadImage_i(std::shared_ptr &dataset "Cannot load image if file not loaded"); auto [source_file, image_id] = GetImageLocation(image_number); - + message.image = 
LoadImageDataset(buffer, *source_file, image_id); message.number = image_number; - auto spot_count_opt = source_file->ReadElement("/entry/MX/nPeaks", image_id); + const auto master_image = static_cast(image_number); + const auto source_image = static_cast(image_id); + + auto spot_count_opt = ReadElementMasterFirst(*master_file, + *source_file, + "/entry/MX/nPeaks", + master_image, + source_image); if (spot_count_opt.has_value() && spot_count_opt.value() > 0) { size_t spot_count = spot_count_opt.value(); - auto spot_x = source_file->ReadVector( - "/entry/MX/peakXPosRaw", - {(hsize_t) image_id, 0}, - {1, spot_count} + auto spot_x = ReadVectorMasterFirst( + *master_file, + *source_file, + "/entry/MX/peakXPosRaw", + {master_image, 0}, + {source_image, 0}, + {1, spot_count} ); - auto spot_y = source_file->ReadVector( - "/entry/MX/peakYPosRaw", - {(hsize_t) image_id, 0}, - {1, spot_count} + auto spot_y = ReadVectorMasterFirst( + *master_file, + *source_file, + "/entry/MX/peakYPosRaw", + {master_image, 0}, + {source_image, 0}, + {1, spot_count} ); - auto spot_intensity = source_file->ReadVector( - "/entry/MX/peakTotalIntensity", - {(hsize_t) image_id, 0}, - {1, spot_count} - ); - auto spot_indexed = source_file->ReadOptVector( - "/entry/MX/peakIndexed", - {(hsize_t) image_id, 0}, - {1, spot_count} + auto spot_intensity = ReadVectorMasterFirst( + *master_file, + *source_file, + "/entry/MX/peakTotalIntensity", + {master_image, 0}, + {source_image, 0}, + {1, spot_count} ); - auto spot_ice = source_file->ReadOptVector( - "/entry/MX/peakIceRingRes", - {(hsize_t) image_id, 0}, - {1, spot_count} + if (spot_x.size() < spot_count || spot_y.size() < spot_count || spot_intensity.size() < spot_count) + throw JFJochException(JFJochExceptionCategory::HDF5, "Wrong size of spot dataset"); + + auto spot_indexed = ReadVectorMasterFirst( + *master_file, + *source_file, + "/entry/MX/peakIndexed", + {master_image, 0}, + {source_image, 0}, + {1, spot_count} ); - auto spot_h = 
source_file->ReadOptVector( - "/entry/MX/peakH", - {(hsize_t) image_id, 0}, - {1, spot_count} + auto spot_ice = ReadVectorMasterFirst( + *master_file, + *source_file, + "/entry/MX/peakIceRingRes", + {master_image, 0}, + {source_image, 0}, + {1, spot_count} ); - auto spot_k = source_file->ReadOptVector( - "/entry/MX/peakK", - {(hsize_t) image_id, 0}, - {1, spot_count} + auto spot_h = ReadVectorMasterFirst( + *master_file, + *source_file, + "/entry/MX/peakH", + {master_image, 0}, + {source_image, 0}, + {1, spot_count} ); - auto spot_l = source_file->ReadOptVector( - "/entry/MX/peakL", - {(hsize_t) image_id, 0}, - {1, spot_count} + auto spot_k = ReadVectorMasterFirst( + *master_file, + *source_file, + "/entry/MX/peakK", + {master_image, 0}, + {source_image, 0}, + {1, spot_count} ); - auto spot_dist_ewald_sphere = source_file->ReadOptVector( - "/entry/MX/peakDistEwaldSphere", - {(hsize_t) image_id, 0}, - {1, spot_count} + auto spot_l = ReadVectorMasterFirst( + *master_file, + *source_file, + "/entry/MX/peakL", + {master_image, 0}, + {source_image, 0}, + {1, spot_count} + ); + + auto spot_dist_ewald_sphere = ReadVectorMasterFirst( + *master_file, + *source_file, + "/entry/MX/peakDistEwaldSphere", + {master_image, 0}, + {source_image, 0}, + {1, spot_count} ); auto geom = dataset->experiment.GetDiffractionGeometry(); for (int i = 0; i < spot_count; i++) { - auto x = spot_x.at(i); auto y = spot_y.at(i); auto d = geom.PxlToRes(x, y); @@ -689,40 +830,47 @@ bool JFJochHDF5Reader::LoadImage_i(std::shared_ptr &dataset s.ice_ring = (spot_ice.at(i) != 0); message.spots.emplace_back(s); } - if (source_file->Exists("/entry/MX/peakCountUnfiltered")) - message.spot_count = source_file->ReadElement("/entry/MX/peakCountUnfiltered", image_id); + + if (auto v = ReadElementMasterFirst(*master_file, *source_file, + "/entry/MX/peakCountUnfiltered", + master_image, source_image); v) + message.spot_count = v; else - message.spot_count = source_file->ReadElement("/entry/MX/nPeaks", image_id); 
- if (source_file->Exists("/entry/MX/peakCountIceRingRes")) - message.spot_count_ice_rings = source_file->ReadElement("/entry/MX/peakCountIceRingRes", image_id); - if (source_file->Exists("/entry/MX/peakCountLowRes")) - message.spot_count_low_res = source_file->ReadElement("/entry/MX/peakCountLowRes", image_id); - if (source_file->Exists("/entry/MX/peakCountIndexed")) - message.spot_count_indexed = source_file->ReadElement("/entry/MX/peakCountIndexed", image_id); + message.spot_count = spot_count_opt; + + message.spot_count_ice_rings = ReadElementMasterFirst( + *master_file, *source_file, "/entry/MX/peakCountIceRingRes", master_image, source_image); + message.spot_count_low_res = ReadElementMasterFirst( + *master_file, *source_file, "/entry/MX/peakCountLowRes", master_image, source_image); + message.spot_count_indexed = ReadElementMasterFirst( + *master_file, *source_file, "/entry/MX/peakCountIndexed", master_image, source_image); GenerateSpotPlot(message, 1.5); } - if (source_file->Exists("/entry/MX/integratedReflections")) - message.integrated_reflections = source_file->ReadElement("/entry/MX/integratedReflections", image_id); - if (!dataset->az_int_bin_to_q.empty()) { - if (dataset->azimuthal_bins == 0) - message.az_int_profile = source_file->ReadOptVector( + if (dataset->azimuthal_bins == 0) { + message.az_int_profile = ReadVectorMasterFirst( + *master_file, + *source_file, "/entry/azint/image", - {(hsize_t) image_id, 0}, + {master_image, 0}, + {source_image, 0}, {1, dataset->az_int_bin_to_q.size()} ); - else { - message.az_int_profile.resize(dataset->azimuthal_bins * dataset->q_bins, 0); - message.az_int_profile = source_file->ReadOptVector( + } else { + message.az_int_profile = ReadVectorMasterFirst( + *master_file, + *source_file, "/entry/azint/image", - {(hsize_t) image_id, 0, 0}, + {master_image, 0, 0}, + {source_image, 0, 0}, {1, dataset->azimuthal_bins, dataset->q_bins} ); } } - + if (dataset->integrated_reflections.size() > image_number) + 
message.integrated_reflections = static_cast(std::lround(dataset->integrated_reflections.at(image_number))); if (dataset->resolution_estimate.size() > image_number) message.resolution_estimate = dataset->resolution_estimate[image_number]; if (dataset->indexing_result.size() > image_number) @@ -740,17 +888,30 @@ bool JFJochHDF5Reader::LoadImage_i(std::shared_ptr &dataset if (dataset->indexing_result.size() > image_number && dataset->indexing_result[image_number] != 0 - && source_file->Exists("/entry/MX") - && source_file->Exists("/entry/MX/latticeIndexed")) { + && (master_file->Exists("/entry/MX/latticeIndexed") || + source_file->Exists("/entry/MX/latticeIndexed"))) { + std::vector tmp = ReadVectorMasterFirst( + *master_file, + *source_file, + "/entry/MX/latticeIndexed", + {master_image, 0}, + {source_image, 0}, + {1, 9} + ); - std::vector tmp = source_file->ReadVector( - "/entry/MX/latticeIndexed", - {(hsize_t) image_id, 0}, - {1, 9} - ); - message.indexing_lattice = CrystalLattice(tmp); + if (tmp.size() == 9) + message.indexing_lattice = CrystalLattice(tmp); + + std::optional niggli_opt; + if (master_file->Exists("/entry/MX/niggli_class")) + niggli_opt = master_file->ReadElement("/entry/MX/niggli_class", image_number); + else if (master_file->Exists("/entry/MX/niggliClass")) + niggli_opt = master_file->ReadElement("/entry/MX/niggliClass", image_number); + else if (source_file->Exists("/entry/MX/niggli_class")) + niggli_opt = source_file->ReadElement("/entry/MX/niggli_class", image_id); + else if (source_file->Exists("/entry/MX/niggliClass")) + niggli_opt = source_file->ReadElement("/entry/MX/niggliClass", image_id); - std::optional niggli_opt = source_file->ReadElement("/entry/MX/niggli_class", image_id); if (niggli_opt) { auto symm_info = parse_niggli_class(niggli_opt.value()); @@ -762,65 +923,11 @@ bool JFJochHDF5Reader::LoadImage_i(std::shared_ptr &dataset } } - std::string image_group_name = fmt::format("/entry/reflections/image_{:06d}", image_id); + const 
std::string master_reflection_group_name = fmt::format("/entry/reflections/image_{:06d}", image_number); + const std::string source_reflection_group_name = fmt::format("/entry/reflections/image_{:06d}", image_id); - if (source_file->Exists("/entry/reflections") && source_file->Exists(image_group_name)) { - auto h = source_file->ReadOptVector(image_group_name + "/h"); - auto k = source_file->ReadOptVector(image_group_name + "/k"); - auto l = source_file->ReadOptVector(image_group_name + "/l"); - auto predicted_x = source_file->ReadOptVector(image_group_name + "/predicted_x"); - auto predicted_y = source_file->ReadOptVector(image_group_name + "/predicted_y"); - - auto d = source_file->ReadOptVector(image_group_name + "/d"); - auto int_sum = source_file->ReadOptVector(image_group_name + "/int_sum"); - auto int_err = source_file->ReadOptVector(image_group_name + "/int_err"); - auto bkg = source_file->ReadOptVector(image_group_name + "/background_mean"); - auto lp = source_file->ReadOptVector(image_group_name + "/lp"); - auto partiality = source_file->ReadOptVector(image_group_name + "/partiality"); - auto phi = source_file->ReadOptVector(image_group_name + "/delta_phi"); - auto zeta = source_file->ReadOptVector(image_group_name + "/zeta"); - - if (h.size() != l.size() || h.size() != k.size() || h.size() != d.size() - || h.size() != predicted_x.size() || h.size() != predicted_y.size() - || h.size() != int_sum.size() || h.size() != int_err.size() || h.size() != bkg.size()) - throw JFJochException(JFJochExceptionCategory::HDF5, "Wrong size of reflections dataset"); - - for (size_t i = 0; i < h.size(); i++) { - float lp_val = 0.0; - if (lp.size() > i && lp[i] != 0.0f) - lp_val = 1.0f / lp[i]; - - float partiality_val = -1.0f; - if (partiality.size() > i && partiality[i] >= 0.0f) - partiality_val = partiality[i]; - float delta_phi_val = NAN; - if (phi.size() > i) - delta_phi_val = phi[i]; - float zeta_val = NAN; - if (zeta.size() > i) - zeta_val = zeta[i]; - - Reflection r{ 
- .h = h.at(i), - .k = k.at(i), - .l = l.at(i), - .delta_phi_deg = delta_phi_val, - .predicted_x = predicted_x.at(i), - .predicted_y = predicted_y.at(i), - .d = d.at(i), - .I = int_sum.at(i), - .bkg = bkg.at(i), - .sigma = int_err.at(i), - .rlp = lp_val, - .partiality = partiality_val, - .zeta = zeta_val - }; - message.reflections.emplace_back(r); - } - - CalcISigma(message); - CalcWilsonBFactor(message, !message.b_factor.has_value()); - } + if (!ReadReflectionsFromGroup(*master_file, master_reflection_group_name, message)) + ReadReflectionsFromGroup(*source_file, source_reflection_group_name, message); return true; } @@ -832,6 +939,7 @@ void JFJochHDF5Reader::Close() { legacy_format_files.clear(); vds_data_mappings.clear(); master_file_directory.clear(); + master_filename.clear(); SetStartMessage({}); } @@ -892,7 +1000,148 @@ CompressedImage JFJochHDF5Reader::ReadCalibration(std::vector &tmp, con dataset.ReadVectorToU8(tmp, start, {dim[0], dim[1]}); algorithm = CompressionAlgorithm::NO_COMPRESSION; - return {tmp, dim[1], dim[0], - CalcImageMode(datatype.GetElemSize(), datatype.IsFloat(), datatype.IsSigned()), - algorithm}; + return { + tmp, dim[1], dim[0], + CalcImageMode(datatype.GetElemSize(), datatype.IsFloat(), datatype.IsSigned()), + algorithm + }; } + +template +std::vector ReadVectorMasterFirst(HDF5Object &master_file, + HDF5Object &source_file, + const std::string &path, + const std::vector &master_start, + const std::vector &source_start, + const std::vector &size) { + if (master_file.Exists(path)) + return master_file.ReadOptVector(path, master_start, size); + if (source_file.Exists(path)) + return source_file.ReadOptVector(path, source_start, size); + return {}; +} + +void AppendOrExtendSourceMapping(std::vector &ret, + const std::string &filename, + const std::string &dataset, + uint64_t source_first_image, + uint64_t virtual_first_image, + uint64_t image_count) { + if (image_count == 0) + return; + + if (!ret.empty()) { + auto &last = ret.back(); + if 
(last.filename == filename + && last.dataset == dataset + && last.source_first_image + last.image_count == source_first_image + && last.virtual_first_image + last.image_count == virtual_first_image) { + last.image_count += image_count; + return; + } + } + + ret.push_back(HDF5DataSourceMessage{ + .filename = filename, + .dataset = dataset, + .source_first_image = source_first_image, + .virtual_first_image = virtual_first_image, + .image_count = image_count + }); +}; + +std::vector JFJochHDF5Reader::GetHDF5DataSource( + uint64_t first_image, + std::optional image_count +) const { + std::unique_lock ul(hdf5_mutex); + + if (!master_file) + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "Cannot generate HDF5 source mapping if file not loaded"); + + if (first_image > number_of_images) + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "First image outside dataset range"); + + const uint64_t requested_count = image_count.value_or(number_of_images - first_image); + if (first_image + requested_count > number_of_images) + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "Requested image range outside dataset range"); + + std::vector ret; + if (requested_count == 0) + return ret; + + // Integrated / contiguous source: link directly to original master file. + if (format == FileWriterFormat::NXmxVDS && data_layout != HDF5DataSetLayout::VIRTUAL) { + AppendOrExtendSourceMapping(ret, + master_filename, + "/entry/data/data", + first_image, + 0, + requested_count); + return ret; + } + + // VDS source: expand VDS mappings to original source files, not to the VDS master. 
+ if (format == FileWriterFormat::NXmxVDS && data_layout == HDF5DataSetLayout::VIRTUAL) { + for (uint64_t local_image = 0; local_image < requested_count; ++local_image) { + const hsize_t virtual_image = first_image + local_image; + + bool found = false; + for (const auto &mapping: vds_data_mappings) { + if (!mapping.ContainsVirtualImage(virtual_image)) + continue; + + const uint64_t source_image = mapping.SourceImage(virtual_image); + const std::string filename = ResolveRelativeToMaster(master_file_directory, mapping.filename); + const std::string dataset = mapping.dataset.empty() ? "/entry/data/data" : mapping.dataset; + + AppendOrExtendSourceMapping(ret, + filename, + dataset, + source_image, + local_image, + 1); + found = true; + break; + } + + if (!found) + throw JFJochException(JFJochExceptionCategory::HDF5, + "Image not covered by /entry/data/data VDS mappings"); + } + + return ret; + } + + // Legacy source: link directly to linked data files. + if (format == FileWriterFormat::NXmxLegacy) { + if (images_per_file == 0) + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "Cannot generate HDF5 source mapping: images_per_file is zero"); + + for (uint64_t local_image = 0; local_image < requested_count; ++local_image) { + const uint64_t source_global_image = first_image + local_image; + const uint64_t file_id = source_global_image / images_per_file; + const uint64_t source_image = source_global_image % images_per_file; + + if (file_id >= legacy_format_files.size()) + throw JFJochException(JFJochExceptionCategory::HDF5, + "Legacy image source file missing"); + + AppendOrExtendSourceMapping(ret, + legacy_format_files.at(file_id), + "/entry/data/data", + source_image, + local_image, + 1); + } + + return ret; + } + + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "Unsupported HDF5 file layout for source mapping"); +} \ No newline at end of file diff --git a/reader/JFJochHDF5Reader.h b/reader/JFJochHDF5Reader.h index 
49e7e137..8f3781e4 100644 --- a/reader/JFJochHDF5Reader.h +++ b/reader/JFJochHDF5Reader.h @@ -16,6 +16,7 @@ class JFJochHDF5Reader : public JFJochReader { std::vector legacy_format_files; std::vector vds_data_mappings; std::string master_file_directory; + std::string master_filename; size_t images_per_file = 1; size_t number_of_images = 0; @@ -43,8 +44,14 @@ public: void ReadFile(const std::string& filename); uint64_t GetNumberOfImages() const override; + void Close() override; + std::vector GetHDF5DataSource( + uint64_t first_image = 0, + std::optional image_count = {} + ) const; + CompressedImage ReadCalibration(std::vector &tmp, const std::string &name) const; std::shared_ptr GetRawImage(int64_t image_number) override; diff --git a/reader/JFJochHttpReader.cpp b/reader/JFJochHttpReader.cpp index 08935fc5..1f453037 100644 --- a/reader/JFJochHttpReader.cpp +++ b/reader/JFJochHttpReader.cpp @@ -182,6 +182,7 @@ std::shared_ptr JFJochHttpReader::UpdateDataset_i() { dataset->b_factor = GetPlot_i("b_factor"); dataset->resolution_estimate = GetPlot_i("resolution_estimate"); dataset->efficiency = GetPlot_i("image_collection_efficiency"); + dataset->integrated_reflections = GetPlot_i("integrated_reflections"); if (msg->start_message->goniometer) dataset->experiment.Goniometer(msg->start_message->goniometer); diff --git a/reader/JFJochReaderDataset.h b/reader/JFJochReaderDataset.h index e13c100b..d3d48f15 100644 --- a/reader/JFJochReaderDataset.h +++ b/reader/JFJochReaderDataset.h @@ -40,6 +40,7 @@ struct JFJochReaderDataset { std::vector profile_radius; std::vector mosaicity_deg; std::vector b_factor; + std::vector integrated_reflections; std::vector scale_factor; diff --git a/receiver/JFJochReceiver.cpp b/receiver/JFJochReceiver.cpp index 40c5b5a7..46bbd53b 100644 --- a/receiver/JFJochReceiver.cpp +++ b/receiver/JFJochReceiver.cpp @@ -175,6 +175,8 @@ void JFJochReceiver::SendEndMessage() { for (int i = 0; i < adu_histogram_module.size(); i++) 
message.adu_histogram["module" + std::to_string(i)] = adu_histogram_module[i]->GetHistogram(); + scan_result.FillEndMessage(message); + if (push_images_to_writer) { if (!image_pusher.EndDataCollection(message)) logger.Error("End message not sent via ZeroMQ (time-out)"); diff --git a/tests/HDF5WritingTest.cpp b/tests/HDF5WritingTest.cpp index 1269b116..a1ec0525 100644 --- a/tests/HDF5WritingTest.cpp +++ b/tests/HDF5WritingTest.cpp @@ -468,7 +468,9 @@ TEST_CASE("HDF5Writer", "[HDF5][Full]") { DiffractionExperiment x(DetJF4M()); std::vector spots; - x.FilePrefix("test02_1p10").ImagesPerTrigger(5).ImagesPerFile(2).Compression(CompressionAlgorithm::NO_COMPRESSION); + x.FilePrefix("test02_1p10").ImagesPerTrigger(5).ImagesPerFile(2).Compression(CompressionAlgorithm::NO_COMPRESSION) + .OverwriteExistingFiles(true); + StartMessage start_message; x.FillMessage(start_message); diff --git a/tests/JFJochReaderTest.cpp b/tests/JFJochReaderTest.cpp index 3807a470..b08b008a 100644 --- a/tests/JFJochReaderTest.cpp +++ b/tests/JFJochReaderTest.cpp @@ -1670,4 +1670,278 @@ TEST_CASE("JFJochReader_GetRawImage_Integrated", "[HDF5][Full]") { remove("test_read_raw_image_master.h5"); // No leftover HDF5 objects REQUIRE(H5Fget_obj_count(H5F_OBJ_ALL, H5F_OBJ_ALL) == 0); +} + +TEST_CASE("JFJochReader_HDF5DataSource_Integrated", "[HDF5][Full]") { + DiffractionExperiment x(DetJF(1)); + + x.FilePrefix("source_integrated").ImagesPerTrigger(5).OverwriteExistingFiles(true); + x.BitDepthImage(16).SetFileWriterFormat(FileWriterFormat::NXmxIntegrated).PixelSigned(true); + x.Compression(CompressionAlgorithm::NO_COMPRESSION); + + std::vector image(x.GetPixelsNum(), 17); + + RegisterHDF5Filter(); + { + StartMessage start_message; + x.FillMessage(start_message); + FileWriter writer(start_message); + + for (int i = 0; i < x.GetImageNum(); i++) { + image[5678] = static_cast(100 + i); + + DataMessage message{}; + message.image = CompressedImage(image, x.GetXPixelsNum(), x.GetYPixelsNum()); + message.number = 
i; + REQUIRE_NOTHROW(writer.WriteHDF5(message)); + } + + EndMessage end_message; + end_message.max_image_number = x.GetImageNum(); + writer.WriteHDF5(end_message); + writer.Finalize(); + } + + { + JFJochHDF5Reader reader; + REQUIRE_NOTHROW(reader.ReadFile("source_integrated_master.h5")); + + auto source = reader.GetHDF5DataSource(1, 3); + REQUIRE(source.size() == 1); + + CHECK(source[0].filename == "source_integrated_master.h5"); + CHECK(source[0].dataset == "/entry/data/data"); + CHECK(source[0].source_first_image == 1); + CHECK(source[0].virtual_first_image == 0); + CHECK(source[0].image_count == 3); + } + + remove("source_integrated_master.h5"); + REQUIRE(H5Fget_obj_count(H5F_OBJ_ALL, H5F_OBJ_ALL) == 0); +} + +TEST_CASE("JFJochReader_HDF5DataSource_VDS", "[HDF5][Full]") { + DiffractionExperiment x(DetJF(1)); + + x.FilePrefix("source_vds_mapping").ImagesPerTrigger(5).ImagesPerFile(2).OverwriteExistingFiles(true); + x.BitDepthImage(16).SetFileWriterFormat(FileWriterFormat::NXmxVDS).PixelSigned(true); + x.Compression(CompressionAlgorithm::NO_COMPRESSION); + + std::vector image(x.GetPixelsNum(), 21); + + RegisterHDF5Filter(); + { + StartMessage start_message; + x.FillMessage(start_message); + FileWriter writer(start_message); + + for (int i = 0; i < x.GetImageNum(); i++) { + image[5678] = static_cast(200 + i); + + DataMessage message{}; + message.image = CompressedImage(image, x.GetXPixelsNum(), x.GetYPixelsNum()); + message.number = i; + REQUIRE_NOTHROW(writer.WriteHDF5(message)); + } + + EndMessage end_message; + end_message.max_image_number = x.GetImageNum(); + writer.WriteHDF5(end_message); + writer.Finalize(); + } + + { + JFJochHDF5Reader reader; + REQUIRE_NOTHROW(reader.ReadFile("source_vds_mapping_master.h5")); + + // Range crosses file boundary: + // global images 1,2,3 map to: + // data_000001 image 1 + // data_000002 images 0,1 + auto source = reader.GetHDF5DataSource(1, 3); + REQUIRE(source.size() == 2); + + CHECK(source[0].filename == 
"source_vds_mapping_data_000001.h5"); + CHECK(source[0].dataset == "/entry/data/data"); + CHECK(source[0].source_first_image == 1); + CHECK(source[0].virtual_first_image == 0); + CHECK(source[0].image_count == 1); + + CHECK(source[1].filename == "source_vds_mapping_data_000002.h5"); + CHECK(source[1].dataset == "/entry/data/data"); + CHECK(source[1].source_first_image == 0); + CHECK(source[1].virtual_first_image == 1); + CHECK(source[1].image_count == 2); + } + + remove("source_vds_mapping_master.h5"); + remove("source_vds_mapping_data_000001.h5"); + remove("source_vds_mapping_data_000002.h5"); + remove("source_vds_mapping_data_000003.h5"); + REQUIRE(H5Fget_obj_count(H5F_OBJ_ALL, H5F_OBJ_ALL) == 0); +} + +TEST_CASE("JFJochReader_HDF5DataSource_Legacy", "[HDF5][Full]") { + DiffractionExperiment x(DetJF(1)); + + x.FilePrefix("source_legacy_mapping").ImagesPerTrigger(5).ImagesPerFile(2).OverwriteExistingFiles(true); + x.BitDepthImage(16).SetFileWriterFormat(FileWriterFormat::NXmxLegacy).PixelSigned(true); + x.Compression(CompressionAlgorithm::NO_COMPRESSION); + + std::vector image(x.GetPixelsNum(), 31); + + RegisterHDF5Filter(); + { + StartMessage start_message; + x.FillMessage(start_message); + FileWriter writer(start_message); + + for (int i = 0; i < x.GetImageNum(); i++) { + image[5678] = static_cast(300 + i); + + DataMessage message{}; + message.image = CompressedImage(image, x.GetXPixelsNum(), x.GetYPixelsNum()); + message.number = i; + REQUIRE_NOTHROW(writer.WriteHDF5(message)); + } + + EndMessage end_message; + end_message.max_image_number = x.GetImageNum(); + writer.WriteHDF5(end_message); + writer.Finalize(); + } + + { + JFJochHDF5Reader reader; + REQUIRE_NOTHROW(reader.ReadFile("source_legacy_mapping_master.h5")); + + auto source = reader.GetHDF5DataSource(1, 3); + REQUIRE(source.size() == 2); + + CHECK(source[0].filename == "source_legacy_mapping_data_000001.h5"); + CHECK(source[0].dataset == "/entry/data/data"); + CHECK(source[0].source_first_image == 1); 
+ CHECK(source[0].virtual_first_image == 0); + CHECK(source[0].image_count == 1); + + CHECK(source[1].filename == "source_legacy_mapping_data_000002.h5"); + CHECK(source[1].dataset == "/entry/data/data"); + CHECK(source[1].source_first_image == 0); + CHECK(source[1].virtual_first_image == 1); + CHECK(source[1].image_count == 2); + } + + remove("source_legacy_mapping_master.h5"); + remove("source_legacy_mapping_data_000001.h5"); + remove("source_legacy_mapping_data_000002.h5"); + remove("source_legacy_mapping_data_000003.h5"); + REQUIRE(H5Fget_obj_count(H5F_OBJ_ALL, H5F_OBJ_ALL) == 0); +} + +TEST_CASE("JFJochReader_ProcessingHDF5_FromVDS_MapsToDataFiles", "[HDF5][Full]") { + DiffractionExperiment x(DetJF(1)); + + x.FilePrefix("proc_source_vds").ImagesPerTrigger(5).ImagesPerFile(2).OverwriteExistingFiles(true); + x.BitDepthImage(16).SetFileWriterFormat(FileWriterFormat::NXmxVDS).PixelSigned(true); + x.Compression(CompressionAlgorithm::NO_COMPRESSION); + + std::vector image(x.GetPixelsNum(), 51); + + RegisterHDF5Filter(); + { + StartMessage start_message; + x.FillMessage(start_message); + FileWriter writer(start_message); + + for (int i = 0; i < x.GetImageNum(); i++) { + image[5678] = static_cast(500 + i); + + DataMessage message{}; + message.image = CompressedImage(image, x.GetXPixelsNum(), x.GetYPixelsNum()); + message.number = i; + REQUIRE_NOTHROW(writer.WriteHDF5(message)); + } + + EndMessage end_message; + end_message.max_image_number = x.GetImageNum(); + writer.WriteHDF5(end_message); + writer.Finalize(); + } + + std::vector source_data; + { + JFJochHDF5Reader reader; + REQUIRE_NOTHROW(reader.ReadFile("proc_source_vds_master.h5")); + source_data = reader.GetHDF5DataSource(1, 3); + + REQUIRE(source_data.size() == 2); + CHECK(source_data[0].filename == "proc_source_vds_data_000001.h5"); + CHECK(source_data[1].filename == "proc_source_vds_data_000002.h5"); + } + + { + DiffractionExperiment proc_x = x; + proc_x.FilePrefix("proc_from_vds") + .ImagesPerTrigger(3) + 
.SetFileWriterFormat(FileWriterFormat::NXmxIntegrated) + .OverwriteExistingFiles(true); + + StartMessage start_message; + proc_x.FillMessage(start_message); + start_message.number_of_images = 3; + start_message.images_per_file = 3; + start_message.write_images = false; + start_message.write_master_file = true; + start_message.hdf5_source_data = source_data; + + FileWriter writer(start_message); + + for (int i = 0; i < 3; i++) { + DataMessage message{}; + message.number = i; + message.original_number = i + 1; + message.image = CompressedImage(image, x.GetXPixelsNum(), x.GetYPixelsNum()); + message.spot_count = 200 + i; + REQUIRE_NOTHROW(writer.WriteHDF5(message)); + } + + EndMessage end_message; + end_message.max_image_number = 3; + writer.WriteHDF5(end_message); + writer.Finalize(); + } + + { + HDF5ReadOnlyFile file("proc_from_vds_master.h5"); + HDF5DataSet data(file, "/entry/data/data"); + HDF5Dcpl dcpl(data); + + REQUIRE(dcpl.GetLayout() == HDF5DataSetLayout::VIRTUAL); + + auto mappings = dcpl.GetVirtualMappings(); + REQUIRE(mappings.size() == 2); + + CHECK(mappings[0].filename == "proc_source_vds_data_000001.h5"); + CHECK(mappings[1].filename == "proc_source_vds_data_000002.h5"); + } + + { + JFJochHDF5Reader reader; + REQUIRE_NOTHROW(reader.ReadFile("proc_from_vds_master.h5")); + + auto img0 = reader.LoadImage(0); + REQUIRE(img0); + CHECK(img0->Image()[5678] == 501); + + auto img2 = reader.LoadImage(2); + REQUIRE(img2); + CHECK(img2->Image()[5678] == 503); + } + + remove("proc_source_vds_master.h5"); + remove("proc_source_vds_data_000001.h5"); + remove("proc_source_vds_data_000002.h5"); + remove("proc_source_vds_data_000003.h5"); + remove("proc_from_vds_master.h5"); + REQUIRE(H5Fget_obj_count(H5F_OBJ_ALL, H5F_OBJ_ALL) == 0); } \ No newline at end of file diff --git a/writer/HDF5DataFilePluginDetector.cpp b/writer/HDF5DataFilePluginDetector.cpp index a7ab1efc..775c5bab 100644 --- a/writer/HDF5DataFilePluginDetector.cpp +++ b/writer/HDF5DataFilePluginDetector.cpp 
@@ -15,8 +15,6 @@ void HDF5DataFilePluginDetector::OpenFile(HDF5File &in_data_file, const DataMess efficiency.reserve(images_per_file); packets_received.reserve(images_per_file); packets_expected.reserve(images_per_file); - pixel_sum.reserve(images_per_file); - processing_time.reserve(images_per_file); } void HDF5DataFilePluginDetector::Write(const DataMessage &msg, uint64_t image_number) { @@ -33,8 +31,6 @@ void HDF5DataFilePluginDetector::Write(const DataMessage &msg, uint64_t image_nu packets_received[image_number] = msg.packets_received.value(); if (msg.packets_expected.has_value()) packets_expected[image_number] = msg.packets_expected.value(); - if (msg.pixel_sum.has_value()) - pixel_sum[image_number] = msg.pixel_sum.value(); if (msg.image_collection_efficiency.has_value()) efficiency[image_number] = msg.image_collection_efficiency.value(); @@ -57,7 +53,5 @@ void HDF5DataFilePluginDetector::WriteFinal(HDF5File &data_file) { data_file.SaveVector(prefix + "/packets_received", packets_received.vec()); if (!packets_expected.empty()) data_file.SaveVector(prefix + "/packets_expected", packets_expected.vec()); - if (!pixel_sum.empty()) - data_file.SaveVector(prefix + "/pixel_sum", pixel_sum.vec()); data_file.SaveVector(prefix + "/data_collection_efficiency_image", efficiency.vec()); } diff --git a/writer/HDF5DataFilePluginDetector.h b/writer/HDF5DataFilePluginDetector.h index c61d7d51..99a8f6b6 100644 --- a/writer/HDF5DataFilePluginDetector.h +++ b/writer/HDF5DataFilePluginDetector.h @@ -16,8 +16,6 @@ class HDF5DataFilePluginDetector : public HDF5DataFilePlugin { AutoIncrVector efficiency; AutoIncrVector packets_received; AutoIncrVector packets_expected; - AutoIncrVector pixel_sum; - AutoIncrVector processing_time{NAN}; public: HDF5DataFilePluginDetector(const StartMessage& msg); void OpenFile(HDF5File &data_file, const DataMessage& msg, size_t images_per_file) override; diff --git a/writer/HDF5DataFilePluginImageStats.cpp b/writer/HDF5DataFilePluginImageStats.cpp 
index 74c8982e..ac2cd790 100644 --- a/writer/HDF5DataFilePluginImageStats.cpp +++ b/writer/HDF5DataFilePluginImageStats.cpp @@ -8,6 +8,7 @@ void HDF5DataFilePluginImageStats::OpenFile(HDF5File &data_file, const DataMessa min_value.reserve(images_per_file); error_pixels.reserve(images_per_file); saturated_pixels.reserve(images_per_file); + pixel_sum.reserve(images_per_file); } void HDF5DataFilePluginImageStats::Write(const DataMessage &msg, uint64_t image_number) { @@ -19,6 +20,8 @@ void HDF5DataFilePluginImageStats::Write(const DataMessage &msg, uint64_t image_ error_pixels[image_number] = msg.error_pixel_count.value(); if (msg.saturated_pixel_count) saturated_pixels[image_number] = msg.saturated_pixel_count.value(); + if (msg.pixel_sum) + pixel_sum[image_number] = msg.pixel_sum.value(); } void HDF5DataFilePluginImageStats::WriteFinal(HDF5File &data_file) { @@ -31,4 +34,6 @@ void HDF5DataFilePluginImageStats::WriteFinal(HDF5File &data_file) { data_file.SaveVector("/entry/image/error_pixels", error_pixels.vec()); if (!saturated_pixels.empty()) data_file.SaveVector("/entry/image/saturated_pixels", saturated_pixels.vec()); + if (!pixel_sum.empty()) + data_file.SaveVector("/entry/image/pixel_sum", pixel_sum.vec()); } diff --git a/writer/HDF5DataFilePluginImageStats.h b/writer/HDF5DataFilePluginImageStats.h index 0d4dc87e..b23e0ae5 100644 --- a/writer/HDF5DataFilePluginImageStats.h +++ b/writer/HDF5DataFilePluginImageStats.h @@ -13,6 +13,7 @@ class HDF5DataFilePluginImageStats : public HDF5DataFilePlugin { AutoIncrVector min_value; AutoIncrVector error_pixels; AutoIncrVector saturated_pixels; + AutoIncrVector pixel_sum; public: void OpenFile(HDF5File &data_file, const DataMessage& msg, size_t images_per_file) override; void Write(const DataMessage& msg, uint64_t image_number) override; diff --git a/writer/HDF5DataFilePluginMX.cpp b/writer/HDF5DataFilePluginMX.cpp index fa3951bc..e8f372a3 100644 --- a/writer/HDF5DataFilePluginMX.cpp +++ b/writer/HDF5DataFilePluginMX.cpp 
@@ -225,9 +225,9 @@ void HDF5DataFilePluginMX::WriteFinal(HDF5File &data_file) { if (!beam_corr_y.empty()) data_file.SaveVector("/entry/MX/beam_corr_y", beam_corr_y.vec())->Units("pixel"); if (!niggli_class.empty()) - data_file.SaveVector("/entry/MX/niggli_class", niggli_class.vec()); + data_file.SaveVector("/entry/MX/niggliClass", niggli_class.vec()); if (!bravais_lattice.empty()) - data_file.SaveVector("/entry/MX/bravais_lattice", bravais_lattice.vec()); + data_file.SaveVector("/entry/MX/bravaisLattice", bravais_lattice.vec()); if (!resolution_estimate.empty()) data_file.SaveVector("/entry/MX/resolutionEstimate", resolution_estimate.vec())->Units("Angstrom"); if (!integrated_reflections.empty()) diff --git a/writer/HDF5NXmx.cpp b/writer/HDF5NXmx.cpp index 29f2a757..fdee855c 100644 --- a/writer/HDF5NXmx.cpp +++ b/writer/HDF5NXmx.cpp @@ -21,7 +21,7 @@ NXmx::NXmx(const StartMessage &start) uint64_t tmp_suffix; try { if (!start.arm_date.empty()) - tmp_suffix = parse_UTC_to_ms(start.arm_date); + tmp_suffix = parse_UTC_to_ms(start.arm_date); } catch (...) 
{ tmp_suffix = std::chrono::system_clock::now().time_since_epoch().count(); } @@ -33,7 +33,8 @@ NXmx::NXmx(const StartMessage &start) MakeDirectory(filename); - bool v1_10 = (start.file_format == FileWriterFormat::NXmxVDS); + bool v1_10 = (start.file_format == FileWriterFormat::NXmxVDS) + || !start.hdf5_source_data.empty(); hdf5_file = std::make_shared(tmp_filename, v1_10); hdf5_file->Attr("file_name", filename); @@ -119,54 +120,11 @@ void NXmx::LinkToData_VDS(const StartMessage &start, const EndMessage &end) { data_dataset->Attr("image_nr_low", (int32_t) 1) .Attr("image_nr_high",(int32_t) total_images); - VDS(start, - "/entry/detector/data_collection_efficiency_image", - "/entry/instrument/detector/detectorSpecific/data_collection_efficiency_image", - {total_images}, - HDF5DataType(0.0f)); - - VDS(start, - "/entry/detector/pixel_sum", - "/entry/instrument/detector/detectorSpecific/pixel_sum", - {total_images}, - HDF5DataType((int64_t) 0)); - - HDF5Group(*hdf5_file, "/entry/image").NXClass("NXCollection"); - - VDS(start, - "/entry/image/max_value", - {total_images}, - HDF5DataType((int64_t)0)); - - VDS(start, - "/entry/image/min_value", - {total_images}, - HDF5DataType((int64_t)0)); - - VDS(start, - "/entry/image/error_pixels", - {total_images}, - HDF5DataType((int64_t)0)); - - VDS(start, - "/entry/image/saturated_pixels", - {total_images}, - HDF5DataType((int64_t)0)); - if (start.max_spot_count > 0) { VDS(start, "/entry/MX/peakXPosRaw",{total_images, start.max_spot_count}, HDF5DataType(0.0f)); VDS(start, "/entry/MX/peakYPosRaw",{total_images, start.max_spot_count}, HDF5DataType(0.0f)); VDS(start, "/entry/MX/peakTotalIntensity",{total_images, start.max_spot_count}, HDF5DataType(0.0f)); VDS(start, "/entry/MX/peakIceRingRes", {total_images, start.max_spot_count}, HDF5DataType((uint8_t) 0)); - VDS(start, "/entry/MX/nPeaks",{total_images}, HDF5DataType((uint32_t) 0)); - VDS(start, "/entry/MX/strongPixels", {total_images}, HDF5DataType((uint32_t) 0)); - VDS(start, 
"/entry/MX/bkgEstimate", {total_images}, HDF5DataType(0.0f)); - VDS(start, "/entry/MX/resolutionEstimate",{total_images}, HDF5DataType(0.0f)); - - VDS(start, "/entry/MX/peakCountUnfiltered",{total_images}, HDF5DataType((uint32_t) 0)); - VDS(start, "/entry/MX/peakCountIceRingRes",{total_images}, HDF5DataType((uint32_t) 0)); - VDS(start, "/entry/MX/peakCountLowRes",{total_images}, HDF5DataType((uint32_t) 0)); - VDS(start, "/entry/MX/peakCountIndexed",{total_images}, HDF5DataType((uint32_t) 0)); } if (start.indexing_algorithm != IndexingAlgorithmEnum::None) { @@ -175,11 +133,7 @@ void NXmx::LinkToData_VDS(const StartMessage &start, const EndMessage &end) { VDS(start, "/entry/MX/peakK", {total_images, start.max_spot_count}, HDF5DataType((int32_t) 0)); VDS(start, "/entry/MX/peakL", {total_images, start.max_spot_count}, HDF5DataType((int32_t) 0)); VDS(start, "/entry/MX/peakDistEwaldSphere", {total_images, start.max_spot_count}, HDF5DataType((float) 0)); - - VDS(start, "/entry/MX/imageIndexed", {total_images}, HDF5DataType((uint8_t) 0)); VDS(start, "/entry/MX/latticeIndexed", {total_images,9}, HDF5DataType((float) 0))->Units("Angstrom"); - VDS(start, "/entry/MX/profileRadius", {total_images}, HDF5DataType(0.0f))->Units("Angstrom^-1"); - VDS(start, "/entry/MX/bFactor", {total_images}, HDF5DataType(0.0f))->Units("Angstrom^2"); } if (start.geom_refinement_algorithm != GeomRefinementAlgorithmEnum::None) { @@ -215,20 +169,89 @@ void NXmx::LinkToData_VDS(const StartMessage &start, const EndMessage &end) { {total_images}, HDF5DataType((uint8_t) 0)); - if (!start.rois.empty()) { - HDF5Group(*hdf5_file, "/entry/roi").NXClass("NXcollection"); + LinkToReflections_VDS(start, end); + } +} - for (const auto &r: start.rois) { - std::string roi = r.name; - HDF5Group(*hdf5_file, "/entry/roi/" + roi); - VDS(start, "/entry/roi/" + roi + "/max", {total_images}, HDF5DataType((int64_t) 0)); - VDS(start, "/entry/roi/" + roi + "/sum", {total_images}, HDF5DataType((int64_t) 0)); - VDS(start, 
"/entry/roi/" + roi + "/sum_sq", {total_images}, HDF5DataType((int64_t) 0)); - VDS(start, "/entry/roi/" + roi + "/npixel", {total_images}, HDF5DataType((int64_t) 0)); - VDS(start, "/entry/roi/" + roi + "/x", {total_images}, HDF5DataType((float) 0)); - VDS(start, "/entry/roi/" + roi + "/y", {total_images}, HDF5DataType((float) 0)); - } - } +void NXmx::LinkToData_ProcessingVDS(const StartMessage &start, const EndMessage &end) { + if (start.hdf5_source_data.empty() || end.max_image_number == 0) + return; + + const hsize_t total_images = end.max_image_number; + const hsize_t width = start.image_size_x; + const hsize_t height = start.image_size_y; + + HDF5Group(*hdf5_file, "/entry/data").NXClass("NXdata"); + + HDF5DataSpace full_data_space({total_images, height, width}); + HDF5Dcpl dcpl; + dcpl.SetChunking({1, height, width}); + + for (const auto &mapping: start.hdf5_source_data) { + if (mapping.image_count == 0) + continue; + + if (mapping.virtual_first_image + mapping.image_count > total_images) + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "Processing VDS mapping exceeds output image count"); + + const std::string source_dataset = mapping.dataset.empty() + ? 
"/entry/data/data" + : mapping.dataset; + + HDF5DataSpace virtual_data_space({total_images, height, width}); + virtual_data_space.SelectHyperslab( + {static_cast(mapping.virtual_first_image), 0, 0}, + {static_cast(mapping.image_count), height, width} + ); + + const hsize_t source_extent_images = mapping.source_first_image + mapping.image_count; + HDF5DataSpace source_data_space({source_extent_images, height, width}); + source_data_space.SelectHyperslab( + {static_cast(mapping.source_first_image), 0, 0}, + {static_cast(mapping.image_count), height, width} + ); + + dcpl.SetVirtual(mapping.filename, + source_dataset, + source_data_space, + virtual_data_space); + } + + auto data_dataset = std::make_unique( + *hdf5_file, + "/entry/data/data", + HDF5DataType(start.bit_depth_image / 8, start.pixel_signed), + full_data_space, + dcpl + ); + + data_dataset->Attr("image_nr_low", static_cast(1)) + .Attr("image_nr_high", static_cast(total_images)); +} + +void NXmx::LinkToReflections_VDS(const StartMessage &start, const EndMessage &end) { + if (end.integrated_reflections.empty()) + return; + + HDF5Group(*hdf5_file, "/entry/reflections").NXClass("NXcollection"); + + for (size_t image = 0; image < end.integrated_reflections.size(); ++image) { + if (end.integrated_reflections[image] <= 0) + continue; + + if (start.images_per_file <= 0) + continue; + + const uint64_t file_id = image / static_cast(start.images_per_file); + const uint64_t image_in_file = image % static_cast(start.images_per_file); + + const std::string local_name = fmt::format("/entry/reflections/image_{:06d}", image); + const std::string source_name = fmt::format("/entry/reflections/image_{:06d}", image_in_file); + + hdf5_file->ExternalLink(HDF5Metadata::DataFileName(start, file_id), + source_name, + local_name); } } @@ -696,6 +719,19 @@ void NXmx::ADUHistogram(const EndMessage &end) { } } +template +void SaveVectorIfMissing(HDF5Object &object, + const std::string &path, + const std::vector &values, + const 
std::string &units = "") { + if (values.empty() || object.Exists(path)) + return; + + auto dataset = object.SaveVector(path, values); + if (!units.empty()) + dataset->Units(units); +} + void NXmx::Finalize(const EndMessage &end) { try { if (!hdf5_file) @@ -715,6 +751,7 @@ void NXmx::Finalize(const EndMessage &end) { Sample(start_message, end); AzimuthalIntegration(start_message, end); ADUHistogram(end); + EndResultVectors(end); switch (start_message.file_format.value_or(FileWriterFormat::NXmxLegacy)) { case FileWriterFormat::NXmxLegacy: @@ -724,6 +761,9 @@ void NXmx::Finalize(const EndMessage &end) { LinkToData_VDS(start_message, end); break; case FileWriterFormat::NXmxIntegrated: + if (!start_message.hdf5_source_data.empty()) + LinkToData_ProcessingVDS(start_message, end); + break; default: break; } @@ -742,9 +782,6 @@ void NXmx::Finalize(const EndMessage &end) { SaveScalar(*hdf5_file, "/entry/MX/bkgEstimateMean", end.bkg_estimate.value()); } - if (!end.scale_factor.empty()) - SaveVector(*hdf5_file, "/entry/MX/imageScaleFactor", end.scale_factor); - hdf5_file->Close(); hdf5_file.reset(); } catch (const JFJochException &e) { @@ -784,3 +821,49 @@ void NXmx::UserData(const StartMessage &start) { std::shared_ptr NXmx::GetFile() { return hdf5_file; } + +void NXmx::EndResultVectors(const EndMessage &end) { + if (!end.data_collection_efficiency.empty()) { + HDF5Group det_specific(*hdf5_file, "/entry/instrument/detector/detectorSpecific"); + det_specific.NXClass("NXcollection"); + SaveVectorIfMissing(*hdf5_file, + "/entry/instrument/detector/detectorSpecific/data_collection_efficiency_image", + end.data_collection_efficiency); + } + + if (!end.max_viable_pixel_value.empty() || + !end.min_viable_pixel_value.empty() || + !end.error_pixel_count.empty() || + !end.saturated_pixel_count.empty() || + !end.pixel_sum.empty()) { + HDF5Group image_group(*hdf5_file, "/entry/image"); + image_group.NXClass("NXcollection"); + + SaveVectorIfMissing(*hdf5_file, "/entry/image/max_value", 
end.max_viable_pixel_value); + SaveVectorIfMissing(*hdf5_file, "/entry/image/min_value", end.min_viable_pixel_value); + SaveVectorIfMissing(*hdf5_file, "/entry/image/error_pixels", end.error_pixel_count); + SaveVectorIfMissing(*hdf5_file, "/entry/image/saturated_pixels", end.saturated_pixel_count); + SaveVectorIfMissing(*hdf5_file, "/entry/image/pixel_sum", end.pixel_sum); + } + + HDF5Group mx_group(*hdf5_file, "/entry/MX"); + mx_group.NXClass("NXcollection"); + + SaveVectorIfMissing(*hdf5_file, "/entry/MX/nPeaks", end.spot_count); + SaveVectorIfMissing(*hdf5_file, "/entry/MX/peakCountIceRingRes", end.spot_count_ice_ring); + SaveVectorIfMissing(*hdf5_file, "/entry/MX/peakCountLowRes", end.spot_count_low_res); + SaveVectorIfMissing(*hdf5_file, "/entry/MX/peakCountIndexed", end.spot_count_indexed); + SaveVectorIfMissing(*hdf5_file, "/entry/MX/imageIndexed", end.image_indexed); + SaveVectorIfMissing(*hdf5_file, "/entry/MX/bkgEstimate", end.v_bkg_estimate); + SaveVectorIfMissing(*hdf5_file, "/entry/MX/profileRadius", end.profile_radius, "Angstrom^-1"); + SaveVectorIfMissing(*hdf5_file, "/entry/MX/mosaicity", end.mosaicity, "deg"); + SaveVectorIfMissing(*hdf5_file, "/entry/MX/bFactor", end.bFactor, "Angstrom^2"); + SaveVectorIfMissing(*hdf5_file, "/entry/MX/resolutionEstimate", end.resolution_estimate, "Angstrom"); + SaveVectorIfMissing(*hdf5_file, "/entry/MX/imageScaleFactor", end.image_scale_factor); + SaveVectorIfMissing(*hdf5_file, "/entry/MX/integratedReflections", end.integrated_reflections); + + if (!end.niggli_class.empty()) { + SaveVectorIfMissing(*hdf5_file, "/entry/MX/niggliClass", end.niggli_class); + SaveVectorIfMissing(*hdf5_file, "/entry/MX/niggli_class", end.niggli_class); + } +} diff --git a/writer/HDF5NXmx.h b/writer/HDF5NXmx.h index 8bc0b36d..30612dc0 100644 --- a/writer/HDF5NXmx.h +++ b/writer/HDF5NXmx.h @@ -1,8 +1,7 @@ // SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-only -#ifndef 
JUNGFRAUJOCH_HDF5NXMX_H -#define JUNGFRAUJOCH_HDF5NXMX_H +#pragma once #include "../common/JFJochMessages.h" @@ -23,6 +22,9 @@ class NXmx { void LinkToData(const StartMessage &start, const EndMessage &end); void LinkToData_VDS(const StartMessage &start, const EndMessage &end); + void LinkToData_ProcessingVDS(const StartMessage &start, const EndMessage &end); + void LinkToReflections_VDS(const StartMessage &start, const EndMessage &end); + std::unique_ptr VDS(const StartMessage &start, const std::string& name, const std::vector &dim, @@ -49,9 +51,11 @@ class NXmx { void SaveCBORImage(const std::string& hdf5_path, const CompressedImage &image); void AzimuthalIntegration(const StartMessage &start, const EndMessage &end); void ADUHistogram(const EndMessage &end); + void EndResultVectors(const EndMessage &end); void UserData(const StartMessage &start); void MX(const StartMessage &start); void Fluorescence(const StartMessage &start); + public: NXmx(const StartMessage& start); ~NXmx(); @@ -62,5 +66,3 @@ public: std::shared_ptr GetFile(); }; - -#endif //JUNGFRAUJOCH_HDF5NXMX_H -- 2.52.0 From 91c714f1a4623004f929c77a036307e4610dd10b Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Fri, 8 May 2026 19:40:12 +0200 Subject: [PATCH 008/132] Fix --- reader/JFJochHDF5Reader.cpp | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/reader/JFJochHDF5Reader.cpp b/reader/JFJochHDF5Reader.cpp index 4f4d3209..bb4945b0 100644 --- a/reader/JFJochHDF5Reader.cpp +++ b/reader/JFJochHDF5Reader.cpp @@ -1007,20 +1007,6 @@ CompressedImage JFJochHDF5Reader::ReadCalibration(std::vector &tmp, con }; } -template -std::vector ReadVectorMasterFirst(HDF5Object &master_file, - HDF5Object &source_file, - const std::string &path, - const std::vector &master_start, - const std::vector &source_start, - const std::vector &size) { - if (master_file.Exists(path)) - return master_file.ReadOptVector(path, master_start, size); - if (source_file.Exists(path)) - return source_file.ReadOptVector(path, 
source_start, size); - return {}; -} - void AppendOrExtendSourceMapping(std::vector &ret, const std::string &filename, const std::string &dataset, -- 2.52.0 From da15714080571ec6d8889379b1726b97d5530f1e Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Fri, 8 May 2026 19:56:24 +0200 Subject: [PATCH 009/132] jfjoch_test: Fix tests --- tests/JFJochReaderTest.cpp | 125 +++++++++++++------------------------ 1 file changed, 43 insertions(+), 82 deletions(-) diff --git a/tests/JFJochReaderTest.cpp b/tests/JFJochReaderTest.cpp index b08b008a..2cd74eb6 100644 --- a/tests/JFJochReaderTest.cpp +++ b/tests/JFJochReaderTest.cpp @@ -4,6 +4,7 @@ #include #include "../common/DiffractionExperiment.h" +#include "../common/ScanResultGenerator.h" #include "../writer/FileWriter.h" #include "../reader/JFJochHDF5Reader.h" #include "../compression/JFJochCompressor.h" @@ -39,13 +40,13 @@ TEST_CASE("HDF5DataType_ElemType","[HDF5]") { TEST_CASE("JFJochReader_MasterFile", "[HDF5][Full]") { DiffractionExperiment x(DetJF(1)); + x.FilePrefix("test08").ImagesPerTrigger(950).OverwriteExistingFiles(true); x.BeamX_pxl(100).BeamY_pxl(200).DetectorDistance_mm(150) .IncidentEnergy_keV(WVL_1A_IN_KEV) .FrameTime(std::chrono::microseconds(500), std::chrono::microseconds(10)) .SetUnitCell(UnitCell{.a= 10, .b= 20, .c= 30, .alpha= 90, .beta= 101, .gamma = 90}); RegisterHDF5Filter(); - { StartMessage start_message; x.FillMessage(start_message); @@ -210,6 +211,8 @@ TEST_CASE("JFJochReader_PixelMask", "[HDF5][Full]") { pixel_mask[x.GetPixelsNum() - 1] = 4; pixel_mask[0] = 256; + ScanResultGenerator generator(x); + std::vector image(x.GetPixelsNum(), 0); { StartMessage start_message; @@ -223,9 +226,11 @@ TEST_CASE("JFJochReader_PixelMask", "[HDF5][Full]") { message.number = 0; REQUIRE_NOTHROW(file_set.WriteHDF5(message)); - + REQUIRE_NOTHROW(generator.Add(message)); EndMessage end_message; end_message.max_image_number = 1; + generator.FillEndMessage(end_message); + file_set.WriteHDF5(end_message); 
file_set.Finalize(); @@ -263,6 +268,7 @@ TEST_CASE("JFJochReader_Goniometer", "[HDF5][Full]") { RegisterHDF5Filter(); + ScanResultGenerator generator(x); std::vector image(x.GetPixelsNum(), 0); { StartMessage start_message; @@ -276,10 +282,12 @@ TEST_CASE("JFJochReader_Goniometer", "[HDF5][Full]") { message.number = i; REQUIRE_NOTHROW(file_set.WriteHDF5(message)); + REQUIRE_NOTHROW(generator.Add(message)); } EndMessage end_message; end_message.max_image_number = 5; + generator.FillEndMessage(end_message); file_set.WriteHDF5(end_message); file_set.Finalize(); @@ -320,6 +328,7 @@ TEST_CASE("JFJochReader_GridScan", "[HDF5][Full]") { x.ImportDatasetSettings(d); RegisterHDF5Filter(); + ScanResultGenerator generator(x); std::vector image(x.GetPixelsNum(), 0); { @@ -334,10 +343,12 @@ TEST_CASE("JFJochReader_GridScan", "[HDF5][Full]") { message.number = i; REQUIRE_NOTHROW(file_set.WriteHDF5(message)); + generator.Add(message); } EndMessage end_message; end_message.max_image_number = 5; + generator.FillEndMessage(end_message); file_set.WriteHDF5(end_message); file_set.Finalize(); @@ -378,6 +389,8 @@ TEST_CASE("JFJochReader_DataI16", "[HDF5][Full]") { image[2] = 456; image[3] = -3456; + ScanResultGenerator generator(x); + RegisterHDF5Filter(); { StartMessage start_message; @@ -395,12 +408,13 @@ TEST_CASE("JFJochReader_DataI16", "[HDF5][Full]") { message.bkg_estimate = i * 345.6; message.number = i; message.profile_radius = 123.09; - + generator.Add(message); REQUIRE_NOTHROW(file_set.WriteHDF5(message)); } EndMessage end_message; end_message.max_image_number = x.GetImageNum(); + generator.FillEndMessage(end_message); file_set.WriteHDF5(end_message); file_set.Finalize(); } @@ -529,6 +543,8 @@ TEST_CASE("JFJochReader_DataU16", "[HDF5][Full]") { image[1] = INT16_MAX; image[2] = 456; + ScanResultGenerator generator(x); + RegisterHDF5Filter(); { StartMessage start_message; @@ -547,10 +563,11 @@ TEST_CASE("JFJochReader_DataU16", "[HDF5][Full]") { message.number = i; 
REQUIRE_NOTHROW(file_set.WriteHDF5(message)); - + generator.Add(message); } EndMessage end_message; end_message.max_image_number = x.GetImageNum(); + generator.FillEndMessage(end_message); file_set.WriteHDF5(end_message); file_set.Finalize(); } @@ -597,6 +614,8 @@ TEST_CASE("JFJochReader_DataI32", "[HDF5][Full]") { image[1] = INT32_MIN; image[2] = 456; + ScanResultGenerator generator(x); + RegisterHDF5Filter(); { StartMessage start_message; @@ -613,10 +632,11 @@ TEST_CASE("JFJochReader_DataI32", "[HDF5][Full]") { message.number = i; REQUIRE_NOTHROW(file_set.WriteHDF5(message)); - + generator.Add(message); } EndMessage end_message; end_message.max_image_number = x.GetImageNum(); + generator.FillEndMessage(end_message); file_set.WriteHDF5(end_message); file_set.Finalize(); } @@ -663,6 +683,8 @@ TEST_CASE("JFJochReader_DataU32", "[HDF5][Full]") { image[3] = INT32_MAX; image[4] = INT32_MAX - 1; + ScanResultGenerator generator(x); + RegisterHDF5Filter(); { StartMessage start_message; @@ -678,11 +700,12 @@ TEST_CASE("JFJochReader_DataU32", "[HDF5][Full]") { message.spots = spots; message.number = i; + generator.Add(message); REQUIRE_NOTHROW(file_set.WriteHDF5(message)); - } EndMessage end_message; end_message.max_image_number = x.GetImageNum(); + generator.FillEndMessage(end_message); file_set.WriteHDF5(end_message); file_set.Finalize(); } @@ -731,6 +754,8 @@ TEST_CASE("JFJochReader_Summation", "[HDF5][Full]") { image_3[0] = INT16_MAX; image_2[1] = INT16_MIN; + ScanResultGenerator generator(x); + RegisterHDF5Filter(); { StartMessage start_message; @@ -745,17 +770,21 @@ TEST_CASE("JFJochReader_Summation", "[HDF5][Full]") { message.image = CompressedImage(image_1, x.GetXPixelsNum(), x.GetYPixelsNum()); message.number = 0; REQUIRE_NOTHROW(file_set.WriteHDF5(message)); + generator.Add(message); message.image = CompressedImage(image_2, x.GetXPixelsNum(), x.GetYPixelsNum()); message.number = 1; REQUIRE_NOTHROW(file_set.WriteHDF5(message)); + generator.Add(message); 
message.image = CompressedImage(image_3, x.GetXPixelsNum(), x.GetYPixelsNum()); message.number = 2; REQUIRE_NOTHROW(file_set.WriteHDF5(message)); + generator.Add(message); EndMessage end_message; end_message.max_image_number = x.GetImageNum(); + generator.FillEndMessage(end_message); file_set.WriteHDF5(end_message); file_set.Finalize(); } @@ -800,6 +829,8 @@ TEST_CASE("JFJochReader_Summation_5", "[HDF5][Full]") { image_3[0] = INT16_MAX; image_2[1] = INT16_MIN; + ScanResultGenerator generator(x); + RegisterHDF5Filter(); { StartMessage start_message; @@ -814,25 +845,31 @@ TEST_CASE("JFJochReader_Summation_5", "[HDF5][Full]") { message.image = CompressedImage(image_1, x.GetXPixelsNum(), x.GetYPixelsNum()); message.number = 0; REQUIRE_NOTHROW(file_set.WriteHDF5(message)); + generator.Add(message); message.image = CompressedImage(image_2, x.GetXPixelsNum(), x.GetYPixelsNum()); message.number = 1; REQUIRE_NOTHROW(file_set.WriteHDF5(message)); + generator.Add(message); message.image = CompressedImage(image_3, x.GetXPixelsNum(), x.GetYPixelsNum()); message.number = 2; REQUIRE_NOTHROW(file_set.WriteHDF5(message)); + generator.Add(message); message.image = CompressedImage(image_4, x.GetXPixelsNum(), x.GetYPixelsNum()); message.number = 3; REQUIRE_NOTHROW(file_set.WriteHDF5(message)); + generator.Add(message); message.image = CompressedImage(image_5, x.GetXPixelsNum(), x.GetYPixelsNum()); message.number = 4; REQUIRE_NOTHROW(file_set.WriteHDF5(message)); + generator.Add(message); EndMessage end_message; end_message.max_image_number = x.GetImageNum(); + generator.FillEndMessage(end_message); file_set.WriteHDF5(end_message); file_set.Finalize(); } @@ -860,82 +897,6 @@ TEST_CASE("JFJochReader_Summation_5", "[HDF5][Full]") { REQUIRE(H5Fget_obj_count(H5F_OBJ_ALL, H5F_OBJ_ALL) == 0); } -TEST_CASE("JFJochReader_ROI", "[HDF5][Full]") { - DiffractionExperiment x(DetJF(1)); - - x.FilePrefix("test25").ImagesPerTrigger(4).OverwriteExistingFiles(true); - 
x.BitDepthImage(16).ImagesPerFile(1).SetFileWriterFormat(FileWriterFormat::NXmxVDS).PixelSigned(false); - x.Compression(CompressionAlgorithm::NO_COMPRESSION); - - x.ROI().SetROI(ROIDefinition{ - .boxes = {ROIBox("beam", 100, 120, 20, 30)}, - .circles = {ROICircle("roi1", 500, 800, 10)} - }); - - std::vector image(x.GetPixelsNum()); - - RegisterHDF5Filter(); - { - StartMessage start_message; - x.FillMessage(start_message); - FileWriter file_set(start_message); - - for (int i = 0; i < x.GetImageNum(); i++) { - std::vector spots; - - DataMessage message{}; - message.image = CompressedImage(image, x.GetXPixelsNum(), x.GetYPixelsNum()); - message.spots = spots; - message.number = i; - message.roi["beam"] = ROIMessage{ - .sum = 12 + i, - .sum_square = (uint64_t) 123 * i, - .max_count = 123 - i, - .pixels = (uint64_t) 189 + i - }; - - message.roi["roi1"] = ROIMessage{ - .sum = 25 + i, - .sum_square = (uint64_t) 15 * i, - .max_count = 67 - i, - .pixels = (uint64_t) 95 + i - }; - REQUIRE_NOTHROW(file_set.WriteHDF5(message)); - - } - EndMessage end_message; - end_message.max_image_number = x.GetImageNum(); - file_set.WriteHDF5(end_message); - file_set.Finalize(); - } - { - JFJochHDF5Reader reader; - REQUIRE_NOTHROW(reader.ReadFile("test25_master.h5")); - auto dataset = reader.GetDataset(); - CHECK(dataset->experiment.GetImageNum() == 4); - - CHECK(dataset->roi.size() == 2); - REQUIRE(dataset->roi_max.size() == 2); - int index = 0; - if (dataset->roi[1] == "beam") - index = 1; - - REQUIRE(dataset->roi_max[index].size() == 4); - CHECK(dataset->roi_max[index][2] == 123 - 2); - CHECK(dataset->roi_sum_sq[index][0] == 0); - CHECK(dataset->roi_sum_sq[index][1] == 123); - CHECK(dataset->roi_npixel[index][3] == 189 + 3); - CHECK(dataset->roi_sum[1 - index][3] == 25 + 3); - } - remove("test25_master.h5"); - remove("test25_data_000001.h5"); - remove("test25_data_000002.h5"); - remove("test25_data_000003.h5"); - remove("test25_data_000004.h5"); -// No leftover HDF5 objects - 
REQUIRE(H5Fget_obj_count(H5F_OBJ_ALL, H5F_OBJ_ALL) == 0); -} - TEST_CASE("JFJochReader_Azint", "[HDF5][Full]") { DiffractionExperiment x(DetJF(1)); -- 2.52.0 From 8b5eb5a2086d7a9f9337b8539d0b3187f06e1117 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Fri, 8 May 2026 19:59:52 +0200 Subject: [PATCH 010/132] HDF5NXmx: Don't write nPeaks in HDF5NXmx - this needs to be consistent with spot finding results, so should come from spot finder --- writer/HDF5NXmx.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/writer/HDF5NXmx.cpp b/writer/HDF5NXmx.cpp index fdee855c..fdef2946 100644 --- a/writer/HDF5NXmx.cpp +++ b/writer/HDF5NXmx.cpp @@ -849,7 +849,6 @@ void NXmx::EndResultVectors(const EndMessage &end) { HDF5Group mx_group(*hdf5_file, "/entry/MX"); mx_group.NXClass("NXcollection"); - SaveVectorIfMissing(*hdf5_file, "/entry/MX/nPeaks", end.spot_count); SaveVectorIfMissing(*hdf5_file, "/entry/MX/peakCountIceRingRes", end.spot_count_ice_ring); SaveVectorIfMissing(*hdf5_file, "/entry/MX/peakCountLowRes", end.spot_count_low_res); SaveVectorIfMissing(*hdf5_file, "/entry/MX/peakCountIndexed", end.spot_count_indexed); @@ -862,8 +861,6 @@ void NXmx::EndResultVectors(const EndMessage &end) { SaveVectorIfMissing(*hdf5_file, "/entry/MX/imageScaleFactor", end.image_scale_factor); SaveVectorIfMissing(*hdf5_file, "/entry/MX/integratedReflections", end.integrated_reflections); - if (!end.niggli_class.empty()) { + if (!end.niggli_class.empty()) SaveVectorIfMissing(*hdf5_file, "/entry/MX/niggliClass", end.niggli_class); - SaveVectorIfMissing(*hdf5_file, "/entry/MX/niggli_class", end.niggli_class); - } } -- 2.52.0 From 1b8c528ed2e2fe3091220c0f93c456deb8b3d348 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Fri, 8 May 2026 20:02:46 +0200 Subject: [PATCH 011/132] HDF5NXmx: Clean-up --- writer/HDF5NXmx.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/writer/HDF5NXmx.cpp b/writer/HDF5NXmx.cpp index fdef2946..08603294 100644 --- 
a/writer/HDF5NXmx.cpp +++ b/writer/HDF5NXmx.cpp @@ -125,6 +125,7 @@ void NXmx::LinkToData_VDS(const StartMessage &start, const EndMessage &end) { VDS(start, "/entry/MX/peakYPosRaw",{total_images, start.max_spot_count}, HDF5DataType(0.0f)); VDS(start, "/entry/MX/peakTotalIntensity",{total_images, start.max_spot_count}, HDF5DataType(0.0f)); VDS(start, "/entry/MX/peakIceRingRes", {total_images, start.max_spot_count}, HDF5DataType((uint8_t) 0)); + VDS(start, "/entry/MX/nPeaks", {total_images}, HDF5DataType((uint32_t) 0)); } if (start.indexing_algorithm != IndexingAlgorithmEnum::None) { @@ -136,16 +137,6 @@ void NXmx::LinkToData_VDS(const StartMessage &start, const EndMessage &end) { VDS(start, "/entry/MX/latticeIndexed", {total_images,9}, HDF5DataType((float) 0))->Units("Angstrom"); } - if (start.geom_refinement_algorithm != GeomRefinementAlgorithmEnum::None) { - VDS(start, "/entry/detector/beam_center_x", - "/entry/instrument/detector/refined_beam_center_x", - {total_images}, - HDF5DataType(0.0f)); - VDS(start, "/entry/detector/beam_center_y", - "/entry/instrument/detector/refined_beam_center_y", - {total_images}, - HDF5DataType(0.0f)); - } if (!start.az_int_bin_to_q.empty()) { size_t azimuthal_bins = start.az_int_phi_bin_count.value_or(1); size_t q_bins = start.az_int_q_bin_count.value_or(1); -- 2.52.0 From 0503ef02716fd8ccc402115dd9c8e08bef397f86 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Fri, 8 May 2026 20:06:41 +0200 Subject: [PATCH 012/132] jfjoch_process: Adapt behavior to write file with processing results and link to original images (to be tested) --- tools/jfjoch_process.cpp | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/tools/jfjoch_process.cpp b/tools/jfjoch_process.cpp index a54d93a5..80cb813a 100644 --- a/tools/jfjoch_process.cpp +++ b/tools/jfjoch_process.cpp @@ -376,15 +376,6 @@ int main(int argc, char **argv) { start_message.write_images = write_output; start_message.file_format = 
FileWriterFormat::NXmxIntegrated; - std::unique_ptr writer; - try { - if (!output_prefix.empty()) - writer = std::make_unique(start_message); - } catch (const std::exception &e) { - logger.Error("Failed to initialize file writer: {}", e.what()); - exit(EXIT_FAILURE); - } - // 4. Processing Setup int total_images_in_file = reader.GetNumberOfImages(); if (end_image < 0 || end_image > total_images_in_file) @@ -405,6 +396,22 @@ int main(int argc, char **argv) { std::atomic total_uncompressed_bytes = 0; std::atomic max_image_number_sent = 0; + start_message.file_format = FileWriterFormat::NXmxIntegrated; + start_message.write_master_file = true; + start_message.write_images = false; + start_message.number_of_images = images_to_process; + start_message.images_per_file = images_to_process; + start_message.hdf5_source_data = reader.GetHDF5DataSource(start_image, images_to_process); + + std::unique_ptr writer; + try { + if (!output_prefix.empty()) + writer = std::make_unique(start_message); + } catch (const std::exception &e) { + logger.Error("Failed to initialize file writer: {}", e.what()); + exit(EXIT_FAILURE); + } + // Mimic JFJochReceiver lattice handling (IndexAndRefine handles the logic per thread, // but we need a central accumulator or use the pool's functionality if IndexAndRefine wraps it) // Here we will use per-thread IndexAndRefine which uses the shared thread pool. 
-- 2.52.0 From 93914e1fb98597a59e65c996f2958b07210ba7ef Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Fri, 8 May 2026 20:09:46 +0200 Subject: [PATCH 013/132] jfjoch_process: Fix typo --- tools/jfjoch_process.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/jfjoch_process.cpp b/tools/jfjoch_process.cpp index 80cb813a..f4857a22 100644 --- a/tools/jfjoch_process.cpp +++ b/tools/jfjoch_process.cpp @@ -623,7 +623,7 @@ int main(int argc, char **argv) { } if (scale_result) { - end_msg.scale_factor = scale_result->image_scale_g; + end_msg.image_scale_factor = scale_result->image_scale_g; logger.Info("Scaling completed in {:.2f} s ({} unique reflections)", scale_time, scale_result->merged.size()); -- 2.52.0 From d71899933512c3501bb9333087e3be61077b72f9 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Sat, 9 May 2026 08:04:04 +0200 Subject: [PATCH 014/132] FileWriter: Allow to save master files with custom suffix (like _process.h5) --- common/JFJochMessages.h | 1 + tools/jfjoch_process.cpp | 1 + viewer/JFJochViewerMenu.cpp | 2 +- writer/HDF5NXmx.cpp | 2 ++ 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/common/JFJochMessages.h b/common/JFJochMessages.h index da24ebe8..024a19bc 100644 --- a/common/JFJochMessages.h +++ b/common/JFJochMessages.h @@ -294,6 +294,7 @@ struct StartMessage { std::optional detect_ice_rings; std::vector hdf5_source_data; + std::optional master_suffix; }; struct EndMessage { diff --git a/tools/jfjoch_process.cpp b/tools/jfjoch_process.cpp index f4857a22..ee829196 100644 --- a/tools/jfjoch_process.cpp +++ b/tools/jfjoch_process.cpp @@ -375,6 +375,7 @@ int main(int argc, char **argv) { start_message.max_spot_count = experiment.GetMaxSpotCount(); start_message.write_images = write_output; start_message.file_format = FileWriterFormat::NXmxIntegrated; + start_message.master_suffix = "process"; // 4. 
Processing Setup int total_images_in_file = reader.GetNumberOfImages(); diff --git a/viewer/JFJochViewerMenu.cpp b/viewer/JFJochViewerMenu.cpp index 7a560b1a..ff630a4d 100644 --- a/viewer/JFJochViewerMenu.cpp +++ b/viewer/JFJochViewerMenu.cpp @@ -109,7 +109,7 @@ void JFJochViewerMenu::openSelected() { this, "Open File", // Dialog title "", // Default folder - "HDF5 Master Files (*_master.h5);; HDF5 Files (*.h5);;All Files (*)" // Filter for .h5 files + "HDF5 Master Files (*_master.h5 *_process.h5);; HDF5 Files (*.h5);;All Files (*)" // Filter for .h5 files ); if (!fileName.isEmpty()) diff --git a/writer/HDF5NXmx.cpp b/writer/HDF5NXmx.cpp index 08603294..9f0c45ee 100644 --- a/writer/HDF5NXmx.cpp +++ b/writer/HDF5NXmx.cpp @@ -12,6 +12,8 @@ #include "gemmi/symmetry.hpp" std::string HDF5Metadata::MasterFileName(const StartMessage &start) { + if (start.master_suffix.has_value()) + return fmt::format("{:s}_{:s}.h5", start.file_prefix, start.master_suffix.value()); return fmt::format("{:s}_master.h5", start.file_prefix); } -- 2.52.0 From 38c2826c09086dc9c9d3947380b73ba3210a05b3 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Sat, 9 May 2026 08:39:07 +0200 Subject: [PATCH 015/132] JFJochHDF5Reader: Add ReadReflections function --- reader/JFJochHDF5Reader.cpp | 48 ++++++++++--- reader/JFJochHDF5Reader.h | 2 + tests/JFJochReaderTest.cpp | 130 ++++++++++++++++++++++++++++++++++++ 3 files changed, 172 insertions(+), 8 deletions(-) diff --git a/reader/JFJochHDF5Reader.cpp b/reader/JFJochHDF5Reader.cpp index bb4945b0..8da235cb 100644 --- a/reader/JFJochHDF5Reader.cpp +++ b/reader/JFJochHDF5Reader.cpp @@ -131,7 +131,7 @@ std::string dataset_name(const std::string &path) { bool ReadReflectionsFromGroup(HDF5Object &file, const std::string &image_group_name, - DataMessage &message) { + std::vector &reflections) { if (!file.Exists("/entry/reflections") || !file.Exists(image_group_name)) return false; @@ -185,11 +185,9 @@ bool ReadReflectionsFromGroup(HDF5Object &file, .partiality = 
partiality_val, .zeta = zeta_val }; - message.reflections.emplace_back(r); + reflections.emplace_back(r); } - CalcISigma(message); - CalcWilsonBFactor(message, !message.b_factor.has_value()); return true; } @@ -926,9 +924,12 @@ bool JFJochHDF5Reader::LoadImage_i(std::shared_ptr &dataset const std::string master_reflection_group_name = fmt::format("/entry/reflections/image_{:06d}", image_number); const std::string source_reflection_group_name = fmt::format("/entry/reflections/image_{:06d}", image_id); - if (!ReadReflectionsFromGroup(*master_file, master_reflection_group_name, message)) - ReadReflectionsFromGroup(*source_file, source_reflection_group_name, message); - + if (!ReadReflectionsFromGroup(*master_file, master_reflection_group_name, message.reflections)) + ReadReflectionsFromGroup(*source_file, source_reflection_group_name, message.reflections); + if (!message.reflections.empty()) { + CalcISigma(message); + CalcWilsonBFactor(message, !message.b_factor.has_value()); + } return true; } @@ -1130,4 +1131,35 @@ std::vector JFJochHDF5Reader::GetHDF5DataSource( throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Unsupported HDF5 file layout for source mapping"); -} \ No newline at end of file +} + +std::vector > JFJochHDF5Reader::ReadReflections(size_t start_image, std::optional end_image) const +{ + std::unique_lock ul(hdf5_mutex); + if (start_image >= number_of_images) + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "start_image must be less than number_of_images"); + + if (end_image.has_value() && end_image.value() < start_image) + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "end_image must be greater than start_image if provided"); + + int end_image_val = end_image.value_or(number_of_images); + + if (end_image_val > number_of_images) + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "end_image_val must be less than or equal to number_of_images"); + if 
(!master_file) + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "Cannot read all reflections if file not loaded"); + + std::vector > ret(end_image_val - start_image); + + for (int i = 0; i < end_image_val - start_image; ++i) + ReadReflectionsFromGroup(*master_file, + fmt::format("/entry/reflections/image_{:06d}", start_image + i), + ret[i]); + + return ret; + +} diff --git a/reader/JFJochHDF5Reader.h b/reader/JFJochHDF5Reader.h index 8f3781e4..4f3d39dc 100644 --- a/reader/JFJochHDF5Reader.h +++ b/reader/JFJochHDF5Reader.h @@ -52,6 +52,8 @@ public: std::optional image_count = {} ) const; + std::vector> ReadReflections(size_t start_image = 0, std::optional end_image = {}) const; + CompressedImage ReadCalibration(std::vector &tmp, const std::string &name) const; std::shared_ptr GetRawImage(int64_t image_number) override; diff --git a/tests/JFJochReaderTest.cpp b/tests/JFJochReaderTest.cpp index 2cd74eb6..83501b43 100644 --- a/tests/JFJochReaderTest.cpp +++ b/tests/JFJochReaderTest.cpp @@ -1904,5 +1904,135 @@ TEST_CASE("JFJochReader_ProcessingHDF5_FromVDS_MapsToDataFiles", "[HDF5][Full]") remove("proc_source_vds_data_000002.h5"); remove("proc_source_vds_data_000003.h5"); remove("proc_from_vds_master.h5"); + REQUIRE(H5Fget_obj_count(H5F_OBJ_ALL, H5F_OBJ_ALL) == 0); +} + +TEST_CASE("JFJochReader_ReadReflections_VDS", "[HDF5][Full]") { + DiffractionExperiment x(DetJF(1)); + + x.FilePrefix("read_reflections_vds") + .ImagesPerTrigger(4) + .ImagesPerFile(1) + .OverwriteExistingFiles(true) + .BitDepthImage(16) + .PixelSigned(true) + .SetFileWriterFormat(FileWriterFormat::NXmxVDS) + .IndexingAlgorithm(IndexingAlgorithmEnum::FFT) + .Compression(CompressionAlgorithm::NO_COMPRESSION); + + std::vector image(x.GetPixelsNum(), 0); + + RegisterHDF5Filter(); + + { + StartMessage start_message; + x.FillMessage(start_message); + + FileWriter writer(start_message); + ScanResultGenerator scan_result(x); + + for (int i = 0; i < x.GetImageNum(); i++) { + DataMessage 
message{}; + message.image = CompressedImage(image, x.GetXPixelsNum(), x.GetYPixelsNum()); + message.number = i; + + if (i == 1 || i == 3) { + message.integrated_reflections = 2; + message.reflections = { + Reflection{ + .h = static_cast(10 + i), + .k = 20, + .l = 30, + .delta_phi_deg = 0.1f, + .predicted_x = 100.0f + static_cast(i), + .predicted_y = 200.0f + static_cast(i), + .d = 1.5f, + .I = 1000.0f + static_cast(i), + .bkg = 10.0f, + .sigma = 2.0f, + .rlp = 1.0f, + .partiality = 0.9f, + .zeta = 0.01f + }, + Reflection{ + .h = static_cast(40 + i), + .k = 50, + .l = 60, + .delta_phi_deg = 0.2f, + .predicted_x = 300.0f + static_cast(i), + .predicted_y = 400.0f + static_cast(i), + .d = 2.5f, + .I = 2000.0f + static_cast(i), + .bkg = 20.0f, + .sigma = 3.0f, + .rlp = 1.0f, + .partiality = 0.8f, + .zeta = 0.02f + } + }; + } + + REQUIRE_NOTHROW(writer.WriteHDF5(message)); + scan_result.Add(message); + } + + EndMessage end_message; + end_message.max_image_number = x.GetImageNum(); + scan_result.FillEndMessage(end_message); + + writer.WriteHDF5(end_message); + writer.Finalize(); + } + + { + JFJochHDF5Reader reader; + REQUIRE_NOTHROW(reader.ReadFile("read_reflections_vds_master.h5")); + + auto reflections = reader.ReadReflections(); + + REQUIRE(reflections.size() == 4); + + CHECK(reflections[0].empty()); + + REQUIRE(reflections[1].size() == 2); + CHECK(reflections[1][0].h == 11); + CHECK(reflections[1][0].k == 20); + CHECK(reflections[1][0].l == 30); + CHECK(reflections[1][0].I == Catch::Approx(1001.0f)); + CHECK(reflections[1][0].predicted_x == Catch::Approx(101.0f)); + CHECK(reflections[1][0].predicted_y == Catch::Approx(201.0f)); + + CHECK(reflections[2].empty()); + + REQUIRE(reflections[3].size() == 2); + CHECK(reflections[3][0].h == 13); + CHECK(reflections[3][0].I == Catch::Approx(1003.0f)); + CHECK(reflections[3][1].h == 43); + CHECK(reflections[3][1].I == Catch::Approx(2003.0f)); + } + + { + JFJochHDF5Reader reader; + 
REQUIRE_NOTHROW(reader.ReadFile("read_reflections_vds_master.h5")); + + auto reflections = reader.ReadReflections(1, 4); + + REQUIRE(reflections.size() == 3); + + REQUIRE(reflections[0].size() == 2); // original image 1 + CHECK(reflections[0][0].h == 11); + + CHECK(reflections[1].empty()); // original image 2 + + REQUIRE(reflections[2].size() == 2); // original image 3 + CHECK(reflections[2][0].h == 13); + } + + remove("read_reflections_vds_master.h5"); + remove("read_reflections_vds_data_000001.h5"); + remove("read_reflections_vds_data_000002.h5"); + remove("read_reflections_vds_data_000003.h5"); + remove("read_reflections_vds_data_000004.h5"); + REQUIRE(H5Fget_obj_count(H5F_OBJ_ALL, H5F_OBJ_ALL) == 0); } \ No newline at end of file -- 2.52.0 From cf2ca90fb74c5ea39309892b4d1082b8bc09fab8 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Sat, 9 May 2026 18:17:11 +0200 Subject: [PATCH 016/132] jfjoch_test: Improve a bit XtalOptimizer_hexagonal test hoping for more consistent result --- tests/XtalOptimizerTest.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/XtalOptimizerTest.cpp b/tests/XtalOptimizerTest.cpp index f968b150..1399f0c0 100644 --- a/tests/XtalOptimizerTest.cpp +++ b/tests/XtalOptimizerTest.cpp @@ -300,13 +300,13 @@ TEST_CASE("XtalOptimizer_hexagonal") { xtal_opt.geom.BeamX_pxl(1007).BeamY_pxl(990).DetectorDistance_mm(200) .PoniRot1_rad(0.01).PoniRot2_rad(0.02); xtal_opt.crystal_system = gemmi::CrystalSystem::Hexagonal; - xtal_opt.max_time = 30.0; + xtal_opt.max_time = 60.0; auto start = std::chrono::high_resolution_clock::now(); - REQUIRE(XtalOptimizer(xtal_opt, spots)); + bool ret = XtalOptimizer(xtal_opt, spots); auto end = std::chrono::high_resolution_clock::now(); std::cout << "XtalOptimizer took " << std::chrono::duration_cast(end - start).count() << " microseconds" << std::endl; - + REQUIRE(ret); auto uc_i = latt_i.GetUnitCell(); auto uc_o = xtal_opt.latt.GetUnitCell(); -- 2.52.0 From 
cc42ae6bf66183a7a851dd00f66484df31830769 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Sat, 9 May 2026 20:43:39 +0200 Subject: [PATCH 017/132] jfjoch_process: Fix bounds for VDS mapping --- tools/jfjoch_process.cpp | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/tools/jfjoch_process.cpp b/tools/jfjoch_process.cpp index ee829196..21f88512 100644 --- a/tools/jfjoch_process.cpp +++ b/tools/jfjoch_process.cpp @@ -395,7 +395,6 @@ int main(int argc, char **argv) { std::atomic processed_count = 0; std::atomic total_uncompressed_bytes = 0; - std::atomic max_image_number_sent = 0; start_message.file_format = FileWriterFormat::NXmxIntegrated; start_message.write_master_file = true; @@ -449,7 +448,8 @@ int main(int argc, char **argv) { DataMessage msg{}; msg.image = img->image; - msg.number = image_idx; + msg.number = current_idx_offset; + msg.original_number = image_idx; msg.image_collection_efficiency = dataset->efficiency[image_idx]; total_uncompressed_bytes += msg.image.GetUncompressedSize(); @@ -477,13 +477,6 @@ int main(int argc, char **argv) { if (writer) writer->Write(msg); - // Update max sent tracking - uint64_t current_max = max_image_number_sent.load(); - while (static_cast(msg.number) > current_max) { - if (max_image_number_sent.compare_exchange_weak(current_max, static_cast(msg.number))) - break; - } - finished_count.fetch_add(1); // Progress log @@ -529,7 +522,7 @@ int main(int argc, char **argv) { // 5. 
Finalize Statistics and Write EndMessage EndMessage end_msg; - end_msg.max_image_number = max_image_number_sent; + end_msg.max_image_number = images_to_process; end_msg.images_collected_count = images_to_process; end_msg.images_sent_to_write_count = images_to_process; end_msg.end_date = time_UTC(std::chrono::system_clock::now()); -- 2.52.0 From e392a3ae1bf4315207cd4c1b51b24781368616b8 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Sun, 10 May 2026 10:34:24 +0200 Subject: [PATCH 018/132] ScaleAndMerge: Simplify merge and go back to "standard" weights --- image_analysis/scale_merge/ScaleAndMerge.cpp | 100 ++++++++----------- 1 file changed, 41 insertions(+), 59 deletions(-) diff --git a/image_analysis/scale_merge/ScaleAndMerge.cpp b/image_analysis/scale_merge/ScaleAndMerge.cpp index d5baccf0..6b60439b 100644 --- a/image_analysis/scale_merge/ScaleAndMerge.cpp +++ b/image_analysis/scale_merge/ScaleAndMerge.cpp @@ -48,14 +48,6 @@ namespace { } }; - inline int RoundImageId(float image_number, double rounding_step) { - if (!(rounding_step > 0.0)) - rounding_step = 1.0; - const double x = static_cast(image_number) / rounding_step; - const double r = std::round(x) * rounding_step; - return static_cast(std::llround(r / rounding_step)); - } - inline double SafeSigma(double s, double min_sigma) { if (!std::isfinite(s) || s <= 0.0) return min_sigma; @@ -235,13 +227,7 @@ namespace { int img_id = 0; int hkl_slot = -1; double sigma = 0.0; - }; - - struct CorrectedObs { - int hkl_slot; - double I_corr; - double sigma_corr; - double weight; + double correction = NAN; }; void scale(const ScaleMergeOptions &opt, @@ -403,35 +389,39 @@ namespace { std::cout << summary.FullReport() << std::endl; } - void merge(size_t nhkl, ScaleMergeResult &out, const std::vector &corr_obs) { - // ---- Merge (XDS/XSCALE style: inverse-variance weighted mean) ---- + void merge(size_t nhkl, ScaleMergeResult &out, const std::vector &obs) { + // ---- Classical crystallographic merging: inverse-variance 
weighted mean ---- + // For each observation: I_corr = I_obs / correction + // sigma_corr = sigma_obs / correction + // w = 1 / sigma_corr^2 = correction^2 / sigma_obs^2 struct HKLAccum { - double sum_wI = 0.0; - double sum_w = 0.0; - double sum_wsigma2 = 0.0; + double sum_wI = 0.0; // sum of w * I_corr + double sum_w = 0.0; // sum of w }; std::vector accum(nhkl); - for (const auto &co: corr_obs) { - const double w = co.weight / (co.sigma_corr * co.sigma_corr); - auto &a = accum[co.hkl_slot]; - a.sum_wI += w * co.I_corr; - a.sum_w += w; - a.sum_wsigma2 += w * w * co.sigma_corr * co.sigma_corr; + for (const auto &o : obs) { + if (o.correction <= 0.0 || o.r == nullptr) + continue; + const double I_corr = static_cast(o.r->I) * o.correction; + const double sigma_corr = o.sigma * o.correction; + const double w = 1.0 / (sigma_corr * sigma_corr); + auto &a = accum[o.hkl_slot]; + a.sum_wI += w * I_corr; + a.sum_w += w; } for (int h = 0; h < nhkl; ++h) { const auto &a = accum[h]; if (a.sum_w <= 0.0) continue; - - out.merged[h].I = a.sum_wI / a.sum_w; - out.merged[h].sigma = std::sqrt(a.sum_wsigma2) / a.sum_w; + out.merged[h].I = a.sum_wI / a.sum_w; + out.merged[h].sigma = 1.0 / std::sqrt(a.sum_w); // propagated sigma of the weighted mean } } - void stats(const ScaleMergeOptions &opt, size_t nhkl, ScaleMergeResult &out, const std::vector &corr_obs) + void stats(const ScaleMergeOptions &opt, size_t nhkl, ScaleMergeResult &out, const std::vector &obs) // ---- Compute per-shell merging statistics ---- { constexpr int kStatShells = 10; @@ -477,13 +467,16 @@ namespace { }; std::vector per_hkl(nhkl); - for (const auto &co: corr_obs) { - if (hkl_shell[co.hkl_slot] < 0) + for (const auto &o: obs) { + if (o.correction <= 0.0) continue; - auto &hs = per_hkl[co.hkl_slot]; - hs.sum_I += co.I_corr; + if (hkl_shell[o.hkl_slot] < 0) + continue; + const double I_corr = static_cast(o.r->I) * o.correction; + auto &hs = per_hkl[o.hkl_slot]; + hs.sum_I += I_corr; hs.n += 1; - 
hs.I_list.push_back(co.I_corr); + hs.I_list.push_back(I_corr); } // Accumulators per shell @@ -592,31 +585,28 @@ namespace { std::vector &g, std::vector &mosaicity, std::vector &R_sq, - const std::vector &obs, - std::vector &corr_obs) { + std::vector &obs) { // ---- Compute corrected observations once (used for both merging and statistics) ---- const double half_wedge = opt.wedge_deg.value_or(0.0) / 2.0; - for (const auto &o: obs) { + for (auto &o: obs) { const Reflection &r = *o.r; - const double lp = SafeInv(static_cast(r.rlp), 1.0); + const double lp = SafeInv(static_cast(r.rlp), 1.0); const double G_i = g[o.img_id]; - // Compute partiality with refined mosaicity double partiality = 1.0; - switch (opt.partiality_model) { case ScaleMergeOptions::PartialityModel::Fixed: partiality = r.partiality; break; case ScaleMergeOptions::PartialityModel::Rotation: { - const double c1 = r.zeta / std::sqrt(2.0); - const double arg_plus = (r.delta_phi_deg + half_wedge) * c1 / mosaicity[o.img_id]; + const double c1 = r.zeta / std::sqrt(2.0); + const double arg_plus = (r.delta_phi_deg + half_wedge) * c1 / mosaicity[o.img_id]; const double arg_minus = (r.delta_phi_deg - half_wedge) * c1 / mosaicity[o.img_id]; partiality = (std::erf(arg_plus) - std::erf(arg_minus)) / 2.0; - } break; + } case ScaleMergeOptions::PartialityModel::Still: partiality = std::exp(-r.dist_ewald * r.dist_ewald / R_sq[o.img_id]); break; @@ -624,19 +614,13 @@ namespace { break; } - if (partiality <= opt.min_partiality_for_merge) + if (partiality <= opt.min_partiality_for_merge) { + o.correction = 0.0; // mark as excluded continue; + } const double correction = G_i * partiality * lp; - if (correction <= 0.0) - continue; - - corr_obs.push_back({ - o.hkl_slot, - static_cast(r.I) / correction, - o.sigma / correction, - 1 / correction - }); + o.correction = (correction > 0.0) ? 
1.0 / correction : 0.0; } } @@ -705,10 +689,8 @@ ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector obs; - std::vector corr_obs; obs.reserve(nrefl); - corr_obs.reserve(nrefl); std::unordered_map hklToSlot; hklToSlot.reserve(nrefl); @@ -781,9 +763,9 @@ ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector Date: Sun, 10 May 2026 10:42:56 +0200 Subject: [PATCH 019/132] ScaleAndMerge: Add "just" merge function for now --- image_analysis/scale_merge/ScaleAndMerge.cpp | 103 +++++++++++++++++-- image_analysis/scale_merge/ScaleAndMerge.h | 7 +- 2 files changed, 103 insertions(+), 7 deletions(-) diff --git a/image_analysis/scale_merge/ScaleAndMerge.cpp b/image_analysis/scale_merge/ScaleAndMerge.cpp index 6b60439b..d2027c8c 100644 --- a/image_analysis/scale_merge/ScaleAndMerge.cpp +++ b/image_analysis/scale_merge/ScaleAndMerge.cpp @@ -390,11 +390,9 @@ namespace { } void merge(size_t nhkl, ScaleMergeResult &out, const std::vector &obs) { - // ---- Classical crystallographic merging: inverse-variance weighted mean ---- - // For each observation: I_corr = I_obs / correction - // sigma_corr = sigma_obs / correction - // w = 1 / sigma_corr^2 = correction^2 / sigma_obs^2 - + // Merging + // For weighting, we are extra multiplying weight by total correction value to down-weight reflections + // which come from very weak images and/or low partiality struct HKLAccum { double sum_wI = 0.0; // sum of w * I_corr double sum_w = 0.0; // sum of w @@ -406,7 +404,7 @@ namespace { continue; const double I_corr = static_cast(o.r->I) * o.correction; const double sigma_corr = o.sigma * o.correction; - const double w = 1.0 / (sigma_corr * sigma_corr); + const double w = o.correction / (sigma_corr * sigma_corr); auto &a = accum[o.hkl_slot]; a.sum_wI += w * I_corr; a.sum_w += w; @@ -769,3 +767,96 @@ ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector> &observations, + const ScaleMergeOptions &opt) { + size_t nrefl = 0; + for (const auto &i: observations) + 
nrefl += i.size(); + + std::vector obs; + obs.reserve(nrefl); + + std::unordered_map hklToSlot; + hklToSlot.reserve(nrefl); + + for (int i = 0; i < (int)observations.size(); i++) { + for (const auto &r: observations[i]) { + if (!std::isfinite(r.I) || !std::isfinite(r.d) || r.d <= 0.0f) + continue; + if (opt.d_min_limit_A > 0.0 && r.d < opt.d_min_limit_A) + continue; + if (!std::isfinite(r.rlp) || r.rlp == 0.0f) + continue; + if (r.partiality <= opt.min_partiality_for_merge) + continue; + + // correction stored as 1 / (G * partiality * LP), with G = 1 + const double lp = SafeInv(static_cast(r.rlp), 1.0); + const double correction = SafeInv(r.partiality * lp, 0.0); + if (correction <= 0.0) + continue; + + int hkl_slot; + try { + const HKLKey key = CanonicalizeHKLKey(r, opt); + auto it = hklToSlot.find(key); + if (it == hklToSlot.end()) { + hkl_slot = static_cast(hklToSlot.size()); + hklToSlot.emplace(key, hkl_slot); + } else { + hkl_slot = it->second; + } + } catch (...) { + continue; + } + + ObsRef o; + o.r = &r; + o.img_id = i; + o.hkl_slot = hkl_slot; + o.sigma = SafeSigma(r.sigma, opt.min_sigma); + o.correction = correction; + obs.push_back(o); + } + } + + const int nhkl = static_cast(hklToSlot.size()); + + std::vector slotToHKL(nhkl); + for (const auto &kv: hklToSlot) + slotToHKL[kv.second] = kv.first; + + ScaleMergeResult out; + out.merged.resize(nhkl); + for (int h = 0; h < nhkl; ++h) { + out.merged[h].h = slotToHKL[h].h; + out.merged[h].k = slotToHKL[h].k; + out.merged[h].l = slotToHKL[h].l; + out.merged[h].I = 0.0; + out.merged[h].sigma = 0.0; + out.merged[h].d = 0.0; + } + + // Populate d from median of observations per HKL + { + std::vector> per_hkl_d(nhkl); + for (const auto &o: obs) { + const double d_val = static_cast(o.r->d); + if (std::isfinite(d_val) && d_val > 0.0) + per_hkl_d[o.hkl_slot].push_back(d_val); + } + for (int h = 0; h < nhkl; ++h) { + auto &v = per_hkl_d[h]; + if (!v.empty()) { + std::nth_element(v.begin(), v.begin() + (long)(v.size() / 
2), v.end()); + out.merged[h].d = v[v.size() / 2]; + } + } + } + + merge(nhkl, out, obs); + stats(opt, nhkl, out, obs); + + return out; +} diff --git a/image_analysis/scale_merge/ScaleAndMerge.h b/image_analysis/scale_merge/ScaleAndMerge.h index d154e941..e92b7e69 100644 --- a/image_analysis/scale_merge/ScaleAndMerge.h +++ b/image_analysis/scale_merge/ScaleAndMerge.h @@ -91,4 +91,9 @@ struct ScaleMergeResult { }; ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector>& observations, - const ScaleMergeOptions& opt = {}); \ No newline at end of file + const ScaleMergeOptions& opt = {}); + +/// Merge reflections without any scaling (G = 1, partiality and LP taken as-is from the Reflection). +/// Uses the same HKL canonicalization and statistics as ScaleAndMergeReflectionsCeres. +ScaleMergeResult MergeReflections(const std::vector>& observations, + const ScaleMergeOptions& opt = {}); \ No newline at end of file -- 2.52.0 From 81af116b4de829416c9d40cb4f52ab76a869c081 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Sun, 10 May 2026 10:51:01 +0200 Subject: [PATCH 020/132] ScaleAndMerge: Minor clean-up --- image_analysis/scale_merge/ScaleAndMerge.cpp | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/image_analysis/scale_merge/ScaleAndMerge.cpp b/image_analysis/scale_merge/ScaleAndMerge.cpp index d2027c8c..aba8d3ee 100644 --- a/image_analysis/scale_merge/ScaleAndMerge.cpp +++ b/image_analysis/scale_merge/ScaleAndMerge.cpp @@ -20,9 +20,9 @@ namespace { struct HKLKey { - int64_t h = 0; - int64_t k = 0; - int64_t l = 0; + int32_t h = 0; + int32_t k = 0; + int32_t l = 0; bool is_positive = true; // only relevant if opt.merge_friedel == false bool operator==(const HKLKey &o) const noexcept { @@ -60,12 +60,6 @@ namespace { return d; } - inline int SafeToInt(int64_t x) { - if (x < std::numeric_limits::min() || x > std::numeric_limits::max()) - throw std::out_of_range("HKL index out of int range for Gemmi"); - return static_cast(x); - } - 
inline double SafeInv(double x, double fallback) { if (!std::isfinite(x) || x == 0.0) return fallback; @@ -97,7 +91,7 @@ namespace { const gemmi::GroupOps gops = sg.operations(); const gemmi::ReciprocalAsu rasu(&sg); - const gemmi::Op::Miller in{{SafeToInt(r.h), SafeToInt(r.k), SafeToInt(r.l)}}; + const gemmi::Op::Miller in{r.h, r.k, r.l}; const auto [asu_hkl, sign_plus] = rasu.to_asu_sign(in, gops); key.h = asu_hkl[0]; -- 2.52.0 From a90ec13418b607dba965196d8f642ec92ae79648 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Sun, 10 May 2026 13:39:32 +0200 Subject: [PATCH 021/132] Split scaling and merge into separate files (WIP) --- image_analysis/scale_merge/CMakeLists.txt | 10 +- image_analysis/scale_merge/HKLKey.cpp | 46 + image_analysis/scale_merge/HKLKey.h | 21 + image_analysis/scale_merge/Merge.cpp | 340 ++++++++ image_analysis/scale_merge/Merge.h | 88 ++ image_analysis/scale_merge/ScaleAll.cpp | 461 ++++++++++ image_analysis/scale_merge/ScaleAll.h | 9 + image_analysis/scale_merge/ScaleAndMerge.cpp | 856 ------------------- image_analysis/scale_merge/ScaleAndMerge.h | 99 --- image_analysis/scale_merge/ScaleOnTheFly.cpp | 5 + image_analysis/scale_merge/ScaleOnTheFly.h | 10 + 11 files changed, 988 insertions(+), 957 deletions(-) create mode 100644 image_analysis/scale_merge/HKLKey.cpp create mode 100644 image_analysis/scale_merge/HKLKey.h create mode 100644 image_analysis/scale_merge/Merge.cpp create mode 100644 image_analysis/scale_merge/Merge.h create mode 100644 image_analysis/scale_merge/ScaleAll.cpp create mode 100644 image_analysis/scale_merge/ScaleAll.h delete mode 100644 image_analysis/scale_merge/ScaleAndMerge.cpp delete mode 100644 image_analysis/scale_merge/ScaleAndMerge.h create mode 100644 image_analysis/scale_merge/ScaleOnTheFly.cpp create mode 100644 image_analysis/scale_merge/ScaleOnTheFly.h diff --git a/image_analysis/scale_merge/CMakeLists.txt b/image_analysis/scale_merge/CMakeLists.txt index b1ab0985..3f06312e 100644 --- 
a/image_analysis/scale_merge/CMakeLists.txt +++ b/image_analysis/scale_merge/CMakeLists.txt @@ -1,4 +1,10 @@ -ADD_LIBRARY(JFJochScaleMerge ScaleAndMerge.cpp ScaleAndMerge.h FrenchWilson.cpp FrenchWilson.h +ADD_LIBRARY(JFJochScaleMerge ScaleAll.cpp ScaleAll.h FrenchWilson.cpp FrenchWilson.h SearchSpaceGroup.cpp - SearchSpaceGroup.h) + SearchSpaceGroup.h + Merge.cpp + Merge.h + ScaleOnTheFly.cpp + ScaleOnTheFly.h + HKLKey.cpp + HKLKey.h) TARGET_LINK_LIBRARIES(JFJochScaleMerge Ceres::ceres Eigen3::Eigen JFJochCommon) \ No newline at end of file diff --git a/image_analysis/scale_merge/HKLKey.cpp b/image_analysis/scale_merge/HKLKey.cpp new file mode 100644 index 00000000..1cbb3852 --- /dev/null +++ b/image_analysis/scale_merge/HKLKey.cpp @@ -0,0 +1,46 @@ +// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#include + +#include "HKLKey.h" + +HKLKey CanonicalHKL(const Reflection &r, bool merge_friedel, const std::optional &sg) { + HKLKey key{r.h, r.k, r.l, true}; + + if (!sg.has_value()) { + if (!merge_friedel) { + const HKLKey neg{-r.h, -r.k, -r.l, true}; + if (std::tie(key.h, key.k, key.l) < std::tie(neg.h, neg.k, neg.l)) { + key.h = -key.h; + key.k = -key.k; + key.l = -key.l; + key.plus = false; + } + } + } else { + const auto ops = sg->operations(); + const gemmi::ReciprocalAsu asu(sg.get()); + + const gemmi::Op::Miller in{r.h, r.k, r.l}; + const auto [hkl, sign_plus] = asu.to_asu_sign(in, ops); + + key.h = hkl[0]; + key.k = hkl[1]; + key.l = hkl[2]; + key.plus = merge_friedel ? 
true : sign_plus; + } + return key; +} + +bool AcceptReflection(const Reflection &r, double d_min_limit) { + if (!std::isfinite(r.I)) + return false; + if (!std::isfinite(r.d) || r.d <= 0.0f) + return false; + if (d_min_limit > 0.0 && r.d < d_min_limit) + return false; + if (!std::isfinite(r.rlp) || r.rlp == 0.0f) + return false; + return true; +} diff --git a/image_analysis/scale_merge/HKLKey.h b/image_analysis/scale_merge/HKLKey.h new file mode 100644 index 00000000..220bb398 --- /dev/null +++ b/image_analysis/scale_merge/HKLKey.h @@ -0,0 +1,21 @@ +// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#pragma once + +#include +#include "../../common/Reflection.h" + +struct HKLKey { + int h = 0; + int k = 0; + int l = 0; + bool plus = true; + + bool operator<(const HKLKey &o) const { + return std::tie(h, k, l, plus) < std::tie(o.h, o.k, o.l, o.plus); + } +}; + +HKLKey CanonicalHKL(const Reflection &r, bool merge_friedel, const std::optional &sg); +bool AcceptReflection(const Reflection &r, double d_min_limit); \ No newline at end of file diff --git a/image_analysis/scale_merge/Merge.cpp b/image_analysis/scale_merge/Merge.cpp new file mode 100644 index 00000000..ee10394b --- /dev/null +++ b/image_analysis/scale_merge/Merge.cpp @@ -0,0 +1,340 @@ +// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#include "Merge.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "../../common/ResolutionShells.h" +#include "HKLKey.h" + +namespace { + struct Obs { + const Reflection *r = nullptr; + int hkl = -1; + double sigma = 1.0; + double correction = 0.0; + }; + + double SafeSigma(double sigma, double min_sigma) { + if (!std::isfinite(sigma) || sigma <= 0.0) + return min_sigma; + return std::max(sigma, min_sigma); + } + + double SafeInv(double x, double fallback) { + if (!std::isfinite(x) || x == 0.0) + return 
fallback; + return 1.0 / x; + } + + std::vector BuildObservations(const std::vector> &observations, + const ReflectionCorrections &corrections, + const ScaleMergeOptions &opt, + std::vector &slot_to_hkl) { + std::map hkl_to_slot; + std::vector out; + + size_t nrefl = 0; + for (const auto &image: observations) + nrefl += image.size(); + out.reserve(nrefl); + + size_t correction_pos = 0; + + for (const auto &image: observations) { + for (const auto &r: image) { + const double correction = correction_pos < corrections.size() ? corrections[correction_pos] : 0.0; + ++correction_pos; + + if (correction <= 0.0 || !std::isfinite(correction)) + continue; + if (!AcceptReflection(r, opt.d_min_limit_A)) + continue; + + HKLKey key; + try { + key = CanonicalHKL(r, opt.merge_friedel, opt.space_group); + } catch (...) { + continue; + } + + auto it = hkl_to_slot.find(key); + if (it == hkl_to_slot.end()) { + const int slot = static_cast(slot_to_hkl.size()); + it = hkl_to_slot.emplace(key, slot).first; + slot_to_hkl.push_back(key); + } + + out.push_back({ + .r = &r, + .hkl = it->second, + .sigma = SafeSigma(r.sigma, opt.min_sigma), + .correction = correction + }); + } + } + + return out; + } + + ScaleMergeResult InitResult(const std::vector &slot_to_hkl, + const std::vector &obs) { + ScaleMergeResult out; + out.merged.resize(slot_to_hkl.size()); + + for (int i = 0; i < static_cast(slot_to_hkl.size()); ++i) { + out.merged[i].h = slot_to_hkl[i].h; + out.merged[i].k = slot_to_hkl[i].k; + out.merged[i].l = slot_to_hkl[i].l; + out.merged[i].I = 0.0; + out.merged[i].sigma = 0.0; + out.merged[i].d = 0.0; + } + + std::vector> d_values(slot_to_hkl.size()); + for (const auto &o: obs) { + if (std::isfinite(o.r->d) && o.r->d > 0.0f) + d_values[o.hkl].push_back(o.r->d); + } + + for (int h = 0; h < static_cast(d_values.size()); ++h) { + auto &v = d_values[h]; + if (v.empty()) + continue; + + std::nth_element(v.begin(), v.begin() + static_cast(v.size() / 2), v.end()); + out.merged[h].d = v[v.size() 
/ 2]; + } + + return out; + } + + void Merge(size_t nhkl, ScaleMergeResult &out, const std::vector &obs) { + struct Accum { + double sum_wI = 0.0; + double sum_w = 0.0; + double sum_wsigma2 = 0.0; + }; + + std::vector acc(nhkl); + + for (const auto &o: obs) { + const double I_corr = static_cast(o.r->I) * o.correction; + const double sigma_corr = o.sigma * o.correction; + + if (!std::isfinite(I_corr) || !std::isfinite(sigma_corr) || sigma_corr <= 0.0) + continue; + + // Weight is the inverse variance times o.correction, i.e. w = 1 / (sigma^2 * correction). NOTE(review): callers pass correction = 1 / (G * partiality * LP), so this factor raises (not lowers) the relative weight of weak-image / low-partiality observations compared with pure inverse-variance weighting - confirm intent. + const double w = o.correction / (sigma_corr * sigma_corr); + + auto &a = acc[o.hkl]; + a.sum_wI += w * I_corr; + a.sum_w += w; + a.sum_wsigma2 += w * w * sigma_corr * sigma_corr; + } + + for (int h = 0; h < static_cast(nhkl); ++h) { + const auto &a = acc[h]; + if (a.sum_w <= 0.0) + continue; + + out.merged[h].I = a.sum_wI / a.sum_w; + out.merged[h].sigma = std::sqrt(a.sum_wsigma2) / a.sum_w; + } + } + + void Stats(const ScaleMergeOptions &opt, ScaleMergeResult &out, const std::vector &obs) { + constexpr int n_shells = 10; + + float d_min = std::numeric_limits::max(); + float d_max = 0.0f; + + for (const auto &m: out.merged) { + const auto d = static_cast(m.d); + if (!std::isfinite(d) || d <= 0.0f) + continue; + if (opt.d_min_limit_A > 0.0 && d < static_cast(opt.d_min_limit_A)) + continue; + + d_min = std::min(d_min, d); + d_max = std::max(d_max, d); + } + + if (!(d_min < d_max && d_min > 0.0f)) + return; + + const float d_min_pad = d_min * 0.999f; + const float d_max_pad = d_max * 1.001f; + + ResolutionShells shells(d_min_pad, d_max_pad, n_shells); + const auto shell_mean_1_d2 = shells.GetShellMeanOneOverResSq(); + const auto shell_min_res = shells.GetShellMinRes(); + + std::vector hkl_shell(out.merged.size(), -1); + for (int h = 0; h < static_cast(out.merged.size()); ++h) { + auto s = shells.GetShell(out.merged[h].d); + if (s) + hkl_shell[h] = *s; + } + + struct PerHKL { + double sum_I = 0.0; + std::vector I; + }; + 
+ std::vector per_hkl(out.merged.size()); + + for (const auto &o: obs) { + if (o.hkl < 0 || o.hkl >= static_cast(per_hkl.size())) + continue; + if (hkl_shell[o.hkl] < 0) + continue; + + const double I_corr = static_cast(o.r->I) * o.correction; + if (!std::isfinite(I_corr)) + continue; + + per_hkl[o.hkl].sum_I += I_corr; + per_hkl[o.hkl].I.push_back(I_corr); + } + + struct ShellAccum { + int total_obs = 0; + std::unordered_set unique; + double rmeas_num = 0.0; + double rmeas_den = 0.0; + double sum_i_over_sigma = 0.0; + int n_i_over_sigma = 0; + }; + + std::vector acc(n_shells); + + for (int h = 0; h < static_cast(per_hkl.size()); ++h) { + const int s = hkl_shell[h]; + if (s < 0 || per_hkl[h].I.empty()) + continue; + + auto &sa = acc[s]; + const auto &ph = per_hkl[h]; + const int n = static_cast(ph.I.size()); + const double mean_I = ph.sum_I / n; + + sa.unique.insert(h); + sa.total_obs += n; + + if (n >= 2) { + double sum_abs_dev = 0.0; + for (double I: ph.I) + sum_abs_dev += std::abs(I - mean_I); + + sa.rmeas_num += std::sqrt(static_cast(n) / (n - 1.0)) * sum_abs_dev; + } + + for (double I: ph.I) + sa.rmeas_den += std::abs(I); + + if (out.merged[h].sigma > 0.0) { + sa.sum_i_over_sigma += out.merged[h].I / out.merged[h].sigma; + ++sa.n_i_over_sigma; + } + } + + out.statistics.shells.resize(n_shells); + + for (int s = 0; s < n_shells; ++s) { + const auto &sa = acc[s]; + auto &ss = out.statistics.shells[s]; + + ss.mean_one_over_d2 = shell_mean_1_d2[s]; + ss.d_min = shell_min_res[s]; + ss.d_max = s == 0 ? d_max_pad : shell_min_res[s - 1]; + ss.total_observations = sa.total_obs; + ss.unique_reflections = static_cast(sa.unique.size()); + ss.rmeas = sa.rmeas_den > 0.0 ? sa.rmeas_num / sa.rmeas_den : 0.0; + ss.mean_i_over_sigma = sa.n_i_over_sigma > 0 + ? 
sa.sum_i_over_sigma / sa.n_i_over_sigma + : 0.0; + ss.completeness = 0.0; + ss.possible_reflections = 0; + } + + auto &overall = out.statistics.overall; + overall.d_min = d_min; + overall.d_max = d_max; + + std::unordered_set all_unique; + double rmeas_num = 0.0; + double rmeas_den = 0.0; + double sum_i_over_sigma = 0.0; + int n_i_over_sigma = 0; + + for (const auto &sa: acc) { + overall.total_observations += sa.total_obs; + all_unique.insert(sa.unique.begin(), sa.unique.end()); + rmeas_num += sa.rmeas_num; + rmeas_den += sa.rmeas_den; + sum_i_over_sigma += sa.sum_i_over_sigma; + n_i_over_sigma += sa.n_i_over_sigma; + } + + overall.unique_reflections = static_cast(all_unique.size()); + overall.rmeas = rmeas_den > 0.0 ? rmeas_num / rmeas_den : 0.0; + overall.mean_i_over_sigma = n_i_over_sigma > 0 ? sum_i_over_sigma / n_i_over_sigma : 0.0; + overall.completeness = 0.0; + overall.possible_reflections = 0; + } +} + +ScaleMergeResult MergeReflections(const std::vector> &observations, + const ReflectionCorrections &corrections, + const ScaleMergeOptions &opt) { + std::vector slot_to_hkl; + auto obs = BuildObservations(observations, corrections, opt, slot_to_hkl); + + auto out = InitResult(slot_to_hkl, obs); + + Merge(slot_to_hkl.size(), out, obs); + Stats(opt, out, obs); + + return out; +} + +ScaleMergeResult MergeReflections(const std::vector> &observations, + const ScaleMergeOptions &opt) { + ReflectionCorrections corrections; + corrections.reserve([&] { + size_t n = 0; + for (const auto &image: observations) + n += image.size(); + return n; + }()); + + for (const auto &image: observations) { + for (const auto &r: image) { + if (!AcceptReflection(r, opt.d_min_limit_A)) { + corrections.push_back(0.0); + continue; + } + + if (r.partiality <= opt.min_partiality_for_merge) { + corrections.push_back(0.0); + continue; + } + + const double lp = SafeInv(r.rlp, 1.0); + const double correction = SafeInv(static_cast(r.partiality) * lp, 0.0); + + corrections.push_back(correction > 
0.0 && std::isfinite(correction) ? correction : 0.0); + } + } + + return MergeReflections(observations, corrections, opt); +} \ No newline at end of file diff --git a/image_analysis/scale_merge/Merge.h b/image_analysis/scale_merge/Merge.h new file mode 100644 index 00000000..d98e570d --- /dev/null +++ b/image_analysis/scale_merge/Merge.h @@ -0,0 +1,88 @@ +// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#pragma once + +#include +#include +#include + +#include "../../common/Reflection.h" +#include "gemmi/symmetry.hpp" + +struct ScaleMergeOptions { + int max_num_iterations = 100; + double max_solver_time_s = 1.0; + + double image_number_rounding = 1.0; + double min_sigma = 1e-3; + + std::optional space_group; + bool merge_friedel = true; + + std::optional wedge_deg; + + double mosaicity_init_deg = 0.17; + double mosaicity_min_deg = 1e-3; + double mosaicity_max_deg = 2.0; + std::vector mosaicity_init_deg_vec; + + bool regularize_scale_to_one = true; + double scale_regularization_sigma = 0.05; + + double min_partiality_for_merge = 0.2; + bool smoothen_g = true; + bool smoothen_mos = true; + + double d_min_limit_A = 0.0; + + int64_t image_cluster = 1; + + bool refine_wedge = false; + + enum class PartialityModel { Fixed, Rotation, Unity, Still } partiality_model = PartialityModel::Fixed; +}; + +struct MergedReflection { + int h; + int k; + int l; + double I; + double sigma; + double d = 0.0; +}; + +struct MergeStatisticsShell { + float d_min = 0.0f; + float d_max = 0.0f; + float mean_one_over_d2 = 0; + int total_observations = 0; + int unique_reflections = 0; + double rmeas = 0.0; + double mean_i_over_sigma = 0.0; + double completeness = 0.0; + int possible_reflections = 0; +}; + +struct MergeStatistics { + std::vector shells; + MergeStatisticsShell overall; +}; + +struct ScaleMergeResult { + std::vector merged; + std::vector image_scale_g; + std::vector mosaicity_deg; + MergeStatistics statistics; +}; 
+ +/// One multiplicative correction per input reflection (every reflection of every image, accepted or not), in image-major order. +/// If correction <= 0, that reflection is skipped by merge. +using ReflectionCorrections = std::vector; + +ScaleMergeResult MergeReflections(const std::vector> &observations, + const ReflectionCorrections &corrections, + const ScaleMergeOptions &opt = {}); + +ScaleMergeResult MergeReflections(const std::vector> &observations, + const ScaleMergeOptions &opt = {}); \ No newline at end of file diff --git a/image_analysis/scale_merge/ScaleAll.cpp b/image_analysis/scale_merge/ScaleAll.cpp new file mode 100644 index 00000000..80973566 --- /dev/null +++ b/image_analysis/scale_merge/ScaleAll.cpp @@ -0,0 +1,461 @@ +// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#include "ScaleAll.h" + +#include + +#include +#include +#include +#include +#include +#include +#include "HKLKey.h" + +namespace { + struct ScaleObs { + const Reflection *r = nullptr; + int image = 0; + int hkl = -1; + double sigma = 1.0; + }; + + double SafeSigma(double sigma, double min_sigma) { + if (!std::isfinite(sigma) || sigma <= 0.0) + return min_sigma; + return std::max(sigma, min_sigma); + } + + double SafeInv(double x, double fallback) { + if (!std::isfinite(x) || x == 0.0) + return fallback; + return 1.0 / x; + } + + bool AcceptReflectionForScaling(const Reflection &r, const ScaleMergeOptions &opt) { + if (!AcceptReflection(r, opt.d_min_limit_A)) + return false; + + switch (opt.partiality_model) { + case ScaleMergeOptions::PartialityModel::Rotation: + return std::isfinite(r.zeta) && r.zeta > 0.0f; + + case ScaleMergeOptions::PartialityModel::Still: + return std::isfinite(r.dist_ewald); + + case ScaleMergeOptions::PartialityModel::Fixed: + case ScaleMergeOptions::PartialityModel::Unity: + return true; + } + + return true; + } + + struct IntensityFixedResidual { + IntensityFixedResidual(const Reflection &r, double sigma, double partiality) + : 
Iobs(r.I), + weight(SafeInv(sigma, 1.0)), + correction(partiality * SafeInv(r.rlp, 1.0)) { + } + + template + bool operator()(const T *const G, const T *const Itrue, T *residual) const { + residual[0] = (T(correction) * G[0] * Itrue[0] - T(Iobs)) * T(weight); + return true; + } + + double Iobs; + double weight; + double correction; + }; + + struct IntensityRotationResidual { + IntensityRotationResidual(const Reflection &r, double sigma) + : Iobs(r.I), + weight(SafeInv(sigma, 1.0)), + delta_phi_deg(r.delta_phi_deg), + lp(SafeInv(r.rlp, 1.0)), + c1(r.zeta / std::sqrt(2.0)) { + } + + template + bool operator()(const T *const G, + const T *const mosaicity, + const T *const Itrue, + const T *const wedge, + T *residual) const { + const T half_wedge = wedge[0] / T(2.0); + const T arg_plus = T(delta_phi_deg + half_wedge) * T(c1) / mosaicity[0]; + const T arg_minus = T(delta_phi_deg - half_wedge) * T(c1) / mosaicity[0]; + const T partiality = (ceres::erf(arg_plus) - ceres::erf(arg_minus)) / T(2.0); + + residual[0] = (G[0] * partiality * T(lp) * Itrue[0] - T(Iobs)) * T(weight); + return true; + } + + double Iobs; + double weight; + double delta_phi_deg; + double lp; + double c1; + }; + + struct IntensityStillResidual { + IntensityStillResidual(const Reflection &r, double sigma) + : Iobs(r.I), + weight(SafeInv(sigma, 1.0)), + lp(SafeInv(r.rlp, 1.0)), + dist_ewald_sq(r.dist_ewald * r.dist_ewald) { + } + + template + bool operator()(const T *const G, + const T *const R_sq, + const T *const Itrue, + T *residual) const { + const T partiality = ceres::exp(-T(dist_ewald_sq) / R_sq[0]); + residual[0] = (G[0] * partiality * T(lp) * Itrue[0] - T(Iobs)) * T(weight); + return true; + } + + double Iobs; + double weight; + double lp; + double dist_ewald_sq; + }; + + struct ScaleRegularizationResidual { + explicit ScaleRegularizationResidual(double sigma) + : inv_sigma(SafeInv(sigma, 1.0)) { + } + + template + bool operator()(const T *const k, T *residual) const { + residual[0] = (k[0] - 
T(1.0)) * T(inv_sigma); + return true; + } + + double inv_sigma; + }; + + struct SmoothnessResidual { + explicit SmoothnessResidual(double sigma) + : inv_sigma(SafeInv(sigma, 1.0)) { + } + + template + bool operator()(const T *const x0, + const T *const x1, + const T *const x2, + T *residual) const { + residual[0] = (ceres::log(x0[0]) + ceres::log(x2[0]) - T(2.0) * ceres::log(x1[0])) * T(inv_sigma); + return true; + } + + double inv_sigma; + }; + + std::vector BuildScaleObs(const std::vector> &observations, + const ScaleMergeOptions &opt, + std::vector &image_used, + int &nhkl) { + std::map hkl_to_slot; + std::vector obs; + + size_t nrefl = 0; + for (const auto &image: observations) + nrefl += image.size(); + obs.reserve(nrefl); + + for (int image = 0; image < static_cast(observations.size()); ++image) { + const int image_slot = image / static_cast(opt.image_cluster); + + for (const auto &r: observations[image]) { + if (!AcceptReflectionForScaling(r, opt)) + continue; + + const auto key = CanonicalHKL(r, opt.merge_friedel, opt.space_group); + auto it = hkl_to_slot.find(key); + if (it == hkl_to_slot.end()) { + const int slot = static_cast(hkl_to_slot.size()); + it = hkl_to_slot.emplace(key, slot).first; + } + + image_used[image_slot] = 1; + + obs.push_back({ + .r = &r, + .image = image_slot, + .hkl = it->second, + .sigma = SafeSigma(r.sigma, opt.min_sigma) + }); + } + } + + nhkl = static_cast(hkl_to_slot.size()); + return obs; + } + + std::vector InitialIntensities(int nhkl, + const ScaleMergeOptions &opt, + const std::vector &obs) { + std::vector> values(nhkl); + + for (const auto &o: obs) + values[o.hkl].push_back(o.r->I); + + std::vector Itrue(nhkl, opt.min_sigma); + + for (int h = 0; h < nhkl; ++h) { + auto &v = values[h]; + if (v.empty()) + continue; + + std::nth_element(v.begin(), v.begin() + static_cast(v.size() / 2), v.end()); + + Itrue[h] = v[v.size() / 2]; + if (!std::isfinite(Itrue[h]) || Itrue[h] <= opt.min_sigma) + Itrue[h] = opt.min_sigma; + } + + 
return Itrue; + } + + void Scale(const ScaleMergeOptions &opt, + const std::vector &obs, + const std::vector &image_used, + int nhkl, + std::vector &G, + std::vector &mosaicity, + std::vector &R_sq) { + ceres::Problem problem; + + auto Itrue = InitialIntensities(nhkl, opt, obs); + double wedge = opt.wedge_deg.value_or(0.0); + + for (const auto &o: obs) { + switch (opt.partiality_model) { + case ScaleMergeOptions::PartialityModel::Rotation: { + auto *cost = new ceres::AutoDiffCostFunction( + new IntensityRotationResidual(*o.r, o.sigma)); + problem.AddResidualBlock(cost, nullptr, &G[o.image], &mosaicity[o.image], &Itrue[o.hkl], &wedge); + break; + } + + case ScaleMergeOptions::PartialityModel::Still: { + auto *cost = new ceres::AutoDiffCostFunction( + new IntensityStillResidual(*o.r, o.sigma)); + problem.AddResidualBlock(cost, nullptr, &G[o.image], &R_sq[o.image], &Itrue[o.hkl]); + break; + } + + case ScaleMergeOptions::PartialityModel::Unity: { + auto *cost = new ceres::AutoDiffCostFunction( + new IntensityFixedResidual(*o.r, o.sigma, 1.0)); + problem.AddResidualBlock(cost, nullptr, &G[o.image], &Itrue[o.hkl]); + break; + } + + case ScaleMergeOptions::PartialityModel::Fixed: { + auto *cost = new ceres::AutoDiffCostFunction( + new IntensityFixedResidual(*o.r, o.sigma, o.r->partiality)); + problem.AddResidualBlock(cost, nullptr, &G[o.image], &Itrue[o.hkl]); + break; + } + } + } + + for (int i = 0; i < static_cast(G.size()); ++i) { + if (!image_used[i]) + continue; + + problem.SetParameterLowerBound(&G[i], 0, 1e-12); + + if (opt.regularize_scale_to_one) { + auto *cost = new ceres::AutoDiffCostFunction( + new ScaleRegularizationResidual(opt.scale_regularization_sigma)); + problem.AddResidualBlock(cost, nullptr, &G[i]); + } + } + + if (opt.smoothen_g) { + for (int i = 0; i + 2 < static_cast(G.size()); ++i) { + if (!(image_used[i] && image_used[i + 1] && image_used[i + 2])) + continue; + + auto *cost = new ceres::AutoDiffCostFunction( + new SmoothnessResidual(0.05)); + 
problem.AddResidualBlock(cost, nullptr, &G[i], &G[i + 1], &G[i + 2]); + } + } + + if (opt.partiality_model == ScaleMergeOptions::PartialityModel::Rotation) { + for (int i = 0; i < static_cast(mosaicity.size()); ++i) { + if (!image_used[i]) + continue; + + problem.SetParameterLowerBound(&mosaicity[i], 0, opt.mosaicity_min_deg); + problem.SetParameterUpperBound(&mosaicity[i], 0, opt.mosaicity_max_deg); + } + + if (opt.smoothen_mos) { + for (int i = 0; i + 2 < static_cast(mosaicity.size()); ++i) { + if (!(image_used[i] && image_used[i + 1] && image_used[i + 2])) + continue; + + auto *cost = new ceres::AutoDiffCostFunction( + new SmoothnessResidual(0.05)); + problem.AddResidualBlock(cost, nullptr, &mosaicity[i], &mosaicity[i + 1], &mosaicity[i + 2]); + } + } + + if (!opt.refine_wedge) + problem.SetParameterBlockConstant(&wedge); + else + problem.SetParameterLowerBound(&wedge, 0, 0.0); + } + + if (opt.partiality_model == ScaleMergeOptions::PartialityModel::Still) { + for (int i = 0; i < static_cast(R_sq.size()); ++i) { + if (!image_used[i]) + continue; + + problem.SetParameterLowerBound(&R_sq[i], 0, 1e-9); + problem.SetParameterUpperBound(&R_sq[i], 0, 1.0); + } + } + + unsigned int hw = std::thread::hardware_concurrency(); + if (hw == 0) + hw = 1; + + ceres::Solver::Options options; + options.linear_solver_type = ceres::SPARSE_NORMAL_CHOLESKY; + options.minimizer_progress_to_stdout = true; + options.max_num_iterations = opt.max_num_iterations; + options.max_solver_time_in_seconds = opt.max_solver_time_s; + options.num_threads = static_cast(hw); + options.function_tolerance = 1e-4; + + ceres::Solver::Summary summary; + ceres::Solve(options, &problem, &summary); + + std::cout << summary.FullReport() << std::endl; + } + + double Partiality(const Reflection &r, + const ScaleMergeOptions &opt, + int image_slot, + const std::vector &mosaicity, + const std::vector &R_sq) { + switch (opt.partiality_model) { + case ScaleMergeOptions::PartialityModel::Fixed: + return 
r.partiality; + + case ScaleMergeOptions::PartialityModel::Unity: + return 1.0; + + case ScaleMergeOptions::PartialityModel::Rotation: { + const double half_wedge = opt.wedge_deg.value_or(0.0) / 2.0; + const double c1 = r.zeta / std::sqrt(2.0); + const double arg_plus = (r.delta_phi_deg + half_wedge) * c1 / mosaicity[image_slot]; + const double arg_minus = (r.delta_phi_deg - half_wedge) * c1 / mosaicity[image_slot]; + + return (std::erf(arg_plus) - std::erf(arg_minus)) / 2.0; + } + + case ScaleMergeOptions::PartialityModel::Still: + return std::exp(-r.dist_ewald * r.dist_ewald / R_sq[image_slot]); + } + + return 1.0; + } + + ReflectionCorrections CalcCorrections(const std::vector> &observations, + const ScaleMergeOptions &opt, + const std::vector &G, + const std::vector &mosaicity, + const std::vector &R_sq) { + ReflectionCorrections corrections; + + size_t nrefl = 0; + for (const auto &image: observations) + nrefl += image.size(); + corrections.reserve(nrefl); + + for (int image = 0; image < static_cast(observations.size()); ++image) { + const int image_slot = image / static_cast(opt.image_cluster); + + for (const auto &r: observations[image]) { + if (!AcceptReflectionForScaling(r, opt)) { + corrections.push_back(0.0); + continue; + } + + const double partiality = Partiality(r, opt, image_slot, mosaicity, R_sq); + + if (partiality <= opt.min_partiality_for_merge) { + corrections.push_back(0.0); + continue; + } + + const double lp = SafeInv(r.rlp, 1.0); + const double denom = G[image_slot] * partiality * lp; + const double correction = denom > 0.0 ? 1.0 / denom : 0.0; + + corrections.push_back(std::isfinite(correction) ? 
correction : 0.0); + } + } + + return corrections; + } +} + +ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector> &observations, + const ScaleMergeOptions &opt) { + if (opt.image_cluster <= 0) + throw std::invalid_argument("image_cluster must be positive"); + + const size_t n_image_slots = observations.size() / opt.image_cluster + + (observations.size() % opt.image_cluster > 0 ? 1 : 0); + + std::vector image_used(n_image_slots, 0); + + int nhkl = 0; + auto scale_obs = BuildScaleObs(observations, opt, image_used, nhkl); + + std::vector G(n_image_slots, 1.0); + std::vector mosaicity(n_image_slots, opt.mosaicity_init_deg); + std::vector R_sq(n_image_slots, 0.001 * 0.001); + + for (int i = 0; i < static_cast(n_image_slots); ++i) { + if (!image_used[i]) { + G[i] = NAN; + mosaicity[i] = NAN; + R_sq[i] = NAN; + } else if (opt.mosaicity_init_deg_vec.size() > static_cast(i) && + std::isfinite(opt.mosaicity_init_deg_vec[i])) { + mosaicity[i] = opt.mosaicity_init_deg_vec[i]; + } + } + + Scale(opt, scale_obs, image_used, nhkl, G, mosaicity, R_sq); + + auto corrections = CalcCorrections(observations, opt, G, mosaicity, R_sq); + auto out = MergeReflections(observations, corrections, opt); + + out.image_scale_g.resize(observations.size(), NAN); + out.mosaicity_deg.resize(observations.size(), NAN); + + for (int image = 0; image < static_cast(observations.size()); ++image) { + const int image_slot = image / static_cast(opt.image_cluster); + + if (image_slot < static_cast(image_used.size()) && image_used[image_slot]) { + out.image_scale_g[image] = G[image_slot]; + out.mosaicity_deg[image] = mosaicity[image_slot]; + } + } + + return out; +} \ No newline at end of file diff --git a/image_analysis/scale_merge/ScaleAll.h b/image_analysis/scale_merge/ScaleAll.h new file mode 100644 index 00000000..4df68076 --- /dev/null +++ b/image_analysis/scale_merge/ScaleAll.h @@ -0,0 +1,9 @@ +// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute +// 
SPDX-License-Identifier: GPL-3.0-only + +#pragma once + +#include "Merge.h" + +ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector>& observations, + const ScaleMergeOptions& opt = {}); \ No newline at end of file diff --git a/image_analysis/scale_merge/ScaleAndMerge.cpp b/image_analysis/scale_merge/ScaleAndMerge.cpp deleted file mode 100644 index aba8d3ee..00000000 --- a/image_analysis/scale_merge/ScaleAndMerge.cpp +++ /dev/null @@ -1,856 +0,0 @@ -// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute -// SPDX-License-Identifier: GPL-3.0-only - -#include "ScaleAndMerge.h" - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../../common/ResolutionShells.h" - -namespace { - struct HKLKey { - int32_t h = 0; - int32_t k = 0; - int32_t l = 0; - bool is_positive = true; // only relevant if opt.merge_friedel == false - - bool operator==(const HKLKey &o) const noexcept { - return h == o.h && k == o.k && l == o.l && is_positive == o.is_positive; - } - }; - - struct HKLKeyHash { - size_t operator()(const HKLKey &key) const noexcept { - auto mix = [](uint64_t x) { - x ^= x >> 33; - x *= 0xff51afd7ed558ccdULL; - x ^= x >> 33; - x *= 0xc4ceb9fe1a85ec53ULL; - x ^= x >> 33; - return x; - }; - const uint64_t a = static_cast(key.h); - const uint64_t b = static_cast(key.k); - const uint64_t c = static_cast(key.l); - const uint64_t d = static_cast(key.is_positive ? 
1 : 0); - return static_cast(mix(a) ^ (mix(b) << 1) ^ (mix(c) << 2) ^ (mix(d) << 3)); - } - }; - - inline double SafeSigma(double s, double min_sigma) { - if (!std::isfinite(s) || s <= 0.0) - return min_sigma; - return std::max(s, min_sigma); - } - - inline double SafeD(double d) { - if (!std::isfinite(d) || d <= 0.0) - return std::numeric_limits::quiet_NaN(); - return d; - } - - inline double SafeInv(double x, double fallback) { - if (!std::isfinite(x) || x == 0.0) - return fallback; - return 1.0 / x; - } - - inline HKLKey CanonicalizeHKLKey(const Reflection &r, const ScaleMergeOptions &opt) { - HKLKey key{}; - key.h = r.h; - key.k = r.k; - key.l = r.l; - key.is_positive = true; - - if (!opt.space_group.has_value()) { - if (!opt.merge_friedel) { - const HKLKey neg{-r.h, -r.k, -r.l, true}; - const bool pos = std::tie(key.h, key.k, key.l) >= std::tie(neg.h, neg.k, neg.l); - if (!pos) { - key.h = -key.h; - key.k = -key.k; - key.l = -key.l; - key.is_positive = false; - } - } - return key; - } - - const gemmi::SpaceGroup &sg = *opt.space_group; - const gemmi::GroupOps gops = sg.operations(); - const gemmi::ReciprocalAsu rasu(&sg); - - const gemmi::Op::Miller in{r.h, r.k, r.l}; - const auto [asu_hkl, sign_plus] = rasu.to_asu_sign(in, gops); - - key.h = asu_hkl[0]; - key.k = asu_hkl[1]; - key.l = asu_hkl[2]; - key.is_positive = opt.merge_friedel ? 
true : sign_plus; - return key; - } - - struct IntensityRotResidual { - IntensityRotResidual(const Reflection &r, double sigma_obs, double wedge_deg) - : Iobs_(static_cast(r.I)), - weight_(SafeInv(sigma_obs, 1.0)), - delta_phi_(r.delta_phi_deg), - lp_(SafeInv(r.rlp, 1.0)), - c1_(r.zeta / std::sqrt(2.0)), - partiality_(r.partiality) { - } - - template - bool operator()(const T *const G, - const T *const mosaicity, - const T *const Itrue, - const T *const wedge, - T *residual) const { - T partiality; - if (mosaicity[0] >= 0.0) { - const T half_wedge = wedge[0] / T(2.0); - const T arg_plus = T(delta_phi_ + half_wedge) * T(c1_) / mosaicity[0]; - const T arg_minus = T(delta_phi_ - half_wedge) * T(c1_) / mosaicity[0]; - partiality = (ceres::erf(arg_plus) - ceres::erf(arg_minus)) / T(2.0); - } else - partiality = T(1.0); - - const T Ipred = G[0] * partiality * T(lp_) * Itrue[0]; - residual[0] = (Ipred - T(Iobs_)) * T(weight_); - return true; - } - - double Iobs_; - double weight_; - double delta_phi_; - double lp_; - double c1_; - double partiality_; - }; - - struct IntensityStillResidual { - IntensityStillResidual(const Reflection &r, double sigma_obs) - : Iobs_(static_cast(r.I)), - weight_(SafeInv(sigma_obs, 1.0)), - lp_(SafeInv(r.rlp, 1.0)), - dist_ewald_sq_(r.dist_ewald * r.dist_ewald) { - } - - template - bool operator()(const T *const G, - const T *const R, - const T *const Itrue, - T *residual) const { - const T partiality = ceres::exp(-T(dist_ewald_sq_)/R[0]); - const T Ipred = G[0] * partiality * T(lp_) * Itrue[0]; - residual[0] = (Ipred - T(Iobs_)) * T(weight_); - return true; - } - - double Iobs_; - double weight_; - double lp_; - double dist_ewald_sq_; - }; - - struct IntensityFixedResidual { - IntensityFixedResidual(const Reflection &r, double sigma_obs, double partiality) - : Iobs_(static_cast(r.I)), - weight_(SafeInv(sigma_obs, 1.0)), - corr_(partiality * SafeInv(r.rlp, 1.0)) - {} - - template - bool operator()(const T *const G, - const T *const Itrue, - T 
*residual) const { - const T Ipred = T(corr_) * G[0] * Itrue[0]; - residual[0] = (Ipred - T(Iobs_)) * T(weight_); - return true; - } - - double Iobs_; - double weight_; - double corr_; - }; - - struct ScaleRegularizationResidual { - explicit ScaleRegularizationResidual(double sigma_k) - : inv_sigma_(SafeInv(sigma_k, 1.0)) { - } - - template - bool operator()(const T *const k, T *residual) const { - residual[0] = (k[0] - T(1.0)) * T(inv_sigma_); - return true; - } - - double inv_sigma_; - }; - - struct SmoothnessRegularizationResidual { - explicit SmoothnessRegularizationResidual(double sigma) - : inv_sigma_(SafeInv(sigma, 1.0)) { - } - - template - bool operator()(const T *const k0, - const T *const k1, - const T *const k2, - T *residual) const { - residual[0] = (ceres::log(k0[0]) + ceres::log(k2[0]) - T(2.0) * ceres::log(k1[0])) * T(inv_sigma_); - return true; - } - - double inv_sigma_; - }; - - struct ObsRef { - const Reflection *r = nullptr; - int img_id = 0; - int hkl_slot = -1; - double sigma = 0.0; - double correction = NAN; - }; - - void scale(const ScaleMergeOptions &opt, - std::vector &g, - std::vector &mosaicity, - std::vector &R_sq, - const std::vector &image_slot_used, - bool rotation_crystallography, - size_t nhkl, - const std::vector &obs) { - ceres::Problem problem; - - std::vector Itrue(nhkl, 0.0); - - // Initialize Itrue from per-HKL median of observed intensities - { - std::vector > per_hkl_I(nhkl); - for (const auto &o: obs) { - per_hkl_I[o.hkl_slot].push_back(static_cast(o.r->I)); - } - for (int h = 0; h < nhkl; ++h) { - auto &v = per_hkl_I[h]; - if (v.empty()) { - Itrue[h] = std::max(opt.min_sigma, 1e-6); - continue; - } - std::nth_element(v.begin(), v.begin() + static_cast(v.size() / 2), v.end()); - double med = v[v.size() / 2]; - if (!std::isfinite(med) || med <= opt.min_sigma) - med = opt.min_sigma; - Itrue[h] = med; - } - } - - double wedge = opt.wedge_deg.value_or(0.0); - - std::vector is_valid_hkl_slot(nhkl, false); - for (const auto &o: 
obs) { - switch (opt.partiality_model) { - case ScaleMergeOptions::PartialityModel::Rotation: { - auto *cost = new ceres::AutoDiffCostFunction( - new IntensityRotResidual(*o.r, o.sigma, opt.wedge_deg.value_or(0.0))); - problem.AddResidualBlock(cost, - nullptr, - &g[o.img_id], - &mosaicity[o.img_id], - &Itrue[o.hkl_slot], - &wedge); - } - break; - case ScaleMergeOptions::PartialityModel::Still: { - auto *cost = new ceres::AutoDiffCostFunction( - new IntensityStillResidual(*o.r, o.sigma)); - problem.AddResidualBlock(cost, - nullptr, - &g[o.img_id], - &R_sq[o.img_id], - &Itrue[o.hkl_slot]); - } - break; - case ScaleMergeOptions::PartialityModel::Unity: { - auto *cost = new ceres::AutoDiffCostFunction( - new IntensityFixedResidual(*o.r, o.sigma, 1.0)); - problem.AddResidualBlock(cost, - nullptr, - &g[o.img_id], - &Itrue[o.hkl_slot]); - } - break; - case ScaleMergeOptions::PartialityModel::Fixed: { - auto *cost = new ceres::AutoDiffCostFunction( - new IntensityFixedResidual(*o.r, o.sigma, o.r->partiality)); - problem.AddResidualBlock(cost, - nullptr, - &g[o.img_id], - &Itrue[o.hkl_slot]); - } - break; - } - is_valid_hkl_slot[o.hkl_slot] = true; - } - - for (int i = 0; i < g.size(); ++i) { - if (image_slot_used[i]) { - auto *cost = new ceres::AutoDiffCostFunction( - new ScaleRegularizationResidual(0.05)); - problem.AddResidualBlock(cost, nullptr, &g[i]); - } - } - - if (rotation_crystallography) { - if (opt.smoothen_g) { - for (int i = 0; i < g.size() - 2; ++i) { - if (image_slot_used[i] && image_slot_used[i + 1] && image_slot_used[i + 2]) { - auto *cost = new ceres::AutoDiffCostFunction( - new SmoothnessRegularizationResidual(0.05)); - - problem.AddResidualBlock(cost, nullptr, &g[i], &g[i + 1], &g[i + 2]); - } - } - } - - if (opt.smoothen_mos && opt.partiality_model == ScaleMergeOptions::PartialityModel::Rotation) { - for (int i = 0; i < mosaicity.size() - 2; ++i) { - if (image_slot_used[i] && image_slot_used[i + 1] && image_slot_used[i + 2]) { - auto *cost = new 
ceres::AutoDiffCostFunction( - new SmoothnessRegularizationResidual(0.05)); - problem.AddResidualBlock(cost, nullptr, &mosaicity[i], &mosaicity[i + 1], &mosaicity[i + 2]); - } - } - } - } - - if (opt.partiality_model == ScaleMergeOptions::PartialityModel::Still) { - for (int i = 0; i < R_sq.size(); ++i) { - if (image_slot_used[i]) { - problem.SetParameterLowerBound(&R_sq[i], 0, 1e-9); - problem.SetParameterUpperBound(&R_sq[i], 0, 1.0); - } - } - } - - // Scaling factors must be always positive - for (int i = 0; i < g.size(); i++) { - if (image_slot_used[i]) - problem.SetParameterLowerBound(&g[i], 0, 1e-12); - } - - // Mosaicity refinement + bounds - if (opt.partiality_model == ScaleMergeOptions::PartialityModel::Rotation) { - for (int i = 0; i < mosaicity.size(); ++i) { - if (image_slot_used[i]) { - problem.SetParameterLowerBound(&mosaicity[i], 0, opt.mosaicity_min_deg); - problem.SetParameterUpperBound(&mosaicity[i], 0, opt.mosaicity_max_deg); - } - } - if (!opt.refine_wedge) - problem.SetParameterBlockConstant(&wedge); - else - problem.SetParameterLowerBound(&wedge, 0, 0.0); - } - - // use all available threads - unsigned int hw = std::thread::hardware_concurrency(); - if (hw == 0) - hw = 1; // fallback - - ceres::Solver::Options options; - - options.linear_solver_type = ceres::SPARSE_NORMAL_CHOLESKY; - options.minimizer_progress_to_stdout = true; - options.max_num_iterations = opt.max_num_iterations; - options.max_solver_time_in_seconds = opt.max_solver_time_s; - options.num_threads = static_cast(hw); - options.function_tolerance = 1e-4; - - ceres::Solver::Summary summary; - ceres::Solve(options, &problem, &summary); - std::cout << summary.FullReport() << std::endl; - } - - void merge(size_t nhkl, ScaleMergeResult &out, const std::vector &obs) { - // Merging - // For weighting, we are extra multiplying weight by total correction value to down-weight reflections - // which come from very weak images and/or low partiality - struct HKLAccum { - double sum_wI = 0.0; 
// sum of w * I_corr - double sum_w = 0.0; // sum of w - }; - std::vector accum(nhkl); - - for (const auto &o : obs) { - if (o.correction <= 0.0 || o.r == nullptr) - continue; - const double I_corr = static_cast(o.r->I) * o.correction; - const double sigma_corr = o.sigma * o.correction; - const double w = o.correction / (sigma_corr * sigma_corr); - auto &a = accum[o.hkl_slot]; - a.sum_wI += w * I_corr; - a.sum_w += w; - } - - for (int h = 0; h < nhkl; ++h) { - const auto &a = accum[h]; - if (a.sum_w <= 0.0) - continue; - out.merged[h].I = a.sum_wI / a.sum_w; - out.merged[h].sigma = 1.0 / std::sqrt(a.sum_w); // propagated sigma of the weighted mean - } - } - - void stats(const ScaleMergeOptions &opt, size_t nhkl, ScaleMergeResult &out, const std::vector &obs) - // ---- Compute per-shell merging statistics ---- - { - constexpr int kStatShells = 10; - - float stat_d_min = std::numeric_limits::max(); - float stat_d_max = 0.0f; - for (int h = 0; h < nhkl; ++h) { - const auto d = static_cast(out.merged[h].d); - if (std::isfinite(d) && d > 0.0f) { - if (opt.d_min_limit_A > 0.0 && d < static_cast(opt.d_min_limit_A)) - continue; - stat_d_min = std::min(stat_d_min, d); - stat_d_max = std::max(stat_d_max, d); - } - } - - if (stat_d_min < stat_d_max && stat_d_min > 0.0f) { - const float d_min_pad = stat_d_min * 0.999f; - const float d_max_pad = stat_d_max * 1.001f; - ResolutionShells stat_shells(d_min_pad, d_max_pad, kStatShells); - - const auto shell_mean_1_d2 = stat_shells.GetShellMeanOneOverResSq(); - const auto shell_min_res = stat_shells.GetShellMinRes(); - - // Assign each unique reflection to a shell - std::vector hkl_shell(nhkl, -1); - for (int h = 0; h < nhkl; ++h) { - const auto d = static_cast(out.merged[h].d); - if (std::isfinite(d) && d > 0.0f) { - if (opt.d_min_limit_A > 0.0 && d < static_cast(opt.d_min_limit_A)) - continue; - auto s = stat_shells.GetShell(d); - if (s.has_value()) - hkl_shell[h] = s.value(); - } - } - - // Per-HKL: collect corrected intensities 
for Rmeas - struct HKLStats { - double sum_I = 0.0; - int n = 0; - std::vector I_list; - }; - std::vector per_hkl(nhkl); - - for (const auto &o: obs) { - if (o.correction <= 0.0) - continue; - if (hkl_shell[o.hkl_slot] < 0) - continue; - const double I_corr = static_cast(o.r->I) * o.correction; - auto &hs = per_hkl[o.hkl_slot]; - hs.sum_I += I_corr; - hs.n += 1; - hs.I_list.push_back(I_corr); - } - - // Accumulators per shell - struct ShellAccum { - int total_obs = 0; - std::unordered_set unique_hkls; - double rmeas_num = 0.0; - double rmeas_den = 0.0; - double sum_i_over_sig = 0.0; - int n_merged_with_sigma = 0; - }; - std::vector shell_acc(kStatShells); - - for (int h = 0; h < nhkl; ++h) { - if (hkl_shell[h] < 0) - continue; - const int s = hkl_shell[h]; - auto &sa = shell_acc[s]; - const auto &hs = per_hkl[h]; - if (hs.n == 0) - continue; - - sa.unique_hkls.insert(h); - sa.total_obs += hs.n; - - const double mean_I = hs.sum_I / static_cast(hs.n); - - if (hs.n >= 2) { - double sum_abs_dev = 0.0; - for (double Ii: hs.I_list) - sum_abs_dev += std::abs(Ii - mean_I); - sa.rmeas_num += std::sqrt(static_cast(hs.n) / (hs.n - 1.0)) * sum_abs_dev; - } - - for (double Ii: hs.I_list) - sa.rmeas_den += std::abs(Ii); - - if (out.merged[h].sigma > 0.0) { - sa.sum_i_over_sig += out.merged[h].I / out.merged[h].sigma; - sa.n_merged_with_sigma += 1; - } - } - - // Completeness (not yet available without unit cell) - std::vector possible_per_shell(kStatShells, 0); - int total_possible = 0; - bool have_completeness = false; - - // Fill output statistics - out.statistics.shells.resize(kStatShells); - for (int s = 0; s < kStatShells; ++s) { - auto &ss = out.statistics.shells[s]; - const auto &sa = shell_acc[s]; - - ss.mean_one_over_d2 = shell_mean_1_d2[s]; - ss.d_min = shell_min_res[s]; - ss.d_max = (s == 0) ? d_max_pad : shell_min_res[s - 1]; - - ss.total_observations = sa.total_obs; - ss.unique_reflections = static_cast(sa.unique_hkls.size()); - ss.rmeas = (sa.rmeas_den > 0.0) ? 
(sa.rmeas_num / sa.rmeas_den) : 0.0; - ss.mean_i_over_sigma = (sa.n_merged_with_sigma > 0) - ? (sa.sum_i_over_sig / sa.n_merged_with_sigma) - : 0.0; - ss.possible_reflections = possible_per_shell[s]; - ss.completeness = (have_completeness && possible_per_shell[s] > 0) - ? static_cast(sa.unique_hkls.size()) / possible_per_shell[s] - : 0.0; - } - - // Overall statistics - { - auto &ov = out.statistics.overall; - ov.d_min = stat_d_min; - ov.d_max = stat_d_max; - ov.mean_one_over_d2 = 0.0f; - - int total_obs_all = 0; - std::unordered_set all_unique; - double rmeas_num_all = 0.0, rmeas_den_all = 0.0; - double sum_isig_all = 0.0; - int n_isig_all = 0; - - for (int s = 0; s < kStatShells; ++s) { - const auto &sa = shell_acc[s]; - total_obs_all += sa.total_obs; - all_unique.insert(sa.unique_hkls.begin(), sa.unique_hkls.end()); - rmeas_num_all += sa.rmeas_num; - rmeas_den_all += sa.rmeas_den; - sum_isig_all += sa.sum_i_over_sig; - n_isig_all += sa.n_merged_with_sigma; - } - - ov.total_observations = total_obs_all; - ov.unique_reflections = static_cast(all_unique.size()); - ov.rmeas = (rmeas_den_all > 0.0) ? (rmeas_num_all / rmeas_den_all) : 0.0; - ov.mean_i_over_sigma = (n_isig_all > 0) ? (sum_isig_all / n_isig_all) : 0.0; - ov.possible_reflections = total_possible; - ov.completeness = (have_completeness && total_possible > 0) - ? 
static_cast(all_unique.size()) / total_possible - : 0.0; - } - } - } - void calc_obs(const ScaleMergeOptions &opt, - std::vector &g, - std::vector &mosaicity, - std::vector &R_sq, - std::vector &obs) { - - // ---- Compute corrected observations once (used for both merging and statistics) ---- - const double half_wedge = opt.wedge_deg.value_or(0.0) / 2.0; - - for (auto &o: obs) { - const Reflection &r = *o.r; - const double lp = SafeInv(static_cast(r.rlp), 1.0); - const double G_i = g[o.img_id]; - - double partiality = 1.0; - switch (opt.partiality_model) { - case ScaleMergeOptions::PartialityModel::Fixed: - partiality = r.partiality; - break; - case ScaleMergeOptions::PartialityModel::Rotation: { - const double c1 = r.zeta / std::sqrt(2.0); - const double arg_plus = (r.delta_phi_deg + half_wedge) * c1 / mosaicity[o.img_id]; - const double arg_minus = (r.delta_phi_deg - half_wedge) * c1 / mosaicity[o.img_id]; - partiality = (std::erf(arg_plus) - std::erf(arg_minus)) / 2.0; - break; - } - case ScaleMergeOptions::PartialityModel::Still: - partiality = std::exp(-r.dist_ewald * r.dist_ewald / R_sq[o.img_id]); - break; - case ScaleMergeOptions::PartialityModel::Unity: - break; - } - - if (partiality <= opt.min_partiality_for_merge) { - o.correction = 0.0; // mark as excluded - continue; - } - - const double correction = G_i * partiality * lp; - o.correction = (correction > 0.0) ? 
1.0 / correction : 0.0; - } - } - - void proc_obs(const std::vector > &observations, - const ScaleMergeOptions &opt, - std::vector &image_slot_used, - std::vector &obs, - std::unordered_map &hklToSlot - ) { - for (int i = 0; i < observations.size(); i++) { - for (const auto &r: observations[i]) { - const double d = SafeD(r.d); - if (!std::isfinite(d)) - continue; - if (!std::isfinite(r.I)) - continue; - - if (opt.d_min_limit_A > 0.0 && d < opt.d_min_limit_A) - continue; - - if (!std::isfinite(r.zeta) || r.zeta <= 0.0f) - continue; - if (!std::isfinite(r.rlp) || r.rlp == 0.0f) - continue; - - const double sigma = SafeSigma(r.sigma, opt.min_sigma); - - const int img_id = i / opt.image_cluster; - image_slot_used[img_id] = 1; - - int hkl_slot; - try { - const HKLKey key = CanonicalizeHKLKey(r, opt); - auto it = hklToSlot.find(key); - if (it == hklToSlot.end()) { - hkl_slot = static_cast(hklToSlot.size()); - hklToSlot.emplace(key, hkl_slot); - } else { - hkl_slot = it->second; - } - } catch (...) { - continue; - } - - ObsRef o; - o.r = &r; - o.img_id = img_id; - o.hkl_slot = hkl_slot; - o.sigma = sigma; - obs.push_back(o); - } - } - - } -} // namespace - -ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector > &observations, - const ScaleMergeOptions &opt) { - if (opt.image_cluster <= 0) - throw std::invalid_argument("image_cluster must be positive"); - - const bool rotation_crystallography = opt.wedge_deg.has_value(); - - size_t nrefl = 0; - for (const auto &i: observations) - nrefl += i.size(); - - std::vector obs; - - obs.reserve(nrefl); - - std::unordered_map hklToSlot; - hklToSlot.reserve(nrefl); - - size_t n_image_slots = observations.size() / opt.image_cluster + - (observations.size() % opt.image_cluster > 0 ? 
1 : 0); - - std::vector image_slot_used(n_image_slots, 0); - - proc_obs(observations, opt, image_slot_used, obs, hklToSlot); - - const int nhkl = static_cast(hklToSlot.size()); - - std::vector g(n_image_slots, 1.0); - std::vector mosaicity(n_image_slots, opt.mosaicity_init_deg); - std::vector R_sq(n_image_slots, 0.001 * 0.001); - - for (int i = 0; i < n_image_slots; i++) { - if (!image_slot_used[i]) { - mosaicity[i] = NAN; - g[i] = NAN; - R_sq[i] = NAN; - } else if (opt.mosaicity_init_deg_vec.size() > i && std::isfinite(opt.mosaicity_init_deg_vec[i])) { - mosaicity[i] = opt.mosaicity_init_deg_vec[i]; - } - } - - scale(opt, g, mosaicity, R_sq, image_slot_used, rotation_crystallography, nhkl, obs); - - ScaleMergeResult out; - - out.image_scale_g.resize(observations.size(), NAN); - out.mosaicity_deg.resize(observations.size(), NAN); - for (int i = 0; i < observations.size(); i++) { - size_t img_slot = i / opt.image_cluster; - if (image_slot_used[img_slot]) { - out.image_scale_g[i] = g[img_slot]; - out.mosaicity_deg[i] = mosaicity[img_slot]; - } - } - - std::vector slotToHKL(nhkl); - for (const auto &kv: hklToSlot) - slotToHKL[kv.second] = kv.first; - - out.merged.resize(nhkl); - for (int h = 0; h < nhkl; ++h) { - out.merged[h].h = slotToHKL[h].h; - out.merged[h].k = slotToHKL[h].k; - out.merged[h].l = slotToHKL[h].l; - out.merged[h].I = 0.0; - out.merged[h].sigma = 0.0; - out.merged[h].d = 0.0; - } - - // Populate d from median of observations per HKL - { - std::vector > per_hkl_d(nhkl); - for (const auto &o: obs) { - const double d_val = static_cast(o.r->d); - if (std::isfinite(d_val) && d_val > 0.0) - per_hkl_d[o.hkl_slot].push_back(d_val); - } - for (int h = 0; h < nhkl; ++h) { - auto &v = per_hkl_d[h]; - if (!v.empty()) { - std::nth_element(v.begin(), v.begin() + static_cast(v.size() / 2), v.end()); - out.merged[h].d = v[v.size() / 2]; - } - } - } - - calc_obs(opt, g, mosaicity, R_sq, obs); - merge(nhkl, out, obs); - stats(opt, nhkl, out, obs); - - return out; -} 
- -ScaleMergeResult MergeReflections(const std::vector> &observations, - const ScaleMergeOptions &opt) { - size_t nrefl = 0; - for (const auto &i: observations) - nrefl += i.size(); - - std::vector obs; - obs.reserve(nrefl); - - std::unordered_map hklToSlot; - hklToSlot.reserve(nrefl); - - for (int i = 0; i < (int)observations.size(); i++) { - for (const auto &r: observations[i]) { - if (!std::isfinite(r.I) || !std::isfinite(r.d) || r.d <= 0.0f) - continue; - if (opt.d_min_limit_A > 0.0 && r.d < opt.d_min_limit_A) - continue; - if (!std::isfinite(r.rlp) || r.rlp == 0.0f) - continue; - if (r.partiality <= opt.min_partiality_for_merge) - continue; - - // correction stored as 1 / (G * partiality * LP), with G = 1 - const double lp = SafeInv(static_cast(r.rlp), 1.0); - const double correction = SafeInv(r.partiality * lp, 0.0); - if (correction <= 0.0) - continue; - - int hkl_slot; - try { - const HKLKey key = CanonicalizeHKLKey(r, opt); - auto it = hklToSlot.find(key); - if (it == hklToSlot.end()) { - hkl_slot = static_cast(hklToSlot.size()); - hklToSlot.emplace(key, hkl_slot); - } else { - hkl_slot = it->second; - } - } catch (...) 
{ - continue; - } - - ObsRef o; - o.r = &r; - o.img_id = i; - o.hkl_slot = hkl_slot; - o.sigma = SafeSigma(r.sigma, opt.min_sigma); - o.correction = correction; - obs.push_back(o); - } - } - - const int nhkl = static_cast(hklToSlot.size()); - - std::vector slotToHKL(nhkl); - for (const auto &kv: hklToSlot) - slotToHKL[kv.second] = kv.first; - - ScaleMergeResult out; - out.merged.resize(nhkl); - for (int h = 0; h < nhkl; ++h) { - out.merged[h].h = slotToHKL[h].h; - out.merged[h].k = slotToHKL[h].k; - out.merged[h].l = slotToHKL[h].l; - out.merged[h].I = 0.0; - out.merged[h].sigma = 0.0; - out.merged[h].d = 0.0; - } - - // Populate d from median of observations per HKL - { - std::vector> per_hkl_d(nhkl); - for (const auto &o: obs) { - const double d_val = static_cast(o.r->d); - if (std::isfinite(d_val) && d_val > 0.0) - per_hkl_d[o.hkl_slot].push_back(d_val); - } - for (int h = 0; h < nhkl; ++h) { - auto &v = per_hkl_d[h]; - if (!v.empty()) { - std::nth_element(v.begin(), v.begin() + (long)(v.size() / 2), v.end()); - out.merged[h].d = v[v.size() / 2]; - } - } - } - - merge(nhkl, out, obs); - stats(opt, nhkl, out, obs); - - return out; -} diff --git a/image_analysis/scale_merge/ScaleAndMerge.h b/image_analysis/scale_merge/ScaleAndMerge.h deleted file mode 100644 index e92b7e69..00000000 --- a/image_analysis/scale_merge/ScaleAndMerge.h +++ /dev/null @@ -1,99 +0,0 @@ -// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute -// SPDX-License-Identifier: GPL-3.0-only - -#pragma once - -#include -#include -#include - -#include "../../common/Reflection.h" -#include "gemmi/symmetry.hpp" - -struct ScaleMergeOptions { - int max_num_iterations = 100; - double max_solver_time_s = 1.0; - - double image_number_rounding = 1.0; - double min_sigma = 1e-3; - - // Symmetry canonicalization of HKL prior to merging/scaling. - // If not set, the routine uses raw HKL as-is. 
- std::optional space_group; - - // If true, treat Friedel mates as equivalent (merge anomalous pairs). - // If false, keep them separate by including a sign flag in the HKL key. - bool merge_friedel = true; - - // --- Kabsch(XDS)-style partiality model --- - // Rotation range (wedge) used in partiality calculation. - // Set to 0 to disable partiality correction. - std::optional wedge_deg; - - // --- Mosaicity (user input in degrees; internally converted to radians) --- - double mosaicity_init_deg = 0.17; - double mosaicity_min_deg = 1e-3; - double mosaicity_max_deg = 2.0; - std::vector mosaicity_init_deg_vec; - - // --- Optional: regularize per-image scale k towards 1 (Kabsch-like) --- - bool regularize_scale_to_one = true; - double scale_regularization_sigma = 0.05; - - double min_partiality_for_merge = 0.2; - bool smoothen_g = true; - bool smoothen_mos = true; - - double d_min_limit_A = 0.0; - - int64_t image_cluster = 1; - - bool refine_wedge = false; - - enum class PartialityModel {Fixed, Rotation, Unity, Still} partiality_model = PartialityModel::Fixed; -}; - -struct MergedReflection { - int h; - int k; - int l; - double I; - double sigma; - double d = 0.0; -}; - -/// Per-resolution-shell merging statistics -struct MergeStatisticsShell { - float d_min = 0.0f; ///< High-resolution limit of this shell (Å) - float d_max = 0.0f; ///< Low-resolution limit of this shell (Å) - float mean_one_over_d2 = 0; ///< Mean 1/d² in shell - int total_observations = 0; ///< Total number of (corrected) observations - int unique_reflections = 0; ///< Number of unique HKLs with observations - double rmeas = 0.0; ///< Redundancy-independent merging R-factor - double mean_i_over_sigma = 0.0; ///< Mean I/σ(I) of the merged reflections - double completeness = 0.0; ///< Fraction of possible reflections observed (0 if unknown) - int possible_reflections = 0;///< Theoretical number of reflections in this shell (0 if unknown) -}; - -/// Overall + per-shell merging statistics -struct 
MergeStatistics { - std::vector shells; - MergeStatisticsShell overall; ///< Statistics over all shells combined -}; - -struct ScaleMergeResult { - std::vector merged; - std::vector image_scale_g; - std::vector mosaicity_deg; - - /// Per-shell and overall merging statistics (populated after merging) - MergeStatistics statistics; -}; - -ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector>& observations, - const ScaleMergeOptions& opt = {}); - -/// Merge reflections without any scaling (G = 1, partiality and LP taken as-is from the Reflection). -/// Uses the same HKL canonicalization and statistics as ScaleAndMergeReflectionsCeres. -ScaleMergeResult MergeReflections(const std::vector>& observations, - const ScaleMergeOptions& opt = {}); \ No newline at end of file diff --git a/image_analysis/scale_merge/ScaleOnTheFly.cpp b/image_analysis/scale_merge/ScaleOnTheFly.cpp new file mode 100644 index 00000000..8f938faf --- /dev/null +++ b/image_analysis/scale_merge/ScaleOnTheFly.cpp @@ -0,0 +1,5 @@ +// +// Created by leonarski_f on 10.05.2026. 
+// + +#include "OnTheFlyScaling.h" diff --git a/image_analysis/scale_merge/ScaleOnTheFly.h b/image_analysis/scale_merge/ScaleOnTheFly.h new file mode 100644 index 00000000..a24f12d4 --- /dev/null +++ b/image_analysis/scale_merge/ScaleOnTheFly.h @@ -0,0 +1,10 @@ +// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#include "Merge.h" + +class OnTheFlyScaling { + + +}; + -- 2.52.0 From a441e96b50fdd3641d3d1ced942d136b290e044a Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Sun, 10 May 2026 13:44:01 +0200 Subject: [PATCH 022/132] HKLKey: Fix --- image_analysis/scale_merge/HKLKey.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/image_analysis/scale_merge/HKLKey.cpp b/image_analysis/scale_merge/HKLKey.cpp index 1cbb3852..76490f94 100644 --- a/image_analysis/scale_merge/HKLKey.cpp +++ b/image_analysis/scale_merge/HKLKey.cpp @@ -4,11 +4,12 @@ #include #include "HKLKey.h" +#include "gemmi/symmetry.hpp" -HKLKey CanonicalHKL(const Reflection &r, bool merge_friedel, const std::optional &sg) { +HKLKey CanonicalHKL(const Reflection &r, bool merge_friedel, const std::optional &in_sg) { HKLKey key{r.h, r.k, r.l, true}; - if (!sg.has_value()) { + if (!in_sg.has_value()) { if (!merge_friedel) { const HKLKey neg{-r.h, -r.k, -r.l, true}; if (std::tie(key.h, key.k, key.l) < std::tie(neg.h, neg.k, neg.l)) { @@ -19,8 +20,9 @@ HKLKey CanonicalHKL(const Reflection &r, bool merge_friedel, const std::optional } } } else { - const auto ops = sg->operations(); - const gemmi::ReciprocalAsu asu(sg.get()); + gemmi::SpaceGroup sg = in_sg.value(); + const auto ops = sg.operations(); + const gemmi::ReciprocalAsu asu(&sg); const gemmi::Op::Miller in{r.h, r.k, r.l}; const auto [hkl, sign_plus] = asu.to_asu_sign(in, ops); -- 2.52.0 From b9af590ff5aded960ed9b8bd648c49858e440a03 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Sun, 10 May 2026 16:05:45 +0200 Subject: [PATCH 023/132] Refactor splitting 
scale and merge --- common/Reflection.h | 10 ++++ image_analysis/IndexAndRefine.cpp | 3 +- image_analysis/IndexAndRefine.h | 4 +- .../bragg_integration/BraggIntegrate2D.cpp | 1 + .../bragg_prediction/BraggPrediction.cpp | 3 +- .../bragg_prediction/BraggPredictionGPU.cu | 1 + .../bragg_prediction/BraggPredictionRot.cpp | 3 +- .../bragg_prediction/BraggPredictionRotGPU.cu | 1 + image_analysis/scale_merge/FrenchWilson.cpp | 2 +- image_analysis/scale_merge/FrenchWilson.h | 2 +- image_analysis/scale_merge/HKLKey.cpp | 16 ++++-- image_analysis/scale_merge/HKLKey.h | 4 ++ image_analysis/scale_merge/Merge.cpp | 56 +++---------------- image_analysis/scale_merge/Merge.h | 18 ------ image_analysis/scale_merge/ScaleAll.cpp | 42 +++++++------- image_analysis/scale_merge/ScaleAll.h | 2 +- image_analysis/scale_merge/ScaleOnTheFly.cpp | 16 ++++-- image_analysis/scale_merge/ScaleOnTheFly.h | 13 ++++- .../scale_merge/SearchSpaceGroup.cpp | 6 +- image_analysis/scale_merge/SearchSpaceGroup.h | 2 +- 20 files changed, 96 insertions(+), 109 deletions(-) diff --git a/common/Reflection.h b/common/Reflection.h index b73dd8aa..219c2763 100644 --- a/common/Reflection.h +++ b/common/Reflection.h @@ -24,7 +24,17 @@ struct Reflection { float rlp; float partiality; float zeta; + float scaling_correction; // I_true = scaling_correction * I; scaling_correction = rlp / (partiality * image_scale) bool observed = false; }; +struct MergedReflection { + int32_t h; + int32_t k; + int32_t l; + float I; + float sigma; + float d = 0.0; +}; + #endif //JFJOCH_REFLECTION_H diff --git a/image_analysis/IndexAndRefine.cpp b/image_analysis/IndexAndRefine.cpp index f12b26b8..fcb68708 100644 --- a/image_analysis/IndexAndRefine.cpp +++ b/image_analysis/IndexAndRefine.cpp @@ -9,6 +9,7 @@ #include "indexing/AnalyzeIndexing.h" #include "indexing/FFTIndexer.h" #include "lattice_search/LatticeSearch.h" +#include "scale_merge/ScaleAll.h" IndexAndRefine::IndexAndRefine(const DiffractionExperiment &x, IndexerThreadPool 
*indexer) : index_ice_rings(x.GetIndexingSettings().GetIndexIceRings()), @@ -275,7 +276,7 @@ std::optional IndexAndRefine::Finalize() { return {}; } -std::optional IndexAndRefine::ScaleRotationData(const ScaleMergeOptions &opts) const { +std::optional IndexAndRefine::ScaleRotationData(const ScaleMergeOptions &opts) { size_t nrefl = 0; for (const auto &i: reflections) nrefl += i.size(); diff --git a/image_analysis/IndexAndRefine.h b/image_analysis/IndexAndRefine.h index a3b1d00a..59577fc1 100644 --- a/image_analysis/IndexAndRefine.h +++ b/image_analysis/IndexAndRefine.h @@ -12,7 +12,7 @@ #include "bragg_prediction/BraggPrediction.h" #include "indexing/IndexerThreadPool.h" #include "lattice_search/LatticeSearch.h" -#include "scale_merge/ScaleAndMerge.h" +#include "scale_merge/Merge.h" #include "RotationIndexer.h" #include "RotationParameters.h" @@ -63,7 +63,7 @@ public: /// Run scale-and-merge on accumulated reflections to refine per-image /// mosaicity (and optionally B-factors / scale factors). /// Returns std::nullopt if there are too few reflections to be meaningful. 
- std::optional ScaleRotationData(const ScaleMergeOptions &opts = {}) const; + std::optional ScaleRotationData(const ScaleMergeOptions &opts = {}); std::optional Finalize(); }; diff --git a/image_analysis/bragg_integration/BraggIntegrate2D.cpp b/image_analysis/bragg_integration/BraggIntegrate2D.cpp index 21438d67..dba2d7c2 100644 --- a/image_analysis/bragg_integration/BraggIntegrate2D.cpp +++ b/image_analysis/bragg_integration/BraggIntegrate2D.cpp @@ -167,6 +167,7 @@ std::vector IntegrateInternal(const DiffractionExperiment &experimen if (r.observed) { if (experiment.GetPolarizationFactor()) r.rlp /= geom.CalcAzIntPolarizationCorr(r.predicted_x, r.predicted_y, experiment.GetPolarizationFactor().value()); + r.scaling_correction = r.rlp / r.partiality; r.image_number = static_cast(image_number); ret.emplace_back(r); } diff --git a/image_analysis/bragg_prediction/BraggPrediction.cpp b/image_analysis/bragg_prediction/BraggPrediction.cpp index 0e982ad6..e2f7ef9b 100644 --- a/image_analysis/bragg_prediction/BraggPrediction.cpp +++ b/image_analysis/bragg_prediction/BraggPrediction.cpp @@ -131,7 +131,8 @@ int BraggPrediction::Calc(const DiffractionExperiment &experiment, const Crystal .dist_ewald = dist_ewald_sphere, .rlp = 1.0, .partiality = 1.0, - .zeta = 1.0 + .zeta = 1.0, + .scaling_correction = 1.0 }; ++i; } diff --git a/image_analysis/bragg_prediction/BraggPredictionGPU.cu b/image_analysis/bragg_prediction/BraggPredictionGPU.cu index e561542e..650bd803 100644 --- a/image_analysis/bragg_prediction/BraggPredictionGPU.cu +++ b/image_analysis/bragg_prediction/BraggPredictionGPU.cu @@ -115,6 +115,7 @@ namespace { out.rlp = 1.0f; out.partiality = 1.0f; out.zeta = 1.0f; + out.scaling_correction = 1.0f; return true; } diff --git a/image_analysis/bragg_prediction/BraggPredictionRot.cpp b/image_analysis/bragg_prediction/BraggPredictionRot.cpp index adeb4528..51aa9676 100644 --- a/image_analysis/bragg_prediction/BraggPredictionRot.cpp +++ 
b/image_analysis/bragg_prediction/BraggPredictionRot.cpp @@ -141,7 +141,8 @@ int BraggPredictionRot::Calc(const DiffractionExperiment &experiment, const Crys .dist_ewald = dist_ewald_sphere, .rlp = lorentz_reciprocal, .partiality = partiality, - .zeta = zeta_abs + .zeta = zeta_abs, + .scaling_correction = lorentz_reciprocal / partiality, }; i++; } diff --git a/image_analysis/bragg_prediction/BraggPredictionRotGPU.cu b/image_analysis/bragg_prediction/BraggPredictionRotGPU.cu index 7ced01d3..0508f969 100644 --- a/image_analysis/bragg_prediction/BraggPredictionRotGPU.cu +++ b/image_analysis/bragg_prediction/BraggPredictionRotGPU.cu @@ -158,6 +158,7 @@ namespace { out[count].rlp = lorentz; out[count].partiality = partiality; out[count].zeta = zeta_abs; + out[count].scaling_correction = lorentz / partiality; count++; } return count; diff --git a/image_analysis/scale_merge/FrenchWilson.cpp b/image_analysis/scale_merge/FrenchWilson.cpp index ebcf35d9..6107d562 100644 --- a/image_analysis/scale_merge/FrenchWilson.cpp +++ b/image_analysis/scale_merge/FrenchWilson.cpp @@ -112,7 +112,7 @@ FrenchWilson(const std::vector& merged, out[i].k = merged[i].k; out[i].l = merged[i].l; out[i].sigmaI = merged[i].sigma; - const double I_pos = std::max(merged[i].I, 0.0); + const double I_pos = std::max(merged[i].I, 0.0f); out[i].I = I_pos; out[i].F = std::sqrt(I_pos); out[i].sigmaF = 0.0; diff --git a/image_analysis/scale_merge/FrenchWilson.h b/image_analysis/scale_merge/FrenchWilson.h index b727e9a0..9c868b74 100644 --- a/image_analysis/scale_merge/FrenchWilson.h +++ b/image_analysis/scale_merge/FrenchWilson.h @@ -6,7 +6,7 @@ #include #include -#include "ScaleAndMerge.h" +#include "Merge.h" /// Result of the French-Wilson procedure for a single reflection struct FrenchWilsonReflection { diff --git a/image_analysis/scale_merge/HKLKey.cpp b/image_analysis/scale_merge/HKLKey.cpp index 76490f94..f5ded9cf 100644 --- a/image_analysis/scale_merge/HKLKey.cpp +++ 
b/image_analysis/scale_merge/HKLKey.cpp @@ -6,12 +6,12 @@ #include "HKLKey.h" #include "gemmi/symmetry.hpp" -HKLKey CanonicalHKL(const Reflection &r, bool merge_friedel, const std::optional &in_sg) { - HKLKey key{r.h, r.k, r.l, true}; +HKLKey CanonicalHKL(int32_t h, int32_t k, int32_t l, bool merge_friedel, const std::optional &in_sg) { + HKLKey key{h, k, l, true}; if (!in_sg.has_value()) { if (!merge_friedel) { - const HKLKey neg{-r.h, -r.k, -r.l, true}; + const HKLKey neg{-h, -k, -l, true}; if (std::tie(key.h, key.k, key.l) < std::tie(neg.h, neg.k, neg.l)) { key.h = -key.h; key.k = -key.k; @@ -24,7 +24,7 @@ HKLKey CanonicalHKL(const Reflection &r, bool merge_friedel, const std::optional const auto ops = sg.operations(); const gemmi::ReciprocalAsu asu(&sg); - const gemmi::Op::Miller in{r.h, r.k, r.l}; + const gemmi::Op::Miller in{h, k, l}; const auto [hkl, sign_plus] = asu.to_asu_sign(in, ops); key.h = hkl[0]; @@ -35,6 +35,14 @@ HKLKey CanonicalHKL(const Reflection &r, bool merge_friedel, const std::optional return key; } +HKLKey CanonicalHKL(const Reflection &r, bool merge_friedel, const std::optional &sg) { + return CanonicalHKL(r.h, r.k, r.l, merge_friedel, sg); +} + +HKLKey CanonicalHKL(const MergedReflection &r, bool merge_friedel, const std::optional &sg) { + return CanonicalHKL(r.h, r.k, r.l, merge_friedel, sg); +} + bool AcceptReflection(const Reflection &r, double d_min_limit) { if (!std::isfinite(r.I)) return false; diff --git a/image_analysis/scale_merge/HKLKey.h b/image_analysis/scale_merge/HKLKey.h index 220bb398..92fc592e 100644 --- a/image_analysis/scale_merge/HKLKey.h +++ b/image_analysis/scale_merge/HKLKey.h @@ -5,6 +5,7 @@ #include #include "../../common/Reflection.h" +#include "gemmi/symmetry.hpp" struct HKLKey { int h = 0; @@ -18,4 +19,7 @@ struct HKLKey { }; HKLKey CanonicalHKL(const Reflection &r, bool merge_friedel, const std::optional &sg); +HKLKey CanonicalHKL(const MergedReflection &r, bool merge_friedel, const std::optional &sg); +HKLKey 
CanonicalHKL(int32_t h, int32_t k, int32_t l, bool merge_friedel, const std::optional &sg); + bool AcceptReflection(const Reflection &r, double d_min_limit); \ No newline at end of file diff --git a/image_analysis/scale_merge/Merge.cpp b/image_analysis/scale_merge/Merge.cpp index ee10394b..a34c43aa 100644 --- a/image_analysis/scale_merge/Merge.cpp +++ b/image_analysis/scale_merge/Merge.cpp @@ -19,7 +19,6 @@ namespace { const Reflection *r = nullptr; int hkl = -1; double sigma = 1.0; - double correction = 0.0; }; double SafeSigma(double sigma, double min_sigma) { @@ -35,7 +34,6 @@ namespace { } std::vector BuildObservations(const std::vector> &observations, - const ReflectionCorrections &corrections, const ScaleMergeOptions &opt, std::vector &slot_to_hkl) { std::map hkl_to_slot; @@ -46,14 +44,10 @@ namespace { nrefl += image.size(); out.reserve(nrefl); - size_t correction_pos = 0; - for (const auto &image: observations) { for (const auto &r: image) { - const double correction = correction_pos < corrections.size() ? corrections[correction_pos] : 0.0; - ++correction_pos; - if (correction <= 0.0 || !std::isfinite(correction)) + if (r.scaling_correction <= 0.0 || !std::isfinite(r.scaling_correction)) continue; if (!AcceptReflection(r, opt.d_min_limit_A)) continue; @@ -75,8 +69,7 @@ namespace { out.push_back({ .r = &r, .hkl = it->second, - .sigma = SafeSigma(r.sigma, opt.min_sigma), - .correction = correction + .sigma = SafeSigma(r.sigma, opt.min_sigma) }); } } @@ -126,14 +119,14 @@ namespace { std::vector acc(nhkl); for (const auto &o: obs) { - const double I_corr = static_cast(o.r->I) * o.correction; - const double sigma_corr = o.sigma * o.correction; + const double I_corr = static_cast(o.r->I) * o.r->scaling_correction; + const double sigma_corr = o.sigma * o.r->scaling_correction; if (!std::isfinite(I_corr) || !std::isfinite(sigma_corr) || sigma_corr <= 0.0) continue; - // Extra factor o.correction down-weights weak images / low partiality observations. 
- const double w = o.correction / (sigma_corr * sigma_corr); + // Extra factor o.r->scaling_correction down-weights weak images / low partiality observations. + const double w = o.r->scaling_correction / (sigma_corr * sigma_corr); auto &a = acc[o.hkl]; a.sum_wI += w * I_corr; @@ -198,7 +191,7 @@ namespace { if (hkl_shell[o.hkl] < 0) continue; - const double I_corr = static_cast(o.r->I) * o.correction; + const double I_corr = static_cast(o.r->I) * o.r->scaling_correction; if (!std::isfinite(I_corr)) continue; @@ -294,10 +287,9 @@ namespace { } ScaleMergeResult MergeReflections(const std::vector> &observations, - const ReflectionCorrections &corrections, const ScaleMergeOptions &opt) { std::vector slot_to_hkl; - auto obs = BuildObservations(observations, corrections, opt, slot_to_hkl); + auto obs = BuildObservations(observations, opt, slot_to_hkl); auto out = InitResult(slot_to_hkl, obs); @@ -306,35 +298,3 @@ ScaleMergeResult MergeReflections(const std::vector> &ob return out; } - -ScaleMergeResult MergeReflections(const std::vector> &observations, - const ScaleMergeOptions &opt) { - ReflectionCorrections corrections; - corrections.reserve([&] { - size_t n = 0; - for (const auto &image: observations) - n += image.size(); - return n; - }()); - - for (const auto &image: observations) { - for (const auto &r: image) { - if (!AcceptReflection(r, opt.d_min_limit_A)) { - corrections.push_back(0.0); - continue; - } - - if (r.partiality <= opt.min_partiality_for_merge) { - corrections.push_back(0.0); - continue; - } - - const double lp = SafeInv(r.rlp, 1.0); - const double correction = SafeInv(static_cast(r.partiality) * lp, 0.0); - - corrections.push_back(correction > 0.0 && std::isfinite(correction) ? 
correction : 0.0); - } - } - - return MergeReflections(observations, corrections, opt); -} \ No newline at end of file diff --git a/image_analysis/scale_merge/Merge.h b/image_analysis/scale_merge/Merge.h index d98e570d..6b9987d6 100644 --- a/image_analysis/scale_merge/Merge.h +++ b/image_analysis/scale_merge/Merge.h @@ -30,7 +30,6 @@ struct ScaleMergeOptions { bool regularize_scale_to_one = true; double scale_regularization_sigma = 0.05; - double min_partiality_for_merge = 0.2; bool smoothen_g = true; bool smoothen_mos = true; @@ -43,15 +42,6 @@ struct ScaleMergeOptions { enum class PartialityModel { Fixed, Rotation, Unity, Still } partiality_model = PartialityModel::Fixed; }; -struct MergedReflection { - int h; - int k; - int l; - double I; - double sigma; - double d = 0.0; -}; - struct MergeStatisticsShell { float d_min = 0.0f; float d_max = 0.0f; @@ -76,13 +66,5 @@ struct ScaleMergeResult { MergeStatistics statistics; }; -/// One multiplicative correction per accepted input reflection, in image-major order. -/// If correction <= 0, that reflection is skipped by merge. -using ReflectionCorrections = std::vector; - -ScaleMergeResult MergeReflections(const std::vector> &observations, - const ReflectionCorrections &corrections, - const ScaleMergeOptions &opt = {}); - ScaleMergeResult MergeReflections(const std::vector> &observations, const ScaleMergeOptions &opt = {}); \ No newline at end of file diff --git a/image_analysis/scale_merge/ScaleAll.cpp b/image_analysis/scale_merge/ScaleAll.cpp index 80973566..ba761e45 100644 --- a/image_analysis/scale_merge/ScaleAll.cpp +++ b/image_analysis/scale_merge/ScaleAll.cpp @@ -175,7 +175,12 @@ namespace { if (!AcceptReflectionForScaling(r, opt)) continue; - const auto key = CanonicalHKL(r, opt.merge_friedel, opt.space_group); + HKLKey key; + try { + key = CanonicalHKL(r, opt.merge_friedel, opt.space_group); + } catch (...) 
{ + continue; + } auto it = hkl_to_slot.find(key); if (it == hkl_to_slot.end()) { const int slot = static_cast(hkl_to_slot.size()); @@ -372,47 +377,41 @@ namespace { return 1.0; } - ReflectionCorrections CalcCorrections(const std::vector> &observations, + void CalcCorrections(std::vector> &observations, const ScaleMergeOptions &opt, const std::vector &G, const std::vector &mosaicity, const std::vector &R_sq) { - ReflectionCorrections corrections; size_t nrefl = 0; for (const auto &image: observations) nrefl += image.size(); - corrections.reserve(nrefl); for (int image = 0; image < static_cast(observations.size()); ++image) { const int image_slot = image / static_cast(opt.image_cluster); - for (const auto &r: observations[image]) { + for (auto &r: observations[image]) { if (!AcceptReflectionForScaling(r, opt)) { - corrections.push_back(0.0); + r.scaling_correction = 0.0; continue; } const double partiality = Partiality(r, opt, image_slot, mosaicity, R_sq); - - if (partiality <= opt.min_partiality_for_merge) { - corrections.push_back(0.0); - continue; + if (!std::isfinite(partiality) || partiality < 0.01) { + r.partiality = 0.0; + r.scaling_correction = 0.0; + } else { + const double denom = G[image_slot] * partiality; + const double correction = denom > 0.0 ? r.rlp / denom : 0.0; + r.partiality = partiality; + r.scaling_correction = std::isfinite(correction) ? correction : 0.0; } - - const double lp = SafeInv(r.rlp, 1.0); - const double denom = G[image_slot] * partiality * lp; - const double correction = denom > 0.0 ? 1.0 / denom : 0.0; - - corrections.push_back(std::isfinite(correction) ? 
correction : 0.0); } } - - return corrections; } } -ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector> &observations, +ScaleMergeResult ScaleAndMergeReflectionsCeres(std::vector> &observations, const ScaleMergeOptions &opt) { if (opt.image_cluster <= 0) throw std::invalid_argument("image_cluster must be positive"); @@ -442,8 +441,9 @@ ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector>& observations, +ScaleMergeResult ScaleAndMergeReflectionsCeres(std::vector>& observations, const ScaleMergeOptions& opt = {}); \ No newline at end of file diff --git a/image_analysis/scale_merge/ScaleOnTheFly.cpp b/image_analysis/scale_merge/ScaleOnTheFly.cpp index 8f938faf..5bf75560 100644 --- a/image_analysis/scale_merge/ScaleOnTheFly.cpp +++ b/image_analysis/scale_merge/ScaleOnTheFly.cpp @@ -1,5 +1,13 @@ -// -// Created by leonarski_f on 10.05.2026. -// +// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only -#include "OnTheFlyScaling.h" +#include "ScaleOnTheFly.h" + +ScaleOnTheFly::ScaleOnTheFly(const std::vector &ref, + std::optional sg, + bool merge_friedel) : sg(sg), merge_friedel(merge_friedel) { + for (const auto &r : ref) { + const auto key = CanonicalHKL(r, merge_friedel, sg); + reference_data[key] = r.I; + } +} diff --git a/image_analysis/scale_merge/ScaleOnTheFly.h b/image_analysis/scale_merge/ScaleOnTheFly.h index a24f12d4..21b4a2ba 100644 --- a/image_analysis/scale_merge/ScaleOnTheFly.h +++ b/image_analysis/scale_merge/ScaleOnTheFly.h @@ -1,10 +1,19 @@ // SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-only +#pragma once + +#include "HKLKey.h" #include "Merge.h" -class OnTheFlyScaling { - +#include +class ScaleOnTheFly { + std::optional sg; + bool merge_friedel; + std::map reference_data; +public: + ScaleOnTheFly(const std::vector &ref, std::optional sg, bool merge_friedel = true); + void Scale(std::vector &reflections); }; 
diff --git a/image_analysis/scale_merge/SearchSpaceGroup.cpp b/image_analysis/scale_merge/SearchSpaceGroup.cpp index ebb72f98..add88fcb 100644 --- a/image_analysis/scale_merge/SearchSpaceGroup.cpp +++ b/image_analysis/scale_merge/SearchSpaceGroup.cpp @@ -244,10 +244,10 @@ namespace { const int idx = bin_index(r.d); auto& bin = bins[static_cast(idx)]; - bin.d_min_A = std::min(bin.d_min_A, r.d); - bin.d_max_A = std::max(bin.d_max_A, r.d); + bin.d_min_A = std::min(bin.d_min_A, r.d); + bin.d_max_A = std::max(bin.d_max_A, r.d); - const double i_over_sigma = std::max(0.0, r.I / r.sigma); + const double i_over_sigma = std::max(0.0, r.I / r.sigma); const gemmi::Op::Miller hkl{{r.h, r.k, r.l}}; if (gops.is_systematically_absent(hkl)) { diff --git a/image_analysis/scale_merge/SearchSpaceGroup.h b/image_analysis/scale_merge/SearchSpaceGroup.h index a66e0fef..00c56ba9 100644 --- a/image_analysis/scale_merge/SearchSpaceGroup.h +++ b/image_analysis/scale_merge/SearchSpaceGroup.h @@ -7,7 +7,7 @@ #include #include -#include "ScaleAndMerge.h" +#include "Merge.h" #include "gemmi/symmetry.hpp" struct SpaceGroupOperatorScore { -- 2.52.0 From 4b6a042e4a4e97a916cef47b1b7e2cedd8146e3b Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Sun, 10 May 2026 16:40:11 +0200 Subject: [PATCH 024/132] jfjoch_test: Fix TCPImagePusherTest to ensure it is more stable --- tests/TCPImagePusherTest.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/TCPImagePusherTest.cpp b/tests/TCPImagePusherTest.cpp index 6c675824..7b558518 100644 --- a/tests/TCPImagePusherTest.cpp +++ b/tests/TCPImagePusherTest.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-3.0-only #include +#include #include #include "../image_pusher/TCPStreamPusher.h" @@ -425,7 +426,10 @@ TEST_CASE("TCPImageCommTest_AutoPort_StarBind", "[TCP]") { TCPStreamPusher pusher("tcp://127.0.0.1:*", 1); TCPImagePuller puller(pusher.GetAddress()[0], 64 * 1024 * 1024); - std::thread receiver([&] { + 
std::this_thread::sleep_for(std::chrono::seconds(2)); + REQUIRE(pusher.GetConnectedWriters() == 1); + + std::future receiver = std::async(std::launch::async, [&] { bool seen_end = false; uint64_t processed = 0; @@ -474,7 +478,7 @@ TEST_CASE("TCPImageCommTest_AutoPort_StarBind", "[TCP]") { } REQUIRE(pusher.EndDataCollection(end)); - receiver.join(); + REQUIRE_NOTHROW(receiver.get()); puller.Disconnect(); } -- 2.52.0 From eea438185c84e517656b7f958d8c4c0c7850f7c8 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Sun, 10 May 2026 16:55:21 +0200 Subject: [PATCH 025/132] jfjoch_process: Implement split scale & merge. --- image_analysis/IndexAndRefine.cpp | 18 ++++++++++++++++-- image_analysis/IndexAndRefine.h | 6 +++++- image_analysis/scale_merge/Merge.cpp | 10 +++++----- image_analysis/scale_merge/Merge.h | 6 ++---- image_analysis/scale_merge/ScaleAll.cpp | 23 +++++++++++------------ image_analysis/scale_merge/ScaleAll.h | 9 ++++++++- tools/jfjoch_process.cpp | 17 ++++++++++------- 7 files changed, 57 insertions(+), 32 deletions(-) diff --git a/image_analysis/IndexAndRefine.cpp b/image_analysis/IndexAndRefine.cpp index fcb68708..76f972d8 100644 --- a/image_analysis/IndexAndRefine.cpp +++ b/image_analysis/IndexAndRefine.cpp @@ -276,7 +276,7 @@ std::optional IndexAndRefine::Finalize() { return {}; } -std::optional IndexAndRefine::ScaleRotationData(const ScaleMergeOptions &opts) { +std::optional IndexAndRefine::ScaleAllImages(const ScaleMergeOptions &opts) { size_t nrefl = 0; for (const auto &i: reflections) nrefl += i.size(); @@ -302,5 +302,19 @@ std::optional IndexAndRefine::ScaleRotationData(const ScaleMer options.space_group = *sg; } - return ScaleAndMergeReflectionsCeres(reflections, options); + return ScaleAll(reflections, options); +} + +MergeResult IndexAndRefine::Merge(const ScaleMergeOptions &opts) { + // Build options focused on mosaicity refinement but allow caller override + ScaleMergeOptions options = opts; + + // If caller left space_group unset, try to pick 
it from the indexed lattice + if (!opts.space_group.has_value()) { + auto sg = experiment.GetGemmiSpaceGroup(); + if (sg) + options.space_group = *sg; + } + + return MergeReflections(reflections, options); } diff --git a/image_analysis/IndexAndRefine.h b/image_analysis/IndexAndRefine.h index 59577fc1..2a5e5b86 100644 --- a/image_analysis/IndexAndRefine.h +++ b/image_analysis/IndexAndRefine.h @@ -15,6 +15,7 @@ #include "scale_merge/Merge.h" #include "RotationIndexer.h" #include "RotationParameters.h" +#include "scale_merge/ScaleAll.h" class IndexAndRefine { const bool index_ice_rings; @@ -60,10 +61,13 @@ public: IndexAndRefine(const DiffractionExperiment &x, IndexerThreadPool *indexer); void ProcessImage(DataMessage &msg, const SpotFindingSettings &settings, const CompressedImage &image, BraggPrediction &prediction); + + std::optional ScaleAllImages(const ScaleMergeOptions &opts = {}); + /// Run scale-and-merge on accumulated reflections to refine per-image /// mosaicity (and optionally B-factors / scale factors). /// Returns std::nullopt if there are too few reflections to be meaningful. 
- std::optional ScaleRotationData(const ScaleMergeOptions &opts = {}); + MergeResult Merge(const ScaleMergeOptions &opts = {}); std::optional Finalize(); }; diff --git a/image_analysis/scale_merge/Merge.cpp b/image_analysis/scale_merge/Merge.cpp index a34c43aa..83b091bc 100644 --- a/image_analysis/scale_merge/Merge.cpp +++ b/image_analysis/scale_merge/Merge.cpp @@ -77,9 +77,9 @@ namespace { return out; } - ScaleMergeResult InitResult(const std::vector &slot_to_hkl, + MergeResult InitResult(const std::vector &slot_to_hkl, const std::vector &obs) { - ScaleMergeResult out; + MergeResult out; out.merged.resize(slot_to_hkl.size()); for (int i = 0; i < static_cast(slot_to_hkl.size()); ++i) { @@ -109,7 +109,7 @@ namespace { return out; } - void Merge(size_t nhkl, ScaleMergeResult &out, const std::vector &obs) { + void Merge(size_t nhkl, MergeResult &out, const std::vector &obs) { struct Accum { double sum_wI = 0.0; double sum_w = 0.0; @@ -144,7 +144,7 @@ namespace { } } - void Stats(const ScaleMergeOptions &opt, ScaleMergeResult &out, const std::vector &obs) { + void Stats(const ScaleMergeOptions &opt, MergeResult &out, const std::vector &obs) { constexpr int n_shells = 10; float d_min = std::numeric_limits::max(); @@ -286,7 +286,7 @@ namespace { } } -ScaleMergeResult MergeReflections(const std::vector> &observations, +MergeResult MergeReflections(const std::vector> &observations, const ScaleMergeOptions &opt) { std::vector slot_to_hkl; auto obs = BuildObservations(observations, opt, slot_to_hkl); diff --git a/image_analysis/scale_merge/Merge.h b/image_analysis/scale_merge/Merge.h index 6b9987d6..694e3868 100644 --- a/image_analysis/scale_merge/Merge.h +++ b/image_analysis/scale_merge/Merge.h @@ -59,12 +59,10 @@ struct MergeStatistics { MergeStatisticsShell overall; }; -struct ScaleMergeResult { +struct MergeResult { std::vector merged; - std::vector image_scale_g; - std::vector mosaicity_deg; MergeStatistics statistics; }; -ScaleMergeResult MergeReflections(const 
std::vector> &observations, +MergeResult MergeReflections(const std::vector> &observations, const ScaleMergeOptions &opt = {}); \ No newline at end of file diff --git a/image_analysis/scale_merge/ScaleAll.cpp b/image_analysis/scale_merge/ScaleAll.cpp index ba761e45..f83514dd 100644 --- a/image_analysis/scale_merge/ScaleAll.cpp +++ b/image_analysis/scale_merge/ScaleAll.cpp @@ -156,7 +156,7 @@ namespace { double inv_sigma; }; - std::vector BuildScaleObs(const std::vector> &observations, + std::vector BuildScaleObs(const std::vector > &observations, const ScaleMergeOptions &opt, std::vector &image_used, int &nhkl) { @@ -205,7 +205,7 @@ namespace { std::vector InitialIntensities(int nhkl, const ScaleMergeOptions &opt, const std::vector &obs) { - std::vector> values(nhkl); + std::vector > values(nhkl); for (const auto &o: obs) values[o.hkl].push_back(o.r->I); @@ -377,12 +377,11 @@ namespace { return 1.0; } - void CalcCorrections(std::vector> &observations, - const ScaleMergeOptions &opt, - const std::vector &G, - const std::vector &mosaicity, - const std::vector &R_sq) { - + void CalcCorrections(std::vector > &observations, + const ScaleMergeOptions &opt, + const std::vector &G, + const std::vector &mosaicity, + const std::vector &R_sq) { size_t nrefl = 0; for (const auto &image: observations) nrefl += image.size(); @@ -411,8 +410,8 @@ namespace { } } -ScaleMergeResult ScaleAndMergeReflectionsCeres(std::vector> &observations, - const ScaleMergeOptions &opt) { +ScaleResult ScaleAll(std::vector > &observations, + const ScaleMergeOptions &opt) { if (opt.image_cluster <= 0) throw std::invalid_argument("image_cluster must be positive"); @@ -443,7 +442,7 @@ ScaleMergeResult ScaleAndMergeReflectionsCeres(std::vector>& observations, + +struct ScaleResult { + std::vector image_scale_g; + std::vector mosaicity_deg; + std::vector image_bfactor_Ang2; +}; + +ScaleResult ScaleAll(std::vector>& observations, const ScaleMergeOptions& opt = {}); \ No newline at end of file diff --git 
a/tools/jfjoch_process.cpp b/tools/jfjoch_process.cpp index 21f88512..bc7bf9f9 100644 --- a/tools/jfjoch_process.cpp +++ b/tools/jfjoch_process.cpp @@ -567,7 +567,8 @@ int main(int argc, char **argv) { scale_opts.space_group = experiment.GetGemmiSpaceGroup(); auto scale_start = std::chrono::steady_clock::now(); - auto scale_result = indexer.ScaleRotationData(scale_opts); + auto scale_result = indexer.ScaleAllImages(scale_opts); + auto merge_result = indexer.Merge(scale_opts); auto scale_end = std::chrono::steady_clock::now(); double scale_time = std::chrono::duration(scale_end - scale_start).count(); @@ -585,7 +586,7 @@ int main(int argc, char **argv) { sg_opts.min_total_compared = 100; sg_opts.test_systematic_absences = true; - const auto sg_search = SearchSpaceGroup(scale_result->merged, sg_opts); + const auto sg_search = SearchSpaceGroup(merge_result.merged, sg_opts); logger.Info(""); { @@ -604,11 +605,13 @@ int main(int argc, char **argv) { scale_opts.space_group = *sg_search.best_space_group; auto rescale_start = std::chrono::steady_clock::now(); - auto refined_scale_result = indexer.ScaleRotationData(scale_opts); + auto refined_scale_result = indexer.ScaleAllImages(scale_opts); + auto refined_merge_result = indexer.Merge(scale_opts); auto rescale_end = std::chrono::steady_clock::now(); if (refined_scale_result) { scale_result = std::move(refined_scale_result); + merge_result = std::move(refined_merge_result); scale_time += std::chrono::duration(rescale_end - rescale_start).count(); } } else { @@ -620,11 +623,11 @@ int main(int argc, char **argv) { end_msg.image_scale_factor = scale_result->image_scale_g; logger.Info("Scaling completed in {:.2f} s ({} unique reflections)", - scale_time, scale_result->merged.size()); + scale_time, merge_result.merged.size()); // Print resolution-shell statistics table { - const auto &stats = scale_result->statistics; + const auto &stats = merge_result.statistics; logger.Info(""); logger.Info(" {:>8s} {:>8s} {:>8s} {:>8s} {:>8s} 
{:>10s}", "d_min", "N_obs", "N_uniq", "Rmeas", "", "Complete"); @@ -674,7 +677,7 @@ int main(int argc, char **argv) { fw_opts.acentric = true; // typical for MX fw_opts.num_shells = 20; - auto fw = FrenchWilson(scale_result->merged, fw_opts); + auto fw = FrenchWilson(merge_result.merged, fw_opts); { { const std::string hkl_path = output_prefix + "_amplitudes.hkl"; @@ -689,7 +692,7 @@ int main(int argc, char **argv) { << "\n"; } hkl_file.close(); - logger.Info("Wrote {} reflections to {}", scale_result->merged.size(), hkl_path); + logger.Info("Wrote {} reflections to {}", merge_result.merged.size(), hkl_path); } } MmcifMetadata cif_meta; -- 2.52.0 From 440e54d5889397be40ea5d5221afac336488326f Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Sun, 10 May 2026 17:41:12 +0200 Subject: [PATCH 026/132] ScalingResult: Dedicated data structure --- image_analysis/IndexAndRefine.cpp | 2 +- image_analysis/IndexAndRefine.h | 2 +- image_analysis/scale_merge/CMakeLists.txt | 3 +- image_analysis/scale_merge/ScaleAll.cpp | 4 +- image_analysis/scale_merge/ScaleAll.h | 10 +--- image_analysis/scale_merge/ScaleOnTheFly.cpp | 50 ++++++++++++++++++++ image_analysis/scale_merge/ScaleOnTheFly.h | 3 +- image_analysis/scale_merge/ScalingResult.h | 12 +++++ 8 files changed, 72 insertions(+), 14 deletions(-) create mode 100644 image_analysis/scale_merge/ScalingResult.h diff --git a/image_analysis/IndexAndRefine.cpp b/image_analysis/IndexAndRefine.cpp index 76f972d8..6dfbb683 100644 --- a/image_analysis/IndexAndRefine.cpp +++ b/image_analysis/IndexAndRefine.cpp @@ -276,7 +276,7 @@ std::optional IndexAndRefine::Finalize() { return {}; } -std::optional IndexAndRefine::ScaleAllImages(const ScaleMergeOptions &opts) { +std::optional IndexAndRefine::ScaleAllImages(const ScaleMergeOptions &opts) { size_t nrefl = 0; for (const auto &i: reflections) nrefl += i.size(); diff --git a/image_analysis/IndexAndRefine.h b/image_analysis/IndexAndRefine.h index 2a5e5b86..4ef24b13 100644 --- 
a/image_analysis/IndexAndRefine.h +++ b/image_analysis/IndexAndRefine.h @@ -62,7 +62,7 @@ public: void ProcessImage(DataMessage &msg, const SpotFindingSettings &settings, const CompressedImage &image, BraggPrediction &prediction); - std::optional ScaleAllImages(const ScaleMergeOptions &opts = {}); + std::optional ScaleAllImages(const ScaleMergeOptions &opts = {}); /// Run scale-and-merge on accumulated reflections to refine per-image /// mosaicity (and optionally B-factors / scale factors). diff --git a/image_analysis/scale_merge/CMakeLists.txt b/image_analysis/scale_merge/CMakeLists.txt index 3f06312e..2bde4a67 100644 --- a/image_analysis/scale_merge/CMakeLists.txt +++ b/image_analysis/scale_merge/CMakeLists.txt @@ -6,5 +6,6 @@ ADD_LIBRARY(JFJochScaleMerge ScaleAll.cpp ScaleAll.h FrenchWilson.cpp FrenchWils ScaleOnTheFly.cpp ScaleOnTheFly.h HKLKey.cpp - HKLKey.h) + HKLKey.h + ScalingResult.h) TARGET_LINK_LIBRARIES(JFJochScaleMerge Ceres::ceres Eigen3::Eigen JFJochCommon) \ No newline at end of file diff --git a/image_analysis/scale_merge/ScaleAll.cpp b/image_analysis/scale_merge/ScaleAll.cpp index f83514dd..310392c1 100644 --- a/image_analysis/scale_merge/ScaleAll.cpp +++ b/image_analysis/scale_merge/ScaleAll.cpp @@ -410,7 +410,7 @@ namespace { } } -ScaleResult ScaleAll(std::vector > &observations, +ScalingResult ScaleAll(std::vector > &observations, const ScaleMergeOptions &opt) { if (opt.image_cluster <= 0) throw std::invalid_argument("image_cluster must be positive"); @@ -442,7 +442,7 @@ ScaleResult ScaleAll(std::vector > &observations, CalcCorrections(observations, opt, G, mosaicity, R_sq); - ScaleResult out{}; + ScalingResult out{}; out.image_scale_g.resize(observations.size(), NAN); out.mosaicity_deg.resize(observations.size(), NAN); diff --git a/image_analysis/scale_merge/ScaleAll.h b/image_analysis/scale_merge/ScaleAll.h index f06aaa26..45766914 100644 --- a/image_analysis/scale_merge/ScaleAll.h +++ b/image_analysis/scale_merge/ScaleAll.h @@ -4,13 +4,7 @@ 
#pragma once #include "Merge.h" +#include "ScalingResult.h" -struct ScaleResult { - std::vector image_scale_g; - std::vector mosaicity_deg; - std::vector image_bfactor_Ang2; -}; - -ScaleResult ScaleAll(std::vector>& observations, - const ScaleMergeOptions& opt = {}); \ No newline at end of file +ScalingResult ScaleAll(std::vector>& observations, const ScaleMergeOptions& opt = {}); \ No newline at end of file diff --git a/image_analysis/scale_merge/ScaleOnTheFly.cpp b/image_analysis/scale_merge/ScaleOnTheFly.cpp index 5bf75560..f225d6cc 100644 --- a/image_analysis/scale_merge/ScaleOnTheFly.cpp +++ b/image_analysis/scale_merge/ScaleOnTheFly.cpp @@ -3,6 +3,52 @@ #include "ScaleOnTheFly.h" +#include + +double SafeInv(double x, double fallback) { + if (!std::isfinite(x) || x == 0.0) + return fallback; + return 1.0 / x; +} + +struct IntensityRotationResidual { + IntensityRotationResidual(const Reflection &r, double Itrue, double sigma) + : Iobs(r.I), + Itrue(Itrue), + weight(SafeInv(sigma, 1.0)), + delta_phi_deg(r.delta_phi_deg), + lp(SafeInv(r.rlp, 1.0)), + c1(r.zeta / std::sqrt(2.0)), + b_resolution_coeff(SafeInv(-r.d * r.d / 4.0, 0.0)) { + } + + template + bool operator()(const T *const G, + const T *const B, + const T *const mosaicity, + const T *const wedge, + T *residual) const { + const T partiality = T(1.0); + if (mosaicity > 0) { + const T half_wedge = wedge[0] / T(2.0); + const T arg_plus = T(delta_phi_deg + half_wedge) * T(c1) / mosaicity[0]; + const T arg_minus = T(delta_phi_deg - half_wedge) * T(c1) / mosaicity[0]; + partiality = (ceres::erf(arg_plus) - ceres::erf(arg_minus)) / T(2.0); + } + const T B_term = ceres::exp(T(B[0]) * b_resolution_coeff); + residual[0] = (G[0] * partiality * T(lp) * Itrue - T(Iobs)) * T(weight); + return true; + } + + double Iobs; + double Itrue; + double weight; + double delta_phi_deg; + double lp; + double c1; + double b_resolution_coeff; +}; + ScaleOnTheFly::ScaleOnTheFly(const std::vector &ref, std::optional sg, bool 
merge_friedel) : sg(sg), merge_friedel(merge_friedel) { @@ -11,3 +57,7 @@ ScaleOnTheFly::ScaleOnTheFly(const std::vector &ref, reference_data[key] = r.I; } } + +std::optional ScaleOnTheFly::Scale(std::vector &reflections) { + +} diff --git a/image_analysis/scale_merge/ScaleOnTheFly.h b/image_analysis/scale_merge/ScaleOnTheFly.h index 21b4a2ba..798698e0 100644 --- a/image_analysis/scale_merge/ScaleOnTheFly.h +++ b/image_analysis/scale_merge/ScaleOnTheFly.h @@ -5,6 +5,7 @@ #include "HKLKey.h" #include "Merge.h" +#include "ScalingResult.h" #include @@ -14,6 +15,6 @@ class ScaleOnTheFly { std::map reference_data; public: ScaleOnTheFly(const std::vector &ref, std::optional sg, bool merge_friedel = true); - void Scale(std::vector &reflections); + std::optional Scale(std::vector &reflections); }; diff --git a/image_analysis/scale_merge/ScalingResult.h b/image_analysis/scale_merge/ScalingResult.h new file mode 100644 index 00000000..e4701814 --- /dev/null +++ b/image_analysis/scale_merge/ScalingResult.h @@ -0,0 +1,12 @@ +// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#pragma once + +#include + +struct ScalingResult { + std::vector image_scale_g; + std::vector mosaicity_deg; + std::vector image_bfactor_Ang2; +}; \ No newline at end of file -- 2.52.0 From b854c0f356051b6f8ef27c12282dc8c0386923e7 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Sun, 10 May 2026 23:03:00 +0200 Subject: [PATCH 027/132] Move to ScaleOnTheFly model --- common/CMakeLists.txt | 2 + common/DiffractionExperiment.cpp | 17 + common/DiffractionExperiment.h | 7 + common/ScalingSettings.cpp | 78 ++++ common/ScalingSettings.h | 46 ++ image_analysis/IndexAndRefine.cpp | 55 +-- image_analysis/IndexAndRefine.h | 14 +- image_analysis/scale_merge/CMakeLists.txt | 2 +- image_analysis/scale_merge/HKLKey.cpp | 12 + image_analysis/scale_merge/HKLKey.h | 1 + image_analysis/scale_merge/Merge.cpp | 38 +- image_analysis/scale_merge/Merge.h | 37 +- 
image_analysis/scale_merge/ScaleAll.cpp | 460 ------------------- image_analysis/scale_merge/ScaleAll.h | 10 - image_analysis/scale_merge/ScaleOnTheFly.cpp | 298 ++++++++++-- image_analysis/scale_merge/ScaleOnTheFly.h | 36 +- image_analysis/scale_merge/ScalingResult.h | 10 +- tools/jfjoch_process.cpp | 282 ++++++------ 18 files changed, 634 insertions(+), 771 deletions(-) create mode 100644 common/ScalingSettings.cpp create mode 100644 common/ScalingSettings.h delete mode 100644 image_analysis/scale_merge/ScaleAll.cpp delete mode 100644 image_analysis/scale_merge/ScaleAll.h diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 93aa8b70..a15a652b 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -126,6 +126,8 @@ ADD_LIBRARY(JFJochCommon STATIC hkl_key.h JfjochTCP.h BrokerStatus.h + ScalingSettings.cpp + ScalingSettings.h ) TARGET_LINK_LIBRARIES(JFJochCommon JFJochLogger Compression JFCalibration gemmi Threads::Threads -lrt ) diff --git a/common/DiffractionExperiment.cpp b/common/DiffractionExperiment.cpp index e3b6cd5b..645fcec1 100644 --- a/common/DiffractionExperiment.cpp +++ b/common/DiffractionExperiment.cpp @@ -1554,6 +1554,15 @@ BraggIntegrationSettings DiffractionExperiment::GetBraggIntegrationSettings() co return bragg_integration_settings; } +DiffractionExperiment &DiffractionExperiment::ImportScalingSettings(const ScalingSettings &input) { + scaling_settings = input; + return *this; +} + +ScalingSettings DiffractionExperiment::GetScalingSettings() const { + return scaling_settings; +} + DiffractionExperiment &DiffractionExperiment::PoniRot1_rad(float input) { dataset.PoniRot1_rad(input); return *this; @@ -1663,3 +1672,11 @@ DiffractionExperiment &DiffractionExperiment::RunNumber(uint64_t input) { series_id = input; return *this; } + +PartialityModel DiffractionExperiment::GetPartialityModel() const { + if (const auto model = scaling_settings.GetPartialityModel()) + return model.value(); + if (GetGoniometer().has_value()) + return 
PartialityModel::Rotation; + return PartialityModel::Unity; +} \ No newline at end of file diff --git a/common/DiffractionExperiment.h b/common/DiffractionExperiment.h index e4851224..6fcd0c36 100644 --- a/common/DiffractionExperiment.h +++ b/common/DiffractionExperiment.h @@ -27,6 +27,7 @@ #include "CompressedImage.h" #include "IndexingSettings.h" #include "BraggIntegrationSettings.h" +#include "ScalingSettings.h" #include "../symmetry/gemmi/symmetry.hpp" @@ -73,6 +74,7 @@ class DiffractionExperiment { FileWriterSettings file_writer; IndexingSettings indexing; BraggIntegrationSettings bragg_integration_settings; + ScalingSettings scaling_settings; DarkMaskSettings dark_mask_settings; @@ -182,6 +184,9 @@ public: DiffractionExperiment &ImportDarkMaskSettings(const DarkMaskSettings &input); DarkMaskSettings GetDarkMaskSettings() const; + DiffractionExperiment &ImportScalingSettings(const ScalingSettings& input); + ScalingSettings GetScalingSettings() const; + DatasetSettings GetDatasetSettings() const; void FillMessage(StartMessage &message) const; @@ -407,6 +412,8 @@ public: int64_t GetDarkMaskNumberOfFrames() const; bool IsRotationIndexing() const; + + PartialityModel GetPartialityModel() const; }; #endif //DIFFRACTIONEXPERIMENT_H diff --git a/common/ScalingSettings.cpp b/common/ScalingSettings.cpp new file mode 100644 index 00000000..372d6c75 --- /dev/null +++ b/common/ScalingSettings.cpp @@ -0,0 +1,78 @@ +// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + + +#include "ScalingSettings.h" + + +ScalingSettings& ScalingSettings::SetPartialityModel(PartialityModel mode) { + partiality_mode = mode; + return *this; +} + +ScalingSettings& ScalingSettings::RefineB(bool input) { + refine_b = input; + return *this; +} + +ScalingSettings& ScalingSettings::MergeFriedel(bool input) { + merge_friedel = input; + return *this; +} + +ScalingSettings& ScalingSettings::HighResolutionLimit_A(double limit) { + if (limit 
<= 0.0) + throw JFJochException(JFJochExceptionCategory::InputParameterBelowMin, "High resolution limit must be positive"); + high_resolution_limit_A = limit; + return *this; +} + +bool ScalingSettings::GetRefineB() const { + return refine_b; +} + +bool ScalingSettings::GetMergeFriedel() const { + return merge_friedel; +} + +ScalingSettings &ScalingSettings::RefineWedge(bool input) { + refine_wedge = input; + return *this; +} + +bool ScalingSettings::GetRefineWedge() const { + return refine_wedge; +} + +std::optional ScalingSettings::GetPartialityModel() const { + return partiality_mode; +} + +std::optional ScalingSettings::GetHighResolutionLimit_A() const { + return high_resolution_limit_A; +} + +double ScalingSettings::GetMinB() const { + return min_b; +} + +double ScalingSettings::GetMaxB() const { + return max_b; +} + +double ScalingSettings::GetMinMosaicity() const { + return 0.001; +} + +double ScalingSettings::GetMaxMosaicity() const { + return 1.0; +} + +double ScalingSettings::GetMinWedge() const { + return 0.001; + +} + +double ScalingSettings::GetMaxWedge() const { + return 10.0; +} diff --git a/common/ScalingSettings.h b/common/ScalingSettings.h new file mode 100644 index 00000000..f61c79b3 --- /dev/null +++ b/common/ScalingSettings.h @@ -0,0 +1,46 @@ +// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#pragma once + +#include +#include "JFJochException.h" + +enum class PartialityModel { Fixed, Rotation, Unity, Still }; + +class ScalingSettings { + std::optional partiality_mode; + + bool refine_b = false; + double max_b = 200.0; + double min_b = -50.0; + + bool refine_wedge = false; + + bool merge_friedel = true; + std::optional high_resolution_limit_A; + +public: + ScalingSettings& SetPartialityModel(PartialityModel mode); + ScalingSettings& RefineB(bool input); + ScalingSettings& RefineWedge(bool input); + ScalingSettings& MergeFriedel(bool input); + ScalingSettings& 
HighResolutionLimit_A(double limit); + + [[nodiscard]] bool GetRefineB() const; + [[nodiscard]] bool GetRefineWedge() const; + + [[nodiscard]] double GetMinB() const; + [[nodiscard]] double GetMaxB() const; + + double GetMinMosaicity() const; + double GetMaxMosaicity() const; + + double GetMinWedge() const; + double GetMaxWedge() const; + + [[nodiscard]] bool GetMergeFriedel() const; + + [[nodiscard]] std::optional GetPartialityModel() const; + [[nodiscard]] std::optional GetHighResolutionLimit_A() const; +}; diff --git a/image_analysis/IndexAndRefine.cpp b/image_analysis/IndexAndRefine.cpp index 6dfbb683..9f797144 100644 --- a/image_analysis/IndexAndRefine.cpp +++ b/image_analysis/IndexAndRefine.cpp @@ -9,7 +9,7 @@ #include "indexing/AnalyzeIndexing.h" #include "indexing/FFTIndexer.h" #include "lattice_search/LatticeSearch.h" -#include "scale_merge/ScaleAll.h" +#include "scale_merge/ScaleOnTheFly.h" IndexAndRefine::IndexAndRefine(const DiffractionExperiment &x, IndexerThreadPool *indexer) : index_ice_rings(x.GetIndexingSettings().GetIndexIceRings()), @@ -276,45 +276,20 @@ std::optional IndexAndRefine::Finalize() { return {}; } -std::optional IndexAndRefine::ScaleAllImages(const ScaleMergeOptions &opts) { - size_t nrefl = 0; - for (const auto &i: reflections) - nrefl += i.size(); - - // Need a reasonable number of reflections to make refinement meaningful - constexpr size_t kMinReflections = 20; - if (nrefl < kMinReflections) - return std::nullopt; - - // Build options focused on mosaicity refinement but allow caller override - ScaleMergeOptions options = opts; - - // If the experiment provides a wedge, propagate it - if (experiment.GetGoniometer().has_value() && rotation_indexer) { - options.wedge_deg = experiment.GetGoniometer()->GetWedge_deg(); - options.mosaicity_init_deg_vec = mosaicity; - } - - // If caller left space_group unset, try to pick it from the indexed lattice - if (!options.space_group.has_value()) { - auto sg = experiment.GetGemmiSpaceGroup(); - 
if (sg) - options.space_group = *sg; - } - - return ScaleAll(reflections, options); +void IndexAndRefine::ScaleImage(size_t n, ScaleOnTheFly &scaling, ScalingResult &result) { + auto res = scaling.Scale(reflections[n], mosaicity[n]); + result.mosaicity_deg[n] = res.mos; + result.image_bfactor_Ang2[n] = res.B; + result.image_scale_g[n] = res.G; + result.rotation_wedge_deg[n] = res.wedge; } -MergeResult IndexAndRefine::Merge(const ScaleMergeOptions &opts) { - // Build options focused on mosaicity refinement but allow caller override - ScaleMergeOptions options = opts; - - // If caller left space_group unset, try to pick it from the indexed lattice - if (!opts.space_group.has_value()) { - auto sg = experiment.GetGemmiSpaceGroup(); - if (sg) - options.space_group = *sg; - } - - return MergeReflections(reflections, options); +ScalingResult IndexAndRefine::ScaleAllImages(size_t nthreads) { + auto merge_result = MergeReflections(reflections, experiment); + ScaleOnTheFly scaling(merge_result.merged, experiment); + return scaling.Scale(reflections, mosaicity, nthreads); +} + +MergeResult IndexAndRefine::Merge() { + return MergeReflections(reflections, experiment); } diff --git a/image_analysis/IndexAndRefine.h b/image_analysis/IndexAndRefine.h index 4ef24b13..6aad02fa 100644 --- a/image_analysis/IndexAndRefine.h +++ b/image_analysis/IndexAndRefine.h @@ -15,7 +15,8 @@ #include "scale_merge/Merge.h" #include "RotationIndexer.h" #include "RotationParameters.h" -#include "scale_merge/ScaleAll.h" +#include "scale_merge/ScaleOnTheFly.h" +#include "scale_merge/ScalingResult.h" class IndexAndRefine { const bool index_ice_rings; @@ -57,17 +58,14 @@ class IndexAndRefine { const CompressedImage &image, BraggPrediction &prediction, const IndexingOutcome &outcome); + + void ScaleImage(size_t n, ScaleOnTheFly &scaling, ScalingResult &result); public: IndexAndRefine(const DiffractionExperiment &x, IndexerThreadPool *indexer); void ProcessImage(DataMessage &msg, const SpotFindingSettings 
&settings, const CompressedImage &image, BraggPrediction &prediction); - - std::optional ScaleAllImages(const ScaleMergeOptions &opts = {}); - - /// Run scale-and-merge on accumulated reflections to refine per-image - /// mosaicity (and optionally B-factors / scale factors). - /// Returns std::nullopt if there are too few reflections to be meaningful. - MergeResult Merge(const ScaleMergeOptions &opts = {}); + ScalingResult ScaleAllImages(size_t nthreads = 0); + MergeResult Merge(); std::optional Finalize(); }; diff --git a/image_analysis/scale_merge/CMakeLists.txt b/image_analysis/scale_merge/CMakeLists.txt index 2bde4a67..a4b0a686 100644 --- a/image_analysis/scale_merge/CMakeLists.txt +++ b/image_analysis/scale_merge/CMakeLists.txt @@ -1,4 +1,4 @@ -ADD_LIBRARY(JFJochScaleMerge ScaleAll.cpp ScaleAll.h FrenchWilson.cpp FrenchWilson.h +ADD_LIBRARY(JFJochScaleMerge FrenchWilson.cpp FrenchWilson.h SearchSpaceGroup.cpp SearchSpaceGroup.h Merge.cpp diff --git a/image_analysis/scale_merge/HKLKey.cpp b/image_analysis/scale_merge/HKLKey.cpp index f5ded9cf..b8cc31cb 100644 --- a/image_analysis/scale_merge/HKLKey.cpp +++ b/image_analysis/scale_merge/HKLKey.cpp @@ -43,6 +43,18 @@ HKLKey CanonicalHKL(const MergedReflection &r, bool merge_friedel, const std::op return CanonicalHKL(r.h, r.k, r.l, merge_friedel, sg); } +bool AcceptReflection(const Reflection &r, std::optional d_min_limit) { + if (!std::isfinite(r.I)) + return false; + if (!std::isfinite(r.d) || r.d <= 0.0f) + return false; + if (d_min_limit && r.d < d_min_limit) + return false; + if (!std::isfinite(r.rlp) || r.rlp == 0.0f) + return false; + return true; +} + bool AcceptReflection(const Reflection &r, double d_min_limit) { if (!std::isfinite(r.I)) return false; diff --git a/image_analysis/scale_merge/HKLKey.h b/image_analysis/scale_merge/HKLKey.h index 92fc592e..c03bf092 100644 --- a/image_analysis/scale_merge/HKLKey.h +++ b/image_analysis/scale_merge/HKLKey.h @@ -22,4 +22,5 @@ HKLKey CanonicalHKL(const Reflection 
&r, bool merge_friedel, const std::optional HKLKey CanonicalHKL(const MergedReflection &r, bool merge_friedel, const std::optional &sg); HKLKey CanonicalHKL(int32_t h, int32_t k, int32_t l, bool merge_friedel, const std::optional &sg); +bool AcceptReflection(const Reflection &r, std::optional d_min_limit); bool AcceptReflection(const Reflection &r, double d_min_limit); \ No newline at end of file diff --git a/image_analysis/scale_merge/Merge.cpp b/image_analysis/scale_merge/Merge.cpp index 83b091bc..ddc56f16 100644 --- a/image_analysis/scale_merge/Merge.cpp +++ b/image_analysis/scale_merge/Merge.cpp @@ -21,20 +21,15 @@ namespace { double sigma = 1.0; }; - double SafeSigma(double sigma, double min_sigma) { - if (!std::isfinite(sigma) || sigma <= 0.0) - return min_sigma; - return std::max(sigma, min_sigma); - } - - double SafeInv(double x, double fallback) { - if (!std::isfinite(x) || x == 0.0) - return fallback; - return 1.0 / x; + double SafeSigma(double sigma) { + // TODO: Think about safe sigma... + if (!std::isfinite(sigma) || sigma <= 1e-3) + return 1e-3; + return sigma; } std::vector BuildObservations(const std::vector> &observations, - const ScaleMergeOptions &opt, + const DiffractionExperiment &x, std::vector &slot_to_hkl) { std::map hkl_to_slot; std::vector out; @@ -44,17 +39,19 @@ namespace { nrefl += image.size(); out.reserve(nrefl); + auto scaling_settings = x.GetScalingSettings(); + for (const auto &image: observations) { for (const auto &r: image) { if (r.scaling_correction <= 0.0 || !std::isfinite(r.scaling_correction)) continue; - if (!AcceptReflection(r, opt.d_min_limit_A)) + if (!AcceptReflection(r, scaling_settings.GetHighResolutionLimit_A())) continue; HKLKey key; try { - key = CanonicalHKL(r, opt.merge_friedel, opt.space_group); + key = CanonicalHKL(r, scaling_settings.GetMergeFriedel(), x.GetGemmiSpaceGroup()); } catch (...) 
{ continue; } @@ -69,7 +66,7 @@ namespace { out.push_back({ .r = &r, .hkl = it->second, - .sigma = SafeSigma(r.sigma, opt.min_sigma) + .sigma = SafeSigma(r.sigma) }); } } @@ -144,17 +141,18 @@ namespace { } } - void Stats(const ScaleMergeOptions &opt, MergeResult &out, const std::vector &obs) { + void Stats(const DiffractionExperiment &x, MergeResult &out, const std::vector &obs) { constexpr int n_shells = 10; float d_min = std::numeric_limits::max(); float d_max = 0.0f; + auto d_min_limit_A = x.GetScalingSettings().GetHighResolutionLimit_A(); for (const auto &m: out.merged) { const auto d = static_cast(m.d); if (!std::isfinite(d) || d <= 0.0f) continue; - if (opt.d_min_limit_A > 0.0 && d < static_cast(opt.d_min_limit_A)) + if (d_min_limit_A && d < d_min_limit_A) continue; d_min = std::min(d_min, d); @@ -286,15 +284,15 @@ namespace { } } -MergeResult MergeReflections(const std::vector> &observations, - const ScaleMergeOptions &opt) { +MergeResult MergeReflections(const std::vector > &observations, + const DiffractionExperiment &x) { std::vector slot_to_hkl; - auto obs = BuildObservations(observations, opt, slot_to_hkl); + auto obs = BuildObservations(observations, x, slot_to_hkl); auto out = InitResult(slot_to_hkl, obs); Merge(slot_to_hkl.size(), out, obs); - Stats(opt, out, obs); + Stats(x, out, obs); return out; } diff --git a/image_analysis/scale_merge/Merge.h b/image_analysis/scale_merge/Merge.h index 694e3868..7d249fcf 100644 --- a/image_analysis/scale_merge/Merge.h +++ b/image_analysis/scale_merge/Merge.h @@ -7,41 +7,10 @@ #include #include +#include "../../common/DiffractionExperiment.h" #include "../../common/Reflection.h" #include "gemmi/symmetry.hpp" -struct ScaleMergeOptions { - int max_num_iterations = 100; - double max_solver_time_s = 1.0; - - double image_number_rounding = 1.0; - double min_sigma = 1e-3; - - std::optional space_group; - bool merge_friedel = true; - - std::optional wedge_deg; - - double mosaicity_init_deg = 0.17; - double 
mosaicity_min_deg = 1e-3; - double mosaicity_max_deg = 2.0; - std::vector mosaicity_init_deg_vec; - - bool regularize_scale_to_one = true; - double scale_regularization_sigma = 0.05; - - bool smoothen_g = true; - bool smoothen_mos = true; - - double d_min_limit_A = 0.0; - - int64_t image_cluster = 1; - - bool refine_wedge = false; - - enum class PartialityModel { Fixed, Rotation, Unity, Still } partiality_model = PartialityModel::Fixed; -}; - struct MergeStatisticsShell { float d_min = 0.0f; float d_max = 0.0f; @@ -64,5 +33,5 @@ struct MergeResult { MergeStatistics statistics; }; -MergeResult MergeReflections(const std::vector> &observations, - const ScaleMergeOptions &opt = {}); \ No newline at end of file +MergeResult MergeReflections(const std::vector > &observations, + const DiffractionExperiment &x); diff --git a/image_analysis/scale_merge/ScaleAll.cpp b/image_analysis/scale_merge/ScaleAll.cpp deleted file mode 100644 index 310392c1..00000000 --- a/image_analysis/scale_merge/ScaleAll.cpp +++ /dev/null @@ -1,460 +0,0 @@ -// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute -// SPDX-License-Identifier: GPL-3.0-only - -#include "ScaleAll.h" - -#include - -#include -#include -#include -#include -#include -#include -#include "HKLKey.h" - -namespace { - struct ScaleObs { - const Reflection *r = nullptr; - int image = 0; - int hkl = -1; - double sigma = 1.0; - }; - - double SafeSigma(double sigma, double min_sigma) { - if (!std::isfinite(sigma) || sigma <= 0.0) - return min_sigma; - return std::max(sigma, min_sigma); - } - - double SafeInv(double x, double fallback) { - if (!std::isfinite(x) || x == 0.0) - return fallback; - return 1.0 / x; - } - - bool AcceptReflectionForScaling(const Reflection &r, const ScaleMergeOptions &opt) { - if (!AcceptReflection(r, opt.d_min_limit_A)) - return false; - - switch (opt.partiality_model) { - case ScaleMergeOptions::PartialityModel::Rotation: - return std::isfinite(r.zeta) && r.zeta > 0.0f; - - case 
ScaleMergeOptions::PartialityModel::Still: - return std::isfinite(r.dist_ewald); - - case ScaleMergeOptions::PartialityModel::Fixed: - case ScaleMergeOptions::PartialityModel::Unity: - return true; - } - - return true; - } - - struct IntensityFixedResidual { - IntensityFixedResidual(const Reflection &r, double sigma, double partiality) - : Iobs(r.I), - weight(SafeInv(sigma, 1.0)), - correction(partiality * SafeInv(r.rlp, 1.0)) { - } - - template - bool operator()(const T *const G, const T *const Itrue, T *residual) const { - residual[0] = (T(correction) * G[0] * Itrue[0] - T(Iobs)) * T(weight); - return true; - } - - double Iobs; - double weight; - double correction; - }; - - struct IntensityRotationResidual { - IntensityRotationResidual(const Reflection &r, double sigma) - : Iobs(r.I), - weight(SafeInv(sigma, 1.0)), - delta_phi_deg(r.delta_phi_deg), - lp(SafeInv(r.rlp, 1.0)), - c1(r.zeta / std::sqrt(2.0)) { - } - - template - bool operator()(const T *const G, - const T *const mosaicity, - const T *const Itrue, - const T *const wedge, - T *residual) const { - const T half_wedge = wedge[0] / T(2.0); - const T arg_plus = T(delta_phi_deg + half_wedge) * T(c1) / mosaicity[0]; - const T arg_minus = T(delta_phi_deg - half_wedge) * T(c1) / mosaicity[0]; - const T partiality = (ceres::erf(arg_plus) - ceres::erf(arg_minus)) / T(2.0); - - residual[0] = (G[0] * partiality * T(lp) * Itrue[0] - T(Iobs)) * T(weight); - return true; - } - - double Iobs; - double weight; - double delta_phi_deg; - double lp; - double c1; - }; - - struct IntensityStillResidual { - IntensityStillResidual(const Reflection &r, double sigma) - : Iobs(r.I), - weight(SafeInv(sigma, 1.0)), - lp(SafeInv(r.rlp, 1.0)), - dist_ewald_sq(r.dist_ewald * r.dist_ewald) { - } - - template - bool operator()(const T *const G, - const T *const R_sq, - const T *const Itrue, - T *residual) const { - const T partiality = ceres::exp(-T(dist_ewald_sq) / R_sq[0]); - residual[0] = (G[0] * partiality * T(lp) * Itrue[0] - 
T(Iobs)) * T(weight); - return true; - } - - double Iobs; - double weight; - double lp; - double dist_ewald_sq; - }; - - struct ScaleRegularizationResidual { - explicit ScaleRegularizationResidual(double sigma) - : inv_sigma(SafeInv(sigma, 1.0)) { - } - - template - bool operator()(const T *const k, T *residual) const { - residual[0] = (k[0] - T(1.0)) * T(inv_sigma); - return true; - } - - double inv_sigma; - }; - - struct SmoothnessResidual { - explicit SmoothnessResidual(double sigma) - : inv_sigma(SafeInv(sigma, 1.0)) { - } - - template - bool operator()(const T *const x0, - const T *const x1, - const T *const x2, - T *residual) const { - residual[0] = (ceres::log(x0[0]) + ceres::log(x2[0]) - T(2.0) * ceres::log(x1[0])) * T(inv_sigma); - return true; - } - - double inv_sigma; - }; - - std::vector BuildScaleObs(const std::vector > &observations, - const ScaleMergeOptions &opt, - std::vector &image_used, - int &nhkl) { - std::map hkl_to_slot; - std::vector obs; - - size_t nrefl = 0; - for (const auto &image: observations) - nrefl += image.size(); - obs.reserve(nrefl); - - for (int image = 0; image < static_cast(observations.size()); ++image) { - const int image_slot = image / static_cast(opt.image_cluster); - - for (const auto &r: observations[image]) { - if (!AcceptReflectionForScaling(r, opt)) - continue; - - HKLKey key; - try { - key = CanonicalHKL(r, opt.merge_friedel, opt.space_group); - } catch (...) 
{ - continue; - } - auto it = hkl_to_slot.find(key); - if (it == hkl_to_slot.end()) { - const int slot = static_cast(hkl_to_slot.size()); - it = hkl_to_slot.emplace(key, slot).first; - } - - image_used[image_slot] = 1; - - obs.push_back({ - .r = &r, - .image = image_slot, - .hkl = it->second, - .sigma = SafeSigma(r.sigma, opt.min_sigma) - }); - } - } - - nhkl = static_cast(hkl_to_slot.size()); - return obs; - } - - std::vector InitialIntensities(int nhkl, - const ScaleMergeOptions &opt, - const std::vector &obs) { - std::vector > values(nhkl); - - for (const auto &o: obs) - values[o.hkl].push_back(o.r->I); - - std::vector Itrue(nhkl, opt.min_sigma); - - for (int h = 0; h < nhkl; ++h) { - auto &v = values[h]; - if (v.empty()) - continue; - - std::nth_element(v.begin(), v.begin() + static_cast(v.size() / 2), v.end()); - - Itrue[h] = v[v.size() / 2]; - if (!std::isfinite(Itrue[h]) || Itrue[h] <= opt.min_sigma) - Itrue[h] = opt.min_sigma; - } - - return Itrue; - } - - void Scale(const ScaleMergeOptions &opt, - const std::vector &obs, - const std::vector &image_used, - int nhkl, - std::vector &G, - std::vector &mosaicity, - std::vector &R_sq) { - ceres::Problem problem; - - auto Itrue = InitialIntensities(nhkl, opt, obs); - double wedge = opt.wedge_deg.value_or(0.0); - - for (const auto &o: obs) { - switch (opt.partiality_model) { - case ScaleMergeOptions::PartialityModel::Rotation: { - auto *cost = new ceres::AutoDiffCostFunction( - new IntensityRotationResidual(*o.r, o.sigma)); - problem.AddResidualBlock(cost, nullptr, &G[o.image], &mosaicity[o.image], &Itrue[o.hkl], &wedge); - break; - } - - case ScaleMergeOptions::PartialityModel::Still: { - auto *cost = new ceres::AutoDiffCostFunction( - new IntensityStillResidual(*o.r, o.sigma)); - problem.AddResidualBlock(cost, nullptr, &G[o.image], &R_sq[o.image], &Itrue[o.hkl]); - break; - } - - case ScaleMergeOptions::PartialityModel::Unity: { - auto *cost = new ceres::AutoDiffCostFunction( - new IntensityFixedResidual(*o.r, 
o.sigma, 1.0)); - problem.AddResidualBlock(cost, nullptr, &G[o.image], &Itrue[o.hkl]); - break; - } - - case ScaleMergeOptions::PartialityModel::Fixed: { - auto *cost = new ceres::AutoDiffCostFunction( - new IntensityFixedResidual(*o.r, o.sigma, o.r->partiality)); - problem.AddResidualBlock(cost, nullptr, &G[o.image], &Itrue[o.hkl]); - break; - } - } - } - - for (int i = 0; i < static_cast(G.size()); ++i) { - if (!image_used[i]) - continue; - - problem.SetParameterLowerBound(&G[i], 0, 1e-12); - - if (opt.regularize_scale_to_one) { - auto *cost = new ceres::AutoDiffCostFunction( - new ScaleRegularizationResidual(opt.scale_regularization_sigma)); - problem.AddResidualBlock(cost, nullptr, &G[i]); - } - } - - if (opt.smoothen_g) { - for (int i = 0; i + 2 < static_cast(G.size()); ++i) { - if (!(image_used[i] && image_used[i + 1] && image_used[i + 2])) - continue; - - auto *cost = new ceres::AutoDiffCostFunction( - new SmoothnessResidual(0.05)); - problem.AddResidualBlock(cost, nullptr, &G[i], &G[i + 1], &G[i + 2]); - } - } - - if (opt.partiality_model == ScaleMergeOptions::PartialityModel::Rotation) { - for (int i = 0; i < static_cast(mosaicity.size()); ++i) { - if (!image_used[i]) - continue; - - problem.SetParameterLowerBound(&mosaicity[i], 0, opt.mosaicity_min_deg); - problem.SetParameterUpperBound(&mosaicity[i], 0, opt.mosaicity_max_deg); - } - - if (opt.smoothen_mos) { - for (int i = 0; i + 2 < static_cast(mosaicity.size()); ++i) { - if (!(image_used[i] && image_used[i + 1] && image_used[i + 2])) - continue; - - auto *cost = new ceres::AutoDiffCostFunction( - new SmoothnessResidual(0.05)); - problem.AddResidualBlock(cost, nullptr, &mosaicity[i], &mosaicity[i + 1], &mosaicity[i + 2]); - } - } - - if (!opt.refine_wedge) - problem.SetParameterBlockConstant(&wedge); - else - problem.SetParameterLowerBound(&wedge, 0, 0.0); - } - - if (opt.partiality_model == ScaleMergeOptions::PartialityModel::Still) { - for (int i = 0; i < static_cast(R_sq.size()); ++i) { - if 
(!image_used[i]) - continue; - - problem.SetParameterLowerBound(&R_sq[i], 0, 1e-9); - problem.SetParameterUpperBound(&R_sq[i], 0, 1.0); - } - } - - unsigned int hw = std::thread::hardware_concurrency(); - if (hw == 0) - hw = 1; - - ceres::Solver::Options options; - options.linear_solver_type = ceres::SPARSE_NORMAL_CHOLESKY; - options.minimizer_progress_to_stdout = true; - options.max_num_iterations = opt.max_num_iterations; - options.max_solver_time_in_seconds = opt.max_solver_time_s; - options.num_threads = static_cast(hw); - options.function_tolerance = 1e-4; - - ceres::Solver::Summary summary; - ceres::Solve(options, &problem, &summary); - - std::cout << summary.FullReport() << std::endl; - } - - double Partiality(const Reflection &r, - const ScaleMergeOptions &opt, - int image_slot, - const std::vector &mosaicity, - const std::vector &R_sq) { - switch (opt.partiality_model) { - case ScaleMergeOptions::PartialityModel::Fixed: - return r.partiality; - - case ScaleMergeOptions::PartialityModel::Unity: - return 1.0; - - case ScaleMergeOptions::PartialityModel::Rotation: { - const double half_wedge = opt.wedge_deg.value_or(0.0) / 2.0; - const double c1 = r.zeta / std::sqrt(2.0); - const double arg_plus = (r.delta_phi_deg + half_wedge) * c1 / mosaicity[image_slot]; - const double arg_minus = (r.delta_phi_deg - half_wedge) * c1 / mosaicity[image_slot]; - - return (std::erf(arg_plus) - std::erf(arg_minus)) / 2.0; - } - - case ScaleMergeOptions::PartialityModel::Still: - return std::exp(-r.dist_ewald * r.dist_ewald / R_sq[image_slot]); - } - - return 1.0; - } - - void CalcCorrections(std::vector > &observations, - const ScaleMergeOptions &opt, - const std::vector &G, - const std::vector &mosaicity, - const std::vector &R_sq) { - size_t nrefl = 0; - for (const auto &image: observations) - nrefl += image.size(); - - for (int image = 0; image < static_cast(observations.size()); ++image) { - const int image_slot = image / static_cast(opt.image_cluster); - - for (auto &r: 
observations[image]) { - if (!AcceptReflectionForScaling(r, opt)) { - r.scaling_correction = 0.0; - continue; - } - - const double partiality = Partiality(r, opt, image_slot, mosaicity, R_sq); - if (!std::isfinite(partiality) || partiality < 0.01) { - r.partiality = 0.0; - r.scaling_correction = 0.0; - } else { - const double denom = G[image_slot] * partiality; - const double correction = denom > 0.0 ? r.rlp / denom : 0.0; - r.partiality = partiality; - r.scaling_correction = std::isfinite(correction) ? correction : 0.0; - } - } - } - } -} - -ScalingResult ScaleAll(std::vector > &observations, - const ScaleMergeOptions &opt) { - if (opt.image_cluster <= 0) - throw std::invalid_argument("image_cluster must be positive"); - - const size_t n_image_slots = observations.size() / opt.image_cluster + - (observations.size() % opt.image_cluster > 0 ? 1 : 0); - - std::vector image_used(n_image_slots, 0); - - int nhkl = 0; - auto scale_obs = BuildScaleObs(observations, opt, image_used, nhkl); - - std::vector G(n_image_slots, 1.0); - std::vector mosaicity(n_image_slots, opt.mosaicity_init_deg); - std::vector R_sq(n_image_slots, 0.001 * 0.001); - - for (int i = 0; i < static_cast(n_image_slots); ++i) { - if (!image_used[i]) { - G[i] = NAN; - mosaicity[i] = NAN; - R_sq[i] = NAN; - } else if (opt.mosaicity_init_deg_vec.size() > static_cast(i) && - std::isfinite(opt.mosaicity_init_deg_vec[i])) { - mosaicity[i] = opt.mosaicity_init_deg_vec[i]; - } - } - - Scale(opt, scale_obs, image_used, nhkl, G, mosaicity, R_sq); - - CalcCorrections(observations, opt, G, mosaicity, R_sq); - - ScalingResult out{}; - - out.image_scale_g.resize(observations.size(), NAN); - out.mosaicity_deg.resize(observations.size(), NAN); - - for (int image = 0; image < static_cast(observations.size()); ++image) { - const int image_slot = image / static_cast(opt.image_cluster); - - if (image_slot < static_cast(image_used.size()) && image_used[image_slot]) { - out.image_scale_g[image] = G[image_slot]; - 
out.mosaicity_deg[image] = mosaicity[image_slot]; - } - } - - return out; -} diff --git a/image_analysis/scale_merge/ScaleAll.h b/image_analysis/scale_merge/ScaleAll.h deleted file mode 100644 index 45766914..00000000 --- a/image_analysis/scale_merge/ScaleAll.h +++ /dev/null @@ -1,10 +0,0 @@ -// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute -// SPDX-License-Identifier: GPL-3.0-only - -#pragma once - -#include "Merge.h" -#include "ScalingResult.h" - - -ScalingResult ScaleAll(std::vector>& observations, const ScaleMergeOptions& opt = {}); \ No newline at end of file diff --git a/image_analysis/scale_merge/ScaleOnTheFly.cpp b/image_analysis/scale_merge/ScaleOnTheFly.cpp index f225d6cc..bf40c0d4 100644 --- a/image_analysis/scale_merge/ScaleOnTheFly.cpp +++ b/image_analysis/scale_merge/ScaleOnTheFly.cpp @@ -3,61 +3,273 @@ #include "ScaleOnTheFly.h" +#include #include -double SafeInv(double x, double fallback) { - if (!std::isfinite(x) || x == 0.0) - return fallback; - return 1.0 / x; -} - -struct IntensityRotationResidual { - IntensityRotationResidual(const Reflection &r, double Itrue, double sigma) - : Iobs(r.I), - Itrue(Itrue), - weight(SafeInv(sigma, 1.0)), - delta_phi_deg(r.delta_phi_deg), - lp(SafeInv(r.rlp, 1.0)), - c1(r.zeta / std::sqrt(2.0)), - b_resolution_coeff(SafeInv(-r.d * r.d / 4.0, 0.0)) { +namespace { + double SafeInv(double x, double fallback) { + if (!std::isfinite(x) || x == 0.0) + return fallback; + return 1.0 / x; } - template - bool operator()(const T *const G, - const T *const B, - const T *const mosaicity, - const T *const wedge, - T *residual) const { - const T partiality = T(1.0); - if (mosaicity > 0) { + class ScalingResidual { + protected: + const double Iobs; + const double Itrue; + const double weight; + const double lp; + const double b_resolution_coeff; + + ScalingResidual(const Reflection &r, double Itrue, double sigma) + : Iobs(r.I), + Itrue(Itrue), + weight(SafeInv(sigma, 1.0)), + lp(SafeInv(r.rlp, 1.0)), + 
b_resolution_coeff(SafeInv(-r.d * r.d / 4.0, 0.0)) { + } + }; + + struct ScalingRotationResidual : public ScalingResidual { + ScalingRotationResidual(const Reflection &r, double Itrue, double sigma) + : ScalingResidual(r, Itrue, sigma), + delta_phi_deg(r.delta_phi_deg), + c1(r.zeta / std::sqrt(2.0)) { + } + + template + bool operator()(const T *const G, + const T *const B, + const T *const mosaicity, + const T *const wedge, + T *residual) const { + if (mosaicity[0] < 1e-6) + return false; + const T half_wedge = wedge[0] / T(2.0); const T arg_plus = T(delta_phi_deg + half_wedge) * T(c1) / mosaicity[0]; const T arg_minus = T(delta_phi_deg - half_wedge) * T(c1) / mosaicity[0]; - partiality = (ceres::erf(arg_plus) - ceres::erf(arg_minus)) / T(2.0); + const T partiality = (ceres::erf(arg_plus) - ceres::erf(arg_minus)) / T(2.0); + const T B_term = ceres::exp(B[0] * T(b_resolution_coeff)); + residual[0] = (G[0] * partiality * B_term * T(lp) * Itrue - T(Iobs)) * T(weight); + return true; + } + + double delta_phi_deg; + double c1; + }; + + struct IntensityFixedResidual : public ScalingResidual { + IntensityFixedResidual(const Reflection &r, double Itrue, double sigma, double partiality) + : ScalingResidual(r, Itrue, sigma), + partiality(partiality) { + } + + template + bool operator()(const T *const G, const T *const B, T *residual) const { + const T B_term = ceres::exp(T(B[0]) * b_resolution_coeff); + residual[0] = (G[0] * T(partiality) * B_term * T(lp) * Itrue - T(Iobs)) * T(weight); + return true; + } + + double partiality; + }; + + namespace { + std::optional GetWedge(const DiffractionExperiment &x) { + auto gon = x.GetGoniometer(); + if (gon.has_value()) + return gon->GetWedge_deg(); + return std::nullopt; } - const T B_term = ceres::exp(T(B[0]) * b_resolution_coeff); - residual[0] = (G[0] * partiality * T(lp) * Itrue - T(Iobs)) * T(weight); - return true; } +} - double Iobs; - double Itrue; - double weight; - double delta_phi_deg; - double lp; - double c1; - double 
b_resolution_coeff; -}; - -ScaleOnTheFly::ScaleOnTheFly(const std::vector &ref, - std::optional sg, - bool merge_friedel) : sg(sg), merge_friedel(merge_friedel) { - for (const auto &r : ref) { - const auto key = CanonicalHKL(r, merge_friedel, sg); +ScaleOnTheFly::ScaleOnTheFly(const std::vector &ref, const DiffractionExperiment &x) + : sg(x.GetGemmiSpaceGroup()), + model(x.GetPartialityModel()), + s(x.GetScalingSettings()), + rot_wedge_deg(GetWedge(x)) { + for (const auto &r: ref) { + const auto key = CanonicalHKL(r, s.GetMergeFriedel(), sg); reference_data[key] = r.I; } } -std::optional ScaleOnTheFly::Scale(std::vector &reflections) { - +bool ScaleOnTheFly::Accept(const Reflection &r) { + if (!AcceptReflection(r, s.GetHighResolutionLimit_A())) + return false; + + switch (model) { + case PartialityModel::Rotation: + return std::isfinite(r.zeta) && r.zeta > 0.0f; + case PartialityModel::Still: + return std::isfinite(r.dist_ewald); + case PartialityModel::Fixed: + case PartialityModel::Unity: + return true; + } + + return true; +} + +ScaleOnTheFlyResult ScaleOnTheFly::Scale(std::vector &reflections, std::optional mosaicity_deg) { + auto start = std::chrono::steady_clock::now(); + + ceres::Problem problem; + + ScaleOnTheFlyResult result{ + .B = 0.0, + .G = 1.0, + .mos = mosaicity_deg.value_or(0.1), + .wedge = rot_wedge_deg.value_or(0.1) + }; + + size_t n_reflections = 0; + + for (const auto &r: reflections) { + const HKLKey key = CanonicalHKL(r, s.GetMergeFriedel(), sg); + + if (!Accept(r)) + continue; + + if (!reference_data.contains(key)) + continue; + + ++n_reflections; + + const double Itrue = reference_data.at(key); + const double sigma = r.sigma; + + switch (model) { + case PartialityModel::Fixed: { + auto *cost = new ceres::AutoDiffCostFunction( + new IntensityFixedResidual(r, Itrue, sigma, r.partiality)); + problem.AddResidualBlock(cost, nullptr, &result.G, &result.B); + } + break; + case PartialityModel::Unity: { + auto *cost = new 
ceres::AutoDiffCostFunction( + new IntensityFixedResidual(r, Itrue, sigma, 1.0)); + problem.AddResidualBlock(cost, nullptr, &result.G, &result.B); + } + break; + case PartialityModel::Rotation: { + auto *cost = new ceres::AutoDiffCostFunction( + new ScalingRotationResidual(r, Itrue, sigma)); + problem.AddResidualBlock(cost, nullptr, &result.G, &result.B, &result.mos, + &result.wedge); + } + break; + default: + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "Not supported partiality model"); + } + } + + if (n_reflections < MIN_REFLECTIONS) { + result.succesful = false; + return result; + } + result.succesful = true; + + if (s.GetRefineB()) { + problem.SetParameterLowerBound(&result.B, 0, s.GetMinB()); + problem.SetParameterUpperBound(&result.B, 0, s.GetMaxB()); + } else { + problem.SetParameterBlockConstant(&result.B); + } + + if (model == PartialityModel::Rotation) { + if (s.GetRefineWedge()) { + problem.SetParameterLowerBound(&result.wedge, 0, s.GetMinWedge()); + problem.SetParameterUpperBound(&result.wedge, 0, s.GetMaxWedge()); + } else { + problem.SetParameterBlockConstant(&result.wedge); + } + problem.SetParameterLowerBound(&result.mos, 0, s.GetMinMosaicity()); + problem.SetParameterUpperBound(&result.mos, 0, s.GetMaxMosaicity()); + } + + ceres::Solver::Options options; + options.linear_solver_type = ceres::DENSE_QR; + options.minimizer_progress_to_stdout = false; + options.num_threads = 1; + + ceres::Solver::Summary summary; + ceres::Solve(options, &problem, &summary); + + for (auto &r: reflections) { + const double B_term = exp(result.B * SafeInv(-r.d * r.d / 4.0, 0.0)); + + switch (model) { + case PartialityModel::Unity: + r.partiality = 1.0; + break; + case PartialityModel::Rotation: { + double partiality = 0.0; + ScalingRotationResidual res(r, 0, 0); + if (res(&result.G, &result.B, &result.mos, &result.wedge, &partiality)) + r.partiality = static_cast(partiality); + break; + } + default: + break; + } + r.scaling_correction = 
static_cast(r.rlp / (B_term * r.partiality * result.G)); + } + + auto end = std::chrono::steady_clock::now(); + result.time_s = std::chrono::duration(end - start).count(); + return result; +} + +ScalingResult ScaleOnTheFly::Scale(std::vector > &reflections, + const std::vector &mosaicity, + size_t nthreads) { + ScalingResult result(reflections.size()); + + if (nthreads == 0) + nthreads = std::thread::hardware_concurrency(); + + if (nthreads <= 1) { + for (int i = 0; i < reflections.size(); i++) { + std::optional mos_val; + if (model == PartialityModel::Rotation && mosaicity.size() > i) + mos_val = mosaicity[i]; + + auto local_result = Scale(reflections[i], mos_val); + result.mosaicity_deg[i] = local_result.mos; + result.image_bfactor_Ang2[i] = local_result.B; + result.image_scale_g[i] = local_result.G; + result.rotation_wedge_deg[i] = local_result.wedge; + } + } else { + auto local_nthreads = std::min(nthreads, reflections.size()); + std::vector> futures; + futures.reserve(local_nthreads); + std::atomic curr_image = 0; + + for (size_t t = 0; t < local_nthreads; ++t) + futures.emplace_back(std::async(std::launch::async, [&] { + size_t i = curr_image.fetch_add(1); + while (i < reflections.size()) { + std::optional mos_val; + if (model == PartialityModel::Rotation && mosaicity.size() > i) + mos_val = mosaicity[i]; + + auto local_result = Scale(reflections[i], mos_val); + result.mosaicity_deg[i] = local_result.mos; + result.image_bfactor_Ang2[i] = local_result.B; + result.image_scale_g[i] = local_result.G; + result.rotation_wedge_deg[i] = local_result.wedge; + i = curr_image.fetch_add(1); + } + })); + + for (auto &f: futures) + f.get(); + } + + return result; } diff --git a/image_analysis/scale_merge/ScaleOnTheFly.h b/image_analysis/scale_merge/ScaleOnTheFly.h index 798698e0..79ef6100 100644 --- a/image_analysis/scale_merge/ScaleOnTheFly.h +++ b/image_analysis/scale_merge/ScaleOnTheFly.h @@ -5,16 +5,38 @@ #include "HKLKey.h" #include "Merge.h" +#include 
"../../common/DiffractionExperiment.h" #include "ScalingResult.h" #include -class ScaleOnTheFly { - std::optional sg; - bool merge_friedel; - std::map reference_data; -public: - ScaleOnTheFly(const std::vector &ref, std::optional sg, bool merge_friedel = true); - std::optional Scale(std::vector &reflections); + +struct ScaleOnTheFlyResult { + double B = 0; + double G = 1.0; + double mos = 0.1; + double wedge = 0.1; + float time_s = 0.0; + bool succesful = false; +}; + +class ScaleOnTheFly { + constexpr static size_t MIN_REFLECTIONS = 20; + + const std::optional sg; + const PartialityModel model; + const ScalingSettings s; + const std::optional rot_wedge_deg; + + std::map reference_data; + + bool Accept(const Reflection &r); +public: + ScaleOnTheFly(const std::vector &ref, const DiffractionExperiment &x); + ScaleOnTheFlyResult Scale(std::vector &r, std::optional mosaicity_deg); + + ScalingResult Scale(std::vector > &reflections, + const std::vector &mosaicity, + size_t nthreads = 0); }; diff --git a/image_analysis/scale_merge/ScalingResult.h b/image_analysis/scale_merge/ScalingResult.h index e4701814..6b97304e 100644 --- a/image_analysis/scale_merge/ScalingResult.h +++ b/image_analysis/scale_merge/ScalingResult.h @@ -9,4 +9,12 @@ struct ScalingResult { std::vector image_scale_g; std::vector mosaicity_deg; std::vector image_bfactor_Ang2; -}; \ No newline at end of file + std::vector rotation_wedge_deg; + + explicit ScalingResult(size_t n) + : image_scale_g(n, NAN), + mosaicity_deg(n, NAN), + image_bfactor_Ang2(n, NAN), + rotation_wedge_deg(n, NAN) { + } +}; diff --git a/tools/jfjoch_process.cpp b/tools/jfjoch_process.cpp index bc7bf9f9..a6359a6f 100644 --- a/tools/jfjoch_process.cpp +++ b/tools/jfjoch_process.cpp @@ -52,7 +52,8 @@ void print_usage(Logger &logger) { logger.Info(" -X Indexing algorithm (FFBIDX|FFT|FFTW|Auto|None)"); logger.Info(" -F Use FFT indexing algorithm (shortcut for -XFFT)"); logger.Info(" -S Space group number - used for both indexing and 
scaling"); - logger.Info(" -C Fix reference unit cell: -C\"a,b,c,alpha,beta,gamma\" (comma-separated, no spaces; quotes optional)"); + logger.Info( + " -C Fix reference unit cell: -C\"a,b,c,alpha,beta,gamma\" (comma-separated, no spaces; quotes optional)"); logger.Info(" -x No least-square beam center refinement"); logger.Info(""); @@ -63,7 +64,7 @@ void print_usage(Logger &logger) { logger.Info(" -A Anomalous mode (don't merge Friedel pairs)"); } -void trim_in_place(std::string& t) { +void trim_in_place(std::string &t) { size_t b = 0; while (b < t.size() && std::isspace(static_cast(t[b]))) b++; size_t e = t.size(); @@ -71,7 +72,7 @@ void trim_in_place(std::string& t) { t = t.substr(b, e - b); }; -std::optional parse_unit_cell_arg(const char* arg) { +std::optional parse_unit_cell_arg(const char *arg) { if (!arg) return std::nullopt; @@ -101,7 +102,7 @@ std::optional parse_unit_cell_arg(const char* arg) { if (parts.size() != 6) return std::nullopt; - auto parse_float_strict = [](const std::string& t, float& out) -> bool { + auto parse_float_strict = [](const std::string &t, float &out) -> bool { try { size_t idx = 0; out = std::stof(t, &idx); @@ -149,7 +150,7 @@ int main(int argc, char **argv) { IndexingAlgorithmEnum indexing_algorithm = IndexingAlgorithmEnum::Auto; - ScaleMergeOptions::PartialityModel partiality_model = ScaleMergeOptions::PartialityModel::Fixed; + PartialityModel partiality_model = PartialityModel::Fixed; float d_min_spot_finding = 1.5; std::optional d_min_scale_merge; @@ -196,7 +197,7 @@ int main(int argc, char **argv) { case 'X': { std::string alg = optarg ? 
optarg : ""; std::transform(alg.begin(), alg.end(), alg.begin(), - [](unsigned char c) { return static_cast(std::tolower(c)); }); + [](unsigned char c) { return static_cast(std::tolower(c)); }); if (alg == "ffbidx") indexing_algorithm = IndexingAlgorithmEnum::FFBIDX; @@ -237,29 +238,33 @@ int main(int argc, char **argv) { break; case 't': photon_count_threshold_spot_finding = atoi(optarg); - logger.Info("Photon-count threshold level for spot finding set to {:d}", photon_count_threshold_spot_finding); + logger.Info("Photon-count threshold level for spot finding set to {:d}", + photon_count_threshold_spot_finding); break; case 'C': { auto uc = parse_unit_cell_arg(optarg); if (!uc.has_value()) { - logger.Error("Invalid -C unit cell. Expected: -C\"a,b,c,alpha,beta,gamma\" (6 floats, comma-separated, no spaces). Got: {}", optarg ? optarg : ""); + logger.Error( + "Invalid -C unit cell. Expected: -C\"a,b,c,alpha,beta,gamma\" (6 floats, comma-separated, no spaces). Got: {}", + optarg ? optarg : ""); print_usage(logger); exit(EXIT_FAILURE); } fixed_reference_unit_cell = uc; - logger.Info("Fixed reference unit cell set: a={:.3f} b={:.3f} c={:.3f} alpha={:.3f} beta={:.3f} gamma={:.3f}", - uc->a, uc->b, uc->c, uc->alpha, uc->beta, uc->gamma); + logger.Info( + "Fixed reference unit cell set: a={:.3f} b={:.3f} c={:.3f} alpha={:.3f} beta={:.3f} gamma={:.3f}", + uc->a, uc->b, uc->c, uc->alpha, uc->beta, uc->gamma); break; } case 'P': if (strcmp(optarg, "unity") == 0) - partiality_model = ScaleMergeOptions::PartialityModel::Unity; + partiality_model = PartialityModel::Unity; else if (strcmp(optarg, "fixed") == 0) - partiality_model = ScaleMergeOptions::PartialityModel::Fixed; + partiality_model = PartialityModel::Fixed; else if (strcmp(optarg, "rot") == 0) - partiality_model = ScaleMergeOptions::PartialityModel::Rotation; + partiality_model = PartialityModel::Rotation; else if (strcmp(optarg, "still") == 0) - partiality_model = ScaleMergeOptions::PartialityModel::Still; + 
partiality_model = PartialityModel::Still; else { logger.Error("Invalid partiality mode: {}", optarg); print_usage(logger); @@ -340,13 +345,20 @@ int main(int argc, char **argv) { indexing_settings.GeomRefinementAlgorithm(GeomRefinementAlgorithmEnum::None); experiment.ImportIndexingSettings(indexing_settings); + ScalingSettings scaling_settings; + scaling_settings.SetPartialityModel(partiality_model); + if (d_min_scale_merge) + scaling_settings.HighResolutionLimit_A(d_min_scale_merge.value()); + scaling_settings.MergeFriedel(!anomalous_mode); + experiment.ImportScalingSettings(scaling_settings); + SpotFindingSettings spot_settings; spot_settings.enable = true; spot_settings.indexing = true; spot_settings.high_resolution_limit = d_min_spot_finding; spot_settings.signal_to_noise_threshold = sigma_spot_finding; spot_settings.photon_count_threshold = photon_count_threshold_spot_finding; - if (d_min_scale_merge > 0) + if (d_min_spot_finding > 0.0f) spot_settings.high_resolution_limit = d_min_spot_finding; // Initialize Analysis Components @@ -552,27 +564,15 @@ int main(int argc, char **argv) { if (run_scaling) { logger.Info("Running scaling (mosaicity refinement) ..."); - ScaleMergeOptions scale_opts; - scale_opts.partiality_model = partiality_model; - scale_opts.max_num_iterations = 500; - scale_opts.max_solver_time_s = 240.0; // generous cutoff for now - scale_opts.merge_friedel = !anomalous_mode; - scale_opts.d_min_limit_A = d_min_scale_merge.value_or(0.0); - const bool fixed_space_group = space_group || experiment.GetGemmiSpaceGroup().has_value(); - if (space_group) - scale_opts.space_group = *space_group; - else - scale_opts.space_group = experiment.GetGemmiSpaceGroup(); - auto scale_start = std::chrono::steady_clock::now(); - auto scale_result = indexer.ScaleAllImages(scale_opts); - auto merge_result = indexer.Merge(scale_opts); + auto scale_result = indexer.ScaleAllImages(); + auto merge_result = indexer.Merge(); auto scale_end = std::chrono::steady_clock::now(); 
double scale_time = std::chrono::duration(scale_end - scale_start).count(); - - if (scale_result && !fixed_space_group) { + /* + if (!fixed_space_group) { logger.Info("Searching for space group from P1-merged reflections ..."); SearchSpaceGroupOptions sg_opts; @@ -617,122 +617,109 @@ int main(int argc, char **argv) { } else { logger.Warning("No space group accepted; keeping P1-merged result"); } + } */ + + end_msg.image_scale_factor = scale_result.image_scale_g; + + logger.Info("Scaling completed in {:.2f} s ({} unique reflections)", + scale_time, merge_result.merged.size()); + + // Print resolution-shell statistics table + { + const auto &stats = merge_result.statistics; + logger.Info(""); + logger.Info(" {:>8s} {:>8s} {:>8s} {:>8s}", + "d_min", "N_obs", "N_uniq", ""); + logger.Info(" {:->8s} {:->8s} {:->8s} {:->8s}", + "", "", "", "", "", ""); + for (const auto &sh: stats.shells) { + if (sh.unique_reflections == 0) + continue; + logger.Info(" {:8.2f} {:8d} {:8d} {:8.1f}", + sh.d_min, sh.total_observations, sh.unique_reflections, + sh.mean_i_over_sigma); + } + { + const auto &ov = stats.overall; + logger.Info(" {:->8s} {:->8s} {:->8s} {:->8s}", + "", "", "", ""); + logger.Info(" {:>8s} {:8d} {:8d} {:8.1f}", + "Overall", ov.total_observations, ov.unique_reflections, + ov.mean_i_over_sigma); + } + logger.Info(""); } - if (scale_result) { - end_msg.image_scale_factor = scale_result->image_scale_g; - - logger.Info("Scaling completed in {:.2f} s ({} unique reflections)", - scale_time, merge_result.merged.size()); - - // Print resolution-shell statistics table - { - const auto &stats = merge_result.statistics; - logger.Info(""); - logger.Info(" {:>8s} {:>8s} {:>8s} {:>8s} {:>8s} {:>10s}", - "d_min", "N_obs", "N_uniq", "Rmeas", "", "Complete"); - logger.Info(" {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->10s}", - "", "", "", "", "", ""); - for (const auto &sh: stats.shells) { - if (sh.unique_reflections == 0) - continue; - std::string compl_str = (sh.completeness > 0.0) - 
? fmt::format("{:8.1f}%", sh.completeness * 100.0) - : " N/A"; - logger.Info(" {:8.2f} {:8d} {:8d} {:8.3f}% {:8.1f} {:>10s}", - sh.d_min, sh.total_observations, sh.unique_reflections, - sh.rmeas * 100, sh.mean_i_over_sigma, compl_str); + { + const std::string img_path = output_prefix + "_image.dat"; + std::ofstream img_file(img_path); + if (!img_file) { + logger.Error("Cannot open {} for writing", img_path); + } else { + img_file << "# image_id mosaicity_deg K\n"; + for (size_t i = 0; i < scale_result.mosaicity_deg.size(); ++i) { + img_file << i << " " << scale_result.mosaicity_deg[i] << " " << scale_result.image_scale_g[i] + << "\n"; } - { - const auto &ov = stats.overall; - logger.Info(" {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->10s}", - "", "", "", "", "", ""); - std::string compl_str = (ov.completeness > 0.0) - ? fmt::format("{:8.1f}%", ov.completeness * 100.0) - : " N/A"; - logger.Info(" {:>8s} {:8d} {:8d} {:8.3f}% {:8.1f} {:>10s}", - "Overall", ov.total_observations, ov.unique_reflections, - ov.rmeas * 100, ov.mean_i_over_sigma, compl_str); - } - logger.Info(""); + img_file.close(); } + } + { + FrenchWilsonOptions fw_opts; + fw_opts.acentric = true; // typical for MX + fw_opts.num_shells = 20; + + auto fw = FrenchWilson(merge_result.merged, fw_opts); { - const std::string img_path = output_prefix + "_image.dat"; - std::ofstream img_file(img_path); - if (!img_file) { - logger.Error("Cannot open {} for writing", img_path); - } else { - img_file << "# image_id mosaicity_deg K\n"; - for (size_t i = 0; i < scale_result->mosaicity_deg.size(); ++i) { - img_file << i << " " << scale_result->mosaicity_deg[i] << " " << scale_result->image_scale_g[i] - << "\n"; - } - img_file.close(); - } - } - - { - FrenchWilsonOptions fw_opts; - fw_opts.acentric = true; // typical for MX - fw_opts.num_shells = 20; - - auto fw = FrenchWilson(merge_result.merged, fw_opts); { - { - const std::string hkl_path = output_prefix + "_amplitudes.hkl"; - std::ofstream hkl_file(hkl_path); - if 
(!hkl_file) { - logger.Error("Cannot open {} for writing", hkl_path); - } else { - for (const auto &r: fw) { - hkl_file << r.h << " " << r.k << " " << r.l << " " - << r.F << " " << r.sigmaF << " " - << r.I << " " << r.sigmaI - << "\n"; - } - hkl_file.close(); - logger.Info("Wrote {} reflections to {}", merge_result.merged.size(), hkl_path); + const std::string hkl_path = output_prefix + "_amplitudes.hkl"; + std::ofstream hkl_file(hkl_path); + if (!hkl_file) { + logger.Error("Cannot open {} for writing", hkl_path); + } else { + for (const auto &r: fw) { + hkl_file << r.h << " " << r.k << " " << r.l << " " + << r.F << " " << r.sigmaF << " " + << r.I << " " << r.sigmaI + << "\n"; } - } - MmcifMetadata cif_meta; - - if (rotation_indexer_ret.has_value()) { - cif_meta.unit_cell = rotation_indexer_ret->lattice.GetUnitCell(); - } else if (experiment.GetUnitCell().has_value()) { - cif_meta.unit_cell = experiment.GetUnitCell().value(); - } - - if (scale_opts.space_group.has_value()) { - cif_meta.space_group_name = scale_opts.space_group->hm; - cif_meta.space_group_number = scale_opts.space_group->number; - } else if (auto sg = experiment.GetGemmiSpaceGroup(); sg.has_value()) { - cif_meta.space_group_name = sg->hm; - cif_meta.space_group_number = sg->number; - } - - cif_meta.detector_name = experiment.GetDetectorDescription(); - cif_meta.wavelength_A = experiment.GetWavelength_A(); - cif_meta.detector_distance_mm = experiment.GetDetectorDistance_mm(); - cif_meta.sample_temperature_K = experiment.GetSampleTemperature_K(); - cif_meta.sample_name = experiment.GetSampleName(); - cif_meta.data_block_name = output_prefix; - - cif_meta.beamline = experiment.GetInstrumentName(); - cif_meta.source = experiment.GetSourceName(); - - const std::string cif_path = output_prefix + "_amplitudes.cif"; - try { - WriteMmcifReflections(cif_path, fw, cif_meta); - logger.Info("Wrote mmCIF reflections to {}", cif_path); - } catch (const std::exception &e) { - logger.Error("Failed to write mmCIF: 
{}", e.what()); + hkl_file.close(); + logger.Info("Wrote {} reflections to {}", merge_result.merged.size(), hkl_path); } } + MmcifMetadata cif_meta; + + if (rotation_indexer_ret.has_value()) { + cif_meta.unit_cell = rotation_indexer_ret->lattice.GetUnitCell(); + } else if (experiment.GetUnitCell().has_value()) { + cif_meta.unit_cell = experiment.GetUnitCell().value(); + } + + auto sg = experiment.GetGemmiSpaceGroup(); + if (sg.has_value()) { + cif_meta.space_group_name = sg->hm; + cif_meta.space_group_number = sg->number; + } + + cif_meta.detector_name = experiment.GetDetectorDescription(); + cif_meta.wavelength_A = experiment.GetWavelength_A(); + cif_meta.detector_distance_mm = experiment.GetDetectorDistance_mm(); + cif_meta.sample_temperature_K = experiment.GetSampleTemperature_K(); + cif_meta.sample_name = experiment.GetSampleName(); + cif_meta.data_block_name = output_prefix; + + cif_meta.beamline = experiment.GetInstrumentName(); + cif_meta.source = experiment.GetSourceName(); + + const std::string cif_path = output_prefix + "_amplitudes.cif"; + try { + WriteMmcifReflections(cif_path, fw, cif_meta); + logger.Info("Wrote mmCIF reflections to {}", cif_path); + } catch (const std::exception &e) { + logger.Error("Failed to write mmCIF: {}", e.what()); + } } - } else { - logger.Warning("Scaling skipped — too few reflections accumulated (need >= 20)"); - logger.Info("Scaling wall-clock time: {:.2f} s", scale_time); } } @@ -758,17 +745,18 @@ int main(int argc, char **argv) { } auto image_mean_time = plots.GetMeanProcessingTime(); - logger.Info("Per-image time: (mean; microseconds): decompress {:.0f} preprocess {:.0f} azint {:.0f} spot finding {:.0f} indexing {:.0f} refinement {:.0f} indexing analysis {:.0f} prediction {:.0f} integration {:.0f} total {:.0f}", - image_mean_time.compression * 1e6, - image_mean_time.preprocessing * 1e6, - image_mean_time.azint * 1e6, - image_mean_time.spot_finding * 1e6, - image_mean_time.indexing * 1e6, - image_mean_time.refinement * 
1e6, - image_mean_time.indexing_analysis * 1e6, - image_mean_time.bragg_prediction * 1e6, - image_mean_time.integration * 1e6, - image_mean_time.processing * 1e6); + logger.Info( + "Per-image time: (mean; microseconds): decompress {:.0f} preprocess {:.0f} azint {:.0f} spot finding {:.0f} indexing {:.0f} refinement {:.0f} indexing analysis {:.0f} prediction {:.0f} integration {:.0f} total {:.0f}", + image_mean_time.compression * 1e6, + image_mean_time.preprocessing * 1e6, + image_mean_time.azint * 1e6, + image_mean_time.spot_finding * 1e6, + image_mean_time.indexing * 1e6, + image_mean_time.refinement * 1e6, + image_mean_time.indexing_analysis * 1e6, + image_mean_time.bragg_prediction * 1e6, + image_mean_time.integration * 1e6, + image_mean_time.processing * 1e6); if (rotation_indexer_ret.has_value()) { auto vec0 = rotation_indexer_ret->lattice.Vec0(); -- 2.52.0 From e70b21f6de781a7530f52ee3000a72730111a23a Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Mon, 11 May 2026 08:29:41 +0200 Subject: [PATCH 028/132] HKLKey: Test properly, fix bug with merging Friedel pairs when no SG given --- image_analysis/scale_merge/HKLKey.cpp | 43 ++++++------- image_analysis/scale_merge/HKLKey.h | 13 ++++ image_analysis/scale_merge/Merge.cpp | 9 +-- image_analysis/scale_merge/ScaleOnTheFly.cpp | 7 ++- tests/CMakeLists.txt | 1 + tests/MergeScaleTest.cpp | 66 ++++++++++++++++++++ 6 files changed, 109 insertions(+), 30 deletions(-) create mode 100644 tests/MergeScaleTest.cpp diff --git a/image_analysis/scale_merge/HKLKey.cpp b/image_analysis/scale_merge/HKLKey.cpp index b8cc31cb..ffa28162 100644 --- a/image_analysis/scale_merge/HKLKey.cpp +++ b/image_analysis/scale_merge/HKLKey.cpp @@ -6,23 +6,32 @@ #include "HKLKey.h" #include "gemmi/symmetry.hpp" -HKLKey CanonicalHKL(int32_t h, int32_t k, int32_t l, bool merge_friedel, const std::optional &in_sg) { +HKLKeyGenerator::HKLKeyGenerator(bool merge_friedel, const std::optional &sg) +: merge_friedel(merge_friedel), sg(sg) {} + +HKLKey 
HKLKeyGenerator::operator()(const MergedReflection &r) const { + return operator()(r.h, r.k, r.l); +} + +HKLKey HKLKeyGenerator::operator()(const Reflection &r) const { + return operator()(r.h, r.k, r.l); +} + +HKLKey HKLKeyGenerator::operator()(int32_t h, int32_t k, int32_t l) const { HKLKey key{h, k, l, true}; - if (!in_sg.has_value()) { - if (!merge_friedel) { - const HKLKey neg{-h, -k, -l, true}; - if (std::tie(key.h, key.k, key.l) < std::tie(neg.h, neg.k, neg.l)) { - key.h = -key.h; - key.k = -key.k; - key.l = -key.l; - key.plus = false; - } + if (!sg.has_value()) { + const HKLKey neg{-h, -k, -l, true}; + if (std::tie(key.h, key.k, key.l) < std::tie(neg.h, neg.k, neg.l)) { + key.h = -key.h; + key.k = -key.k; + key.l = -key.l; + key.plus = merge_friedel; } } else { - gemmi::SpaceGroup sg = in_sg.value(); - const auto ops = sg.operations(); - const gemmi::ReciprocalAsu asu(&sg); + const auto sg_local = sg.value(); + const auto ops = sg_local.operations(); + const gemmi::ReciprocalAsu asu(&sg_local); const gemmi::Op::Miller in{h, k, l}; const auto [hkl, sign_plus] = asu.to_asu_sign(in, ops); @@ -35,14 +44,6 @@ HKLKey CanonicalHKL(int32_t h, int32_t k, int32_t l, bool merge_friedel, const s return key; } -HKLKey CanonicalHKL(const Reflection &r, bool merge_friedel, const std::optional &sg) { - return CanonicalHKL(r.h, r.k, r.l, merge_friedel, sg); -} - -HKLKey CanonicalHKL(const MergedReflection &r, bool merge_friedel, const std::optional &sg) { - return CanonicalHKL(r.h, r.k, r.l, merge_friedel, sg); -} - bool AcceptReflection(const Reflection &r, std::optional d_min_limit) { if (!std::isfinite(r.I)) return false; diff --git a/image_analysis/scale_merge/HKLKey.h b/image_analysis/scale_merge/HKLKey.h index c03bf092..93c5d505 100644 --- a/image_analysis/scale_merge/HKLKey.h +++ b/image_analysis/scale_merge/HKLKey.h @@ -16,6 +16,19 @@ struct HKLKey { bool operator<(const HKLKey &o) const { return std::tie(h, k, l, plus) < std::tie(o.h, o.k, o.l, o.plus); } + bool 
operator==(const HKLKey &o) const { + return h == o.h && k == o.k && l == o.l && plus == o.plus; + } +}; + +class HKLKeyGenerator { + bool merge_friedel; + std::optional sg; +public: + HKLKeyGenerator(bool merge_friedel, const std::optional &sg); + HKLKey operator()(const Reflection &r) const; + HKLKey operator()(const MergedReflection &r) const; + HKLKey operator()(int32_t h, int32_t k, int32_t l) const; }; HKLKey CanonicalHKL(const Reflection &r, bool merge_friedel, const std::optional &sg); diff --git a/image_analysis/scale_merge/Merge.cpp b/image_analysis/scale_merge/Merge.cpp index ddc56f16..cb68acd1 100644 --- a/image_analysis/scale_merge/Merge.cpp +++ b/image_analysis/scale_merge/Merge.cpp @@ -41,6 +41,8 @@ namespace { auto scaling_settings = x.GetScalingSettings(); + HKLKeyGenerator key_generator(scaling_settings.GetMergeFriedel(), x.GetGemmiSpaceGroup() ); + for (const auto &image: observations) { for (const auto &r: image) { @@ -49,12 +51,7 @@ namespace { if (!AcceptReflection(r, scaling_settings.GetHighResolutionLimit_A())) continue; - HKLKey key; - try { - key = CanonicalHKL(r, scaling_settings.GetMergeFriedel(), x.GetGemmiSpaceGroup()); - } catch (...) 
{ - continue; - } + HKLKey key = key_generator(r); auto it = hkl_to_slot.find(key); if (it == hkl_to_slot.end()) { diff --git a/image_analysis/scale_merge/ScaleOnTheFly.cpp b/image_analysis/scale_merge/ScaleOnTheFly.cpp index bf40c0d4..91e47d6e 100644 --- a/image_analysis/scale_merge/ScaleOnTheFly.cpp +++ b/image_analysis/scale_merge/ScaleOnTheFly.cpp @@ -90,8 +90,9 @@ ScaleOnTheFly::ScaleOnTheFly(const std::vector &ref, const Dif model(x.GetPartialityModel()), s(x.GetScalingSettings()), rot_wedge_deg(GetWedge(x)) { + HKLKeyGenerator key_generator(s.GetMergeFriedel(), sg); for (const auto &r: ref) { - const auto key = CanonicalHKL(r, s.GetMergeFriedel(), sg); + const auto key = key_generator(r); reference_data[key] = r.I; } } @@ -126,9 +127,9 @@ ScaleOnTheFlyResult ScaleOnTheFly::Scale(std::vector &reflections, s }; size_t n_reflections = 0; - + HKLKeyGenerator key_generator(s.GetMergeFriedel(), sg); for (const auto &r: reflections) { - const HKLKey key = CanonicalHKL(r, s.GetMergeFriedel(), sg); + const HKLKey key = key_generator(r); if (!Accept(r)) continue; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a0e1756b..1761d5ac 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -67,6 +67,7 @@ ADD_EXECUTABLE(jfjoch_test BraggIntegrate2DTest.cpp SearchSpaceGroupTest.cpp XDSPluginTest.cpp + MergeScaleTest.cpp ) target_link_libraries(jfjoch_test Catch2WithMain JFJochBroker JFJochReceiver JFJochReader JFJochWriter diff --git a/tests/MergeScaleTest.cpp b/tests/MergeScaleTest.cpp new file mode 100644 index 00000000..b821b8d8 --- /dev/null +++ b/tests/MergeScaleTest.cpp @@ -0,0 +1,66 @@ +// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#include +#include "../image_analysis/scale_merge/HKLKey.h" + +TEST_CASE("HKLKey_NoSG_noMergeFriedel") { + HKLKeyGenerator hkl_key_gen(false, std::nullopt); + CHECK(hkl_key_gen(-1, -2, -3) != hkl_key_gen(1,2,3)); + CHECK(hkl_key_gen(-1,-2,-3) == 
hkl_key_gen(-1,-2,-3)); + CHECK(hkl_key_gen(-1,-2,-3) != hkl_key_gen(1,-2,-3)); +} + +TEST_CASE("HKLKey_NoSG_MergeFriedel") { + HKLKeyGenerator hkl_key_gen(true, std::nullopt); + CHECK(hkl_key_gen(-1, -2, -3) == hkl_key_gen(1,2,3)); + CHECK(hkl_key_gen(-1,-2,-3) == hkl_key_gen(-1,-2,-3)); + CHECK(hkl_key_gen(-1,-2,-3) != hkl_key_gen(1,-2,-3)); +} + +TEST_CASE("HKLKey_SG1_MergeFriedel") { + HKLKeyGenerator hkl_key_gen(true, *gemmi::find_spacegroup_by_number(1)); + CHECK(hkl_key_gen(-1, -2, -3) == hkl_key_gen(1,2,3)); + CHECK(hkl_key_gen(-1,-2,-3) == hkl_key_gen(-1,-2,-3)); + CHECK(hkl_key_gen(-1,-2,-3) != hkl_key_gen(1,-2,-3)); +} + +TEST_CASE("HKLKey_SG1_NoMergeFriedel") { + HKLKeyGenerator hkl_key_gen(false, *gemmi::find_spacegroup_by_number(1)); + CHECK(hkl_key_gen(-1, -2, -3) != hkl_key_gen(1,2,3)); + CHECK(hkl_key_gen(-1,-2,-3) == hkl_key_gen(-1,-2,-3)); + CHECK(hkl_key_gen(-1,-2,-3) != hkl_key_gen(1,-2,-3)); +} + +TEST_CASE("HKLKey_SG96_MergeFriedel") { + HKLKeyGenerator hkl_key_gen(true, *gemmi::find_spacegroup_by_number(96)); + CHECK(hkl_key_gen(-1, -2, -3) == hkl_key_gen(1,2,3)); + CHECK(hkl_key_gen(-1,-2,-3) == hkl_key_gen(-1,-2,-3)); + CHECK(hkl_key_gen(1,2,3) == hkl_key_gen(-2,1,3)); + CHECK(hkl_key_gen(1,2,3) == hkl_key_gen(-1,-2,3)); + CHECK(hkl_key_gen(1,2,3) == hkl_key_gen(2,-1,3)); + CHECK(hkl_key_gen(1,2,3) == hkl_key_gen(1,-2,-3)); + CHECK(hkl_key_gen(1,2,3) == hkl_key_gen(-1,2,-3)); + CHECK(hkl_key_gen(1,2,3) == hkl_key_gen(2,1,-3)); + CHECK(hkl_key_gen(1,2,3) == hkl_key_gen(-2, -1, -3)); + + CHECK(hkl_key_gen(1,2,3) == hkl_key_gen(-2,-1,3)); + CHECK(hkl_key_gen(1,2,3) == hkl_key_gen(2, 1, 3)); +} + + +TEST_CASE("HKLKey_SG96_NoMergeFriedel") { + HKLKeyGenerator hkl_key_gen(false, *gemmi::find_spacegroup_by_number(96)); + CHECK(hkl_key_gen(-1, -2, -3) != hkl_key_gen(1,2,3)); + CHECK(hkl_key_gen(-1,-2,-3) == hkl_key_gen(-1,-2,-3)); + CHECK(hkl_key_gen(1,2,3) == hkl_key_gen(-2,1,3)); + CHECK(hkl_key_gen(1,2,3) == hkl_key_gen(-1,-2,3)); + 
CHECK(hkl_key_gen(1,2,3) == hkl_key_gen(2,-1,3)); + CHECK(hkl_key_gen(1,2,3) == hkl_key_gen(1,-2,-3)); + CHECK(hkl_key_gen(1,2,3) == hkl_key_gen(-1,2,-3)); + CHECK(hkl_key_gen(1,2,3) == hkl_key_gen(2,1,-3)); + CHECK(hkl_key_gen(1,2,3) == hkl_key_gen(-2, -1, -3)); + + CHECK(hkl_key_gen(1,2,3) != hkl_key_gen(-2,-1,3)); + CHECK(hkl_key_gen(1,2,3) != hkl_key_gen(2, 1, 3)); +} -- 2.52.0 From 486310fd87285566c4729ccc6d2eaf62f60223be Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Mon, 11 May 2026 10:10:11 +0200 Subject: [PATCH 029/132] jfjoch_process: Optimize design choices for scaling integration --- common/DiffractionExperiment.cpp | 22 ++++++++- common/DiffractionExperiment.h | 2 + common/ScalingSettings.cpp | 20 +++++++- common/ScalingSettings.h | 14 +++--- image_analysis/scale_merge/ScaleOnTheFly.cpp | 18 ++----- image_analysis/scale_merge/ScaleOnTheFly.h | 2 +- tools/jfjoch_process.cpp | 50 +++++++++++++++----- 7 files changed, 93 insertions(+), 35 deletions(-) diff --git a/common/DiffractionExperiment.cpp b/common/DiffractionExperiment.cpp index 645fcec1..c3c22788 100644 --- a/common/DiffractionExperiment.cpp +++ b/common/DiffractionExperiment.cpp @@ -1679,4 +1679,24 @@ PartialityModel DiffractionExperiment::GetPartialityModel() const { if (GetGoniometer().has_value()) return PartialityModel::Rotation; return PartialityModel::Unity; -} \ No newline at end of file +} + +std::optional DiffractionExperiment::GetRotationWedgeForScaling() const { + // Only makes sense for rotation partiality model + // If ScalingSettings set one, it is takes priority + // otherwise if this is really rotation scan, than actual rotation wedge is given + // otherwise return std::nullopt + if (GetPartialityModel() != PartialityModel::Rotation) + return std::nullopt; + if (scaling_settings.GetRotationWedgeForScaling().has_value()) + return scaling_settings.GetRotationWedgeForScaling(); + if (GetGoniometer().has_value()) + return GetGoniometer()->GetWedge_deg(); + return std::nullopt; +} + 
+bool DiffractionExperiment::GetRefineRotationWedgeInScaling() const { + if (GetRotationWedgeForScaling().has_value()) + return scaling_settings.GetRefineWedge(); + return false; +} diff --git a/common/DiffractionExperiment.h b/common/DiffractionExperiment.h index 6fcd0c36..95cc74c8 100644 --- a/common/DiffractionExperiment.h +++ b/common/DiffractionExperiment.h @@ -414,6 +414,8 @@ public: bool IsRotationIndexing() const; PartialityModel GetPartialityModel() const; + std::optional GetRotationWedgeForScaling() const; + bool GetRefineRotationWedgeInScaling() const; }; #endif //DIFFRACTIONEXPERIMENT_H diff --git a/common/ScalingSettings.cpp b/common/ScalingSettings.cpp index 372d6c75..7969c331 100644 --- a/common/ScalingSettings.cpp +++ b/common/ScalingSettings.cpp @@ -35,7 +35,7 @@ bool ScalingSettings::GetMergeFriedel() const { return merge_friedel; } -ScalingSettings &ScalingSettings::RefineWedge(bool input) { +ScalingSettings &ScalingSettings::RefineRotationWedge(bool input) { refine_wedge = input; return *this; } @@ -76,3 +76,21 @@ double ScalingSettings::GetMinWedge() const { double ScalingSettings::GetMaxWedge() const { return 10.0; } + +ScalingSettings &ScalingSettings::RotationWedgeForScaling(std::optional input) { + if (input) { + // TODO: Use fmt + if (input.value() < GetMinWedge() || input.value() > GetMaxWedge()) + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "Wedge for scaling must be between " + std::to_string(GetMinWedge()) + + " and " + std::to_string(GetMaxWedge())); + } + wedge_for_scaling = input; + return *this; +} + +std::optional ScalingSettings::GetRotationWedgeForScaling() const { + return wedge_for_scaling; +} + + diff --git a/common/ScalingSettings.h b/common/ScalingSettings.h index f61c79b3..fae31415 100644 --- a/common/ScalingSettings.h +++ b/common/ScalingSettings.h @@ -19,11 +19,12 @@ class ScalingSettings { bool merge_friedel = true; std::optional high_resolution_limit_A; - + std::optional wedge_for_scaling; 
public: ScalingSettings& SetPartialityModel(PartialityModel mode); ScalingSettings& RefineB(bool input); - ScalingSettings& RefineWedge(bool input); + ScalingSettings& RefineRotationWedge(bool input); + ScalingSettings& RotationWedgeForScaling(std::optional input); ScalingSettings& MergeFriedel(bool input); ScalingSettings& HighResolutionLimit_A(double limit); @@ -33,11 +34,12 @@ public: [[nodiscard]] double GetMinB() const; [[nodiscard]] double GetMaxB() const; - double GetMinMosaicity() const; - double GetMaxMosaicity() const; + [[nodiscard]] double GetMinMosaicity() const; + [[nodiscard]] double GetMaxMosaicity() const; - double GetMinWedge() const; - double GetMaxWedge() const; + [[nodiscard]] double GetMinWedge() const; + [[nodiscard]] std::optional GetRotationWedgeForScaling() const; + [[nodiscard]] double GetMaxWedge() const; [[nodiscard]] bool GetMergeFriedel() const; diff --git a/image_analysis/scale_merge/ScaleOnTheFly.cpp b/image_analysis/scale_merge/ScaleOnTheFly.cpp index 91e47d6e..79676491 100644 --- a/image_analysis/scale_merge/ScaleOnTheFly.cpp +++ b/image_analysis/scale_merge/ScaleOnTheFly.cpp @@ -74,23 +74,15 @@ namespace { double partiality; }; - - namespace { - std::optional GetWedge(const DiffractionExperiment &x) { - auto gon = x.GetGoniometer(); - if (gon.has_value()) - return gon->GetWedge_deg(); - return std::nullopt; - } - } } ScaleOnTheFly::ScaleOnTheFly(const std::vector &ref, const DiffractionExperiment &x) : sg(x.GetGemmiSpaceGroup()), model(x.GetPartialityModel()), s(x.GetScalingSettings()), - rot_wedge_deg(GetWedge(x)) { - HKLKeyGenerator key_generator(s.GetMergeFriedel(), sg); + rot_wedge_deg(x.GetRotationWedgeForScaling()), + refine_rot_wedge(x.GetRefineRotationWedgeInScaling()) { + const HKLKeyGenerator key_generator(s.GetMergeFriedel(), sg); for (const auto &r: ref) { const auto key = key_generator(r); reference_data[key] = r.I; @@ -122,8 +114,8 @@ ScaleOnTheFlyResult ScaleOnTheFly::Scale(std::vector &reflections, s 
ScaleOnTheFlyResult result{ .B = 0.0, .G = 1.0, - .mos = mosaicity_deg.value_or(0.1), - .wedge = rot_wedge_deg.value_or(0.1) + .mos = mosaicity_deg.value_or(0.0), + .wedge = rot_wedge_deg.value_or(0.0) }; size_t n_reflections = 0; diff --git a/image_analysis/scale_merge/ScaleOnTheFly.h b/image_analysis/scale_merge/ScaleOnTheFly.h index 79ef6100..b037ed16 100644 --- a/image_analysis/scale_merge/ScaleOnTheFly.h +++ b/image_analysis/scale_merge/ScaleOnTheFly.h @@ -27,7 +27,7 @@ class ScaleOnTheFly { const PartialityModel model; const ScalingSettings s; const std::optional rot_wedge_deg; - + const bool refine_rot_wedge; std::map reference_data; bool Accept(const Reflection &r); diff --git a/tools/jfjoch_process.cpp b/tools/jfjoch_process.cpp index a6359a6f..8e93cbfe 100644 --- a/tools/jfjoch_process.cpp +++ b/tools/jfjoch_process.cpp @@ -37,8 +37,6 @@ void print_usage(Logger &logger) { logger.Info(" -s Start image number (default: 0)"); logger.Info(" -e End image number (default: all)"); logger.Info(" -v Verbose output"); - logger.Info(" -W Include images in the written HDF5 file (otherwise only analysis results are saved)"); - logger.Info(" -U Unmerged intensities are written to a text file"); logger.Info(""); logger.Info(" Spot finding"); @@ -62,6 +60,8 @@ void print_usage(Logger &logger) { logger.Info(" -M Scale and merge (refine mosaicity) and write scaled.hkl + image.dat"); logger.Info(" -P Partiality refinement fixed|rot|unity (default: fixed)"); logger.Info(" -A Anomalous mode (don't merge Friedel pairs)"); + logger.Info(" -B Refine per image B-factor"); + logger.Info(" -w Refine image wedge during scaling with starting wedge value"); } void trim_in_place(std::string &t) { @@ -143,10 +143,12 @@ int main(int argc, char **argv) { bool anomalous_mode = false; std::optional space_group_number; std::optional fixed_reference_unit_cell; - bool write_output = false; std::optional max_spot_count_override; float sigma_spot_finding = 3.0; int64_t 
photon_count_threshold_spot_finding = 10; + bool refine_bfactor = false; + bool refine_wedge = false; + std::optional wedge_for_scaling; IndexingAlgorithmEnum indexing_algorithm = IndexingAlgorithmEnum::Auto; @@ -161,7 +163,7 @@ int main(int argc, char **argv) { } int opt; - while ((opt = getopt(argc, argv, "o:N:s:e:vc:R::FX:xd:S:MP:AD:C:T:t:W")) != -1) { + while ((opt = getopt(argc, argv, "o:N:s:e:vc:R::FX:xd:S:MP:AD:C:T:t:Bw::")) != -1) { switch (opt) { case 'o': output_prefix = optarg; @@ -175,8 +177,13 @@ int main(int argc, char **argv) { case 'e': end_image = atoi(optarg); break; - case 'W': - write_output = true; + case 'B': + refine_bfactor = true; + break; + case 'w': + refine_wedge = true; + if (optarg) + wedge_for_scaling = std::stod(optarg); break; case 'v': verbose = true; @@ -263,8 +270,6 @@ int main(int argc, char **argv) { partiality_model = PartialityModel::Fixed; else if (strcmp(optarg, "rot") == 0) partiality_model = PartialityModel::Rotation; - else if (strcmp(optarg, "still") == 0) - partiality_model = PartialityModel::Still; else { logger.Error("Invalid partiality mode: {}", optarg); print_usage(logger); @@ -350,6 +355,11 @@ int main(int argc, char **argv) { if (d_min_scale_merge) scaling_settings.HighResolutionLimit_A(d_min_scale_merge.value()); scaling_settings.MergeFriedel(!anomalous_mode); + scaling_settings.RefineB(refine_bfactor); + if (wedge_for_scaling.has_value()) { + scaling_settings.RefineRotationWedge(true); + scaling_settings.RotationWedgeForScaling(wedge_for_scaling); + } experiment.ImportScalingSettings(scaling_settings); SpotFindingSettings spot_settings; @@ -385,7 +395,7 @@ int main(int argc, char **argv) { start_message.pixel_mask["default"] = pixel_mask.GetMask(experiment); start_message.max_spot_count = experiment.GetMaxSpotCount(); - start_message.write_images = write_output; + start_message.write_images = false; start_message.file_format = FileWriterFormat::NXmxIntegrated; start_message.master_suffix = "process"; @@ 
-656,10 +666,24 @@ int main(int argc, char **argv) { if (!img_file) { logger.Error("Cannot open {} for writing", img_path); } else { - img_file << "# image_id mosaicity_deg K\n"; - for (size_t i = 0; i < scale_result.mosaicity_deg.size(); ++i) { - img_file << i << " " << scale_result.mosaicity_deg[i] << " " << scale_result.image_scale_g[i] - << "\n"; + if (experiment.GetPartialityModel() == PartialityModel::Rotation) { + img_file << "# image_id G B mosaicity_deg wedge_deg\n"; + for (size_t i = 0; i < scale_result.image_scale_g.size(); ++i) { + img_file << i + << " " << scale_result.image_scale_g[i] + << " " << scale_result.image_bfactor_Ang2[i] + << " " << scale_result.mosaicity_deg[i] + << " " << scale_result.rotation_wedge_deg[i] + << "\n"; + } + } else { + img_file << "# image_id G B\n"; + for (size_t i = 0; i < scale_result.image_scale_g.size(); ++i) { + img_file << i + << " " << scale_result.image_scale_g[i] + << " " << scale_result.image_bfactor_Ang2[i] + << "\n"; + } } img_file.close(); } -- 2.52.0 From b53f0d6474a33e6fffe337554a3b5e5e9cc72d74 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Mon, 11 May 2026 10:37:39 +0200 Subject: [PATCH 030/132] jfjoch_process: Print statistics moved to Merge.h/Merge.cpp and ScalingResult.cpp/ScalingResult.h --- common/ScalingSettings.cpp | 4 ++ common/ScalingSettings.h | 3 +- image_analysis/scale_merge/CMakeLists.txt | 3 +- image_analysis/scale_merge/Merge.cpp | 21 +++++++ image_analysis/scale_merge/Merge.h | 2 + image_analysis/scale_merge/ScaleOnTheFly.cpp | 15 +++-- image_analysis/scale_merge/ScalingResult.cpp | 38 ++++++++++++ image_analysis/scale_merge/ScalingResult.h | 9 +-- tools/jfjoch_process.cpp | 64 +++----------------- 9 files changed, 89 insertions(+), 70 deletions(-) create mode 100644 image_analysis/scale_merge/ScalingResult.cpp diff --git a/common/ScalingSettings.cpp b/common/ScalingSettings.cpp index 7969c331..04f0f8b1 100644 --- a/common/ScalingSettings.cpp +++ b/common/ScalingSettings.cpp @@ -77,6 +77,10 
@@ double ScalingSettings::GetMaxWedge() const { return 10.0; } +double ScalingSettings::GetDefaultMosaicity() const { + return 0.1; +} + ScalingSettings &ScalingSettings::RotationWedgeForScaling(std::optional input) { if (input) { // TODO: Use fmt diff --git a/common/ScalingSettings.h b/common/ScalingSettings.h index fae31415..941b6dea 100644 --- a/common/ScalingSettings.h +++ b/common/ScalingSettings.h @@ -6,7 +6,7 @@ #include #include "JFJochException.h" -enum class PartialityModel { Fixed, Rotation, Unity, Still }; +enum class PartialityModel { Fixed, Rotation, Unity }; class ScalingSettings { std::optional partiality_mode; @@ -35,6 +35,7 @@ public: [[nodiscard]] double GetMaxB() const; [[nodiscard]] double GetMinMosaicity() const; + [[nodiscard]] double GetDefaultMosaicity() const; [[nodiscard]] double GetMaxMosaicity() const; [[nodiscard]] double GetMinWedge() const; diff --git a/image_analysis/scale_merge/CMakeLists.txt b/image_analysis/scale_merge/CMakeLists.txt index a4b0a686..2ad08bcf 100644 --- a/image_analysis/scale_merge/CMakeLists.txt +++ b/image_analysis/scale_merge/CMakeLists.txt @@ -7,5 +7,6 @@ ADD_LIBRARY(JFJochScaleMerge FrenchWilson.cpp FrenchWilson.h ScaleOnTheFly.h HKLKey.cpp HKLKey.h - ScalingResult.h) + ScalingResult.h + ScalingResult.cpp) TARGET_LINK_LIBRARIES(JFJochScaleMerge Ceres::ceres Eigen3::Eigen JFJochCommon) \ No newline at end of file diff --git a/image_analysis/scale_merge/Merge.cpp b/image_analysis/scale_merge/Merge.cpp index cb68acd1..f57d8b0d 100644 --- a/image_analysis/scale_merge/Merge.cpp +++ b/image_analysis/scale_merge/Merge.cpp @@ -293,3 +293,24 @@ MergeResult MergeReflections(const std::vector > &observ return out; } + +void MergeStatistics::Print(Logger &logger) const { + logger.Info(""); + logger.Info(" {:>8s} {:>8s} {:>8s} {:>8s}", "d_min", "N_obs", "N_uniq", ""); + logger.Info(" {:->8s} {:->8s} {:->8s} {:->8s}", "", "", "", "", "", ""); + for (const auto &sh: shells) { + if (sh.unique_reflections == 0) + continue; + 
logger.Info(" {:8.2f} {:8d} {:8d} {:8.1f}", + sh.d_min, sh.total_observations, sh.unique_reflections, + sh.mean_i_over_sigma); + } + { + const auto &ov = overall; + logger.Info(" {:->8s} {:->8s} {:->8s} {:->8s}", "", "", "", ""); + logger.Info(" {:>8s} {:8d} {:8d} {:8.1f}", + "Overall", ov.total_observations, ov.unique_reflections, + ov.mean_i_over_sigma); + } + logger.Info(""); +} diff --git a/image_analysis/scale_merge/Merge.h b/image_analysis/scale_merge/Merge.h index 7d249fcf..8af88753 100644 --- a/image_analysis/scale_merge/Merge.h +++ b/image_analysis/scale_merge/Merge.h @@ -7,6 +7,7 @@ #include #include +#include "../../common/Logger.h" #include "../../common/DiffractionExperiment.h" #include "../../common/Reflection.h" #include "gemmi/symmetry.hpp" @@ -26,6 +27,7 @@ struct MergeStatisticsShell { struct MergeStatistics { std::vector shells; MergeStatisticsShell overall; + void Print(Logger &logger) const; }; struct MergeResult { diff --git a/image_analysis/scale_merge/ScaleOnTheFly.cpp b/image_analysis/scale_merge/ScaleOnTheFly.cpp index 79676491..45f96f6a 100644 --- a/image_analysis/scale_merge/ScaleOnTheFly.cpp +++ b/image_analysis/scale_merge/ScaleOnTheFly.cpp @@ -96,8 +96,6 @@ bool ScaleOnTheFly::Accept(const Reflection &r) { switch (model) { case PartialityModel::Rotation: return std::isfinite(r.zeta) && r.zeta > 0.0f; - case PartialityModel::Still: - return std::isfinite(r.dist_ewald); case PartialityModel::Fixed: case PartialityModel::Unity: return true; @@ -113,11 +111,17 @@ ScaleOnTheFlyResult ScaleOnTheFly::Scale(std::vector &reflections, s ScaleOnTheFlyResult result{ .B = 0.0, - .G = 1.0, - .mos = mosaicity_deg.value_or(0.0), - .wedge = rot_wedge_deg.value_or(0.0) + .G = 1.0 }; + if (model == PartialityModel::Rotation) { + result.mos = mosaicity_deg.value_or(s.GetDefaultMosaicity()); + result.wedge = rot_wedge_deg.value_or(0.0); + } else { + result.mos = NAN; + result.wedge = NAN; + } + size_t n_reflections = 0; HKLKeyGenerator 
key_generator(s.GetMergeFriedel(), sg); for (const auto &r: reflections) { @@ -207,6 +211,7 @@ ScaleOnTheFlyResult ScaleOnTheFly::Scale(std::vector &reflections, s break; } default: + // For fixed partiality there is no need to change anything break; } r.scaling_correction = static_cast(r.rlp / (B_term * r.partiality * result.G)); diff --git a/image_analysis/scale_merge/ScalingResult.cpp b/image_analysis/scale_merge/ScalingResult.cpp new file mode 100644 index 00000000..8c683da9 --- /dev/null +++ b/image_analysis/scale_merge/ScalingResult.cpp @@ -0,0 +1,38 @@ +// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#include "ScalingResult.h" +#include +#include + +#include "../../common/JFJochException.h" + +ScalingResult::ScalingResult(size_t n) + : image_scale_g(n, NAN), + mosaicity_deg(n, NAN), + image_bfactor_Ang2(n, NAN), + rotation_wedge_deg(n, NAN) { +} + +void ScalingResult::SaveToFile(const std::string &filename) { + const std::string img_path = filename + "_image.dat"; + std::ofstream img_file(img_path); + if (!img_file) { + throw JFJochException(JFJochExceptionCategory::FileWriteError + , "Cannot open {} for writing"); + } + + img_file << "# image_id G B mosaicity_deg wedge_deg\n"; + + for (size_t i = 0; i < image_scale_g.size(); ++i) { + img_file << i + << " " << image_scale_g[i] + << " " << image_bfactor_Ang2[i] + << " " << mosaicity_deg[i] + << " " << rotation_wedge_deg[i] + << "\n"; + } + + img_file.close(); +} + diff --git a/image_analysis/scale_merge/ScalingResult.h b/image_analysis/scale_merge/ScalingResult.h index 6b97304e..880f5c55 100644 --- a/image_analysis/scale_merge/ScalingResult.h +++ b/image_analysis/scale_merge/ScalingResult.h @@ -4,6 +4,7 @@ #pragma once #include +#include struct ScalingResult { std::vector image_scale_g; @@ -11,10 +12,6 @@ struct ScalingResult { std::vector image_bfactor_Ang2; std::vector rotation_wedge_deg; - explicit ScalingResult(size_t n) - : 
image_scale_g(n, NAN), - mosaicity_deg(n, NAN), - image_bfactor_Ang2(n, NAN), - rotation_wedge_deg(n, NAN) { - } + explicit ScalingResult(size_t n); + void SaveToFile(const std::string &filename); }; diff --git a/tools/jfjoch_process.cpp b/tools/jfjoch_process.cpp index 8e93cbfe..20e67be5 100644 --- a/tools/jfjoch_process.cpp +++ b/tools/jfjoch_process.cpp @@ -581,7 +581,7 @@ int main(int argc, char **argv) { auto merge_result = indexer.Merge(); auto scale_end = std::chrono::steady_clock::now(); double scale_time = std::chrono::duration(scale_end - scale_start).count(); - /* + if (!fixed_space_group) { logger.Info("Searching for space group from P1-merged reflections ..."); @@ -608,7 +608,7 @@ int main(int argc, char **argv) { } } logger.Info(""); - + /* if (sg_search.best_space_group.has_value()) { logger.Info("Re-running scaling in detected space group {}", sg_search.best_space_group->short_name()); @@ -626,8 +626,8 @@ int main(int argc, char **argv) { } } else { logger.Warning("No space group accepted; keeping P1-merged result"); - } - } */ + } */ + } end_msg.image_scale_factor = scale_result.image_scale_g; @@ -635,59 +635,9 @@ int main(int argc, char **argv) { scale_time, merge_result.merged.size()); // Print resolution-shell statistics table - { - const auto &stats = merge_result.statistics; - logger.Info(""); - logger.Info(" {:>8s} {:>8s} {:>8s} {:>8s}", - "d_min", "N_obs", "N_uniq", ""); - logger.Info(" {:->8s} {:->8s} {:->8s} {:->8s}", - "", "", "", "", "", ""); - for (const auto &sh: stats.shells) { - if (sh.unique_reflections == 0) - continue; - logger.Info(" {:8.2f} {:8d} {:8d} {:8.1f}", - sh.d_min, sh.total_observations, sh.unique_reflections, - sh.mean_i_over_sigma); - } - { - const auto &ov = stats.overall; - logger.Info(" {:->8s} {:->8s} {:->8s} {:->8s}", - "", "", "", ""); - logger.Info(" {:>8s} {:8d} {:8d} {:8.1f}", - "Overall", ov.total_observations, ov.unique_reflections, - ov.mean_i_over_sigma); - } - logger.Info(""); - } - - { - const 
std::string img_path = output_prefix + "_image.dat"; - std::ofstream img_file(img_path); - if (!img_file) { - logger.Error("Cannot open {} for writing", img_path); - } else { - if (experiment.GetPartialityModel() == PartialityModel::Rotation) { - img_file << "# image_id G B mosaicity_deg wedge_deg\n"; - for (size_t i = 0; i < scale_result.image_scale_g.size(); ++i) { - img_file << i - << " " << scale_result.image_scale_g[i] - << " " << scale_result.image_bfactor_Ang2[i] - << " " << scale_result.mosaicity_deg[i] - << " " << scale_result.rotation_wedge_deg[i] - << "\n"; - } - } else { - img_file << "# image_id G B\n"; - for (size_t i = 0; i < scale_result.image_scale_g.size(); ++i) { - img_file << i - << " " << scale_result.image_scale_g[i] - << " " << scale_result.image_bfactor_Ang2[i] - << "\n"; - } - } - img_file.close(); - } - } + const auto &stats = merge_result.statistics; + stats.Print(logger); + scale_result.SaveToFile(output_prefix + "_scale.dat"); { FrenchWilsonOptions fw_opts; -- 2.52.0 From 9ec8ebcde8945556ec132836b218e8de9e3bb97c Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Mon, 11 May 2026 13:06:28 +0200 Subject: [PATCH 031/132] jfjoch_process: Add space group number handling --- tools/jfjoch_process.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/jfjoch_process.cpp b/tools/jfjoch_process.cpp index 20e67be5..396c1cb2 100644 --- a/tools/jfjoch_process.cpp +++ b/tools/jfjoch_process.cpp @@ -141,7 +141,7 @@ int main(int argc, char **argv) { bool refine_beam_center = true; bool run_scaling = false; bool anomalous_mode = false; - std::optional space_group_number; + std::optional space_group_number; std::optional fixed_reference_unit_cell; std::optional max_spot_count_override; float sigma_spot_finding = 3.0; @@ -328,6 +328,7 @@ int main(int argc, char **argv) { experiment.OverwriteExistingFiles(true); experiment.PolarizationFactor(0.99); experiment.SetFileWriterFormat(FileWriterFormat::NXmxLegacy); + 
experiment.SpaceGroupNumber(space_group_number); if (fixed_reference_unit_cell.has_value()) experiment.SetUnitCell(*fixed_reference_unit_cell); -- 2.52.0 From 4e6a6e2aa288b17352330db0a6b0daae2f06f81d Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Mon, 11 May 2026 13:11:42 +0200 Subject: [PATCH 032/132] jfjoch_process: Fixes to parameter logic --- tools/jfjoch_process.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tools/jfjoch_process.cpp b/tools/jfjoch_process.cpp index 396c1cb2..42ef6e45 100644 --- a/tools/jfjoch_process.cpp +++ b/tools/jfjoch_process.cpp @@ -228,7 +228,7 @@ int main(int argc, char **argv) { break; case 'D': d_min_scale_merge = atof(optarg); - logger.Info("High resolution limit for scaling/merging set to {:.2f} A", d_min_spot_finding); + logger.Info("High resolution limit for scaling/merging set to {:.2f} A", d_min_scale_merge.value()); break; case 'S': space_group_number = atoi(optarg); @@ -357,10 +357,10 @@ int main(int argc, char **argv) { scaling_settings.HighResolutionLimit_A(d_min_scale_merge.value()); scaling_settings.MergeFriedel(!anomalous_mode); scaling_settings.RefineB(refine_bfactor); - if (wedge_for_scaling.has_value()) { - scaling_settings.RefineRotationWedge(true); + scaling_settings.RefineRotationWedge(refine_wedge); + if (wedge_for_scaling.has_value()) scaling_settings.RotationWedgeForScaling(wedge_for_scaling); - } + experiment.ImportScalingSettings(scaling_settings); SpotFindingSettings spot_settings; @@ -491,7 +491,6 @@ int main(int argc, char **argv) { std::chrono::duration image_duration = image_end_time - image_start_time; msg.processing_time_s = image_duration.count(); - msg.original_number = msg.number; msg.run_number = experiment.GetRunNumber(); msg.run_name = experiment.GetRunName(); @@ -715,7 +714,7 @@ int main(int argc, char **argv) { logger.Info("Total throughput:{:.2f} MB/s", throughput_MBs); // Print extended stats similar to Receiver - if (!end_msg.indexing_rate.has_value()) { + 
if (end_msg.indexing_rate.has_value()) { logger.Info("Indexing rate: {:.2f}%", end_msg.indexing_rate.value() * 100.0); } -- 2.52.0 From aa347096d212d9b73ee01c2aceb3580570c85252 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Mon, 11 May 2026 13:12:02 +0200 Subject: [PATCH 033/132] Merge: Remove unused statistics values (for now) --- image_analysis/scale_merge/Merge.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/image_analysis/scale_merge/Merge.h b/image_analysis/scale_merge/Merge.h index 8af88753..95908919 100644 --- a/image_analysis/scale_merge/Merge.h +++ b/image_analysis/scale_merge/Merge.h @@ -20,8 +20,6 @@ struct MergeStatisticsShell { int unique_reflections = 0; double rmeas = 0.0; double mean_i_over_sigma = 0.0; - double completeness = 0.0; - int possible_reflections = 0; }; struct MergeStatistics { -- 2.52.0 From c92d01222cc7d2501232ff2eb70004c7eaa11c4a Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Mon, 11 May 2026 13:15:52 +0200 Subject: [PATCH 034/132] ScaleOnTheFly: Calculate rotation partiality correctly --- image_analysis/scale_merge/ScaleOnTheFly.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/image_analysis/scale_merge/ScaleOnTheFly.cpp b/image_analysis/scale_merge/ScaleOnTheFly.cpp index 45f96f6a..77a0a613 100644 --- a/image_analysis/scale_merge/ScaleOnTheFly.cpp +++ b/image_analysis/scale_merge/ScaleOnTheFly.cpp @@ -13,6 +13,17 @@ namespace { return 1.0 / x; } + float RotationPartiality( double delta_phi_deg, + double zeta, + double mosaicity_deg, + double wedge_deg) { + const double half_wedge = wedge_deg / 2.0; + const double c1 = zeta / std::sqrt(2.0); + const double arg_plus = (delta_phi_deg + half_wedge) * c1 / mosaicity_deg; + const double arg_minus = (delta_phi_deg - half_wedge) * c1 / mosaicity_deg; + return static_cast((std::erf(arg_plus) - std::erf(arg_minus)) / 2.0); + } + class ScalingResidual { protected: const double Iobs; @@ -204,10 +215,8 @@ ScaleOnTheFlyResult 
ScaleOnTheFly::Scale(std::vector &reflections, s r.partiality = 1.0; break; case PartialityModel::Rotation: { - double partiality = 0.0; - ScalingRotationResidual res(r, 0, 0); - if (res(&result.G, &result.B, &result.mos, &result.wedge, &partiality)) - r.partiality = static_cast(partiality); + if (std::isfinite(r.delta_phi_deg) && std::isfinite(r.zeta) && result.mos > 1e-6) + r.partiality = RotationPartiality(r.delta_phi_deg, r.zeta, result.mos, result.wedge); break; } default: -- 2.52.0 From cecb3024e018be10302265880a1d1429815f650b Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Mon, 11 May 2026 13:17:10 +0200 Subject: [PATCH 035/132] ScaleOnTheFly: Fix B term --- image_analysis/scale_merge/ScaleOnTheFly.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/image_analysis/scale_merge/ScaleOnTheFly.cpp b/image_analysis/scale_merge/ScaleOnTheFly.cpp index 77a0a613..1d155883 100644 --- a/image_analysis/scale_merge/ScaleOnTheFly.cpp +++ b/image_analysis/scale_merge/ScaleOnTheFly.cpp @@ -37,7 +37,7 @@ namespace { Itrue(Itrue), weight(SafeInv(sigma, 1.0)), lp(SafeInv(r.rlp, 1.0)), - b_resolution_coeff(SafeInv(-r.d * r.d / 4.0, 0.0)) { + b_resolution_coeff(-SafeInv(4.0 * r.d * r.d, 0.0)) { } }; @@ -208,7 +208,7 @@ ScaleOnTheFlyResult ScaleOnTheFly::Scale(std::vector &reflections, s ceres::Solve(options, &problem, &summary); for (auto &r: reflections) { - const double B_term = exp(result.B * SafeInv(-r.d * r.d / 4.0, 0.0)); + const double B_term = exp(result.B * -SafeInv(4.0 * r.d * r.d, 0.0)); switch (model) { case PartialityModel::Unity: -- 2.52.0 From 0cf801c2d26dcf1df67799a43cb5371a6935ef4f Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Mon, 11 May 2026 13:18:49 +0200 Subject: [PATCH 036/132] Merge: Remove old statistics --- image_analysis/scale_merge/Merge.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/image_analysis/scale_merge/Merge.cpp b/image_analysis/scale_merge/Merge.cpp index f57d8b0d..9477504d 100644 --- 
a/image_analysis/scale_merge/Merge.cpp +++ b/image_analysis/scale_merge/Merge.cpp @@ -250,8 +250,6 @@ namespace { ss.mean_i_over_sigma = sa.n_i_over_sigma > 0 ? sa.sum_i_over_sigma / sa.n_i_over_sigma : 0.0; - ss.completeness = 0.0; - ss.possible_reflections = 0; } auto &overall = out.statistics.overall; @@ -276,8 +274,6 @@ namespace { overall.unique_reflections = static_cast(all_unique.size()); overall.rmeas = rmeas_den > 0.0 ? rmeas_num / rmeas_den : 0.0; overall.mean_i_over_sigma = n_i_over_sigma > 0 ? sum_i_over_sigma / n_i_over_sigma : 0.0; - overall.completeness = 0.0; - overall.possible_reflections = 0; } } -- 2.52.0 From b1fe201047d4aefbf549c963526365e5fa1bd3c9 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Mon, 11 May 2026 13:25:02 +0200 Subject: [PATCH 037/132] Clean-up --- image_analysis/scale_merge/Merge.cpp | 2 +- image_analysis/scale_merge/ScaleOnTheFly.cpp | 32 +++++++++++++++----- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/image_analysis/scale_merge/Merge.cpp b/image_analysis/scale_merge/Merge.cpp index 9477504d..29939e5c 100644 --- a/image_analysis/scale_merge/Merge.cpp +++ b/image_analysis/scale_merge/Merge.cpp @@ -293,7 +293,7 @@ MergeResult MergeReflections(const std::vector > &observ void MergeStatistics::Print(Logger &logger) const { logger.Info(""); logger.Info(" {:>8s} {:>8s} {:>8s} {:>8s}", "d_min", "N_obs", "N_uniq", ""); - logger.Info(" {:->8s} {:->8s} {:->8s} {:->8s}", "", "", "", "", "", ""); + logger.Info(" {:->8s} {:->8s} {:->8s} {:->8s}", "", "", "", ""); for (const auto &sh: shells) { if (sh.unique_reflections == 0) continue; diff --git a/image_analysis/scale_merge/ScaleOnTheFly.cpp b/image_analysis/scale_merge/ScaleOnTheFly.cpp index 1d155883..99c29b7b 100644 --- a/image_analysis/scale_merge/ScaleOnTheFly.cpp +++ b/image_analysis/scale_merge/ScaleOnTheFly.cpp @@ -58,11 +58,11 @@ namespace { return false; const T half_wedge = wedge[0] / T(2.0); - const T arg_plus = T(delta_phi_deg + half_wedge) * T(c1) / 
mosaicity[0]; - const T arg_minus = T(delta_phi_deg - half_wedge) * T(c1) / mosaicity[0]; + const T arg_plus = (T(delta_phi_deg) + half_wedge) * T(c1) / mosaicity[0]; + const T arg_minus = (T(delta_phi_deg) - half_wedge) * T(c1) / mosaicity[0]; const T partiality = (ceres::erf(arg_plus) - ceres::erf(arg_minus)) / T(2.0); const T B_term = ceres::exp(B[0] * T(b_resolution_coeff)); - residual[0] = (G[0] * partiality * B_term * T(lp) * Itrue - T(Iobs)) * T(weight); + residual[0] = (G[0] * partiality * B_term * T(lp) * T(Itrue) - T(Iobs)) * T(weight); return true; } @@ -126,7 +126,10 @@ ScaleOnTheFlyResult ScaleOnTheFly::Scale(std::vector &reflections, s }; if (model == PartialityModel::Rotation) { - result.mos = mosaicity_deg.value_or(s.GetDefaultMosaicity()); + if (mosaicity_deg && std::isfinite(*mosaicity_deg) && *mosaicity_deg > 0.0) + result.mos = *mosaicity_deg; + else + result.mos = s.GetDefaultMosaicity(); result.wedge = rot_wedge_deg.value_or(0.0); } else { result.mos = NAN; @@ -189,7 +192,7 @@ ScaleOnTheFlyResult ScaleOnTheFly::Scale(std::vector &reflections, s } if (model == PartialityModel::Rotation) { - if (s.GetRefineWedge()) { + if (refine_rot_wedge) { problem.SetParameterLowerBound(&result.wedge, 0, s.GetMinWedge()); problem.SetParameterUpperBound(&result.wedge, 0, s.GetMaxWedge()); } else { @@ -223,7 +226,14 @@ ScaleOnTheFlyResult ScaleOnTheFly::Scale(std::vector &reflections, s // For fixed partiality there is no need to change anything break; } - r.scaling_correction = static_cast(r.rlp / (B_term * r.partiality * result.G)); + const double denom = B_term * r.partiality * result.G; + if (std::isfinite(r.rlp) && + std::isfinite(denom) && + denom > 0.0) { + r.scaling_correction = static_cast(r.rlp / denom); + } else { + r.scaling_correction = NAN; + } } auto end = std::chrono::steady_clock::now(); @@ -242,7 +252,10 @@ ScalingResult ScaleOnTheFly::Scale(std::vector > &reflec if (nthreads <= 1) { for (int i = 0; i < reflections.size(); i++) { std::optional 
mos_val; - if (model == PartialityModel::Rotation && mosaicity.size() > i) + if (model == PartialityModel::Rotation + && mosaicity.size() > i + && std::isfinite(mosaicity[i]) + && mosaicity[i] > 0.0) mos_val = mosaicity[i]; auto local_result = Scale(reflections[i], mos_val); @@ -262,7 +275,10 @@ ScalingResult ScaleOnTheFly::Scale(std::vector > &reflec size_t i = curr_image.fetch_add(1); while (i < reflections.size()) { std::optional mos_val; - if (model == PartialityModel::Rotation && mosaicity.size() > i) + if (model == PartialityModel::Rotation + && mosaicity.size() > i + && std::isfinite(mosaicity[i]) + && mosaicity[i] > 0.0) mos_val = mosaicity[i]; auto local_result = Scale(reflections[i], mos_val); -- 2.52.0 From a468e462229c760836bcc7d3eedf3d397ab0f96d Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Mon, 11 May 2026 13:56:05 +0200 Subject: [PATCH 038/132] Merge: Remove scaling correction for now (need to think about it) --- image_analysis/scale_merge/Merge.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/image_analysis/scale_merge/Merge.cpp b/image_analysis/scale_merge/Merge.cpp index 29939e5c..186d4c1e 100644 --- a/image_analysis/scale_merge/Merge.cpp +++ b/image_analysis/scale_merge/Merge.cpp @@ -119,8 +119,8 @@ namespace { if (!std::isfinite(I_corr) || !std::isfinite(sigma_corr) || sigma_corr <= 0.0) continue; - // Extra factor o.r->scaling_correction down-weights weak images / low partiality observations. 
- const double w = o.r->scaling_correction / (sigma_corr * sigma_corr); + // TODO: Figure out right way to handle this + const double w = 1.0 / (sigma_corr * sigma_corr); auto &a = acc[o.hkl]; a.sum_wI += w * I_corr; -- 2.52.0 From b7627b5a11107137b1244cb9d6eab1bccb19d89d Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Mon, 11 May 2026 13:59:16 +0200 Subject: [PATCH 039/132] jfjoch_process: Multiple scaling iterations --- tools/jfjoch_process.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tools/jfjoch_process.cpp b/tools/jfjoch_process.cpp index 42ef6e45..639d386b 100644 --- a/tools/jfjoch_process.cpp +++ b/tools/jfjoch_process.cpp @@ -577,8 +577,20 @@ int main(int argc, char **argv) { const bool fixed_space_group = space_group || experiment.GetGemmiSpaceGroup().has_value(); auto scale_start = std::chrono::steady_clock::now(); - auto scale_result = indexer.ScaleAllImages(); + for (int i = 0; i < 3; i++) { + auto iter_start = std::chrono::steady_clock::now(); + auto scale_result = indexer.ScaleAllImages(); + end_msg.image_scale_factor = scale_result.image_scale_g; + + scale_result.SaveToFile(output_prefix + "_iter" + std::to_string(i) + "_scale.dat"); + + auto iter_end = std::chrono::steady_clock::now(); + double iter_time = std::chrono::duration(iter_end - iter_start).count(); + logger.Info("Scaling iteration {} took {:.3f} seconds", i, iter_time); + } auto merge_result = indexer.Merge(); + + auto scale_end = std::chrono::steady_clock::now(); double scale_time = std::chrono::duration(scale_end - scale_start).count(); @@ -629,7 +641,6 @@ int main(int argc, char **argv) { } */ } - end_msg.image_scale_factor = scale_result.image_scale_g; logger.Info("Scaling completed in {:.2f} s ({} unique reflections)", scale_time, merge_result.merged.size()); @@ -637,7 +648,6 @@ int main(int argc, char **argv) { // Print resolution-shell statistics table const auto &stats = merge_result.statistics; stats.Print(logger); - 
scale_result.SaveToFile(output_prefix + "_scale.dat"); { FrenchWilsonOptions fw_opts; -- 2.52.0 From e45d1ff4d32efb4080a839d0e0784f2ce88a94ab Mon Sep 17 00:00:00 2001 From: takaba_k Date: Tue, 12 May 2026 16:04:45 +0200 Subject: [PATCH 040/132] jfjoch_process: removed geometry restriction option --- image_analysis/IndexAndRefine.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/image_analysis/IndexAndRefine.cpp b/image_analysis/IndexAndRefine.cpp index 9f797144..253d338c 100644 --- a/image_analysis/IndexAndRefine.cpp +++ b/image_analysis/IndexAndRefine.cpp @@ -126,11 +126,7 @@ void IndexAndRefine::RefineGeometryIfNeeded(DataMessage &msg, IndexAndRefine::In case GeomRefinementAlgorithmEnum::None: break; case GeomRefinementAlgorithmEnum::BeamCenter: - if (experiment.IsRotationIndexing()) { - XtalOptimizerRotationOnly(data, msg.spots, 0.2); - XtalOptimizerRotationOnly(data, msg.spots, 0.1); - XtalOptimizerRotationOnly(data, msg.spots, 0.05); - } else if (XtalOptimizer(data, msg.spots)) { + if (XtalOptimizer(data, msg.spots)) { outcome.experiment.BeamX_pxl(data.geom.GetBeamX_pxl()) .BeamY_pxl(data.geom.GetBeamY_pxl()); outcome.beam_center_updated = true; -- 2.52.0 From 2fa9293fdadbdc94543df9dd768156feeb76ec96 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Tue, 12 May 2026 19:38:15 +0200 Subject: [PATCH 041/132] HKLKeyGenerator: Make HKLKeyGenerator having already prebuilt gemmi data structures --- image_analysis/scale_merge/HKLKey.cpp | 18 +++++++++++------- image_analysis/scale_merge/HKLKey.h | 8 ++++++-- image_analysis/scale_merge/Merge.cpp | 2 +- image_analysis/scale_merge/ScaleOnTheFly.cpp | 11 +++++------ image_analysis/scale_merge/ScaleOnTheFly.h | 1 + tests/MergeScaleTest.cpp | 4 ++-- 6 files changed, 26 insertions(+), 18 deletions(-) diff --git a/image_analysis/scale_merge/HKLKey.cpp b/image_analysis/scale_merge/HKLKey.cpp index ffa28162..3f05db05 100644 --- a/image_analysis/scale_merge/HKLKey.cpp +++ 
b/image_analysis/scale_merge/HKLKey.cpp @@ -6,8 +6,16 @@ #include "HKLKey.h" #include "gemmi/symmetry.hpp" -HKLKeyGenerator::HKLKeyGenerator(bool merge_friedel, const std::optional &sg) -: merge_friedel(merge_friedel), sg(sg) {} +HKLKeyGenerator::HKLKeyGenerator(bool merge_friedel, int32_t space_group_number) + : HKLKeyGenerator(merge_friedel, *gemmi::find_spacegroup_by_number(space_group_number)) { +} + +HKLKeyGenerator::HKLKeyGenerator(bool merge_friedel, const gemmi::SpaceGroup &sg) + : merge_friedel(merge_friedel), + sg(sg), + ops(sg.operations()), + asu(&sg) { +} HKLKey HKLKeyGenerator::operator()(const MergedReflection &r) const { return operator()(r.h, r.k, r.l); @@ -20,7 +28,7 @@ HKLKey HKLKeyGenerator::operator()(const Reflection &r) const { HKLKey HKLKeyGenerator::operator()(int32_t h, int32_t k, int32_t l) const { HKLKey key{h, k, l, true}; - if (!sg.has_value()) { + if (sg.number == 1) { const HKLKey neg{-h, -k, -l, true}; if (std::tie(key.h, key.k, key.l) < std::tie(neg.h, neg.k, neg.l)) { key.h = -key.h; @@ -29,10 +37,6 @@ HKLKey HKLKeyGenerator::operator()(int32_t h, int32_t k, int32_t l) const { key.plus = merge_friedel; } } else { - const auto sg_local = sg.value(); - const auto ops = sg_local.operations(); - const gemmi::ReciprocalAsu asu(&sg_local); - const gemmi::Op::Miller in{h, k, l}; const auto [hkl, sign_plus] = asu.to_asu_sign(in, ops); diff --git a/image_analysis/scale_merge/HKLKey.h b/image_analysis/scale_merge/HKLKey.h index 93c5d505..8084d8e9 100644 --- a/image_analysis/scale_merge/HKLKey.h +++ b/image_analysis/scale_merge/HKLKey.h @@ -23,9 +23,13 @@ struct HKLKey { class HKLKeyGenerator { bool merge_friedel; - std::optional sg; + gemmi::SpaceGroup sg; + gemmi::GroupOps ops; + gemmi::ReciprocalAsu asu; + public: - HKLKeyGenerator(bool merge_friedel, const std::optional &sg); + HKLKeyGenerator(bool merge_friedel, int32_t space_group_number); + HKLKeyGenerator(bool merge_friedel, const gemmi::SpaceGroup &sg); HKLKey operator()(const 
Reflection &r) const; HKLKey operator()(const MergedReflection &r) const; HKLKey operator()(int32_t h, int32_t k, int32_t l) const; diff --git a/image_analysis/scale_merge/Merge.cpp b/image_analysis/scale_merge/Merge.cpp index 186d4c1e..9f049362 100644 --- a/image_analysis/scale_merge/Merge.cpp +++ b/image_analysis/scale_merge/Merge.cpp @@ -41,7 +41,7 @@ namespace { auto scaling_settings = x.GetScalingSettings(); - HKLKeyGenerator key_generator(scaling_settings.GetMergeFriedel(), x.GetGemmiSpaceGroup() ); + HKLKeyGenerator key_generator(scaling_settings.GetMergeFriedel(), x.GetSpaceGroupNumber().value_or(1) ); for (const auto &image: observations) { for (const auto &r: image) { diff --git a/image_analysis/scale_merge/ScaleOnTheFly.cpp b/image_analysis/scale_merge/ScaleOnTheFly.cpp index 99c29b7b..d2057fbe 100644 --- a/image_analysis/scale_merge/ScaleOnTheFly.cpp +++ b/image_analysis/scale_merge/ScaleOnTheFly.cpp @@ -92,10 +92,10 @@ ScaleOnTheFly::ScaleOnTheFly(const std::vector &ref, const Dif model(x.GetPartialityModel()), s(x.GetScalingSettings()), rot_wedge_deg(x.GetRotationWedgeForScaling()), - refine_rot_wedge(x.GetRefineRotationWedgeInScaling()) { - const HKLKeyGenerator key_generator(s.GetMergeFriedel(), sg); + refine_rot_wedge(x.GetRefineRotationWedgeInScaling()), + hkl_key_generator(s.GetMergeFriedel(), x.GetSpaceGroupNumber().value_or(1)) { for (const auto &r: ref) { - const auto key = key_generator(r); + const auto key = hkl_key_generator(r); reference_data[key] = r.I; } } @@ -137,13 +137,12 @@ ScaleOnTheFlyResult ScaleOnTheFly::Scale(std::vector &reflections, s } size_t n_reflections = 0; - HKLKeyGenerator key_generator(s.GetMergeFriedel(), sg); for (const auto &r: reflections) { - const HKLKey key = key_generator(r); - if (!Accept(r)) continue; + const HKLKey key = hkl_key_generator(r); + if (!reference_data.contains(key)) continue; diff --git a/image_analysis/scale_merge/ScaleOnTheFly.h b/image_analysis/scale_merge/ScaleOnTheFly.h index 
b037ed16..c491c4a5 100644 --- a/image_analysis/scale_merge/ScaleOnTheFly.h +++ b/image_analysis/scale_merge/ScaleOnTheFly.h @@ -28,6 +28,7 @@ class ScaleOnTheFly { const ScalingSettings s; const std::optional rot_wedge_deg; const bool refine_rot_wedge; + const HKLKeyGenerator hkl_key_generator; std::map reference_data; bool Accept(const Reflection &r); diff --git a/tests/MergeScaleTest.cpp b/tests/MergeScaleTest.cpp index b821b8d8..534871d7 100644 --- a/tests/MergeScaleTest.cpp +++ b/tests/MergeScaleTest.cpp @@ -5,14 +5,14 @@ #include "../image_analysis/scale_merge/HKLKey.h" TEST_CASE("HKLKey_NoSG_noMergeFriedel") { - HKLKeyGenerator hkl_key_gen(false, std::nullopt); + HKLKeyGenerator hkl_key_gen(false, 1); CHECK(hkl_key_gen(-1, -2, -3) != hkl_key_gen(1,2,3)); CHECK(hkl_key_gen(-1,-2,-3) == hkl_key_gen(-1,-2,-3)); CHECK(hkl_key_gen(-1,-2,-3) != hkl_key_gen(1,-2,-3)); } TEST_CASE("HKLKey_NoSG_MergeFriedel") { - HKLKeyGenerator hkl_key_gen(true, std::nullopt); + HKLKeyGenerator hkl_key_gen(true, 1); CHECK(hkl_key_gen(-1, -2, -3) == hkl_key_gen(1,2,3)); CHECK(hkl_key_gen(-1,-2,-3) == hkl_key_gen(-1,-2,-3)); CHECK(hkl_key_gen(-1,-2,-3) != hkl_key_gen(1,-2,-3)); -- 2.52.0 From 078c871ec15fec7898ddb660938b78641f76c6da Mon Sep 17 00:00:00 2001 From: takaba_k Date: Tue, 12 May 2026 21:44:12 +0200 Subject: [PATCH 042/132] jfjoch_process: Fix reading missing metadata --- tools/jfjoch_process.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/jfjoch_process.cpp b/tools/jfjoch_process.cpp index 639d386b..ef9d1861 100644 --- a/tools/jfjoch_process.cpp +++ b/tools/jfjoch_process.cpp @@ -473,7 +473,7 @@ int main(int argc, char **argv) { msg.image = img->image; msg.number = current_idx_offset; msg.original_number = image_idx; - msg.image_collection_efficiency = dataset->efficiency[image_idx]; + if (dataset->efficiency.size() > image_idx) msg.image_collection_efficiency = dataset->efficiency[image_idx]; total_uncompressed_bytes += 
msg.image.GetUncompressedSize(); -- 2.52.0 From c55e91ccfa801311e2d4b9e9ab4faf133d3faaaf Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Wed, 13 May 2026 07:48:17 +0200 Subject: [PATCH 043/132] Merge: Simplify (but not yet gaining performance) --- image_analysis/scale_merge/Merge.cpp | 63 ++++++++++------------------ 1 file changed, 22 insertions(+), 41 deletions(-) diff --git a/image_analysis/scale_merge/Merge.cpp b/image_analysis/scale_merge/Merge.cpp index 9f049362..e5ef98b1 100644 --- a/image_analysis/scale_merge/Merge.cpp +++ b/image_analysis/scale_merge/Merge.cpp @@ -71,46 +71,15 @@ namespace { return out; } - MergeResult InitResult(const std::vector &slot_to_hkl, - const std::vector &obs) { - MergeResult out; - out.merged.resize(slot_to_hkl.size()); - - for (int i = 0; i < static_cast(slot_to_hkl.size()); ++i) { - out.merged[i].h = slot_to_hkl[i].h; - out.merged[i].k = slot_to_hkl[i].k; - out.merged[i].l = slot_to_hkl[i].l; - out.merged[i].I = 0.0; - out.merged[i].sigma = 0.0; - out.merged[i].d = 0.0; - } - - std::vector> d_values(slot_to_hkl.size()); - for (const auto &o: obs) { - if (std::isfinite(o.r->d) && o.r->d > 0.0f) - d_values[o.hkl].push_back(o.r->d); - } - - for (int h = 0; h < static_cast(d_values.size()); ++h) { - auto &v = d_values[h]; - if (v.empty()) - continue; - - std::nth_element(v.begin(), v.begin() + static_cast(v.size() / 2), v.end()); - out.merged[h].d = v[v.size() / 2]; - } - - return out; - } - - void Merge(size_t nhkl, MergeResult &out, const std::vector &obs) { + MergeResult Merge(const std::vector &slot_to_hkl, const std::vector &obs) { struct Accum { double sum_wI = 0.0; double sum_w = 0.0; double sum_wsigma2 = 0.0; + std::vector d_values; }; - std::vector acc(nhkl); + std::vector acc(slot_to_hkl.size()); for (const auto &o: obs) { const double I_corr = static_cast(o.r->I) * o.r->scaling_correction; @@ -126,16 +95,31 @@ namespace { a.sum_wI += w * I_corr; a.sum_w += w; a.sum_wsigma2 += w * w * sigma_corr * sigma_corr; + if 
(std::isfinite(o.r->d) && o.r->d > 0.0f) + a.d_values.push_back(o.r->d); + } - for (int h = 0; h < static_cast(nhkl); ++h) { - const auto &a = acc[h]; + MergeResult out; + out.merged.resize(slot_to_hkl.size()); + + for (int h = 0; h < static_cast(slot_to_hkl.size()); ++h) { + auto &a = acc[h]; if (a.sum_w <= 0.0) continue; - + out.merged[h].h = slot_to_hkl[h].h; + out.merged[h].k = slot_to_hkl[h].k; + out.merged[h].l = slot_to_hkl[h].l; out.merged[h].I = a.sum_wI / a.sum_w; out.merged[h].sigma = std::sqrt(a.sum_wsigma2) / a.sum_w; + if (a.d_values.empty()) + continue; + + std::ranges::nth_element(a.d_values, + a.d_values.begin() + static_cast(a.d_values.size() / 2)); + out.merged[h].d = a.d_values[a.d_values.size() / 2]; } + return out; } void Stats(const DiffractionExperiment &x, MergeResult &out, const std::vector &obs) { @@ -281,10 +265,7 @@ MergeResult MergeReflections(const std::vector > &observ const DiffractionExperiment &x) { std::vector slot_to_hkl; auto obs = BuildObservations(observations, x, slot_to_hkl); - - auto out = InitResult(slot_to_hkl, obs); - - Merge(slot_to_hkl.size(), out, obs); + auto out = Merge(slot_to_hkl, obs); Stats(x, out, obs); return out; -- 2.52.0 From f0601289d982cc5e7172ca0499bf76eeea0e9c7d Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Wed, 13 May 2026 07:49:59 +0200 Subject: [PATCH 044/132] Report merge time --- tools/jfjoch_process.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tools/jfjoch_process.cpp b/tools/jfjoch_process.cpp index 639d386b..645b1a62 100644 --- a/tools/jfjoch_process.cpp +++ b/tools/jfjoch_process.cpp @@ -588,12 +588,19 @@ int main(int argc, char **argv) { double iter_time = std::chrono::duration(iter_end - iter_start).count(); logger.Info("Scaling iteration {} took {:.3f} seconds", i, iter_time); } - auto merge_result = indexer.Merge(); - - auto scale_end = std::chrono::steady_clock::now(); double scale_time = std::chrono::duration(scale_end - scale_start).count(); 
+ auto merge_start = std::chrono::steady_clock::now(); + auto merge_result = indexer.Merge(); + auto merge_end = std::chrono::steady_clock::now(); + double merge_time = std::chrono::duration(merge_end - merge_start).count(); + + logger.Info("Scaling completed in {:.2f} s", scale_time); + logger.Info("Merge completed in {:.2f} s ({} unique reflections)", merge_time, + merge_result.merged.size()); + + if (!fixed_space_group) { logger.Info("Searching for space group from P1-merged reflections ..."); @@ -642,8 +649,6 @@ int main(int argc, char **argv) { } - logger.Info("Scaling completed in {:.2f} s ({} unique reflections)", - scale_time, merge_result.merged.size()); // Print resolution-shell statistics table const auto &stats = merge_result.statistics; -- 2.52.0 From 02327bf610160708f40c1090f5d27361029258fe Mon Sep 17 00:00:00 2001 From: Filip Leonarski Date: Wed, 13 May 2026 12:20:10 +0200 Subject: [PATCH 045/132] BraggIntegrate2D: For very large sigma's, use 2% of I as sigma (if larger than sqrt(I)) --- common/BraggIntegrationSettings.cpp | 4 ++++ common/BraggIntegrationSettings.h | 4 ++++ .../bragg_integration/BraggIntegrate2D.cpp | 21 ++++++++++++------- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/common/BraggIntegrationSettings.cpp b/common/BraggIntegrationSettings.cpp index a0d4e927..3fe0a861 100644 --- a/common/BraggIntegrationSettings.cpp +++ b/common/BraggIntegrationSettings.cpp @@ -83,3 +83,7 @@ float BraggIntegrationSettings::GetR3() const { float BraggIntegrationSettings::GetDMinLimit_A() const { return d_min_limit_A; } + +float BraggIntegrationSettings::GetMinimumSigmaInRegardsToI() const { + return minimum_sigma_in_regards_to_i; +} diff --git a/common/BraggIntegrationSettings.h b/common/BraggIntegrationSettings.h index 212799e1..151f4eab 100644 --- a/common/BraggIntegrationSettings.h +++ b/common/BraggIntegrationSettings.h @@ -12,6 +12,8 @@ class BraggIntegrationSettings { float r_3 = 8; float d_min_limit_A = 1.0; std::optional 
fixed_profile_radius; + float minimum_sigma_in_regards_to_i = 0.02; + public: BraggIntegrationSettings& R1(float input); BraggIntegrationSettings& R2(float input); @@ -25,6 +27,8 @@ public: [[nodiscard]] float GetR3() const; [[nodiscard]] std::optional GetFixedProfileRadius_recipA() const; [[nodiscard]] float GetDMinLimit_A() const; + + [[nodiscard]] float GetMinimumSigmaInRegardsToI() const; }; diff --git a/image_analysis/bragg_integration/BraggIntegrate2D.cpp b/image_analysis/bragg_integration/BraggIntegrate2D.cpp index dba2d7c2..2b6017c8 100644 --- a/image_analysis/bragg_integration/BraggIntegrate2D.cpp +++ b/image_analysis/bragg_integration/BraggIntegrate2D.cpp @@ -73,7 +73,8 @@ template void IntegrateReflection(Reflection &r, const T *image, const std::vector &reflection_mask, size_t xpixel, size_t ypixel, int64_t special_value, int64_t saturation, - float r_3, float r_1_sq, float r_2_sq, float r_3_sq) { + float r_3, float r_1_sq, float r_2_sq, float r_3_sq, + float minimum_sigma_in_regards_to_i) { int64_t x0 = std::floor(r.predicted_x - r_3 - 1.0); int64_t x1 = std::ceil(r.predicted_x + r_3 + 1.0); @@ -119,15 +120,20 @@ void IntegrateReflection(Reflection &r, const T *image, const std::vector 5)) { r.bkg = Median(bkg_values); r.I = static_cast(I_sum) - static_cast(I_npixel_integrated) * r.bkg; - // minimum sigma is 1! 
- if (I_sum >= 1) - r.sigma = std::sqrt(static_cast(I_sum)); - else - r.sigma = 1; + + // sigma is max of the: + // - 1 (for very small numbers) + // - Poisson noise (sqrt(I_sum)) (for in between) + // - minimum_sigma_in_regards_to_i of Intensity (for very large numbers) + r.sigma = 1.0; + r.sigma = std::max(r.sigma, r.I * minimum_sigma_in_regards_to_i); + if (I_sum > 0) + r.sigma = std::max(r.sigma, std::sqrt(static_cast(I_sum))); r.observed = true; } else { r.I = 0; r.bkg = 0; + r.sigma = NAN; r.observed = false; } } @@ -156,6 +162,7 @@ std::vector IntegrateInternal(const DiffractionExperiment &experimen const float r_2_sq = settings.GetR2() * settings.GetR2(); const float r_3_sq = settings.GetR3() * settings.GetR3(); + const float minimum_sigma_in_regards_to_i = settings.GetMinimumSigmaInRegardsToI(); const auto reflection_mask = BuildReflectionMask(predicted, npredicted, image.GetWidth(), image.GetHeight(), r_2, r_2_sq); @@ -163,7 +170,7 @@ std::vector IntegrateInternal(const DiffractionExperiment &experimen for (int i = 0; i < npredicted; i++) { auto r = predicted.at(i); IntegrateReflection(r, ptr, reflection_mask, image.GetWidth(), image.GetHeight(), special_value, saturation, - r_3, r_1_sq, r_2_sq, r_3_sq); + r_3, r_1_sq, r_2_sq, r_3_sq, minimum_sigma_in_regards_to_i); if (r.observed) { if (experiment.GetPolarizationFactor()) r.rlp /= geom.CalcAzIntPolarizationCorr(r.predicted_x, r.predicted_y, experiment.GetPolarizationFactor().value()); -- 2.52.0 From d5b928fa736177b343b10b5f8a1e7946855e1105 Mon Sep 17 00:00:00 2001 From: Filip Leonarski Date: Wed, 13 May 2026 12:27:43 +0200 Subject: [PATCH 046/132] Merge: Simplify code for merging, make it more efficient --- common/Reflection.h | 14 +- image_analysis/IndexAndRefine.cpp | 11 +- image_analysis/IndexAndRefine.h | 2 +- image_analysis/scale_merge/HKLKey.cpp | 25 ++ image_analysis/scale_merge/HKLKey.h | 6 + image_analysis/scale_merge/Merge.cpp | 412 +++++++++++--------------- image_analysis/scale_merge/Merge.h 
| 9 +- 7 files changed, 228 insertions(+), 251 deletions(-) diff --git a/common/Reflection.h b/common/Reflection.h index 219c2763..1f755dd0 100644 --- a/common/Reflection.h +++ b/common/Reflection.h @@ -6,6 +6,8 @@ #include #include +#include + #include "SpotToSave.h" struct Reflection { @@ -29,11 +31,13 @@ struct Reflection { }; struct MergedReflection { - int32_t h; - int32_t k; - int32_t l; - float I; - float sigma; + int32_t h = 0; + int32_t k = 0; + int32_t l = 0; + float I = NAN; + float sigma = NAN; + float I_anom[2] = {NAN, NAN}; + float sigma_anom[2] = {NAN, NAN}; float d = 0.0; }; diff --git a/image_analysis/IndexAndRefine.cpp b/image_analysis/IndexAndRefine.cpp index 253d338c..9b749e89 100644 --- a/image_analysis/IndexAndRefine.cpp +++ b/image_analysis/IndexAndRefine.cpp @@ -281,11 +281,14 @@ void IndexAndRefine::ScaleImage(size_t n, ScaleOnTheFly &scaling, ScalingResult } ScalingResult IndexAndRefine::ScaleAllImages(size_t nthreads) { - auto merge_result = MergeReflections(reflections, experiment); - ScaleOnTheFly scaling(merge_result.merged, experiment); + auto merge_result = MergeAll(experiment, reflections); + ScaleOnTheFly scaling(merge_result, experiment); return scaling.Scale(reflections, mosaicity, nthreads); } -MergeResult IndexAndRefine::Merge() { - return MergeReflections(reflections, experiment); +MergeResult IndexAndRefine::Merge() const { + MergeResult out; + out.merged = MergeAll(experiment, reflections); + out.statistics = MergeStats(experiment, out.merged, reflections); + return out; } diff --git a/image_analysis/IndexAndRefine.h b/image_analysis/IndexAndRefine.h index 6aad02fa..f80335e3 100644 --- a/image_analysis/IndexAndRefine.h +++ b/image_analysis/IndexAndRefine.h @@ -65,7 +65,7 @@ public: void ProcessImage(DataMessage &msg, const SpotFindingSettings &settings, const CompressedImage &image, BraggPrediction &prediction); ScalingResult ScaleAllImages(size_t nthreads = 0); - MergeResult Merge(); + MergeResult Merge() const; 
std::optional Finalize(); }; diff --git a/image_analysis/scale_merge/HKLKey.cpp b/image_analysis/scale_merge/HKLKey.cpp index 3f05db05..0f72d0ce 100644 --- a/image_analysis/scale_merge/HKLKey.cpp +++ b/image_analysis/scale_merge/HKLKey.cpp @@ -6,6 +6,26 @@ #include "HKLKey.h" #include "gemmi/symmetry.hpp" +uint64_t HKLKey::pack_no_anom() const { + constexpr int bits = 21; + constexpr int bias = 1 << (bits - 1); // 1,048,576 + constexpr int max_value = bias - 1; + constexpr int min_value = -bias; + constexpr std::uint64_t mask = (1ULL << bits) - 1ULL; + + if (h < min_value || h > max_value || + k < min_value || k > max_value || + l < min_value || l > max_value) { + throw std::out_of_range("HKL index outside packable range"); + } + + const std::uint64_t hh = static_cast(h + bias) & mask; + const std::uint64_t kk = static_cast(k + bias) & mask; + const std::uint64_t ll = static_cast(l + bias) & mask; + + return hh | (kk << bits) | (ll << (2 * bits)); +} + HKLKeyGenerator::HKLKeyGenerator(bool merge_friedel, int32_t space_group_number) : HKLKeyGenerator(merge_friedel, *gemmi::find_spacegroup_by_number(space_group_number)) { } @@ -57,6 +77,8 @@ bool AcceptReflection(const Reflection &r, std::optional d_min_limit) { return false; if (!std::isfinite(r.rlp) || r.rlp == 0.0f) return false; + if (!std::isfinite(r.sigma) || r.sigma <= 0.0) + return false; return true; } @@ -69,5 +91,8 @@ bool AcceptReflection(const Reflection &r, double d_min_limit) { return false; if (!std::isfinite(r.rlp) || r.rlp == 0.0f) return false; + if (!std::isfinite(r.sigma) || r.sigma <= 0.0) + return false; return true; } + diff --git a/image_analysis/scale_merge/HKLKey.h b/image_analysis/scale_merge/HKLKey.h index 8084d8e9..74af3aff 100644 --- a/image_analysis/scale_merge/HKLKey.h +++ b/image_analysis/scale_merge/HKLKey.h @@ -3,6 +3,10 @@ #pragma once +#include +#include +#include + #include #include "../../common/Reflection.h" #include "gemmi/symmetry.hpp" @@ -19,6 +23,8 @@ struct HKLKey { bool 
operator==(const HKLKey &o) const { return h == o.h && k == o.k && l == o.l && plus == o.plus; } + + [[nodiscard]] uint64_t pack_no_anom() const; }; class HKLKeyGenerator { diff --git a/image_analysis/scale_merge/Merge.cpp b/image_analysis/scale_merge/Merge.cpp index e5ef98b1..2cad3d64 100644 --- a/image_analysis/scale_merge/Merge.cpp +++ b/image_analysis/scale_merge/Merge.cpp @@ -6,267 +6,203 @@ #include #include #include -#include -#include -#include -#include +#include #include "../../common/ResolutionShells.h" #include "HKLKey.h" -namespace { - struct Obs { - const Reflection *r = nullptr; - int hkl = -1; - double sigma = 1.0; +std::vector MergeAll(const DiffractionExperiment &x, const std::vector > &reflections) { + auto scaling_settings = x.GetScalingSettings(); + HKLKeyGenerator key_generator(scaling_settings.GetMergeFriedel(), x.GetSpaceGroupNumber().value_or(1)); + const std::optional high_resolution_limit = scaling_settings.GetHighResolutionLimit_A(); + + struct Accum { + // Keep anomalous + / - together, but separate + int32_t h = 0; + int32_t k = 0; + int32_t l = 0; + float d = NAN; + double sum_wI = 0.0; + double sum_w = 0.0; + double sum_wI_anom[2] = {0.0, 0.0}; + double sum_w_anom[2] = {0.0, 0.0}; + bool present[2] = {false, false}; }; - double SafeSigma(double sigma) { - // TODO: Think about safe sigma... 
- if (!std::isfinite(sigma) || sigma <= 1e-3) - return 1e-3; - return sigma; - } + std::unordered_map acc; - std::vector BuildObservations(const std::vector> &observations, - const DiffractionExperiment &x, - std::vector &slot_to_hkl) { - std::map hkl_to_slot; - std::vector out; - - size_t nrefl = 0; - for (const auto &image: observations) - nrefl += image.size(); - out.reserve(nrefl); - - auto scaling_settings = x.GetScalingSettings(); - - HKLKeyGenerator key_generator(scaling_settings.GetMergeFriedel(), x.GetSpaceGroupNumber().value_or(1) ); - - for (const auto &image: observations) { - for (const auto &r: image) { - - if (r.scaling_correction <= 0.0 || !std::isfinite(r.scaling_correction)) - continue; - if (!AcceptReflection(r, scaling_settings.GetHighResolutionLimit_A())) - continue; - - HKLKey key = key_generator(r); - - auto it = hkl_to_slot.find(key); - if (it == hkl_to_slot.end()) { - const int slot = static_cast(slot_to_hkl.size()); - it = hkl_to_slot.emplace(key, slot).first; - slot_to_hkl.push_back(key); - } - - out.push_back({ - .r = &r, - .hkl = it->second, - .sigma = SafeSigma(r.sigma) - }); - } - } - - return out; - } - - MergeResult Merge(const std::vector &slot_to_hkl, const std::vector &obs) { - struct Accum { - double sum_wI = 0.0; - double sum_w = 0.0; - double sum_wsigma2 = 0.0; - std::vector d_values; - }; - - std::vector acc(slot_to_hkl.size()); - - for (const auto &o: obs) { - const double I_corr = static_cast(o.r->I) * o.r->scaling_correction; - const double sigma_corr = o.sigma * o.r->scaling_correction; + for (const auto &image: reflections) { + for (const auto &r: image) { + if (r.scaling_correction <= 0.0 || !std::isfinite(r.scaling_correction)) + continue; + if (!AcceptReflection(r, high_resolution_limit)) + continue; + const float I_corr = r.I * r.scaling_correction; + const float sigma_corr = r.sigma * r.scaling_correction; if (!std::isfinite(I_corr) || !std::isfinite(sigma_corr) || sigma_corr <= 0.0) continue; - // TODO: Figure out 
right way to handle this - const double w = 1.0 / (sigma_corr * sigma_corr); + auto hkl = key_generator(r); + auto hkl_key = hkl.pack_no_anom(); - auto &a = acc[o.hkl]; - a.sum_wI += w * I_corr; - a.sum_w += w; - a.sum_wsigma2 += w * w * sigma_corr * sigma_corr; - if (std::isfinite(o.r->d) && o.r->d > 0.0f) - a.d_values.push_back(o.r->d); + auto it = acc.find(hkl_key); + if (it == acc.end()) + it = acc.emplace(hkl_key, Accum{ + .h = hkl.h, + .k = hkl.k, + .l = hkl.l + }).first; + int solution = hkl.plus ? 0 : 1; + + const float w = 1.0f / (sigma_corr * sigma_corr); + it->second.sum_wI += w * I_corr; + it->second.sum_w += w; + it->second.sum_wI_anom[solution] += w * I_corr; + it->second.sum_w_anom[solution] += w; + + it->second.present[solution] = true; + if (!std::isfinite(it->second.d) && std::isfinite(r.d) && r.d > 0.0f) + it->second.d = r.d; } - - MergeResult out; - out.merged.resize(slot_to_hkl.size()); - - for (int h = 0; h < static_cast(slot_to_hkl.size()); ++h) { - auto &a = acc[h]; - if (a.sum_w <= 0.0) - continue; - out.merged[h].h = slot_to_hkl[h].h; - out.merged[h].k = slot_to_hkl[h].k; - out.merged[h].l = slot_to_hkl[h].l; - out.merged[h].I = a.sum_wI / a.sum_w; - out.merged[h].sigma = std::sqrt(a.sum_wsigma2) / a.sum_w; - if (a.d_values.empty()) - continue; - - std::ranges::nth_element(a.d_values, - a.d_values.begin() + static_cast(a.d_values.size() / 2)); - out.merged[h].d = a.d_values[a.d_values.size() / 2]; - } - return out; } - void Stats(const DiffractionExperiment &x, MergeResult &out, const std::vector &obs) { - constexpr int n_shells = 10; + std::vector out; + out.reserve(acc.size()); + for (const auto &[key, accum]: acc) { + if (accum.sum_w <= 0.0) + continue; - float d_min = std::numeric_limits::max(); - float d_max = 0.0f; + float I_anom[2] = {NAN, NAN}; + float sigma_anom[2] = {NAN, NAN}; - auto d_min_limit_A = x.GetScalingSettings().GetHighResolutionLimit_A(); - for (const auto &m: out.merged) { - const auto d = static_cast(m.d); - if 
(!std::isfinite(d) || d <= 0.0f) - continue; - if (d_min_limit_A && d < d_min_limit_A) - continue; - - d_min = std::min(d_min, d); - d_max = std::max(d_max, d); - } - - if (!(d_min < d_max && d_min > 0.0f)) - return; - - const float d_min_pad = d_min * 0.999f; - const float d_max_pad = d_max * 1.001f; - - ResolutionShells shells(d_min_pad, d_max_pad, n_shells); - const auto shell_mean_1_d2 = shells.GetShellMeanOneOverResSq(); - const auto shell_min_res = shells.GetShellMinRes(); - - std::vector hkl_shell(out.merged.size(), -1); - for (int h = 0; h < static_cast(out.merged.size()); ++h) { - auto s = shells.GetShell(out.merged[h].d); - if (s) - hkl_shell[h] = *s; - } - - struct PerHKL { - double sum_I = 0.0; - std::vector I; - }; - - std::vector per_hkl(out.merged.size()); - - for (const auto &o: obs) { - if (o.hkl < 0 || o.hkl >= static_cast(per_hkl.size())) - continue; - if (hkl_shell[o.hkl] < 0) - continue; - - const double I_corr = static_cast(o.r->I) * o.r->scaling_correction; - if (!std::isfinite(I_corr)) - continue; - - per_hkl[o.hkl].sum_I += I_corr; - per_hkl[o.hkl].I.push_back(I_corr); - } - - struct ShellAccum { - int total_obs = 0; - std::unordered_set unique; - double rmeas_num = 0.0; - double rmeas_den = 0.0; - double sum_i_over_sigma = 0.0; - int n_i_over_sigma = 0; - }; - - std::vector acc(n_shells); - - for (int h = 0; h < static_cast(per_hkl.size()); ++h) { - const int s = hkl_shell[h]; - if (s < 0 || per_hkl[h].I.empty()) - continue; - - auto &sa = acc[s]; - const auto &ph = per_hkl[h]; - const int n = static_cast(ph.I.size()); - const double mean_I = ph.sum_I / n; - - sa.unique.insert(h); - sa.total_obs += n; - - if (n >= 2) { - double sum_abs_dev = 0.0; - for (double I: ph.I) - sum_abs_dev += std::abs(I - mean_I); - - sa.rmeas_num += std::sqrt(static_cast(n) / (n - 1.0)) * sum_abs_dev; - } - - for (double I: ph.I) - sa.rmeas_den += std::abs(I); - - if (out.merged[h].sigma > 0.0) { - sa.sum_i_over_sigma += out.merged[h].I / out.merged[h].sigma; - 
++sa.n_i_over_sigma; + for (int i = 0; i < 2; ++i) { + if (accum.present[i] && accum.sum_w_anom[i] > 0.0) { + I_anom[i] = static_cast(accum.sum_wI_anom[i] / accum.sum_w_anom[i]); + sigma_anom[i] = 1.0f / std::sqrt(static_cast(accum.sum_w_anom[i])); } } - - out.statistics.shells.resize(n_shells); - - for (int s = 0; s < n_shells; ++s) { - const auto &sa = acc[s]; - auto &ss = out.statistics.shells[s]; - - ss.mean_one_over_d2 = shell_mean_1_d2[s]; - ss.d_min = shell_min_res[s]; - ss.d_max = s == 0 ? d_max_pad : shell_min_res[s - 1]; - ss.total_observations = sa.total_obs; - ss.unique_reflections = static_cast(sa.unique.size()); - ss.rmeas = sa.rmeas_den > 0.0 ? sa.rmeas_num / sa.rmeas_den : 0.0; - ss.mean_i_over_sigma = sa.n_i_over_sigma > 0 - ? sa.sum_i_over_sigma / sa.n_i_over_sigma - : 0.0; - } - - auto &overall = out.statistics.overall; - overall.d_min = d_min; - overall.d_max = d_max; - - std::unordered_set all_unique; - double rmeas_num = 0.0; - double rmeas_den = 0.0; - double sum_i_over_sigma = 0.0; - int n_i_over_sigma = 0; - - for (const auto &sa: acc) { - overall.total_observations += sa.total_obs; - all_unique.insert(sa.unique.begin(), sa.unique.end()); - rmeas_num += sa.rmeas_num; - rmeas_den += sa.rmeas_den; - sum_i_over_sigma += sa.sum_i_over_sigma; - n_i_over_sigma += sa.n_i_over_sigma; - } - - overall.unique_reflections = static_cast(all_unique.size()); - overall.rmeas = rmeas_den > 0.0 ? rmeas_num / rmeas_den : 0.0; - overall.mean_i_over_sigma = n_i_over_sigma > 0 ? 
sum_i_over_sigma / n_i_over_sigma : 0.0; + out.emplace_back(MergedReflection{ + .h = accum.h, + .k = accum.k, + .l = accum.l, + .I = static_cast(accum.sum_wI / accum.sum_w), + .sigma = 1.0f / std::sqrt(static_cast(accum.sum_w)), + .I_anom = {I_anom[0], I_anom[1]}, + .sigma_anom = {sigma_anom[0], sigma_anom[1]}, + .d = accum.d + }); } + return out; } -MergeResult MergeReflections(const std::vector > &observations, - const DiffractionExperiment &x) { - std::vector slot_to_hkl; - auto obs = BuildObservations(observations, x, slot_to_hkl); - auto out = Merge(slot_to_hkl, obs); - Stats(x, out, obs); +MergeStatistics MergeStats(const DiffractionExperiment &x, + const std::vector &merged, + const std::vector > &reflections) { + + constexpr int n_shells = 10; + + float d_min = std::numeric_limits::max(); + float d_max = 0.0f; + + auto d_min_limit_A = x.GetScalingSettings().GetHighResolutionLimit_A(); + for (const auto &m: merged) { + if (!std::isfinite(m.d) || m.d <= 0.0f) + continue; + if (d_min_limit_A && m.d < d_min_limit_A) + continue; + + d_min = std::min(d_min, m.d); + d_max = std::max(d_max, m.d); + } + + if (!(d_min < d_max && d_min > 0.0f)) + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Error in resolution calculation"); + + const float d_min_pad = d_min * 0.999f; + const float d_max_pad = d_max * 1.001f; + + ResolutionShells shells(d_min_pad, d_max_pad, n_shells); + const auto shell_mean_1_d2 = shells.GetShellMeanOneOverResSq(); + const auto shell_min_res = shells.GetShellMinRes(); + + struct ShellAccum { + int total_obs = 0; + int unique = 0; + double sum_i_over_sigma = 0.0; + int n_i_over_sigma = 0; + }; + + std::vector acc(n_shells); + + for (const auto &m: merged) { + const auto shell = shells.GetShell(m.d); + if (!shell.has_value()) + continue; + + const int s = *shell; + if (s >= 0 && s < n_shells) { + if (std::isfinite(m.I) && std::isfinite(m.sigma) && m.sigma > 0.0) { + acc[s].unique++; + acc[s].sum_i_over_sigma += m.I / m.sigma; 
+ ++acc[s].n_i_over_sigma; + } + + } + } + + for (const auto &image: reflections) { + for (const auto &r: image) { + if (r.scaling_correction <= 0.0 || !std::isfinite(r.scaling_correction)) + continue; + + if (!AcceptReflection(r, d_min_limit_A)) + continue; + + const auto shell = shells.GetShell(r.d); + if (!shell.has_value()) + continue; + const int s = *shell; + if (s >= 0 && s < n_shells) + acc[s].total_obs++; + } + } + + MergeStatistics out; + out.shells.resize(n_shells); + + for (int s = 0; s < n_shells; ++s) { + const auto &sa = acc[s]; + auto &ss = out.shells[s]; + + ss.mean_one_over_d2 = shell_mean_1_d2[s]; + ss.d_min = shell_min_res[s]; + ss.d_max = s == 0 ? d_max_pad : shell_min_res[s - 1]; + ss.total_observations = sa.total_obs; + ss.unique_reflections = sa.unique; + ss.mean_i_over_sigma = sa.n_i_over_sigma > 0 + ? sa.sum_i_over_sigma / sa.n_i_over_sigma + : 0.0; + } + + auto &overall = out.overall; + overall.d_min = d_min; + overall.d_max = d_max; + + int all_unique = 0; + double sum_i_over_sigma = 0.0; + int n_i_over_sigma = 0; + + for (const auto &sa: acc) { + overall.total_observations += sa.total_obs; + all_unique += sa.unique; + sum_i_over_sigma += sa.sum_i_over_sigma; + n_i_over_sigma += sa.n_i_over_sigma; + } + + overall.unique_reflections = all_unique; + overall.mean_i_over_sigma = n_i_over_sigma > 0 ? 
sum_i_over_sigma / n_i_over_sigma : 0.0; return out; } diff --git a/image_analysis/scale_merge/Merge.h b/image_analysis/scale_merge/Merge.h index 95908919..3ed79f83 100644 --- a/image_analysis/scale_merge/Merge.h +++ b/image_analysis/scale_merge/Merge.h @@ -18,7 +18,6 @@ struct MergeStatisticsShell { float mean_one_over_d2 = 0; int total_observations = 0; int unique_reflections = 0; - double rmeas = 0.0; double mean_i_over_sigma = 0.0; }; @@ -33,5 +32,9 @@ struct MergeResult { MergeStatistics statistics; }; -MergeResult MergeReflections(const std::vector > &observations, - const DiffractionExperiment &x); +std::vector MergeAll(const DiffractionExperiment &x, + const std::vector > &reflections); + +MergeStatistics MergeStats(const DiffractionExperiment &x, + const std::vector &merged, + const std::vector > &reflections); \ No newline at end of file -- 2.52.0 From b06dfc8357e9706d96f55eb0991e48aaeabdcae2 Mon Sep 17 00:00:00 2001 From: Filip Leonarski Date: Wed, 13 May 2026 13:13:17 +0200 Subject: [PATCH 047/132] Gemmi: Include through FetchContent full gemmi library (not limited cpp/hpp files) --- CMakeLists.txt | 11 +- common/CMakeLists.txt | 2 +- docs/SOFTWARE.md | 2 +- image_analysis/CMakeLists.txt | 2 +- symmetry/CMakeLists.txt | 2 - symmetry/LICENSE.txt | 373 ---------- symmetry/gemmi/cellred.hpp | 406 ----------- symmetry/gemmi/fail.hpp | 93 --- symmetry/gemmi/math.hpp | 458 ------------- symmetry/gemmi/symmetry.hpp | 1044 ---------------------------- symmetry/gemmi/unitcell.hpp | 618 ----------------- symmetry/symmetry.cpp | 1215 --------------------------------- 12 files changed, 12 insertions(+), 4214 deletions(-) delete mode 100644 symmetry/CMakeLists.txt delete mode 100644 symmetry/LICENSE.txt delete mode 100644 symmetry/gemmi/cellred.hpp delete mode 100644 symmetry/gemmi/fail.hpp delete mode 100644 symmetry/gemmi/math.hpp delete mode 100644 symmetry/gemmi/symmetry.hpp delete mode 100644 symmetry/gemmi/unitcell.hpp delete mode 100644 symmetry/symmetry.cpp 
diff --git a/CMakeLists.txt b/CMakeLists.txt index c49e0644..9beada6d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,6 +95,13 @@ SET(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE) SET(HTTPLIB_USE_NON_BLOCKING_GETADDRINFO OFF CACHE BOOL "" FORCE) SET(HTTPLIB_REQUIRE_ZLIB ON CACHE BOOL "" FORCE) +FetchContent_Declare( + gemmi + GIT_REPOSITORY https://github.com/fleon-psi/gemmi + GIT_TAG d6dcc1f57eedf7ba34a7d2d2ed283075113040bf + EXCLUDE_FROM_ALL +) + FetchContent_Declare( spdlog GIT_REPOSITORY https://github.com/gabime/spdlog.git @@ -142,7 +149,7 @@ FetchContent_Declare( EXCLUDE_FROM_ALL ) -FetchContent_MakeAvailable(zstd sls_detector_package catch2 hdf5 spdlog httplib) +FetchContent_MakeAvailable(zstd sls_detector_package catch2 hdf5 spdlog httplib gemmi) ADD_SUBDIRECTORY(jungfrau) ADD_SUBDIRECTORY(compression) @@ -153,7 +160,7 @@ ADD_SUBDIRECTORY(reader) ADD_SUBDIRECTORY(detector_control) ADD_SUBDIRECTORY(image_puller) ADD_SUBDIRECTORY(preview) -ADD_SUBDIRECTORY(symmetry) +#ADD_SUBDIRECTORY(symmetry) ADD_SUBDIRECTORY(xds-plugin) IF (JFJOCH_WRITER_ONLY) diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index a15a652b..d9b1d8b2 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -130,7 +130,7 @@ ADD_LIBRARY(JFJochCommon STATIC ScalingSettings.h ) -TARGET_LINK_LIBRARIES(JFJochCommon JFJochLogger Compression JFCalibration gemmi Threads::Threads -lrt ) +TARGET_LINK_LIBRARIES(JFJochCommon JFJochLogger Compression JFCalibration gemmi_cpp Threads::Threads -lrt ) TARGET_LINK_LIBRARIES(JFJochZMQ "$") diff --git a/docs/SOFTWARE.md b/docs/SOFTWARE.md index d64671e8..bddc3952 100644 --- a/docs/SOFTWARE.md +++ b/docs/SOFTWARE.md @@ -33,6 +33,7 @@ Automatically downloaded by CMake and statically linked: * Catch2 testing library - see [github.com/catchorg/Catch2](https://github.com/catchorg/Catch2) * Ceres Solver library for least square optimization - see [http://ceres-solver.org/] * Spdlog logging library - see 
[github.com/gabime/spdlog](https://github.com/gabime/spdlog) +* GEMMI library by Global Phasing - see [github.com/project-gemmi/gemmi](https://github.com/project-gemmi/gemmi) Please follow the link provided above to check for LICENSE file. Building code with dependencies above requires access from the build system to github.com. Directly included in the repository: @@ -44,6 +45,5 @@ Directly included in the repository: * LZ4 compression by Y.Collet - see [github.com/lz4/lz4](https://github.com/lz4/lz4) * ZeroMQ library (through slsDetectorPackage) - see [github.com/zeromq/libzmq](https://github.com/zeromq/libzmq) * Base64 decoder/encoder - see [gist.github.com/tomykaira](https://gist.github.com/tomykaira/f0fd86b6c73063283afe550bc5d77594) -* GEMMI library by Global Phasing - see [github.com/project-gemmi/gemmi](https://github.com/project-gemmi/gemmi) For license check LICENSE file in respective directory diff --git a/image_analysis/CMakeLists.txt b/image_analysis/CMakeLists.txt index 394dbaed..6bb52518 100644 --- a/image_analysis/CMakeLists.txt +++ b/image_analysis/CMakeLists.txt @@ -44,4 +44,4 @@ ADD_SUBDIRECTORY(scale_merge) ADD_SUBDIRECTORY(image_preprocessing) ADD_SUBDIRECTORY(azint) -TARGET_LINK_LIBRARIES(JFJochImageAnalysis JFJochAzIntEngine JFJochImagePreprocessing JFJochBraggPrediction JFJochBraggIntegration JFJochLatticeSearch JFJochIndexing JFJochSpotFinding JFJochCommon JFJochGeomRefinement JFJochScaleMerge gemmi) +TARGET_LINK_LIBRARIES(JFJochImageAnalysis JFJochAzIntEngine JFJochImagePreprocessing JFJochBraggPrediction JFJochBraggIntegration JFJochLatticeSearch JFJochIndexing JFJochSpotFinding JFJochCommon JFJochGeomRefinement JFJochScaleMerge) diff --git a/symmetry/CMakeLists.txt b/symmetry/CMakeLists.txt deleted file mode 100644 index fed3f792..00000000 --- a/symmetry/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -ADD_LIBRARY(gemmi STATIC symmetry.cpp gemmi/symmetry.hpp gemmi/fail.hpp) -TARGET_INCLUDE_DIRECTORIES(gemmi PUBLIC .) 
\ No newline at end of file diff --git a/symmetry/LICENSE.txt b/symmetry/LICENSE.txt deleted file mode 100644 index 14e2f777..00000000 --- a/symmetry/LICENSE.txt +++ /dev/null @@ -1,373 +0,0 @@ -Mozilla Public License Version 2.0 -================================== - -1. Definitions --------------- - -1.1. "Contributor" - means each individual or legal entity that creates, contributes to - the creation of, or owns Covered Software. - -1.2. "Contributor Version" - means the combination of the Contributions of others (if any) used - by a Contributor and that particular Contributor's Contribution. - -1.3. "Contribution" - means Covered Software of a particular Contributor. - -1.4. "Covered Software" - means Source Code Form to which the initial Contributor has attached - the notice in Exhibit A, the Executable Form of such Source Code - Form, and Modifications of such Source Code Form, in each case - including portions thereof. - -1.5. "Incompatible With Secondary Licenses" - means - - (a) that the initial Contributor has attached the notice described - in Exhibit B to the Covered Software; or - - (b) that the Covered Software was made available under the terms of - version 1.1 or earlier of the License, but not also under the - terms of a Secondary License. - -1.6. "Executable Form" - means any form of the work other than Source Code Form. - -1.7. "Larger Work" - means a work that combines Covered Software with other material, in - a separate file or files, that is not Covered Software. - -1.8. "License" - means this document. - -1.9. "Licensable" - means having the right to grant, to the maximum extent possible, - whether at the time of the initial grant or subsequently, any and - all of the rights conveyed by this License. - -1.10. 
"Modifications" - means any of the following: - - (a) any file in Source Code Form that results from an addition to, - deletion from, or modification of the contents of Covered - Software; or - - (b) any new file in Source Code Form that contains any Covered - Software. - -1.11. "Patent Claims" of a Contributor - means any patent claim(s), including without limitation, method, - process, and apparatus claims, in any patent Licensable by such - Contributor that would be infringed, but for the grant of the - License, by the making, using, selling, offering for sale, having - made, import, or transfer of either its Contributions or its - Contributor Version. - -1.12. "Secondary License" - means either the GNU General Public License, Version 2.0, the GNU - Lesser General Public License, Version 2.1, the GNU Affero General - Public License, Version 3.0, or any later versions of those - licenses. - -1.13. "Source Code Form" - means the form of the work preferred for making modifications. - -1.14. "You" (or "Your") - means an individual or a legal entity exercising rights under this - License. For legal entities, "You" includes any entity that - controls, is controlled by, or is under common control with You. For - purposes of this definition, "control" means (a) the power, direct - or indirect, to cause the direction or management of such entity, - whether by contract or otherwise, or (b) ownership of more than - fifty percent (50%) of the outstanding shares or beneficial - ownership of such entity. - -2. License Grants and Conditions --------------------------------- - -2.1. 
Grants - -Each Contributor hereby grants You a world-wide, royalty-free, -non-exclusive license: - -(a) under intellectual property rights (other than patent or trademark) - Licensable by such Contributor to use, reproduce, make available, - modify, display, perform, distribute, and otherwise exploit its - Contributions, either on an unmodified basis, with Modifications, or - as part of a Larger Work; and - -(b) under Patent Claims of such Contributor to make, use, sell, offer - for sale, have made, import, and otherwise transfer either its - Contributions or its Contributor Version. - -2.2. Effective Date - -The licenses granted in Section 2.1 with respect to any Contribution -become effective for each Contribution on the date the Contributor first -distributes such Contribution. - -2.3. Limitations on Grant Scope - -The licenses granted in this Section 2 are the only rights granted under -this License. No additional rights or licenses will be implied from the -distribution or licensing of Covered Software under this License. -Notwithstanding Section 2.1(b) above, no patent license is granted by a -Contributor: - -(a) for any code that a Contributor has removed from Covered Software; - or - -(b) for infringements caused by: (i) Your and any other third party's - modifications of Covered Software, or (ii) the combination of its - Contributions with other software (except as part of its Contributor - Version); or - -(c) under Patent Claims infringed by Covered Software in the absence of - its Contributions. - -This License does not grant any rights in the trademarks, service marks, -or logos of any Contributor (except as may be necessary to comply with -the notice requirements in Section 3.4). - -2.4. 
Subsequent Licenses - -No Contributor makes additional grants as a result of Your choice to -distribute the Covered Software under a subsequent version of this -License (see Section 10.2) or under the terms of a Secondary License (if -permitted under the terms of Section 3.3). - -2.5. Representation - -Each Contributor represents that the Contributor believes its -Contributions are its original creation(s) or it has sufficient rights -to grant the rights to its Contributions conveyed by this License. - -2.6. Fair Use - -This License is not intended to limit any rights You have under -applicable copyright doctrines of fair use, fair dealing, or other -equivalents. - -2.7. Conditions - -Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted -in Section 2.1. - -3. Responsibilities -------------------- - -3.1. Distribution of Source Form - -All distribution of Covered Software in Source Code Form, including any -Modifications that You create or to which You contribute, must be under -the terms of this License. You must inform recipients that the Source -Code Form of the Covered Software is governed by the terms of this -License, and how they can obtain a copy of this License. You may not -attempt to alter or restrict the recipients' rights in the Source Code -Form. - -3.2. 
Distribution of Executable Form - -If You distribute Covered Software in Executable Form then: - -(a) such Covered Software must also be made available in Source Code - Form, as described in Section 3.1, and You must inform recipients of - the Executable Form how they can obtain a copy of such Source Code - Form by reasonable means in a timely manner, at a charge no more - than the cost of distribution to the recipient; and - -(b) You may distribute such Executable Form under the terms of this - License, or sublicense it under different terms, provided that the - license for the Executable Form does not attempt to limit or alter - the recipients' rights in the Source Code Form under this License. - -3.3. Distribution of a Larger Work - -You may create and distribute a Larger Work under terms of Your choice, -provided that You also comply with the requirements of this License for -the Covered Software. If the Larger Work is a combination of Covered -Software with a work governed by one or more Secondary Licenses, and the -Covered Software is not Incompatible With Secondary Licenses, this -License permits You to additionally distribute such Covered Software -under the terms of such Secondary License(s), so that the recipient of -the Larger Work may, at their option, further distribute the Covered -Software under the terms of either this License or such Secondary -License(s). - -3.4. Notices - -You may not remove or alter the substance of any license notices -(including copyright notices, patent notices, disclaimers of warranty, -or limitations of liability) contained within the Source Code Form of -the Covered Software, except that You may alter any license notices to -the extent required to remedy known factual inaccuracies. - -3.5. Application of Additional Terms - -You may choose to offer, and to charge a fee for, warranty, support, -indemnity or liability obligations to one or more recipients of Covered -Software. 
However, You may do so only on Your own behalf, and not on -behalf of any Contributor. You must make it absolutely clear that any -such warranty, support, indemnity, or liability obligation is offered by -You alone, and You hereby agree to indemnify every Contributor for any -liability incurred by such Contributor as a result of warranty, support, -indemnity or liability terms You offer. You may include additional -disclaimers of warranty and limitations of liability specific to any -jurisdiction. - -4. Inability to Comply Due to Statute or Regulation ---------------------------------------------------- - -If it is impossible for You to comply with any of the terms of this -License with respect to some or all of the Covered Software due to -statute, judicial order, or regulation then You must: (a) comply with -the terms of this License to the maximum extent possible; and (b) -describe the limitations and the code they affect. Such description must -be placed in a text file included with all distributions of the Covered -Software under this License. Except to the extent prohibited by statute -or regulation, such description must be sufficiently detailed for a -recipient of ordinary skill to be able to understand it. - -5. Termination --------------- - -5.1. The rights granted under this License will terminate automatically -if You fail to comply with any of its terms. However, if You become -compliant, then the rights granted under this License from a particular -Contributor are reinstated (a) provisionally, unless and until such -Contributor explicitly and finally terminates Your grants, and (b) on an -ongoing basis, if such Contributor fails to notify You of the -non-compliance by some reasonable means prior to 60 days after You have -come back into compliance. 
Moreover, Your grants from a particular -Contributor are reinstated on an ongoing basis if such Contributor -notifies You of the non-compliance by some reasonable means, this is the -first time You have received notice of non-compliance with this License -from such Contributor, and You become compliant prior to 30 days after -Your receipt of the notice. - -5.2. If You initiate litigation against any entity by asserting a patent -infringement claim (excluding declaratory judgment actions, -counter-claims, and cross-claims) alleging that a Contributor Version -directly or indirectly infringes any patent, then the rights granted to -You by any and all Contributors for the Covered Software under Section -2.1 of this License shall terminate. - -5.3. In the event of termination under Sections 5.1 or 5.2 above, all -end user license agreements (excluding distributors and resellers) which -have been validly granted by You or Your distributors under this License -prior to termination shall survive termination. - -************************************************************************ -* * -* 6. Disclaimer of Warranty * -* ------------------------- * -* * -* Covered Software is provided under this License on an "as is" * -* basis, without warranty of any kind, either expressed, implied, or * -* statutory, including, without limitation, warranties that the * -* Covered Software is free of defects, merchantable, fit for a * -* particular purpose or non-infringing. The entire risk as to the * -* quality and performance of the Covered Software is with You. * -* Should any Covered Software prove defective in any respect, You * -* (not any Contributor) assume the cost of any necessary servicing, * -* repair, or correction. This disclaimer of warranty constitutes an * -* essential part of this License. No use of any Covered Software is * -* authorized under this License except under this disclaimer. 
* -* * -************************************************************************ - -************************************************************************ -* * -* 7. Limitation of Liability * -* -------------------------- * -* * -* Under no circumstances and under no legal theory, whether tort * -* (including negligence), contract, or otherwise, shall any * -* Contributor, or anyone who distributes Covered Software as * -* permitted above, be liable to You for any direct, indirect, * -* special, incidental, or consequential damages of any character * -* including, without limitation, damages for lost profits, loss of * -* goodwill, work stoppage, computer failure or malfunction, or any * -* and all other commercial damages or losses, even if such party * -* shall have been informed of the possibility of such damages. This * -* limitation of liability shall not apply to liability for death or * -* personal injury resulting from such party's negligence to the * -* extent applicable law prohibits such limitation. Some * -* jurisdictions do not allow the exclusion or limitation of * -* incidental or consequential damages, so this exclusion and * -* limitation may not apply to You. * -* * -************************************************************************ - -8. Litigation -------------- - -Any litigation relating to this License may be brought only in the -courts of a jurisdiction where the defendant maintains its principal -place of business and such litigation shall be governed by laws of that -jurisdiction, without reference to its conflict-of-law provisions. -Nothing in this Section shall prevent a party's ability to bring -cross-claims or counter-claims. - -9. Miscellaneous ----------------- - -This License represents the complete agreement concerning the subject -matter hereof. If any provision of this License is held to be -unenforceable, such provision shall be reformed only to the extent -necessary to make it enforceable. 
Any law or regulation which provides -that the language of a contract shall be construed against the drafter -shall not be used to construe this License against a Contributor. - -10. Versions of the License ---------------------------- - -10.1. New Versions - -Mozilla Foundation is the license steward. Except as provided in Section -10.3, no one other than the license steward has the right to modify or -publish new versions of this License. Each version will be given a -distinguishing version number. - -10.2. Effect of New Versions - -You may distribute the Covered Software under the terms of the version -of the License under which You originally received the Covered Software, -or under the terms of any subsequent version published by the license -steward. - -10.3. Modified Versions - -If you create software not governed by this License, and you want to -create a new license for such software, you may create and use a -modified version of this License if you rename the license and remove -any references to the name of the license steward (except to note that -such modified license differs from this License). - -10.4. Distributing Source Code Form that is Incompatible With Secondary -Licenses - -If You choose to distribute Source Code Form that is Incompatible With -Secondary Licenses under the terms of this version of the License, the -notice described in Exhibit B of this License must be attached. - -Exhibit A - Source Code Form License Notice -------------------------------------------- - - This Source Code Form is subject to the terms of the Mozilla Public - License, v. 2.0. If a copy of the MPL was not distributed with this - file, You can obtain one at http://mozilla.org/MPL/2.0/. - -If it is not possible or desirable to put the notice in a particular -file, then You may include the notice in a location (such as a LICENSE -file in a relevant directory) where a recipient would be likely to look -for such a notice. 
- -You may add additional accurate notices of copyright ownership. - -Exhibit B - "Incompatible With Secondary Licenses" Notice ---------------------------------------------------------- - - This Source Code Form is "Incompatible With Secondary Licenses", as - defined by the Mozilla Public License, v. 2.0. diff --git a/symmetry/gemmi/cellred.hpp b/symmetry/gemmi/cellred.hpp deleted file mode 100644 index 7174f776..00000000 --- a/symmetry/gemmi/cellred.hpp +++ /dev/null @@ -1,406 +0,0 @@ -// Copyright 2021 Global Phasing Ltd. -// -// Unit cell reductions: Buerger, Niggli, Selling-Delaunay. - -#ifndef GEMMI_CELLRED_HPP_ -#define GEMMI_CELLRED_HPP_ - -#include -#include -#include // for unique_ptr -#include "math.hpp" // for deg -#include "symmetry.hpp" // for Op -#include "unitcell.hpp" // for UnitCell - -namespace gemmi { - -struct SellingVector; - -// GruberVector contains G6 vector (G for Gruber) and cell reduction algorithms. -// Originally, in B. Gruber, Acta Cryst. A29, 433 (1973), the vector was called -// "characteristic" of a lattice/cell. -// Functions that take epsilon as a parameter use it for comparisons, -// as proposed in Grosse-Kunstleve et al, Acta Cryst. (2004) A60, 1. 
-struct GruberVector { - // a.a b.b c.c 2b.c 2a.c 2a.b - double A, B, C, xi, eta, zeta; // the 1973 paper uses names A B C ξ η ζ - std::unique_ptr change_of_basis; // we use only Op::Rot - - // m - orthogonalization matrix of a primitive cell - explicit GruberVector(const Mat33& m) - : A(m.column_dot(0,0)), - B(m.column_dot(1,1)), - C(m.column_dot(2,2)), - xi(2 * m.column_dot(1,2)), - eta(2 * m.column_dot(0,2)), - zeta(2 * m.column_dot(0,1)) {} - - explicit GruberVector(const std::array& g6) - : A(g6[0]), B(g6[1]), C(g6[2]), xi(g6[3]), eta(g6[4]), zeta(g6[5]) {} - - GruberVector(const UnitCell& u, char centring, bool track_change_of_basis=false) - : GruberVector(u.primitive_orth_matrix(centring)) { - if (track_change_of_basis) - set_change_of_basis(Op{centred_to_primitive(centring), {0,0,0}, 'x'}); - } - - GruberVector(const UnitCell& u, const SpaceGroup* sg, bool track_change_of_basis=false) - : GruberVector(u, sg ? sg->centring_type() : 'P', track_change_of_basis) {} - - void set_change_of_basis(const Op& op) { change_of_basis.reset(new Op(op)); } - - std::array parameters() const { return {A, B, C, xi, eta, zeta}; } - std::array cell_parameters() const { - // inverse of UnitCell::g6() - double a = std::sqrt(A); - double b = std::sqrt(B); - double c = std::sqrt(C); - return {a, b, c, - deg(std::acos(xi/(2*b*c))), - deg(std::acos(eta/(2*a*c))), - deg(std::acos(zeta/(2*a*b)))}; - } - UnitCell get_cell() const { return UnitCell(cell_parameters()); } - - SellingVector selling() const; - - bool is_normalized() const { - // eq(3) from Gruber 1973 - return A <= B && B <= C && - (A != B || std::abs(xi) <= std::abs(eta)) && - (B != C || std::abs(eta) <= std::abs(zeta)) && - (xi > 0) == (eta > 0) && (xi > 0) == (zeta > 0); - } - - bool is_buerger(double epsilon=1e-9) const { - return is_normalized() && - // eq (4) from Gruber 1973 - std::abs(xi) <= B + epsilon && - std::abs(eta) <= A + epsilon && - std::abs(zeta) <= A + epsilon; - } - - // Algorithm N from Gruber (1973). 
- // Returns branch taken in N3. - void normalize(double eps=1e-9) { - auto step_N1 = [&]() { - if (A - B > eps || (A - B >= -eps && std::abs(xi) > std::abs(eta) + eps)) { // N1 - std::swap(A, B); - std::swap(xi, eta); - if (change_of_basis) - swap_columns_and_negate(0, 1); - } - }; - step_N1(); - if (B - C > eps || (B - C >= -eps && std::abs(eta) > std::abs(zeta) + eps)) { // N2 - std::swap(B, C); - std::swap(eta, zeta); - if (change_of_basis) - swap_columns_and_negate(1, 2); - // To make it faster, instead of "go to the point N1" we repeat N1 once - // (which is equivalent - three swaps are sufficient to reorder ABC). - step_N1(); - } - // N3 - // xi * eta * zeta > 0 <=> positive count is 1 or 3 and no zeros - int pos_count = (xi > eps) + (eta > eps) + (zeta > eps); - int nonneg_count = (xi >= -eps) + (eta >= -eps) + (zeta >= -eps); - double sgn = (pos_count == nonneg_count && pos_count % 2 == 1) ? 1 : -1; - if (change_of_basis) { - if (sgn * xi < -eps) negate_column(0); - if (sgn * eta < -eps) negate_column(1); - if (sgn * zeta < -eps) negate_column(2); - if (pos_count != nonneg_count && pos_count % 2 == 1) - negate_column(std::fabs(zeta) <= eps ? 2 : - std::fabs(eta) <= eps ? 1 : 0); - } - xi = std::copysign(xi, sgn); - eta = std::copysign(eta, sgn); - zeta = std::copysign(zeta, sgn); - } - - // Algorithm B from Gruber (1973). - // Returns true if no change was needed. 
- bool buerger_step() { - if (std::abs(xi) > B) { // B2 - double j = std::floor(0.5*xi/B + 0.5); - C += j * (j*B - xi); - xi -= 2 * j * B; - eta -= j * zeta; - } else if (std::abs(eta) > A) { // B3 - double j = std::floor(0.5*eta/A + 0.5); - C += j * (j*A - eta); - xi -= j * zeta; - eta -= 2 * j * A; - } else if (std::abs(zeta) > A) { // B4 - double j = std::floor(0.5*zeta/A + 0.5); - B += j * (j*A - zeta); - xi -= j * eta; - zeta -= 2 * j * A; - } else if (xi + eta + zeta + A + B < 0) { // B5 - double j = std::floor(0.5 * (xi + eta) / (A + B + zeta) + 0.5); - C += j * (j * (A + B + zeta) - (xi + eta)); - xi -= j * (2*B + zeta); - eta -= j * (2*A + zeta); - } else { - return true; - } - return false; - } - - // Returns number of iterations. - int buerger_reduce() { - int n = 0; - double prev_sum = -1; - int stall_count = 0; - for (;;) { - normalize(); - // In rare cases numerical errors push the algorithm into infinite loop, - // as described in Grosse-Kunstleve et al, Acta Cryst. (2004) A60, 1. - // Ad-hoc solution: stop if a+b+c is stalled for 5 iterations. - if (++n > 8) { // don't waste time during the first few iterations - double sum = std::sqrt(A) + std::sqrt(B) + std::sqrt(C); - if (std::abs(sum - prev_sum) < sum * 1e-6) { - if (++stall_count == 5) - break; - } else { - stall_count = 0; - } - prev_sum = sum; - } - if (buerger_step()) - break; - } - return n; - } - - // To be called after normalize() or is_normalized(). - // Returns true if it already was Niggli cell. - // Algorithm from Krivy & Gruber, Acta Cryst. (1976) A32, 297. - bool niggli_step(double epsilon=1e-9) { - if (std::abs(xi) > B + epsilon || // step 5. from Krivy-Gruber (1976) - (xi >= B - epsilon && 2 * eta < zeta - epsilon) || - (xi <= -(B - epsilon) && zeta < -epsilon)) { - double sign_xi = xi >= 0 ? 
1 : -1; - C += B - xi * sign_xi; - eta -= zeta * sign_xi; - xi -= 2 * B * sign_xi; - if (change_of_basis) - add_column(1, 2, -int(sign_xi)); - } else if (std::abs(eta) > A + epsilon || // step 6. - (eta >= A - epsilon && 2 * xi < zeta - epsilon) || - (eta <= -(A - epsilon) && zeta < -epsilon)) { - double sign_eta = eta >= 0 ? 1 : -1; - C += A - eta * sign_eta; - xi -= zeta * sign_eta; - eta -= 2 * A * sign_eta; - if (change_of_basis) - add_column(0, 2, -int(sign_eta)); - } else if (std::abs(zeta) > A + epsilon || // step 7. - (zeta >= A - epsilon && 2 * xi < eta - epsilon) || - (zeta <= -(A - epsilon) && eta < -epsilon)) { - double sign_zeta = zeta >= 0 ? 1 : -1; - B += A - zeta * sign_zeta; - xi -= eta * sign_zeta; - zeta -= 2 * A * sign_zeta; - if (change_of_basis) - add_column(0, 1, -int(sign_zeta)); - } else if (xi + eta + zeta + A + B < -epsilon || // step 8. - (xi + eta + zeta + A + B <= epsilon && 2 * (A + eta) + zeta > epsilon)) { - C += A + B + xi + eta + zeta; - xi += 2 * B + zeta; - eta += 2 * A + zeta; - if (change_of_basis) { - add_column(0, 2, 1); - add_column(1, 2, 1); - } - } else { - return true; - } - return false; - } - - // Returns number of iterations. - int niggli_reduce(double epsilon=1e-9, int iteration_limit=100) { - int n = 0; - for (;;) { - normalize(epsilon); - if (++n == iteration_limit || niggli_step(epsilon)) - break; - } - return n; - } - - bool is_niggli(double epsilon=1e-9) const { - return is_normalized() && GruberVector(parameters()).niggli_step(epsilon); - } - -private: - void swap_columns_and_negate(int i, int j) { - for (auto& r : change_of_basis->rot) - std::swap(r[i], r[j]); - for (auto& r : change_of_basis->rot) - for (auto& v : r) - v = -v; - } - void negate_column(int i) { - for (auto& r : change_of_basis->rot) - r[i] = -r[i]; - } - void add_column(int pos, int dest, int sign) { - for (auto& r : change_of_basis->rot) - r[dest] += sign * r[pos]; - } -}; - - -// Selling-Delaunay reduction. 
Based on: -// - chapter "Delaunay reduction and standardization" in -// International Tables for Crystallography vol. A (2016), sec. 3.1.2.3. -// https://onlinelibrary.wiley.com/iucr/itc/Ac/ch3o1v0001/ -// - Patterson & Love (1957), Acta Cryst. 10, 111, -// "Remarks on the Delaunay reduction", doi:10.1107/s0365110x57000328 -// - Andrews et al (2019), Acta Cryst. A75, 115, -// "Selling reduction versus Niggli reduction for crystallographic lattices". -struct SellingVector { - // b.c a.c a.b a.d b.d c.d - std::array s; - - explicit SellingVector(const std::array& s_) : s(s_) {} - - explicit SellingVector(const Mat33& orth) { - Vec3 b[4]; - for (int i = 0; i < 3; ++i) - b[i] = orth.column_copy(i); - b[3]= -b[0] - b[1] - b[2]; - s[0] = b[1].dot(b[2]); - s[1] = b[0].dot(b[2]); - s[2] = b[0].dot(b[1]); - s[3] = b[0].dot(b[3]); - s[4] = b[1].dot(b[3]); - s[5] = b[2].dot(b[3]); - } - - SellingVector(const UnitCell& u, char centring) - : SellingVector(u.primitive_orth_matrix(centring)) {} - SellingVector(const UnitCell& u, const SpaceGroup* sg) - : SellingVector(u, sg ? sg->centring_type() : 'P') {} - - // The reduction minimizes the sum b_i^2 which is equal to -2 sum s_i. 
- double sum_b_squared() const { - return -2 * (s[0] + s[1] + s[2] + s[3] + s[4] + s[5]); - } - - bool is_reduced(double eps=1e-9) const { - return std::all_of(s.begin(), s.end(), [eps](double x) { return x <= eps; }); - } - - bool reduce_step(double eps=1e-9) { - //printf(" s = %g %g %g %g %g %g sum=%g\n", - // s[0], s[1], s[2], s[3], s[4], s[5], sum_b_squared()); - const int table[6][5] = { - // When negating s[n] we need to apply operations from table[n]: - // 2 x add, subtract, 2 x swap&add - {2, 4, 3, 1, 5}, // 0 - {2, 3, 4, 0, 5}, // 1 - {1, 3, 5, 0, 4}, // 2 - {1, 2, 0, 4, 5}, // 3 - {0, 2, 1, 3, 5}, // 4 - {0, 1, 2, 3, 4}, // 5 - }; - - double max_s = eps; - int max_s_pos = -1; - for (int i = 0; i < 6; ++i) - if (s[i] > max_s) { - max_s = s[i]; - max_s_pos = i; - } - if (max_s_pos < 0) - return false; - const int (&indices)[5] = table[max_s_pos]; - s[max_s_pos] = -max_s; - s[indices[0]] += max_s; - s[indices[1]] += max_s; - s[indices[2]] -= max_s; - std::swap(s[indices[3]], s[indices[4]]); - s[indices[3]] += max_s; - s[indices[4]] += max_s; - //printf(" s[%d]=%g sum: %g\n", max_s_pos, max_s, sum_b_squared()); - return true; - } - - // Returns number of iterations. - int reduce(double eps=1e-9, int iteration_limit=100) { - int n = 0; - while (++n != iteration_limit) - if (!reduce_step(eps)) - break; - return n; - } - - std::array g6_parameters() const { - return {-s[1]-s[2]-s[3], -s[0]-s[2]-s[4], -s[0]-s[1]-s[5], 2*s[0], 2*s[1], 2*s[2]}; - } - - GruberVector gruber() const { return GruberVector(g6_parameters()); } - - // Swap values to make a <= b <= c <= d - void sort(double eps=1e-9) { - double abcd_sq_neg[4] = { - // -a^2, -b^2, -c^2, -d^2 (negated - to be sorted in descending order) - s[1]+s[2]+s[3], s[0]+s[2]+s[4], s[0]+s[1]+s[5], s[3]+s[4]+s[5] - }; - // First, make sure that d >= a,b,c (therefore -d^2 <= -a^2,...). 
- int min_idx = 3; - for (int i = 0; i < 3; ++i) - if (abcd_sq_neg[i] < abcd_sq_neg[min_idx] - eps) - min_idx = i; - switch (min_idx) { - case 0: // a <-> d - std::swap(s[1], s[5]); - std::swap(s[2], s[4]); - break; - case 1: // b <-> d - std::swap(s[0], s[5]); - std::swap(s[2], s[3]); - break; - case 2: // c <-> d - std::swap(s[0], s[4]); - std::swap(s[1], s[3]); - break; - } - // we could stop here and not care about the order of a,b,c. - std::swap(abcd_sq_neg[min_idx], abcd_sq_neg[3]); - if (abcd_sq_neg[0] < abcd_sq_neg[1] - eps) { // a <-> b - std::swap(s[0], s[1]); - std::swap(s[3], s[4]); - std::swap(abcd_sq_neg[0], abcd_sq_neg[1]); - } - if (abcd_sq_neg[1] < abcd_sq_neg[2] - eps) { // b <-> c - std::swap(s[1], s[2]); - std::swap(s[4], s[5]); - std::swap(abcd_sq_neg[1], abcd_sq_neg[2]); - } - if (abcd_sq_neg[0] < abcd_sq_neg[1] - eps) { // a <-> b - std::swap(s[0], s[1]); - std::swap(s[3], s[4]); - //std::swap(abcd_sq_neg[0], abcd_sq_neg[1]); - } - } - - std::array cell_parameters() const { - return gruber().cell_parameters(); - } - UnitCell get_cell() const { return UnitCell(cell_parameters()); } -}; - -inline SellingVector GruberVector::selling() const { - double s0 = 0.5 * xi; - double s1 = 0.5 * eta; - double s2 = 0.5 * zeta; - return SellingVector({s0, s1, s2, -A - s1 - s2, -B - s0 - s2, -C - s0 - s1}); -} - -} // namespace gemmi -#endif diff --git a/symmetry/gemmi/fail.hpp b/symmetry/gemmi/fail.hpp deleted file mode 100644 index 10596385..00000000 --- a/symmetry/gemmi/fail.hpp +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2017 Global Phasing Ltd. 
-// -// fail(), unreachable() and __declspec/__attribute__ macros - -#ifndef GEMMI_FAIL_HPP_ -#define GEMMI_FAIL_HPP_ - -#include // for errno -#include // for runtime_error -#include // for system_error -#include -#include // for forward - -#ifdef __INTEL_COMPILER -// warning #2196: routine is both "inline" and "noinline" -# pragma warning disable 2196 -#endif -#if defined(__GNUG__) && !defined(__clang__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wattributes" -#endif - -#if defined(__GNUC__) || defined(__clang__) -# define GEMMI_COLD __attribute__((cold)) -#elif defined(_MSC_VER) -# define GEMMI_COLD __declspec(noinline) -#else -# define GEMMI_COLD __attribute__((noinline)) -#endif - -#if __cplusplus >= 202002L || _MSVC_LANG >= 202002L -# define GEMMI_LIKELY(x) (x) [[likely]] -# define GEMMI_UNLIKELY(x) (x) [[unlikely]] -#elif defined(__GNUC__) || defined(__clang__) -# define GEMMI_LIKELY(x) (__builtin_expect(!!(x), 1)) -# define GEMMI_UNLIKELY(x) (__builtin_expect(!!(x), 0)) -#else -# define GEMMI_LIKELY(x) (x) -# define GEMMI_UNLIKELY(x) (x) -#endif - -#if defined(_WIN32) -# if defined(GEMMI_SHARED) -# if defined(GEMMI_BUILD) -# define GEMMI_DLL __declspec(dllexport) -# else -# define GEMMI_DLL __declspec(dllimport) -# endif // GEMMI_BUILD -# else -# define GEMMI_DLL -# endif // GEMMI_SHARED -#else -# define GEMMI_DLL __attribute__((visibility("default"))) -#endif - -namespace gemmi { - -[[noreturn]] -inline void fail(const std::string& msg) { throw std::runtime_error(msg); } - -template [[noreturn]] -void fail(std::string&& str, T&& arg1, Args&&... 
args) { - str += arg1; - fail(std::move(str), std::forward(args)...); -} - -[[noreturn]] -inline GEMMI_COLD void fail(const char* msg) { throw std::runtime_error(msg); } - -[[noreturn]] -inline GEMMI_COLD void sys_fail(const std::string& msg) { - throw std::system_error(errno, std::system_category(), msg); -} -[[noreturn]] -inline GEMMI_COLD void sys_fail(const char* msg) { - throw std::system_error(errno, std::system_category(), msg); -} - -// unreachable() is used to silence GCC -Wreturn-type and hint the compiler -[[noreturn]] inline void unreachable() { -#if defined(__GNUC__) || defined(__clang__) - __builtin_unreachable(); -#elif defined(_MSC_VER) - __assume(0); -#endif -} - -#if defined(__GNUG__) && !defined(__clang__) -# pragma GCC diagnostic pop -#endif - -} // namespace gemmi -#endif diff --git a/symmetry/gemmi/math.hpp b/symmetry/gemmi/math.hpp deleted file mode 100644 index 87c10516..00000000 --- a/symmetry/gemmi/math.hpp +++ /dev/null @@ -1,458 +0,0 @@ -// Copyright 2018 Global Phasing Ltd. -// -// Math utilities. 3D linear algebra. - -#ifndef GEMMI_MATH_HPP_ -#define GEMMI_MATH_HPP_ - -#include // for fabs, cos, sqrt, round -#include // for min -#include -#include // for out_of_range -#include // for enable_if, is_integral - -namespace gemmi { - -constexpr double pi() { return 3.1415926535897932384626433832795029; } - -// The value used in converting between energy[eV] and wavelength[Angstrom]. -// $ units -d15 'h * c / eV / angstrom' -constexpr double hc() { return 12398.4197386209; } - -// The Bohr radius (a0) in Angstroms. -constexpr double bohrradius() { return 0.529177210903; } - -// for Mott-Bethe factor -constexpr double mott_bethe_const() { return 1. / (2 * pi() * pi() * bohrradius()); } - -// Used in conversion of ADPs (atomic displacement parameters). 
-constexpr double u_to_b() { return 8 * pi() * pi(); } - -constexpr double deg(double angle) { return 180.0 / pi() * angle; } -constexpr double rad(double angle) { return pi() / 180.0 * angle; } - -constexpr float sq(float x) { return x * x; } -constexpr double sq(double x) { return x * x; } - -inline double log_cosh(double x) { - // cosh(x) would overflow for x > 710.5, so we calculate: - // ln(cosh(x)) = ln(e^x + e^-x) - ln(2) = ln(e^x * (1 + e^-2x)) - ln(2) - x = std::abs(x); - return x - std::log(2) + std::log1p(std::exp(-2 * x)); -} - -inline int iround(double d) { return static_cast(std::round(d)); } - -inline double angle_abs_diff(double a, double b, double full=360.0) { - double d = std::fabs(a - b); - if (d > full) - d -= std::floor(d / full) * full; - return std::min(d, full - d); -} - -// similar to C++17 std::clamp() -template constexpr T clamp(T v, T lo, T hi) { - return std::min(std::max(v, lo), hi); -} - -template -struct Vec3_ { - Real x, y, z; - - Vec3_() : x(0), y(0), z(0) {} - Vec3_(Real x_, Real y_, Real z_) : x(x_), y(y_), z(z_) {} - explicit Vec3_(std::array h) : x(h[0]), y(h[1]), z(h[2]) {} - - Real& at(int i) { - switch (i) { - case 0: return x; - case 1: return y; - case 2: return z; - default: throw std::out_of_range("Vec3 index must be 0, 1 or 2."); - } - } - Real at(int i) const { return const_cast(this)->at(i); } - - Vec3_ operator-() const { return {-x, -y, -z}; } - Vec3_ operator-(const Vec3_& o) const { return {x-o.x, y-o.y, z-o.z}; } - Vec3_ operator+(const Vec3_& o) const { return {x+o.x, y+o.y, z+o.z}; } - Vec3_ operator*(Real d) const { return {x*d, y*d, z*d}; } - Vec3_ operator/(Real d) const { return *this * (1.0/d); } - Vec3_& operator-=(const Vec3_& o) { *this = *this - o; return *this; } - Vec3_& operator+=(const Vec3_& o) { *this = *this + o; return *this; } - Vec3_& operator*=(Real d) { *this = *this * d; return *this; } - Vec3_& operator/=(Real d) { return operator*=(1.0/d); } - - Vec3_ negated() const { return {-x, -y, 
-z}; } - Real dot(const Vec3_& o) const { return x*o.x + y*o.y + z*o.z; } - Vec3_ cross(const Vec3_& o) const { - return {y*o.z - z*o.y, z*o.x - x*o.z, x*o.y - y*o.x}; - } - Real length_sq() const { return x * x + y * y + z * z; } - Real length() const { return std::sqrt(length_sq()); } - Vec3_ changed_magnitude(Real m) const { return operator*(m / length()); } - Vec3_ normalized() const { return changed_magnitude(1.0); } - Real dist_sq(const Vec3_& o) const { return (*this - o).length_sq(); } - Real dist(const Vec3_& o) const { return std::sqrt(dist_sq(o)); } - Real cos_angle(const Vec3_& o) const { - return dot(o) / std::sqrt(length_sq() * o.length_sq()); - } - Real angle(const Vec3_& o) const { - return std::acos(clamp(cos_angle(o), -1., 1.)); - } - bool approx(const Vec3_& o, Real epsilon) const { - return std::fabs(x - o.x) <= epsilon && - std::fabs(y - o.y) <= epsilon && - std::fabs(z - o.z) <= epsilon; - } - bool has_nan() const { - return std::isnan(x) || std::isnan(y) || std::isnan(z); - } -}; - -using Vec3 = Vec3_; -using Vec3f = Vec3_; - -inline Vec3 operator*(double d, const Vec3& v) { return v * d; } - -/// Rodrigues' rotation formula: rotate vector v about given axis of rotation -/// (which must be a unit vector) by given angle (in radians). 
-inline Vec3 rotate_about_axis(const Vec3& v, const Vec3& axis, double theta) { - double sin_theta = std::sin(theta); - double cos_theta = std::cos(theta); - return v * cos_theta + axis.cross(v) * sin_theta + - axis * (axis.dot(v) * (1 - cos_theta)); -} - -struct Mat33 { - double a[3][3] = { {1.,0.,0.}, {0.,1.,0.}, {0.,0.,1.} }; - - // make it accessible with ".a" - typedef double row_t[3]; - const row_t& operator[](int i) const { return a[i]; } - row_t& operator[](int i) { return a[i]; } - - Mat33() = default; - explicit Mat33(double d) : a{{d, d, d}, {d, d, d}, {d, d, d}} {} - Mat33(double a1, double a2, double a3, double b1, double b2, double b3, - double c1, double c2, double c3) - : a{{a1, a2, a3}, {b1, b2, b3}, {c1, c2, c3}} {} - - static Mat33 from_columns(const Vec3& c1, const Vec3& c2, const Vec3& c3) { - return Mat33(c1.x, c2.x, c3.x, c1.y, c2.y, c3.y, c1.z, c2.z, c3.z); - } - - Vec3 row_copy(int i) const { - if (i < 0 || i > 2) - throw std::out_of_range("Mat33 row index must be 0, 1 or 2."); - return Vec3(a[i][0], a[i][1], a[i][2]); - } - - Vec3 column_copy(int i) const { - if (i < 0 || i > 2) - throw std::out_of_range("Mat33 column index must be 0, 1 or 2."); - return Vec3(a[0][i], a[1][i], a[2][i]); - } - - Mat33 operator+(const Mat33& b) const { - return Mat33(a[0][0] + b[0][0], a[0][1] + b[0][1], a[0][2] + b[0][2], - a[1][0] + b[1][0], a[1][1] + b[1][1], a[1][2] + b[1][2], - a[2][0] + b[2][0], a[2][1] + b[2][1], a[2][2] + b[2][2]); - } - Mat33 operator-(const Mat33& b) const { - return Mat33(a[0][0] - b[0][0], a[0][1] - b[0][1], a[0][2] - b[0][2], - a[1][0] - b[1][0], a[1][1] - b[1][1], a[1][2] - b[1][2], - a[2][0] - b[2][0], a[2][1] - b[2][1], a[2][2] - b[2][2]); - } - - Vec3 multiply(const Vec3& p) const { - return {a[0][0] * p.x + a[0][1] * p.y + a[0][2] * p.z, - a[1][0] * p.x + a[1][1] * p.y + a[1][2] * p.z, - a[2][0] * p.x + a[2][1] * p.y + a[2][2] * p.z}; - } - Vec3 left_multiply(const Vec3& p) const { - return {a[0][0] * p.x + a[1][0] * p.y + 
a[2][0] * p.z, - a[0][1] * p.x + a[1][1] * p.y + a[2][1] * p.z, - a[0][2] * p.x + a[1][2] * p.y + a[2][2] * p.z}; - } - // p has elements from the main diagonal of a 3x3 diagonal matrix - Mat33 multiply_by_diagonal(const Vec3& p) const { - return Mat33(a[0][0] * p.x, a[0][1] * p.y, a[0][2] * p.z, - a[1][0] * p.x, a[1][1] * p.y, a[1][2] * p.z, - a[2][0] * p.x, a[2][1] * p.y, a[2][2] * p.z); - } - Mat33 multiply(const Mat33& b) const { - Mat33 r; - for (int i = 0; i != 3; ++i) - for (int j = 0; j != 3; ++j) - r[i][j] = a[i][0] * b[0][j] + a[i][1] * b[1][j] + a[i][2] * b[2][j]; - return r; - } - Mat33 transpose() const { - return Mat33(a[0][0], a[1][0], a[2][0], - a[0][1], a[1][1], a[2][1], - a[0][2], a[1][2], a[2][2]); - } - double trace() const { return a[0][0] + a[1][1] + a[2][2]; } - - bool approx(const Mat33& other, double epsilon) const { - for (int i = 0; i < 3; ++i) - for (int j = 0; j < 3; ++j) - if (std::fabs(a[i][j] - other.a[i][j]) > epsilon) - return false; - return true; - } - bool has_nan() const { - for (int i = 0; i < 3; ++i) - for (int j = 0; j < 3; ++j) - if (std::isnan(a[i][j])) - return true; - return false; - } - - double determinant() const { - return a[0][0] * (a[1][1]*a[2][2] - a[2][1]*a[1][2]) + - a[0][1] * (a[1][2]*a[2][0] - a[2][2]*a[1][0]) + - a[0][2] * (a[1][0]*a[2][1] - a[2][0]*a[1][1]); - } - Mat33 inverse() const { - Mat33 inv; - double inv_det = 1.0 / determinant(); - inv[0][0] = inv_det * (a[1][1] * a[2][2] - a[2][1] * a[1][2]); - inv[0][1] = inv_det * (a[0][2] * a[2][1] - a[0][1] * a[2][2]); - inv[0][2] = inv_det * (a[0][1] * a[1][2] - a[0][2] * a[1][1]); - inv[1][0] = inv_det * (a[1][2] * a[2][0] - a[1][0] * a[2][2]); - inv[1][1] = inv_det * (a[0][0] * a[2][2] - a[0][2] * a[2][0]); - inv[1][2] = inv_det * (a[1][0] * a[0][2] - a[0][0] * a[1][2]); - inv[2][0] = inv_det * (a[1][0] * a[2][1] - a[2][0] * a[1][1]); - inv[2][1] = inv_det * (a[2][0] * a[0][1] - a[0][0] * a[2][1]); - inv[2][2] = inv_det * (a[0][0] * a[1][1] - a[1][0] * 
a[0][1]); - return inv; - } - bool is_identity() const { - return a[0][0] == 1 && a[0][1] == 0 && a[0][2] == 0 && - a[1][0] == 0 && a[1][1] == 1 && a[1][2] == 0 && - a[2][0] == 0 && a[2][1] == 0 && a[2][2] == 1; - } - - double column_dot(int i, int j) const { - return a[0][i] * a[0][j] + a[1][i] * a[1][j] + a[2][i] * a[2][j]; - } - - bool is_upper_triangular() const { - return a[1][0] == 0 && a[2][0] == 0 && a[2][1] == 0; - } -}; - -struct UpperTriangularMat33 { - double a11 = 0, a12 = 0, a13 = 0; - double a22 = 0, a23 = 0; - double a33 = 0; - UpperTriangularMat33() = default; - UpperTriangularMat33& operator=(const Mat33& m) { - if (m.is_upper_triangular()) { - a11 = m[0][0]; - a12 = m[0][1]; - a13 = m[0][2]; - a22 = m[1][1]; - a23 = m[1][2]; - a33 = m[2][2]; - } else { - a11 = a12 = a13 = a22 = a23 = a33 = NAN; - } - return *this; - } - Vec3 multiply(const Vec3& p) const { - return {a11 * p.x + a12 * p.y + a13 * p.z, - a22 * p.y + a23 * p.z, - a33 * p.z}; - } -}; - -// Symmetric matrix 3x3. Used primarily for an ADP tensor. -template struct SMat33 { - T u11, u22, u33, u12, u13, u23; - - // The PDB ANISOU record has the above order, but in a different context - // (such as metric tensor) the order of Voigt notation may be preferred. - std::array elements_pdb() const { return {{u11, u22, u33, u12, u13, u23}}; } - std::array elements_voigt() const { return {{u11, u22, u33, u23, u13, u12}}; } - - Mat33 as_mat33() const { - return Mat33(u11, u12, u13, u12, u22, u23, u13, u23, u33); - } - - // the arguments i and j must be in [0,2], i.e. 0, 1 or 2. 
- T& unchecked_ref(int i, int j) { - T* ptrs[9] = {&u11, &u12, &u13, &u12, &u22, &u23, &u13, &u23, &u33}; - return *ptrs[3 * i + j]; - } - - T trace() const { return u11 + u22 + u33; } - bool nonzero() const { return trace() != 0; } - - bool all_zero() const { - return u11 == 0 && u22 == 0 && u33 == 0 && u12 == 0 && u13 == 0 && u23 == 0; - } - - void scale(T s) const { - u11 *= s; u22 *= s; u33 *= s; u12 *= s; u13 *= s; u23 *= s; - } - - template - SMat33 scaled(Real s) const { - return SMat33{u11*s, u22*s, u33*s, u12*s, u13*s, u23*s}; - } - - // returns U + kI - SMat33 added_kI(T k) const { - return {u11+k, u22+k, u33+k, u12, u13, u23}; - } - - // returns squared norm r^T U r where U is this matrix and vector r is arg - template - auto r_u_r(const Vec3_& r) const -> decltype(r.x+u11) { - return r.x * r.x * u11 + r.y * r.y * u22 + r.z * r.z * u33 + - 2 * (r.x * r.y * u12 + r.x * r.z * u13 + r.y * r.z * u23); - } - double r_u_r(const std::array& h) const { - // it's faster to first convert ints to doubles (Vec3) - return r_u_r(Vec3(h)); - } - - Vec3 multiply(const Vec3& p) const { - return {u11 * p.x + u12 * p.y + u13 * p.z, - u12 * p.x + u22 * p.y + u23 * p.z, - u13 * p.x + u23 * p.y + u33 * p.z}; - } - - SMat33 operator-(const SMat33& o) const { - return {u11-o.u11, u22-o.u22, u33-o.u33, u12-o.u12, u13-o.u13, u23-o.u23}; - } - SMat33 operator+(const SMat33& o) const { - return {u11+o.u11, u22+o.u22, u33+o.u33, u12+o.u12, u13+o.u13, u23+o.u23}; - } - - // return M U M^T - template - SMat33 transformed_by(const Mat33& m) const { - // slightly faster than m.multiply(as_mat33()).multiply(m.transpose()); - auto elem = [&](int i, int j) { - return static_cast( - m[i][0] * (m[j][0] * u11 + m[j][1] * u12 + m[j][2] * u13) + - m[i][1] * (m[j][0] * u12 + m[j][1] * u22 + m[j][2] * u23) + - m[i][2] * (m[j][0] * u13 + m[j][1] * u23 + m[j][2] * u33)); - }; - return SMat33{elem(0, 0), elem(1, 1), elem(2, 2), - elem(0, 1), elem(0, 2), elem(1, 2)}; - } - - T determinant() const { - 
return u11 * (u22*u33 - u23*u23) + - u12 * (u23*u13 - u33*u12) + - u13 * (u12*u23 - u13*u22); - } - - SMat33 inverse_(T det) const { - SMat33 inv; - T inv_det = 1.0f / det; - inv.u11 = inv_det * (u22 * u33 - u23 * u23); - inv.u22 = inv_det * (u11 * u33 - u13 * u13); - inv.u33 = inv_det * (u11 * u22 - u12 * u12); - inv.u12 = inv_det * (u13 * u23 - u12 * u33); - inv.u13 = inv_det * (u12 * u23 - u13 * u22); - inv.u23 = inv_det * (u12 * u13 - u11 * u23); - return inv; - } - SMat33 inverse() const { - return inverse_(determinant()); - } - - /// Based on https://en.wikipedia.org/wiki/Eigenvalue_algorithm - /// To calculate both eigenvalues and eigenvectors use eig3.hpp - std::array calculate_eigenvalues() const { - double p1 = u12*u12 + u13*u13 + u23*u23; - if (p1 == 0) - return {{u11, u22, u33}}; - double q = (1./3.) * trace(); - SMat33 b{u11 - q, u22 - q, u33 - q, u12, u13, u23}; - double p2 = sq(b.u11) + sq(b.u22) + sq(b.u33) + 2 * p1; - double p = std::sqrt((1./6.) * p2); - double r = b.determinant() / ((1./3.) * p2 * p); - double phi = 0; - if (r <= -1) - phi = (1./3.) * pi(); - else if (r < 1) - phi = (1./3.) * std::acos(r); - double eig1 = q + 2 * p * std::cos(phi); - double eig3 = q + 2 * p * std::cos(phi + 2./3.*pi()); - return {{eig1, 3 * q - eig1 - eig3, eig3}}; - } -}; - -struct Transform { - Mat33 mat; - Vec3 vec; - - Transform inverse() const { - Mat33 minv = mat.inverse(); - return {minv, minv.multiply(vec).negated()}; - } - - Vec3 apply(const Vec3& x) const { return mat.multiply(x) + vec; } - - Transform combine(const Transform& b) const { - return {mat.multiply(b.mat), vec + mat.multiply(b.vec)}; - } - - bool is_identity() const { - return mat.is_identity() && vec.x == 0. && vec.y == 0. 
&& vec.z == 0.; - } - void set_identity() { mat = Mat33(); vec = Vec3(); } - - bool has_nan() const { - return mat.has_nan() || vec.has_nan(); - } - - bool approx(const Transform& o, double epsilon) const { - return mat.approx(o.mat, epsilon) && vec.approx(o.vec, epsilon); - } -}; - -template -struct Box { - Pos minimum = Pos(INFINITY, INFINITY, INFINITY); - Pos maximum = Pos(-INFINITY, -INFINITY, -INFINITY); - void extend(const Pos& p) { - if (p.x < minimum.x) minimum.x = p.x; - if (p.y < minimum.y) minimum.y = p.y; - if (p.z < minimum.z) minimum.z = p.z; - if (p.x > maximum.x) maximum.x = p.x; - if (p.y > maximum.y) maximum.y = p.y; - if (p.z > maximum.z) maximum.z = p.z; - } - Pos get_size() const { return maximum - minimum; } - void add_margins(const Pos& p) { minimum -= p; maximum += p; } - void add_margin(double m) { add_margins(Pos(m, m, m)); } -}; - -// internally used functions -namespace impl { -// MSVC is missing isnan(IntegralType), so we define is_nan as a replacement -template -typename std::enable_if::value, bool>::type -is_nan(T) { return false; } -template -typename std::enable_if::value, bool>::type -is_nan(T a) { return std::isnan(a); } - -template -typename std::enable_if::value, bool>::type -is_same(T a, T b) { return a == b; } -template -typename std::enable_if::value, bool>::type -is_same(T a, T b) { return std::isnan(b) ? std::isnan(a) : a == b; } -} // namespace impl - -} // namespace gemmi -#endif diff --git a/symmetry/gemmi/symmetry.hpp b/symmetry/gemmi/symmetry.hpp deleted file mode 100644 index 203324fd..00000000 --- a/symmetry/gemmi/symmetry.hpp +++ /dev/null @@ -1,1044 +0,0 @@ -// Copyright 2017-2019 Global Phasing Ltd. -// -// Crystallographic Symmetry. Space Groups. Coordinate Triplets. -// -// If this is all that you need from Gemmi you can just copy this file, -// src/symmetry.cpp fail.hpp and LICENSE.txt to your project. 
- -#ifndef GEMMI_SYMMETRY_HPP_ -#define GEMMI_SYMMETRY_HPP_ - -#include // for strtol, abs -#include -#include // for sort, remove -#include // for hash -#include // for invalid_argument -#include -#include // for tie -#include - -#include "fail.hpp" // for fail, unreachable - -namespace gemmi { - -// OP - -// Op is a symmetry operation, or a change-of-basic transformation, -// or a different operation of similar kind. -// Both "rotation" matrix and translation vector are fractional, with DEN -// used as the denominator. -struct GEMMI_DLL Op { - static constexpr int DEN = 24; // 24 to handle 1/8 in change-of-basis - typedef std::array, 3> Rot; - typedef std::array Tran; - - Rot rot; - Tran tran; - char notation = ' '; - - bool is_hkl() const { return notation == 'h'; } - - Op as_hkl() const { - return is_hkl() ? *this : Op{rot, {0,0,0}, 'h'}; - } - Op as_xyz() const { - return is_hkl() ? Op{rot, {0,0,0}, 'x'} : *this; - } - - std::string triplet(char style=' ') const; - - Op inverse() const; - - Op::Tran wrapped_tran() const { - Op::Tran t = tran; - for (int i = 0; i != 3; ++i) { - if (t[i] >= DEN) // elements need to be in [0,DEN) - t[i] %= DEN; - else if (t[i] < 0) - t[i] = ((t[i] + 1) % DEN) + DEN - 1; - } - return t; - } - - // If the translation points outside of the unit cell, wrap it. 
- Op& wrap() { - tran = wrapped_tran(); - return *this; - } - - Op& translate(const Tran& a) { - for (int i = 0; i != 3; ++i) - tran[i] += a[i]; - return *this; - } - - Op translated(const Tran& a) const { return Op(*this).translate(a); } - - Op add_centering(const Tran& a) const { return translated(a).wrap(); } - - Rot negated_rot() const { - return {{{-rot[0][0], -rot[0][1], -rot[0][2]}, - {-rot[1][0], -rot[1][1], -rot[1][2]}, - {-rot[2][0], -rot[2][1], -rot[2][2]}}}; - } - - static Rot transpose(const Rot& rot) { - return {{{rot[0][0], rot[1][0], rot[2][0]}, - {rot[0][1], rot[1][1], rot[2][1]}, - {rot[0][2], rot[1][2], rot[2][2]}}}; - } - Rot transposed_rot() const { return transpose(rot); } - - // DEN^3 for rotation, -DEN^3 for rotoinversion - int det_rot() const { - return rot[0][0] * (rot[1][1] * rot[2][2] - rot[1][2] * rot[2][1]) - - rot[0][1] * (rot[1][0] * rot[2][2] - rot[1][2] * rot[2][0]) - + rot[0][2] * (rot[1][0] * rot[2][1] - rot[1][1] * rot[2][0]); - } - - // Rotation-part type based on Table 1 in RWGK, Acta Cryst. A55, 383 (1999) - int rot_type() const { - int det = det_rot(); - int tr_den = rot[0][0] + rot[1][1] + rot[2][2]; - int tr = tr_den / DEN; - const int table[] = {0, 0, 2, 3, 4, 6, 1}; - if (std::abs(det) == DEN * DEN * DEN && tr * DEN == tr_den && std::abs(tr) <= 3) - return det > 0 ? 
table[3 + tr] : -table[3 - tr]; - return 0; - } - - Op combine(const Op& b) const { - if (is_hkl() != b.is_hkl()) - fail("can't combine real- and reciprocal-space Op"); - Op r; - for (int i = 0; i != 3; ++i) { - r.tran[i] = tran[i] * Op::DEN; - for (int j = 0; j != 3; ++j) { - r.rot[i][j] = (rot[i][0] * b.rot[0][j] + - rot[i][1] * b.rot[1][j] + - rot[i][2] * b.rot[2][j]) / Op::DEN; - r.tran[i] += rot[i][j] * b.tran[j]; - } - r.tran[i] /= Op::DEN; - } - r.notation = notation; - return r; - } - - std::array apply_to_xyz(const std::array& xyz) const { - if (is_hkl()) - fail("can't apply reciprocal-space Op to xyz"); - std::array out; - for (int i = 0; i != 3; ++i) - out[i] = (rot[i][0] * xyz[0] + rot[i][1] * xyz[1] + rot[i][2] * xyz[2] + - tran[i]) / Op::DEN; - return out; - } - - // Miller is defined in the same way in namespace gemmi in unitcell.hpp - using Miller = std::array; - - Miller apply_to_hkl_without_division(const Miller& hkl) const { - Miller r; - for (int i = 0; i != 3; ++i) - r[i] = (rot[0][i] * hkl[0] + rot[1][i] * hkl[1] + rot[2][i] * hkl[2]); - return r; - } - static Miller divide_hkl_by_DEN(const Miller& hkl) { - return {{ hkl[0] / DEN, hkl[1] / DEN, hkl[2] / DEN }}; - } - Miller apply_to_hkl(const Miller& hkl) const { - return divide_hkl_by_DEN(apply_to_hkl_without_division(hkl)); - } - - double phase_shift(const Miller& hkl) const { - constexpr double mult = -2 * 3.1415926535897932384626433832795 / Op::DEN; - return mult * (hkl[0] * tran[0] + hkl[1] * tran[1] + hkl[2] * tran[2]); - } - - std::array, 4> int_seitz() const { - std::array, 4> t; - for (int i = 0; i < 3; ++i) - t[i] = { rot[i][0], rot[i][1], rot[i][2], tran[i] }; - t[3] = { 0, 0, 0, 1 }; - return t; - } - - std::array, 4> float_seitz() const { - std::array, 4> t; - double m = 1.0 / Op::DEN; - for (int i = 0; i < 3; ++i) - t[i] = { m * rot[i][0], m * rot[i][1], m * rot[i][2], m * tran[i] }; - t[3] = { 0., 0., 0., 1. 
}; - return t; - } - - static constexpr Op identity() { - return {{{{DEN,0,0}, {0,DEN,0}, {0,0,DEN}}}, {0,0,0}, ' '}; - } - static constexpr Op::Rot inversion_rot() { - return {{{-DEN,0,0}, {0,-DEN,0}, {0,0,-DEN}}}; - } - bool operator<(const Op& rhs) const { - return std::tie(rot, tran) < std::tie(rhs.rot, rhs.tran); - } -}; - -inline bool operator==(const Op& a, const Op& b) { - return a.rot == b.rot && a.tran == b.tran; -} -inline bool operator!=(const Op& a, const Op& b) { return !(a == b); } - -inline Op operator*(const Op& a, const Op& b) { return a.combine(b).wrap(); } -inline Op& operator*=(Op& a, const Op& b) { a = a * b; return a; } - -inline Op Op::inverse() const { - int detr = det_rot(); - if (detr == 0) - fail("cannot invert matrix: " + Op{rot, {0,0,0}, notation}.triplet()); - int d2 = Op::DEN * Op::DEN; - Op inv; - inv.rot[0][0] = d2 * (rot[1][1] * rot[2][2] - rot[2][1] * rot[1][2]) / detr; - inv.rot[0][1] = d2 * (rot[0][2] * rot[2][1] - rot[0][1] * rot[2][2]) / detr; - inv.rot[0][2] = d2 * (rot[0][1] * rot[1][2] - rot[0][2] * rot[1][1]) / detr; - inv.rot[1][0] = d2 * (rot[1][2] * rot[2][0] - rot[1][0] * rot[2][2]) / detr; - inv.rot[1][1] = d2 * (rot[0][0] * rot[2][2] - rot[0][2] * rot[2][0]) / detr; - inv.rot[1][2] = d2 * (rot[1][0] * rot[0][2] - rot[0][0] * rot[1][2]) / detr; - inv.rot[2][0] = d2 * (rot[1][0] * rot[2][1] - rot[2][0] * rot[1][1]) / detr; - inv.rot[2][1] = d2 * (rot[2][0] * rot[0][1] - rot[0][0] * rot[2][1]) / detr; - inv.rot[2][2] = d2 * (rot[0][0] * rot[1][1] - rot[1][0] * rot[0][1]) / detr; - for (int i = 0; i != 3; ++i) - inv.tran[i] = (-tran[0] * inv.rot[i][0] - -tran[1] * inv.rot[i][1] - -tran[2] * inv.rot[i][2]) / Op::DEN; - inv.notation = notation; - return inv; -} - -// inverse of Op::float_seitz() -GEMMI_DLL Op seitz_to_op(const std::array, 4>& t); - -// helper function for use in AsuBrick::str() -GEMMI_DLL void append_op_fraction(std::string& s, int w); - -// TRIPLET -> OP -GEMMI_DLL std::array parse_triplet_part(const 
std::string& s, char& notation, - double* decimal_fract=nullptr); -GEMMI_DLL Op parse_triplet(const std::string& s, char notation=' '); - -// GROUPS OF OPERATIONS - -// corresponds to Table A1.4.2.2 in ITfC vol.B (edition 2010) -inline std::vector centring_vectors(char centring_type) { - constexpr int h = Op::DEN / 2; - constexpr int t = Op::DEN / 3; - constexpr int d = 2 * t; - // note: find_centering() depends on the order of operations in vector - switch (centring_type & ~0x20) { - case 'P': return {{0, 0, 0}}; - case 'A': return {{0, 0, 0}, {0, h, h}}; - case 'B': return {{0, 0, 0}, {h, 0, h}}; - case 'C': return {{0, 0, 0}, {h, h, 0}}; - case 'I': return {{0, 0, 0}, {h, h, h}}; - case 'R': return {{0, 0, 0}, {d, t, t}, {t, d, d}}; - // hall_symbols.html has no H, ITfC 2010 has no S and T - case 'H': return {{0, 0, 0}, {d, t, 0}, {t, d, 0}}; - case 'S': return {{0, 0, 0}, {t, t, d}, {d, t, d}}; - case 'T': return {{0, 0, 0}, {t, d, t}, {d, t, d}}; - case 'F': return {{0, 0, 0}, {0, h, h}, {h, 0, h}, {h, h, 0}}; - default: fail("not a centring type: ", centring_type); - } -} - - -struct GroupOps { - std::vector sym_ops; - std::vector cen_ops; - - int order() const { return static_cast(sym_ops.size()*cen_ops.size()); } - - void add_missing_elements(); - void add_missing_elements_part2(const std::vector& gen, - size_t max_size, bool ignore_bad_gen); - - bool add_inversion() { - size_t init_size = sym_ops.size(); - sym_ops.reserve(2 * init_size); - for (const Op& op : sym_ops) { - Op::Rot neg = op.negated_rot(); - if (find_by_rotation(neg)) { - sym_ops.resize(init_size); - return false; - } - sym_ops.push_back({neg, op.tran, op.notation}); - } - return true; - } - - char find_centering() const { - if (cen_ops.size() == 1 && cen_ops[0] == Op::Tran{0, 0, 0}) - return 'P'; - std::vector trans = cen_ops; - std::sort(trans.begin(), trans.end()); - for (char c : {'A', 'B', 'C', 'I', 'F', 'R', 'H', 'S', 'T'}) { - std::vector c_vectors = centring_vectors(c); - if (c == 'R' 
|| c == 'H') // these two are returned not sorted - std::swap(c_vectors[1], c_vectors[2]); - if (trans == c_vectors) - return c; - } - return 0; - } - - Op* find_by_rotation(const Op::Rot& r) { - for (Op& op : sym_ops) - if (op.rot == r) - return &op; - return nullptr; - } - - const Op* find_by_rotation(const Op::Rot& r) const { - return const_cast(this)->find_by_rotation(r); - } - - bool is_centrosymmetric() const { - return find_by_rotation(Op::inversion_rot()) != nullptr; - } - - bool is_reflection_centric(const Op::Miller& hkl) const { - Op::Miller mhkl = {{-Op::DEN * hkl[0], -Op::DEN * hkl[1], -Op::DEN * hkl[2]}}; - for (const Op& op : sym_ops) - if (op.apply_to_hkl_without_division(hkl) == mhkl) - return true; - return false; - } - - int epsilon_factor_without_centering(const Op::Miller& hkl) const { - Op::Miller denh = {{Op::DEN * hkl[0], Op::DEN * hkl[1], Op::DEN * hkl[2]}}; - int epsilon = 0; - for (const Op& op : sym_ops) - if (op.apply_to_hkl_without_division(hkl) == denh) - ++epsilon; - return epsilon; - } - int epsilon_factor(const Op::Miller& hkl) const { - return epsilon_factor_without_centering(hkl) * (int) cen_ops.size(); - } - - static bool has_phase_shift(const Op::Tran& c, const Op::Miller& hkl) { - return (hkl[0] * c[0] + hkl[1] * c[1] + hkl[2] * c[2]) % Op::DEN != 0; - } - - bool is_systematically_absent(const Op::Miller& hkl) const { - for (auto i = cen_ops.begin() + 1; i != cen_ops.end(); ++i) - if (has_phase_shift(*i, hkl)) - return true; - Op::Miller denh = {{Op::DEN * hkl[0], Op::DEN * hkl[1], Op::DEN * hkl[2]}}; - for (auto op = sym_ops.begin() + 1; op != sym_ops.end(); ++op) - if (op->apply_to_hkl_without_division(hkl) == denh) { - for (const Op::Tran& c : cen_ops) - if (has_phase_shift({{op->tran[0] + c[0], - op->tran[1] + c[1], - op->tran[2] + c[2]}}, hkl)) - return true; - } - return false; - } - - void change_basis_impl(const Op& cob, const Op& inv) { - if (sym_ops.empty() || cen_ops.empty()) - return; - - // Apply change-of-basis 
to sym_ops. - // Ignore the first item in sym_ops -- it's identity. - for (auto op = sym_ops.begin() + 1; op != sym_ops.end(); ++op) - *op = cob.combine(*op).combine(inv).wrap(); - - // The number of centering vectors may be different. - // As an ad-hoc method (not proved to be robust) add lattice points - // from a super-cell. - int idet = inv.det_rot() / (Op::DEN * Op::DEN * Op::DEN); - if (idet > 1) { - std::vector new_cen_ops; - new_cen_ops.reserve(cen_ops.size() * idet * idet * idet); - for (int i = 0; i < idet; ++i) - for (int j = 0; j < idet; ++j) - for (int k = 0; k < idet; ++k) - for (Op::Tran& cen : cen_ops) - new_cen_ops.push_back({i * Op::DEN + cen[0], - j * Op::DEN + cen[1], - k * Op::DEN + cen[2]}); - cen_ops.swap(new_cen_ops); - } - - // Apply change-of-basis to centering vectors - Op cvec = Op::identity(); - for (auto tr = cen_ops.begin() + 1; tr != cen_ops.end(); ++tr) { - cvec.tran = *tr; - *tr = cob.combine(cvec).combine(inv).wrap().tran; - } - - // Remove redundant centering vectors. 
- for (int i = static_cast(cen_ops.size()) - 1; i > 0; --i) - for (int j = i - 1; j >= 0; --j) - if (cen_ops[i] == cen_ops[j]) { - cen_ops.erase(cen_ops.begin() + i); - break; - } - } - - void change_basis_forward(const Op& cob) { change_basis_impl(cob, cob.inverse()); } - void change_basis_backward(const Op& inv) { change_basis_impl(inv.inverse(), inv); } - - std::vector all_ops_sorted() const { - std::vector ops; - ops.reserve(sym_ops.size() * cen_ops.size()); - for (const Op& so : sym_ops) - for (const Op::Tran& co : cen_ops) - ops.push_back(so.add_centering(co)); - std::sort(ops.begin(), ops.end()); - return ops; - } - - Op get_op(int n) const { - int n_cen = n / (int) sym_ops.size(); - int n_sym = n % (int) sym_ops.size(); - return sym_ops.at(n_sym).add_centering(cen_ops.at(n_cen)); - } - - bool is_same_as(const GroupOps& other) const { - if (cen_ops.size() != other.cen_ops.size() || - sym_ops.size() != other.sym_ops.size()) - return false; - return all_ops_sorted() == other.all_ops_sorted(); - } - - bool has_same_centring(const GroupOps& other) const { - if (cen_ops.size() != other.cen_ops.size()) - return false; - if (std::is_sorted(cen_ops.begin(), cen_ops.end()) && - std::is_sorted(other.cen_ops.begin(), other.cen_ops.end())) - return cen_ops == other.cen_ops; - std::vector v1 = cen_ops; - std::vector v2 = other.cen_ops; - std::sort(v1.begin(), v1.end()); - std::sort(v2.begin(), v2.end()); - return v1 == v2; - } - - bool has_same_rotations(const GroupOps& other) const { - if (sym_ops.size() != other.sym_ops.size()) - return false; - auto sorted_rotations = [](const GroupOps& g) { - std::vector r(g.sym_ops.size()); - for (size_t i = 0; i != r.size(); ++i) - r[i] = g.sym_ops[i].rot; - std::sort(r.begin(), r.end()); - return r; - }; - return sorted_rotations(*this) == sorted_rotations(other); - } - - // minimal multiplicity for real-space grid in each direction - // examples: 1,2,1 for P21, 1,1,6 for P61 - std::array find_grid_factors() const { - const int T 
= Op::DEN; - int r[3] = {T, T, T}; - for (Op op : *this) - for (int i = 0; i != 3; ++i) - if (op.tran[i] != 0 && op.tran[i] < r[i]) - r[i] = op.tran[i]; - return {T / r[0], T / r[1], T / r[2]}; - } - - bool are_directions_symmetry_related(int u, int v) const { - for (const Op& op : sym_ops) - if (op.rot[u][v] != 0) - return true; - return false; - } - - // remove translation part of sym_ops - GroupOps derive_symmorphic() const { - GroupOps r(*this); - for (Op& op : r.sym_ops) - op.tran[0] = op.tran[1] = op.tran[2] = 0; - return r; - } - - struct Iter { - const GroupOps& gops; - int n_sym, n_cen; - void operator++() { - if (++n_sym == (int) gops.sym_ops.size()) { - ++n_cen; - n_sym = 0; - } - } - Op operator*() const { - return gops.sym_ops.at(n_sym).translated(gops.cen_ops.at(n_cen)).wrap(); - } - bool operator==(const Iter& other) const { - return n_sym == other.n_sym && n_cen == other.n_cen; - } - bool operator!=(const Iter& other) const { return !(*this == other); } - }; - - Iter begin() const { return {*this, 0, 0}; } - Iter end() const { return {*this, 0, (int) cen_ops.size()}; } -}; - -inline void GroupOps::add_missing_elements() { - // We always keep identity as sym_ops[0]. - if (sym_ops.empty() || sym_ops[0] != Op::identity()) - fail("oops"); - if (sym_ops.size() == 1) - return; - constexpr size_t max_size = 1024; - // Below we assume that all centring vectors are already known (in cen_ops) - // so when checking for a new element we compare only the 3x3 matrix. - // Dimino's algorithm. 
https://physics.stackexchange.com/a/351400/95713 - std::vector gen(sym_ops.begin() + 1, sym_ops.end()); - sym_ops.resize(2); - const Op::Rot idrot = Op::identity().rot; - for (Op g = sym_ops[1] * sym_ops[1]; g.rot != idrot; g *= sym_ops[1]) { - sym_ops.push_back(g); - if (sym_ops.size() > max_size) - fail("Too many elements in the group - bad generators"); - } - // the rest is in separate function b/c it's reused in twin.hpp - add_missing_elements_part2(gen, max_size, false); -} - -inline void GroupOps::add_missing_elements_part2(const std::vector& gen, - size_t max_size, bool ignore_bad_gen) { - for (size_t i = 1; i < gen.size(); ++i) { - std::vector coset_repr(1, Op::identity()); - size_t init_size = sym_ops.size(); - for (;;) { - size_t len = coset_repr.size(); - for (size_t j = 0; j != len; ++j) { - for (size_t n = 0; n != i + 1; ++n) { - Op sg = gen[n] * coset_repr[j]; - if (find_by_rotation(sg.rot) == nullptr) { - sym_ops.push_back(sg); - for (size_t k = 1; k != init_size; ++k) - sym_ops.push_back(sg * sym_ops[k]); - coset_repr.push_back(sg); - } - } - } - if (len == coset_repr.size()) - break; - if (sym_ops.size() > max_size) { - if (!ignore_bad_gen) - fail("Too many elements in the group - bad generators"); - // ignore this generator and continue with the next one - sym_ops.resize(init_size); - break; - } - } - } -} - -// Create GroupOps from Ops by separating centering vectors -inline GroupOps split_centering_vectors(const std::vector& ops) { - const Op identity = Op::identity(); - GroupOps go; - go.sym_ops.push_back(identity); - for (const Op& op : ops) - if (Op* old_op = go.find_by_rotation(op.rot)) { - Op::Tran tran = op.wrapped_tran(); - if (op.rot == identity.rot) // pure shift - go.cen_ops.push_back(tran); - if (tran == identity.tran) // or rather |tran| < |old_op->tran| ? 
- old_op->tran = op.tran; - } else { - go.sym_ops.push_back(op); - } - return go; -} - -GEMMI_DLL GroupOps generators_from_hall(const char* hall); - -inline GroupOps symops_from_hall(const char* hall) { - GroupOps ops = generators_from_hall(hall); - ops.add_missing_elements(); - return ops; -} - -// CRYSTAL SYSTEMS, POINT GROUPS AND LAUE CLASSES - -enum class CrystalSystem : unsigned char { - Triclinic=0, Monoclinic, Orthorhombic, Tetragonal, Trigonal, Hexagonal, Cubic -}; - -inline const char* crystal_system_str(CrystalSystem system) { - static const char* names[7] = { - "triclinic", "monoclinic", "orthorhombic", "tetragonal", - "trigonal", "hexagonal", "cubic" - }; - return names[static_cast(system)]; -} - -enum class PointGroup : unsigned char { - C1=0, Ci, C2, Cs, C2h, D2, C2v, D2h, C4, S4, C4h, D4, C4v, D2d, D4h, C3, - C3i, D3, C3v, D3d, C6, C3h, C6h, D6, C6v, D3h, D6h, T, Th, O, Td, Oh -}; - -inline const char* point_group_hm(PointGroup pg) { - static const char hm_pointgroup_names[32][6] = { - "1", "-1", "2", "m", "2/m", "222", "mm2", "mmm", - "4", "-4", "4/m", "422", "4mm", "-42m", "4/mmm", "3", - "-3", "32", "3m", "-3m", "6", "-6", "6/m", "622", - "6mm", "-62m", "6/mmm", "23", "m-3", "432", "-43m", "m-3m", - }; - return hm_pointgroup_names[static_cast(pg)]; -} - -// http://reference.iucr.org/dictionary/Laue_class -enum class Laue : unsigned char { - L1=0, L2m, Lmmm, L4m, L4mmm, L3, L3m, L6m, L6mmm, Lm3, Lm3m -}; - -inline Laue pointgroup_to_laue(PointGroup pg) { - static const Laue laue[32] = { - Laue::L1, Laue::L1, - Laue::L2m, Laue::L2m, Laue::L2m, - Laue::Lmmm, Laue::Lmmm, Laue::Lmmm, - Laue::L4m, Laue::L4m, Laue::L4m, - Laue::L4mmm, Laue::L4mmm, Laue::L4mmm, Laue::L4mmm, - Laue::L3, Laue::L3, - Laue::L3m, Laue::L3m, Laue::L3m, - Laue::L6m, Laue::L6m, Laue::L6m, - Laue::L6mmm, Laue::L6mmm, Laue::L6mmm, Laue::L6mmm, - Laue::Lm3, Laue::Lm3, - Laue::Lm3m, Laue::Lm3m, Laue::Lm3m, - }; - return laue[static_cast(pg)]; -} - -// return centrosymmetric 
pointgroup from the Laue class -inline PointGroup laue_to_pointgroup(Laue laue) { - static const PointGroup pg[11] = { - PointGroup::Ci, PointGroup::C2h, PointGroup::D2h, PointGroup::C4h, - PointGroup::D4h, PointGroup::C3i, PointGroup::D3d, PointGroup::C6h, - PointGroup::D6h, PointGroup::Th, PointGroup::Oh - }; - return pg[static_cast(laue)]; -} - -inline const char* laue_class_str(Laue laue) { - return point_group_hm(laue_to_pointgroup(laue)); -} - -inline CrystalSystem crystal_system(Laue laue) { - static const CrystalSystem crystal_systems[11] = { - CrystalSystem::Triclinic, - CrystalSystem::Monoclinic, - CrystalSystem::Orthorhombic, - CrystalSystem::Tetragonal, CrystalSystem::Tetragonal, - CrystalSystem::Trigonal, CrystalSystem::Trigonal, - CrystalSystem::Hexagonal, CrystalSystem::Hexagonal, - CrystalSystem::Cubic, CrystalSystem::Cubic - }; - return crystal_systems[static_cast(laue)]; -} - -inline CrystalSystem crystal_system(PointGroup pg) { - return crystal_system(pointgroup_to_laue(pg)); -} - -inline unsigned char point_group_index_and_category(int space_group_number) { - // 0x20=Sohncke, 0x40=enantiomorphic, 0x80=symmorphic - enum : unsigned char { S=0x20, E=(0x20|0x40), Y=0x80, Z=(0x20|0x80) }; - static const unsigned char indices[230] = { - 0|Z, 1|Y, 2|Z, 2|S, 2|Z, 3|Y, 3, 3|Y, 3, 4|Y, // 1-10 - 4, 4|Y, 4, 4, 4, 5|Z, 5|S, 5|S, 5|S, 5|S, // 11-20 - 5|Z, 5|Z, 5|Z, 5|S, 6|Y, 6, 6, 6, 6, 6, // 21-30 - 6, 6, 6, 6, 6|Y, 6, 6, 6|Y, 6, 6, // 31-40 - 6, 6|Y, 6, 6|Y, 6, 6, 7|Y, 7, 7, 7, // 41-50 - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 51-60 - 7, 7, 7, 7, 7|Y, 7, 7, 7, 7|Y, 7, // 61-70 - 7|Y, 7, 7, 7, 8|Z, 8|E, 8|S, 8|E, 8|Z, 8|S, // 71-80 - 9|Y, 9|Y, 10|Y, 10, 10, 10, 10|Y, 10, 11|Z, 11|S, // 81-90 - 11|E, 11|E, 11|S, 11|S, 11|E, 11|E, 11|Z, 11|S, 12|Y, 12, // 91-100 - 12, 12, 12, 12, 12, 12, 12|Y, 12, 12, 12, // 101-110 - 13|Y, 13, 13, 13, 13|Y, 13, 13, 13, 13|Y, 13, // 111-120 - 13|Y, 13, 14|Y, 14, 14, 14, 14, 14, 14, 14, // 121-130 - 14, 14, 14, 14, 14, 14, 14, 14, 
14|Y, 14, // 131-140 - 14, 14, 15|Z, 15|E, 15|E, 15|Z, 16|Y, 16|Y, 17|Z, 17|Z, // 141-150 - 17|E, 17|E, 17|E, 17|E, 17|Z, 18|Y, 18|Y, 18, 18, 18|Y, // 151-160 - 18, 19|Y, 19, 19|Y, 19, 19|Y, 19, 20|Z, 20|E, 20|E, // 161-170 - 20|E, 20|E, 20|S, 21|Y, 22|Y, 22, 23|Z, 23|E, 23|E, 23|E, // 171-180 - 23|E, 23|S, 24|Y, 24, 24, 24, 25|Y, 25, 25|Y, 25, // 181-190 - 26|Y, 26, 26, 26, 27|Z, 27|Z, 27|Z, 27|S, 27|S, 28|Y, // 191-200 - 28, 28|Y, 28, 28|Y, 28, 28, 29|Z, 29|S, 29|Z, 29|S, // 201-210 - 29|Z, 29|E, 29|E, 29|S, 30|Y, 30|Y, 30|Y, 30, 30, 30, // 211-220 - 31|Y, 31, 31, 31, 31|Y, 31, 31, 31, 31|Y, 31 // 221-230 - }; - return indices[space_group_number-1]; -} - -inline PointGroup point_group(int space_group_number) { - auto n = point_group_index_and_category(space_group_number); - return static_cast(n & 0x1f); -} - -// true for 65 Sohncke (non-enantiogenic) space groups -inline bool is_sohncke(int space_group_number) { - return (point_group_index_and_category(space_group_number) & 0x20) != 0; -} - -// true for 22 space groups (11 enantiomorphic pairs) -inline bool is_enantiomorphic(int space_group_number) { - return (point_group_index_and_category(space_group_number) & 0x40) != 0; -} - -// true for 73 space groups -inline bool is_symmorphic(int space_group_number) { - return (point_group_index_and_category(space_group_number) & 0x80) != 0; -} - -/// Inversion center of the Euclidean normalizer that is not at the origin of -/// reference settings. Returns (0,0,0) if absent. Based on tables in ch. 3.5 -/// of ITA (2016) doi:10.1107/97809553602060000933 (column "Inversion through -/// a centre at"). 
-inline Op::Tran nonzero_inversion_center(int space_group_number) { - constexpr int D = Op::DEN; - switch (space_group_number) { - case 43: return {D/8, D/8, 0}; - case 80: return {D/4, 0, 0}; - case 98: return {D/4, 0, D/8}; - case 109: return {D/4, 0, 0}; - case 110: return {D/4, 0, 0}; - case 122: return {D/4, 0, D/8}; - case 210: return {D/8, D/8, D/8}; - default: return {0, 0, 0}; - } -} - -GEMMI_DLL const char* get_basisop(int basisop_idx); - - -// Returns a change-of-basis operator for centred -> primitive transformation. -// The same operator as inverse of z2p_op in sgtbx. -inline Op::Rot centred_to_primitive(char centring_type) { - constexpr int D = Op::DEN; - constexpr int H = Op::DEN / 2; - constexpr int T = Op::DEN / 3; - switch (centring_type) { - case 'P': return {{{D,0,0}, {0,D,0}, {0,0,D}}}; - case 'A': return {{{-D,0,0}, {0,-H,H}, {0,H,H}}}; - case 'B': return {{{-H,0,H}, {0,-D,0}, {H,0,H}}}; - case 'C': return {{{H,H,0}, {H,-H,0}, {0,0,-D}}}; - case 'I': return {{{-H,H,H}, {H,-H,H}, {H,H,-H}}}; - case 'R': return {{{2*T,-T,-T}, {T,T,-2*T}, {T,T,T}}}; - case 'H': return {{{2*T,-T,0}, {T,T,0}, {0,0,D}}}; // not used normally - case 'F': return {{{0,H,H}, {H,0,H}, {H,H,0}}}; - default: fail("not a centring type: ", centring_type); - } -} - - -// LIST OF CRYSTALLOGRAPHIC SPACE GROUPS - -struct SpaceGroup { // typically 44 bytes - int number; - int ccp4; - char hm[11]; // Hermann-Mauguin (international) notation - char ext; - char qualifier[5]; - char hall[15]; - int basisop_idx; - - std::string xhm() const { - std::string ret = hm; - if (ext) { - ret += ':'; - ret += ext; - } - return ret; - } - - char centring_type() const { return ext == 'R' ? 'P' : hm[0]; } - - // (old) CCP4 spacegroup names start with H for hexagonal setting - char ccp4_lattice_type() const { return ext == 'H' ? 'H' : hm[0]; } - - // P 1 2 1 -> P2, but P 1 1 2 -> P112. R 3:H -> H3. 
- std::string short_name() const { - std::string s(hm); - size_t len = s.size(); - if (len > 6 && s[2] == '1' && s[len - 2] == ' ' && s[len - 1] == '1') - s = s[0] + s.substr(4, len - 4 - 2); - if (ext == 'H') - s[0] = 'H'; - s.erase(std::remove(s.begin(), s.end(), ' '), s.end()); - return s; - } - - // As explained in Phenix newsletter CCN_2011_01.pdf#page=12 - // the PDB uses own, non-standard symbols for rhombohedral space groups. - std::string pdb_name() const { - std::string s; - s += ccp4_lattice_type(); - s += hm+1; - return s; - } - - bool is_sohncke() const { return gemmi::is_sohncke(number); } - bool is_enantiomorphic() const { return gemmi::is_enantiomorphic(number); } - bool is_symmorphic() const { return gemmi::is_symmorphic(number); } - PointGroup point_group() const { return gemmi::point_group(number); } - const char* point_group_hm() const { - return gemmi::point_group_hm(point_group()); - } - Laue laue_class() const { return pointgroup_to_laue(point_group()); } - const char* laue_str() const { return laue_class_str(laue_class()); } - CrystalSystem crystal_system() const { - return gemmi::crystal_system(point_group()); - } - const char* crystal_system_str() const { - return gemmi::crystal_system_str(crystal_system()); - } - bool is_centrosymmetric() const { - return laue_to_pointgroup(laue_class()) == point_group(); - } - - /// returns 'a', 'b' or 'c' for monoclinic SG, '\0' otherwise - char monoclinic_unique_axis() const { - if (crystal_system() == CrystalSystem::Monoclinic) - return qualifier[qualifier[0] == '-' ? 1 : 0]; - return '\0'; - } - - const char* basisop_str() const { return get_basisop(basisop_idx); } - Op basisop() const { return parse_triplet(basisop_str()); } - bool is_reference_setting() const { return basisop_idx == 0; } - - Op centred_to_primitive() const { - return {gemmi::centred_to_primitive(centring_type()), {0,0,0}, 'x'}; - } - - /// Returns change-of-hand operator. Compatible with similar sgtbx function. 
- Op change_of_hand_op() const { - if (is_centrosymmetric()) - return Op::identity(); - Op::Tran t = nonzero_inversion_center(number); - Op op{Op::inversion_rot(), {2*t[0], 2*t[1], 2*t[2]}, 'x'}; - if (!is_reference_setting()) { - Op b = basisop(); - op = b.combine(op).combine(b.inverse()); - } - return op; - } - - GroupOps operations() const { return symops_from_hall(hall); } -}; - -struct SpaceGroupAltName { - char hm[11]; - char ext; - int pos; -}; - -struct GEMMI_DLL spacegroup_tables { - static const SpaceGroup main[564]; - static const SpaceGroupAltName alt_names[28]; - static const unsigned char ccp4_hkl_asu[230]; -}; - -inline const SpaceGroup* find_spacegroup_by_number(int ccp4) noexcept { - if (ccp4 == 0) - return &spacegroup_tables::main[0]; - for (const SpaceGroup& sg : spacegroup_tables::main) - if (sg.ccp4 == ccp4) - return &sg; - return nullptr; -} - -inline const SpaceGroup& get_spacegroup_by_number(int ccp4) { - const SpaceGroup* sg = find_spacegroup_by_number(ccp4); - if (sg == nullptr) - throw std::invalid_argument("Invalid space-group number: " - + std::to_string(ccp4)); - return *sg; -} - -inline const SpaceGroup& get_spacegroup_reference_setting(int number) { - for (const SpaceGroup& sg : spacegroup_tables::main) - if (sg.number == number && sg.is_reference_setting()) - return sg; - throw std::invalid_argument("Invalid space-group number: " - + std::to_string(number)); -} - -/// If angles alpha and gamma are provided, they are used to -/// distinguish hexagonal and rhombohedral settings (e.g. for "R 3"). -/// \param prefer can specify preferred H/R settings and 1/2 origin choice. -/// For example, prefer="2H" means the origin choice 2 and hexagonal -/// settings. The default is "1H". 
-GEMMI_DLL const SpaceGroup* find_spacegroup_by_name(std::string name, - double alpha=0., double gamma=0., - const char* prefer=nullptr); - -inline const SpaceGroup& get_spacegroup_by_name(const std::string& name) { - const SpaceGroup* sg = find_spacegroup_by_name(name); - if (sg == nullptr) - throw std::invalid_argument("Unknown space-group name: " + name); - return *sg; -} - -inline const SpaceGroup& get_spacegroup_p1() { - return spacegroup_tables::main[0]; -} - -inline const SpaceGroup* find_spacegroup_by_ops(const GroupOps& gops) { - char c = gops.find_centering(); - for (const SpaceGroup& sg : spacegroup_tables::main) - if ((c == sg.hall[0] || c == sg.hall[1]) && - gops.is_same_as(sg.operations())) - return &sg; - return nullptr; -} - -// Reciprocal space asu (asymmetric unit). -// The same 12 choices of ASU as in CCP4 symlib and cctbx. -struct ReciprocalAsu { - int idx; - Op::Rot rot{}; // value-initialized only to avoid -Wmaybe-uninitialized - bool is_ref; - - ReciprocalAsu(const SpaceGroup* sg, bool tnt=false) { - if (sg == nullptr) - fail("Missing space group"); - idx = spacegroup_tables::ccp4_hkl_asu[sg->number - 1]; - if (tnt) { - idx += 10; - is_ref = true; // TNT ASU is given wrt current (not standard) settings - } else { - is_ref = sg->is_reference_setting(); - if (!is_ref) - rot = sg->basisop().rot; - } - } - - bool is_in(const Op::Miller& hkl) const { - if (is_ref) - return is_in_reference_setting(hkl[0], hkl[1], hkl[2]); - Op::Miller r; - for (int i = 0; i != 3; ++i) - r[i] = rot[0][i] * hkl[0] + rot[1][i] * hkl[1] + rot[2][i] * hkl[2]; - return is_in_reference_setting(r[0], r[1], r[2]); - } - - bool is_in_reference_setting(int h, int k, int l) const { - switch (idx) { - // 0-9: CCP4 hkl asu, 10-19: TNT hkl asu - case 0: return l>0 || (l==0 && (h>0 || (h==0 && k>=0))); - case 1: return k>=0 && (l>0 || (l==0 && h>=0)); - case 12: // orthorhombic-D - case 2: return h>=0 && k>=0 && l>=0; - case 3: return l>=0 && ((h>=0 && k>0) || (h==0 && k==0)); - 
case 14: // tetragonal-D, hexagonal-D - case 4: return h>=k && k>=0 && l>=0; - case 5: return (h>=0 && k>0) || (h==0 && k==0 && l>=0); - case 16: // trigonal-D P312 - case 6: return h>=k && k>=0 && (k>0 || l>=0); - case 17: // trigonal-D P321 - case 7: return h>=k && k>=0 && (h>k || l>=0); - case 8: return h>=0 && ((l>=h && k>h) || (l==h && k==h)); - case 9: return k>=l && l>=h && h>=0; - case 10: return k>0 || (k==0 && (h>0 || (h==0 && l>=0))); // triclinic - case 11: return k>=0 && (h>0 || (h==0 && l>=0)); // monoclinic-B - case 13: return l>=0 && ((k>=0 && h>0) || (h==0 && k==0)); // tetragonal-C, hexagonal-C - case 15: return (k>=0 && h>0) || (h==0 && k==0 && l>=0); // trigonal-C - case 18: return k>=0 && l>=0 && ((h>k && h>l) || (h==k && h>=l)); // cubic-T - case 19: return h>=k && k>=l && l>=0; // cubic-O - } - unreachable(); - } - - const char* condition_str() const { - switch (idx) { - case 0: return "l>0 or (l=0 and (h>0 or (h=0 and k>=0)))"; - case 1: return "k>=0 and (l>0 or (l=0 and h>=0))"; - case 12: - case 2: return "h>=0 and k>=0 and l>=0"; - case 3: return "l>=0 and ((h>=0 and k>0) or (h=0 and k=0))"; - case 14: - case 4: return "h>=k and k>=0 and l>=0"; - case 5: return "(h>=0 and k>0) or (h=0 and k=0 and l>=0)"; - case 16: - case 6: return "h>=k and k>=0 and (k>0 or l>=0)"; - case 17: - case 7: return "h>=k and k>=0 and (h>k or l>=0)"; - case 8: return "h>=0 and ((l>=h and k>h) or (l=h and k=h))"; - case 9: return "k>=l and l>=h and h>=0"; - case 10: return "k>0 or (k==0 and (h>0 or (h=0 and l>=0)))"; - case 11: return "k>=0 and (h>0 or (h=0 and l>=0))"; - case 13: return "l>=0 and ((k>=0 and h>0) or (h=0 and k==0))"; - case 15: return "(k>=0 and h>0) or (h=0 and k==0 and l>=0)"; - case 18: return "k>=0 and l>=0 and ((h>k and h>l) or (h=k and h>=l))"; - case 19: return "h>=k and k>=l and l>=0"; - } - unreachable(); - } - - /// Returns hkl in asu and MTZ ISYM - 2*n-1 for reflections in the positive - /// asu (I+ of a Friedel pair), 2*n for 
reflections in the negative asu (I-). - std::pair to_asu(const Op::Miller& hkl, const std::vector& sym_ops) const { - int isym = 0; - for (const Op& op : sym_ops) { - ++isym; - Op::Miller new_hkl = op.apply_to_hkl_without_division(hkl); - if (is_in(new_hkl)) - return {Op::divide_hkl_by_DEN(new_hkl), isym}; - ++isym; - Op::Miller negated_new_hkl{{-new_hkl[0], -new_hkl[1], -new_hkl[2]}}; - if (is_in(negated_new_hkl)) - return {Op::divide_hkl_by_DEN(negated_new_hkl), isym}; - } - fail("Oops, maybe inconsistent GroupOps?"); - } - - std::pair to_asu(const Op::Miller& hkl, const GroupOps& gops) const { - return to_asu(hkl, gops.sym_ops); - } - - /// Similar to to_asu(), but the second returned value is sign: true for + or centric - std::pair to_asu_sign(const Op::Miller& hkl, const GroupOps& gops) const { - std::pair neg = {{0,0,0}, true}; - for (const Op& op : gops.sym_ops) { - Op::Miller new_hkl = op.apply_to_hkl_without_division(hkl); - if (is_in(new_hkl)) - return {Op::divide_hkl_by_DEN(new_hkl), true}; - Op::Miller negated_new_hkl{{-new_hkl[0], -new_hkl[1], -new_hkl[2]}}; - if (is_in(negated_new_hkl)) - // don't return it yet, because for centric reflection we prefer (+) - neg = {Op::divide_hkl_by_DEN(negated_new_hkl), false}; - } - if (neg.second) - fail("Oops, maybe inconsistent GroupOps?"); - return neg; - } -}; - -} // namespace gemmi - -namespace std { -template<> struct hash { - size_t operator()(const gemmi::Op& op) const { - size_t h = 0; - for (int i = 0; i != 3; ++i) - for (int j = 0; j != 3; ++j) - h = (h << 2) ^ (op.rot[i][j] + 1); - for (int i = 0; i != 3; ++i) - h = (h << 5) ^ op.tran[i]; - return h; - } -}; -} // namespace std - -#endif diff --git a/symmetry/gemmi/unitcell.hpp b/symmetry/gemmi/unitcell.hpp deleted file mode 100644 index 25bb8b46..00000000 --- a/symmetry/gemmi/unitcell.hpp +++ /dev/null @@ -1,618 +0,0 @@ -// Copyright 2017 Global Phasing Ltd. -// -// Unit cell. 
- -#ifndef GEMMI_UNITCELL_HPP_ -#define GEMMI_UNITCELL_HPP_ - -#include -#include // for cos, sin, sqrt, floor, NAN -#include -#include "math.hpp" -#include "fail.hpp" // for fail -#include "symmetry.hpp" // for Op, SpaceGroup - -namespace gemmi { - -inline Mat33 rot_as_mat33(const Op::Rot& rot) { - double mult = 1.0 / Op::DEN; - return Mat33(mult * rot[0][0], mult * rot[0][1], mult * rot[0][2], - mult * rot[1][0], mult * rot[1][1], mult * rot[1][2], - mult * rot[2][0], mult * rot[2][1], mult * rot[2][2]); -} -inline Mat33 rot_as_mat33(const Op& op) { return rot_as_mat33(op.rot); } - - -inline Vec3 tran_as_vec3(const Op& op) { - double mult = 1.0 / Op::DEN; - return Vec3(mult * op.tran[0], mult * op.tran[1], mult * op.tran[2]); -} - -/// Coordinates in Angstroms - orthogonal (Cartesian) coordinates. -struct Position : Vec3 { - using Vec3::Vec3; - Position() = default; - explicit Position(const Vec3& v) : Vec3(v) {} - Position operator-() const { return Position(Vec3::operator-()); } - Position operator-(const Position& o) const { return Position(Vec3::operator-(o)); } - Position operator+(const Position& o) const { return Position(Vec3::operator+(o)); } - Position operator*(double d) const { return Position(Vec3::operator*(d)); } - Position operator/(double d) const { return Position(Vec3::operator/(d)); } - Position& operator-=(const Position& o) { *this = *this - o; return *this; } - Position& operator+=(const Position& o) { *this = *this + o; return *this; } - Position& operator*=(double d) { *this = *this * d; return *this; } - Position& operator/=(double d) { return operator*=(1.0/d); } -}; - -inline Position operator*(double d, const Position& v) { return v * d; } - -/// Fractional coordinates. 
-struct Fractional : Vec3 { - using Vec3::Vec3; - Fractional() = default; - explicit Fractional(const Vec3& v) : Vec3(v) {} - Fractional operator-(const Fractional& o) const { - return Fractional(Vec3::operator-(o)); - } - Fractional operator+(const Fractional& o) const { - return Fractional(Vec3::operator+(o)); - } - Fractional wrap_to_unit() const { - return {x - std::floor(x), y - std::floor(y), z - std::floor(z)}; - } - Fractional wrap_to_zero() const { - return {x - std::round(x), y - std::round(y), z - std::round(z)}; - } - Fractional round() const { - return {std::round(x), std::round(y), std::round(z)}; - } - void move_toward_zero_by_one() { - if (x > 0.5) x -= 1.0; else if (x < -0.5) x += 1.0; - if (y > 0.5) y -= 1.0; else if (y < -0.5) y += 1.0; - if (z > 0.5) z -= 1.0; else if (z < -0.5) z += 1.0; - } -}; - -enum class Asu : unsigned char { Same, Different, Any }; - -/// Result of find_nearest_image -struct NearestImage { - double dist_sq; - int pbc_shift[3] = { 0, 0, 0 }; - int sym_idx = 0; - - double dist() const { return std::sqrt(dist_sq); } - bool same_asu() const { - return pbc_shift[0] == 0 && pbc_shift[1] == 0 && pbc_shift[2] == 0 && sym_idx == 0; - } - - /// Returns a string such as 1555 or 1_555. - std::string symmetry_code(bool underscore) const { - std::string s = std::to_string(sym_idx + 1); - if (underscore) - s += '_'; - if (unsigned(5 + pbc_shift[0]) <= 9 && - unsigned(5 + pbc_shift[1]) <= 9 && - unsigned(5 + pbc_shift[2]) <= 9) { // normal, quick path - for (int shift : pbc_shift) - s += char('5' + shift); - } else { // problematic, non-standard path - for (int i = 0; i < 3; ++i) { - if (i != 0 && underscore) - s += '_'; - s += std::to_string(5 + pbc_shift[i]); - } - } - return s; - } -}; - - -/// Like Transform, but apply() arg is Fractional (not Vec3 - for type safety). 
-struct FTransform : Transform { - FTransform() = default; - FTransform(const Transform& t) : Transform(t) {} - Fractional apply(const Fractional& p) const { - return Fractional(Transform::apply(p)); - } -}; - -/// Non-crystallographic symmetry operation (such as in the MTRIXn record) -struct NcsOp { - std::string id; - bool given; - Transform tr; - Position apply(const Position& p) const { return Position(tr.apply(p)); } -}; - -/// A synonym for convenient passing of hkl. -using Miller = std::array; - -struct MillerHash { - std::size_t operator()(const Miller& hkl) const noexcept { - return std::size_t((hkl[0] * 1024 + hkl[1]) * 1024 + hkl[2]); // NOLINT misplaced cast - } -}; - -struct UnitCellParameters { - double a = 1.0, b = 1.0, c = 1.0; - double alpha = 90.0, beta = 90.0, gamma = 90.0; - - UnitCellParameters() = default; - explicit UnitCellParameters(const double (&par)[6]) { - a = par[0]; b = par[1]; c = par[2]; alpha = par[3]; beta = par[4]; gamma = par[5]; - } - explicit UnitCellParameters(const std::array& par) { - a = par[0]; b = par[1]; c = par[2]; alpha = par[3]; beta = par[4]; gamma = par[5]; - } - - bool operator==(const UnitCellParameters& o) const { - return a == o.a && b == o.b && c == o.c && - alpha == o.alpha && beta == o.beta && gamma == o.gamma; - } - bool operator!=(const UnitCellParameters& o) const { return !operator==(o); } - - bool approx(const UnitCellParameters& o, double epsilon) const { - auto eq = [&](double x, double y) { return std::fabs(x - y) < epsilon; }; - return eq(a, o.a) && eq(b, o.b) && eq(c, o.c) && - eq(alpha, o.alpha) && eq(beta, o.beta) && eq(gamma, o.gamma); - } -}; - -/// Unit cell. Contains cell parameters as well as pre-calculated -/// orthogonalization and fractionalization matrices, volume, and more. -/// Contains symmetry operations (incl. NCS) if they were set from outside. 
-struct UnitCell : UnitCellParameters { - UnitCell() = default; - UnitCell(double a_, double b_, double c_, - double alpha_, double beta_, double gamma_) { - set(a_, b_, c_, alpha_, beta_, gamma_); - } - UnitCell(const std::array& v) { set_from_array(v); } - - Transform orth; - Transform frac; - double volume = 1.0; - /// reciprocal parameters a*, b*, c*, alpha*, beta*, gamma* - double ar = 1.0, br = 1.0, cr = 1.0; - double cos_alphar = 0.0, cos_betar = 0.0, cos_gammar = 0.0; - bool explicit_matrices = false; - short cs_count = 0; // crystallographic symmetries except identity - std::vector images; // symmetry operations - - // Non-crystalline (for example NMR) structures are supposed to use fake - // unit cell 1x1x1, but sometimes they don't. A number of non-crystalline - // entries in the PDB has incorrectly set unit cell or fract. matrix, - // that is why we check both. - bool is_crystal() const { return a != 1.0 && frac.mat[0][0] != 1.0; } - - // compare lengths using relative tolerance rel, angles using tolerance deg - bool is_similar(const UnitCell& o, double rel, double deg) const { - auto siml = [&](double x, double y) { return std::fabs(x - y) < rel * std::max(x, y); }; - auto sima = [&](double x, double y) { return std::fabs(x - y) < deg; }; - return siml(a, o.a) && siml(b, o.b) && siml(c, o.c) && - sima(alpha, o.alpha) && sima(beta, o.beta) && sima(gamma, o.gamma); - } - - void calculate_properties() { - // ensure exact values for right angles - double cos_alpha = alpha == 90. ? 0. : std::cos(rad(alpha)); - double cos_beta = beta == 90. ? 0. : std::cos(rad(beta)); - double cos_gamma = gamma == 90. ? 0. : std::cos(rad(gamma)); - double sin_alpha = alpha == 90. ? 1. : std::sin(rad(alpha)); - double sin_beta = beta == 90. ? 1. : std::sin(rad(beta)); - double sin_gamma = gamma == 90. ? 1. 
: std::sin(rad(gamma)); - if (sin_alpha == 0 || sin_beta == 0 || sin_gamma == 0) - fail("Impossible angle - N*180deg."); - - // volume - formula from Giacovazzo p.62 - volume = a * b * c * std::sqrt(1 - cos_alpha * cos_alpha - - cos_beta * cos_beta - cos_gamma * cos_gamma - + 2 * cos_alpha * cos_beta * cos_gamma); - - // reciprocal parameters a*, b*, ... (Giacovazzo, p. 64) - ar = b * c * sin_alpha / volume; - br = a * c * sin_beta / volume; - cr = a * b * sin_gamma / volume; - double cos_alphar_sin_beta = (cos_beta * cos_gamma - cos_alpha) / sin_gamma; - cos_alphar = cos_alphar_sin_beta / sin_beta; - //cos_alphar = (cos_beta * cos_gamma - cos_alpha) / (sin_beta * sin_gamma); - cos_betar = (cos_alpha * cos_gamma - cos_beta) / (sin_alpha * sin_gamma); - cos_gammar = (cos_alpha * cos_beta - cos_gamma) / (sin_alpha * sin_beta); - - if (explicit_matrices) - return; - - // The orthogonalization matrix we use is described in ITfC B p.262: - // "An alternative mode of orthogonalization, used by the Protein - // Data Bank and most programs, is to align the a1 axis of the unit - // cell with the Cartesian X_1 axis, and to align the a*_3 axis with the - // Cartesian X_3 axis." - double sin_alphar = std::sqrt(1.0 - cos_alphar * cos_alphar); - orth.mat = {a, b * cos_gamma, c * cos_beta, - 0., b * sin_gamma, -c * cos_alphar_sin_beta, - 0., 0. , c * sin_beta * sin_alphar}; - orth.vec = {0., 0., 0.}; - - double o12 = -cos_gamma / (sin_gamma * a); - double o13 = -(cos_gamma * cos_alphar_sin_beta + cos_beta * sin_gamma) - / (sin_alphar * sin_beta * sin_gamma * a); - double o23 = cos_alphar / (sin_alphar * sin_gamma * b); - frac.mat = {1 / a, o12, o13, - 0., 1 / orth.mat[1][1], o23, - 0., 0., 1 / orth.mat[2][2]}; - frac.vec = {0., 0., 0.}; - } - - double cos_alpha() const { return alpha == 90. ? 0. : std::cos(rad(alpha)); } - - /// B matrix following convention from Busing & Levy (1967), not from cctbx. - /// Cf. 
https://dials.github.io/documentation/conventions.html - Mat33 calculate_matrix_B() const { - double sin_gammar = std::sqrt(1 - cos_gammar * cos_gammar); - double sin_betar = std::sqrt(1 - cos_betar * cos_betar); - return Mat33(ar, br * cos_gammar, cr * cos_betar, - 0., br * sin_gammar, -cr * sin_betar * cos_alpha(), - 0., 0., 1.0 / c); - } - - /// The equivalent isotropic displacement factor. - /// Based on Fischer & Tillmanns (1988). Acta Cryst. C44, 775-776. - /// The argument is a non-orthogonalized tensor U, - /// i.e. the one from SmallStructure::Site, but not from Atom. - double calculate_u_eq(const SMat33& ani) const { - double aar = a * ar; - double bbr = b * br; - double ccr = c * cr; - // it could be optimized using orth.mat[0][1] and orth.mat[0][2] - double cos_beta = beta == 90. ? 0. : std::cos(rad(beta)); - double cos_gamma = gamma == 90. ? 0. : std::cos(rad(gamma)); - return 1/3. * (sq(aar) * ani.u11 + sq(bbr) * ani.u22 + sq(ccr) * ani.u33 + - 2 * (aar * bbr * cos_gamma * ani.u12 + - aar * ccr * cos_beta * ani.u13 + - bbr * ccr * cos_alpha() * ani.u23)); - } - - void set_matrices_from_fract(const Transform& f) { - // mmCIF _atom_sites.fract_transf_* and PDB SCALEn records usually contain - // fewer significant digits than the unit cell parameters, and sometimes are - // just wrong. Use them only if we seem to have non-standard crystal frame. - if (f.mat.approx(frac.mat, 1e-4) && f.vec.approx(frac.vec, 1e-6)) - return; - // The SCALE record is sometimes incorrect. Here we only catch cases - // when CRYST1 is set as for non-crystal and SCALE is very suspicious. 
- if (frac.mat[0][0] == 1.0 && (f.mat[0][0] == 0.0 || f.mat[0][0] > 1.0)) - return; - frac = f; - orth = f.inverse(); - explicit_matrices = true; - } - - void set(double a_, double b_, double c_, - double alpha_, double beta_, double gamma_) { - if (gamma_ == 0.0) // ignore empty/partial CRYST1 (example: 3iyp) - return; - a = a_; - b = b_; - c = c_; - alpha = alpha_; - beta = beta_; - gamma = gamma_; - calculate_properties(); - } - - void set_from_parameters(const UnitCellParameters& p) { - set(p.a, p.b, p.c, p.alpha, p.beta, p.gamma); - } - - void set_from_array(const std::array& v) { set(v[0], v[1], v[2], v[3], v[4], v[5]); } - - void set_from_vectors(const Vec3& va, const Vec3& vb, const Vec3& vc) { - set(va.length(), vb.length(), vc.length(), - deg(vb.angle(vc)), deg(vc.angle(va)), deg(va.angle(vb))); - } - - UnitCell changed_basis_backward(const Op& op, bool set_images) { - Mat33 mat = orth.mat.multiply(rot_as_mat33(op)); - UnitCell new_cell; - new_cell.set_from_vectors(mat.column_copy(0), - mat.column_copy(1), - mat.column_copy(2)); - if (set_images && !images.empty()) { - new_cell.images.reserve(images.size()); - Transform tr{rot_as_mat33(op), tran_as_vec3(op)}; - Transform tr_inv = tr.inverse(); - for (const FTransform& im : images) - new_cell.images.push_back(tr.combine(im).combine(tr_inv)); - } - return new_cell; - } - - UnitCell changed_basis_forward(const Op& op, bool set_images) { - return changed_basis_backward(op.inverse(), set_images); - } - - bool is_compatible_with_groupops(const GroupOps& gops, double eps=1e-3) const { - std::array metric = metric_tensor().elements_voigt(); - for (const Op& op : gops.sym_ops) { - Mat33 m = orth.mat.multiply(rot_as_mat33(op)); - std::array other = {{ - m.column_dot(0,0), m.column_dot(1,1), m.column_dot(2,2), - m.column_dot(1,2), m.column_dot(0,2), m.column_dot(0,1) - }}; - for (int i = 0; i < 6; ++i) - if (std::fabs(metric[i] - other[i]) > eps) - return false; - } - return true; - } - - bool 
is_compatible_with_spacegroup(const SpaceGroup* sg, double eps=1e-3) const { - return sg ? is_compatible_with_groupops(sg->operations(), eps) : false; - } - - void set_cell_images_from_groupops(const GroupOps& group_ops) { - images.clear(); - cs_count = (short) group_ops.order() - 1; - images.reserve(cs_count); - for (Op op : group_ops) - if (op != Op::identity()) - images.push_back(Transform{rot_as_mat33(op), tran_as_vec3(op)}); - } - - void set_cell_images_from_spacegroup(const SpaceGroup* sg) { - if (sg) { - set_cell_images_from_groupops(sg->operations()); - } else { - images.clear(); - cs_count = 0; - } - } - - void add_ncs_images_to_cs_images(const std::vector& ncs) { - assert(cs_count == (short) images.size()); - for (const NcsOp& ncs_op : ncs) - if (!ncs_op.given) { - // We need it to operates on fractional, not orthogonal coordinates. - FTransform f = frac.combine(ncs_op.tr.combine(orth)); - images.push_back(f); - for (int i = 0; i < cs_count; ++i) - images.push_back(images[i].combine(f)); - } - } - - std::vector get_ncs_transforms() const { - std::vector ncs; - for (size_t n = cs_count; n < images.size(); n += cs_count + 1) - ncs.push_back(images[n]); - return ncs; - } - - Position orthogonalize(const Fractional& f) const { - return Position(orth.apply(f)); - } - Fractional fractionalize(const Position& o) const { - return Fractional(frac.apply(o)); - } - - /// orthogonalize_difference(a-b) == orthogonalize(a) - orthogonalize(b) - // The shift (fract.vec) can be non-zero in non-standard settings, - // just do not apply it here. - Position orthogonalize_difference(const Fractional& delta) const { - return Position(orth.mat.multiply(delta)); - } - /// the inverse of orthogonalize_difference - Fractional fractionalize_difference(const Position& delta) const { - return Fractional(frac.mat.multiply(delta)); - } - - /// Returns box containing fractional box (a cuboid in fractional - /// coordinates can be a parallelepiped in Cartesian coordinates). 
- Box orthogonalize_box(const Box& f) const { - Box r; - r.minimum = orthogonalize(f.minimum); - r.maximum = orthogonalize(f.maximum); - if (alpha != 90. || beta == 90. || gamma == 90.) { - r.extend(orthogonalize({f.minimum.x, f.minimum.y, f.maximum.z})); - r.extend(orthogonalize({f.minimum.x, f.maximum.y, f.maximum.z})); - r.extend(orthogonalize({f.minimum.x, f.maximum.y, f.minimum.z})); - r.extend(orthogonalize({f.maximum.x, f.maximum.y, f.minimum.z})); - r.extend(orthogonalize({f.maximum.x, f.minimum.y, f.minimum.z})); - r.extend(orthogonalize({f.maximum.x, f.minimum.y, f.maximum.z})); - } - return r; - } - - Transform orthogonalize_transform(const FTransform& ftr) const { - return orth.combine(ftr.combine(frac)); - } - Transform op_as_transform(const Op& op) const { - return orthogonalize_transform(Transform{rot_as_mat33(op), tran_as_vec3(op)}); - } - - double distance_sq(const Fractional& pos1, const Fractional& pos2) const { - Fractional diff = (pos1 - pos2).wrap_to_zero(); - return orthogonalize_difference(diff).length_sq(); - } - double distance_sq(const Position& pos1, const Position& pos2) const { - return distance_sq(fractionalize(pos1), fractionalize(pos2)); - } - - double volume_per_image() const { - return is_crystal() ? volume / (1 + images.size()) : NAN; - } - - // Helper function. PBC = periodic boundary conditions. 
- bool search_pbc_images(Fractional&& diff, NearestImage& image) const { - int neg_shift[3] = {0, 0, 0}; - if (is_crystal()) { - for (int j = 0; j < 3; ++j) - neg_shift[j] = iround(diff.at(j)); - diff.x -= neg_shift[0]; - diff.y -= neg_shift[1]; - diff.z -= neg_shift[2]; - } - Position orth_diff = orthogonalize_difference(diff); - double dsq = orth_diff.length_sq(); - if (dsq < image.dist_sq) { - image.dist_sq = dsq; - for (int j = 0; j < 3; ++j) - image.pbc_shift[j] = -neg_shift[j]; - return true; - } - return false; - } - - NearestImage find_nearest_image(const Position& ref, const Position& pos, Asu asu) const { - NearestImage image; - if (asu == Asu::Different) - image.dist_sq = INFINITY; - else - image.dist_sq = ref.dist_sq(pos); - if (asu == Asu::Same) - return image; - Fractional fpos = fractionalize(pos); - Fractional fref = fractionalize(ref); - search_pbc_images(fpos - fref, image); - if (asu == Asu::Different && - image.pbc_shift[0] == 0 && image.pbc_shift[1] == 0 && image.pbc_shift[2] == 0) - image.dist_sq = INFINITY; - for (int n = 0; n != static_cast(images.size()); ++n) - if (search_pbc_images(images[n].apply(fpos) - fref, image)) - image.sym_idx = n + 1; - return image; - } - - void apply_transform(Fractional& fpos, int image_idx, bool inverse) const { - if (image_idx > 0) { - const FTransform& t = images.at(image_idx - 1); - if (!inverse) - fpos = t.apply(fpos); - else - fpos = FTransform(t.inverse()).apply(fpos); - } - } - - NearestImage find_nearest_pbc_image(const Fractional& fref, Fractional fpos, - int image_idx=0) const { - NearestImage sym_image; - sym_image.dist_sq = INFINITY; - sym_image.sym_idx = image_idx; - apply_transform(fpos, image_idx, false); - search_pbc_images(fpos - fref, sym_image); - return sym_image; - } - NearestImage find_nearest_pbc_image(const Position& ref, const Position& pos, - int image_idx=0) const { - return find_nearest_pbc_image(fractionalize(ref), fractionalize(pos), image_idx); - } - - std::vector 
find_nearest_pbc_images(const Fractional& fref, double dist, - const Fractional& fpos, int image_idx) const { - std::vector results; - NearestImage im = find_nearest_pbc_image(fref, fpos, image_idx); - int sh[3] = {im.pbc_shift[0], im.pbc_shift[1], im.pbc_shift[2]}; - for (im.pbc_shift[0] = sh[0]-1; im.pbc_shift[0] <= sh[0]+1; ++im.pbc_shift[0]) - for (im.pbc_shift[1] = sh[1]-1; im.pbc_shift[1] <= sh[1]+1; ++im.pbc_shift[1]) - for (im.pbc_shift[2] = sh[2]-1; im.pbc_shift[2] <= sh[2]+1; ++im.pbc_shift[2]) { - Fractional shift(im.pbc_shift[0], im.pbc_shift[1], im.pbc_shift[2]); - im.dist_sq = orthogonalize_difference(fpos - fref + shift).length_sq(); - if (im.dist_sq <= sq(dist)) - results.push_back(im); - } - return results; - } - - Position orthogonalize_in_pbc(const Position& ref, - const Fractional& fpos) const { - Fractional fref = fractionalize(ref); - return orthogonalize_difference((fpos - fref).wrap_to_zero()) + ref; - } - - Position find_nearest_pbc_position(const Position& ref, const Position& pos, - int image_idx, bool inverse=false) const { - Fractional fpos = fractionalize(pos); - apply_transform(fpos, image_idx, inverse); - return orthogonalize_in_pbc(ref, fpos); - } - - // apply NearestImage symmetry to fpos - Fractional fract_image(const NearestImage& im, Fractional fpos) { - apply_transform(fpos, im.sym_idx, false); - return fpos + Fractional(im.pbc_shift[0], im.pbc_shift[1], im.pbc_shift[2]); - } - - /// Counts nearby symmetry mates (0 = none, 3 = 4-fold axis, etc). 
- /// \pre is_crystal() - int is_special_position(const Fractional& fpos, double max_dist) const { - const double max_dist_sq = max_dist * max_dist; - int n = 0; - for (const FTransform& image : images) { - Fractional fdiff = (image.apply(fpos) - fpos).wrap_to_zero(); - if (orthogonalize_difference(fdiff).length_sq() < max_dist_sq) - ++n; - } - return n; - } - int is_special_position(const Position& pos, double max_dist = 0.8) const { - return is_special_position(fractionalize(pos), max_dist); - } - - /// Calculate 1/d^2 for specified hkl reflection. - /// 1/d^2 = (2*sin(theta)/lambda)^2 - // The indices are integers, but they may be stored as floating-point - // numbers (MTZ format) so we use double to avoid conversions. - double calculate_1_d2_double(double h, double k, double l) const { - double arh = ar * h; - double brk = br * k; - double crl = cr * l; - return arh * arh + brk * brk + crl * crl + 2 * (arh * brk * cos_gammar + - arh * crl * cos_betar + - brk * crl * cos_alphar); - } - double calculate_1_d2(const Miller& hkl) const { - return calculate_1_d2_double(hkl[0], hkl[1], hkl[2]); - } - - /// Calculate d-spacing. - /// d = lambda/(2*sin(theta)) - double calculate_d(const Miller& hkl) const { - return 1.0 / std::sqrt(calculate_1_d2(hkl)); - } - - /// Calculate (sin(theta)/lambda)^2 = d*^2/4 - double calculate_stol_sq(const Miller& hkl) const { - return 0.25 * calculate_1_d2(hkl); - } - - /// https://dictionary.iucr.org/Metric_tensor - SMat33 metric_tensor() const { - // the order in SMat33 is ... m12 m13 m23 -> a.a b.b c.c a.b a.c b.c - return {a*a, b*b, c*c, a*orth.mat[0][1], a*orth.mat[0][2], b*c*cos_alpha()}; - } - - SMat33 reciprocal_metric_tensor() const { - return {ar*ar, br*br, cr*cr, ar*br*cos_gammar, ar*cr*cos_betar, br*cr*cos_alphar}; - } - - /// Returns reciprocal unit cell. 
- UnitCell reciprocal() const { - auto acosd = [](double x) { return deg(std::acos(x)); }; - return UnitCell(ar, br, cr, - acosd(cos_alphar), acosd(cos_betar), acosd(cos_gammar)); - } - - Miller get_hkl_limits(double dmin) const { - return {{int(a / dmin), int(b / dmin), int(c / dmin)}}; - } - - Mat33 primitive_orth_matrix(char centring_type) const { - if (centring_type == 'P') - return orth.mat; - Mat33 c2p = rot_as_mat33(centred_to_primitive(centring_type)); - return orth.mat.multiply(c2p); - } -}; - -} // namespace gemmi -#endif diff --git a/symmetry/symmetry.cpp b/symmetry/symmetry.cpp deleted file mode 100644 index 05b5d1c0..00000000 --- a/symmetry/symmetry.cpp +++ /dev/null @@ -1,1215 +0,0 @@ -// Copyright Global Phasing Ltd. - -#include -#include // for fabs -#include // for memchr, strchr - -static const char* skip_space(const char* p) { - if (p) - while (*p == ' ' || *p == '\t' || *p == '_') // '_' can be used as space - ++p; - return p; -} - -namespace gemmi { - -// TRIPLET -> OP - -// param only can be set to 'h', 'x', 'a' or ' ' (any), to limit accepted characters. 
-// decimal_fract is useful only for non-crystallographic ops (such as x+0.12) -std::array parse_triplet_part(const std::string& s, char& notation, double* decimal_fract) { - constexpr char a_ = 'a' & ~3; - constexpr char h_ = 'h' & ~3; - constexpr char x_ = 'x' & ~3; - static const signed char letter2index[] = - // a b c d e f g h i j k l - { a_+0, a_+1, a_+2, 0, 0, 0, 0, h_+0, 0, 0, h_+1, h_+2, - // m n o p q r s t u v w x y z - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x_+0, x_+1, x_+2 }; - auto interpret_letter = [&](char c) { - size_t idx = size_t((c | 0x20) - 'a'); // "|0x20" = to lower - if (idx >= sizeof(letter2index) || letter2index[idx] == 0) - fail("unexpected character '", c, "' in: ", s); - auto value = letter2index[idx]; - int detected_notation = value & ~3; - if ((notation | 0x20) == ' ') - notation = detected_notation; - else if (((notation | 0x20) & ~3) != detected_notation) - fail("Unexpected notation (letter set) in: ", s); - return value & 3; - }; - - std::array r = { 0, 0, 0, 0 }; - int num = Op::DEN; - const char* c = s.c_str(); - while (*(c = skip_space(c))) { - if (*c == '+' || *c == '-') { - num = (*c == '+' ? 
Op::DEN : -Op::DEN); - c = skip_space(++c); - } - if (num == 0) - fail("wrong or unsupported triplet format: " + s); - int r_idx; - int den = 1; - double fract = 0; - if ((*c >= '0' && *c <= '9') || *c == '.') { - // syntax examples in this branch: "1", "-1/2", "+2*x", "1/2 * b" - char* endptr; - int n = std::strtol(c, &endptr, 10); - // some COD CIFs have decimal fractions ("-x+0.25", ".5+Y", "1.25000-y") - if (*endptr == '.') { - // avoiding strtod() etc which is locale-dependent - fract = n; - for (double denom = 0.1; *++endptr >= '0' && *endptr <= '9'; denom *= 0.1) - fract += int(*endptr - '0') * denom; - double rounded = std::round(fract * num); - if (!decimal_fract) { - if (std::fabs(rounded - fract * num) > 0.05) - fail("unexpected number in a symmetry triplet part: " + s); - num = int(rounded); - } - } else { - num *= n; - } - if (*endptr == '/') - den = std::strtol(endptr + 1, &endptr, 10); - if (*endptr == '*') { - c = skip_space(endptr + 1); - r_idx = interpret_letter(*c); - ++c; - } else { - c = endptr; - r_idx = 3; - } - } else { - // syntax examples in this branch: "x", "+a", "-k/3" - r_idx = interpret_letter(*c); - c = skip_space(++c); - if (*c == '/') { - char* endptr; - den = std::strtol(c + 1, &endptr, 10); - c = endptr; - } - } - if (den != 1) { - if (den <= 0 || Op::DEN % den != 0 || fract != 0) - fail("Wrong denominator " + std::to_string(den) + " in: " + s); - num /= den; - } - r[r_idx] += num; - if (decimal_fract) - decimal_fract[r_idx] = num > 0 ? 
fract : -fract; - num = 0; - } - if (num != 0) - fail("trailing sign in: " + s); - return r; -} - -Op parse_triplet(const std::string& s, char notation) { - if (std::count(s.begin(), s.end(), ',') != 2) - fail("expected exactly two commas in triplet"); - size_t comma1 = s.find(','); - size_t comma2 = s.find(',', comma1 + 1); - char save_notation = notation; - notation = (notation | 0x20) & ~3; - if (notation != 'x' && notation != 'h' && notation != '`' && notation != ' ') // '`' == a' & ~3 - fail("parse_triplet(): unexpected notation='", save_notation, "'"); - auto a = parse_triplet_part(s.substr(0, comma1), notation); - auto b = parse_triplet_part(s.substr(comma1 + 1, comma2 - (comma1 + 1)), notation); - auto c = parse_triplet_part(s.substr(comma2 + 1), notation); - Op::Rot rot = {{{a[0], a[1], a[2]}, {b[0], b[1], b[2]}, {c[0], c[1], c[2]}}}; - Op::Tran tran = {a[3], b[3], c[3]}; - if (notation == 'h') { - if (tran != Op::Tran{0, 0, 0}) - fail("parse_triplet(): reciprocal-space Op cannot have translation: ", s); - rot = Op::transpose(rot); - } - return { rot, tran, notation }; -} - - -// OP -> TRIPLET - -namespace { - -// much faster than s += std::to_string(n) for n in 0 ... 99 -void append_small_number(std::string& s, int n) { - if (n < 0 || n >= 100) { - s += std::to_string(n); - } else if (n < 10) { - s += char('0' + n); - } else { // 10 ... 
99 - int tens = n / 10; - s += char('0' + tens); - s += char('0' + n - 10 * tens); - } -} - -void append_sign_of(std::string& s, int n) { - if (n < 0) - s += '-'; - else if (!s.empty()) - s += '+'; -} - -// append w/DEN fraction reduced to the lowest terms -std::pair get_op_fraction(int w) { - // Op::DEN == 24 == 2 * 2 * 2 * 3 - int denom = 1; - for (int i = 0; i != 3; ++i) - if (w % 2 == 0) // 2, 2, 2 - w /= 2; - else - denom *= 2; - if (w % 3 == 0) // 3 - w /= 3; - else - denom *= 3; - return {w, denom}; -} - -void append_fraction(std::string& s, std::pair frac) { - append_small_number(s, frac.first); - if (frac.second != 1) { - s += '/'; - append_small_number(s, frac.second); - } -} - -std::string make_triplet_part(const std::array& xyz, int w, char style) { - std::string s; - const char* letters = "xyz hkl abc XYZ HKL ABC"; - switch((style | 0x20) & ~3) { // |0x20 converts to lower case - case 'h': letters += 4; break; - case '`': letters += 8; break; // 'a', because 'a'&~3 == 0x60 == '`' - } - if (!(style & 0x20)) // not lower - letters += 12; - for (int i = 0; i != 3; ++i) - if (xyz[i] != 0) { - append_sign_of(s, xyz[i]); - int a = std::abs(xyz[i]); - if (a != Op::DEN) { - std::pair frac = get_op_fraction(a); - if (frac.first == 1) { // e.g. "x/3" - s += letters[i]; - s += '/'; - append_small_number(s, frac.second); - } else { // e.g. 
"2/3*x" - append_fraction(s, frac); - s += '*'; - s += letters[i]; - } - } else { - s += letters[i]; - } - } - if (w != 0) { - append_sign_of(s, w); - std::pair frac = get_op_fraction(std::abs(w)); - append_fraction(s, frac); - } - return s; -} - -} // anonymous namespace - -Op seitz_to_op(const std::array, 4>& t) { - static_assert(Op::DEN == 24, ""); - auto check_round = [](double d) { - double r = std::round(d * Op::DEN); - if (std::fabs(r - d * Op::DEN) > 0.05) - fail("all numbers in Seitz matrix must be equal Z/24"); - return static_cast(r); - }; - Op op; - if (std::fabs(t[3][0]) + std::fabs(t[3][1]) + std::fabs(t[3][2]) + - std::fabs(t[3][3] - 1) > 1e-3) - fail("the last row in Seitz matrix must be [0 0 0 1]"); - for (int i = 0; i < 3; ++i) { - for (int j = 0; j < 3; ++j) - op.rot[i][j] = check_round(t[i][j]); - op.tran[i] = check_round(t[i][3]); - } - op.notation = 'x'; - return op; -} - -void append_op_fraction(std::string& s, int w) { - append_fraction(s, get_op_fraction(w)); -} - -std::string Op::triplet(char style) const { - if (style == ' ') - style = (notation & ~0x20) ? notation : 'x'; - char lower_style = (style | 0x20) & ~3; - if (lower_style == 'h' && !is_hkl()) - fail("triplet(): can't write real-space triplet as hkl"); - if (lower_style != 'h' && is_hkl()) - fail("triplet(): can't write reciprocal-space triplet as xyz"); - // 'x'==0x78, 'h'==0x68, 'a'==0x61, so 'a'&~3 == 0x60 == '`' - if (lower_style != 'x' && lower_style != 'h' && lower_style != '`') - fail("unexpected triplet style: '", style, "'"); - // parse_triplet() transposes hkl ops such as l,h,k - auto r = !is_hkl()? 
rot : transposed_rot(); - return make_triplet_part(r[0], tran[0], style) + - "," + make_triplet_part(r[1], tran[1], style) + - "," + make_triplet_part(r[2], tran[2], style); -} - - -// INTERPRETING HALL SYMBOLS -// based on both ITfC vol.B ch.1.4 (2010) -// and http://cci.lbl.gov/sginfo/hall_symbols.html - -// matrices for Nz from Table 3 and 4 from hall_symbols.html -namespace { -Op::Rot hall_rotation_z(int N) { - constexpr int d = Op::DEN; - switch (N) { - case 1: return {{{d,0,0}, {0,d,0}, {0,0,d}}}; - case 2: return {{{-d,0,0}, {0,-d,0}, {0,0,d}}}; - case 3: return {{{0,-d,0}, {d,-d,0}, {0,0,d}}}; - case 4: return {{{0,-d,0}, {d,0,0}, {0,0,d}}}; - case 6: return {{{d,-d,0}, {d,0,0}, {0,0,d}}}; - case '\'': return {{{0,-d,0},{-d,0,0}, {0,0,-d}}}; - case '"': return {{{0,d,0}, { d,0,0}, {0,0,-d}}}; - case '*': return {{{0,0,d}, { d,0,0}, {0,d,0}}}; - default: fail("incorrect axis definition"); - } -} -Op::Tran hall_translation_from_symbol(char symbol) { - constexpr int h = Op::DEN / 2; - constexpr int q = Op::DEN / 4; - switch (symbol) { - case 'a': return {h, 0, 0}; - case 'b': return {0, h, 0}; - case 'c': return {0, 0, h}; - case 'n': return {h, h, h}; - case 'u': return {q, 0, 0}; - case 'v': return {0, q, 0}; - case 'w': return {0, 0, q}; - case 'd': return {q, q, q}; - default: fail(std::string("unknown symbol: ") + symbol); - } -} - -Op hall_matrix_symbol(const char* start, const char* end, int pos, int& prev) { - Op op = Op::identity(); - bool neg = (*start == '-'); - const char* p = (neg ? start + 1 : start); - if (*p < '1' || *p == '5' || *p > '6') - fail("wrong n-fold order notation: " + std::string(start, end)); - int N = *p++ - '0'; - int fractional_tran = 0; - char principal_axis = '\0'; - char diagonal_axis = '\0'; - for (; p < end; ++p) { - if (*p >= '1' && *p <= '5') { - if (fractional_tran != '\0') - fail("two numeric subscripts"); - fractional_tran = *p - '0'; - } else if (*p == '\'' || *p == '"' || *p == '*') { - if (N != (*p == '*' ? 
3 : 2)) - fail("wrong symbol: " + std::string(start, end)); - diagonal_axis = *p; - } else if (*p == 'x' || *p == 'y' || *p == 'z') { - principal_axis = *p; - } else { - op.translate(hall_translation_from_symbol(*p)); - } - } - // fill in implicit values - if (!principal_axis && !diagonal_axis) { - if (pos == 1) { - principal_axis = 'z'; - } else if (pos == 2 && N == 2) { - if (prev == 2 || prev == 4) - principal_axis = 'x'; - else if (prev == 3 || prev == 6) - diagonal_axis = '\''; - } else if (pos == 3 && N == 3) { - diagonal_axis = '*'; - } else if (N != 1) { - fail("missing axis"); - } - } - // get the operation - op.rot = hall_rotation_z(diagonal_axis ? diagonal_axis : N); - if (neg) - op.rot = op.negated_rot(); - auto alter_order = [](const Op::Rot& r, int i, int j, int k) { - return Op::Rot{{ {r[i][i], r[i][j], r[i][k]}, - {r[j][i], r[j][j], r[j][k]}, - {r[k][i], r[k][j], r[k][k]} }}; - }; - if (principal_axis == 'x') - op.rot = alter_order(op.rot, 2, 0, 1); - else if (principal_axis == 'y') - op.rot = alter_order(op.rot, 1, 2, 0); - if (fractional_tran) - op.tran[principal_axis - 'x'] += Op::DEN / N * fractional_tran; - prev = N; - return op; -} - -// Parses either short (0 0 1) or long notation (x,y,z+1/12) -// but without multipliers (such as 1/2x) to keep things simple for now. 
-Op parse_hall_change_of_basis(const char* start, const char* end) { - if (std::memchr(start, ',', end - start) != nullptr) // long symbol - return parse_triplet(std::string(start, end)); - // short symbol (0 0 1) - Op cob = Op::identity(); - char* endptr; - for (int i = 0; i != 3; ++i) { - cob.tran[i] = std::strtol(start, &endptr, 10) % 12 * (Op::DEN / 12); - start = endptr; - } - if (endptr != end) - fail("unexpected change-of-basis format: " + std::string(start, end)); - return cob; -} -} // anonymous namespace - -GroupOps generators_from_hall(const char* hall) { - auto find_blank = [](const char* p) { - while (*p != '\0' && *p != ' ' && *p != '\t' && *p != '_') // '_' == ' ' - ++p; - return p; - }; - if (hall == nullptr) - fail("null"); - hall = skip_space(hall); - GroupOps ops; - ops.sym_ops.emplace_back(Op::identity()); - bool centrosym = (hall[0] == '-'); - const char* lat = skip_space(centrosym ? hall + 1 : hall); - if (!lat) - fail("not a hall symbol: " + std::string(hall)); - ops.cen_ops = centring_vectors(*lat); - int counter = 0; - int prev = 0; - const char* part = skip_space(lat + 1); - while (*part != '\0' && *part != '(') { - const char* space = find_blank(part); - ++counter; - if (part[0] != '1' || (part[1] != ' ' && part[1] != '\0')) { - Op op = hall_matrix_symbol(part, space, counter, prev); - ops.sym_ops.emplace_back(op); - } - part = skip_space(space); - } - if (centrosym) - ops.sym_ops.push_back({Op::identity().negated_rot(), {0,0,0}, 'x'}); - if (*part == '(') { - const char* rb = std::strchr(part, ')'); - if (!rb) - fail("missing ')': " + std::string(hall)); - if (ops.sym_ops.empty()) - fail("misplaced translation: " + std::string(hall)); - ops.change_basis_forward(parse_hall_change_of_basis(part + 1, rb)); - - if (*skip_space(find_blank(rb + 1)) != '\0') - fail("unexpected characters after ')': " + std::string(hall)); - } - return ops; -} - - -const SpaceGroup spacegroup_tables::main[564] = { - // This table was generated by 
tools/gen_sg_table.py. - // First 530 entries in the same order as in SgInfo, sgtbx and ITB. - // Note: spacegroup 68 has three duplicates with different H-M names. - { 1, 1, "P 1" , 0, "", "P 1" , 0 }, // 0 - { 2, 2, "P -1" , 0, "", "-P 1" , 0 }, // 1 - { 3, 3, "P 1 2 1" , 0, "b", "P 2y" , 0 }, // 2 - { 3, 1003, "P 1 1 2" , 0, "c", "P 2" , 1 }, // 3 - { 3, 0, "P 2 1 1" , 0, "a", "P 2x" , 2 }, // 4 - { 4, 4, "P 1 21 1" , 0, "b", "P 2yb" , 0 }, // 5 - { 4, 1004, "P 1 1 21" , 0, "c", "P 2c" , 1 }, // 6 - { 4, 0, "P 21 1 1" , 0, "a", "P 2xa" , 2 }, // 7 - { 5, 5, "C 1 2 1" , 0, "b1", "C 2y" , 0 }, // 8 - { 5, 2005, "A 1 2 1" , 0, "b2", "A 2y" , 3 }, // 9 - { 5, 4005, "I 1 2 1" , 0, "b3", "I 2y" , 4 }, // 10 - { 5, 0, "A 1 1 2" , 0, "c1", "A 2" , 1 }, // 11 - { 5, 1005, "B 1 1 2" , 0, "c2", "B 2" , 5 }, // 12 - { 5, 0, "I 1 1 2" , 0, "c3", "I 2" , 6 }, // 13 - { 5, 0, "B 2 1 1" , 0, "a1", "B 2x" , 2 }, // 14 - { 5, 0, "C 2 1 1" , 0, "a2", "C 2x" , 7 }, // 15 - { 5, 0, "I 2 1 1" , 0, "a3", "I 2x" , 8 }, // 16 - { 6, 6, "P 1 m 1" , 0, "b", "P -2y" , 0 }, // 17 - { 6, 1006, "P 1 1 m" , 0, "c", "P -2" , 1 }, // 18 - { 6, 0, "P m 1 1" , 0, "a", "P -2x" , 2 }, // 19 - { 7, 7, "P 1 c 1" , 0, "b1", "P -2yc" , 0 }, // 20 - { 7, 0, "P 1 n 1" , 0, "b2", "P -2yac" , 9 }, // 21 - { 7, 0, "P 1 a 1" , 0, "b3", "P -2ya" , 3 }, // 22 - { 7, 0, "P 1 1 a" , 0, "c1", "P -2a" , 1 }, // 23 - { 7, 0, "P 1 1 n" , 0, "c2", "P -2ab" , 10}, // 24 - { 7, 1007, "P 1 1 b" , 0, "c3", "P -2b" , 5 }, // 25 - { 7, 0, "P b 1 1" , 0, "a1", "P -2xb" , 2 }, // 26 - { 7, 0, "P n 1 1" , 0, "a2", "P -2xbc" , 11}, // 27 - { 7, 0, "P c 1 1" , 0, "a3", "P -2xc" , 7 }, // 28 - { 8, 8, "C 1 m 1" , 0, "b1", "C -2y" , 0 }, // 29 - { 8, 0, "A 1 m 1" , 0, "b2", "A -2y" , 3 }, // 30 - { 8, 0, "I 1 m 1" , 0, "b3", "I -2y" , 4 }, // 31 - { 8, 0, "A 1 1 m" , 0, "c1", "A -2" , 1 }, // 32 - { 8, 1008, "B 1 1 m" , 0, "c2", "B -2" , 5 }, // 33 - { 8, 0, "I 1 1 m" , 0, "c3", "I -2" , 6 }, // 34 - { 8, 0, "B m 1 1" , 0, "a1", 
"B -2x" , 2 }, // 35 - { 8, 0, "C m 1 1" , 0, "a2", "C -2x" , 7 }, // 36 - { 8, 0, "I m 1 1" , 0, "a3", "I -2x" , 8 }, // 37 - { 9, 9, "C 1 c 1" , 0, "b1", "C -2yc" , 0 }, // 38 - { 9, 0, "A 1 n 1" , 0, "b2", "A -2yab" , 12}, // 39 - { 9, 0, "I 1 a 1" , 0, "b3", "I -2ya" , 13}, // 40 - { 9, 0, "A 1 a 1" , 0, "-b1", "A -2ya" , 3 }, // 41 - { 9, 0, "C 1 n 1" , 0, "-b2", "C -2yac" , 14}, // 42 - { 9, 0, "I 1 c 1" , 0, "-b3", "I -2yc" , 4 }, // 43 - { 9, 0, "A 1 1 a" , 0, "c1", "A -2a" , 1 }, // 44 - { 9, 0, "B 1 1 n" , 0, "c2", "B -2ab" , 15}, // 45 - { 9, 0, "I 1 1 b" , 0, "c3", "I -2b" , 16}, // 46 - { 9, 1009, "B 1 1 b" , 0, "-c1", "B -2b" , 5 }, // 47 - { 9, 0, "A 1 1 n" , 0, "-c2", "A -2ab" , 10}, // 48 - { 9, 0, "I 1 1 a" , 0, "-c3", "I -2a" , 6 }, // 49 - { 9, 0, "B b 1 1" , 0, "a1", "B -2xb" , 2 }, // 50 - { 9, 0, "C n 1 1" , 0, "a2", "C -2xac" , 17}, // 51 - { 9, 0, "I c 1 1" , 0, "a3", "I -2xc" , 18}, // 52 - { 9, 0, "C c 1 1" , 0, "-a1", "C -2xc" , 7 }, // 53 - { 9, 0, "B n 1 1" , 0, "-a2", "B -2xab" , 11}, // 54 - { 9, 0, "I b 1 1" , 0, "-a3", "I -2xb" , 8 }, // 55 - { 10, 10, "P 1 2/m 1" , 0, "b", "-P 2y" , 0 }, // 56 - { 10, 1010, "P 1 1 2/m" , 0, "c", "-P 2" , 1 }, // 57 - { 10, 0, "P 2/m 1 1" , 0, "a", "-P 2x" , 2 }, // 58 - { 11, 11, "P 1 21/m 1", 0, "b", "-P 2yb" , 0 }, // 59 - { 11, 1011, "P 1 1 21/m", 0, "c", "-P 2c" , 1 }, // 60 - { 11, 0, "P 21/m 1 1", 0, "a", "-P 2xa" , 2 }, // 61 - { 12, 12, "C 1 2/m 1" , 0, "b1", "-C 2y" , 0 }, // 62 - { 12, 0, "A 1 2/m 1" , 0, "b2", "-A 2y" , 3 }, // 63 - { 12, 0, "I 1 2/m 1" , 0, "b3", "-I 2y" , 4 }, // 64 - { 12, 0, "A 1 1 2/m" , 0, "c1", "-A 2" , 1 }, // 65 - { 12, 1012, "B 1 1 2/m" , 0, "c2", "-B 2" , 5 }, // 66 - { 12, 0, "I 1 1 2/m" , 0, "c3", "-I 2" , 6 }, // 67 - { 12, 0, "B 2/m 1 1" , 0, "a1", "-B 2x" , 2 }, // 68 - { 12, 0, "C 2/m 1 1" , 0, "a2", "-C 2x" , 7 }, // 69 - { 12, 0, "I 2/m 1 1" , 0, "a3", "-I 2x" , 8 }, // 70 - { 13, 13, "P 1 2/c 1" , 0, "b1", "-P 2yc" , 0 }, // 71 - { 13, 0, "P 1 2/n 1" 
, 0, "b2", "-P 2yac" , 9 }, // 72 - { 13, 0, "P 1 2/a 1" , 0, "b3", "-P 2ya" , 3 }, // 73 - { 13, 0, "P 1 1 2/a" , 0, "c1", "-P 2a" , 1 }, // 74 - { 13, 0, "P 1 1 2/n" , 0, "c2", "-P 2ab" , 10}, // 75 - { 13, 1013, "P 1 1 2/b" , 0, "c3", "-P 2b" , 5 }, // 76 - { 13, 0, "P 2/b 1 1" , 0, "a1", "-P 2xb" , 2 }, // 77 - { 13, 0, "P 2/n 1 1" , 0, "a2", "-P 2xbc" , 11}, // 78 - { 13, 0, "P 2/c 1 1" , 0, "a3", "-P 2xc" , 7 }, // 79 - { 14, 14, "P 1 21/c 1", 0, "b1", "-P 2ybc" , 0 }, // 80 - { 14, 2014, "P 1 21/n 1", 0, "b2", "-P 2yn" , 9 }, // 81 - { 14, 3014, "P 1 21/a 1", 0, "b3", "-P 2yab" , 3 }, // 82 - { 14, 0, "P 1 1 21/a", 0, "c1", "-P 2ac" , 1 }, // 83 - { 14, 0, "P 1 1 21/n", 0, "c2", "-P 2n" , 10}, // 84 - { 14, 1014, "P 1 1 21/b", 0, "c3", "-P 2bc" , 5 }, // 85 - { 14, 0, "P 21/b 1 1", 0, "a1", "-P 2xab" , 2 }, // 86 - { 14, 0, "P 21/n 1 1", 0, "a2", "-P 2xn" , 11}, // 87 - { 14, 0, "P 21/c 1 1", 0, "a3", "-P 2xac" , 7 }, // 88 - { 15, 15, "C 1 2/c 1" , 0, "b1", "-C 2yc" , 0 }, // 89 - { 15, 0, "A 1 2/n 1" , 0, "b2", "-A 2yab" , 12}, // 90 - { 15, 0, "I 1 2/a 1" , 0, "b3", "-I 2ya" , 13}, // 91 - { 15, 0, "A 1 2/a 1" , 0, "-b1", "-A 2ya" , 3 }, // 92 - { 15, 0, "C 1 2/n 1" , 0, "-b2", "-C 2yac" , 19}, // 93 - { 15, 0, "I 1 2/c 1" , 0, "-b3", "-I 2yc" , 4 }, // 94 - { 15, 0, "A 1 1 2/a" , 0, "c1", "-A 2a" , 1 }, // 95 - { 15, 0, "B 1 1 2/n" , 0, "c2", "-B 2ab" , 15}, // 96 - { 15, 0, "I 1 1 2/b" , 0, "c3", "-I 2b" , 16}, // 97 - { 15, 1015, "B 1 1 2/b" , 0, "-c1", "-B 2b" , 5 }, // 98 - { 15, 0, "A 1 1 2/n" , 0, "-c2", "-A 2ab" , 10}, // 99 - { 15, 0, "I 1 1 2/a" , 0, "-c3", "-I 2a" , 6 }, // 100 - { 15, 0, "B 2/b 1 1" , 0, "a1", "-B 2xb" , 2 }, // 101 - { 15, 0, "C 2/n 1 1" , 0, "a2", "-C 2xac" , 17}, // 102 - { 15, 0, "I 2/c 1 1" , 0, "a3", "-I 2xc" , 18}, // 103 - { 15, 0, "C 2/c 1 1" , 0, "-a1", "-C 2xc" , 7 }, // 104 - { 15, 0, "B 2/n 1 1" , 0, "-a2", "-B 2xab" , 11}, // 105 - { 15, 0, "I 2/b 1 1" , 0, "-a3", "-I 2xb" , 8 }, // 106 - { 16, 16, "P 2 2 2" , 0, 
"", "P 2 2" , 0 }, // 107 - { 17, 17, "P 2 2 21" , 0, "", "P 2c 2" , 0 }, // 108 - { 17, 1017, "P 21 2 2" , 0, "cab", "P 2a 2a" , 1 }, // 109 - { 17, 2017, "P 2 21 2" , 0, "bca", "P 2 2b" , 2 }, // 110 - { 18, 18, "P 21 21 2" , 0, "", "P 2 2ab" , 0 }, // 111 - { 18, 3018, "P 2 21 21" , 0, "cab", "P 2bc 2" , 1 }, // 112 - { 18, 2018, "P 21 2 21" , 0, "bca", "P 2ac 2ac" , 2 }, // 113 - { 19, 19, "P 21 21 21", 0, "", "P 2ac 2ab" , 0 }, // 114 - { 20, 20, "C 2 2 21" , 0, "", "C 2c 2" , 0 }, // 115 - { 20, 0, "A 21 2 2" , 0, "cab", "A 2a 2a" , 1 }, // 116 - { 20, 0, "B 2 21 2" , 0, "bca", "B 2 2b" , 2 }, // 117 - { 21, 21, "C 2 2 2" , 0, "", "C 2 2" , 0 }, // 118 - { 21, 0, "A 2 2 2" , 0, "cab", "A 2 2" , 1 }, // 119 - { 21, 0, "B 2 2 2" , 0, "bca", "B 2 2" , 2 }, // 120 - { 22, 22, "F 2 2 2" , 0, "", "F 2 2" , 0 }, // 121 - { 23, 23, "I 2 2 2" , 0, "", "I 2 2" , 0 }, // 122 - { 24, 24, "I 21 21 21", 0, "", "I 2b 2c" , 0 }, // 123 - { 25, 25, "P m m 2" , 0, "", "P 2 -2" , 0 }, // 124 - { 25, 0, "P 2 m m" , 0, "cab", "P -2 2" , 1 }, // 125 - { 25, 0, "P m 2 m" , 0, "bca", "P -2 -2" , 2 }, // 126 - { 26, 26, "P m c 21" , 0, "", "P 2c -2" , 0 }, // 127 - { 26, 0, "P c m 21" , 0, "ba-c", "P 2c -2c" , 7 }, // 128 - { 26, 0, "P 21 m a" , 0, "cab", "P -2a 2a" , 1 }, // 129 - { 26, 0, "P 21 a m" , 0, "-cba", "P -2 2a" , 3 }, // 130 - { 26, 0, "P b 21 m" , 0, "bca", "P -2 -2b" , 2 }, // 131 - { 26, 0, "P m 21 b" , 0, "a-cb", "P -2b -2" , 5 }, // 132 - { 27, 27, "P c c 2" , 0, "", "P 2 -2c" , 0 }, // 133 - { 27, 0, "P 2 a a" , 0, "cab", "P -2a 2" , 1 }, // 134 - { 27, 0, "P b 2 b" , 0, "bca", "P -2b -2b" , 2 }, // 135 - { 28, 28, "P m a 2" , 0, "", "P 2 -2a" , 0 }, // 136 - { 28, 0, "P b m 2" , 0, "ba-c", "P 2 -2b" , 7 }, // 137 - { 28, 0, "P 2 m b" , 0, "cab", "P -2b 2" , 1 }, // 138 - { 28, 0, "P 2 c m" , 0, "-cba", "P -2c 2" , 3 }, // 139 - { 28, 0, "P c 2 m" , 0, "bca", "P -2c -2c" , 2 }, // 140 - { 28, 0, "P m 2 a" , 0, "a-cb", "P -2a -2a" , 5 }, // 141 - { 29, 29, "P c a 
21" , 0, "", "P 2c -2ac" , 0 }, // 142 - { 29, 0, "P b c 21" , 0, "ba-c", "P 2c -2b" , 7 }, // 143 - { 29, 0, "P 21 a b" , 0, "cab", "P -2b 2a" , 1 }, // 144 - { 29, 0, "P 21 c a" , 0, "-cba", "P -2ac 2a" , 3 }, // 145 - { 29, 0, "P c 21 b" , 0, "bca", "P -2bc -2c" , 2 }, // 146 - { 29, 0, "P b 21 a" , 0, "a-cb", "P -2a -2ab" , 5 }, // 147 - { 30, 30, "P n c 2" , 0, "", "P 2 -2bc" , 0 }, // 148 - { 30, 0, "P c n 2" , 0, "ba-c", "P 2 -2ac" , 7 }, // 149 - { 30, 0, "P 2 n a" , 0, "cab", "P -2ac 2" , 1 }, // 150 - { 30, 0, "P 2 a n" , 0, "-cba", "P -2ab 2" , 3 }, // 151 - { 30, 0, "P b 2 n" , 0, "bca", "P -2ab -2ab" , 2 }, // 152 - { 30, 0, "P n 2 b" , 0, "a-cb", "P -2bc -2bc" , 5 }, // 153 - { 31, 31, "P m n 21" , 0, "", "P 2ac -2" , 0 }, // 154 - { 31, 0, "P n m 21" , 0, "ba-c", "P 2bc -2bc" , 7 }, // 155 - { 31, 0, "P 21 m n" , 0, "cab", "P -2ab 2ab" , 1 }, // 156 - { 31, 0, "P 21 n m" , 0, "-cba", "P -2 2ac" , 3 }, // 157 - { 31, 0, "P n 21 m" , 0, "bca", "P -2 -2bc" , 2 }, // 158 - { 31, 0, "P m 21 n" , 0, "a-cb", "P -2ab -2" , 5 }, // 159 - { 32, 32, "P b a 2" , 0, "", "P 2 -2ab" , 0 }, // 160 - { 32, 0, "P 2 c b" , 0, "cab", "P -2bc 2" , 1 }, // 161 - { 32, 0, "P c 2 a" , 0, "bca", "P -2ac -2ac" , 2 }, // 162 - { 33, 33, "P n a 21" , 0, "", "P 2c -2n" , 0 }, // 163 - { 33, 0, "P b n 21" , 0, "ba-c", "P 2c -2ab" , 7 }, // 164 - { 33, 0, "P 21 n b" , 0, "cab", "P -2bc 2a" , 1 }, // 165 - { 33, 0, "P 21 c n" , 0, "-cba", "P -2n 2a" , 3 }, // 166 - { 33, 0, "P c 21 n" , 0, "bca", "P -2n -2ac" , 2 }, // 167 - { 33, 0, "P n 21 a" , 0, "a-cb", "P -2ac -2n" , 5 }, // 168 - { 34, 34, "P n n 2" , 0, "", "P 2 -2n" , 0 }, // 169 - { 34, 0, "P 2 n n" , 0, "cab", "P -2n 2" , 1 }, // 170 - { 34, 0, "P n 2 n" , 0, "bca", "P -2n -2n" , 2 }, // 171 - { 35, 35, "C m m 2" , 0, "", "C 2 -2" , 0 }, // 172 - { 35, 0, "A 2 m m" , 0, "cab", "A -2 2" , 1 }, // 173 - { 35, 0, "B m 2 m" , 0, "bca", "B -2 -2" , 2 }, // 174 - { 36, 36, "C m c 21" , 0, "", "C 2c -2" , 0 }, // 175 - { 36, 0, 
"C c m 21" , 0, "ba-c", "C 2c -2c" , 7 }, // 176 - { 36, 0, "A 21 m a" , 0, "cab", "A -2a 2a" , 1 }, // 177 - { 36, 0, "A 21 a m" , 0, "-cba", "A -2 2a" , 3 }, // 178 - { 36, 0, "B b 21 m" , 0, "bca", "B -2 -2b" , 2 }, // 179 - { 36, 0, "B m 21 b" , 0, "a-cb", "B -2b -2" , 5 }, // 180 - { 37, 37, "C c c 2" , 0, "", "C 2 -2c" , 0 }, // 181 - { 37, 0, "A 2 a a" , 0, "cab", "A -2a 2" , 1 }, // 182 - { 37, 0, "B b 2 b" , 0, "bca", "B -2b -2b" , 2 }, // 183 - { 38, 38, "A m m 2" , 0, "", "A 2 -2" , 0 }, // 184 - { 38, 0, "B m m 2" , 0, "ba-c", "B 2 -2" , 7 }, // 185 - { 38, 0, "B 2 m m" , 0, "cab", "B -2 2" , 1 }, // 186 - { 38, 0, "C 2 m m" , 0, "-cba", "C -2 2" , 3 }, // 187 - { 38, 0, "C m 2 m" , 0, "bca", "C -2 -2" , 2 }, // 188 - { 38, 0, "A m 2 m" , 0, "a-cb", "A -2 -2" , 5 }, // 189 - { 39, 39, "A b m 2" , 0, "", "A 2 -2b" , 0 }, // 190 - { 39, 0, "B m a 2" , 0, "ba-c", "B 2 -2a" , 7 }, // 191 - { 39, 0, "B 2 c m" , 0, "cab", "B -2a 2" , 1 }, // 192 - { 39, 0, "C 2 m b" , 0, "-cba", "C -2a 2" , 3 }, // 193 - { 39, 0, "C m 2 a" , 0, "bca", "C -2a -2a" , 2 }, // 194 - { 39, 0, "A c 2 m" , 0, "a-cb", "A -2b -2b" , 5 }, // 195 - { 40, 40, "A m a 2" , 0, "", "A 2 -2a" , 0 }, // 196 - { 40, 0, "B b m 2" , 0, "ba-c", "B 2 -2b" , 7 }, // 197 - { 40, 0, "B 2 m b" , 0, "cab", "B -2b 2" , 1 }, // 198 - { 40, 0, "C 2 c m" , 0, "-cba", "C -2c 2" , 3 }, // 199 - { 40, 0, "C c 2 m" , 0, "bca", "C -2c -2c" , 2 }, // 200 - { 40, 0, "A m 2 a" , 0, "a-cb", "A -2a -2a" , 5 }, // 201 - { 41, 41, "A b a 2" , 0, "", "A 2 -2ab" , 0 }, // 202 - { 41, 0, "B b a 2" , 0, "ba-c", "B 2 -2ab" , 7 }, // 203 - { 41, 0, "B 2 c b" , 0, "cab", "B -2ab 2" , 1 }, // 204 - { 41, 0, "C 2 c b" , 0, "-cba", "C -2ac 2" , 3 }, // 205 - { 41, 0, "C c 2 a" , 0, "bca", "C -2ac -2ac" , 2 }, // 206 - { 41, 0, "A c 2 a" , 0, "a-cb", "A -2ab -2ab" , 5 }, // 207 - { 42, 42, "F m m 2" , 0, "", "F 2 -2" , 0 }, // 208 - { 42, 0, "F 2 m m" , 0, "cab", "F -2 2" , 1 }, // 209 - { 42, 0, "F m 2 m" , 0, "bca", "F -2 -2" , 
2 }, // 210 - { 43, 43, "F d d 2" , 0, "", "F 2 -2d" , 0 }, // 211 - { 43, 0, "F 2 d d" , 0, "cab", "F -2d 2" , 1 }, // 212 - { 43, 0, "F d 2 d" , 0, "bca", "F -2d -2d" , 2 }, // 213 - { 44, 44, "I m m 2" , 0, "", "I 2 -2" , 0 }, // 214 - { 44, 0, "I 2 m m" , 0, "cab", "I -2 2" , 1 }, // 215 - { 44, 0, "I m 2 m" , 0, "bca", "I -2 -2" , 2 }, // 216 - { 45, 45, "I b a 2" , 0, "", "I 2 -2c" , 0 }, // 217 - { 45, 0, "I 2 c b" , 0, "cab", "I -2a 2" , 1 }, // 218 - { 45, 0, "I c 2 a" , 0, "bca", "I -2b -2b" , 2 }, // 219 - { 46, 46, "I m a 2" , 0, "", "I 2 -2a" , 0 }, // 220 - { 46, 0, "I b m 2" , 0, "ba-c", "I 2 -2b" , 7 }, // 221 - { 46, 0, "I 2 m b" , 0, "cab", "I -2b 2" , 1 }, // 222 - { 46, 0, "I 2 c m" , 0, "-cba", "I -2c 2" , 3 }, // 223 - { 46, 0, "I c 2 m" , 0, "bca", "I -2c -2c" , 2 }, // 224 - { 46, 0, "I m 2 a" , 0, "a-cb", "I -2a -2a" , 5 }, // 225 - { 47, 47, "P m m m" , 0, "", "-P 2 2" , 0 }, // 226 - { 48, 48, "P n n n" , '1', "", "P 2 2 -1n" , 20}, // 227 - { 48, 0, "P n n n" , '2', "", "-P 2ab 2bc" , 0 }, // 228 - { 49, 49, "P c c m" , 0, "", "-P 2 2c" , 0 }, // 229 - { 49, 0, "P m a a" , 0, "cab", "-P 2a 2" , 1 }, // 230 - { 49, 0, "P b m b" , 0, "bca", "-P 2b 2b" , 2 }, // 231 - { 50, 50, "P b a n" , '1', "", "P 2 2 -1ab" , 21}, // 232 - { 50, 0, "P b a n" , '2', "", "-P 2ab 2b" , 0 }, // 233 - { 50, 0, "P n c b" , '1', "cab", "P 2 2 -1bc" , 22}, // 234 - { 50, 0, "P n c b" , '2', "cab", "-P 2b 2bc" , 1 }, // 235 - { 50, 0, "P c n a" , '1', "bca", "P 2 2 -1ac" , 23}, // 236 - { 50, 0, "P c n a" , '2', "bca", "-P 2a 2c" , 2 }, // 237 - { 51, 51, "P m m a" , 0, "", "-P 2a 2a" , 0 }, // 238 - { 51, 0, "P m m b" , 0, "ba-c", "-P 2b 2" , 7 }, // 239 - { 51, 0, "P b m m" , 0, "cab", "-P 2 2b" , 1 }, // 240 - { 51, 0, "P c m m" , 0, "-cba", "-P 2c 2c" , 3 }, // 241 - { 51, 0, "P m c m" , 0, "bca", "-P 2c 2" , 2 }, // 242 - { 51, 0, "P m a m" , 0, "a-cb", "-P 2 2a" , 5 }, // 243 - { 52, 52, "P n n a" , 0, "", "-P 2a 2bc" , 0 }, // 244 - { 52, 0, "P n n b" , 
0, "ba-c", "-P 2b 2n" , 7 }, // 245 - { 52, 0, "P b n n" , 0, "cab", "-P 2n 2b" , 1 }, // 246 - { 52, 0, "P c n n" , 0, "-cba", "-P 2ab 2c" , 3 }, // 247 - { 52, 0, "P n c n" , 0, "bca", "-P 2ab 2n" , 2 }, // 248 - { 52, 0, "P n a n" , 0, "a-cb", "-P 2n 2bc" , 5 }, // 249 - { 53, 53, "P m n a" , 0, "", "-P 2ac 2" , 0 }, // 250 - { 53, 0, "P n m b" , 0, "ba-c", "-P 2bc 2bc" , 7 }, // 251 - { 53, 0, "P b m n" , 0, "cab", "-P 2ab 2ab" , 1 }, // 252 - { 53, 0, "P c n m" , 0, "-cba", "-P 2 2ac" , 3 }, // 253 - { 53, 0, "P n c m" , 0, "bca", "-P 2 2bc" , 2 }, // 254 - { 53, 0, "P m a n" , 0, "a-cb", "-P 2ab 2" , 5 }, // 255 - { 54, 54, "P c c a" , 0, "", "-P 2a 2ac" , 0 }, // 256 - { 54, 0, "P c c b" , 0, "ba-c", "-P 2b 2c" , 7 }, // 257 - { 54, 0, "P b a a" , 0, "cab", "-P 2a 2b" , 1 }, // 258 - { 54, 0, "P c a a" , 0, "-cba", "-P 2ac 2c" , 3 }, // 259 - { 54, 0, "P b c b" , 0, "bca", "-P 2bc 2b" , 2 }, // 260 - { 54, 0, "P b a b" , 0, "a-cb", "-P 2b 2ab" , 5 }, // 261 - { 55, 55, "P b a m" , 0, "", "-P 2 2ab" , 0 }, // 262 - { 55, 0, "P m c b" , 0, "cab", "-P 2bc 2" , 1 }, // 263 - { 55, 0, "P c m a" , 0, "bca", "-P 2ac 2ac" , 2 }, // 264 - { 56, 56, "P c c n" , 0, "", "-P 2ab 2ac" , 0 }, // 265 - { 56, 0, "P n a a" , 0, "cab", "-P 2ac 2bc" , 1 }, // 266 - { 56, 0, "P b n b" , 0, "bca", "-P 2bc 2ab" , 2 }, // 267 - { 57, 57, "P b c m" , 0, "", "-P 2c 2b" , 0 }, // 268 - { 57, 0, "P c a m" , 0, "ba-c", "-P 2c 2ac" , 7 }, // 269 - { 57, 0, "P m c a" , 0, "cab", "-P 2ac 2a" , 1 }, // 270 - { 57, 0, "P m a b" , 0, "-cba", "-P 2b 2a" , 3 }, // 271 - { 57, 0, "P b m a" , 0, "bca", "-P 2a 2ab" , 2 }, // 272 - { 57, 0, "P c m b" , 0, "a-cb", "-P 2bc 2c" , 5 }, // 273 - { 58, 58, "P n n m" , 0, "", "-P 2 2n" , 0 }, // 274 - { 58, 0, "P m n n" , 0, "cab", "-P 2n 2" , 1 }, // 275 - { 58, 0, "P n m n" , 0, "bca", "-P 2n 2n" , 2 }, // 276 - { 59, 59, "P m m n" , '1', "", "P 2 2ab -1ab" , 21}, // 277 - { 59, 1059, "P m m n" , '2', "", "-P 2ab 2a" , 0 }, // 278 - { 59, 0, "P n m m" , 
'1', "cab", "P 2bc 2 -1bc" , 22}, // 279 - { 59, 0, "P n m m" , '2', "cab", "-P 2c 2bc" , 1 }, // 280 - { 59, 0, "P m n m" , '1', "bca", "P 2ac 2ac -1ac", 23}, // 281 - { 59, 0, "P m n m" , '2', "bca", "-P 2c 2a" , 2 }, // 282 - { 60, 60, "P b c n" , 0, "", "-P 2n 2ab" , 0 }, // 283 - { 60, 0, "P c a n" , 0, "ba-c", "-P 2n 2c" , 7 }, // 284 - { 60, 0, "P n c a" , 0, "cab", "-P 2a 2n" , 1 }, // 285 - { 60, 0, "P n a b" , 0, "-cba", "-P 2bc 2n" , 3 }, // 286 - { 60, 0, "P b n a" , 0, "bca", "-P 2ac 2b" , 2 }, // 287 - { 60, 0, "P c n b" , 0, "a-cb", "-P 2b 2ac" , 5 }, // 288 - { 61, 61, "P b c a" , 0, "", "-P 2ac 2ab" , 0 }, // 289 - { 61, 0, "P c a b" , 0, "ba-c", "-P 2bc 2ac" , 3 }, // 290 - { 62, 62, "P n m a" , 0, "", "-P 2ac 2n" , 0 }, // 291 - { 62, 0, "P m n b" , 0, "ba-c", "-P 2bc 2a" , 7 }, // 292 - { 62, 0, "P b n m" , 0, "cab", "-P 2c 2ab" , 1 }, // 293 - { 62, 0, "P c m n" , 0, "-cba", "-P 2n 2ac" , 3 }, // 294 - { 62, 0, "P m c n" , 0, "bca", "-P 2n 2a" , 2 }, // 295 - { 62, 0, "P n a m" , 0, "a-cb", "-P 2c 2n" , 5 }, // 296 - { 63, 63, "C m c m" , 0, "", "-C 2c 2" , 0 }, // 297 - { 63, 0, "C c m m" , 0, "ba-c", "-C 2c 2c" , 7 }, // 298 - { 63, 0, "A m m a" , 0, "cab", "-A 2a 2a" , 1 }, // 299 - { 63, 0, "A m a m" , 0, "-cba", "-A 2 2a" , 3 }, // 300 - { 63, 0, "B b m m" , 0, "bca", "-B 2 2b" , 2 }, // 301 - { 63, 0, "B m m b" , 0, "a-cb", "-B 2b 2" , 5 }, // 302 - { 64, 64, "C m c a" , 0, "", "-C 2ac 2" , 0 }, // 303 - { 64, 0, "C c m b" , 0, "ba-c", "-C 2ac 2ac" , 7 }, // 304 - { 64, 0, "A b m a" , 0, "cab", "-A 2ab 2ab" , 1 }, // 305 - { 64, 0, "A c a m" , 0, "-cba", "-A 2 2ab" , 3 }, // 306 - { 64, 0, "B b c m" , 0, "bca", "-B 2 2ab" , 2 }, // 307 - { 64, 0, "B m a b" , 0, "a-cb", "-B 2ab 2" , 5 }, // 308 - { 65, 65, "C m m m" , 0, "", "-C 2 2" , 0 }, // 309 - { 65, 0, "A m m m" , 0, "cab", "-A 2 2" , 1 }, // 310 - { 65, 0, "B m m m" , 0, "bca", "-B 2 2" , 2 }, // 311 - { 66, 66, "C c c m" , 0, "", "-C 2 2c" , 0 }, // 312 - { 66, 0, "A m a a" , 0, 
"cab", "-A 2a 2" , 1 }, // 313 - { 66, 0, "B b m b" , 0, "bca", "-B 2b 2b" , 2 }, // 314 - { 67, 67, "C m m a" , 0, "", "-C 2a 2" , 0 }, // 315 - { 67, 0, "C m m b" , 0, "ba-c", "-C 2a 2a" , 14}, // 316 - { 67, 0, "A b m m" , 0, "cab", "-A 2b 2b" , 1 }, // 317 - { 67, 0, "A c m m" , 0, "-cba", "-A 2 2b" , 3 }, // 318 - { 67, 0, "B m c m" , 0, "bca", "-B 2 2a" , 2 }, // 319 - { 67, 0, "B m a m" , 0, "a-cb", "-B 2a 2" , 5 }, // 320 - { 68, 68, "C c c a" , '1', "", "C 2 2 -1ac" , 24}, // 321 - { 68, 0, "C c c a" , '2', "", "-C 2a 2ac" , 0 }, // 322 - { 68, 0, "C c c b" , '1', "ba-c", "C 2 2 -1ac" , 24}, // 323 (==321) - { 68, 0, "C c c b" , '2', "ba-c", "-C 2a 2c" , 21}, // 324 - { 68, 0, "A b a a" , '1', "cab", "A 2 2 -1ab" , 25}, // 325 - { 68, 0, "A b a a" , '2', "cab", "-A 2a 2b" , 1 }, // 326 - { 68, 0, "A c a a" , '1', "-cba", "A 2 2 -1ab" , 25}, // 327 (==325) - { 68, 0, "A c a a" , '2', "-cba", "-A 2ab 2b" , 3 }, // 328 - { 68, 0, "B b c b" , '1', "bca", "B 2 2 -1ab" , 26}, // 329 - { 68, 0, "B b c b" , '2', "bca", "-B 2ab 2b" , 2 }, // 330 - { 68, 0, "B b a b" , '1', "a-cb", "B 2 2 -1ab" , 26}, // 331 (==329) - { 68, 0, "B b a b" , '2', "a-cb", "-B 2b 2ab" , 5 }, // 332 - { 69, 69, "F m m m" , 0, "", "-F 2 2" , 0 }, // 333 - { 70, 70, "F d d d" , '1', "", "F 2 2 -1d" , 27}, // 334 - { 70, 0, "F d d d" , '2', "", "-F 2uv 2vw" , 0 }, // 335 - { 71, 71, "I m m m" , 0, "", "-I 2 2" , 0 }, // 336 - { 72, 72, "I b a m" , 0, "", "-I 2 2c" , 0 }, // 337 - { 72, 0, "I m c b" , 0, "cab", "-I 2a 2" , 1 }, // 338 - { 72, 0, "I c m a" , 0, "bca", "-I 2b 2b" , 2 }, // 339 - { 73, 73, "I b c a" , 0, "", "-I 2b 2c" , 0 }, // 340 - { 73, 0, "I c a b" , 0, "ba-c", "-I 2a 2b" , 28}, // 341 - { 74, 74, "I m m a" , 0, "", "-I 2b 2" , 0 }, // 342 - { 74, 0, "I m m b" , 0, "ba-c", "-I 2a 2a" , 28}, // 343 - { 74, 0, "I b m m" , 0, "cab", "-I 2c 2c" , 1 }, // 344 - { 74, 0, "I c m m" , 0, "-cba", "-I 2 2b" , 3 }, // 345 - { 74, 0, "I m c m" , 0, "bca", "-I 2 2a" , 2 }, // 346 - { 
74, 0, "I m a m" , 0, "a-cb", "-I 2c 2" , 5 }, // 347 - { 75, 75, "P 4" , 0, "", "P 4" , 0 }, // 348 - { 76, 76, "P 41" , 0, "", "P 4w" , 0 }, // 349 - { 77, 77, "P 42" , 0, "", "P 4c" , 0 }, // 350 - { 78, 78, "P 43" , 0, "", "P 4cw" , 0 }, // 351 - { 79, 79, "I 4" , 0, "", "I 4" , 0 }, // 352 - { 80, 80, "I 41" , 0, "", "I 4bw" , 0 }, // 353 - { 81, 81, "P -4" , 0, "", "P -4" , 0 }, // 354 - { 82, 82, "I -4" , 0, "", "I -4" , 0 }, // 355 - { 83, 83, "P 4/m" , 0, "", "-P 4" , 0 }, // 356 - { 84, 84, "P 42/m" , 0, "", "-P 4c" , 0 }, // 357 - { 85, 85, "P 4/n" , '1', "", "P 4ab -1ab" , 29}, // 358 - { 85, 0, "P 4/n" , '2', "", "-P 4a" , 0 }, // 359 - { 86, 86, "P 42/n" , '1', "", "P 4n -1n" , 30}, // 360 - { 86, 0, "P 42/n" , '2', "", "-P 4bc" , 0 }, // 361 - { 87, 87, "I 4/m" , 0, "", "-I 4" , 0 }, // 362 - { 88, 88, "I 41/a" , '1', "", "I 4bw -1bw" , 31}, // 363 - { 88, 0, "I 41/a" , '2', "", "-I 4ad" , 0 }, // 364 - { 89, 89, "P 4 2 2" , 0, "", "P 4 2" , 0 }, // 365 - { 90, 90, "P 4 21 2" , 0, "", "P 4ab 2ab" , 0 }, // 366 - { 91, 91, "P 41 2 2" , 0, "", "P 4w 2c" , 0 }, // 367 - { 92, 92, "P 41 21 2" , 0, "", "P 4abw 2nw" , 0 }, // 368 - { 93, 93, "P 42 2 2" , 0, "", "P 4c 2" , 0 }, // 369 - { 94, 94, "P 42 21 2" , 0, "", "P 4n 2n" , 0 }, // 370 - { 95, 95, "P 43 2 2" , 0, "", "P 4cw 2c" , 0 }, // 371 - { 96, 96, "P 43 21 2" , 0, "", "P 4nw 2abw" , 0 }, // 372 - { 97, 97, "I 4 2 2" , 0, "", "I 4 2" , 0 }, // 373 - { 98, 98, "I 41 2 2" , 0, "", "I 4bw 2bw" , 0 }, // 374 - { 99, 99, "P 4 m m" , 0, "", "P 4 -2" , 0 }, // 375 - {100, 100, "P 4 b m" , 0, "", "P 4 -2ab" , 0 }, // 376 - {101, 101, "P 42 c m" , 0, "", "P 4c -2c" , 0 }, // 377 - {102, 102, "P 42 n m" , 0, "", "P 4n -2n" , 0 }, // 378 - {103, 103, "P 4 c c" , 0, "", "P 4 -2c" , 0 }, // 379 - {104, 104, "P 4 n c" , 0, "", "P 4 -2n" , 0 }, // 380 - {105, 105, "P 42 m c" , 0, "", "P 4c -2" , 0 }, // 381 - {106, 106, "P 42 b c" , 0, "", "P 4c -2ab" , 0 }, // 382 - {107, 107, "I 4 m m" , 0, "", "I 4 -2" , 0 }, 
// 383 - {108, 108, "I 4 c m" , 0, "", "I 4 -2c" , 0 }, // 384 - {109, 109, "I 41 m d" , 0, "", "I 4bw -2" , 0 }, // 385 - {110, 110, "I 41 c d" , 0, "", "I 4bw -2c" , 0 }, // 386 - {111, 111, "P -4 2 m" , 0, "", "P -4 2" , 0 }, // 387 - {112, 112, "P -4 2 c" , 0, "", "P -4 2c" , 0 }, // 388 - {113, 113, "P -4 21 m" , 0, "", "P -4 2ab" , 0 }, // 389 - {114, 114, "P -4 21 c" , 0, "", "P -4 2n" , 0 }, // 390 - {115, 115, "P -4 m 2" , 0, "", "P -4 -2" , 0 }, // 391 - {116, 116, "P -4 c 2" , 0, "", "P -4 -2c" , 0 }, // 392 - {117, 117, "P -4 b 2" , 0, "", "P -4 -2ab" , 0 }, // 393 - {118, 118, "P -4 n 2" , 0, "", "P -4 -2n" , 0 }, // 394 - {119, 119, "I -4 m 2" , 0, "", "I -4 -2" , 0 }, // 395 - {120, 120, "I -4 c 2" , 0, "", "I -4 -2c" , 0 }, // 396 - {121, 121, "I -4 2 m" , 0, "", "I -4 2" , 0 }, // 397 - {122, 122, "I -4 2 d" , 0, "", "I -4 2bw" , 0 }, // 398 - {123, 123, "P 4/m m m" , 0, "", "-P 4 2" , 0 }, // 399 - {124, 124, "P 4/m c c" , 0, "", "-P 4 2c" , 0 }, // 400 - {125, 125, "P 4/n b m" , '1', "", "P 4 2 -1ab" , 21}, // 401 - {125, 0, "P 4/n b m" , '2', "", "-P 4a 2b" , 0 }, // 402 - {126, 126, "P 4/n n c" , '1', "", "P 4 2 -1n" , 20}, // 403 - {126, 0, "P 4/n n c" , '2', "", "-P 4a 2bc" , 0 }, // 404 - {127, 127, "P 4/m b m" , 0, "", "-P 4 2ab" , 0 }, // 405 - {128, 128, "P 4/m n c" , 0, "", "-P 4 2n" , 0 }, // 406 - {129, 129, "P 4/n m m" , '1', "", "P 4ab 2ab -1ab", 29}, // 407 - {129, 0, "P 4/n m m" , '2', "", "-P 4a 2a" , 0 }, // 408 - {130, 130, "P 4/n c c" , '1', "", "P 4ab 2n -1ab" , 29}, // 409 - {130, 0, "P 4/n c c" , '2', "", "-P 4a 2ac" , 0 }, // 410 - {131, 131, "P 42/m m c", 0, "", "-P 4c 2" , 0 }, // 411 - {132, 132, "P 42/m c m", 0, "", "-P 4c 2c" , 0 }, // 412 - {133, 133, "P 42/n b c", '1', "", "P 4n 2c -1n" , 32}, // 413 - {133, 0, "P 42/n b c", '2', "", "-P 4ac 2b" , 0 }, // 414 - {134, 134, "P 42/n n m", '1', "", "P 4n 2 -1n" , 33}, // 415 - {134, 0, "P 42/n n m", '2', "", "-P 4ac 2bc" , 0 }, // 416 - {135, 135, "P 42/m b c", 0, "", 
"-P 4c 2ab" , 0 }, // 417 - {136, 136, "P 42/m n m", 0, "", "-P 4n 2n" , 0 }, // 418 - {137, 137, "P 42/n m c", '1', "", "P 4n 2n -1n" , 32}, // 419 - {137, 0, "P 42/n m c", '2', "", "-P 4ac 2a" , 0 }, // 420 - {138, 138, "P 42/n c m", '1', "", "P 4n 2ab -1n" , 33}, // 421 - {138, 0, "P 42/n c m", '2', "", "-P 4ac 2ac" , 0 }, // 422 - {139, 139, "I 4/m m m" , 0, "", "-I 4 2" , 0 }, // 423 - {140, 140, "I 4/m c m" , 0, "", "-I 4 2c" , 0 }, // 424 - {141, 141, "I 41/a m d", '1', "", "I 4bw 2bw -1bw", 34}, // 425 - {141, 0, "I 41/a m d", '2', "", "-I 4bd 2" , 0 }, // 426 - {142, 142, "I 41/a c d", '1', "", "I 4bw 2aw -1bw", 35}, // 427 - {142, 0, "I 41/a c d", '2', "", "-I 4bd 2c" , 0 }, // 428 - {143, 143, "P 3" , 0, "", "P 3" , 0 }, // 429 - {144, 144, "P 31" , 0, "", "P 31" , 0 }, // 430 - {145, 145, "P 32" , 0, "", "P 32" , 0 }, // 431 - {146, 146, "R 3" , 'H', "", "R 3" , 0 }, // 432 - {146, 1146, "R 3" , 'R', "", "P 3*" , 36}, // 433 - {147, 147, "P -3" , 0, "", "-P 3" , 0 }, // 434 - {148, 148, "R -3" , 'H', "", "-R 3" , 0 }, // 435 - {148, 1148, "R -3" , 'R', "", "-P 3*" , 36}, // 436 - {149, 149, "P 3 1 2" , 0, "", "P 3 2" , 0 }, // 437 - {150, 150, "P 3 2 1" , 0, "", "P 3 2\"" , 0 }, // 438 - {151, 151, "P 31 1 2" , 0, "", "P 31 2 (0 0 4)", 0 }, // 439 - {152, 152, "P 31 2 1" , 0, "", "P 31 2\"" , 0 }, // 440 - {153, 153, "P 32 1 2" , 0, "", "P 32 2 (0 0 2)", 0 }, // 441 - {154, 154, "P 32 2 1" , 0, "", "P 32 2\"" , 0 }, // 442 - {155, 155, "R 3 2" , 'H', "", "R 3 2\"" , 0 }, // 443 - {155, 1155, "R 3 2" , 'R', "", "P 3* 2" , 36}, // 444 - {156, 156, "P 3 m 1" , 0, "", "P 3 -2\"" , 0 }, // 445 - {157, 157, "P 3 1 m" , 0, "", "P 3 -2" , 0 }, // 446 - {158, 158, "P 3 c 1" , 0, "", "P 3 -2\"c" , 0 }, // 447 - {159, 159, "P 3 1 c" , 0, "", "P 3 -2c" , 0 }, // 448 - {160, 160, "R 3 m" , 'H', "", "R 3 -2\"" , 0 }, // 449 - {160, 1160, "R 3 m" , 'R', "", "P 3* -2" , 36}, // 450 - {161, 161, "R 3 c" , 'H', "", "R 3 -2\"c" , 0 }, // 451 - {161, 1161, "R 3 c" , 'R', 
"", "P 3* -2n" , 36}, // 452 - {162, 162, "P -3 1 m" , 0, "", "-P 3 2" , 0 }, // 453 - {163, 163, "P -3 1 c" , 0, "", "-P 3 2c" , 0 }, // 454 - {164, 164, "P -3 m 1" , 0, "", "-P 3 2\"" , 0 }, // 455 - {165, 165, "P -3 c 1" , 0, "", "-P 3 2\"c" , 0 }, // 456 - {166, 166, "R -3 m" , 'H', "", "-R 3 2\"" , 0 }, // 457 - {166, 1166, "R -3 m" , 'R', "", "-P 3* 2" , 36}, // 458 - {167, 167, "R -3 c" , 'H', "", "-R 3 2\"c" , 0 }, // 459 - {167, 1167, "R -3 c" , 'R', "", "-P 3* 2n" , 36}, // 460 - {168, 168, "P 6" , 0, "", "P 6" , 0 }, // 461 - {169, 169, "P 61" , 0, "", "P 61" , 0 }, // 462 - {170, 170, "P 65" , 0, "", "P 65" , 0 }, // 463 - {171, 171, "P 62" , 0, "", "P 62" , 0 }, // 464 - {172, 172, "P 64" , 0, "", "P 64" , 0 }, // 465 - {173, 173, "P 63" , 0, "", "P 6c" , 0 }, // 466 - {174, 174, "P -6" , 0, "", "P -6" , 0 }, // 467 - {175, 175, "P 6/m" , 0, "", "-P 6" , 0 }, // 468 - {176, 176, "P 63/m" , 0, "", "-P 6c" , 0 }, // 469 - {177, 177, "P 6 2 2" , 0, "", "P 6 2" , 0 }, // 470 - {178, 178, "P 61 2 2" , 0, "", "P 61 2 (0 0 5)", 0 }, // 471 - {179, 179, "P 65 2 2" , 0, "", "P 65 2 (0 0 1)", 0 }, // 472 - {180, 180, "P 62 2 2" , 0, "", "P 62 2 (0 0 4)", 0 }, // 473 - {181, 181, "P 64 2 2" , 0, "", "P 64 2 (0 0 2)", 0 }, // 474 - {182, 182, "P 63 2 2" , 0, "", "P 6c 2c" , 0 }, // 475 - {183, 183, "P 6 m m" , 0, "", "P 6 -2" , 0 }, // 476 - {184, 184, "P 6 c c" , 0, "", "P 6 -2c" , 0 }, // 477 - {185, 185, "P 63 c m" , 0, "", "P 6c -2" , 0 }, // 478 - {186, 186, "P 63 m c" , 0, "", "P 6c -2c" , 0 }, // 479 - {187, 187, "P -6 m 2" , 0, "", "P -6 2" , 0 }, // 480 - {188, 188, "P -6 c 2" , 0, "", "P -6c 2" , 0 }, // 481 - {189, 189, "P -6 2 m" , 0, "", "P -6 -2" , 0 }, // 482 - {190, 190, "P -6 2 c" , 0, "", "P -6c -2c" , 0 }, // 483 - {191, 191, "P 6/m m m" , 0, "", "-P 6 2" , 0 }, // 484 - {192, 192, "P 6/m c c" , 0, "", "-P 6 2c" , 0 }, // 485 - {193, 193, "P 63/m c m", 0, "", "-P 6c 2" , 0 }, // 486 - {194, 194, "P 63/m m c", 0, "", "-P 6c 2c" , 0 }, // 487 - 
{195, 195, "P 2 3" , 0, "", "P 2 2 3" , 0 }, // 488 - {196, 196, "F 2 3" , 0, "", "F 2 2 3" , 0 }, // 489 - {197, 197, "I 2 3" , 0, "", "I 2 2 3" , 0 }, // 490 - {198, 198, "P 21 3" , 0, "", "P 2ac 2ab 3" , 0 }, // 491 - {199, 199, "I 21 3" , 0, "", "I 2b 2c 3" , 0 }, // 492 - {200, 200, "P m -3" , 0, "", "-P 2 2 3" , 0 }, // 493 - {201, 201, "P n -3" , '1', "", "P 2 2 3 -1n" , 20}, // 494 - {201, 0, "P n -3" , '2', "", "-P 2ab 2bc 3" , 0 }, // 495 - {202, 202, "F m -3" , 0, "", "-F 2 2 3" , 0 }, // 496 - {203, 203, "F d -3" , '1', "", "F 2 2 3 -1d" , 27}, // 497 - {203, 0, "F d -3" , '2', "", "-F 2uv 2vw 3" , 0 }, // 498 - {204, 204, "I m -3" , 0, "", "-I 2 2 3" , 0 }, // 499 - {205, 205, "P a -3" , 0, "", "-P 2ac 2ab 3" , 0 }, // 500 - {206, 206, "I a -3" , 0, "", "-I 2b 2c 3" , 0 }, // 501 - {207, 207, "P 4 3 2" , 0, "", "P 4 2 3" , 0 }, // 502 - {208, 208, "P 42 3 2" , 0, "", "P 4n 2 3" , 0 }, // 503 - {209, 209, "F 4 3 2" , 0, "", "F 4 2 3" , 0 }, // 504 - {210, 210, "F 41 3 2" , 0, "", "F 4d 2 3" , 0 }, // 505 - {211, 211, "I 4 3 2" , 0, "", "I 4 2 3" , 0 }, // 506 - {212, 212, "P 43 3 2" , 0, "", "P 4acd 2ab 3" , 0 }, // 507 - {213, 213, "P 41 3 2" , 0, "", "P 4bd 2ab 3" , 0 }, // 508 - {214, 214, "I 41 3 2" , 0, "", "I 4bd 2c 3" , 0 }, // 509 - {215, 215, "P -4 3 m" , 0, "", "P -4 2 3" , 0 }, // 510 - {216, 216, "F -4 3 m" , 0, "", "F -4 2 3" , 0 }, // 511 - {217, 217, "I -4 3 m" , 0, "", "I -4 2 3" , 0 }, // 512 - {218, 218, "P -4 3 n" , 0, "", "P -4n 2 3" , 0 }, // 513 - {219, 219, "F -4 3 c" , 0, "", "F -4a 2 3" , 0 }, // 514 - {220, 220, "I -4 3 d" , 0, "", "I -4bd 2c 3" , 0 }, // 515 - {221, 221, "P m -3 m" , 0, "", "-P 4 2 3" , 0 }, // 516 - {222, 222, "P n -3 n" , '1', "", "P 4 2 3 -1n" , 20}, // 517 - {222, 0, "P n -3 n" , '2', "", "-P 4a 2bc 3" , 0 }, // 518 - {223, 223, "P m -3 n" , 0, "", "-P 4n 2 3" , 0 }, // 519 - {224, 224, "P n -3 m" , '1', "", "P 4n 2 3 -1n" , 30}, // 520 - {224, 0, "P n -3 m" , '2', "", "-P 4bc 2bc 3" , 0 }, // 521 - {225, 
225, "F m -3 m" , 0, "", "-F 4 2 3" , 0 }, // 522 - {226, 226, "F m -3 c" , 0, "", "-F 4a 2 3" , 0 }, // 523 - {227, 227, "F d -3 m" , '1', "", "F 4d 2 3 -1d" , 27}, // 524 - {227, 0, "F d -3 m" , '2', "", "-F 4vw 2vw 3" , 0 }, // 525 - {228, 228, "F d -3 c" , '1', "", "F 4d 2 3 -1ad" , 37}, // 526 - {228, 0, "F d -3 c" , '2', "", "-F 4ud 2vw 3" , 0 }, // 527 - {229, 229, "I m -3 m" , 0, "", "-I 4 2 3" , 0 }, // 528 - {230, 230, "I a -3 d" , 0, "", "-I 4bd 2c 3" , 0 }, // 529 - // And extra entries from syminfo.lib - { 5, 5005, "I 1 21 1" , 0, "b4", "I 2yb" , 38}, // 530 - { 5, 3005, "C 1 21 1" , 0, "b5", "C 2yb" , 14}, // 531 - { 18, 1018, "P 21212(a)", 0, "", "P 2ab 2a" , 14}, // 532 - { 20, 1020, "C 2 2 21a)", 0, "", "C 2ac 2" , 39}, // 533 - { 21, 1021, "C 2 2 2a" , 0, "", "C 2ab 2b" , 14}, // 534 - { 22, 1022, "F 2 2 2a" , 0, "", "F 2 2c" , 40}, // 535 - { 23, 1023, "I 2 2 2a" , 0, "", "I 2ab 2bc" , 33}, // 536 - { 94, 1094, "P 42 21 2a", 0, "", "P 4bc 2a" , 20}, // 537 - {197, 1197, "I 2 3a" , 0, "", "I 2ab 2bc 3" , 30}, // 538 - // And extra entries from Crystallographic Space Group Diagrams and Tables - // http://img.chem.ucl.ac.uk/sgp/ - // We want to have all entries from Open Babel and PDB. 
- // If available, Hall symbols are taken from - // https://cci.lbl.gov/cctbx/multiple_cell.html - // triclinic - enlarged unit cells - { 1, 0, "A 1" , 0, "", "A 1" , 41}, // 539 - { 1, 0, "B 1" , 0, "", "B 1" , 42}, // 540 - { 1, 0, "C 1" , 0, "", "C 1" , 43}, // 541 - { 1, 0, "F 1" , 0, "", "F 1" , 44}, // 542 - { 1, 0, "I 1" , 0, "", "I 1" , 45}, // 543 - { 2, 0, "A -1" , 0, "", "-A 1" , 41}, // 544 - { 2, 0, "B -1" , 0, "", "-B 1" , 42}, // 545 - { 2, 0, "C -1" , 0, "", "-C 1" , 43}, // 546 - { 2, 0, "F -1" , 0, "", "-F 1" , 44}, // 547 - { 2, 0, "I -1" , 0, "", "-I 1" , 45}, // 548 - // monoclinic (qualifiers such as "b1" are assigned arbitrary unique numbers) - { 3, 0, "B 1 2 1" , 0, "b1", "B 2y" , 46}, // 549 - { 3, 0, "C 1 1 2" , 0, "c1", "C 2" , 47}, // 550 - { 4, 0, "B 1 21 1" , 0, "b1", "B 2yb" , 46}, // 551 - { 4, 0, "C 1 1 21" , 0, "c2", "C 2c" , 47}, // 552 - { 5, 0, "F 1 2 1" , 0, "b6", "F 2y" , 48}, // 553 - { 8, 0, "F 1 m 1" , 0, "b4", "F -2y" , 48}, // 554 - { 9, 0, "F 1 d 1" , 0, "b4", "F -2yuw" , 49}, // 555 - { 12, 0, "F 1 2/m 1" , 0, "b4", "-F 2y" , 48}, // 556 - // orthorhombic - { 64, 0, "A b a m" , 0, "", "-A 2 2ab" , 3 }, // 557 (==306) - // tetragonal - enlarged C- and F-centred unit cells - { 89, 0, "C 4 2 2" , 0, "", "C 4 2" , 50}, // 558 - { 90, 0, "C 4 2 21" , 0, "", "C 4a 2" , 50}, // 559 - { 97, 0, "F 4 2 2" , 0, "", "F 4 2" , 50}, // 560 - {115, 0, "C -4 2 m" , 0, "", "C -4 2" , 50}, // 561 - {117, 0, "C -4 2 b" , 0, "", "C -4 2ya" , 50}, // 562 - {139, 0, "F 4/m m m" , 0, "", "-F 4 2" , 50}, // 563 -}; - -const SpaceGroupAltName spacegroup_tables::alt_names[28] = { - // In 1990's ITfC vol.A changed some of the standard names, introducing - // symbol 'e'. sgtbx interprets these new symbols with option ad_hoc_1992. - // spglib uses only the new symbols. 
- {"A e m 2", 0, 190}, // A b m 2 - {"B m e 2", 0, 191}, // B m a 2 - {"B 2 e m", 0, 192}, // B 2 c m - {"C 2 m e", 0, 193}, // C 2 m b - {"C m 2 e", 0, 194}, // C m 2 a - {"A e 2 m", 0, 195}, // A c 2 m - {"A e a 2", 0, 202}, // A b a 2 - {"B b e 2", 0, 203}, // B b a 2 - {"B 2 e b", 0, 204}, // B 2 c b - {"C 2 c e", 0, 205}, // C 2 c b - {"C c 2 e", 0, 206}, // C c 2 a - {"A e 2 a", 0, 207}, // A c 2 a - {"C m c e", 0, 303}, // C m c a - {"C c m e", 0, 304}, // C c m b - {"A e m a", 0, 305}, // A b m a - {"A e a m", 0, 306}, // A c a m - {"B b e m", 0, 307}, // B b c m - {"B m e b", 0, 308}, // B m a b - {"C m m e", 0, 315}, // C m m a - {"A e m m", 0, 317}, // A b m m - {"B m e m", 0, 319}, // B m c m - {"C c c e", '1', 321}, // C c c a - {"C c c e", '2', 322}, // C c c a - {"A e a a", '1', 325}, // A b a a - {"A e a a", '2', 326}, // A b a a - {"B b e b", '1', 329}, // B b c b - {"B b e b", '2', 330}, // B b c b - // help with parsing of unusual setting names that are present in the PDB - {"P 21 21 2a", 0, 532}, // P 21212(a) -}; - -// This table was generated by tools/gen_reciprocal_asu.py. -const unsigned char spacegroup_tables::ccp4_hkl_asu[230] = { - 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 7, 6, 7, 6, 7, 7, 7, - 6, 7, 6, 7, 7, 6, 6, 7, 7, 7, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9 -}; - -// Generated by tools/gen_sg_table.py. 
-const char* get_basisop(int basisop_idx) { - static const char* basisops[51] = { - "x,y,z", // 0 - "z,x,y", // 1 - "y,z,x", // 2 - "z,y,-x", // 3 - "x,y,-x+z", // 4 - "-x,z,y", // 5 - "-x+z,x,y", // 6 - "y,-x,z", // 7 - "y,-x+z,x", // 8 - "x-z,y,z", // 9 - "z,x-z,y", // 10 - "y,z,x-z", // 11 - "z,y,-x+z", // 12 - "x+z,y,-x", // 13 - "x+1/4,y+1/4,z", // 14 - "-x+z,z,y", // 15 - "-x,x+z,y", // 16 - "y,-x+z,z", // 17 - "y,-x,x+z", // 18 - "x+1/4,y-1/4,z", // 19 - "x-1/4,y-1/4,z-1/4", // 20 - "x-1/4,y-1/4,z", // 21 - "z,x-1/4,y-1/4", // 22 - "y-1/4,z,x-1/4", // 23 - "x-1/2,y-1/4,z+1/4", // 24 - "z+1/4,x-1/2,y-1/4", // 25 - "y-1/4,z+1/4,x-1/2", // 26 - "x+1/8,y+1/8,z+1/8", // 27 - "x+1/4,y-1/4,z+1/4", // 28 - "x-1/4,y+1/4,z", // 29 - "x+1/4,y+1/4,z+1/4", // 30 - "x,y+1/4,z+1/8", // 31 - "x-1/4,y+1/4,z+1/4", // 32 - "x-1/4,y+1/4,z-1/4", // 33 - "x-1/2,y+1/4,z+1/8", // 34 - "x-1/2,y+1/4,z-3/8", // 35 - "-y+z,x+z,-x+y+z", // 36 - "x-1/8,y-1/8,z-1/8", // 37 - "x+1/4,y+1/4,-x+z-1/4", // 38 - "x+1/4,y,z", // 39 - "x,y,z+1/4", // 40 - "-x,-y/2+z/2,y/2+z/2", // 41 - "-x/2+z/2,-y,x/2+z/2", // 42 - "x/2+y/2,x/2-y/2,-z", // 43 - "y/2+z/2,x/2+z/2,x/2+y/2", // 44 - "-x/2+y/2+z/2,x/2-y/2+z/2,x/2+y/2-z/2", // 45 - "x/2,y,-x/2+z", // 46 - "-x/2+z,x/2,y", // 47 - "x-z/2,y,z/2", // 48 - "x+z/2,y,z/2", // 49 - "x/2+y/2,-x/2+y/2,z", // 50 - }; - return basisops[basisop_idx]; -} - -const SpaceGroup* find_spacegroup_by_name(std::string name, double alpha, double gamma, - const char* prefer) { - bool prefer_2 = false; - bool prefer_R = false; - if (prefer) - for (const char* p = prefer; *p != '\0'; ++p) { - if (*p == '2') - prefer_2 = true; - else if (*p == 'R') - prefer_R = true; - else if (*p != '1' && *p != 'H') - throw std::invalid_argument("find_spacegroup_by_name(): invalid arg 'prefer'"); - } - const char* p = skip_space(name.c_str()); - if (*p >= '0' && *p <= '9') { // handle numbers - char *endptr; - long n = std::strtol(p, &endptr, 10); - return *endptr == '\0' ? 
find_spacegroup_by_number(n) : nullptr; - } - char first = *p & ~0x20; // to uppercase - if (first == '\0') - return nullptr; - if (first == 'H') - first = 'R'; - p = skip_space(p+1); - size_t start = p - name.c_str(); - // change letters to lower case, except the letter after : - for (size_t i = start; i < name.size(); ++i) { - if (name[i] >= 'A' && name[i] <= 'Z') - name[i] |= 0x20; // to lowercase - else if (name[i] == ':') - while (++i < name.size()) - if (name[i] >= 'a' && name[i] <= 'z') - name[i] &= ~0x20; // to uppercase - } - // allow names ending with R or H, such as R3R instead of R3:R - if (name.back() == 'h' || name.back() == 'r') { - name.back() &= ~0x20; // to uppercase - name.insert(name.end() - 1, ':'); - } - // The string that const char* p points to was just modified. - // This confuses some compilers (GCC 4.8), so let's re-assign p. - p = name.c_str() + start; - - for (const SpaceGroup& sg : spacegroup_tables::main) - if (sg.hm[0] == first) { - if (sg.hm[2] == *p) { - const char* a = skip_space(p + 1); - const char* b = skip_space(sg.hm + 3); - // In IT 1935 and 1952, symbols of centrosymmetric, cubic space groups - // 200-206 and 221-230 had symbol 3 (not -3), e.g. Pm3 instead of Pm-3, - // as listed in Table 3.3.3.1 in ITfC (2016) vol. A, p.788. - while ((*a == *b && *b != '\0') || - (*a == '3' && *b == '-' && b == sg.hm + 4 && *++b == '3')) { - a = skip_space(a+1); - b = skip_space(b+1); - } - if (*b == '\0') { - if (*a == '\0') { - // Change hexagonal settings to rhombohedral if the unit cell - // angles are more consistent with the latter. - // We have possible ambiguity in the hexagonal crystal family. - // For instance, "R 3" may mean "R 3:H" (hexagonal setting) or - // "R 3:R" (rhombohedral setting). The :H symbols come first - // in the table and are used by default. The ratio gamma:alpha - // is 120:90 in the hexagonal system and 1:1 in rhombohedral. - // We assume that the 'R' entry follows directly the 'H' entry. 
- if (sg.ext == 'H' && (alpha == 0. ? prefer_R : gamma < 1.125 * alpha)) - return &sg + 1; - // Similarly, the origin choice #2 follows directly #1. - if (sg.ext == '1' && prefer_2) - return &sg + 1; - return &sg; - } - if (*a == ':' && *skip_space(a+1) == sg.ext) - return &sg; - } - } else if (sg.hm[2] == '1' && sg.hm[3] == ' ') { - // check monoclinic short names, matching P2 to "P 1 2 1"; - // as an exception "B 2" == "B 1 1 2" (like in the PDB) - const char* b = sg.hm + 4; - if (*b != '1' || (first == 'B' && *++b == ' ' && *++b != '1')) { - char end = (b == sg.hm + 4 ? ' ' : '\0'); - const char* a = skip_space(p); - while (*a == *b && *b != end) { - ++a; - ++b; - } - if (*skip_space(a) == '\0' && *b == end) - return &sg; - } - } - } - for (const SpaceGroupAltName& sg : spacegroup_tables::alt_names) - if (sg.hm[0] == first && sg.hm[2] == *p) { - const char* a = skip_space(p + 1); - const char* b = skip_space(sg.hm + 3); - while (*a == *b && *b != '\0') { - a = skip_space(a+1); - b = skip_space(b+1); - } - if (*b == '\0' && - (*a == '\0' || (*a == ':' && *skip_space(a+1) == sg.ext))) - return &spacegroup_tables::main[sg.pos]; - } - return nullptr; -} - -} // namespace gemmi - -- 2.52.0 From e1c661e48db1e1d869742c1060dda47af08de8ab Mon Sep 17 00:00:00 2001 From: Filip Leonarski Date: Wed, 13 May 2026 13:32:25 +0200 Subject: [PATCH 048/132] Revert "Gemmi: Include through FetchContent full gemmi library (not limited cpp/hpp files)" This reverts commit b06dfc8357e9706d96f55eb0991e48aaeabdcae2. 
--- CMakeLists.txt | 11 +- common/CMakeLists.txt | 2 +- docs/SOFTWARE.md | 2 +- image_analysis/CMakeLists.txt | 2 +- symmetry/CMakeLists.txt | 2 + symmetry/LICENSE.txt | 373 ++++++++++ symmetry/gemmi/cellred.hpp | 406 +++++++++++ symmetry/gemmi/fail.hpp | 93 +++ symmetry/gemmi/math.hpp | 458 +++++++++++++ symmetry/gemmi/symmetry.hpp | 1044 ++++++++++++++++++++++++++++ symmetry/gemmi/unitcell.hpp | 618 +++++++++++++++++ symmetry/symmetry.cpp | 1215 +++++++++++++++++++++++++++++++++ 12 files changed, 4214 insertions(+), 12 deletions(-) create mode 100644 symmetry/CMakeLists.txt create mode 100644 symmetry/LICENSE.txt create mode 100644 symmetry/gemmi/cellred.hpp create mode 100644 symmetry/gemmi/fail.hpp create mode 100644 symmetry/gemmi/math.hpp create mode 100644 symmetry/gemmi/symmetry.hpp create mode 100644 symmetry/gemmi/unitcell.hpp create mode 100644 symmetry/symmetry.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9beada6d..c49e0644 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,13 +95,6 @@ SET(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE) SET(HTTPLIB_USE_NON_BLOCKING_GETADDRINFO OFF CACHE BOOL "" FORCE) SET(HTTPLIB_REQUIRE_ZLIB ON CACHE BOOL "" FORCE) -FetchContent_Declare( - gemmi - GIT_REPOSITORY https://github.com/fleon-psi/gemmi - GIT_TAG d6dcc1f57eedf7ba34a7d2d2ed283075113040bf - EXCLUDE_FROM_ALL -) - FetchContent_Declare( spdlog GIT_REPOSITORY https://github.com/gabime/spdlog.git @@ -149,7 +142,7 @@ FetchContent_Declare( EXCLUDE_FROM_ALL ) -FetchContent_MakeAvailable(zstd sls_detector_package catch2 hdf5 spdlog httplib gemmi) +FetchContent_MakeAvailable(zstd sls_detector_package catch2 hdf5 spdlog httplib) ADD_SUBDIRECTORY(jungfrau) ADD_SUBDIRECTORY(compression) @@ -160,7 +153,7 @@ ADD_SUBDIRECTORY(reader) ADD_SUBDIRECTORY(detector_control) ADD_SUBDIRECTORY(image_puller) ADD_SUBDIRECTORY(preview) -#ADD_SUBDIRECTORY(symmetry) +ADD_SUBDIRECTORY(symmetry) ADD_SUBDIRECTORY(xds-plugin) IF (JFJOCH_WRITER_ONLY) diff --git a/common/CMakeLists.txt 
b/common/CMakeLists.txt index d9b1d8b2..a15a652b 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -130,7 +130,7 @@ ADD_LIBRARY(JFJochCommon STATIC ScalingSettings.h ) -TARGET_LINK_LIBRARIES(JFJochCommon JFJochLogger Compression JFCalibration gemmi_cpp Threads::Threads -lrt ) +TARGET_LINK_LIBRARIES(JFJochCommon JFJochLogger Compression JFCalibration gemmi Threads::Threads -lrt ) TARGET_LINK_LIBRARIES(JFJochZMQ "$") diff --git a/docs/SOFTWARE.md b/docs/SOFTWARE.md index bddc3952..d64671e8 100644 --- a/docs/SOFTWARE.md +++ b/docs/SOFTWARE.md @@ -33,7 +33,6 @@ Automatically downloaded by CMake and statically linked: * Catch2 testing library - see [github.com/catchorg/Catch2](https://github.com/catchorg/Catch2) * Ceres Solver library for least square optimization - see [http://ceres-solver.org/] * Spdlog logging library - see [github.com/gabime/spdlog](https://github.com/gabime/spdlog) -* GEMMI library by Global Phasing - see [github.com/project-gemmi/gemmi](https://github.com/project-gemmi/gemmi) Please follow the link provided above to check for LICENSE file. Building code with dependencies above requires access from the build system to github.com. 
Directly included in the repository: @@ -45,5 +44,6 @@ Directly included in the repository: * LZ4 compression by Y.Collet - see [github.com/lz4/lz4](https://github.com/lz4/lz4) * ZeroMQ library (through slsDetectorPackage) - see [github.com/zeromq/libzmq](https://github.com/zeromq/libzmq) * Base64 decoder/encoder - see [gist.github.com/tomykaira](https://gist.github.com/tomykaira/f0fd86b6c73063283afe550bc5d77594) +* GEMMI library by Global Phasing - see [github.com/project-gemmi/gemmi](https://github.com/project-gemmi/gemmi) For license check LICENSE file in respective directory diff --git a/image_analysis/CMakeLists.txt b/image_analysis/CMakeLists.txt index 6bb52518..394dbaed 100644 --- a/image_analysis/CMakeLists.txt +++ b/image_analysis/CMakeLists.txt @@ -44,4 +44,4 @@ ADD_SUBDIRECTORY(scale_merge) ADD_SUBDIRECTORY(image_preprocessing) ADD_SUBDIRECTORY(azint) -TARGET_LINK_LIBRARIES(JFJochImageAnalysis JFJochAzIntEngine JFJochImagePreprocessing JFJochBraggPrediction JFJochBraggIntegration JFJochLatticeSearch JFJochIndexing JFJochSpotFinding JFJochCommon JFJochGeomRefinement JFJochScaleMerge) +TARGET_LINK_LIBRARIES(JFJochImageAnalysis JFJochAzIntEngine JFJochImagePreprocessing JFJochBraggPrediction JFJochBraggIntegration JFJochLatticeSearch JFJochIndexing JFJochSpotFinding JFJochCommon JFJochGeomRefinement JFJochScaleMerge gemmi) diff --git a/symmetry/CMakeLists.txt b/symmetry/CMakeLists.txt new file mode 100644 index 00000000..fed3f792 --- /dev/null +++ b/symmetry/CMakeLists.txt @@ -0,0 +1,2 @@ +ADD_LIBRARY(gemmi STATIC symmetry.cpp gemmi/symmetry.hpp gemmi/fail.hpp) +TARGET_INCLUDE_DIRECTORIES(gemmi PUBLIC .) \ No newline at end of file diff --git a/symmetry/LICENSE.txt b/symmetry/LICENSE.txt new file mode 100644 index 00000000..14e2f777 --- /dev/null +++ b/symmetry/LICENSE.txt @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. 
"Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. 
"Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. 
Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. 
Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. 
Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. 
However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. 
* +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. * +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. 
+Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. 
If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. + +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/symmetry/gemmi/cellred.hpp b/symmetry/gemmi/cellred.hpp new file mode 100644 index 00000000..7174f776 --- /dev/null +++ b/symmetry/gemmi/cellred.hpp @@ -0,0 +1,406 @@ +// Copyright 2021 Global Phasing Ltd. +// +// Unit cell reductions: Buerger, Niggli, Selling-Delaunay. + +#ifndef GEMMI_CELLRED_HPP_ +#define GEMMI_CELLRED_HPP_ + +#include +#include +#include // for unique_ptr +#include "math.hpp" // for deg +#include "symmetry.hpp" // for Op +#include "unitcell.hpp" // for UnitCell + +namespace gemmi { + +struct SellingVector; + +// GruberVector contains G6 vector (G for Gruber) and cell reduction algorithms. +// Originally, in B. Gruber, Acta Cryst. A29, 433 (1973), the vector was called +// "characteristic" of a lattice/cell. +// Functions that take epsilon as a parameter use it for comparisons, +// as proposed in Grosse-Kunstleve et al, Acta Cryst. (2004) A60, 1. 
+struct GruberVector { + // a.a b.b c.c 2b.c 2a.c 2a.b + double A, B, C, xi, eta, zeta; // the 1973 paper uses names A B C ξ η ζ + std::unique_ptr change_of_basis; // we use only Op::Rot + + // m - orthogonalization matrix of a primitive cell + explicit GruberVector(const Mat33& m) + : A(m.column_dot(0,0)), + B(m.column_dot(1,1)), + C(m.column_dot(2,2)), + xi(2 * m.column_dot(1,2)), + eta(2 * m.column_dot(0,2)), + zeta(2 * m.column_dot(0,1)) {} + + explicit GruberVector(const std::array& g6) + : A(g6[0]), B(g6[1]), C(g6[2]), xi(g6[3]), eta(g6[4]), zeta(g6[5]) {} + + GruberVector(const UnitCell& u, char centring, bool track_change_of_basis=false) + : GruberVector(u.primitive_orth_matrix(centring)) { + if (track_change_of_basis) + set_change_of_basis(Op{centred_to_primitive(centring), {0,0,0}, 'x'}); + } + + GruberVector(const UnitCell& u, const SpaceGroup* sg, bool track_change_of_basis=false) + : GruberVector(u, sg ? sg->centring_type() : 'P', track_change_of_basis) {} + + void set_change_of_basis(const Op& op) { change_of_basis.reset(new Op(op)); } + + std::array parameters() const { return {A, B, C, xi, eta, zeta}; } + std::array cell_parameters() const { + // inverse of UnitCell::g6() + double a = std::sqrt(A); + double b = std::sqrt(B); + double c = std::sqrt(C); + return {a, b, c, + deg(std::acos(xi/(2*b*c))), + deg(std::acos(eta/(2*a*c))), + deg(std::acos(zeta/(2*a*b)))}; + } + UnitCell get_cell() const { return UnitCell(cell_parameters()); } + + SellingVector selling() const; + + bool is_normalized() const { + // eq(3) from Gruber 1973 + return A <= B && B <= C && + (A != B || std::abs(xi) <= std::abs(eta)) && + (B != C || std::abs(eta) <= std::abs(zeta)) && + (xi > 0) == (eta > 0) && (xi > 0) == (zeta > 0); + } + + bool is_buerger(double epsilon=1e-9) const { + return is_normalized() && + // eq (4) from Gruber 1973 + std::abs(xi) <= B + epsilon && + std::abs(eta) <= A + epsilon && + std::abs(zeta) <= A + epsilon; + } + + // Algorithm N from Gruber (1973). 
+ // Returns branch taken in N3. + void normalize(double eps=1e-9) { + auto step_N1 = [&]() { + if (A - B > eps || (A - B >= -eps && std::abs(xi) > std::abs(eta) + eps)) { // N1 + std::swap(A, B); + std::swap(xi, eta); + if (change_of_basis) + swap_columns_and_negate(0, 1); + } + }; + step_N1(); + if (B - C > eps || (B - C >= -eps && std::abs(eta) > std::abs(zeta) + eps)) { // N2 + std::swap(B, C); + std::swap(eta, zeta); + if (change_of_basis) + swap_columns_and_negate(1, 2); + // To make it faster, instead of "go to the point N1" we repeat N1 once + // (which is equivalent - three swaps are sufficient to reorder ABC). + step_N1(); + } + // N3 + // xi * eta * zeta > 0 <=> positive count is 1 or 3 and no zeros + int pos_count = (xi > eps) + (eta > eps) + (zeta > eps); + int nonneg_count = (xi >= -eps) + (eta >= -eps) + (zeta >= -eps); + double sgn = (pos_count == nonneg_count && pos_count % 2 == 1) ? 1 : -1; + if (change_of_basis) { + if (sgn * xi < -eps) negate_column(0); + if (sgn * eta < -eps) negate_column(1); + if (sgn * zeta < -eps) negate_column(2); + if (pos_count != nonneg_count && pos_count % 2 == 1) + negate_column(std::fabs(zeta) <= eps ? 2 : + std::fabs(eta) <= eps ? 1 : 0); + } + xi = std::copysign(xi, sgn); + eta = std::copysign(eta, sgn); + zeta = std::copysign(zeta, sgn); + } + + // Algorithm B from Gruber (1973). + // Returns true if no change was needed. 
+ bool buerger_step() { + if (std::abs(xi) > B) { // B2 + double j = std::floor(0.5*xi/B + 0.5); + C += j * (j*B - xi); + xi -= 2 * j * B; + eta -= j * zeta; + } else if (std::abs(eta) > A) { // B3 + double j = std::floor(0.5*eta/A + 0.5); + C += j * (j*A - eta); + xi -= j * zeta; + eta -= 2 * j * A; + } else if (std::abs(zeta) > A) { // B4 + double j = std::floor(0.5*zeta/A + 0.5); + B += j * (j*A - zeta); + xi -= j * eta; + zeta -= 2 * j * A; + } else if (xi + eta + zeta + A + B < 0) { // B5 + double j = std::floor(0.5 * (xi + eta) / (A + B + zeta) + 0.5); + C += j * (j * (A + B + zeta) - (xi + eta)); + xi -= j * (2*B + zeta); + eta -= j * (2*A + zeta); + } else { + return true; + } + return false; + } + + // Returns number of iterations. + int buerger_reduce() { + int n = 0; + double prev_sum = -1; + int stall_count = 0; + for (;;) { + normalize(); + // In rare cases numerical errors push the algorithm into infinite loop, + // as described in Grosse-Kunstleve et al, Acta Cryst. (2004) A60, 1. + // Ad-hoc solution: stop if a+b+c is stalled for 5 iterations. + if (++n > 8) { // don't waste time during the first few iterations + double sum = std::sqrt(A) + std::sqrt(B) + std::sqrt(C); + if (std::abs(sum - prev_sum) < sum * 1e-6) { + if (++stall_count == 5) + break; + } else { + stall_count = 0; + } + prev_sum = sum; + } + if (buerger_step()) + break; + } + return n; + } + + // To be called after normalize() or is_normalized(). + // Returns true if it already was Niggli cell. + // Algorithm from Krivy & Gruber, Acta Cryst. (1976) A32, 297. + bool niggli_step(double epsilon=1e-9) { + if (std::abs(xi) > B + epsilon || // step 5. from Krivy-Gruber (1976) + (xi >= B - epsilon && 2 * eta < zeta - epsilon) || + (xi <= -(B - epsilon) && zeta < -epsilon)) { + double sign_xi = xi >= 0 ? 
1 : -1; + C += B - xi * sign_xi; + eta -= zeta * sign_xi; + xi -= 2 * B * sign_xi; + if (change_of_basis) + add_column(1, 2, -int(sign_xi)); + } else if (std::abs(eta) > A + epsilon || // step 6. + (eta >= A - epsilon && 2 * xi < zeta - epsilon) || + (eta <= -(A - epsilon) && zeta < -epsilon)) { + double sign_eta = eta >= 0 ? 1 : -1; + C += A - eta * sign_eta; + xi -= zeta * sign_eta; + eta -= 2 * A * sign_eta; + if (change_of_basis) + add_column(0, 2, -int(sign_eta)); + } else if (std::abs(zeta) > A + epsilon || // step 7. + (zeta >= A - epsilon && 2 * xi < eta - epsilon) || + (zeta <= -(A - epsilon) && eta < -epsilon)) { + double sign_zeta = zeta >= 0 ? 1 : -1; + B += A - zeta * sign_zeta; + xi -= eta * sign_zeta; + zeta -= 2 * A * sign_zeta; + if (change_of_basis) + add_column(0, 1, -int(sign_zeta)); + } else if (xi + eta + zeta + A + B < -epsilon || // step 8. + (xi + eta + zeta + A + B <= epsilon && 2 * (A + eta) + zeta > epsilon)) { + C += A + B + xi + eta + zeta; + xi += 2 * B + zeta; + eta += 2 * A + zeta; + if (change_of_basis) { + add_column(0, 2, 1); + add_column(1, 2, 1); + } + } else { + return true; + } + return false; + } + + // Returns number of iterations. + int niggli_reduce(double epsilon=1e-9, int iteration_limit=100) { + int n = 0; + for (;;) { + normalize(epsilon); + if (++n == iteration_limit || niggli_step(epsilon)) + break; + } + return n; + } + + bool is_niggli(double epsilon=1e-9) const { + return is_normalized() && GruberVector(parameters()).niggli_step(epsilon); + } + +private: + void swap_columns_and_negate(int i, int j) { + for (auto& r : change_of_basis->rot) + std::swap(r[i], r[j]); + for (auto& r : change_of_basis->rot) + for (auto& v : r) + v = -v; + } + void negate_column(int i) { + for (auto& r : change_of_basis->rot) + r[i] = -r[i]; + } + void add_column(int pos, int dest, int sign) { + for (auto& r : change_of_basis->rot) + r[dest] += sign * r[pos]; + } +}; + + +// Selling-Delaunay reduction. 
Based on: +// - chapter "Delaunay reduction and standardization" in +// International Tables for Crystallography vol. A (2016), sec. 3.1.2.3. +// https://onlinelibrary.wiley.com/iucr/itc/Ac/ch3o1v0001/ +// - Patterson & Love (1957), Acta Cryst. 10, 111, +// "Remarks on the Delaunay reduction", doi:10.1107/s0365110x57000328 +// - Andrews et al (2019), Acta Cryst. A75, 115, +// "Selling reduction versus Niggli reduction for crystallographic lattices". +struct SellingVector { + // b.c a.c a.b a.d b.d c.d + std::array s; + + explicit SellingVector(const std::array& s_) : s(s_) {} + + explicit SellingVector(const Mat33& orth) { + Vec3 b[4]; + for (int i = 0; i < 3; ++i) + b[i] = orth.column_copy(i); + b[3]= -b[0] - b[1] - b[2]; + s[0] = b[1].dot(b[2]); + s[1] = b[0].dot(b[2]); + s[2] = b[0].dot(b[1]); + s[3] = b[0].dot(b[3]); + s[4] = b[1].dot(b[3]); + s[5] = b[2].dot(b[3]); + } + + SellingVector(const UnitCell& u, char centring) + : SellingVector(u.primitive_orth_matrix(centring)) {} + SellingVector(const UnitCell& u, const SpaceGroup* sg) + : SellingVector(u, sg ? sg->centring_type() : 'P') {} + + // The reduction minimizes the sum b_i^2 which is equal to -2 sum s_i. 
+ double sum_b_squared() const { + return -2 * (s[0] + s[1] + s[2] + s[3] + s[4] + s[5]); + } + + bool is_reduced(double eps=1e-9) const { + return std::all_of(s.begin(), s.end(), [eps](double x) { return x <= eps; }); + } + + bool reduce_step(double eps=1e-9) { + //printf(" s = %g %g %g %g %g %g sum=%g\n", + // s[0], s[1], s[2], s[3], s[4], s[5], sum_b_squared()); + const int table[6][5] = { + // When negating s[n] we need to apply operations from table[n]: + // 2 x add, subtract, 2 x swap&add + {2, 4, 3, 1, 5}, // 0 + {2, 3, 4, 0, 5}, // 1 + {1, 3, 5, 0, 4}, // 2 + {1, 2, 0, 4, 5}, // 3 + {0, 2, 1, 3, 5}, // 4 + {0, 1, 2, 3, 4}, // 5 + }; + + double max_s = eps; + int max_s_pos = -1; + for (int i = 0; i < 6; ++i) + if (s[i] > max_s) { + max_s = s[i]; + max_s_pos = i; + } + if (max_s_pos < 0) + return false; + const int (&indices)[5] = table[max_s_pos]; + s[max_s_pos] = -max_s; + s[indices[0]] += max_s; + s[indices[1]] += max_s; + s[indices[2]] -= max_s; + std::swap(s[indices[3]], s[indices[4]]); + s[indices[3]] += max_s; + s[indices[4]] += max_s; + //printf(" s[%d]=%g sum: %g\n", max_s_pos, max_s, sum_b_squared()); + return true; + } + + // Returns number of iterations. + int reduce(double eps=1e-9, int iteration_limit=100) { + int n = 0; + while (++n != iteration_limit) + if (!reduce_step(eps)) + break; + return n; + } + + std::array g6_parameters() const { + return {-s[1]-s[2]-s[3], -s[0]-s[2]-s[4], -s[0]-s[1]-s[5], 2*s[0], 2*s[1], 2*s[2]}; + } + + GruberVector gruber() const { return GruberVector(g6_parameters()); } + + // Swap values to make a <= b <= c <= d + void sort(double eps=1e-9) { + double abcd_sq_neg[4] = { + // -a^2, -b^2, -c^2, -d^2 (negated - to be sorted in descending order) + s[1]+s[2]+s[3], s[0]+s[2]+s[4], s[0]+s[1]+s[5], s[3]+s[4]+s[5] + }; + // First, make sure that d >= a,b,c (therefore -d^2 <= -a^2,...). 
+ int min_idx = 3; + for (int i = 0; i < 3; ++i) + if (abcd_sq_neg[i] < abcd_sq_neg[min_idx] - eps) + min_idx = i; + switch (min_idx) { + case 0: // a <-> d + std::swap(s[1], s[5]); + std::swap(s[2], s[4]); + break; + case 1: // b <-> d + std::swap(s[0], s[5]); + std::swap(s[2], s[3]); + break; + case 2: // c <-> d + std::swap(s[0], s[4]); + std::swap(s[1], s[3]); + break; + } + // we could stop here and not care about the order of a,b,c. + std::swap(abcd_sq_neg[min_idx], abcd_sq_neg[3]); + if (abcd_sq_neg[0] < abcd_sq_neg[1] - eps) { // a <-> b + std::swap(s[0], s[1]); + std::swap(s[3], s[4]); + std::swap(abcd_sq_neg[0], abcd_sq_neg[1]); + } + if (abcd_sq_neg[1] < abcd_sq_neg[2] - eps) { // b <-> c + std::swap(s[1], s[2]); + std::swap(s[4], s[5]); + std::swap(abcd_sq_neg[1], abcd_sq_neg[2]); + } + if (abcd_sq_neg[0] < abcd_sq_neg[1] - eps) { // a <-> b + std::swap(s[0], s[1]); + std::swap(s[3], s[4]); + //std::swap(abcd_sq_neg[0], abcd_sq_neg[1]); + } + } + + std::array cell_parameters() const { + return gruber().cell_parameters(); + } + UnitCell get_cell() const { return UnitCell(cell_parameters()); } +}; + +inline SellingVector GruberVector::selling() const { + double s0 = 0.5 * xi; + double s1 = 0.5 * eta; + double s2 = 0.5 * zeta; + return SellingVector({s0, s1, s2, -A - s1 - s2, -B - s0 - s2, -C - s0 - s1}); +} + +} // namespace gemmi +#endif diff --git a/symmetry/gemmi/fail.hpp b/symmetry/gemmi/fail.hpp new file mode 100644 index 00000000..10596385 --- /dev/null +++ b/symmetry/gemmi/fail.hpp @@ -0,0 +1,93 @@ +// Copyright 2017 Global Phasing Ltd. 
+// +// fail(), unreachable() and __declspec/__attribute__ macros + +#ifndef GEMMI_FAIL_HPP_ +#define GEMMI_FAIL_HPP_ + +#include // for errno +#include // for runtime_error +#include // for system_error +#include +#include // for forward + +#ifdef __INTEL_COMPILER +// warning #2196: routine is both "inline" and "noinline" +# pragma warning disable 2196 +#endif +#if defined(__GNUG__) && !defined(__clang__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wattributes" +#endif + +#if defined(__GNUC__) || defined(__clang__) +# define GEMMI_COLD __attribute__((cold)) +#elif defined(_MSC_VER) +# define GEMMI_COLD __declspec(noinline) +#else +# define GEMMI_COLD __attribute__((noinline)) +#endif + +#if __cplusplus >= 202002L || _MSVC_LANG >= 202002L +# define GEMMI_LIKELY(x) (x) [[likely]] +# define GEMMI_UNLIKELY(x) (x) [[unlikely]] +#elif defined(__GNUC__) || defined(__clang__) +# define GEMMI_LIKELY(x) (__builtin_expect(!!(x), 1)) +# define GEMMI_UNLIKELY(x) (__builtin_expect(!!(x), 0)) +#else +# define GEMMI_LIKELY(x) (x) +# define GEMMI_UNLIKELY(x) (x) +#endif + +#if defined(_WIN32) +# if defined(GEMMI_SHARED) +# if defined(GEMMI_BUILD) +# define GEMMI_DLL __declspec(dllexport) +# else +# define GEMMI_DLL __declspec(dllimport) +# endif // GEMMI_BUILD +# else +# define GEMMI_DLL +# endif // GEMMI_SHARED +#else +# define GEMMI_DLL __attribute__((visibility("default"))) +#endif + +namespace gemmi { + +[[noreturn]] +inline void fail(const std::string& msg) { throw std::runtime_error(msg); } + +template [[noreturn]] +void fail(std::string&& str, T&& arg1, Args&&... 
args) { + str += arg1; + fail(std::move(str), std::forward(args)...); +} + +[[noreturn]] +inline GEMMI_COLD void fail(const char* msg) { throw std::runtime_error(msg); } + +[[noreturn]] +inline GEMMI_COLD void sys_fail(const std::string& msg) { + throw std::system_error(errno, std::system_category(), msg); +} +[[noreturn]] +inline GEMMI_COLD void sys_fail(const char* msg) { + throw std::system_error(errno, std::system_category(), msg); +} + +// unreachable() is used to silence GCC -Wreturn-type and hint the compiler +[[noreturn]] inline void unreachable() { +#if defined(__GNUC__) || defined(__clang__) + __builtin_unreachable(); +#elif defined(_MSC_VER) + __assume(0); +#endif +} + +#if defined(__GNUG__) && !defined(__clang__) +# pragma GCC diagnostic pop +#endif + +} // namespace gemmi +#endif diff --git a/symmetry/gemmi/math.hpp b/symmetry/gemmi/math.hpp new file mode 100644 index 00000000..87c10516 --- /dev/null +++ b/symmetry/gemmi/math.hpp @@ -0,0 +1,458 @@ +// Copyright 2018 Global Phasing Ltd. +// +// Math utilities. 3D linear algebra. + +#ifndef GEMMI_MATH_HPP_ +#define GEMMI_MATH_HPP_ + +#include // for fabs, cos, sqrt, round +#include // for min +#include +#include // for out_of_range +#include // for enable_if, is_integral + +namespace gemmi { + +constexpr double pi() { return 3.1415926535897932384626433832795029; } + +// The value used in converting between energy[eV] and wavelength[Angstrom]. +// $ units -d15 'h * c / eV / angstrom' +constexpr double hc() { return 12398.4197386209; } + +// The Bohr radius (a0) in Angstroms. +constexpr double bohrradius() { return 0.529177210903; } + +// for Mott-Bethe factor +constexpr double mott_bethe_const() { return 1. / (2 * pi() * pi() * bohrradius()); } + +// Used in conversion of ADPs (atomic displacement parameters). 
+constexpr double u_to_b() { return 8 * pi() * pi(); } + +constexpr double deg(double angle) { return 180.0 / pi() * angle; } +constexpr double rad(double angle) { return pi() / 180.0 * angle; } + +constexpr float sq(float x) { return x * x; } +constexpr double sq(double x) { return x * x; } + +inline double log_cosh(double x) { + // cosh(x) would overflow for x > 710.5, so we calculate: + // ln(cosh(x)) = ln(e^x + e^-x) - ln(2) = ln(e^x * (1 + e^-2x)) - ln(2) + x = std::abs(x); + return x - std::log(2) + std::log1p(std::exp(-2 * x)); +} + +inline int iround(double d) { return static_cast(std::round(d)); } + +inline double angle_abs_diff(double a, double b, double full=360.0) { + double d = std::fabs(a - b); + if (d > full) + d -= std::floor(d / full) * full; + return std::min(d, full - d); +} + +// similar to C++17 std::clamp() +template constexpr T clamp(T v, T lo, T hi) { + return std::min(std::max(v, lo), hi); +} + +template +struct Vec3_ { + Real x, y, z; + + Vec3_() : x(0), y(0), z(0) {} + Vec3_(Real x_, Real y_, Real z_) : x(x_), y(y_), z(z_) {} + explicit Vec3_(std::array h) : x(h[0]), y(h[1]), z(h[2]) {} + + Real& at(int i) { + switch (i) { + case 0: return x; + case 1: return y; + case 2: return z; + default: throw std::out_of_range("Vec3 index must be 0, 1 or 2."); + } + } + Real at(int i) const { return const_cast(this)->at(i); } + + Vec3_ operator-() const { return {-x, -y, -z}; } + Vec3_ operator-(const Vec3_& o) const { return {x-o.x, y-o.y, z-o.z}; } + Vec3_ operator+(const Vec3_& o) const { return {x+o.x, y+o.y, z+o.z}; } + Vec3_ operator*(Real d) const { return {x*d, y*d, z*d}; } + Vec3_ operator/(Real d) const { return *this * (1.0/d); } + Vec3_& operator-=(const Vec3_& o) { *this = *this - o; return *this; } + Vec3_& operator+=(const Vec3_& o) { *this = *this + o; return *this; } + Vec3_& operator*=(Real d) { *this = *this * d; return *this; } + Vec3_& operator/=(Real d) { return operator*=(1.0/d); } + + Vec3_ negated() const { return {-x, -y, 
-z}; } + Real dot(const Vec3_& o) const { return x*o.x + y*o.y + z*o.z; } + Vec3_ cross(const Vec3_& o) const { + return {y*o.z - z*o.y, z*o.x - x*o.z, x*o.y - y*o.x}; + } + Real length_sq() const { return x * x + y * y + z * z; } + Real length() const { return std::sqrt(length_sq()); } + Vec3_ changed_magnitude(Real m) const { return operator*(m / length()); } + Vec3_ normalized() const { return changed_magnitude(1.0); } + Real dist_sq(const Vec3_& o) const { return (*this - o).length_sq(); } + Real dist(const Vec3_& o) const { return std::sqrt(dist_sq(o)); } + Real cos_angle(const Vec3_& o) const { + return dot(o) / std::sqrt(length_sq() * o.length_sq()); + } + Real angle(const Vec3_& o) const { + return std::acos(clamp(cos_angle(o), -1., 1.)); + } + bool approx(const Vec3_& o, Real epsilon) const { + return std::fabs(x - o.x) <= epsilon && + std::fabs(y - o.y) <= epsilon && + std::fabs(z - o.z) <= epsilon; + } + bool has_nan() const { + return std::isnan(x) || std::isnan(y) || std::isnan(z); + } +}; + +using Vec3 = Vec3_; +using Vec3f = Vec3_; + +inline Vec3 operator*(double d, const Vec3& v) { return v * d; } + +/// Rodrigues' rotation formula: rotate vector v about given axis of rotation +/// (which must be a unit vector) by given angle (in radians). 
+inline Vec3 rotate_about_axis(const Vec3& v, const Vec3& axis, double theta) { + double sin_theta = std::sin(theta); + double cos_theta = std::cos(theta); + return v * cos_theta + axis.cross(v) * sin_theta + + axis * (axis.dot(v) * (1 - cos_theta)); +} + +struct Mat33 { + double a[3][3] = { {1.,0.,0.}, {0.,1.,0.}, {0.,0.,1.} }; + + // make it accessible with ".a" + typedef double row_t[3]; + const row_t& operator[](int i) const { return a[i]; } + row_t& operator[](int i) { return a[i]; } + + Mat33() = default; + explicit Mat33(double d) : a{{d, d, d}, {d, d, d}, {d, d, d}} {} + Mat33(double a1, double a2, double a3, double b1, double b2, double b3, + double c1, double c2, double c3) + : a{{a1, a2, a3}, {b1, b2, b3}, {c1, c2, c3}} {} + + static Mat33 from_columns(const Vec3& c1, const Vec3& c2, const Vec3& c3) { + return Mat33(c1.x, c2.x, c3.x, c1.y, c2.y, c3.y, c1.z, c2.z, c3.z); + } + + Vec3 row_copy(int i) const { + if (i < 0 || i > 2) + throw std::out_of_range("Mat33 row index must be 0, 1 or 2."); + return Vec3(a[i][0], a[i][1], a[i][2]); + } + + Vec3 column_copy(int i) const { + if (i < 0 || i > 2) + throw std::out_of_range("Mat33 column index must be 0, 1 or 2."); + return Vec3(a[0][i], a[1][i], a[2][i]); + } + + Mat33 operator+(const Mat33& b) const { + return Mat33(a[0][0] + b[0][0], a[0][1] + b[0][1], a[0][2] + b[0][2], + a[1][0] + b[1][0], a[1][1] + b[1][1], a[1][2] + b[1][2], + a[2][0] + b[2][0], a[2][1] + b[2][1], a[2][2] + b[2][2]); + } + Mat33 operator-(const Mat33& b) const { + return Mat33(a[0][0] - b[0][0], a[0][1] - b[0][1], a[0][2] - b[0][2], + a[1][0] - b[1][0], a[1][1] - b[1][1], a[1][2] - b[1][2], + a[2][0] - b[2][0], a[2][1] - b[2][1], a[2][2] - b[2][2]); + } + + Vec3 multiply(const Vec3& p) const { + return {a[0][0] * p.x + a[0][1] * p.y + a[0][2] * p.z, + a[1][0] * p.x + a[1][1] * p.y + a[1][2] * p.z, + a[2][0] * p.x + a[2][1] * p.y + a[2][2] * p.z}; + } + Vec3 left_multiply(const Vec3& p) const { + return {a[0][0] * p.x + a[1][0] * p.y + 
a[2][0] * p.z, + a[0][1] * p.x + a[1][1] * p.y + a[2][1] * p.z, + a[0][2] * p.x + a[1][2] * p.y + a[2][2] * p.z}; + } + // p has elements from the main diagonal of a 3x3 diagonal matrix + Mat33 multiply_by_diagonal(const Vec3& p) const { + return Mat33(a[0][0] * p.x, a[0][1] * p.y, a[0][2] * p.z, + a[1][0] * p.x, a[1][1] * p.y, a[1][2] * p.z, + a[2][0] * p.x, a[2][1] * p.y, a[2][2] * p.z); + } + Mat33 multiply(const Mat33& b) const { + Mat33 r; + for (int i = 0; i != 3; ++i) + for (int j = 0; j != 3; ++j) + r[i][j] = a[i][0] * b[0][j] + a[i][1] * b[1][j] + a[i][2] * b[2][j]; + return r; + } + Mat33 transpose() const { + return Mat33(a[0][0], a[1][0], a[2][0], + a[0][1], a[1][1], a[2][1], + a[0][2], a[1][2], a[2][2]); + } + double trace() const { return a[0][0] + a[1][1] + a[2][2]; } + + bool approx(const Mat33& other, double epsilon) const { + for (int i = 0; i < 3; ++i) + for (int j = 0; j < 3; ++j) + if (std::fabs(a[i][j] - other.a[i][j]) > epsilon) + return false; + return true; + } + bool has_nan() const { + for (int i = 0; i < 3; ++i) + for (int j = 0; j < 3; ++j) + if (std::isnan(a[i][j])) + return true; + return false; + } + + double determinant() const { + return a[0][0] * (a[1][1]*a[2][2] - a[2][1]*a[1][2]) + + a[0][1] * (a[1][2]*a[2][0] - a[2][2]*a[1][0]) + + a[0][2] * (a[1][0]*a[2][1] - a[2][0]*a[1][1]); + } + Mat33 inverse() const { + Mat33 inv; + double inv_det = 1.0 / determinant(); + inv[0][0] = inv_det * (a[1][1] * a[2][2] - a[2][1] * a[1][2]); + inv[0][1] = inv_det * (a[0][2] * a[2][1] - a[0][1] * a[2][2]); + inv[0][2] = inv_det * (a[0][1] * a[1][2] - a[0][2] * a[1][1]); + inv[1][0] = inv_det * (a[1][2] * a[2][0] - a[1][0] * a[2][2]); + inv[1][1] = inv_det * (a[0][0] * a[2][2] - a[0][2] * a[2][0]); + inv[1][2] = inv_det * (a[1][0] * a[0][2] - a[0][0] * a[1][2]); + inv[2][0] = inv_det * (a[1][0] * a[2][1] - a[2][0] * a[1][1]); + inv[2][1] = inv_det * (a[2][0] * a[0][1] - a[0][0] * a[2][1]); + inv[2][2] = inv_det * (a[0][0] * a[1][1] - a[1][0] * 
a[0][1]); + return inv; + } + bool is_identity() const { + return a[0][0] == 1 && a[0][1] == 0 && a[0][2] == 0 && + a[1][0] == 0 && a[1][1] == 1 && a[1][2] == 0 && + a[2][0] == 0 && a[2][1] == 0 && a[2][2] == 1; + } + + double column_dot(int i, int j) const { + return a[0][i] * a[0][j] + a[1][i] * a[1][j] + a[2][i] * a[2][j]; + } + + bool is_upper_triangular() const { + return a[1][0] == 0 && a[2][0] == 0 && a[2][1] == 0; + } +}; + +struct UpperTriangularMat33 { + double a11 = 0, a12 = 0, a13 = 0; + double a22 = 0, a23 = 0; + double a33 = 0; + UpperTriangularMat33() = default; + UpperTriangularMat33& operator=(const Mat33& m) { + if (m.is_upper_triangular()) { + a11 = m[0][0]; + a12 = m[0][1]; + a13 = m[0][2]; + a22 = m[1][1]; + a23 = m[1][2]; + a33 = m[2][2]; + } else { + a11 = a12 = a13 = a22 = a23 = a33 = NAN; + } + return *this; + } + Vec3 multiply(const Vec3& p) const { + return {a11 * p.x + a12 * p.y + a13 * p.z, + a22 * p.y + a23 * p.z, + a33 * p.z}; + } +}; + +// Symmetric matrix 3x3. Used primarily for an ADP tensor. +template struct SMat33 { + T u11, u22, u33, u12, u13, u23; + + // The PDB ANISOU record has the above order, but in a different context + // (such as metric tensor) the order of Voigt notation may be preferred. + std::array elements_pdb() const { return {{u11, u22, u33, u12, u13, u23}}; } + std::array elements_voigt() const { return {{u11, u22, u33, u23, u13, u12}}; } + + Mat33 as_mat33() const { + return Mat33(u11, u12, u13, u12, u22, u23, u13, u23, u33); + } + + // the arguments i and j must be in [0,2], i.e. 0, 1 or 2. 
+ T& unchecked_ref(int i, int j) { + T* ptrs[9] = {&u11, &u12, &u13, &u12, &u22, &u23, &u13, &u23, &u33}; + return *ptrs[3 * i + j]; + } + + T trace() const { return u11 + u22 + u33; } + bool nonzero() const { return trace() != 0; } + + bool all_zero() const { + return u11 == 0 && u22 == 0 && u33 == 0 && u12 == 0 && u13 == 0 && u23 == 0; + } + + void scale(T s) const { + u11 *= s; u22 *= s; u33 *= s; u12 *= s; u13 *= s; u23 *= s; + } + + template + SMat33 scaled(Real s) const { + return SMat33{u11*s, u22*s, u33*s, u12*s, u13*s, u23*s}; + } + + // returns U + kI + SMat33 added_kI(T k) const { + return {u11+k, u22+k, u33+k, u12, u13, u23}; + } + + // returns squared norm r^T U r where U is this matrix and vector r is arg + template + auto r_u_r(const Vec3_& r) const -> decltype(r.x+u11) { + return r.x * r.x * u11 + r.y * r.y * u22 + r.z * r.z * u33 + + 2 * (r.x * r.y * u12 + r.x * r.z * u13 + r.y * r.z * u23); + } + double r_u_r(const std::array& h) const { + // it's faster to first convert ints to doubles (Vec3) + return r_u_r(Vec3(h)); + } + + Vec3 multiply(const Vec3& p) const { + return {u11 * p.x + u12 * p.y + u13 * p.z, + u12 * p.x + u22 * p.y + u23 * p.z, + u13 * p.x + u23 * p.y + u33 * p.z}; + } + + SMat33 operator-(const SMat33& o) const { + return {u11-o.u11, u22-o.u22, u33-o.u33, u12-o.u12, u13-o.u13, u23-o.u23}; + } + SMat33 operator+(const SMat33& o) const { + return {u11+o.u11, u22+o.u22, u33+o.u33, u12+o.u12, u13+o.u13, u23+o.u23}; + } + + // return M U M^T + template + SMat33 transformed_by(const Mat33& m) const { + // slightly faster than m.multiply(as_mat33()).multiply(m.transpose()); + auto elem = [&](int i, int j) { + return static_cast( + m[i][0] * (m[j][0] * u11 + m[j][1] * u12 + m[j][2] * u13) + + m[i][1] * (m[j][0] * u12 + m[j][1] * u22 + m[j][2] * u23) + + m[i][2] * (m[j][0] * u13 + m[j][1] * u23 + m[j][2] * u33)); + }; + return SMat33{elem(0, 0), elem(1, 1), elem(2, 2), + elem(0, 1), elem(0, 2), elem(1, 2)}; + } + + T determinant() const { + 
return u11 * (u22*u33 - u23*u23) + + u12 * (u23*u13 - u33*u12) + + u13 * (u12*u23 - u13*u22); + } + + SMat33 inverse_(T det) const { + SMat33 inv; + T inv_det = 1.0f / det; + inv.u11 = inv_det * (u22 * u33 - u23 * u23); + inv.u22 = inv_det * (u11 * u33 - u13 * u13); + inv.u33 = inv_det * (u11 * u22 - u12 * u12); + inv.u12 = inv_det * (u13 * u23 - u12 * u33); + inv.u13 = inv_det * (u12 * u23 - u13 * u22); + inv.u23 = inv_det * (u12 * u13 - u11 * u23); + return inv; + } + SMat33 inverse() const { + return inverse_(determinant()); + } + + /// Based on https://en.wikipedia.org/wiki/Eigenvalue_algorithm + /// To calculate both eigenvalues and eigenvectors use eig3.hpp + std::array calculate_eigenvalues() const { + double p1 = u12*u12 + u13*u13 + u23*u23; + if (p1 == 0) + return {{u11, u22, u33}}; + double q = (1./3.) * trace(); + SMat33 b{u11 - q, u22 - q, u33 - q, u12, u13, u23}; + double p2 = sq(b.u11) + sq(b.u22) + sq(b.u33) + 2 * p1; + double p = std::sqrt((1./6.) * p2); + double r = b.determinant() / ((1./3.) * p2 * p); + double phi = 0; + if (r <= -1) + phi = (1./3.) * pi(); + else if (r < 1) + phi = (1./3.) * std::acos(r); + double eig1 = q + 2 * p * std::cos(phi); + double eig3 = q + 2 * p * std::cos(phi + 2./3.*pi()); + return {{eig1, 3 * q - eig1 - eig3, eig3}}; + } +}; + +struct Transform { + Mat33 mat; + Vec3 vec; + + Transform inverse() const { + Mat33 minv = mat.inverse(); + return {minv, minv.multiply(vec).negated()}; + } + + Vec3 apply(const Vec3& x) const { return mat.multiply(x) + vec; } + + Transform combine(const Transform& b) const { + return {mat.multiply(b.mat), vec + mat.multiply(b.vec)}; + } + + bool is_identity() const { + return mat.is_identity() && vec.x == 0. && vec.y == 0. 
&& vec.z == 0.; + } + void set_identity() { mat = Mat33(); vec = Vec3(); } + + bool has_nan() const { + return mat.has_nan() || vec.has_nan(); + } + + bool approx(const Transform& o, double epsilon) const { + return mat.approx(o.mat, epsilon) && vec.approx(o.vec, epsilon); + } +}; + +template +struct Box { + Pos minimum = Pos(INFINITY, INFINITY, INFINITY); + Pos maximum = Pos(-INFINITY, -INFINITY, -INFINITY); + void extend(const Pos& p) { + if (p.x < minimum.x) minimum.x = p.x; + if (p.y < minimum.y) minimum.y = p.y; + if (p.z < minimum.z) minimum.z = p.z; + if (p.x > maximum.x) maximum.x = p.x; + if (p.y > maximum.y) maximum.y = p.y; + if (p.z > maximum.z) maximum.z = p.z; + } + Pos get_size() const { return maximum - minimum; } + void add_margins(const Pos& p) { minimum -= p; maximum += p; } + void add_margin(double m) { add_margins(Pos(m, m, m)); } +}; + +// internally used functions +namespace impl { +// MSVC is missing isnan(IntegralType), so we define is_nan as a replacement +template +typename std::enable_if::value, bool>::type +is_nan(T) { return false; } +template +typename std::enable_if::value, bool>::type +is_nan(T a) { return std::isnan(a); } + +template +typename std::enable_if::value, bool>::type +is_same(T a, T b) { return a == b; } +template +typename std::enable_if::value, bool>::type +is_same(T a, T b) { return std::isnan(b) ? std::isnan(a) : a == b; } +} // namespace impl + +} // namespace gemmi +#endif diff --git a/symmetry/gemmi/symmetry.hpp b/symmetry/gemmi/symmetry.hpp new file mode 100644 index 00000000..203324fd --- /dev/null +++ b/symmetry/gemmi/symmetry.hpp @@ -0,0 +1,1044 @@ +// Copyright 2017-2019 Global Phasing Ltd. +// +// Crystallographic Symmetry. Space Groups. Coordinate Triplets. +// +// If this is all that you need from Gemmi you can just copy this file, +// src/symmetry.cpp fail.hpp and LICENSE.txt to your project. 
+ +#ifndef GEMMI_SYMMETRY_HPP_ +#define GEMMI_SYMMETRY_HPP_ + +#include // for strtol, abs +#include +#include // for sort, remove +#include // for hash +#include // for invalid_argument +#include +#include // for tie +#include + +#include "fail.hpp" // for fail, unreachable + +namespace gemmi { + +// OP + +// Op is a symmetry operation, or a change-of-basic transformation, +// or a different operation of similar kind. +// Both "rotation" matrix and translation vector are fractional, with DEN +// used as the denominator. +struct GEMMI_DLL Op { + static constexpr int DEN = 24; // 24 to handle 1/8 in change-of-basis + typedef std::array, 3> Rot; + typedef std::array Tran; + + Rot rot; + Tran tran; + char notation = ' '; + + bool is_hkl() const { return notation == 'h'; } + + Op as_hkl() const { + return is_hkl() ? *this : Op{rot, {0,0,0}, 'h'}; + } + Op as_xyz() const { + return is_hkl() ? Op{rot, {0,0,0}, 'x'} : *this; + } + + std::string triplet(char style=' ') const; + + Op inverse() const; + + Op::Tran wrapped_tran() const { + Op::Tran t = tran; + for (int i = 0; i != 3; ++i) { + if (t[i] >= DEN) // elements need to be in [0,DEN) + t[i] %= DEN; + else if (t[i] < 0) + t[i] = ((t[i] + 1) % DEN) + DEN - 1; + } + return t; + } + + // If the translation points outside of the unit cell, wrap it. 
+ Op& wrap() { + tran = wrapped_tran(); + return *this; + } + + Op& translate(const Tran& a) { + for (int i = 0; i != 3; ++i) + tran[i] += a[i]; + return *this; + } + + Op translated(const Tran& a) const { return Op(*this).translate(a); } + + Op add_centering(const Tran& a) const { return translated(a).wrap(); } + + Rot negated_rot() const { + return {{{-rot[0][0], -rot[0][1], -rot[0][2]}, + {-rot[1][0], -rot[1][1], -rot[1][2]}, + {-rot[2][0], -rot[2][1], -rot[2][2]}}}; + } + + static Rot transpose(const Rot& rot) { + return {{{rot[0][0], rot[1][0], rot[2][0]}, + {rot[0][1], rot[1][1], rot[2][1]}, + {rot[0][2], rot[1][2], rot[2][2]}}}; + } + Rot transposed_rot() const { return transpose(rot); } + + // DEN^3 for rotation, -DEN^3 for rotoinversion + int det_rot() const { + return rot[0][0] * (rot[1][1] * rot[2][2] - rot[1][2] * rot[2][1]) + - rot[0][1] * (rot[1][0] * rot[2][2] - rot[1][2] * rot[2][0]) + + rot[0][2] * (rot[1][0] * rot[2][1] - rot[1][1] * rot[2][0]); + } + + // Rotation-part type based on Table 1 in RWGK, Acta Cryst. A55, 383 (1999) + int rot_type() const { + int det = det_rot(); + int tr_den = rot[0][0] + rot[1][1] + rot[2][2]; + int tr = tr_den / DEN; + const int table[] = {0, 0, 2, 3, 4, 6, 1}; + if (std::abs(det) == DEN * DEN * DEN && tr * DEN == tr_den && std::abs(tr) <= 3) + return det > 0 ? 
table[3 + tr] : -table[3 - tr]; + return 0; + } + + Op combine(const Op& b) const { + if (is_hkl() != b.is_hkl()) + fail("can't combine real- and reciprocal-space Op"); + Op r; + for (int i = 0; i != 3; ++i) { + r.tran[i] = tran[i] * Op::DEN; + for (int j = 0; j != 3; ++j) { + r.rot[i][j] = (rot[i][0] * b.rot[0][j] + + rot[i][1] * b.rot[1][j] + + rot[i][2] * b.rot[2][j]) / Op::DEN; + r.tran[i] += rot[i][j] * b.tran[j]; + } + r.tran[i] /= Op::DEN; + } + r.notation = notation; + return r; + } + + std::array apply_to_xyz(const std::array& xyz) const { + if (is_hkl()) + fail("can't apply reciprocal-space Op to xyz"); + std::array out; + for (int i = 0; i != 3; ++i) + out[i] = (rot[i][0] * xyz[0] + rot[i][1] * xyz[1] + rot[i][2] * xyz[2] + + tran[i]) / Op::DEN; + return out; + } + + // Miller is defined in the same way in namespace gemmi in unitcell.hpp + using Miller = std::array; + + Miller apply_to_hkl_without_division(const Miller& hkl) const { + Miller r; + for (int i = 0; i != 3; ++i) + r[i] = (rot[0][i] * hkl[0] + rot[1][i] * hkl[1] + rot[2][i] * hkl[2]); + return r; + } + static Miller divide_hkl_by_DEN(const Miller& hkl) { + return {{ hkl[0] / DEN, hkl[1] / DEN, hkl[2] / DEN }}; + } + Miller apply_to_hkl(const Miller& hkl) const { + return divide_hkl_by_DEN(apply_to_hkl_without_division(hkl)); + } + + double phase_shift(const Miller& hkl) const { + constexpr double mult = -2 * 3.1415926535897932384626433832795 / Op::DEN; + return mult * (hkl[0] * tran[0] + hkl[1] * tran[1] + hkl[2] * tran[2]); + } + + std::array, 4> int_seitz() const { + std::array, 4> t; + for (int i = 0; i < 3; ++i) + t[i] = { rot[i][0], rot[i][1], rot[i][2], tran[i] }; + t[3] = { 0, 0, 0, 1 }; + return t; + } + + std::array, 4> float_seitz() const { + std::array, 4> t; + double m = 1.0 / Op::DEN; + for (int i = 0; i < 3; ++i) + t[i] = { m * rot[i][0], m * rot[i][1], m * rot[i][2], m * tran[i] }; + t[3] = { 0., 0., 0., 1. 
}; + return t; + } + + static constexpr Op identity() { + return {{{{DEN,0,0}, {0,DEN,0}, {0,0,DEN}}}, {0,0,0}, ' '}; + } + static constexpr Op::Rot inversion_rot() { + return {{{-DEN,0,0}, {0,-DEN,0}, {0,0,-DEN}}}; + } + bool operator<(const Op& rhs) const { + return std::tie(rot, tran) < std::tie(rhs.rot, rhs.tran); + } +}; + +inline bool operator==(const Op& a, const Op& b) { + return a.rot == b.rot && a.tran == b.tran; +} +inline bool operator!=(const Op& a, const Op& b) { return !(a == b); } + +inline Op operator*(const Op& a, const Op& b) { return a.combine(b).wrap(); } +inline Op& operator*=(Op& a, const Op& b) { a = a * b; return a; } + +inline Op Op::inverse() const { + int detr = det_rot(); + if (detr == 0) + fail("cannot invert matrix: " + Op{rot, {0,0,0}, notation}.triplet()); + int d2 = Op::DEN * Op::DEN; + Op inv; + inv.rot[0][0] = d2 * (rot[1][1] * rot[2][2] - rot[2][1] * rot[1][2]) / detr; + inv.rot[0][1] = d2 * (rot[0][2] * rot[2][1] - rot[0][1] * rot[2][2]) / detr; + inv.rot[0][2] = d2 * (rot[0][1] * rot[1][2] - rot[0][2] * rot[1][1]) / detr; + inv.rot[1][0] = d2 * (rot[1][2] * rot[2][0] - rot[1][0] * rot[2][2]) / detr; + inv.rot[1][1] = d2 * (rot[0][0] * rot[2][2] - rot[0][2] * rot[2][0]) / detr; + inv.rot[1][2] = d2 * (rot[1][0] * rot[0][2] - rot[0][0] * rot[1][2]) / detr; + inv.rot[2][0] = d2 * (rot[1][0] * rot[2][1] - rot[2][0] * rot[1][1]) / detr; + inv.rot[2][1] = d2 * (rot[2][0] * rot[0][1] - rot[0][0] * rot[2][1]) / detr; + inv.rot[2][2] = d2 * (rot[0][0] * rot[1][1] - rot[1][0] * rot[0][1]) / detr; + for (int i = 0; i != 3; ++i) + inv.tran[i] = (-tran[0] * inv.rot[i][0] + -tran[1] * inv.rot[i][1] + -tran[2] * inv.rot[i][2]) / Op::DEN; + inv.notation = notation; + return inv; +} + +// inverse of Op::float_seitz() +GEMMI_DLL Op seitz_to_op(const std::array, 4>& t); + +// helper function for use in AsuBrick::str() +GEMMI_DLL void append_op_fraction(std::string& s, int w); + +// TRIPLET -> OP +GEMMI_DLL std::array parse_triplet_part(const 
std::string& s, char& notation, + double* decimal_fract=nullptr); +GEMMI_DLL Op parse_triplet(const std::string& s, char notation=' '); + +// GROUPS OF OPERATIONS + +// corresponds to Table A1.4.2.2 in ITfC vol.B (edition 2010) +inline std::vector centring_vectors(char centring_type) { + constexpr int h = Op::DEN / 2; + constexpr int t = Op::DEN / 3; + constexpr int d = 2 * t; + // note: find_centering() depends on the order of operations in vector + switch (centring_type & ~0x20) { + case 'P': return {{0, 0, 0}}; + case 'A': return {{0, 0, 0}, {0, h, h}}; + case 'B': return {{0, 0, 0}, {h, 0, h}}; + case 'C': return {{0, 0, 0}, {h, h, 0}}; + case 'I': return {{0, 0, 0}, {h, h, h}}; + case 'R': return {{0, 0, 0}, {d, t, t}, {t, d, d}}; + // hall_symbols.html has no H, ITfC 2010 has no S and T + case 'H': return {{0, 0, 0}, {d, t, 0}, {t, d, 0}}; + case 'S': return {{0, 0, 0}, {t, t, d}, {d, t, d}}; + case 'T': return {{0, 0, 0}, {t, d, t}, {d, t, d}}; + case 'F': return {{0, 0, 0}, {0, h, h}, {h, 0, h}, {h, h, 0}}; + default: fail("not a centring type: ", centring_type); + } +} + + +struct GroupOps { + std::vector sym_ops; + std::vector cen_ops; + + int order() const { return static_cast(sym_ops.size()*cen_ops.size()); } + + void add_missing_elements(); + void add_missing_elements_part2(const std::vector& gen, + size_t max_size, bool ignore_bad_gen); + + bool add_inversion() { + size_t init_size = sym_ops.size(); + sym_ops.reserve(2 * init_size); + for (const Op& op : sym_ops) { + Op::Rot neg = op.negated_rot(); + if (find_by_rotation(neg)) { + sym_ops.resize(init_size); + return false; + } + sym_ops.push_back({neg, op.tran, op.notation}); + } + return true; + } + + char find_centering() const { + if (cen_ops.size() == 1 && cen_ops[0] == Op::Tran{0, 0, 0}) + return 'P'; + std::vector trans = cen_ops; + std::sort(trans.begin(), trans.end()); + for (char c : {'A', 'B', 'C', 'I', 'F', 'R', 'H', 'S', 'T'}) { + std::vector c_vectors = centring_vectors(c); + if (c == 'R' 
|| c == 'H') // these two are returned not sorted + std::swap(c_vectors[1], c_vectors[2]); + if (trans == c_vectors) + return c; + } + return 0; + } + + Op* find_by_rotation(const Op::Rot& r) { + for (Op& op : sym_ops) + if (op.rot == r) + return &op; + return nullptr; + } + + const Op* find_by_rotation(const Op::Rot& r) const { + return const_cast(this)->find_by_rotation(r); + } + + bool is_centrosymmetric() const { + return find_by_rotation(Op::inversion_rot()) != nullptr; + } + + bool is_reflection_centric(const Op::Miller& hkl) const { + Op::Miller mhkl = {{-Op::DEN * hkl[0], -Op::DEN * hkl[1], -Op::DEN * hkl[2]}}; + for (const Op& op : sym_ops) + if (op.apply_to_hkl_without_division(hkl) == mhkl) + return true; + return false; + } + + int epsilon_factor_without_centering(const Op::Miller& hkl) const { + Op::Miller denh = {{Op::DEN * hkl[0], Op::DEN * hkl[1], Op::DEN * hkl[2]}}; + int epsilon = 0; + for (const Op& op : sym_ops) + if (op.apply_to_hkl_without_division(hkl) == denh) + ++epsilon; + return epsilon; + } + int epsilon_factor(const Op::Miller& hkl) const { + return epsilon_factor_without_centering(hkl) * (int) cen_ops.size(); + } + + static bool has_phase_shift(const Op::Tran& c, const Op::Miller& hkl) { + return (hkl[0] * c[0] + hkl[1] * c[1] + hkl[2] * c[2]) % Op::DEN != 0; + } + + bool is_systematically_absent(const Op::Miller& hkl) const { + for (auto i = cen_ops.begin() + 1; i != cen_ops.end(); ++i) + if (has_phase_shift(*i, hkl)) + return true; + Op::Miller denh = {{Op::DEN * hkl[0], Op::DEN * hkl[1], Op::DEN * hkl[2]}}; + for (auto op = sym_ops.begin() + 1; op != sym_ops.end(); ++op) + if (op->apply_to_hkl_without_division(hkl) == denh) { + for (const Op::Tran& c : cen_ops) + if (has_phase_shift({{op->tran[0] + c[0], + op->tran[1] + c[1], + op->tran[2] + c[2]}}, hkl)) + return true; + } + return false; + } + + void change_basis_impl(const Op& cob, const Op& inv) { + if (sym_ops.empty() || cen_ops.empty()) + return; + + // Apply change-of-basis 
to sym_ops. + // Ignore the first item in sym_ops -- it's identity. + for (auto op = sym_ops.begin() + 1; op != sym_ops.end(); ++op) + *op = cob.combine(*op).combine(inv).wrap(); + + // The number of centering vectors may be different. + // As an ad-hoc method (not proved to be robust) add lattice points + // from a super-cell. + int idet = inv.det_rot() / (Op::DEN * Op::DEN * Op::DEN); + if (idet > 1) { + std::vector new_cen_ops; + new_cen_ops.reserve(cen_ops.size() * idet * idet * idet); + for (int i = 0; i < idet; ++i) + for (int j = 0; j < idet; ++j) + for (int k = 0; k < idet; ++k) + for (Op::Tran& cen : cen_ops) + new_cen_ops.push_back({i * Op::DEN + cen[0], + j * Op::DEN + cen[1], + k * Op::DEN + cen[2]}); + cen_ops.swap(new_cen_ops); + } + + // Apply change-of-basis to centering vectors + Op cvec = Op::identity(); + for (auto tr = cen_ops.begin() + 1; tr != cen_ops.end(); ++tr) { + cvec.tran = *tr; + *tr = cob.combine(cvec).combine(inv).wrap().tran; + } + + // Remove redundant centering vectors. 
+ for (int i = static_cast(cen_ops.size()) - 1; i > 0; --i) + for (int j = i - 1; j >= 0; --j) + if (cen_ops[i] == cen_ops[j]) { + cen_ops.erase(cen_ops.begin() + i); + break; + } + } + + void change_basis_forward(const Op& cob) { change_basis_impl(cob, cob.inverse()); } + void change_basis_backward(const Op& inv) { change_basis_impl(inv.inverse(), inv); } + + std::vector all_ops_sorted() const { + std::vector ops; + ops.reserve(sym_ops.size() * cen_ops.size()); + for (const Op& so : sym_ops) + for (const Op::Tran& co : cen_ops) + ops.push_back(so.add_centering(co)); + std::sort(ops.begin(), ops.end()); + return ops; + } + + Op get_op(int n) const { + int n_cen = n / (int) sym_ops.size(); + int n_sym = n % (int) sym_ops.size(); + return sym_ops.at(n_sym).add_centering(cen_ops.at(n_cen)); + } + + bool is_same_as(const GroupOps& other) const { + if (cen_ops.size() != other.cen_ops.size() || + sym_ops.size() != other.sym_ops.size()) + return false; + return all_ops_sorted() == other.all_ops_sorted(); + } + + bool has_same_centring(const GroupOps& other) const { + if (cen_ops.size() != other.cen_ops.size()) + return false; + if (std::is_sorted(cen_ops.begin(), cen_ops.end()) && + std::is_sorted(other.cen_ops.begin(), other.cen_ops.end())) + return cen_ops == other.cen_ops; + std::vector v1 = cen_ops; + std::vector v2 = other.cen_ops; + std::sort(v1.begin(), v1.end()); + std::sort(v2.begin(), v2.end()); + return v1 == v2; + } + + bool has_same_rotations(const GroupOps& other) const { + if (sym_ops.size() != other.sym_ops.size()) + return false; + auto sorted_rotations = [](const GroupOps& g) { + std::vector r(g.sym_ops.size()); + for (size_t i = 0; i != r.size(); ++i) + r[i] = g.sym_ops[i].rot; + std::sort(r.begin(), r.end()); + return r; + }; + return sorted_rotations(*this) == sorted_rotations(other); + } + + // minimal multiplicity for real-space grid in each direction + // examples: 1,2,1 for P21, 1,1,6 for P61 + std::array find_grid_factors() const { + const int T 
= Op::DEN; + int r[3] = {T, T, T}; + for (Op op : *this) + for (int i = 0; i != 3; ++i) + if (op.tran[i] != 0 && op.tran[i] < r[i]) + r[i] = op.tran[i]; + return {T / r[0], T / r[1], T / r[2]}; + } + + bool are_directions_symmetry_related(int u, int v) const { + for (const Op& op : sym_ops) + if (op.rot[u][v] != 0) + return true; + return false; + } + + // remove translation part of sym_ops + GroupOps derive_symmorphic() const { + GroupOps r(*this); + for (Op& op : r.sym_ops) + op.tran[0] = op.tran[1] = op.tran[2] = 0; + return r; + } + + struct Iter { + const GroupOps& gops; + int n_sym, n_cen; + void operator++() { + if (++n_sym == (int) gops.sym_ops.size()) { + ++n_cen; + n_sym = 0; + } + } + Op operator*() const { + return gops.sym_ops.at(n_sym).translated(gops.cen_ops.at(n_cen)).wrap(); + } + bool operator==(const Iter& other) const { + return n_sym == other.n_sym && n_cen == other.n_cen; + } + bool operator!=(const Iter& other) const { return !(*this == other); } + }; + + Iter begin() const { return {*this, 0, 0}; } + Iter end() const { return {*this, 0, (int) cen_ops.size()}; } +}; + +inline void GroupOps::add_missing_elements() { + // We always keep identity as sym_ops[0]. + if (sym_ops.empty() || sym_ops[0] != Op::identity()) + fail("oops"); + if (sym_ops.size() == 1) + return; + constexpr size_t max_size = 1024; + // Below we assume that all centring vectors are already known (in cen_ops) + // so when checking for a new element we compare only the 3x3 matrix. + // Dimino's algorithm. 
https://physics.stackexchange.com/a/351400/95713 + std::vector gen(sym_ops.begin() + 1, sym_ops.end()); + sym_ops.resize(2); + const Op::Rot idrot = Op::identity().rot; + for (Op g = sym_ops[1] * sym_ops[1]; g.rot != idrot; g *= sym_ops[1]) { + sym_ops.push_back(g); + if (sym_ops.size() > max_size) + fail("Too many elements in the group - bad generators"); + } + // the rest is in separate function b/c it's reused in twin.hpp + add_missing_elements_part2(gen, max_size, false); +} + +inline void GroupOps::add_missing_elements_part2(const std::vector& gen, + size_t max_size, bool ignore_bad_gen) { + for (size_t i = 1; i < gen.size(); ++i) { + std::vector coset_repr(1, Op::identity()); + size_t init_size = sym_ops.size(); + for (;;) { + size_t len = coset_repr.size(); + for (size_t j = 0; j != len; ++j) { + for (size_t n = 0; n != i + 1; ++n) { + Op sg = gen[n] * coset_repr[j]; + if (find_by_rotation(sg.rot) == nullptr) { + sym_ops.push_back(sg); + for (size_t k = 1; k != init_size; ++k) + sym_ops.push_back(sg * sym_ops[k]); + coset_repr.push_back(sg); + } + } + } + if (len == coset_repr.size()) + break; + if (sym_ops.size() > max_size) { + if (!ignore_bad_gen) + fail("Too many elements in the group - bad generators"); + // ignore this generator and continue with the next one + sym_ops.resize(init_size); + break; + } + } + } +} + +// Create GroupOps from Ops by separating centering vectors +inline GroupOps split_centering_vectors(const std::vector& ops) { + const Op identity = Op::identity(); + GroupOps go; + go.sym_ops.push_back(identity); + for (const Op& op : ops) + if (Op* old_op = go.find_by_rotation(op.rot)) { + Op::Tran tran = op.wrapped_tran(); + if (op.rot == identity.rot) // pure shift + go.cen_ops.push_back(tran); + if (tran == identity.tran) // or rather |tran| < |old_op->tran| ? 
+ old_op->tran = op.tran; + } else { + go.sym_ops.push_back(op); + } + return go; +} + +GEMMI_DLL GroupOps generators_from_hall(const char* hall); + +inline GroupOps symops_from_hall(const char* hall) { + GroupOps ops = generators_from_hall(hall); + ops.add_missing_elements(); + return ops; +} + +// CRYSTAL SYSTEMS, POINT GROUPS AND LAUE CLASSES + +enum class CrystalSystem : unsigned char { + Triclinic=0, Monoclinic, Orthorhombic, Tetragonal, Trigonal, Hexagonal, Cubic +}; + +inline const char* crystal_system_str(CrystalSystem system) { + static const char* names[7] = { + "triclinic", "monoclinic", "orthorhombic", "tetragonal", + "trigonal", "hexagonal", "cubic" + }; + return names[static_cast(system)]; +} + +enum class PointGroup : unsigned char { + C1=0, Ci, C2, Cs, C2h, D2, C2v, D2h, C4, S4, C4h, D4, C4v, D2d, D4h, C3, + C3i, D3, C3v, D3d, C6, C3h, C6h, D6, C6v, D3h, D6h, T, Th, O, Td, Oh +}; + +inline const char* point_group_hm(PointGroup pg) { + static const char hm_pointgroup_names[32][6] = { + "1", "-1", "2", "m", "2/m", "222", "mm2", "mmm", + "4", "-4", "4/m", "422", "4mm", "-42m", "4/mmm", "3", + "-3", "32", "3m", "-3m", "6", "-6", "6/m", "622", + "6mm", "-62m", "6/mmm", "23", "m-3", "432", "-43m", "m-3m", + }; + return hm_pointgroup_names[static_cast(pg)]; +} + +// http://reference.iucr.org/dictionary/Laue_class +enum class Laue : unsigned char { + L1=0, L2m, Lmmm, L4m, L4mmm, L3, L3m, L6m, L6mmm, Lm3, Lm3m +}; + +inline Laue pointgroup_to_laue(PointGroup pg) { + static const Laue laue[32] = { + Laue::L1, Laue::L1, + Laue::L2m, Laue::L2m, Laue::L2m, + Laue::Lmmm, Laue::Lmmm, Laue::Lmmm, + Laue::L4m, Laue::L4m, Laue::L4m, + Laue::L4mmm, Laue::L4mmm, Laue::L4mmm, Laue::L4mmm, + Laue::L3, Laue::L3, + Laue::L3m, Laue::L3m, Laue::L3m, + Laue::L6m, Laue::L6m, Laue::L6m, + Laue::L6mmm, Laue::L6mmm, Laue::L6mmm, Laue::L6mmm, + Laue::Lm3, Laue::Lm3, + Laue::Lm3m, Laue::Lm3m, Laue::Lm3m, + }; + return laue[static_cast(pg)]; +} + +// return centrosymmetric 
pointgroup from the Laue class +inline PointGroup laue_to_pointgroup(Laue laue) { + static const PointGroup pg[11] = { + PointGroup::Ci, PointGroup::C2h, PointGroup::D2h, PointGroup::C4h, + PointGroup::D4h, PointGroup::C3i, PointGroup::D3d, PointGroup::C6h, + PointGroup::D6h, PointGroup::Th, PointGroup::Oh + }; + return pg[static_cast(laue)]; +} + +inline const char* laue_class_str(Laue laue) { + return point_group_hm(laue_to_pointgroup(laue)); +} + +inline CrystalSystem crystal_system(Laue laue) { + static const CrystalSystem crystal_systems[11] = { + CrystalSystem::Triclinic, + CrystalSystem::Monoclinic, + CrystalSystem::Orthorhombic, + CrystalSystem::Tetragonal, CrystalSystem::Tetragonal, + CrystalSystem::Trigonal, CrystalSystem::Trigonal, + CrystalSystem::Hexagonal, CrystalSystem::Hexagonal, + CrystalSystem::Cubic, CrystalSystem::Cubic + }; + return crystal_systems[static_cast(laue)]; +} + +inline CrystalSystem crystal_system(PointGroup pg) { + return crystal_system(pointgroup_to_laue(pg)); +} + +inline unsigned char point_group_index_and_category(int space_group_number) { + // 0x20=Sohncke, 0x40=enantiomorphic, 0x80=symmorphic + enum : unsigned char { S=0x20, E=(0x20|0x40), Y=0x80, Z=(0x20|0x80) }; + static const unsigned char indices[230] = { + 0|Z, 1|Y, 2|Z, 2|S, 2|Z, 3|Y, 3, 3|Y, 3, 4|Y, // 1-10 + 4, 4|Y, 4, 4, 4, 5|Z, 5|S, 5|S, 5|S, 5|S, // 11-20 + 5|Z, 5|Z, 5|Z, 5|S, 6|Y, 6, 6, 6, 6, 6, // 21-30 + 6, 6, 6, 6, 6|Y, 6, 6, 6|Y, 6, 6, // 31-40 + 6, 6|Y, 6, 6|Y, 6, 6, 7|Y, 7, 7, 7, // 41-50 + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 51-60 + 7, 7, 7, 7, 7|Y, 7, 7, 7, 7|Y, 7, // 61-70 + 7|Y, 7, 7, 7, 8|Z, 8|E, 8|S, 8|E, 8|Z, 8|S, // 71-80 + 9|Y, 9|Y, 10|Y, 10, 10, 10, 10|Y, 10, 11|Z, 11|S, // 81-90 + 11|E, 11|E, 11|S, 11|S, 11|E, 11|E, 11|Z, 11|S, 12|Y, 12, // 91-100 + 12, 12, 12, 12, 12, 12, 12|Y, 12, 12, 12, // 101-110 + 13|Y, 13, 13, 13, 13|Y, 13, 13, 13, 13|Y, 13, // 111-120 + 13|Y, 13, 14|Y, 14, 14, 14, 14, 14, 14, 14, // 121-130 + 14, 14, 14, 14, 14, 14, 14, 14, 
14|Y, 14, // 131-140 + 14, 14, 15|Z, 15|E, 15|E, 15|Z, 16|Y, 16|Y, 17|Z, 17|Z, // 141-150 + 17|E, 17|E, 17|E, 17|E, 17|Z, 18|Y, 18|Y, 18, 18, 18|Y, // 151-160 + 18, 19|Y, 19, 19|Y, 19, 19|Y, 19, 20|Z, 20|E, 20|E, // 161-170 + 20|E, 20|E, 20|S, 21|Y, 22|Y, 22, 23|Z, 23|E, 23|E, 23|E, // 171-180 + 23|E, 23|S, 24|Y, 24, 24, 24, 25|Y, 25, 25|Y, 25, // 181-190 + 26|Y, 26, 26, 26, 27|Z, 27|Z, 27|Z, 27|S, 27|S, 28|Y, // 191-200 + 28, 28|Y, 28, 28|Y, 28, 28, 29|Z, 29|S, 29|Z, 29|S, // 201-210 + 29|Z, 29|E, 29|E, 29|S, 30|Y, 30|Y, 30|Y, 30, 30, 30, // 211-220 + 31|Y, 31, 31, 31, 31|Y, 31, 31, 31, 31|Y, 31 // 221-230 + }; + return indices[space_group_number-1]; +} + +inline PointGroup point_group(int space_group_number) { + auto n = point_group_index_and_category(space_group_number); + return static_cast(n & 0x1f); +} + +// true for 65 Sohncke (non-enantiogenic) space groups +inline bool is_sohncke(int space_group_number) { + return (point_group_index_and_category(space_group_number) & 0x20) != 0; +} + +// true for 22 space groups (11 enantiomorphic pairs) +inline bool is_enantiomorphic(int space_group_number) { + return (point_group_index_and_category(space_group_number) & 0x40) != 0; +} + +// true for 73 space groups +inline bool is_symmorphic(int space_group_number) { + return (point_group_index_and_category(space_group_number) & 0x80) != 0; +} + +/// Inversion center of the Euclidean normalizer that is not at the origin of +/// reference settings. Returns (0,0,0) if absent. Based on tables in ch. 3.5 +/// of ITA (2016) doi:10.1107/97809553602060000933 (column "Inversion through +/// a centre at"). 
+inline Op::Tran nonzero_inversion_center(int space_group_number) { + constexpr int D = Op::DEN; + switch (space_group_number) { + case 43: return {D/8, D/8, 0}; + case 80: return {D/4, 0, 0}; + case 98: return {D/4, 0, D/8}; + case 109: return {D/4, 0, 0}; + case 110: return {D/4, 0, 0}; + case 122: return {D/4, 0, D/8}; + case 210: return {D/8, D/8, D/8}; + default: return {0, 0, 0}; + } +} + +GEMMI_DLL const char* get_basisop(int basisop_idx); + + +// Returns a change-of-basis operator for centred -> primitive transformation. +// The same operator as inverse of z2p_op in sgtbx. +inline Op::Rot centred_to_primitive(char centring_type) { + constexpr int D = Op::DEN; + constexpr int H = Op::DEN / 2; + constexpr int T = Op::DEN / 3; + switch (centring_type) { + case 'P': return {{{D,0,0}, {0,D,0}, {0,0,D}}}; + case 'A': return {{{-D,0,0}, {0,-H,H}, {0,H,H}}}; + case 'B': return {{{-H,0,H}, {0,-D,0}, {H,0,H}}}; + case 'C': return {{{H,H,0}, {H,-H,0}, {0,0,-D}}}; + case 'I': return {{{-H,H,H}, {H,-H,H}, {H,H,-H}}}; + case 'R': return {{{2*T,-T,-T}, {T,T,-2*T}, {T,T,T}}}; + case 'H': return {{{2*T,-T,0}, {T,T,0}, {0,0,D}}}; // not used normally + case 'F': return {{{0,H,H}, {H,0,H}, {H,H,0}}}; + default: fail("not a centring type: ", centring_type); + } +} + + +// LIST OF CRYSTALLOGRAPHIC SPACE GROUPS + +struct SpaceGroup { // typically 44 bytes + int number; + int ccp4; + char hm[11]; // Hermann-Mauguin (international) notation + char ext; + char qualifier[5]; + char hall[15]; + int basisop_idx; + + std::string xhm() const { + std::string ret = hm; + if (ext) { + ret += ':'; + ret += ext; + } + return ret; + } + + char centring_type() const { return ext == 'R' ? 'P' : hm[0]; } + + // (old) CCP4 spacegroup names start with H for hexagonal setting + char ccp4_lattice_type() const { return ext == 'H' ? 'H' : hm[0]; } + + // P 1 2 1 -> P2, but P 1 1 2 -> P112. R 3:H -> H3. 
+ std::string short_name() const { + std::string s(hm); + size_t len = s.size(); + if (len > 6 && s[2] == '1' && s[len - 2] == ' ' && s[len - 1] == '1') + s = s[0] + s.substr(4, len - 4 - 2); + if (ext == 'H') + s[0] = 'H'; + s.erase(std::remove(s.begin(), s.end(), ' '), s.end()); + return s; + } + + // As explained in Phenix newsletter CCN_2011_01.pdf#page=12 + // the PDB uses own, non-standard symbols for rhombohedral space groups. + std::string pdb_name() const { + std::string s; + s += ccp4_lattice_type(); + s += hm+1; + return s; + } + + bool is_sohncke() const { return gemmi::is_sohncke(number); } + bool is_enantiomorphic() const { return gemmi::is_enantiomorphic(number); } + bool is_symmorphic() const { return gemmi::is_symmorphic(number); } + PointGroup point_group() const { return gemmi::point_group(number); } + const char* point_group_hm() const { + return gemmi::point_group_hm(point_group()); + } + Laue laue_class() const { return pointgroup_to_laue(point_group()); } + const char* laue_str() const { return laue_class_str(laue_class()); } + CrystalSystem crystal_system() const { + return gemmi::crystal_system(point_group()); + } + const char* crystal_system_str() const { + return gemmi::crystal_system_str(crystal_system()); + } + bool is_centrosymmetric() const { + return laue_to_pointgroup(laue_class()) == point_group(); + } + + /// returns 'a', 'b' or 'c' for monoclinic SG, '\0' otherwise + char monoclinic_unique_axis() const { + if (crystal_system() == CrystalSystem::Monoclinic) + return qualifier[qualifier[0] == '-' ? 1 : 0]; + return '\0'; + } + + const char* basisop_str() const { return get_basisop(basisop_idx); } + Op basisop() const { return parse_triplet(basisop_str()); } + bool is_reference_setting() const { return basisop_idx == 0; } + + Op centred_to_primitive() const { + return {gemmi::centred_to_primitive(centring_type()), {0,0,0}, 'x'}; + } + + /// Returns change-of-hand operator. Compatible with similar sgtbx function. 
+ Op change_of_hand_op() const { + if (is_centrosymmetric()) + return Op::identity(); + Op::Tran t = nonzero_inversion_center(number); + Op op{Op::inversion_rot(), {2*t[0], 2*t[1], 2*t[2]}, 'x'}; + if (!is_reference_setting()) { + Op b = basisop(); + op = b.combine(op).combine(b.inverse()); + } + return op; + } + + GroupOps operations() const { return symops_from_hall(hall); } +}; + +struct SpaceGroupAltName { + char hm[11]; + char ext; + int pos; +}; + +struct GEMMI_DLL spacegroup_tables { + static const SpaceGroup main[564]; + static const SpaceGroupAltName alt_names[28]; + static const unsigned char ccp4_hkl_asu[230]; +}; + +inline const SpaceGroup* find_spacegroup_by_number(int ccp4) noexcept { + if (ccp4 == 0) + return &spacegroup_tables::main[0]; + for (const SpaceGroup& sg : spacegroup_tables::main) + if (sg.ccp4 == ccp4) + return &sg; + return nullptr; +} + +inline const SpaceGroup& get_spacegroup_by_number(int ccp4) { + const SpaceGroup* sg = find_spacegroup_by_number(ccp4); + if (sg == nullptr) + throw std::invalid_argument("Invalid space-group number: " + + std::to_string(ccp4)); + return *sg; +} + +inline const SpaceGroup& get_spacegroup_reference_setting(int number) { + for (const SpaceGroup& sg : spacegroup_tables::main) + if (sg.number == number && sg.is_reference_setting()) + return sg; + throw std::invalid_argument("Invalid space-group number: " + + std::to_string(number)); +} + +/// If angles alpha and gamma are provided, they are used to +/// distinguish hexagonal and rhombohedral settings (e.g. for "R 3"). +/// \param prefer can specify preferred H/R settings and 1/2 origin choice. +/// For example, prefer="2H" means the origin choice 2 and hexagonal +/// settings. The default is "1H". 
+GEMMI_DLL const SpaceGroup* find_spacegroup_by_name(std::string name, + double alpha=0., double gamma=0., + const char* prefer=nullptr); + +inline const SpaceGroup& get_spacegroup_by_name(const std::string& name) { + const SpaceGroup* sg = find_spacegroup_by_name(name); + if (sg == nullptr) + throw std::invalid_argument("Unknown space-group name: " + name); + return *sg; +} + +inline const SpaceGroup& get_spacegroup_p1() { + return spacegroup_tables::main[0]; +} + +inline const SpaceGroup* find_spacegroup_by_ops(const GroupOps& gops) { + char c = gops.find_centering(); + for (const SpaceGroup& sg : spacegroup_tables::main) + if ((c == sg.hall[0] || c == sg.hall[1]) && + gops.is_same_as(sg.operations())) + return &sg; + return nullptr; +} + +// Reciprocal space asu (asymmetric unit). +// The same 12 choices of ASU as in CCP4 symlib and cctbx. +struct ReciprocalAsu { + int idx; + Op::Rot rot{}; // value-initialized only to avoid -Wmaybe-uninitialized + bool is_ref; + + ReciprocalAsu(const SpaceGroup* sg, bool tnt=false) { + if (sg == nullptr) + fail("Missing space group"); + idx = spacegroup_tables::ccp4_hkl_asu[sg->number - 1]; + if (tnt) { + idx += 10; + is_ref = true; // TNT ASU is given wrt current (not standard) settings + } else { + is_ref = sg->is_reference_setting(); + if (!is_ref) + rot = sg->basisop().rot; + } + } + + bool is_in(const Op::Miller& hkl) const { + if (is_ref) + return is_in_reference_setting(hkl[0], hkl[1], hkl[2]); + Op::Miller r; + for (int i = 0; i != 3; ++i) + r[i] = rot[0][i] * hkl[0] + rot[1][i] * hkl[1] + rot[2][i] * hkl[2]; + return is_in_reference_setting(r[0], r[1], r[2]); + } + + bool is_in_reference_setting(int h, int k, int l) const { + switch (idx) { + // 0-9: CCP4 hkl asu, 10-19: TNT hkl asu + case 0: return l>0 || (l==0 && (h>0 || (h==0 && k>=0))); + case 1: return k>=0 && (l>0 || (l==0 && h>=0)); + case 12: // orthorhombic-D + case 2: return h>=0 && k>=0 && l>=0; + case 3: return l>=0 && ((h>=0 && k>0) || (h==0 && k==0)); + 
case 14: // tetragonal-D, hexagonal-D + case 4: return h>=k && k>=0 && l>=0; + case 5: return (h>=0 && k>0) || (h==0 && k==0 && l>=0); + case 16: // trigonal-D P312 + case 6: return h>=k && k>=0 && (k>0 || l>=0); + case 17: // trigonal-D P321 + case 7: return h>=k && k>=0 && (h>k || l>=0); + case 8: return h>=0 && ((l>=h && k>h) || (l==h && k==h)); + case 9: return k>=l && l>=h && h>=0; + case 10: return k>0 || (k==0 && (h>0 || (h==0 && l>=0))); // triclinic + case 11: return k>=0 && (h>0 || (h==0 && l>=0)); // monoclinic-B + case 13: return l>=0 && ((k>=0 && h>0) || (h==0 && k==0)); // tetragonal-C, hexagonal-C + case 15: return (k>=0 && h>0) || (h==0 && k==0 && l>=0); // trigonal-C + case 18: return k>=0 && l>=0 && ((h>k && h>l) || (h==k && h>=l)); // cubic-T + case 19: return h>=k && k>=l && l>=0; // cubic-O + } + unreachable(); + } + + const char* condition_str() const { + switch (idx) { + case 0: return "l>0 or (l=0 and (h>0 or (h=0 and k>=0)))"; + case 1: return "k>=0 and (l>0 or (l=0 and h>=0))"; + case 12: + case 2: return "h>=0 and k>=0 and l>=0"; + case 3: return "l>=0 and ((h>=0 and k>0) or (h=0 and k=0))"; + case 14: + case 4: return "h>=k and k>=0 and l>=0"; + case 5: return "(h>=0 and k>0) or (h=0 and k=0 and l>=0)"; + case 16: + case 6: return "h>=k and k>=0 and (k>0 or l>=0)"; + case 17: + case 7: return "h>=k and k>=0 and (h>k or l>=0)"; + case 8: return "h>=0 and ((l>=h and k>h) or (l=h and k=h))"; + case 9: return "k>=l and l>=h and h>=0"; + case 10: return "k>0 or (k==0 and (h>0 or (h=0 and l>=0)))"; + case 11: return "k>=0 and (h>0 or (h=0 and l>=0))"; + case 13: return "l>=0 and ((k>=0 and h>0) or (h=0 and k==0))"; + case 15: return "(k>=0 and h>0) or (h=0 and k==0 and l>=0)"; + case 18: return "k>=0 and l>=0 and ((h>k and h>l) or (h=k and h>=l))"; + case 19: return "h>=k and k>=l and l>=0"; + } + unreachable(); + } + + /// Returns hkl in asu and MTZ ISYM - 2*n-1 for reflections in the positive + /// asu (I+ of a Friedel pair), 2*n for 
reflections in the negative asu (I-). + std::pair to_asu(const Op::Miller& hkl, const std::vector& sym_ops) const { + int isym = 0; + for (const Op& op : sym_ops) { + ++isym; + Op::Miller new_hkl = op.apply_to_hkl_without_division(hkl); + if (is_in(new_hkl)) + return {Op::divide_hkl_by_DEN(new_hkl), isym}; + ++isym; + Op::Miller negated_new_hkl{{-new_hkl[0], -new_hkl[1], -new_hkl[2]}}; + if (is_in(negated_new_hkl)) + return {Op::divide_hkl_by_DEN(negated_new_hkl), isym}; + } + fail("Oops, maybe inconsistent GroupOps?"); + } + + std::pair to_asu(const Op::Miller& hkl, const GroupOps& gops) const { + return to_asu(hkl, gops.sym_ops); + } + + /// Similar to to_asu(), but the second returned value is sign: true for + or centric + std::pair to_asu_sign(const Op::Miller& hkl, const GroupOps& gops) const { + std::pair neg = {{0,0,0}, true}; + for (const Op& op : gops.sym_ops) { + Op::Miller new_hkl = op.apply_to_hkl_without_division(hkl); + if (is_in(new_hkl)) + return {Op::divide_hkl_by_DEN(new_hkl), true}; + Op::Miller negated_new_hkl{{-new_hkl[0], -new_hkl[1], -new_hkl[2]}}; + if (is_in(negated_new_hkl)) + // don't return it yet, because for centric reflection we prefer (+) + neg = {Op::divide_hkl_by_DEN(negated_new_hkl), false}; + } + if (neg.second) + fail("Oops, maybe inconsistent GroupOps?"); + return neg; + } +}; + +} // namespace gemmi + +namespace std { +template<> struct hash { + size_t operator()(const gemmi::Op& op) const { + size_t h = 0; + for (int i = 0; i != 3; ++i) + for (int j = 0; j != 3; ++j) + h = (h << 2) ^ (op.rot[i][j] + 1); + for (int i = 0; i != 3; ++i) + h = (h << 5) ^ op.tran[i]; + return h; + } +}; +} // namespace std + +#endif diff --git a/symmetry/gemmi/unitcell.hpp b/symmetry/gemmi/unitcell.hpp new file mode 100644 index 00000000..25bb8b46 --- /dev/null +++ b/symmetry/gemmi/unitcell.hpp @@ -0,0 +1,618 @@ +// Copyright 2017 Global Phasing Ltd. +// +// Unit cell. 
+ +#ifndef GEMMI_UNITCELL_HPP_ +#define GEMMI_UNITCELL_HPP_ + +#include +#include // for cos, sin, sqrt, floor, NAN +#include +#include "math.hpp" +#include "fail.hpp" // for fail +#include "symmetry.hpp" // for Op, SpaceGroup + +namespace gemmi { + +inline Mat33 rot_as_mat33(const Op::Rot& rot) { + double mult = 1.0 / Op::DEN; + return Mat33(mult * rot[0][0], mult * rot[0][1], mult * rot[0][2], + mult * rot[1][0], mult * rot[1][1], mult * rot[1][2], + mult * rot[2][0], mult * rot[2][1], mult * rot[2][2]); +} +inline Mat33 rot_as_mat33(const Op& op) { return rot_as_mat33(op.rot); } + + +inline Vec3 tran_as_vec3(const Op& op) { + double mult = 1.0 / Op::DEN; + return Vec3(mult * op.tran[0], mult * op.tran[1], mult * op.tran[2]); +} + +/// Coordinates in Angstroms - orthogonal (Cartesian) coordinates. +struct Position : Vec3 { + using Vec3::Vec3; + Position() = default; + explicit Position(const Vec3& v) : Vec3(v) {} + Position operator-() const { return Position(Vec3::operator-()); } + Position operator-(const Position& o) const { return Position(Vec3::operator-(o)); } + Position operator+(const Position& o) const { return Position(Vec3::operator+(o)); } + Position operator*(double d) const { return Position(Vec3::operator*(d)); } + Position operator/(double d) const { return Position(Vec3::operator/(d)); } + Position& operator-=(const Position& o) { *this = *this - o; return *this; } + Position& operator+=(const Position& o) { *this = *this + o; return *this; } + Position& operator*=(double d) { *this = *this * d; return *this; } + Position& operator/=(double d) { return operator*=(1.0/d); } +}; + +inline Position operator*(double d, const Position& v) { return v * d; } + +/// Fractional coordinates. 
+struct Fractional : Vec3 { + using Vec3::Vec3; + Fractional() = default; + explicit Fractional(const Vec3& v) : Vec3(v) {} + Fractional operator-(const Fractional& o) const { + return Fractional(Vec3::operator-(o)); + } + Fractional operator+(const Fractional& o) const { + return Fractional(Vec3::operator+(o)); + } + Fractional wrap_to_unit() const { + return {x - std::floor(x), y - std::floor(y), z - std::floor(z)}; + } + Fractional wrap_to_zero() const { + return {x - std::round(x), y - std::round(y), z - std::round(z)}; + } + Fractional round() const { + return {std::round(x), std::round(y), std::round(z)}; + } + void move_toward_zero_by_one() { + if (x > 0.5) x -= 1.0; else if (x < -0.5) x += 1.0; + if (y > 0.5) y -= 1.0; else if (y < -0.5) y += 1.0; + if (z > 0.5) z -= 1.0; else if (z < -0.5) z += 1.0; + } +}; + +enum class Asu : unsigned char { Same, Different, Any }; + +/// Result of find_nearest_image +struct NearestImage { + double dist_sq; + int pbc_shift[3] = { 0, 0, 0 }; + int sym_idx = 0; + + double dist() const { return std::sqrt(dist_sq); } + bool same_asu() const { + return pbc_shift[0] == 0 && pbc_shift[1] == 0 && pbc_shift[2] == 0 && sym_idx == 0; + } + + /// Returns a string such as 1555 or 1_555. + std::string symmetry_code(bool underscore) const { + std::string s = std::to_string(sym_idx + 1); + if (underscore) + s += '_'; + if (unsigned(5 + pbc_shift[0]) <= 9 && + unsigned(5 + pbc_shift[1]) <= 9 && + unsigned(5 + pbc_shift[2]) <= 9) { // normal, quick path + for (int shift : pbc_shift) + s += char('5' + shift); + } else { // problematic, non-standard path + for (int i = 0; i < 3; ++i) { + if (i != 0 && underscore) + s += '_'; + s += std::to_string(5 + pbc_shift[i]); + } + } + return s; + } +}; + + +/// Like Transform, but apply() arg is Fractional (not Vec3 - for type safety). 
+struct FTransform : Transform { + FTransform() = default; + FTransform(const Transform& t) : Transform(t) {} + Fractional apply(const Fractional& p) const { + return Fractional(Transform::apply(p)); + } +}; + +/// Non-crystallographic symmetry operation (such as in the MTRIXn record) +struct NcsOp { + std::string id; + bool given; + Transform tr; + Position apply(const Position& p) const { return Position(tr.apply(p)); } +}; + +/// A synonym for convenient passing of hkl. +using Miller = std::array; + +struct MillerHash { + std::size_t operator()(const Miller& hkl) const noexcept { + return std::size_t((hkl[0] * 1024 + hkl[1]) * 1024 + hkl[2]); // NOLINT misplaced cast + } +}; + +struct UnitCellParameters { + double a = 1.0, b = 1.0, c = 1.0; + double alpha = 90.0, beta = 90.0, gamma = 90.0; + + UnitCellParameters() = default; + explicit UnitCellParameters(const double (&par)[6]) { + a = par[0]; b = par[1]; c = par[2]; alpha = par[3]; beta = par[4]; gamma = par[5]; + } + explicit UnitCellParameters(const std::array& par) { + a = par[0]; b = par[1]; c = par[2]; alpha = par[3]; beta = par[4]; gamma = par[5]; + } + + bool operator==(const UnitCellParameters& o) const { + return a == o.a && b == o.b && c == o.c && + alpha == o.alpha && beta == o.beta && gamma == o.gamma; + } + bool operator!=(const UnitCellParameters& o) const { return !operator==(o); } + + bool approx(const UnitCellParameters& o, double epsilon) const { + auto eq = [&](double x, double y) { return std::fabs(x - y) < epsilon; }; + return eq(a, o.a) && eq(b, o.b) && eq(c, o.c) && + eq(alpha, o.alpha) && eq(beta, o.beta) && eq(gamma, o.gamma); + } +}; + +/// Unit cell. Contains cell parameters as well as pre-calculated +/// orthogonalization and fractionalization matrices, volume, and more. +/// Contains symmetry operations (incl. NCS) if they were set from outside. 
+struct UnitCell : UnitCellParameters { + UnitCell() = default; + UnitCell(double a_, double b_, double c_, + double alpha_, double beta_, double gamma_) { + set(a_, b_, c_, alpha_, beta_, gamma_); + } + UnitCell(const std::array& v) { set_from_array(v); } + + Transform orth; + Transform frac; + double volume = 1.0; + /// reciprocal parameters a*, b*, c*, alpha*, beta*, gamma* + double ar = 1.0, br = 1.0, cr = 1.0; + double cos_alphar = 0.0, cos_betar = 0.0, cos_gammar = 0.0; + bool explicit_matrices = false; + short cs_count = 0; // crystallographic symmetries except identity + std::vector images; // symmetry operations + + // Non-crystalline (for example NMR) structures are supposed to use fake + // unit cell 1x1x1, but sometimes they don't. A number of non-crystalline + // entries in the PDB has incorrectly set unit cell or fract. matrix, + // that is why we check both. + bool is_crystal() const { return a != 1.0 && frac.mat[0][0] != 1.0; } + + // compare lengths using relative tolerance rel, angles using tolerance deg + bool is_similar(const UnitCell& o, double rel, double deg) const { + auto siml = [&](double x, double y) { return std::fabs(x - y) < rel * std::max(x, y); }; + auto sima = [&](double x, double y) { return std::fabs(x - y) < deg; }; + return siml(a, o.a) && siml(b, o.b) && siml(c, o.c) && + sima(alpha, o.alpha) && sima(beta, o.beta) && sima(gamma, o.gamma); + } + + void calculate_properties() { + // ensure exact values for right angles + double cos_alpha = alpha == 90. ? 0. : std::cos(rad(alpha)); + double cos_beta = beta == 90. ? 0. : std::cos(rad(beta)); + double cos_gamma = gamma == 90. ? 0. : std::cos(rad(gamma)); + double sin_alpha = alpha == 90. ? 1. : std::sin(rad(alpha)); + double sin_beta = beta == 90. ? 1. : std::sin(rad(beta)); + double sin_gamma = gamma == 90. ? 1. 
: std::sin(rad(gamma)); + if (sin_alpha == 0 || sin_beta == 0 || sin_gamma == 0) + fail("Impossible angle - N*180deg."); + + // volume - formula from Giacovazzo p.62 + volume = a * b * c * std::sqrt(1 - cos_alpha * cos_alpha + - cos_beta * cos_beta - cos_gamma * cos_gamma + + 2 * cos_alpha * cos_beta * cos_gamma); + + // reciprocal parameters a*, b*, ... (Giacovazzo, p. 64) + ar = b * c * sin_alpha / volume; + br = a * c * sin_beta / volume; + cr = a * b * sin_gamma / volume; + double cos_alphar_sin_beta = (cos_beta * cos_gamma - cos_alpha) / sin_gamma; + cos_alphar = cos_alphar_sin_beta / sin_beta; + //cos_alphar = (cos_beta * cos_gamma - cos_alpha) / (sin_beta * sin_gamma); + cos_betar = (cos_alpha * cos_gamma - cos_beta) / (sin_alpha * sin_gamma); + cos_gammar = (cos_alpha * cos_beta - cos_gamma) / (sin_alpha * sin_beta); + + if (explicit_matrices) + return; + + // The orthogonalization matrix we use is described in ITfC B p.262: + // "An alternative mode of orthogonalization, used by the Protein + // Data Bank and most programs, is to align the a1 axis of the unit + // cell with the Cartesian X_1 axis, and to align the a*_3 axis with the + // Cartesian X_3 axis." + double sin_alphar = std::sqrt(1.0 - cos_alphar * cos_alphar); + orth.mat = {a, b * cos_gamma, c * cos_beta, + 0., b * sin_gamma, -c * cos_alphar_sin_beta, + 0., 0. , c * sin_beta * sin_alphar}; + orth.vec = {0., 0., 0.}; + + double o12 = -cos_gamma / (sin_gamma * a); + double o13 = -(cos_gamma * cos_alphar_sin_beta + cos_beta * sin_gamma) + / (sin_alphar * sin_beta * sin_gamma * a); + double o23 = cos_alphar / (sin_alphar * sin_gamma * b); + frac.mat = {1 / a, o12, o13, + 0., 1 / orth.mat[1][1], o23, + 0., 0., 1 / orth.mat[2][2]}; + frac.vec = {0., 0., 0.}; + } + + double cos_alpha() const { return alpha == 90. ? 0. : std::cos(rad(alpha)); } + + /// B matrix following convention from Busing & Levy (1967), not from cctbx. + /// Cf. 
https://dials.github.io/documentation/conventions.html + Mat33 calculate_matrix_B() const { + double sin_gammar = std::sqrt(1 - cos_gammar * cos_gammar); + double sin_betar = std::sqrt(1 - cos_betar * cos_betar); + return Mat33(ar, br * cos_gammar, cr * cos_betar, + 0., br * sin_gammar, -cr * sin_betar * cos_alpha(), + 0., 0., 1.0 / c); + } + + /// The equivalent isotropic displacement factor. + /// Based on Fischer & Tillmanns (1988). Acta Cryst. C44, 775-776. + /// The argument is a non-orthogonalized tensor U, + /// i.e. the one from SmallStructure::Site, but not from Atom. + double calculate_u_eq(const SMat33& ani) const { + double aar = a * ar; + double bbr = b * br; + double ccr = c * cr; + // it could be optimized using orth.mat[0][1] and orth.mat[0][2] + double cos_beta = beta == 90. ? 0. : std::cos(rad(beta)); + double cos_gamma = gamma == 90. ? 0. : std::cos(rad(gamma)); + return 1/3. * (sq(aar) * ani.u11 + sq(bbr) * ani.u22 + sq(ccr) * ani.u33 + + 2 * (aar * bbr * cos_gamma * ani.u12 + + aar * ccr * cos_beta * ani.u13 + + bbr * ccr * cos_alpha() * ani.u23)); + } + + void set_matrices_from_fract(const Transform& f) { + // mmCIF _atom_sites.fract_transf_* and PDB SCALEn records usually contain + // fewer significant digits than the unit cell parameters, and sometimes are + // just wrong. Use them only if we seem to have non-standard crystal frame. + if (f.mat.approx(frac.mat, 1e-4) && f.vec.approx(frac.vec, 1e-6)) + return; + // The SCALE record is sometimes incorrect. Here we only catch cases + // when CRYST1 is set as for non-crystal and SCALE is very suspicious. 
+ if (frac.mat[0][0] == 1.0 && (f.mat[0][0] == 0.0 || f.mat[0][0] > 1.0)) + return; + frac = f; + orth = f.inverse(); + explicit_matrices = true; + } + + void set(double a_, double b_, double c_, + double alpha_, double beta_, double gamma_) { + if (gamma_ == 0.0) // ignore empty/partial CRYST1 (example: 3iyp) + return; + a = a_; + b = b_; + c = c_; + alpha = alpha_; + beta = beta_; + gamma = gamma_; + calculate_properties(); + } + + void set_from_parameters(const UnitCellParameters& p) { + set(p.a, p.b, p.c, p.alpha, p.beta, p.gamma); + } + + void set_from_array(const std::array& v) { set(v[0], v[1], v[2], v[3], v[4], v[5]); } + + void set_from_vectors(const Vec3& va, const Vec3& vb, const Vec3& vc) { + set(va.length(), vb.length(), vc.length(), + deg(vb.angle(vc)), deg(vc.angle(va)), deg(va.angle(vb))); + } + + UnitCell changed_basis_backward(const Op& op, bool set_images) { + Mat33 mat = orth.mat.multiply(rot_as_mat33(op)); + UnitCell new_cell; + new_cell.set_from_vectors(mat.column_copy(0), + mat.column_copy(1), + mat.column_copy(2)); + if (set_images && !images.empty()) { + new_cell.images.reserve(images.size()); + Transform tr{rot_as_mat33(op), tran_as_vec3(op)}; + Transform tr_inv = tr.inverse(); + for (const FTransform& im : images) + new_cell.images.push_back(tr.combine(im).combine(tr_inv)); + } + return new_cell; + } + + UnitCell changed_basis_forward(const Op& op, bool set_images) { + return changed_basis_backward(op.inverse(), set_images); + } + + bool is_compatible_with_groupops(const GroupOps& gops, double eps=1e-3) const { + std::array metric = metric_tensor().elements_voigt(); + for (const Op& op : gops.sym_ops) { + Mat33 m = orth.mat.multiply(rot_as_mat33(op)); + std::array other = {{ + m.column_dot(0,0), m.column_dot(1,1), m.column_dot(2,2), + m.column_dot(1,2), m.column_dot(0,2), m.column_dot(0,1) + }}; + for (int i = 0; i < 6; ++i) + if (std::fabs(metric[i] - other[i]) > eps) + return false; + } + return true; + } + + bool 
is_compatible_with_spacegroup(const SpaceGroup* sg, double eps=1e-3) const { + return sg ? is_compatible_with_groupops(sg->operations(), eps) : false; + } + + void set_cell_images_from_groupops(const GroupOps& group_ops) { + images.clear(); + cs_count = (short) group_ops.order() - 1; + images.reserve(cs_count); + for (Op op : group_ops) + if (op != Op::identity()) + images.push_back(Transform{rot_as_mat33(op), tran_as_vec3(op)}); + } + + void set_cell_images_from_spacegroup(const SpaceGroup* sg) { + if (sg) { + set_cell_images_from_groupops(sg->operations()); + } else { + images.clear(); + cs_count = 0; + } + } + + void add_ncs_images_to_cs_images(const std::vector& ncs) { + assert(cs_count == (short) images.size()); + for (const NcsOp& ncs_op : ncs) + if (!ncs_op.given) { + // We need it to operates on fractional, not orthogonal coordinates. + FTransform f = frac.combine(ncs_op.tr.combine(orth)); + images.push_back(f); + for (int i = 0; i < cs_count; ++i) + images.push_back(images[i].combine(f)); + } + } + + std::vector get_ncs_transforms() const { + std::vector ncs; + for (size_t n = cs_count; n < images.size(); n += cs_count + 1) + ncs.push_back(images[n]); + return ncs; + } + + Position orthogonalize(const Fractional& f) const { + return Position(orth.apply(f)); + } + Fractional fractionalize(const Position& o) const { + return Fractional(frac.apply(o)); + } + + /// orthogonalize_difference(a-b) == orthogonalize(a) - orthogonalize(b) + // The shift (fract.vec) can be non-zero in non-standard settings, + // just do not apply it here. + Position orthogonalize_difference(const Fractional& delta) const { + return Position(orth.mat.multiply(delta)); + } + /// the inverse of orthogonalize_difference + Fractional fractionalize_difference(const Position& delta) const { + return Fractional(frac.mat.multiply(delta)); + } + + /// Returns box containing fractional box (a cuboid in fractional + /// coordinates can be a parallelepiped in Cartesian coordinates). 
+ Box orthogonalize_box(const Box& f) const { + Box r; + r.minimum = orthogonalize(f.minimum); + r.maximum = orthogonalize(f.maximum); + if (alpha != 90. || beta == 90. || gamma == 90.) { + r.extend(orthogonalize({f.minimum.x, f.minimum.y, f.maximum.z})); + r.extend(orthogonalize({f.minimum.x, f.maximum.y, f.maximum.z})); + r.extend(orthogonalize({f.minimum.x, f.maximum.y, f.minimum.z})); + r.extend(orthogonalize({f.maximum.x, f.maximum.y, f.minimum.z})); + r.extend(orthogonalize({f.maximum.x, f.minimum.y, f.minimum.z})); + r.extend(orthogonalize({f.maximum.x, f.minimum.y, f.maximum.z})); + } + return r; + } + + Transform orthogonalize_transform(const FTransform& ftr) const { + return orth.combine(ftr.combine(frac)); + } + Transform op_as_transform(const Op& op) const { + return orthogonalize_transform(Transform{rot_as_mat33(op), tran_as_vec3(op)}); + } + + double distance_sq(const Fractional& pos1, const Fractional& pos2) const { + Fractional diff = (pos1 - pos2).wrap_to_zero(); + return orthogonalize_difference(diff).length_sq(); + } + double distance_sq(const Position& pos1, const Position& pos2) const { + return distance_sq(fractionalize(pos1), fractionalize(pos2)); + } + + double volume_per_image() const { + return is_crystal() ? volume / (1 + images.size()) : NAN; + } + + // Helper function. PBC = periodic boundary conditions. 
+ bool search_pbc_images(Fractional&& diff, NearestImage& image) const { + int neg_shift[3] = {0, 0, 0}; + if (is_crystal()) { + for (int j = 0; j < 3; ++j) + neg_shift[j] = iround(diff.at(j)); + diff.x -= neg_shift[0]; + diff.y -= neg_shift[1]; + diff.z -= neg_shift[2]; + } + Position orth_diff = orthogonalize_difference(diff); + double dsq = orth_diff.length_sq(); + if (dsq < image.dist_sq) { + image.dist_sq = dsq; + for (int j = 0; j < 3; ++j) + image.pbc_shift[j] = -neg_shift[j]; + return true; + } + return false; + } + + NearestImage find_nearest_image(const Position& ref, const Position& pos, Asu asu) const { + NearestImage image; + if (asu == Asu::Different) + image.dist_sq = INFINITY; + else + image.dist_sq = ref.dist_sq(pos); + if (asu == Asu::Same) + return image; + Fractional fpos = fractionalize(pos); + Fractional fref = fractionalize(ref); + search_pbc_images(fpos - fref, image); + if (asu == Asu::Different && + image.pbc_shift[0] == 0 && image.pbc_shift[1] == 0 && image.pbc_shift[2] == 0) + image.dist_sq = INFINITY; + for (int n = 0; n != static_cast(images.size()); ++n) + if (search_pbc_images(images[n].apply(fpos) - fref, image)) + image.sym_idx = n + 1; + return image; + } + + void apply_transform(Fractional& fpos, int image_idx, bool inverse) const { + if (image_idx > 0) { + const FTransform& t = images.at(image_idx - 1); + if (!inverse) + fpos = t.apply(fpos); + else + fpos = FTransform(t.inverse()).apply(fpos); + } + } + + NearestImage find_nearest_pbc_image(const Fractional& fref, Fractional fpos, + int image_idx=0) const { + NearestImage sym_image; + sym_image.dist_sq = INFINITY; + sym_image.sym_idx = image_idx; + apply_transform(fpos, image_idx, false); + search_pbc_images(fpos - fref, sym_image); + return sym_image; + } + NearestImage find_nearest_pbc_image(const Position& ref, const Position& pos, + int image_idx=0) const { + return find_nearest_pbc_image(fractionalize(ref), fractionalize(pos), image_idx); + } + + std::vector 
find_nearest_pbc_images(const Fractional& fref, double dist, + const Fractional& fpos, int image_idx) const { + std::vector results; + NearestImage im = find_nearest_pbc_image(fref, fpos, image_idx); + int sh[3] = {im.pbc_shift[0], im.pbc_shift[1], im.pbc_shift[2]}; + for (im.pbc_shift[0] = sh[0]-1; im.pbc_shift[0] <= sh[0]+1; ++im.pbc_shift[0]) + for (im.pbc_shift[1] = sh[1]-1; im.pbc_shift[1] <= sh[1]+1; ++im.pbc_shift[1]) + for (im.pbc_shift[2] = sh[2]-1; im.pbc_shift[2] <= sh[2]+1; ++im.pbc_shift[2]) { + Fractional shift(im.pbc_shift[0], im.pbc_shift[1], im.pbc_shift[2]); + im.dist_sq = orthogonalize_difference(fpos - fref + shift).length_sq(); + if (im.dist_sq <= sq(dist)) + results.push_back(im); + } + return results; + } + + Position orthogonalize_in_pbc(const Position& ref, + const Fractional& fpos) const { + Fractional fref = fractionalize(ref); + return orthogonalize_difference((fpos - fref).wrap_to_zero()) + ref; + } + + Position find_nearest_pbc_position(const Position& ref, const Position& pos, + int image_idx, bool inverse=false) const { + Fractional fpos = fractionalize(pos); + apply_transform(fpos, image_idx, inverse); + return orthogonalize_in_pbc(ref, fpos); + } + + // apply NearestImage symmetry to fpos + Fractional fract_image(const NearestImage& im, Fractional fpos) { + apply_transform(fpos, im.sym_idx, false); + return fpos + Fractional(im.pbc_shift[0], im.pbc_shift[1], im.pbc_shift[2]); + } + + /// Counts nearby symmetry mates (0 = none, 3 = 4-fold axis, etc). 
+ /// \pre is_crystal() + int is_special_position(const Fractional& fpos, double max_dist) const { + const double max_dist_sq = max_dist * max_dist; + int n = 0; + for (const FTransform& image : images) { + Fractional fdiff = (image.apply(fpos) - fpos).wrap_to_zero(); + if (orthogonalize_difference(fdiff).length_sq() < max_dist_sq) + ++n; + } + return n; + } + int is_special_position(const Position& pos, double max_dist = 0.8) const { + return is_special_position(fractionalize(pos), max_dist); + } + + /// Calculate 1/d^2 for specified hkl reflection. + /// 1/d^2 = (2*sin(theta)/lambda)^2 + // The indices are integers, but they may be stored as floating-point + // numbers (MTZ format) so we use double to avoid conversions. + double calculate_1_d2_double(double h, double k, double l) const { + double arh = ar * h; + double brk = br * k; + double crl = cr * l; + return arh * arh + brk * brk + crl * crl + 2 * (arh * brk * cos_gammar + + arh * crl * cos_betar + + brk * crl * cos_alphar); + } + double calculate_1_d2(const Miller& hkl) const { + return calculate_1_d2_double(hkl[0], hkl[1], hkl[2]); + } + + /// Calculate d-spacing. + /// d = lambda/(2*sin(theta)) + double calculate_d(const Miller& hkl) const { + return 1.0 / std::sqrt(calculate_1_d2(hkl)); + } + + /// Calculate (sin(theta)/lambda)^2 = d*^2/4 + double calculate_stol_sq(const Miller& hkl) const { + return 0.25 * calculate_1_d2(hkl); + } + + /// https://dictionary.iucr.org/Metric_tensor + SMat33 metric_tensor() const { + // the order in SMat33 is ... m12 m13 m23 -> a.a b.b c.c a.b a.c b.c + return {a*a, b*b, c*c, a*orth.mat[0][1], a*orth.mat[0][2], b*c*cos_alpha()}; + } + + SMat33 reciprocal_metric_tensor() const { + return {ar*ar, br*br, cr*cr, ar*br*cos_gammar, ar*cr*cos_betar, br*cr*cos_alphar}; + } + + /// Returns reciprocal unit cell. 
+ UnitCell reciprocal() const { + auto acosd = [](double x) { return deg(std::acos(x)); }; + return UnitCell(ar, br, cr, + acosd(cos_alphar), acosd(cos_betar), acosd(cos_gammar)); + } + + Miller get_hkl_limits(double dmin) const { + return {{int(a / dmin), int(b / dmin), int(c / dmin)}}; + } + + Mat33 primitive_orth_matrix(char centring_type) const { + if (centring_type == 'P') + return orth.mat; + Mat33 c2p = rot_as_mat33(centred_to_primitive(centring_type)); + return orth.mat.multiply(c2p); + } +}; + +} // namespace gemmi +#endif diff --git a/symmetry/symmetry.cpp b/symmetry/symmetry.cpp new file mode 100644 index 00000000..05b5d1c0 --- /dev/null +++ b/symmetry/symmetry.cpp @@ -0,0 +1,1215 @@ +// Copyright Global Phasing Ltd. + +#include +#include // for fabs +#include // for memchr, strchr + +static const char* skip_space(const char* p) { + if (p) + while (*p == ' ' || *p == '\t' || *p == '_') // '_' can be used as space + ++p; + return p; +} + +namespace gemmi { + +// TRIPLET -> OP + +// param only can be set to 'h', 'x', 'a' or ' ' (any), to limit accepted characters. 
+// decimal_fract is useful only for non-crystallographic ops (such as x+0.12) +std::array parse_triplet_part(const std::string& s, char& notation, double* decimal_fract) { + constexpr char a_ = 'a' & ~3; + constexpr char h_ = 'h' & ~3; + constexpr char x_ = 'x' & ~3; + static const signed char letter2index[] = + // a b c d e f g h i j k l + { a_+0, a_+1, a_+2, 0, 0, 0, 0, h_+0, 0, 0, h_+1, h_+2, + // m n o p q r s t u v w x y z + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x_+0, x_+1, x_+2 }; + auto interpret_letter = [&](char c) { + size_t idx = size_t((c | 0x20) - 'a'); // "|0x20" = to lower + if (idx >= sizeof(letter2index) || letter2index[idx] == 0) + fail("unexpected character '", c, "' in: ", s); + auto value = letter2index[idx]; + int detected_notation = value & ~3; + if ((notation | 0x20) == ' ') + notation = detected_notation; + else if (((notation | 0x20) & ~3) != detected_notation) + fail("Unexpected notation (letter set) in: ", s); + return value & 3; + }; + + std::array r = { 0, 0, 0, 0 }; + int num = Op::DEN; + const char* c = s.c_str(); + while (*(c = skip_space(c))) { + if (*c == '+' || *c == '-') { + num = (*c == '+' ? 
Op::DEN : -Op::DEN); + c = skip_space(++c); + } + if (num == 0) + fail("wrong or unsupported triplet format: " + s); + int r_idx; + int den = 1; + double fract = 0; + if ((*c >= '0' && *c <= '9') || *c == '.') { + // syntax examples in this branch: "1", "-1/2", "+2*x", "1/2 * b" + char* endptr; + int n = std::strtol(c, &endptr, 10); + // some COD CIFs have decimal fractions ("-x+0.25", ".5+Y", "1.25000-y") + if (*endptr == '.') { + // avoiding strtod() etc which is locale-dependent + fract = n; + for (double denom = 0.1; *++endptr >= '0' && *endptr <= '9'; denom *= 0.1) + fract += int(*endptr - '0') * denom; + double rounded = std::round(fract * num); + if (!decimal_fract) { + if (std::fabs(rounded - fract * num) > 0.05) + fail("unexpected number in a symmetry triplet part: " + s); + num = int(rounded); + } + } else { + num *= n; + } + if (*endptr == '/') + den = std::strtol(endptr + 1, &endptr, 10); + if (*endptr == '*') { + c = skip_space(endptr + 1); + r_idx = interpret_letter(*c); + ++c; + } else { + c = endptr; + r_idx = 3; + } + } else { + // syntax examples in this branch: "x", "+a", "-k/3" + r_idx = interpret_letter(*c); + c = skip_space(++c); + if (*c == '/') { + char* endptr; + den = std::strtol(c + 1, &endptr, 10); + c = endptr; + } + } + if (den != 1) { + if (den <= 0 || Op::DEN % den != 0 || fract != 0) + fail("Wrong denominator " + std::to_string(den) + " in: " + s); + num /= den; + } + r[r_idx] += num; + if (decimal_fract) + decimal_fract[r_idx] = num > 0 ? 
fract : -fract; + num = 0; + } + if (num != 0) + fail("trailing sign in: " + s); + return r; +} + +Op parse_triplet(const std::string& s, char notation) { + if (std::count(s.begin(), s.end(), ',') != 2) + fail("expected exactly two commas in triplet"); + size_t comma1 = s.find(','); + size_t comma2 = s.find(',', comma1 + 1); + char save_notation = notation; + notation = (notation | 0x20) & ~3; + if (notation != 'x' && notation != 'h' && notation != '`' && notation != ' ') // '`' == a' & ~3 + fail("parse_triplet(): unexpected notation='", save_notation, "'"); + auto a = parse_triplet_part(s.substr(0, comma1), notation); + auto b = parse_triplet_part(s.substr(comma1 + 1, comma2 - (comma1 + 1)), notation); + auto c = parse_triplet_part(s.substr(comma2 + 1), notation); + Op::Rot rot = {{{a[0], a[1], a[2]}, {b[0], b[1], b[2]}, {c[0], c[1], c[2]}}}; + Op::Tran tran = {a[3], b[3], c[3]}; + if (notation == 'h') { + if (tran != Op::Tran{0, 0, 0}) + fail("parse_triplet(): reciprocal-space Op cannot have translation: ", s); + rot = Op::transpose(rot); + } + return { rot, tran, notation }; +} + + +// OP -> TRIPLET + +namespace { + +// much faster than s += std::to_string(n) for n in 0 ... 99 +void append_small_number(std::string& s, int n) { + if (n < 0 || n >= 100) { + s += std::to_string(n); + } else if (n < 10) { + s += char('0' + n); + } else { // 10 ... 
99 + int tens = n / 10; + s += char('0' + tens); + s += char('0' + n - 10 * tens); + } +} + +void append_sign_of(std::string& s, int n) { + if (n < 0) + s += '-'; + else if (!s.empty()) + s += '+'; +} + +// append w/DEN fraction reduced to the lowest terms +std::pair get_op_fraction(int w) { + // Op::DEN == 24 == 2 * 2 * 2 * 3 + int denom = 1; + for (int i = 0; i != 3; ++i) + if (w % 2 == 0) // 2, 2, 2 + w /= 2; + else + denom *= 2; + if (w % 3 == 0) // 3 + w /= 3; + else + denom *= 3; + return {w, denom}; +} + +void append_fraction(std::string& s, std::pair frac) { + append_small_number(s, frac.first); + if (frac.second != 1) { + s += '/'; + append_small_number(s, frac.second); + } +} + +std::string make_triplet_part(const std::array& xyz, int w, char style) { + std::string s; + const char* letters = "xyz hkl abc XYZ HKL ABC"; + switch((style | 0x20) & ~3) { // |0x20 converts to lower case + case 'h': letters += 4; break; + case '`': letters += 8; break; // 'a', because 'a'&~3 == 0x60 == '`' + } + if (!(style & 0x20)) // not lower + letters += 12; + for (int i = 0; i != 3; ++i) + if (xyz[i] != 0) { + append_sign_of(s, xyz[i]); + int a = std::abs(xyz[i]); + if (a != Op::DEN) { + std::pair frac = get_op_fraction(a); + if (frac.first == 1) { // e.g. "x/3" + s += letters[i]; + s += '/'; + append_small_number(s, frac.second); + } else { // e.g. 
"2/3*x" + append_fraction(s, frac); + s += '*'; + s += letters[i]; + } + } else { + s += letters[i]; + } + } + if (w != 0) { + append_sign_of(s, w); + std::pair frac = get_op_fraction(std::abs(w)); + append_fraction(s, frac); + } + return s; +} + +} // anonymous namespace + +Op seitz_to_op(const std::array, 4>& t) { + static_assert(Op::DEN == 24, ""); + auto check_round = [](double d) { + double r = std::round(d * Op::DEN); + if (std::fabs(r - d * Op::DEN) > 0.05) + fail("all numbers in Seitz matrix must be equal Z/24"); + return static_cast(r); + }; + Op op; + if (std::fabs(t[3][0]) + std::fabs(t[3][1]) + std::fabs(t[3][2]) + + std::fabs(t[3][3] - 1) > 1e-3) + fail("the last row in Seitz matrix must be [0 0 0 1]"); + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) + op.rot[i][j] = check_round(t[i][j]); + op.tran[i] = check_round(t[i][3]); + } + op.notation = 'x'; + return op; +} + +void append_op_fraction(std::string& s, int w) { + append_fraction(s, get_op_fraction(w)); +} + +std::string Op::triplet(char style) const { + if (style == ' ') + style = (notation & ~0x20) ? notation : 'x'; + char lower_style = (style | 0x20) & ~3; + if (lower_style == 'h' && !is_hkl()) + fail("triplet(): can't write real-space triplet as hkl"); + if (lower_style != 'h' && is_hkl()) + fail("triplet(): can't write reciprocal-space triplet as xyz"); + // 'x'==0x78, 'h'==0x68, 'a'==0x61, so 'a'&~3 == 0x60 == '`' + if (lower_style != 'x' && lower_style != 'h' && lower_style != '`') + fail("unexpected triplet style: '", style, "'"); + // parse_triplet() transposes hkl ops such as l,h,k + auto r = !is_hkl()? 
rot : transposed_rot(); + return make_triplet_part(r[0], tran[0], style) + + "," + make_triplet_part(r[1], tran[1], style) + + "," + make_triplet_part(r[2], tran[2], style); +} + + +// INTERPRETING HALL SYMBOLS +// based on both ITfC vol.B ch.1.4 (2010) +// and http://cci.lbl.gov/sginfo/hall_symbols.html + +// matrices for Nz from Table 3 and 4 from hall_symbols.html +namespace { +Op::Rot hall_rotation_z(int N) { + constexpr int d = Op::DEN; + switch (N) { + case 1: return {{{d,0,0}, {0,d,0}, {0,0,d}}}; + case 2: return {{{-d,0,0}, {0,-d,0}, {0,0,d}}}; + case 3: return {{{0,-d,0}, {d,-d,0}, {0,0,d}}}; + case 4: return {{{0,-d,0}, {d,0,0}, {0,0,d}}}; + case 6: return {{{d,-d,0}, {d,0,0}, {0,0,d}}}; + case '\'': return {{{0,-d,0},{-d,0,0}, {0,0,-d}}}; + case '"': return {{{0,d,0}, { d,0,0}, {0,0,-d}}}; + case '*': return {{{0,0,d}, { d,0,0}, {0,d,0}}}; + default: fail("incorrect axis definition"); + } +} +Op::Tran hall_translation_from_symbol(char symbol) { + constexpr int h = Op::DEN / 2; + constexpr int q = Op::DEN / 4; + switch (symbol) { + case 'a': return {h, 0, 0}; + case 'b': return {0, h, 0}; + case 'c': return {0, 0, h}; + case 'n': return {h, h, h}; + case 'u': return {q, 0, 0}; + case 'v': return {0, q, 0}; + case 'w': return {0, 0, q}; + case 'd': return {q, q, q}; + default: fail(std::string("unknown symbol: ") + symbol); + } +} + +Op hall_matrix_symbol(const char* start, const char* end, int pos, int& prev) { + Op op = Op::identity(); + bool neg = (*start == '-'); + const char* p = (neg ? start + 1 : start); + if (*p < '1' || *p == '5' || *p > '6') + fail("wrong n-fold order notation: " + std::string(start, end)); + int N = *p++ - '0'; + int fractional_tran = 0; + char principal_axis = '\0'; + char diagonal_axis = '\0'; + for (; p < end; ++p) { + if (*p >= '1' && *p <= '5') { + if (fractional_tran != '\0') + fail("two numeric subscripts"); + fractional_tran = *p - '0'; + } else if (*p == '\'' || *p == '"' || *p == '*') { + if (N != (*p == '*' ? 
3 : 2)) + fail("wrong symbol: " + std::string(start, end)); + diagonal_axis = *p; + } else if (*p == 'x' || *p == 'y' || *p == 'z') { + principal_axis = *p; + } else { + op.translate(hall_translation_from_symbol(*p)); + } + } + // fill in implicit values + if (!principal_axis && !diagonal_axis) { + if (pos == 1) { + principal_axis = 'z'; + } else if (pos == 2 && N == 2) { + if (prev == 2 || prev == 4) + principal_axis = 'x'; + else if (prev == 3 || prev == 6) + diagonal_axis = '\''; + } else if (pos == 3 && N == 3) { + diagonal_axis = '*'; + } else if (N != 1) { + fail("missing axis"); + } + } + // get the operation + op.rot = hall_rotation_z(diagonal_axis ? diagonal_axis : N); + if (neg) + op.rot = op.negated_rot(); + auto alter_order = [](const Op::Rot& r, int i, int j, int k) { + return Op::Rot{{ {r[i][i], r[i][j], r[i][k]}, + {r[j][i], r[j][j], r[j][k]}, + {r[k][i], r[k][j], r[k][k]} }}; + }; + if (principal_axis == 'x') + op.rot = alter_order(op.rot, 2, 0, 1); + else if (principal_axis == 'y') + op.rot = alter_order(op.rot, 1, 2, 0); + if (fractional_tran) + op.tran[principal_axis - 'x'] += Op::DEN / N * fractional_tran; + prev = N; + return op; +} + +// Parses either short (0 0 1) or long notation (x,y,z+1/12) +// but without multipliers (such as 1/2x) to keep things simple for now. 
+Op parse_hall_change_of_basis(const char* start, const char* end) { + if (std::memchr(start, ',', end - start) != nullptr) // long symbol + return parse_triplet(std::string(start, end)); + // short symbol (0 0 1) + Op cob = Op::identity(); + char* endptr; + for (int i = 0; i != 3; ++i) { + cob.tran[i] = std::strtol(start, &endptr, 10) % 12 * (Op::DEN / 12); + start = endptr; + } + if (endptr != end) + fail("unexpected change-of-basis format: " + std::string(start, end)); + return cob; +} +} // anonymous namespace + +GroupOps generators_from_hall(const char* hall) { + auto find_blank = [](const char* p) { + while (*p != '\0' && *p != ' ' && *p != '\t' && *p != '_') // '_' == ' ' + ++p; + return p; + }; + if (hall == nullptr) + fail("null"); + hall = skip_space(hall); + GroupOps ops; + ops.sym_ops.emplace_back(Op::identity()); + bool centrosym = (hall[0] == '-'); + const char* lat = skip_space(centrosym ? hall + 1 : hall); + if (!lat) + fail("not a hall symbol: " + std::string(hall)); + ops.cen_ops = centring_vectors(*lat); + int counter = 0; + int prev = 0; + const char* part = skip_space(lat + 1); + while (*part != '\0' && *part != '(') { + const char* space = find_blank(part); + ++counter; + if (part[0] != '1' || (part[1] != ' ' && part[1] != '\0')) { + Op op = hall_matrix_symbol(part, space, counter, prev); + ops.sym_ops.emplace_back(op); + } + part = skip_space(space); + } + if (centrosym) + ops.sym_ops.push_back({Op::identity().negated_rot(), {0,0,0}, 'x'}); + if (*part == '(') { + const char* rb = std::strchr(part, ')'); + if (!rb) + fail("missing ')': " + std::string(hall)); + if (ops.sym_ops.empty()) + fail("misplaced translation: " + std::string(hall)); + ops.change_basis_forward(parse_hall_change_of_basis(part + 1, rb)); + + if (*skip_space(find_blank(rb + 1)) != '\0') + fail("unexpected characters after ')': " + std::string(hall)); + } + return ops; +} + + +const SpaceGroup spacegroup_tables::main[564] = { + // This table was generated by 
tools/gen_sg_table.py. + // First 530 entries in the same order as in SgInfo, sgtbx and ITB. + // Note: spacegroup 68 has three duplicates with different H-M names. + { 1, 1, "P 1" , 0, "", "P 1" , 0 }, // 0 + { 2, 2, "P -1" , 0, "", "-P 1" , 0 }, // 1 + { 3, 3, "P 1 2 1" , 0, "b", "P 2y" , 0 }, // 2 + { 3, 1003, "P 1 1 2" , 0, "c", "P 2" , 1 }, // 3 + { 3, 0, "P 2 1 1" , 0, "a", "P 2x" , 2 }, // 4 + { 4, 4, "P 1 21 1" , 0, "b", "P 2yb" , 0 }, // 5 + { 4, 1004, "P 1 1 21" , 0, "c", "P 2c" , 1 }, // 6 + { 4, 0, "P 21 1 1" , 0, "a", "P 2xa" , 2 }, // 7 + { 5, 5, "C 1 2 1" , 0, "b1", "C 2y" , 0 }, // 8 + { 5, 2005, "A 1 2 1" , 0, "b2", "A 2y" , 3 }, // 9 + { 5, 4005, "I 1 2 1" , 0, "b3", "I 2y" , 4 }, // 10 + { 5, 0, "A 1 1 2" , 0, "c1", "A 2" , 1 }, // 11 + { 5, 1005, "B 1 1 2" , 0, "c2", "B 2" , 5 }, // 12 + { 5, 0, "I 1 1 2" , 0, "c3", "I 2" , 6 }, // 13 + { 5, 0, "B 2 1 1" , 0, "a1", "B 2x" , 2 }, // 14 + { 5, 0, "C 2 1 1" , 0, "a2", "C 2x" , 7 }, // 15 + { 5, 0, "I 2 1 1" , 0, "a3", "I 2x" , 8 }, // 16 + { 6, 6, "P 1 m 1" , 0, "b", "P -2y" , 0 }, // 17 + { 6, 1006, "P 1 1 m" , 0, "c", "P -2" , 1 }, // 18 + { 6, 0, "P m 1 1" , 0, "a", "P -2x" , 2 }, // 19 + { 7, 7, "P 1 c 1" , 0, "b1", "P -2yc" , 0 }, // 20 + { 7, 0, "P 1 n 1" , 0, "b2", "P -2yac" , 9 }, // 21 + { 7, 0, "P 1 a 1" , 0, "b3", "P -2ya" , 3 }, // 22 + { 7, 0, "P 1 1 a" , 0, "c1", "P -2a" , 1 }, // 23 + { 7, 0, "P 1 1 n" , 0, "c2", "P -2ab" , 10}, // 24 + { 7, 1007, "P 1 1 b" , 0, "c3", "P -2b" , 5 }, // 25 + { 7, 0, "P b 1 1" , 0, "a1", "P -2xb" , 2 }, // 26 + { 7, 0, "P n 1 1" , 0, "a2", "P -2xbc" , 11}, // 27 + { 7, 0, "P c 1 1" , 0, "a3", "P -2xc" , 7 }, // 28 + { 8, 8, "C 1 m 1" , 0, "b1", "C -2y" , 0 }, // 29 + { 8, 0, "A 1 m 1" , 0, "b2", "A -2y" , 3 }, // 30 + { 8, 0, "I 1 m 1" , 0, "b3", "I -2y" , 4 }, // 31 + { 8, 0, "A 1 1 m" , 0, "c1", "A -2" , 1 }, // 32 + { 8, 1008, "B 1 1 m" , 0, "c2", "B -2" , 5 }, // 33 + { 8, 0, "I 1 1 m" , 0, "c3", "I -2" , 6 }, // 34 + { 8, 0, "B m 1 1" , 0, "a1", 
"B -2x" , 2 }, // 35 + { 8, 0, "C m 1 1" , 0, "a2", "C -2x" , 7 }, // 36 + { 8, 0, "I m 1 1" , 0, "a3", "I -2x" , 8 }, // 37 + { 9, 9, "C 1 c 1" , 0, "b1", "C -2yc" , 0 }, // 38 + { 9, 0, "A 1 n 1" , 0, "b2", "A -2yab" , 12}, // 39 + { 9, 0, "I 1 a 1" , 0, "b3", "I -2ya" , 13}, // 40 + { 9, 0, "A 1 a 1" , 0, "-b1", "A -2ya" , 3 }, // 41 + { 9, 0, "C 1 n 1" , 0, "-b2", "C -2yac" , 14}, // 42 + { 9, 0, "I 1 c 1" , 0, "-b3", "I -2yc" , 4 }, // 43 + { 9, 0, "A 1 1 a" , 0, "c1", "A -2a" , 1 }, // 44 + { 9, 0, "B 1 1 n" , 0, "c2", "B -2ab" , 15}, // 45 + { 9, 0, "I 1 1 b" , 0, "c3", "I -2b" , 16}, // 46 + { 9, 1009, "B 1 1 b" , 0, "-c1", "B -2b" , 5 }, // 47 + { 9, 0, "A 1 1 n" , 0, "-c2", "A -2ab" , 10}, // 48 + { 9, 0, "I 1 1 a" , 0, "-c3", "I -2a" , 6 }, // 49 + { 9, 0, "B b 1 1" , 0, "a1", "B -2xb" , 2 }, // 50 + { 9, 0, "C n 1 1" , 0, "a2", "C -2xac" , 17}, // 51 + { 9, 0, "I c 1 1" , 0, "a3", "I -2xc" , 18}, // 52 + { 9, 0, "C c 1 1" , 0, "-a1", "C -2xc" , 7 }, // 53 + { 9, 0, "B n 1 1" , 0, "-a2", "B -2xab" , 11}, // 54 + { 9, 0, "I b 1 1" , 0, "-a3", "I -2xb" , 8 }, // 55 + { 10, 10, "P 1 2/m 1" , 0, "b", "-P 2y" , 0 }, // 56 + { 10, 1010, "P 1 1 2/m" , 0, "c", "-P 2" , 1 }, // 57 + { 10, 0, "P 2/m 1 1" , 0, "a", "-P 2x" , 2 }, // 58 + { 11, 11, "P 1 21/m 1", 0, "b", "-P 2yb" , 0 }, // 59 + { 11, 1011, "P 1 1 21/m", 0, "c", "-P 2c" , 1 }, // 60 + { 11, 0, "P 21/m 1 1", 0, "a", "-P 2xa" , 2 }, // 61 + { 12, 12, "C 1 2/m 1" , 0, "b1", "-C 2y" , 0 }, // 62 + { 12, 0, "A 1 2/m 1" , 0, "b2", "-A 2y" , 3 }, // 63 + { 12, 0, "I 1 2/m 1" , 0, "b3", "-I 2y" , 4 }, // 64 + { 12, 0, "A 1 1 2/m" , 0, "c1", "-A 2" , 1 }, // 65 + { 12, 1012, "B 1 1 2/m" , 0, "c2", "-B 2" , 5 }, // 66 + { 12, 0, "I 1 1 2/m" , 0, "c3", "-I 2" , 6 }, // 67 + { 12, 0, "B 2/m 1 1" , 0, "a1", "-B 2x" , 2 }, // 68 + { 12, 0, "C 2/m 1 1" , 0, "a2", "-C 2x" , 7 }, // 69 + { 12, 0, "I 2/m 1 1" , 0, "a3", "-I 2x" , 8 }, // 70 + { 13, 13, "P 1 2/c 1" , 0, "b1", "-P 2yc" , 0 }, // 71 + { 13, 0, "P 1 2/n 1" 
, 0, "b2", "-P 2yac" , 9 }, // 72 + { 13, 0, "P 1 2/a 1" , 0, "b3", "-P 2ya" , 3 }, // 73 + { 13, 0, "P 1 1 2/a" , 0, "c1", "-P 2a" , 1 }, // 74 + { 13, 0, "P 1 1 2/n" , 0, "c2", "-P 2ab" , 10}, // 75 + { 13, 1013, "P 1 1 2/b" , 0, "c3", "-P 2b" , 5 }, // 76 + { 13, 0, "P 2/b 1 1" , 0, "a1", "-P 2xb" , 2 }, // 77 + { 13, 0, "P 2/n 1 1" , 0, "a2", "-P 2xbc" , 11}, // 78 + { 13, 0, "P 2/c 1 1" , 0, "a3", "-P 2xc" , 7 }, // 79 + { 14, 14, "P 1 21/c 1", 0, "b1", "-P 2ybc" , 0 }, // 80 + { 14, 2014, "P 1 21/n 1", 0, "b2", "-P 2yn" , 9 }, // 81 + { 14, 3014, "P 1 21/a 1", 0, "b3", "-P 2yab" , 3 }, // 82 + { 14, 0, "P 1 1 21/a", 0, "c1", "-P 2ac" , 1 }, // 83 + { 14, 0, "P 1 1 21/n", 0, "c2", "-P 2n" , 10}, // 84 + { 14, 1014, "P 1 1 21/b", 0, "c3", "-P 2bc" , 5 }, // 85 + { 14, 0, "P 21/b 1 1", 0, "a1", "-P 2xab" , 2 }, // 86 + { 14, 0, "P 21/n 1 1", 0, "a2", "-P 2xn" , 11}, // 87 + { 14, 0, "P 21/c 1 1", 0, "a3", "-P 2xac" , 7 }, // 88 + { 15, 15, "C 1 2/c 1" , 0, "b1", "-C 2yc" , 0 }, // 89 + { 15, 0, "A 1 2/n 1" , 0, "b2", "-A 2yab" , 12}, // 90 + { 15, 0, "I 1 2/a 1" , 0, "b3", "-I 2ya" , 13}, // 91 + { 15, 0, "A 1 2/a 1" , 0, "-b1", "-A 2ya" , 3 }, // 92 + { 15, 0, "C 1 2/n 1" , 0, "-b2", "-C 2yac" , 19}, // 93 + { 15, 0, "I 1 2/c 1" , 0, "-b3", "-I 2yc" , 4 }, // 94 + { 15, 0, "A 1 1 2/a" , 0, "c1", "-A 2a" , 1 }, // 95 + { 15, 0, "B 1 1 2/n" , 0, "c2", "-B 2ab" , 15}, // 96 + { 15, 0, "I 1 1 2/b" , 0, "c3", "-I 2b" , 16}, // 97 + { 15, 1015, "B 1 1 2/b" , 0, "-c1", "-B 2b" , 5 }, // 98 + { 15, 0, "A 1 1 2/n" , 0, "-c2", "-A 2ab" , 10}, // 99 + { 15, 0, "I 1 1 2/a" , 0, "-c3", "-I 2a" , 6 }, // 100 + { 15, 0, "B 2/b 1 1" , 0, "a1", "-B 2xb" , 2 }, // 101 + { 15, 0, "C 2/n 1 1" , 0, "a2", "-C 2xac" , 17}, // 102 + { 15, 0, "I 2/c 1 1" , 0, "a3", "-I 2xc" , 18}, // 103 + { 15, 0, "C 2/c 1 1" , 0, "-a1", "-C 2xc" , 7 }, // 104 + { 15, 0, "B 2/n 1 1" , 0, "-a2", "-B 2xab" , 11}, // 105 + { 15, 0, "I 2/b 1 1" , 0, "-a3", "-I 2xb" , 8 }, // 106 + { 16, 16, "P 2 2 2" , 0, 
"", "P 2 2" , 0 }, // 107 + { 17, 17, "P 2 2 21" , 0, "", "P 2c 2" , 0 }, // 108 + { 17, 1017, "P 21 2 2" , 0, "cab", "P 2a 2a" , 1 }, // 109 + { 17, 2017, "P 2 21 2" , 0, "bca", "P 2 2b" , 2 }, // 110 + { 18, 18, "P 21 21 2" , 0, "", "P 2 2ab" , 0 }, // 111 + { 18, 3018, "P 2 21 21" , 0, "cab", "P 2bc 2" , 1 }, // 112 + { 18, 2018, "P 21 2 21" , 0, "bca", "P 2ac 2ac" , 2 }, // 113 + { 19, 19, "P 21 21 21", 0, "", "P 2ac 2ab" , 0 }, // 114 + { 20, 20, "C 2 2 21" , 0, "", "C 2c 2" , 0 }, // 115 + { 20, 0, "A 21 2 2" , 0, "cab", "A 2a 2a" , 1 }, // 116 + { 20, 0, "B 2 21 2" , 0, "bca", "B 2 2b" , 2 }, // 117 + { 21, 21, "C 2 2 2" , 0, "", "C 2 2" , 0 }, // 118 + { 21, 0, "A 2 2 2" , 0, "cab", "A 2 2" , 1 }, // 119 + { 21, 0, "B 2 2 2" , 0, "bca", "B 2 2" , 2 }, // 120 + { 22, 22, "F 2 2 2" , 0, "", "F 2 2" , 0 }, // 121 + { 23, 23, "I 2 2 2" , 0, "", "I 2 2" , 0 }, // 122 + { 24, 24, "I 21 21 21", 0, "", "I 2b 2c" , 0 }, // 123 + { 25, 25, "P m m 2" , 0, "", "P 2 -2" , 0 }, // 124 + { 25, 0, "P 2 m m" , 0, "cab", "P -2 2" , 1 }, // 125 + { 25, 0, "P m 2 m" , 0, "bca", "P -2 -2" , 2 }, // 126 + { 26, 26, "P m c 21" , 0, "", "P 2c -2" , 0 }, // 127 + { 26, 0, "P c m 21" , 0, "ba-c", "P 2c -2c" , 7 }, // 128 + { 26, 0, "P 21 m a" , 0, "cab", "P -2a 2a" , 1 }, // 129 + { 26, 0, "P 21 a m" , 0, "-cba", "P -2 2a" , 3 }, // 130 + { 26, 0, "P b 21 m" , 0, "bca", "P -2 -2b" , 2 }, // 131 + { 26, 0, "P m 21 b" , 0, "a-cb", "P -2b -2" , 5 }, // 132 + { 27, 27, "P c c 2" , 0, "", "P 2 -2c" , 0 }, // 133 + { 27, 0, "P 2 a a" , 0, "cab", "P -2a 2" , 1 }, // 134 + { 27, 0, "P b 2 b" , 0, "bca", "P -2b -2b" , 2 }, // 135 + { 28, 28, "P m a 2" , 0, "", "P 2 -2a" , 0 }, // 136 + { 28, 0, "P b m 2" , 0, "ba-c", "P 2 -2b" , 7 }, // 137 + { 28, 0, "P 2 m b" , 0, "cab", "P -2b 2" , 1 }, // 138 + { 28, 0, "P 2 c m" , 0, "-cba", "P -2c 2" , 3 }, // 139 + { 28, 0, "P c 2 m" , 0, "bca", "P -2c -2c" , 2 }, // 140 + { 28, 0, "P m 2 a" , 0, "a-cb", "P -2a -2a" , 5 }, // 141 + { 29, 29, "P c a 
21" , 0, "", "P 2c -2ac" , 0 }, // 142 + { 29, 0, "P b c 21" , 0, "ba-c", "P 2c -2b" , 7 }, // 143 + { 29, 0, "P 21 a b" , 0, "cab", "P -2b 2a" , 1 }, // 144 + { 29, 0, "P 21 c a" , 0, "-cba", "P -2ac 2a" , 3 }, // 145 + { 29, 0, "P c 21 b" , 0, "bca", "P -2bc -2c" , 2 }, // 146 + { 29, 0, "P b 21 a" , 0, "a-cb", "P -2a -2ab" , 5 }, // 147 + { 30, 30, "P n c 2" , 0, "", "P 2 -2bc" , 0 }, // 148 + { 30, 0, "P c n 2" , 0, "ba-c", "P 2 -2ac" , 7 }, // 149 + { 30, 0, "P 2 n a" , 0, "cab", "P -2ac 2" , 1 }, // 150 + { 30, 0, "P 2 a n" , 0, "-cba", "P -2ab 2" , 3 }, // 151 + { 30, 0, "P b 2 n" , 0, "bca", "P -2ab -2ab" , 2 }, // 152 + { 30, 0, "P n 2 b" , 0, "a-cb", "P -2bc -2bc" , 5 }, // 153 + { 31, 31, "P m n 21" , 0, "", "P 2ac -2" , 0 }, // 154 + { 31, 0, "P n m 21" , 0, "ba-c", "P 2bc -2bc" , 7 }, // 155 + { 31, 0, "P 21 m n" , 0, "cab", "P -2ab 2ab" , 1 }, // 156 + { 31, 0, "P 21 n m" , 0, "-cba", "P -2 2ac" , 3 }, // 157 + { 31, 0, "P n 21 m" , 0, "bca", "P -2 -2bc" , 2 }, // 158 + { 31, 0, "P m 21 n" , 0, "a-cb", "P -2ab -2" , 5 }, // 159 + { 32, 32, "P b a 2" , 0, "", "P 2 -2ab" , 0 }, // 160 + { 32, 0, "P 2 c b" , 0, "cab", "P -2bc 2" , 1 }, // 161 + { 32, 0, "P c 2 a" , 0, "bca", "P -2ac -2ac" , 2 }, // 162 + { 33, 33, "P n a 21" , 0, "", "P 2c -2n" , 0 }, // 163 + { 33, 0, "P b n 21" , 0, "ba-c", "P 2c -2ab" , 7 }, // 164 + { 33, 0, "P 21 n b" , 0, "cab", "P -2bc 2a" , 1 }, // 165 + { 33, 0, "P 21 c n" , 0, "-cba", "P -2n 2a" , 3 }, // 166 + { 33, 0, "P c 21 n" , 0, "bca", "P -2n -2ac" , 2 }, // 167 + { 33, 0, "P n 21 a" , 0, "a-cb", "P -2ac -2n" , 5 }, // 168 + { 34, 34, "P n n 2" , 0, "", "P 2 -2n" , 0 }, // 169 + { 34, 0, "P 2 n n" , 0, "cab", "P -2n 2" , 1 }, // 170 + { 34, 0, "P n 2 n" , 0, "bca", "P -2n -2n" , 2 }, // 171 + { 35, 35, "C m m 2" , 0, "", "C 2 -2" , 0 }, // 172 + { 35, 0, "A 2 m m" , 0, "cab", "A -2 2" , 1 }, // 173 + { 35, 0, "B m 2 m" , 0, "bca", "B -2 -2" , 2 }, // 174 + { 36, 36, "C m c 21" , 0, "", "C 2c -2" , 0 }, // 175 + { 36, 0, 
"C c m 21" , 0, "ba-c", "C 2c -2c" , 7 }, // 176 + { 36, 0, "A 21 m a" , 0, "cab", "A -2a 2a" , 1 }, // 177 + { 36, 0, "A 21 a m" , 0, "-cba", "A -2 2a" , 3 }, // 178 + { 36, 0, "B b 21 m" , 0, "bca", "B -2 -2b" , 2 }, // 179 + { 36, 0, "B m 21 b" , 0, "a-cb", "B -2b -2" , 5 }, // 180 + { 37, 37, "C c c 2" , 0, "", "C 2 -2c" , 0 }, // 181 + { 37, 0, "A 2 a a" , 0, "cab", "A -2a 2" , 1 }, // 182 + { 37, 0, "B b 2 b" , 0, "bca", "B -2b -2b" , 2 }, // 183 + { 38, 38, "A m m 2" , 0, "", "A 2 -2" , 0 }, // 184 + { 38, 0, "B m m 2" , 0, "ba-c", "B 2 -2" , 7 }, // 185 + { 38, 0, "B 2 m m" , 0, "cab", "B -2 2" , 1 }, // 186 + { 38, 0, "C 2 m m" , 0, "-cba", "C -2 2" , 3 }, // 187 + { 38, 0, "C m 2 m" , 0, "bca", "C -2 -2" , 2 }, // 188 + { 38, 0, "A m 2 m" , 0, "a-cb", "A -2 -2" , 5 }, // 189 + { 39, 39, "A b m 2" , 0, "", "A 2 -2b" , 0 }, // 190 + { 39, 0, "B m a 2" , 0, "ba-c", "B 2 -2a" , 7 }, // 191 + { 39, 0, "B 2 c m" , 0, "cab", "B -2a 2" , 1 }, // 192 + { 39, 0, "C 2 m b" , 0, "-cba", "C -2a 2" , 3 }, // 193 + { 39, 0, "C m 2 a" , 0, "bca", "C -2a -2a" , 2 }, // 194 + { 39, 0, "A c 2 m" , 0, "a-cb", "A -2b -2b" , 5 }, // 195 + { 40, 40, "A m a 2" , 0, "", "A 2 -2a" , 0 }, // 196 + { 40, 0, "B b m 2" , 0, "ba-c", "B 2 -2b" , 7 }, // 197 + { 40, 0, "B 2 m b" , 0, "cab", "B -2b 2" , 1 }, // 198 + { 40, 0, "C 2 c m" , 0, "-cba", "C -2c 2" , 3 }, // 199 + { 40, 0, "C c 2 m" , 0, "bca", "C -2c -2c" , 2 }, // 200 + { 40, 0, "A m 2 a" , 0, "a-cb", "A -2a -2a" , 5 }, // 201 + { 41, 41, "A b a 2" , 0, "", "A 2 -2ab" , 0 }, // 202 + { 41, 0, "B b a 2" , 0, "ba-c", "B 2 -2ab" , 7 }, // 203 + { 41, 0, "B 2 c b" , 0, "cab", "B -2ab 2" , 1 }, // 204 + { 41, 0, "C 2 c b" , 0, "-cba", "C -2ac 2" , 3 }, // 205 + { 41, 0, "C c 2 a" , 0, "bca", "C -2ac -2ac" , 2 }, // 206 + { 41, 0, "A c 2 a" , 0, "a-cb", "A -2ab -2ab" , 5 }, // 207 + { 42, 42, "F m m 2" , 0, "", "F 2 -2" , 0 }, // 208 + { 42, 0, "F 2 m m" , 0, "cab", "F -2 2" , 1 }, // 209 + { 42, 0, "F m 2 m" , 0, "bca", "F -2 -2" , 
2 }, // 210 + { 43, 43, "F d d 2" , 0, "", "F 2 -2d" , 0 }, // 211 + { 43, 0, "F 2 d d" , 0, "cab", "F -2d 2" , 1 }, // 212 + { 43, 0, "F d 2 d" , 0, "bca", "F -2d -2d" , 2 }, // 213 + { 44, 44, "I m m 2" , 0, "", "I 2 -2" , 0 }, // 214 + { 44, 0, "I 2 m m" , 0, "cab", "I -2 2" , 1 }, // 215 + { 44, 0, "I m 2 m" , 0, "bca", "I -2 -2" , 2 }, // 216 + { 45, 45, "I b a 2" , 0, "", "I 2 -2c" , 0 }, // 217 + { 45, 0, "I 2 c b" , 0, "cab", "I -2a 2" , 1 }, // 218 + { 45, 0, "I c 2 a" , 0, "bca", "I -2b -2b" , 2 }, // 219 + { 46, 46, "I m a 2" , 0, "", "I 2 -2a" , 0 }, // 220 + { 46, 0, "I b m 2" , 0, "ba-c", "I 2 -2b" , 7 }, // 221 + { 46, 0, "I 2 m b" , 0, "cab", "I -2b 2" , 1 }, // 222 + { 46, 0, "I 2 c m" , 0, "-cba", "I -2c 2" , 3 }, // 223 + { 46, 0, "I c 2 m" , 0, "bca", "I -2c -2c" , 2 }, // 224 + { 46, 0, "I m 2 a" , 0, "a-cb", "I -2a -2a" , 5 }, // 225 + { 47, 47, "P m m m" , 0, "", "-P 2 2" , 0 }, // 226 + { 48, 48, "P n n n" , '1', "", "P 2 2 -1n" , 20}, // 227 + { 48, 0, "P n n n" , '2', "", "-P 2ab 2bc" , 0 }, // 228 + { 49, 49, "P c c m" , 0, "", "-P 2 2c" , 0 }, // 229 + { 49, 0, "P m a a" , 0, "cab", "-P 2a 2" , 1 }, // 230 + { 49, 0, "P b m b" , 0, "bca", "-P 2b 2b" , 2 }, // 231 + { 50, 50, "P b a n" , '1', "", "P 2 2 -1ab" , 21}, // 232 + { 50, 0, "P b a n" , '2', "", "-P 2ab 2b" , 0 }, // 233 + { 50, 0, "P n c b" , '1', "cab", "P 2 2 -1bc" , 22}, // 234 + { 50, 0, "P n c b" , '2', "cab", "-P 2b 2bc" , 1 }, // 235 + { 50, 0, "P c n a" , '1', "bca", "P 2 2 -1ac" , 23}, // 236 + { 50, 0, "P c n a" , '2', "bca", "-P 2a 2c" , 2 }, // 237 + { 51, 51, "P m m a" , 0, "", "-P 2a 2a" , 0 }, // 238 + { 51, 0, "P m m b" , 0, "ba-c", "-P 2b 2" , 7 }, // 239 + { 51, 0, "P b m m" , 0, "cab", "-P 2 2b" , 1 }, // 240 + { 51, 0, "P c m m" , 0, "-cba", "-P 2c 2c" , 3 }, // 241 + { 51, 0, "P m c m" , 0, "bca", "-P 2c 2" , 2 }, // 242 + { 51, 0, "P m a m" , 0, "a-cb", "-P 2 2a" , 5 }, // 243 + { 52, 52, "P n n a" , 0, "", "-P 2a 2bc" , 0 }, // 244 + { 52, 0, "P n n b" , 
0, "ba-c", "-P 2b 2n" , 7 }, // 245 + { 52, 0, "P b n n" , 0, "cab", "-P 2n 2b" , 1 }, // 246 + { 52, 0, "P c n n" , 0, "-cba", "-P 2ab 2c" , 3 }, // 247 + { 52, 0, "P n c n" , 0, "bca", "-P 2ab 2n" , 2 }, // 248 + { 52, 0, "P n a n" , 0, "a-cb", "-P 2n 2bc" , 5 }, // 249 + { 53, 53, "P m n a" , 0, "", "-P 2ac 2" , 0 }, // 250 + { 53, 0, "P n m b" , 0, "ba-c", "-P 2bc 2bc" , 7 }, // 251 + { 53, 0, "P b m n" , 0, "cab", "-P 2ab 2ab" , 1 }, // 252 + { 53, 0, "P c n m" , 0, "-cba", "-P 2 2ac" , 3 }, // 253 + { 53, 0, "P n c m" , 0, "bca", "-P 2 2bc" , 2 }, // 254 + { 53, 0, "P m a n" , 0, "a-cb", "-P 2ab 2" , 5 }, // 255 + { 54, 54, "P c c a" , 0, "", "-P 2a 2ac" , 0 }, // 256 + { 54, 0, "P c c b" , 0, "ba-c", "-P 2b 2c" , 7 }, // 257 + { 54, 0, "P b a a" , 0, "cab", "-P 2a 2b" , 1 }, // 258 + { 54, 0, "P c a a" , 0, "-cba", "-P 2ac 2c" , 3 }, // 259 + { 54, 0, "P b c b" , 0, "bca", "-P 2bc 2b" , 2 }, // 260 + { 54, 0, "P b a b" , 0, "a-cb", "-P 2b 2ab" , 5 }, // 261 + { 55, 55, "P b a m" , 0, "", "-P 2 2ab" , 0 }, // 262 + { 55, 0, "P m c b" , 0, "cab", "-P 2bc 2" , 1 }, // 263 + { 55, 0, "P c m a" , 0, "bca", "-P 2ac 2ac" , 2 }, // 264 + { 56, 56, "P c c n" , 0, "", "-P 2ab 2ac" , 0 }, // 265 + { 56, 0, "P n a a" , 0, "cab", "-P 2ac 2bc" , 1 }, // 266 + { 56, 0, "P b n b" , 0, "bca", "-P 2bc 2ab" , 2 }, // 267 + { 57, 57, "P b c m" , 0, "", "-P 2c 2b" , 0 }, // 268 + { 57, 0, "P c a m" , 0, "ba-c", "-P 2c 2ac" , 7 }, // 269 + { 57, 0, "P m c a" , 0, "cab", "-P 2ac 2a" , 1 }, // 270 + { 57, 0, "P m a b" , 0, "-cba", "-P 2b 2a" , 3 }, // 271 + { 57, 0, "P b m a" , 0, "bca", "-P 2a 2ab" , 2 }, // 272 + { 57, 0, "P c m b" , 0, "a-cb", "-P 2bc 2c" , 5 }, // 273 + { 58, 58, "P n n m" , 0, "", "-P 2 2n" , 0 }, // 274 + { 58, 0, "P m n n" , 0, "cab", "-P 2n 2" , 1 }, // 275 + { 58, 0, "P n m n" , 0, "bca", "-P 2n 2n" , 2 }, // 276 + { 59, 59, "P m m n" , '1', "", "P 2 2ab -1ab" , 21}, // 277 + { 59, 1059, "P m m n" , '2', "", "-P 2ab 2a" , 0 }, // 278 + { 59, 0, "P n m m" , 
'1', "cab", "P 2bc 2 -1bc" , 22}, // 279 + { 59, 0, "P n m m" , '2', "cab", "-P 2c 2bc" , 1 }, // 280 + { 59, 0, "P m n m" , '1', "bca", "P 2ac 2ac -1ac", 23}, // 281 + { 59, 0, "P m n m" , '2', "bca", "-P 2c 2a" , 2 }, // 282 + { 60, 60, "P b c n" , 0, "", "-P 2n 2ab" , 0 }, // 283 + { 60, 0, "P c a n" , 0, "ba-c", "-P 2n 2c" , 7 }, // 284 + { 60, 0, "P n c a" , 0, "cab", "-P 2a 2n" , 1 }, // 285 + { 60, 0, "P n a b" , 0, "-cba", "-P 2bc 2n" , 3 }, // 286 + { 60, 0, "P b n a" , 0, "bca", "-P 2ac 2b" , 2 }, // 287 + { 60, 0, "P c n b" , 0, "a-cb", "-P 2b 2ac" , 5 }, // 288 + { 61, 61, "P b c a" , 0, "", "-P 2ac 2ab" , 0 }, // 289 + { 61, 0, "P c a b" , 0, "ba-c", "-P 2bc 2ac" , 3 }, // 290 + { 62, 62, "P n m a" , 0, "", "-P 2ac 2n" , 0 }, // 291 + { 62, 0, "P m n b" , 0, "ba-c", "-P 2bc 2a" , 7 }, // 292 + { 62, 0, "P b n m" , 0, "cab", "-P 2c 2ab" , 1 }, // 293 + { 62, 0, "P c m n" , 0, "-cba", "-P 2n 2ac" , 3 }, // 294 + { 62, 0, "P m c n" , 0, "bca", "-P 2n 2a" , 2 }, // 295 + { 62, 0, "P n a m" , 0, "a-cb", "-P 2c 2n" , 5 }, // 296 + { 63, 63, "C m c m" , 0, "", "-C 2c 2" , 0 }, // 297 + { 63, 0, "C c m m" , 0, "ba-c", "-C 2c 2c" , 7 }, // 298 + { 63, 0, "A m m a" , 0, "cab", "-A 2a 2a" , 1 }, // 299 + { 63, 0, "A m a m" , 0, "-cba", "-A 2 2a" , 3 }, // 300 + { 63, 0, "B b m m" , 0, "bca", "-B 2 2b" , 2 }, // 301 + { 63, 0, "B m m b" , 0, "a-cb", "-B 2b 2" , 5 }, // 302 + { 64, 64, "C m c a" , 0, "", "-C 2ac 2" , 0 }, // 303 + { 64, 0, "C c m b" , 0, "ba-c", "-C 2ac 2ac" , 7 }, // 304 + { 64, 0, "A b m a" , 0, "cab", "-A 2ab 2ab" , 1 }, // 305 + { 64, 0, "A c a m" , 0, "-cba", "-A 2 2ab" , 3 }, // 306 + { 64, 0, "B b c m" , 0, "bca", "-B 2 2ab" , 2 }, // 307 + { 64, 0, "B m a b" , 0, "a-cb", "-B 2ab 2" , 5 }, // 308 + { 65, 65, "C m m m" , 0, "", "-C 2 2" , 0 }, // 309 + { 65, 0, "A m m m" , 0, "cab", "-A 2 2" , 1 }, // 310 + { 65, 0, "B m m m" , 0, "bca", "-B 2 2" , 2 }, // 311 + { 66, 66, "C c c m" , 0, "", "-C 2 2c" , 0 }, // 312 + { 66, 0, "A m a a" , 0, 
"cab", "-A 2a 2" , 1 }, // 313 + { 66, 0, "B b m b" , 0, "bca", "-B 2b 2b" , 2 }, // 314 + { 67, 67, "C m m a" , 0, "", "-C 2a 2" , 0 }, // 315 + { 67, 0, "C m m b" , 0, "ba-c", "-C 2a 2a" , 14}, // 316 + { 67, 0, "A b m m" , 0, "cab", "-A 2b 2b" , 1 }, // 317 + { 67, 0, "A c m m" , 0, "-cba", "-A 2 2b" , 3 }, // 318 + { 67, 0, "B m c m" , 0, "bca", "-B 2 2a" , 2 }, // 319 + { 67, 0, "B m a m" , 0, "a-cb", "-B 2a 2" , 5 }, // 320 + { 68, 68, "C c c a" , '1', "", "C 2 2 -1ac" , 24}, // 321 + { 68, 0, "C c c a" , '2', "", "-C 2a 2ac" , 0 }, // 322 + { 68, 0, "C c c b" , '1', "ba-c", "C 2 2 -1ac" , 24}, // 323 (==321) + { 68, 0, "C c c b" , '2', "ba-c", "-C 2a 2c" , 21}, // 324 + { 68, 0, "A b a a" , '1', "cab", "A 2 2 -1ab" , 25}, // 325 + { 68, 0, "A b a a" , '2', "cab", "-A 2a 2b" , 1 }, // 326 + { 68, 0, "A c a a" , '1', "-cba", "A 2 2 -1ab" , 25}, // 327 (==325) + { 68, 0, "A c a a" , '2', "-cba", "-A 2ab 2b" , 3 }, // 328 + { 68, 0, "B b c b" , '1', "bca", "B 2 2 -1ab" , 26}, // 329 + { 68, 0, "B b c b" , '2', "bca", "-B 2ab 2b" , 2 }, // 330 + { 68, 0, "B b a b" , '1', "a-cb", "B 2 2 -1ab" , 26}, // 331 (==329) + { 68, 0, "B b a b" , '2', "a-cb", "-B 2b 2ab" , 5 }, // 332 + { 69, 69, "F m m m" , 0, "", "-F 2 2" , 0 }, // 333 + { 70, 70, "F d d d" , '1', "", "F 2 2 -1d" , 27}, // 334 + { 70, 0, "F d d d" , '2', "", "-F 2uv 2vw" , 0 }, // 335 + { 71, 71, "I m m m" , 0, "", "-I 2 2" , 0 }, // 336 + { 72, 72, "I b a m" , 0, "", "-I 2 2c" , 0 }, // 337 + { 72, 0, "I m c b" , 0, "cab", "-I 2a 2" , 1 }, // 338 + { 72, 0, "I c m a" , 0, "bca", "-I 2b 2b" , 2 }, // 339 + { 73, 73, "I b c a" , 0, "", "-I 2b 2c" , 0 }, // 340 + { 73, 0, "I c a b" , 0, "ba-c", "-I 2a 2b" , 28}, // 341 + { 74, 74, "I m m a" , 0, "", "-I 2b 2" , 0 }, // 342 + { 74, 0, "I m m b" , 0, "ba-c", "-I 2a 2a" , 28}, // 343 + { 74, 0, "I b m m" , 0, "cab", "-I 2c 2c" , 1 }, // 344 + { 74, 0, "I c m m" , 0, "-cba", "-I 2 2b" , 3 }, // 345 + { 74, 0, "I m c m" , 0, "bca", "-I 2 2a" , 2 }, // 346 + { 
74, 0, "I m a m" , 0, "a-cb", "-I 2c 2" , 5 }, // 347 + { 75, 75, "P 4" , 0, "", "P 4" , 0 }, // 348 + { 76, 76, "P 41" , 0, "", "P 4w" , 0 }, // 349 + { 77, 77, "P 42" , 0, "", "P 4c" , 0 }, // 350 + { 78, 78, "P 43" , 0, "", "P 4cw" , 0 }, // 351 + { 79, 79, "I 4" , 0, "", "I 4" , 0 }, // 352 + { 80, 80, "I 41" , 0, "", "I 4bw" , 0 }, // 353 + { 81, 81, "P -4" , 0, "", "P -4" , 0 }, // 354 + { 82, 82, "I -4" , 0, "", "I -4" , 0 }, // 355 + { 83, 83, "P 4/m" , 0, "", "-P 4" , 0 }, // 356 + { 84, 84, "P 42/m" , 0, "", "-P 4c" , 0 }, // 357 + { 85, 85, "P 4/n" , '1', "", "P 4ab -1ab" , 29}, // 358 + { 85, 0, "P 4/n" , '2', "", "-P 4a" , 0 }, // 359 + { 86, 86, "P 42/n" , '1', "", "P 4n -1n" , 30}, // 360 + { 86, 0, "P 42/n" , '2', "", "-P 4bc" , 0 }, // 361 + { 87, 87, "I 4/m" , 0, "", "-I 4" , 0 }, // 362 + { 88, 88, "I 41/a" , '1', "", "I 4bw -1bw" , 31}, // 363 + { 88, 0, "I 41/a" , '2', "", "-I 4ad" , 0 }, // 364 + { 89, 89, "P 4 2 2" , 0, "", "P 4 2" , 0 }, // 365 + { 90, 90, "P 4 21 2" , 0, "", "P 4ab 2ab" , 0 }, // 366 + { 91, 91, "P 41 2 2" , 0, "", "P 4w 2c" , 0 }, // 367 + { 92, 92, "P 41 21 2" , 0, "", "P 4abw 2nw" , 0 }, // 368 + { 93, 93, "P 42 2 2" , 0, "", "P 4c 2" , 0 }, // 369 + { 94, 94, "P 42 21 2" , 0, "", "P 4n 2n" , 0 }, // 370 + { 95, 95, "P 43 2 2" , 0, "", "P 4cw 2c" , 0 }, // 371 + { 96, 96, "P 43 21 2" , 0, "", "P 4nw 2abw" , 0 }, // 372 + { 97, 97, "I 4 2 2" , 0, "", "I 4 2" , 0 }, // 373 + { 98, 98, "I 41 2 2" , 0, "", "I 4bw 2bw" , 0 }, // 374 + { 99, 99, "P 4 m m" , 0, "", "P 4 -2" , 0 }, // 375 + {100, 100, "P 4 b m" , 0, "", "P 4 -2ab" , 0 }, // 376 + {101, 101, "P 42 c m" , 0, "", "P 4c -2c" , 0 }, // 377 + {102, 102, "P 42 n m" , 0, "", "P 4n -2n" , 0 }, // 378 + {103, 103, "P 4 c c" , 0, "", "P 4 -2c" , 0 }, // 379 + {104, 104, "P 4 n c" , 0, "", "P 4 -2n" , 0 }, // 380 + {105, 105, "P 42 m c" , 0, "", "P 4c -2" , 0 }, // 381 + {106, 106, "P 42 b c" , 0, "", "P 4c -2ab" , 0 }, // 382 + {107, 107, "I 4 m m" , 0, "", "I 4 -2" , 0 }, 
// 383 + {108, 108, "I 4 c m" , 0, "", "I 4 -2c" , 0 }, // 384 + {109, 109, "I 41 m d" , 0, "", "I 4bw -2" , 0 }, // 385 + {110, 110, "I 41 c d" , 0, "", "I 4bw -2c" , 0 }, // 386 + {111, 111, "P -4 2 m" , 0, "", "P -4 2" , 0 }, // 387 + {112, 112, "P -4 2 c" , 0, "", "P -4 2c" , 0 }, // 388 + {113, 113, "P -4 21 m" , 0, "", "P -4 2ab" , 0 }, // 389 + {114, 114, "P -4 21 c" , 0, "", "P -4 2n" , 0 }, // 390 + {115, 115, "P -4 m 2" , 0, "", "P -4 -2" , 0 }, // 391 + {116, 116, "P -4 c 2" , 0, "", "P -4 -2c" , 0 }, // 392 + {117, 117, "P -4 b 2" , 0, "", "P -4 -2ab" , 0 }, // 393 + {118, 118, "P -4 n 2" , 0, "", "P -4 -2n" , 0 }, // 394 + {119, 119, "I -4 m 2" , 0, "", "I -4 -2" , 0 }, // 395 + {120, 120, "I -4 c 2" , 0, "", "I -4 -2c" , 0 }, // 396 + {121, 121, "I -4 2 m" , 0, "", "I -4 2" , 0 }, // 397 + {122, 122, "I -4 2 d" , 0, "", "I -4 2bw" , 0 }, // 398 + {123, 123, "P 4/m m m" , 0, "", "-P 4 2" , 0 }, // 399 + {124, 124, "P 4/m c c" , 0, "", "-P 4 2c" , 0 }, // 400 + {125, 125, "P 4/n b m" , '1', "", "P 4 2 -1ab" , 21}, // 401 + {125, 0, "P 4/n b m" , '2', "", "-P 4a 2b" , 0 }, // 402 + {126, 126, "P 4/n n c" , '1', "", "P 4 2 -1n" , 20}, // 403 + {126, 0, "P 4/n n c" , '2', "", "-P 4a 2bc" , 0 }, // 404 + {127, 127, "P 4/m b m" , 0, "", "-P 4 2ab" , 0 }, // 405 + {128, 128, "P 4/m n c" , 0, "", "-P 4 2n" , 0 }, // 406 + {129, 129, "P 4/n m m" , '1', "", "P 4ab 2ab -1ab", 29}, // 407 + {129, 0, "P 4/n m m" , '2', "", "-P 4a 2a" , 0 }, // 408 + {130, 130, "P 4/n c c" , '1', "", "P 4ab 2n -1ab" , 29}, // 409 + {130, 0, "P 4/n c c" , '2', "", "-P 4a 2ac" , 0 }, // 410 + {131, 131, "P 42/m m c", 0, "", "-P 4c 2" , 0 }, // 411 + {132, 132, "P 42/m c m", 0, "", "-P 4c 2c" , 0 }, // 412 + {133, 133, "P 42/n b c", '1', "", "P 4n 2c -1n" , 32}, // 413 + {133, 0, "P 42/n b c", '2', "", "-P 4ac 2b" , 0 }, // 414 + {134, 134, "P 42/n n m", '1', "", "P 4n 2 -1n" , 33}, // 415 + {134, 0, "P 42/n n m", '2', "", "-P 4ac 2bc" , 0 }, // 416 + {135, 135, "P 42/m b c", 0, "", 
"-P 4c 2ab" , 0 }, // 417 + {136, 136, "P 42/m n m", 0, "", "-P 4n 2n" , 0 }, // 418 + {137, 137, "P 42/n m c", '1', "", "P 4n 2n -1n" , 32}, // 419 + {137, 0, "P 42/n m c", '2', "", "-P 4ac 2a" , 0 }, // 420 + {138, 138, "P 42/n c m", '1', "", "P 4n 2ab -1n" , 33}, // 421 + {138, 0, "P 42/n c m", '2', "", "-P 4ac 2ac" , 0 }, // 422 + {139, 139, "I 4/m m m" , 0, "", "-I 4 2" , 0 }, // 423 + {140, 140, "I 4/m c m" , 0, "", "-I 4 2c" , 0 }, // 424 + {141, 141, "I 41/a m d", '1', "", "I 4bw 2bw -1bw", 34}, // 425 + {141, 0, "I 41/a m d", '2', "", "-I 4bd 2" , 0 }, // 426 + {142, 142, "I 41/a c d", '1', "", "I 4bw 2aw -1bw", 35}, // 427 + {142, 0, "I 41/a c d", '2', "", "-I 4bd 2c" , 0 }, // 428 + {143, 143, "P 3" , 0, "", "P 3" , 0 }, // 429 + {144, 144, "P 31" , 0, "", "P 31" , 0 }, // 430 + {145, 145, "P 32" , 0, "", "P 32" , 0 }, // 431 + {146, 146, "R 3" , 'H', "", "R 3" , 0 }, // 432 + {146, 1146, "R 3" , 'R', "", "P 3*" , 36}, // 433 + {147, 147, "P -3" , 0, "", "-P 3" , 0 }, // 434 + {148, 148, "R -3" , 'H', "", "-R 3" , 0 }, // 435 + {148, 1148, "R -3" , 'R', "", "-P 3*" , 36}, // 436 + {149, 149, "P 3 1 2" , 0, "", "P 3 2" , 0 }, // 437 + {150, 150, "P 3 2 1" , 0, "", "P 3 2\"" , 0 }, // 438 + {151, 151, "P 31 1 2" , 0, "", "P 31 2 (0 0 4)", 0 }, // 439 + {152, 152, "P 31 2 1" , 0, "", "P 31 2\"" , 0 }, // 440 + {153, 153, "P 32 1 2" , 0, "", "P 32 2 (0 0 2)", 0 }, // 441 + {154, 154, "P 32 2 1" , 0, "", "P 32 2\"" , 0 }, // 442 + {155, 155, "R 3 2" , 'H', "", "R 3 2\"" , 0 }, // 443 + {155, 1155, "R 3 2" , 'R', "", "P 3* 2" , 36}, // 444 + {156, 156, "P 3 m 1" , 0, "", "P 3 -2\"" , 0 }, // 445 + {157, 157, "P 3 1 m" , 0, "", "P 3 -2" , 0 }, // 446 + {158, 158, "P 3 c 1" , 0, "", "P 3 -2\"c" , 0 }, // 447 + {159, 159, "P 3 1 c" , 0, "", "P 3 -2c" , 0 }, // 448 + {160, 160, "R 3 m" , 'H', "", "R 3 -2\"" , 0 }, // 449 + {160, 1160, "R 3 m" , 'R', "", "P 3* -2" , 36}, // 450 + {161, 161, "R 3 c" , 'H', "", "R 3 -2\"c" , 0 }, // 451 + {161, 1161, "R 3 c" , 'R', 
"", "P 3* -2n" , 36}, // 452 + {162, 162, "P -3 1 m" , 0, "", "-P 3 2" , 0 }, // 453 + {163, 163, "P -3 1 c" , 0, "", "-P 3 2c" , 0 }, // 454 + {164, 164, "P -3 m 1" , 0, "", "-P 3 2\"" , 0 }, // 455 + {165, 165, "P -3 c 1" , 0, "", "-P 3 2\"c" , 0 }, // 456 + {166, 166, "R -3 m" , 'H', "", "-R 3 2\"" , 0 }, // 457 + {166, 1166, "R -3 m" , 'R', "", "-P 3* 2" , 36}, // 458 + {167, 167, "R -3 c" , 'H', "", "-R 3 2\"c" , 0 }, // 459 + {167, 1167, "R -3 c" , 'R', "", "-P 3* 2n" , 36}, // 460 + {168, 168, "P 6" , 0, "", "P 6" , 0 }, // 461 + {169, 169, "P 61" , 0, "", "P 61" , 0 }, // 462 + {170, 170, "P 65" , 0, "", "P 65" , 0 }, // 463 + {171, 171, "P 62" , 0, "", "P 62" , 0 }, // 464 + {172, 172, "P 64" , 0, "", "P 64" , 0 }, // 465 + {173, 173, "P 63" , 0, "", "P 6c" , 0 }, // 466 + {174, 174, "P -6" , 0, "", "P -6" , 0 }, // 467 + {175, 175, "P 6/m" , 0, "", "-P 6" , 0 }, // 468 + {176, 176, "P 63/m" , 0, "", "-P 6c" , 0 }, // 469 + {177, 177, "P 6 2 2" , 0, "", "P 6 2" , 0 }, // 470 + {178, 178, "P 61 2 2" , 0, "", "P 61 2 (0 0 5)", 0 }, // 471 + {179, 179, "P 65 2 2" , 0, "", "P 65 2 (0 0 1)", 0 }, // 472 + {180, 180, "P 62 2 2" , 0, "", "P 62 2 (0 0 4)", 0 }, // 473 + {181, 181, "P 64 2 2" , 0, "", "P 64 2 (0 0 2)", 0 }, // 474 + {182, 182, "P 63 2 2" , 0, "", "P 6c 2c" , 0 }, // 475 + {183, 183, "P 6 m m" , 0, "", "P 6 -2" , 0 }, // 476 + {184, 184, "P 6 c c" , 0, "", "P 6 -2c" , 0 }, // 477 + {185, 185, "P 63 c m" , 0, "", "P 6c -2" , 0 }, // 478 + {186, 186, "P 63 m c" , 0, "", "P 6c -2c" , 0 }, // 479 + {187, 187, "P -6 m 2" , 0, "", "P -6 2" , 0 }, // 480 + {188, 188, "P -6 c 2" , 0, "", "P -6c 2" , 0 }, // 481 + {189, 189, "P -6 2 m" , 0, "", "P -6 -2" , 0 }, // 482 + {190, 190, "P -6 2 c" , 0, "", "P -6c -2c" , 0 }, // 483 + {191, 191, "P 6/m m m" , 0, "", "-P 6 2" , 0 }, // 484 + {192, 192, "P 6/m c c" , 0, "", "-P 6 2c" , 0 }, // 485 + {193, 193, "P 63/m c m", 0, "", "-P 6c 2" , 0 }, // 486 + {194, 194, "P 63/m m c", 0, "", "-P 6c 2c" , 0 }, // 487 + 
{195, 195, "P 2 3" , 0, "", "P 2 2 3" , 0 }, // 488 + {196, 196, "F 2 3" , 0, "", "F 2 2 3" , 0 }, // 489 + {197, 197, "I 2 3" , 0, "", "I 2 2 3" , 0 }, // 490 + {198, 198, "P 21 3" , 0, "", "P 2ac 2ab 3" , 0 }, // 491 + {199, 199, "I 21 3" , 0, "", "I 2b 2c 3" , 0 }, // 492 + {200, 200, "P m -3" , 0, "", "-P 2 2 3" , 0 }, // 493 + {201, 201, "P n -3" , '1', "", "P 2 2 3 -1n" , 20}, // 494 + {201, 0, "P n -3" , '2', "", "-P 2ab 2bc 3" , 0 }, // 495 + {202, 202, "F m -3" , 0, "", "-F 2 2 3" , 0 }, // 496 + {203, 203, "F d -3" , '1', "", "F 2 2 3 -1d" , 27}, // 497 + {203, 0, "F d -3" , '2', "", "-F 2uv 2vw 3" , 0 }, // 498 + {204, 204, "I m -3" , 0, "", "-I 2 2 3" , 0 }, // 499 + {205, 205, "P a -3" , 0, "", "-P 2ac 2ab 3" , 0 }, // 500 + {206, 206, "I a -3" , 0, "", "-I 2b 2c 3" , 0 }, // 501 + {207, 207, "P 4 3 2" , 0, "", "P 4 2 3" , 0 }, // 502 + {208, 208, "P 42 3 2" , 0, "", "P 4n 2 3" , 0 }, // 503 + {209, 209, "F 4 3 2" , 0, "", "F 4 2 3" , 0 }, // 504 + {210, 210, "F 41 3 2" , 0, "", "F 4d 2 3" , 0 }, // 505 + {211, 211, "I 4 3 2" , 0, "", "I 4 2 3" , 0 }, // 506 + {212, 212, "P 43 3 2" , 0, "", "P 4acd 2ab 3" , 0 }, // 507 + {213, 213, "P 41 3 2" , 0, "", "P 4bd 2ab 3" , 0 }, // 508 + {214, 214, "I 41 3 2" , 0, "", "I 4bd 2c 3" , 0 }, // 509 + {215, 215, "P -4 3 m" , 0, "", "P -4 2 3" , 0 }, // 510 + {216, 216, "F -4 3 m" , 0, "", "F -4 2 3" , 0 }, // 511 + {217, 217, "I -4 3 m" , 0, "", "I -4 2 3" , 0 }, // 512 + {218, 218, "P -4 3 n" , 0, "", "P -4n 2 3" , 0 }, // 513 + {219, 219, "F -4 3 c" , 0, "", "F -4a 2 3" , 0 }, // 514 + {220, 220, "I -4 3 d" , 0, "", "I -4bd 2c 3" , 0 }, // 515 + {221, 221, "P m -3 m" , 0, "", "-P 4 2 3" , 0 }, // 516 + {222, 222, "P n -3 n" , '1', "", "P 4 2 3 -1n" , 20}, // 517 + {222, 0, "P n -3 n" , '2', "", "-P 4a 2bc 3" , 0 }, // 518 + {223, 223, "P m -3 n" , 0, "", "-P 4n 2 3" , 0 }, // 519 + {224, 224, "P n -3 m" , '1', "", "P 4n 2 3 -1n" , 30}, // 520 + {224, 0, "P n -3 m" , '2', "", "-P 4bc 2bc 3" , 0 }, // 521 + {225, 
225, "F m -3 m" , 0, "", "-F 4 2 3" , 0 }, // 522 + {226, 226, "F m -3 c" , 0, "", "-F 4a 2 3" , 0 }, // 523 + {227, 227, "F d -3 m" , '1', "", "F 4d 2 3 -1d" , 27}, // 524 + {227, 0, "F d -3 m" , '2', "", "-F 4vw 2vw 3" , 0 }, // 525 + {228, 228, "F d -3 c" , '1', "", "F 4d 2 3 -1ad" , 37}, // 526 + {228, 0, "F d -3 c" , '2', "", "-F 4ud 2vw 3" , 0 }, // 527 + {229, 229, "I m -3 m" , 0, "", "-I 4 2 3" , 0 }, // 528 + {230, 230, "I a -3 d" , 0, "", "-I 4bd 2c 3" , 0 }, // 529 + // And extra entries from syminfo.lib + { 5, 5005, "I 1 21 1" , 0, "b4", "I 2yb" , 38}, // 530 + { 5, 3005, "C 1 21 1" , 0, "b5", "C 2yb" , 14}, // 531 + { 18, 1018, "P 21212(a)", 0, "", "P 2ab 2a" , 14}, // 532 + { 20, 1020, "C 2 2 21a)", 0, "", "C 2ac 2" , 39}, // 533 + { 21, 1021, "C 2 2 2a" , 0, "", "C 2ab 2b" , 14}, // 534 + { 22, 1022, "F 2 2 2a" , 0, "", "F 2 2c" , 40}, // 535 + { 23, 1023, "I 2 2 2a" , 0, "", "I 2ab 2bc" , 33}, // 536 + { 94, 1094, "P 42 21 2a", 0, "", "P 4bc 2a" , 20}, // 537 + {197, 1197, "I 2 3a" , 0, "", "I 2ab 2bc 3" , 30}, // 538 + // And extra entries from Crystallographic Space Group Diagrams and Tables + // http://img.chem.ucl.ac.uk/sgp/ + // We want to have all entries from Open Babel and PDB. 
+ // If available, Hall symbols are taken from + // https://cci.lbl.gov/cctbx/multiple_cell.html + // triclinic - enlarged unit cells + { 1, 0, "A 1" , 0, "", "A 1" , 41}, // 539 + { 1, 0, "B 1" , 0, "", "B 1" , 42}, // 540 + { 1, 0, "C 1" , 0, "", "C 1" , 43}, // 541 + { 1, 0, "F 1" , 0, "", "F 1" , 44}, // 542 + { 1, 0, "I 1" , 0, "", "I 1" , 45}, // 543 + { 2, 0, "A -1" , 0, "", "-A 1" , 41}, // 544 + { 2, 0, "B -1" , 0, "", "-B 1" , 42}, // 545 + { 2, 0, "C -1" , 0, "", "-C 1" , 43}, // 546 + { 2, 0, "F -1" , 0, "", "-F 1" , 44}, // 547 + { 2, 0, "I -1" , 0, "", "-I 1" , 45}, // 548 + // monoclinic (qualifiers such as "b1" are assigned arbitrary unique numbers) + { 3, 0, "B 1 2 1" , 0, "b1", "B 2y" , 46}, // 549 + { 3, 0, "C 1 1 2" , 0, "c1", "C 2" , 47}, // 550 + { 4, 0, "B 1 21 1" , 0, "b1", "B 2yb" , 46}, // 551 + { 4, 0, "C 1 1 21" , 0, "c2", "C 2c" , 47}, // 552 + { 5, 0, "F 1 2 1" , 0, "b6", "F 2y" , 48}, // 553 + { 8, 0, "F 1 m 1" , 0, "b4", "F -2y" , 48}, // 554 + { 9, 0, "F 1 d 1" , 0, "b4", "F -2yuw" , 49}, // 555 + { 12, 0, "F 1 2/m 1" , 0, "b4", "-F 2y" , 48}, // 556 + // orthorhombic + { 64, 0, "A b a m" , 0, "", "-A 2 2ab" , 3 }, // 557 (==306) + // tetragonal - enlarged C- and F-centred unit cells + { 89, 0, "C 4 2 2" , 0, "", "C 4 2" , 50}, // 558 + { 90, 0, "C 4 2 21" , 0, "", "C 4a 2" , 50}, // 559 + { 97, 0, "F 4 2 2" , 0, "", "F 4 2" , 50}, // 560 + {115, 0, "C -4 2 m" , 0, "", "C -4 2" , 50}, // 561 + {117, 0, "C -4 2 b" , 0, "", "C -4 2ya" , 50}, // 562 + {139, 0, "F 4/m m m" , 0, "", "-F 4 2" , 50}, // 563 +}; + +const SpaceGroupAltName spacegroup_tables::alt_names[28] = { + // In 1990's ITfC vol.A changed some of the standard names, introducing + // symbol 'e'. sgtbx interprets these new symbols with option ad_hoc_1992. + // spglib uses only the new symbols. 
+ {"A e m 2", 0, 190}, // A b m 2 + {"B m e 2", 0, 191}, // B m a 2 + {"B 2 e m", 0, 192}, // B 2 c m + {"C 2 m e", 0, 193}, // C 2 m b + {"C m 2 e", 0, 194}, // C m 2 a + {"A e 2 m", 0, 195}, // A c 2 m + {"A e a 2", 0, 202}, // A b a 2 + {"B b e 2", 0, 203}, // B b a 2 + {"B 2 e b", 0, 204}, // B 2 c b + {"C 2 c e", 0, 205}, // C 2 c b + {"C c 2 e", 0, 206}, // C c 2 a + {"A e 2 a", 0, 207}, // A c 2 a + {"C m c e", 0, 303}, // C m c a + {"C c m e", 0, 304}, // C c m b + {"A e m a", 0, 305}, // A b m a + {"A e a m", 0, 306}, // A c a m + {"B b e m", 0, 307}, // B b c m + {"B m e b", 0, 308}, // B m a b + {"C m m e", 0, 315}, // C m m a + {"A e m m", 0, 317}, // A b m m + {"B m e m", 0, 319}, // B m c m + {"C c c e", '1', 321}, // C c c a + {"C c c e", '2', 322}, // C c c a + {"A e a a", '1', 325}, // A b a a + {"A e a a", '2', 326}, // A b a a + {"B b e b", '1', 329}, // B b c b + {"B b e b", '2', 330}, // B b c b + // help with parsing of unusual setting names that are present in the PDB + {"P 21 21 2a", 0, 532}, // P 21212(a) +}; + +// This table was generated by tools/gen_reciprocal_asu.py. +const unsigned char spacegroup_tables::ccp4_hkl_asu[230] = { + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 7, 6, 7, 6, 7, 7, 7, + 6, 7, 6, 7, 7, 6, 6, 7, 7, 7, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9 +}; + +// Generated by tools/gen_sg_table.py. 
+const char* get_basisop(int basisop_idx) {  // returns the basis-op triplet string stored at basisop_idx
+  static const char* basisops[51] = {
+    "x,y,z",  // 0
+    "z,x,y",  // 1
+    "y,z,x",  // 2
+    "z,y,-x",  // 3
+    "x,y,-x+z",  // 4
+    "-x,z,y",  // 5
+    "-x+z,x,y",  // 6
+    "y,-x,z",  // 7
+    "y,-x+z,x",  // 8
+    "x-z,y,z",  // 9
+    "z,x-z,y",  // 10
+    "y,z,x-z",  // 11
+    "z,y,-x+z",  // 12
+    "x+z,y,-x",  // 13
+    "x+1/4,y+1/4,z",  // 14
+    "-x+z,z,y",  // 15
+    "-x,x+z,y",  // 16
+    "y,-x+z,z",  // 17
+    "y,-x,x+z",  // 18
+    "x+1/4,y-1/4,z",  // 19
+    "x-1/4,y-1/4,z-1/4",  // 20
+    "x-1/4,y-1/4,z",  // 21
+    "z,x-1/4,y-1/4",  // 22
+    "y-1/4,z,x-1/4",  // 23
+    "x-1/2,y-1/4,z+1/4",  // 24
+    "z+1/4,x-1/2,y-1/4",  // 25
+    "y-1/4,z+1/4,x-1/2",  // 26
+    "x+1/8,y+1/8,z+1/8",  // 27
+    "x+1/4,y-1/4,z+1/4",  // 28
+    "x-1/4,y+1/4,z",  // 29
+    "x+1/4,y+1/4,z+1/4",  // 30
+    "x,y+1/4,z+1/8",  // 31
+    "x-1/4,y+1/4,z+1/4",  // 32
+    "x-1/4,y+1/4,z-1/4",  // 33
+    "x-1/2,y+1/4,z+1/8",  // 34
+    "x-1/2,y+1/4,z-3/8",  // 35
+    "-y+z,x+z,-x+y+z",  // 36
+    "x-1/8,y-1/8,z-1/8",  // 37
+    "x+1/4,y+1/4,-x+z-1/4",  // 38
+    "x+1/4,y,z",  // 39
+    "x,y,z+1/4",  // 40
+    "-x,-y/2+z/2,y/2+z/2",  // 41
+    "-x/2+z/2,-y,x/2+z/2",  // 42
+    "x/2+y/2,x/2-y/2,-z",  // 43
+    "y/2+z/2,x/2+z/2,x/2+y/2",  // 44
+    "-x/2+y/2+z/2,x/2-y/2+z/2,x/2+y/2-z/2",  // 45
+    "x/2,y,-x/2+z",  // 46
+    "-x/2+z,x/2,y",  // 47
+    "x-z/2,y,z/2",  // 48
+    "x+z/2,y,z/2",  // 49
+    "x/2+y/2,-x/2+y/2,z",  // 50
+  };
+  return basisops[basisop_idx];  // NOTE(review): no range check; an index outside 0..50 reads past the array
+}
+
+const SpaceGroup* find_spacegroup_by_name(std::string name, double alpha, double gamma,  // name is taken by value because it is normalized in place below
+                                          const char* prefer) {  // prefer: optional flags '1'/'2'/'H'/'R'; the function returns nullptr when nothing matches
+  bool prefer_2 = false;
+  bool prefer_R = false;
+  if (prefer)
+    for (const char* p = prefer; *p != '\0'; ++p) {
+      if (*p == '2')
+        prefer_2 = true;
+      else if (*p == 'R')
+        prefer_R = true;
+      else if (*p != '1' && *p != 'H')  // '1' and 'H' are accepted but set nothing: they are the default choices
+        throw std::invalid_argument("find_spacegroup_by_name(): invalid arg 'prefer'");
+    }
+  const char* p = skip_space(name.c_str());
+  if (*p >= '0' && *p <= '9') { // handle numbers
+    char *endptr;
+    long n = std::strtol(p, &endptr, 10);
+    return *endptr == '\0' ? find_spacegroup_by_number(n) : nullptr;  // any character after the digits (even a blank) yields nullptr
+  }
+  char first = *p & ~0x20; // to uppercase
+  if (first == '\0')  // *p was NUL (0x20 would also be mapped to 0 by the mask above)
+    return nullptr;
+  if (first == 'H')  // a leading H is looked up as R
+    first = 'R';
+  p = skip_space(p+1);
+  size_t start = p - name.c_str();  // offset of the text after the leading letter; used to rebuild p once name has been edited
+  // change letters to lower case, except the letter after :
+  for (size_t i = start; i < name.size(); ++i) {
+    if (name[i] >= 'A' && name[i] <= 'Z')
+      name[i] |= 0x20;  // to lowercase
+    else if (name[i] == ':')
+      while (++i < name.size())  // consumes the rest of the string, so the outer loop ends here too
+        if (name[i] >= 'a' && name[i] <= 'z')
+          name[i] &= ~0x20;  // to uppercase
+  }
+  // allow names ending with R or H, such as R3R instead of R3:R
+  if (name.back() == 'h' || name.back() == 'r') {  // name is non-empty here: first != '\0' was checked above
+    name.back() &= ~0x20;  // to uppercase
+    name.insert(name.end() - 1, ':');
+  }
+  // The string that const char* p points to was just modified.
+  // This confuses some compilers (GCC 4.8), so let's re-assign p.
+  p = name.c_str() + start;
+
+  for (const SpaceGroup& sg : spacegroup_tables::main)
+    if (sg.hm[0] == first) {
+      if (sg.hm[2] == *p) {  // hm[2] is the first symbol after the leading letter and a blank
+        const char* a = skip_space(p + 1);  // a walks the normalized user-supplied name
+        const char* b = skip_space(sg.hm + 3);  // b walks the table's hm symbol
+        // In IT 1935 and 1952, symbols of centrosymmetric, cubic space groups
+        // 200-206 and 221-230 had symbol 3 (not -3), e.g. Pm3 instead of Pm-3,
+        // as listed in Table 3.3.3.1 in ITfC (2016) vol. A, p.788.
+        while ((*a == *b && *b != '\0') ||
+               (*a == '3' && *b == '-' && b == sg.hm + 4 && *++b == '3')) {  // the ++b here steps over the '-' in the table symbol
+          a = skip_space(a+1);
+          b = skip_space(b+1);
+        }
+        if (*b == '\0') {  // the whole table symbol was matched
+          if (*a == '\0') {  // ...and the name has no explicit extension
+            // Change hexagonal settings to rhombohedral if the unit cell
+            // angles are more consistent with the latter.
+            // We have possible ambiguity in the hexagonal crystal family.
+            // For instance, "R 3" may mean "R 3:H" (hexagonal setting) or
+            // "R 3:R" (rhombohedral setting). The :H symbols come first
+            // in the table and are used by default. The ratio gamma:alpha
+            // is 120:90 in the hexagonal system and 1:1 in rhombohedral.
+            // We assume that the 'R' entry follows directly the 'H' entry.
+            if (sg.ext == 'H' && (alpha == 0. ? prefer_R : gamma < 1.125 * alpha))  // alpha == 0. means no cell angles given: fall back to the prefer flag
+              return &sg + 1;
+            // Similarly, the origin choice #2 follows directly #1.
+            if (sg.ext == '1' && prefer_2)
+              return &sg + 1;
+            return &sg;
+          }
+          if (*a == ':' && *skip_space(a+1) == sg.ext)  // an explicit ":X" must equal this row's extension
+            return &sg;
+        }
+      } else if (sg.hm[2] == '1' && sg.hm[3] == ' ') {
+        // check monoclinic short names, matching P2 to "P 1 2 1";
+        // as an exception "B 2" == "B 1 1 2" (like in the PDB)
+        const char* b = sg.hm + 4;
+        if (*b != '1' || (first == 'B' && *++b == ' ' && *++b != '1')) {  // when the second alternative holds, the two ++b have moved b to hm + 6
+          char end = (b == sg.hm + 4 ? ' ' : '\0');  // "X 1 y 1": stop at the blank after y; "B 1 1 y": run to the end of hm
+          const char* a = skip_space(p);
+          while (*a == *b && *b != end) {
+            ++a;
+            ++b;
+          }
+          if (*skip_space(a) == '\0' && *b == end)
+            return &sg;
+        }
+      }
+    }
+  for (const SpaceGroupAltName& sg : spacegroup_tables::alt_names)  // NOTE(review): unlike the loop above, prefer_2 and the cell angles are not consulted here
+    if (sg.hm[0] == first && sg.hm[2] == *p) {
+      const char* a = skip_space(p + 1);
+      const char* b = skip_space(sg.hm + 3);
+      while (*a == *b && *b != '\0') {
+        a = skip_space(a+1);
+        b = skip_space(b+1);
+      }
+      if (*b == '\0' &&
+          (*a == '\0' || (*a == ':' && *skip_space(a+1) == sg.ext)))  // with no extension given, the first matching alt_names row wins
+        return &spacegroup_tables::main[sg.pos];  // sg.pos is used as an index into spacegroup_tables::main
+    }
+  return nullptr;
+}
+
+} // namespace gemmi
+
-- 2.52.0 From b27b140bf044e1ddd59536274b37da3ee20ee160 Mon Sep 17 00:00:00 2001 From: takaba_k Date: Wed, 13 May 2026 15:25:52 +0200 Subject: [PATCH 049/132] JFJochHDF5Reader: read pixel_mask defined under /detectorSpecific --- reader/JFJochHDF5Reader.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/reader/JFJochHDF5Reader.cpp b/reader/JFJochHDF5Reader.cpp index 8da235cb..d5599612 100644 --- a/reader/JFJochHDF5Reader.cpp +++ b/reader/JFJochHDF5Reader.cpp @@ -583,6 +583,12 @@ void JFJochHDF5Reader::ReadFile(const std::string &filename) { {0, 0}, {image_size_y, image_size_x} ); + if (mask_tmp.empty()) + mask_tmp = master_file->ReadOptVector( "/entry/instrument/detector/detectorSpecific/pixel_mask", + {0, 0}, + {image_size_y, image_size_x} + ); if (mask_tmp.empty()) mask_tmp = 
std::vector(image_size_x * image_size_y); dataset->pixel_mask = PixelMask(mask_tmp); -- 2.52.0 From 08bf1867663126667c018acecf40bbb0c6ddeffa Mon Sep 17 00:00:00 2001 From: Filip Leonarski Date: Wed, 13 May 2026 13:44:02 +0200 Subject: [PATCH 050/132] Gemmi: Add more functionality from gemmi 0.7.5 --- CMakeLists.txt | 2 +- gemmi_gph/CMakeLists.txt | 8 + {symmetry => gemmi_gph}/LICENSE.txt | 0 gemmi_gph/gemmi/atof.hpp | 41 + gemmi_gph/gemmi/atox.hpp | 135 + {symmetry => gemmi_gph}/gemmi/cellred.hpp | 0 {symmetry => gemmi_gph}/gemmi/fail.hpp | 0 gemmi_gph/gemmi/fileutil.hpp | 173 + gemmi_gph/gemmi/gz.hpp | 52 + gemmi_gph/gemmi/input.hpp | 168 + gemmi_gph/gemmi/iterator.hpp | 287 ++ gemmi_gph/gemmi/logger.hpp | 71 + {symmetry => gemmi_gph}/gemmi/math.hpp | 0 gemmi_gph/gemmi/mtz.hpp | 600 +++ gemmi_gph/gemmi/sprintf.hpp | 80 + {symmetry => gemmi_gph}/gemmi/symmetry.hpp | 0 gemmi_gph/gemmi/third_party/fast_float.h | 4933 ++++++++++++++++++++ {symmetry => gemmi_gph}/gemmi/unitcell.hpp | 0 gemmi_gph/gemmi/util.hpp | 315 ++ gemmi_gph/gemmi/xds_ascii.hpp | 183 + gemmi_gph/gz.cpp | 189 + gemmi_gph/mtz.cpp | 991 ++++ gemmi_gph/sprintf.cpp | 68 + gemmi_gph/stb/stb_sprintf.h | 1906 ++++++++ {symmetry => gemmi_gph}/symmetry.cpp | 0 gemmi_gph/xds_ascii.cpp | 306 ++ symmetry/CMakeLists.txt | 2 - 27 files changed, 10507 insertions(+), 3 deletions(-) create mode 100644 gemmi_gph/CMakeLists.txt rename {symmetry => gemmi_gph}/LICENSE.txt (100%) create mode 100644 gemmi_gph/gemmi/atof.hpp create mode 100644 gemmi_gph/gemmi/atox.hpp rename {symmetry => gemmi_gph}/gemmi/cellred.hpp (100%) rename {symmetry => gemmi_gph}/gemmi/fail.hpp (100%) create mode 100644 gemmi_gph/gemmi/fileutil.hpp create mode 100644 gemmi_gph/gemmi/gz.hpp create mode 100644 gemmi_gph/gemmi/input.hpp create mode 100644 gemmi_gph/gemmi/iterator.hpp create mode 100644 gemmi_gph/gemmi/logger.hpp rename {symmetry => gemmi_gph}/gemmi/math.hpp (100%) create mode 100644 gemmi_gph/gemmi/mtz.hpp create mode 100644 
gemmi_gph/gemmi/sprintf.hpp rename {symmetry => gemmi_gph}/gemmi/symmetry.hpp (100%) create mode 100644 gemmi_gph/gemmi/third_party/fast_float.h rename {symmetry => gemmi_gph}/gemmi/unitcell.hpp (100%) create mode 100644 gemmi_gph/gemmi/util.hpp create mode 100644 gemmi_gph/gemmi/xds_ascii.hpp create mode 100644 gemmi_gph/gz.cpp create mode 100644 gemmi_gph/mtz.cpp create mode 100644 gemmi_gph/sprintf.cpp create mode 100644 gemmi_gph/stb/stb_sprintf.h rename {symmetry => gemmi_gph}/symmetry.cpp (100%) create mode 100644 gemmi_gph/xds_ascii.cpp delete mode 100644 symmetry/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index c49e0644..92e97835 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -153,7 +153,7 @@ ADD_SUBDIRECTORY(reader) ADD_SUBDIRECTORY(detector_control) ADD_SUBDIRECTORY(image_puller) ADD_SUBDIRECTORY(preview) -ADD_SUBDIRECTORY(symmetry) +ADD_SUBDIRECTORY(gemmi_gph) ADD_SUBDIRECTORY(xds-plugin) IF (JFJOCH_WRITER_ONLY) diff --git a/gemmi_gph/CMakeLists.txt b/gemmi_gph/CMakeLists.txt new file mode 100644 index 00000000..66d88819 --- /dev/null +++ b/gemmi_gph/CMakeLists.txt @@ -0,0 +1,8 @@ +ADD_LIBRARY(gemmi STATIC symmetry.cpp gz.cpp mtz.cpp sprintf.cpp xds_ascii.cpp + gemmi/cellred.hpp + gemmi/symmetry.hpp + gemmi/fail.hpp + gemmi/unitcell.hpp + gemmi/math.hpp) +TARGET_INCLUDE_DIRECTORIES(gemmi PUBLIC .) +TARGET_LINK_LIBRARIES(gemmi ) \ No newline at end of file diff --git a/symmetry/LICENSE.txt b/gemmi_gph/LICENSE.txt similarity index 100% rename from symmetry/LICENSE.txt rename to gemmi_gph/LICENSE.txt diff --git a/gemmi_gph/gemmi/atof.hpp b/gemmi_gph/gemmi/atof.hpp new file mode 100644 index 00000000..7f275f7b --- /dev/null +++ b/gemmi_gph/gemmi/atof.hpp @@ -0,0 +1,41 @@ +// Copyright 2020 Global Phasing Ltd. +// +// Functions that convert strings to floating-point numbers ignoring locale. +// Simple wrappers around fastfloat::from_chars(). 
+ +#ifndef GEMMI_ATOF_HPP_ +#define GEMMI_ATOF_HPP_ + +#include "atox.hpp" // for is_space +#include "third_party/fast_float.h" + +namespace gemmi { + +using fast_float::from_chars_result; + +inline from_chars_result fast_from_chars(const char* start, const char* end, double& d) { + while (start < end && is_space(*start)) + ++start; + if (start < end && *start == '+') + ++start; + return fast_float::from_chars(start, end, d); +} + +inline from_chars_result fast_from_chars(const char* start, double& d) { + while (is_space(*start)) + ++start; + if (*start == '+') + ++start; + return fast_float::from_chars(start, start + std::strlen(start), d); +} + +inline double fast_atof(const char* p, const char** endptr=nullptr) { + double d = 0; + auto result = fast_from_chars(p, d); + if (endptr) + *endptr = result.ptr; + return d; +} + +} // namespace gemmi +#endif diff --git a/gemmi_gph/gemmi/atox.hpp b/gemmi_gph/gemmi/atox.hpp new file mode 100644 index 00000000..14b4a3b3 --- /dev/null +++ b/gemmi_gph/gemmi/atox.hpp @@ -0,0 +1,135 @@ +// Copyright 2018 Global Phasing Ltd. +// +// Locale-independent functions that convert strings to integers, +// equivalents of standard isspace and isdigit, and a few helper functions. +// +// This file is named similarly to the standard functions atoi() and atof(). +// But the functions here are not meant to be equivalent to the standard +// library functions. They are locale-independent (a good thing when reading +// numbers from files). They don't set errno, don't signal overflow and +// underflow. Due to the limited scope these functions tend to be faster +// than the standard-library ones. 
+ +#ifndef GEMMI_ATOX_HPP_ +#define GEMMI_ATOX_HPP_ + +#include +#include // for invalid_argument +#include + +namespace gemmi { + +// equivalent of std::isspace for C locale (no handling of EOF) +inline bool is_space(char c) { + static const std::uint8_t table[256] = { // 1 for 9-13 and 32 + 0,0,0,0,0,0,0,0, 0,1,1,1,1,1,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0 + }; + return table[(std::uint8_t)c] != 0; +} + +// equivalent of std::isblank for C locale (no handling of EOF) +inline bool is_blank(char c) { + return c == ' ' || c == '\t'; +} + +// equivalent of std::isdigit for C locale (no handling of EOF) +inline bool is_digit(char c) { + return c >= '0' && c <= '9'; +} + +inline const char* skip_blank(const char* p) { + if (p) + while (is_blank(*p)) + ++p; + return p; +} + +inline const char* skip_word(const char* p) { + if (p) + while (*p != '\0' && !is_space(*p)) + ++p; + return p; +} + +inline std::string read_word(const char* line) { + line = skip_blank(line); + return std::string(line, skip_word(line)); +} + +inline std::string read_word(const char* line, const char** endptr) { + line = skip_blank(line); + *endptr = skip_word(line); + return std::string(line, *endptr); +} + +// no checking for overflow +inline int string_to_int(const char* p, bool checked, size_t length=0) { + int mult = -1; + int n = 0; + size_t i = 0; + while ((length == 0 || i < length) && is_space(p[i])) + ++i; + if (p[i] == '-') { + mult = 1; + ++i; + } else if (p[i] == '+') { + ++i; + } + bool has_digits = 
false; + // use negative numbers because INT_MIN < -INT_MAX + for (; (length == 0 || i < length) && is_digit(p[i]); ++i) { + n = n * 10 - (p[i] - '0'); + has_digits = true; + } + if (checked) { + while ((length == 0 || i < length) && is_space(p[i])) + ++i; + if (!has_digits || p[i] != '\0') + throw std::invalid_argument("not an integer: " + + std::string(p, length ? length : i+1)); + } + return mult * n; +} + +inline int string_to_int(const std::string& str, bool checked) { + return string_to_int(str.c_str(), checked); +} + +inline int simple_atoi(const char* p, const char** endptr=nullptr) { + int mult = -1; + int n = 0; + while (is_space(*p)) + ++p; + if (*p == '-') { + mult = 1; + ++p; + } else if (*p == '+') { + ++p; + } + for (; is_digit(*p); ++p) + n = n * 10 - (*p - '0'); // use negative numbers because INT_MIN < -INT_MAX + if (endptr) + *endptr = p; + return mult * n; +} + +inline int no_sign_atoi(const char* p, const char** endptr=nullptr) { + int n = 0; + while (is_space(*p)) + ++p; + for (; is_digit(*p); ++p) + n = n * 10 + (*p - '0'); + if (endptr) + *endptr = p; + return n; +} + +} // namespace gemmi +#endif diff --git a/symmetry/gemmi/cellred.hpp b/gemmi_gph/gemmi/cellred.hpp similarity index 100% rename from symmetry/gemmi/cellred.hpp rename to gemmi_gph/gemmi/cellred.hpp diff --git a/symmetry/gemmi/fail.hpp b/gemmi_gph/gemmi/fail.hpp similarity index 100% rename from symmetry/gemmi/fail.hpp rename to gemmi_gph/gemmi/fail.hpp diff --git a/gemmi_gph/gemmi/fileutil.hpp b/gemmi_gph/gemmi/fileutil.hpp new file mode 100644 index 00000000..bd01fed4 --- /dev/null +++ b/gemmi_gph/gemmi/fileutil.hpp @@ -0,0 +1,173 @@ +// Copyright 2018 Global Phasing Ltd. +// +// File-related utilities. 
+ +#ifndef GEMMI_FILEUTIL_HPP_ +#define GEMMI_FILEUTIL_HPP_ + +#include +#include // for FILE, fopen, fclose +#include +#include // for malloc, realloc +#include // for strlen +#include +#include // for unique_ptr +#include "fail.hpp" // for sys_fail + +#if defined(_WIN32) && !defined(GEMMI_USE_FOPEN) +#include "utf.hpp" +#endif + +namespace gemmi { + +// strip directory and suffixes from filename +inline std::string path_basename(const std::string& path, + std::initializer_list exts) { + size_t pos = path.find_last_of("\\/"); + std::string basename = pos == std::string::npos ? path : path.substr(pos + 1); + for (const char* ext : exts) { + size_t len = std::strlen(ext); + if (basename.size() > len && + basename.compare(basename.length() - len, len, ext, len) == 0) + basename.resize(basename.length() - len); + } + return basename; +} + +// file operations + +/// deleter for fileptr_t +struct needs_fclose { + bool use_fclose; + void operator()(std::FILE* f) const noexcept { + if (use_fclose) + std::fclose(f); + } +}; + +typedef std::unique_ptr fileptr_t; + +inline fileptr_t file_open(const char* path, const char* mode) { + std::FILE* file; +#if defined(_WIN32) && !defined(GEMMI_USE_FOPEN) + std::wstring wpath = UTF8_to_wchar(path); + std::wstring wmode = UTF8_to_wchar(mode); + if ((file = ::_wfopen(wpath.c_str(), wmode.c_str())) == nullptr) +#else + if ((file = std::fopen(path, mode)) == nullptr) +#endif + sys_fail(std::string("Failed to open ") + path + + (*mode == 'w' ? 
" for writing" : "")); + return fileptr_t(file, needs_fclose{true}); +} + +// helper function for treating "-" as stdin or stdout +inline fileptr_t file_open_or(const char* path, const char* mode, + std::FILE* dash_stream) { + if (path[0] == '-' && path[1] == '\0') + return fileptr_t(dash_stream, needs_fclose{false}); + return file_open(path, mode); +} + +inline std::size_t file_size(std::FILE* f, const std::string& path) { + if (std::fseek(f, 0, SEEK_END) != 0) + sys_fail(path + ": fseek failed"); + long length = std::ftell(f); + if (length < 0) + sys_fail(path + ": ftell failed"); + if (std::fseek(f, 0, SEEK_SET) != 0) + sys_fail(path + ": fseek failed"); + return length; +} + +// helper function for working with binary files +inline bool is_little_endian() { + std::uint32_t x = 1; + return *reinterpret_cast(&x) == 1; +} + +inline void swap_two_bytes(void* start) { + char* bytes = static_cast(start); + std::swap(bytes[0], bytes[1]); +} + +inline void swap_four_bytes(void* start) { + char* bytes = static_cast(start); + std::swap(bytes[0], bytes[3]); + std::swap(bytes[1], bytes[2]); +} + +inline void swap_eight_bytes(void* start) { + char* bytes = static_cast(start); + std::swap(bytes[0], bytes[7]); + std::swap(bytes[1], bytes[6]); + std::swap(bytes[2], bytes[5]); + std::swap(bytes[3], bytes[4]); +} + + +class CharArray { + std::unique_ptr ptr_; + size_t size_; +public: + CharArray() : ptr_(nullptr, &std::free), size_(0) {} + explicit CharArray(size_t n) : ptr_((char*)std::malloc(n), &std::free), size_(n) {} + explicit operator bool() const { return (bool)ptr_; } + char* data() { return ptr_.get(); } + const char* data() const { return ptr_.get(); } + size_t size() const { return size_; } + void set_size(size_t n) { size_ = n; } + + void resize(size_t n) { + char* new_ptr = (char*) std::realloc(ptr_.get(), n); + if (!new_ptr && n != 0) + fail("Out of memory."); + (void) ptr_.release(); // NOLINT(bugprone-unused-return-value) + ptr_.reset(new_ptr); + size_ = n; + } 
+ + // Remove first n bytes making space for more text at the returned position. + char* roll(size_t n) { + assert(n <= size()); + std::memmove(data(), data() + n, n); + return data() + n; + } +}; + + +/// reading file into a memory buffer (optimized: uses fseek to determine file size) +inline CharArray read_file_into_buffer(const std::string& path) { + fileptr_t f = file_open(path.c_str(), "rb"); + size_t size = file_size(f.get(), path); + CharArray buffer(size); + if (std::fread(buffer.data(), size, 1, f.get()) != 1) + sys_fail(path + ": fread failed"); + return buffer; +} + +inline CharArray read_stdin_into_buffer() { + size_t n = 0; + CharArray buffer(16 * 1024); + for (;;) { + n += std::fread(buffer.data() + n, 1, buffer.size() - n, stdin); + if (n != buffer.size()) { + buffer.set_size(n); + break; + } + buffer.resize(2*n); + } + return buffer; +} + +template +inline CharArray read_into_buffer(T&& input) { + if (input.is_compressed()) + return input.uncompress_into_buffer(); + if (input.is_stdin()) + return read_stdin_into_buffer(); + return read_file_into_buffer(input.path()); +} + +} // namespace gemmi +#endif diff --git a/gemmi_gph/gemmi/gz.hpp b/gemmi_gph/gemmi/gz.hpp new file mode 100644 index 00000000..b4edc016 --- /dev/null +++ b/gemmi_gph/gemmi/gz.hpp @@ -0,0 +1,52 @@ +// Copyright 2017 Global Phasing Ltd. +// +// Functions for transparent reading of gzipped files. Uses zlib. 
+ +#ifndef GEMMI_GZ_HPP_ +#define GEMMI_GZ_HPP_ +#include +#include "fail.hpp" // GEMMI_DLL +#include "input.hpp" // BasicInput +#include "util.hpp" // iends_with + +namespace gemmi { + +GEMMI_DLL extern const char* const zlib_description; + +GEMMI_DLL size_t estimate_uncompressed_size(const std::string& path); + +// the same interface as FileStream and MemoryStream +struct GEMMI_DLL GzStream final : public AnyStream { + GzStream(void* f_) : f(f_) {} + char* gets(char* line, int size) override; + int getc() override; + bool read(void* buf, size_t len) override; + bool skip(size_t n) override; + long tell() override; + std::string read_rest() override; + +private: + void* f; // implementation detail +}; + +class GEMMI_DLL MaybeGzipped : public BasicInput { +public: + explicit MaybeGzipped(const std::string& path); + ~MaybeGzipped(); + size_t gzread_checked(void* buf, size_t len); + bool is_compressed() const { return iends_with(path(), ".gz"); } + std::string basepath() const { + return is_compressed() ? path().substr(0, path().size() - 3) : path(); + } + + CharArray uncompress_into_buffer(size_t limit=0); + + std::unique_ptr create_stream(); + +private: + void* file_ = nullptr; +}; + +} // namespace gemmi + +#endif diff --git a/gemmi_gph/gemmi/input.hpp b/gemmi_gph/gemmi/input.hpp new file mode 100644 index 00000000..2bf505f7 --- /dev/null +++ b/gemmi_gph/gemmi/input.hpp @@ -0,0 +1,168 @@ +// Copyright 2018 Global Phasing Ltd. +// +// Input abstraction. +// Used to decouple file reading and decompression. 
+ +#ifndef GEMMI_INPUT_HPP_ +#define GEMMI_INPUT_HPP_ + +#include // for ptrdiff_t +#include // for FILE, fseek, fread +#include // for memchr +#include +#include "fileutil.hpp" // for fileptr_t + +namespace gemmi { + +// base class for FileStream, MemoryStream and GzStream +struct AnyStream { + virtual ~AnyStream() = default; + + virtual char* gets(char* line, int size) = 0; // for pdb, copy_line() + virtual int getc() = 0; // for copy_line() + virtual bool read(void* buf, size_t len) = 0; // for ccp4, mtz + + // these are not used in GzStream because MemoryStream is used for mtz + virtual long tell() = 0; // temporary, for testing + virtual bool skip(size_t n) = 0; // for reading mtz without data + virtual std::string read_rest() { return {}; } // for mtz (appendix) + + size_t copy_line(char* line, int size) { // for pdb, xds_ascii + if (!gets(line, size)) + return 0; + size_t len = std::strlen(line); + // If a line is longer than size we discard the rest of it. + if (len > 0 && line[len-1] != '\n') + for (int c = getc(); c > 0 /* not 0 nor EOF */ && c != '\n'; c = getc()) + continue; + return len; + }; +}; + +struct FileStream final : public AnyStream { + FileStream(std::FILE* f_) : f(f_, needs_fclose{false}) {} + FileStream(const char* path, const char* mode) : f(file_open_or(path, mode, stdin)) {} + + char* gets(char* line, int size) override { return std::fgets(line, size, f.get()); } + int getc() override { return std::fgetc(f.get()); } + bool read(void* buf, size_t len) override { return std::fread(buf, len, 1, f.get()) == 1; } + + std::string read_rest() override { + std::string ret; + int c = std::fgetc(f.get()); + if (c != EOF) { + ret += (char)c; + char buf[512]; + for (;;) { + size_t n = std::fread(buf, 1, sizeof(buf), f.get()); + ret.append(buf, n); + if (n != sizeof(buf)) + break; + } + } + return ret; + } + + long tell() override { + return std::ftell(f.get()); + } + + bool skip(size_t n) override { +#if defined(_MSC_VER) + int result = 
_fseeki64(f.get(), (std::ptrdiff_t)n, SEEK_CUR); +#elif defined(__MINGW32__) + int result = fseeko(f.get(), (_off_t)n, SEEK_CUR); +#else + int result = std::fseek(f.get(), (long)n, SEEK_CUR); +#endif + if (result != 0) { + char buf[512]; + while (n >= sizeof(buf)) { + if (std::fread(buf, sizeof(buf), 1, f.get()) != 1) + return false; + n -= sizeof(buf); + } + if (n > 0 && std::fread(buf, n, 1, f.get()) != 1) + return false; + } + return true; + } + +private: + fileptr_t f; +}; + +struct MemoryStream final : public AnyStream { + MemoryStream(const char* start_, size_t size) + : start(start_), end(start_ + size), cur(start_) {} + + char* gets(char* line, int size) override { + --size; // fgets reads in at most one less than size characters + if (cur >= end) + return nullptr; + if (size > end - cur) + size = int(end - cur); + const char* nl = (const char*) std::memchr(cur, '\n', size); + size_t len = nl ? nl - cur + 1 : size; + std::memcpy(line, cur, len); + line[len] = '\0'; + cur += len; + return line; + } + int getc() override { return cur < end ? *cur++ : EOF; } + + bool read(void* buf, size_t len) override { + if (cur + len > end) + return false; + std::memcpy(buf, cur, len); + cur += len; + return true; + } + + std::string read_rest() override { + const char* last = cur; + cur = end; + return std::string(last, end); + } + + long tell() override { + return cur - start; + } + bool skip(size_t n) override { + cur += n; + return cur < end; + } + +private: + const char* const start; + const char* const end; + const char* cur; +}; + +class BasicInput { +public: + explicit BasicInput(const std::string& path) : path_(path) {} + + const std::string& path() const { return path_; } + const std::string& basepath() const { return path_; } + + // Does the path stands for stdin? + // Each reading function needs to call it (some functions use stdin + // and some std::cin, so we don't try to unify it here). 
+ bool is_stdin() const { return path() == "-"; } + + // providing the same interface as MaybeGzipped + bool is_compressed() const { return false; } + // for reading (uncompressing into memory) the whole file at once + CharArray uncompress_into_buffer(size_t=0) { return {}; } + + std::unique_ptr create_stream() { + return std::unique_ptr(new FileStream(path().c_str(), "rb")); + } + +private: + std::string path_; +}; + +} // namespace gemmi +#endif diff --git a/gemmi_gph/gemmi/iterator.hpp b/gemmi_gph/gemmi/iterator.hpp new file mode 100644 index 00000000..824472f9 --- /dev/null +++ b/gemmi_gph/gemmi/iterator.hpp @@ -0,0 +1,287 @@ +// Copyright 2018 Global Phasing Ltd. +// +// Bidirectional iterators (over elements of any container) that can filter, +// uniquify, group, or iterate with a stride. + +#ifndef GEMMI_ITERATOR_HPP_ +#define GEMMI_ITERATOR_HPP_ +#include // for bidirectional_iterator_tag +#include // for remove_cv +#include + +namespace gemmi { + +// Disable warning "X::operator X() const will not be called for +// implicit or explicit conversions", which is triggered when templates +// StrideIter, IndirectIter and others are expanded with const Value. 
+#if defined(__INTEL_COMPILER) || defined(__NVCOMPILER) + #pragma diagnostic push + #pragma diag_suppress = conversion_function_not_usable +#elif defined(__NVCC__) + #pragma nv_diagnostic push + #pragma nv_diag_suppress = conversion_function_not_usable +#endif + +// implements concept BidirectionalIterator +template +struct BidirIterator : Policy { + using value_type = typename std::remove_cv::type; + using difference_type = std::ptrdiff_t; + using pointer = typename Policy::value_type*; + using reference = typename Policy::reference; + using iterator_category = std::bidirectional_iterator_tag; + + BidirIterator() = default; + BidirIterator(Policy&& p) : Policy(p) {} + + BidirIterator& operator++() { Policy::increment(); return *this; } + BidirIterator operator++(int) { BidirIterator x = *this; ++*this; return x; } + BidirIterator& operator--() { Policy::decrement(); return *this; } + BidirIterator operator--(int) { BidirIterator x = *this; --*this; return x; } + bool operator==(const BidirIterator &o) const { return Policy::equal(o); } + bool operator!=(const BidirIterator &o) const { return !Policy::equal(o); } + reference operator*() { return Policy::dereference(); } + pointer operator->() { return &Policy::dereference(); } + using const_variant = BidirIterator; + operator const_variant() const { + return const_variant(static_cast(*this)); + } +}; + +template +class StrideIterPolicy { +public: + using value_type = Value; + using reference = Value&; + StrideIterPolicy() : cur_(nullptr), offset_(0), stride_(0) {} + StrideIterPolicy(Value* ptr, std::size_t offset, size_t stride) + : cur_(ptr), offset_(offset), stride_((unsigned)stride) {} + void increment() { cur_ += stride_; } + void decrement() { cur_ -= stride_; } + bool equal(const StrideIterPolicy& o) const { return cur_ == o.cur_; } + Value& dereference() { return cur_[offset_]; } + using const_policy = StrideIterPolicy; + operator const_policy() const { return const_policy(cur_, offset_, stride_); } 
+private: + Value* cur_; + std::size_t offset_; + unsigned stride_; +}; +template +using StrideIter = BidirIterator>; + + +template +class IndirectIterPolicy { +public: + using value_type = Value; + using reference = Value&; + IndirectIterPolicy() : redir_(nullptr) {} + IndirectIterPolicy(Redirect* redir, std::vector::const_iterator cur) + : redir_(redir), cur_(cur) {} + void increment() { ++cur_; } + void decrement() { --cur_; } + bool equal(const IndirectIterPolicy& o) const { return cur_ == o.cur_; } + Value& dereference() { return redir_->value_at(*cur_); } + using const_policy = IndirectIterPolicy; + operator const_policy() const { return const_policy(redir_, cur_); } + // TODO: what should be done with absent optional tags (*cur_ < 0)? +private: + Redirect* redir_; + std::vector::const_iterator cur_; // points into positions +}; +template +using IndirectIter = BidirIterator>; + + +template +class UniqIterPolicy { +public: + using value_type = Value; + using reference = Value&; + UniqIterPolicy() : vec_(nullptr), pos_(0) {} + UniqIterPolicy(Vector* vec, std::size_t pos) : vec_(vec), pos_(pos) {} + void increment() { + // move to the first element of the next group + const auto& key = (*vec_)[pos_].group_key(); + ++pos_; + while (pos_ != vec_->size() && (*vec_)[pos_].group_key() == key) + ++pos_; + } + void decrement() { + --pos_; // now we are at the last element of the previous group + const auto& key = (*vec_)[pos_].group_key(); + while (pos_ != 0 && (*vec_)[pos_-1].group_key() == key) + --pos_; // move to the group beginning + } + bool equal(const UniqIterPolicy& o) const { return pos_ == o.pos_; } + Value& dereference() { return (*vec_)[pos_]; } + using const_policy = UniqIterPolicy; + operator const_policy() const { return const_policy(vec_, pos_); } +private: + Vector* vec_; + std::size_t pos_; +}; +template +using UniqIter = BidirIterator>; + +template> +struct UniqProxy { + Vector& vec; + using iterator = UniqIter; + iterator begin() { return {{&vec, 
0}}; } + iterator end() { return {{&vec, vec.size()}}; } +}; +template> +struct ConstUniqProxy { + const Vector& vec; + using iterator = UniqIter; + iterator begin() const { return {{&vec, 0}}; } + iterator end() const { return {{&vec, vec.size()}}; } +}; + + +template +class GroupingIterPolicy { +public: + using value_type = Value; + using reference = Value&; + GroupingIterPolicy() = default; + GroupingIterPolicy(const Value& span) : span_(span) {} + void increment() { + span_.set_begin(span_.end()); + span_.set_size(0); + while (!span_.is_ending() && + span_.begin()->group_key() == span_.end()->group_key()) + span_.set_size(span_.size() + 1); + } + void decrement() { + span_.set_begin(span_.begin() - 1); + span_.set_size(1); + while (!span_.is_beginning() && + span_.begin()->group_key() == (span_.begin() - 1)->group_key()) { + span_.set_begin(span_.begin() - 1); + span_.set_size(span_.size() + 1); + } + } + bool equal(const GroupingIterPolicy& o) const { + return span_.begin() == o.span_.begin(); + } + Value& dereference() { return span_; } + using const_policy = GroupingIterPolicy; + operator const_policy() const { return const_policy(span_); } +private: + Value span_; +}; +template +using GroupingIter = BidirIterator>; + + +template +class FilterIterPolicy { +public: + using value_type = Value; + using reference = Value&; + FilterIterPolicy() : vec_(nullptr), pos_(0) {} + FilterIterPolicy(const Filter* filter, Vector* vec, std::size_t pos) + : filter_(filter), vec_(vec), pos_(pos) { + while (pos_ != vec_->size() && !matches(pos_)) + ++pos_; + } + bool matches(std::size_t p) const { return filter_->matches((*vec_)[p]); } + void increment() { while (++pos_ < vec_->size() && !matches(pos_)) {} } + void decrement() { while (pos_ != 0 && !matches(--pos_)) {} } + bool equal(const FilterIterPolicy& o) const { return pos_ == o.pos_; } + Value& dereference() { return (*vec_)[pos_]; } + using const_policy = FilterIterPolicy; + operator const_policy() const { return 
const_policy(vec_, pos_); } +private: + const Filter* filter_; + Vector* vec_; + std::size_t pos_; +}; +template +using FilterIter = BidirIterator>; + +template +struct FilterProxy { + const Filter& filter; + std::vector& vec; + using iterator = FilterIter, Value>; + iterator begin() { return {{&filter, &vec, 0}}; } + iterator end() { return {{&filter, &vec, vec.size()}}; } +}; + +template +struct ConstFilterProxy { + const Filter& filter; + const std::vector& vec; + using iterator = FilterIter, const Value>; + iterator begin() const { return {{&filter, &vec, 0}}; } + iterator end() const { return {{&filter, &vec, vec.size()}}; } +}; + + +template +struct ItemGroup { + using element_type = Item; + + ItemGroup(Item* start, const Item* end) + : size_(int(end - start)), extent_(int(end - start)), start_(start) { + for (const Item* i = start + 1; i != end; ++i) + if (i->group_key() != start->group_key()) + --size_; + } + + struct iterator { + Item* ptr; + const Item* end; + bool operator==(const iterator& o) const { return ptr == o.ptr; } + bool operator!=(const iterator& o) const { return ptr != o.ptr; } + iterator& operator++() { + const Item* prev = ptr++; + while (ptr != end && ptr->group_key() != prev->group_key()) + ++ptr; + return *this; + } + Item& operator*() { return *ptr; } + Item* operator->() { return ptr; } + }; + iterator begin() { return iterator{start_, start_+extent_}; } + iterator end() { return iterator{start_+extent_, start_+extent_}; } + + size_t size() const { return (size_t) size_; } + int extent() const { return extent_; } + bool empty() const { return size_ == 0; } + Item& front() { return *start_; } + const Item& front() const { return *start_; } + Item& back() { return start_[extent_ - 1]; } + const Item& back() const { return start_[extent_ - 1]; } + + // constant time unless sparse (extend_ > size_) + Item& operator[](std::size_t i) { + if (size_ == extent_ || i == 0) + return start_[i]; + for (Item* ptr = start_ + 1; ; ++ptr) + if 
(ptr->group_key() == start_->group_key()) + if (--i == 0) + return *ptr; + } + const Item& operator[](std::size_t i) const { + return const_cast(this)->operator[](i); + } + +private: + int size_ = 0; + int extent_ = 0; + Item* start_ = nullptr; +}; + +#if defined(__INTEL_COMPILER) || defined(__NVCOMPILER) + #pragma diagnostic pop +#elif defined(__NVCC__) + #pragma nv_diagnostic pop +#endif + +} // namespace gemmi +#endif diff --git a/gemmi_gph/gemmi/logger.hpp b/gemmi_gph/gemmi/logger.hpp new file mode 100644 index 00000000..35fafd17 --- /dev/null +++ b/gemmi_gph/gemmi/logger.hpp @@ -0,0 +1,71 @@ +// Copyright Global Phasing Ltd. +// +// Logger - a tiny utility for passing messages through a callback. + +#ifndef GEMMI_LOGGER_HPP_ +#define GEMMI_LOGGER_HPP_ + +#include // for fprintf +#include // for function +#include "fail.hpp" // for GEMMI_COLD +#include "util.hpp" // for cat + +namespace gemmi { + +/// Passes messages (including warnings/errors) to a callback function. +/// Messages are passed as strings without a trailing newline. +/// They have syslog-like severity levels: 8=debug, 6=info, 5=notice, 3=error, +/// allowing the use of a threshold to filter them. +/// Quirk: Errors double as both errors and warnings. Unrecoverable errors +/// don't go through this class; Logger only handles errors that can +/// be downgraded to warnings. If a callback is set, the error is passed +/// as a warning message. Otherwise, it's thrown as std::runtime_error. +struct Logger { + /// A function that handles messages. + std::function callback; + /// Pass messages of this level and all lower (more severe) levels: + /// 8=all, 6=all but debug, 5=notes and warnings, 3=warnings, 0=none + int threshold = 6; + + /// suspend() and resume() are used internally to avoid duplicate messages + /// when the same function is called (internally) multiple times. 
+ void suspend() { threshold -= 100; } + void resume() { threshold += 100; } + + /// Send a message without any prefix on with a numeric threshold N. + template void level(Args const&... args) const { + if (threshold >= N && callback) + callback(cat(args...)); + } + + /// Send a debug message. + template void debug(Args const&... args) const { level<8>("Debug: ", args...); } + /// Send a message without any prefix. + template void mesg(Args const&... args) const { level<6>(args...); } + /// Send a note (a notice, a significant message). + template void note(Args const&... args) const { level<5>("Note: ", args...); } + + /// Send a warning/error (see Quirk above). + template GEMMI_COLD void err(Args const&... args) const { + if (threshold >= 3) { + std::string msg = cat(args...); + if (callback == nullptr) + fail(msg); + callback("Warning: " + msg); + } + } + + // predefined callbacks + + /// to be used as: logger.callback = Logger::to_stderr; + static void to_stderr(const std::string& s) { + std::fprintf(stderr, "%s\n", s.c_str()); + } + /// to be used as: logger.callback = Logger::to_stdout; + static void to_stdout(const std::string& s) { + std::fprintf(stdout, "%s\n", s.c_str()); + } +}; + +} // namespace gemmi +#endif diff --git a/symmetry/gemmi/math.hpp b/gemmi_gph/gemmi/math.hpp similarity index 100% rename from symmetry/gemmi/math.hpp rename to gemmi_gph/gemmi/math.hpp diff --git a/gemmi_gph/gemmi/mtz.hpp b/gemmi_gph/gemmi/mtz.hpp new file mode 100644 index 00000000..c7cf4431 --- /dev/null +++ b/gemmi_gph/gemmi/mtz.hpp @@ -0,0 +1,600 @@ +// Copyright 2019 Global Phasing Ltd. +// +// MTZ reflection file format. 
+ +#ifndef GEMMI_MTZ_HPP_ +#define GEMMI_MTZ_HPP_ + +#include +#include // for isnan +#include // for int32_t +#include // for copy +#include +#include +#include +#include +#include "fail.hpp" // for fail +#include "input.hpp" // for AnyStream, FileStream, CharArray +#include "iterator.hpp" // for StrideIter +#include "logger.hpp" // for Logger +#include "math.hpp" // for rad, Mat33 +#include "symmetry.hpp" // for find_spacegroup_by_name, SpaceGroup +#include "unitcell.hpp" // for UnitCell +#include "util.hpp" // for ialpha4_id, rtrim_str, ialpha3_id, ... + +namespace gemmi { + +// Unmerged MTZ files always store in-asu hkl indices and symmetry operation +// encoded in the M/ISYM column. Here is a helper for writing such files. +struct UnmergedHklMover { + UnmergedHklMover(const SpaceGroup* spacegroup) : asu_(spacegroup) { + if (spacegroup) + group_ops_ = spacegroup->operations(); + } + + // Modifies hkl and returns ISYM value for M/ISYM + int move_to_asu(std::array& hkl) { + std::pair hkl_isym = asu_.to_asu(hkl, group_ops_); + hkl = hkl_isym.first; + return hkl_isym.second; + } + +private: + ReciprocalAsu asu_; + GroupOps group_ops_; +}; + +struct MtzMetadata { + std::string source_path; // input file path, if known + bool same_byte_order = true; + bool indices_switched_to_original = false; + std::int64_t header_offset = 0; + std::string version_stamp; + std::string title; + int nreflections = 0; + std::array sort_order = {}; + double min_1_d2 = NAN; + double max_1_d2 = NAN; + float valm = NAN; + int nsymop = 0; + UnitCell cell; + int spacegroup_number = 0; + std::string spacegroup_name; + std::vector symops; + const SpaceGroup* spacegroup = nullptr; + std::vector history; + std::string appended_text; + // used to report non-critical problems when reading a file (also used in mtz2cif) + Logger logger; +}; + +struct GEMMI_DLL Mtz : public MtzMetadata { + struct Dataset { + int id; + std::string project_name; + std::string crystal_name; + std::string dataset_name; + 
UnitCell cell; + double wavelength; // 0 means not set + }; + + struct Column { + int dataset_id; + char type; + std::string label; + float min_value = NAN; + float max_value = NAN; + std::string source; // from COLSRC + Mtz* parent; + std::size_t idx; + + Dataset& dataset() { return parent->dataset(dataset_id); } + const Dataset& dataset() const { return parent->dataset(dataset_id); } + bool has_data() const { return parent->has_data(); } + int size() const { return has_data() ? parent->nreflections : 0; } + size_t stride() const { return parent->columns.size(); } + float& operator[](std::size_t n) { return parent->data[idx + n * stride()]; } + float operator[](std::size_t n) const { return parent->data[idx + n * stride()]; } + float& at(std::size_t n) { return parent->data.at(idx + n * stride()); } + float at(std::size_t n) const { return parent->data.at(idx + n * stride()); } + bool is_integer() const { + return type == 'H' || type == 'B' || type == 'Y' || type == 'I'; + } + + const Column* get_next_column_if_type(char next_type) const { + if (idx + 1 < parent->columns.size()) { + const Column& next_col = parent->columns[idx + 1]; + if (next_col.dataset_id == dataset_id && next_col.type == next_type) + return &next_col; + } + return nullptr; + } + + using iterator = StrideIter; + iterator begin() { + assert(parent); + assert(&parent->columns[idx] == this); + return iterator({parent->data.data(), idx, stride()}); + } + iterator end() { + return iterator({parent->data.data() + parent->data.size(), idx, + stride()}); + } + using const_iterator = StrideIter; + const_iterator begin() const { return const_cast(this)->begin(); } + const_iterator end() const { return const_cast(this)->end(); } + }; + + struct Batch { + Batch() { + ints.resize(29, 0); + floats.resize(156, 0.); + // write the same values that are written by CCP4 progs such as COMBAT + ints[0] = 29 + 156; + ints[1] = 29; + ints[2] = 156; + // COMBAT sets BSCALE=1, but Pointless sets it to 0. 
+ //floats[43] = 1.f; // batch scale + } + int number = 0; + std::string title; + std::vector ints; + std::vector floats; + std::vector axes; + + UnitCell get_cell() const { + return UnitCell(floats[0], floats[1], floats[2], + floats[3], floats[4], floats[5]); + } + void set_cell(const UnitCell& uc) { + floats[0] = (float) uc.a; + floats[1] = (float) uc.b; + floats[2] = (float) uc.c; + floats[3] = (float) uc.alpha; + floats[4] = (float) uc.beta; + floats[5] = (float) uc.gamma; + } + + int dataset_id() const { return ints[20]; } + void set_dataset_id(int id) { ints[20] = id; } + float wavelength() const { return floats[86]; } + void set_wavelength(float lambda) { floats[86] = lambda; } + float phi_start() const { return floats[36]; } + float phi_end() const { return floats[37]; } + Mat33 matrix_U() const { + return Mat33(floats[6], floats[9], floats[12], + floats[7], floats[10], floats[13], + floats[8], floats[11], floats[14]); + } + }; + + std::vector datasets; + std::vector columns; + std::vector batches; + std::vector data; + + explicit Mtz(bool with_base=false) { + if (with_base) + add_base(); + } + Mtz(Mtz&& o) noexcept { *this = std::move(o); } + Mtz& operator=(Mtz&& o) noexcept { + MtzMetadata::operator=(std::move(o)); + datasets = std::move(o.datasets); + columns = std::move(o.columns); + batches = std::move(o.batches); + data = std::move(o.data); + for (Mtz::Column& col : columns) + col.parent = this; + return *this; + } + + // explicit to be aware where we make copies + explicit Mtz(const Mtz& o) : MtzMetadata(o) { + datasets = o.datasets; + columns = o.columns; + batches = o.batches; + data = o.data; + for (Mtz::Column& col : columns) + col.parent = this; + } + + Mtz& operator=(Mtz const&) = delete; + + void add_base() { + datasets.push_back({0, "HKL_base", "HKL_base", "HKL_base", cell, 0.}); + for (int i = 0; i != 3; ++i) + add_column(std::string(1, "HKL"[i]), 'H', 0, i, false); + } + + // Functions to use after MTZ headers (and data) is read. 
+ + double resolution_high() const { return std::sqrt(1.0 / max_1_d2); } + double resolution_low() const { return std::sqrt(1.0 / min_1_d2); } + + UnitCell& get_cell(int dataset=-1) { + for (Dataset& ds : datasets) + if (ds.id == dataset && ds.cell.is_crystal() && ds.cell.a > 0) + return ds.cell; + return cell; + } + + const UnitCell& get_cell(int dataset=-1) const { + return const_cast(this)->get_cell(dataset); + } + + void set_cell_for_all(const UnitCell& new_cell) { + cell = new_cell; + cell.set_cell_images_from_spacegroup(spacegroup); // probably not needed + for (Dataset& ds : datasets) + ds.cell = cell; + } + + UnitCellParameters get_average_cell_from_batch_headers(double* rmsd) const; + + void set_spacegroup(const SpaceGroup* new_sg) { + spacegroup = new_sg; + spacegroup_number = new_sg ? spacegroup->ccp4 : 0; + spacegroup_name = new_sg ? spacegroup->hm : ""; + } + + Dataset& last_dataset() { + if (datasets.empty()) + fail("MTZ dataset not found (missing DATASET header line?)."); + return datasets.back(); + } + + Dataset& dataset(int id) { + if ((size_t)id < datasets.size() && datasets[id].id == id) + return datasets[id]; + for (Dataset& d : datasets) + if (d.id == id) + return d; + fail("MTZ file has no dataset with ID " + std::to_string(id)); + } + const Dataset& dataset(int id) const { + return const_cast(this)->dataset(id); + } + + Dataset* dataset_with_name(const std::string& name) { + for (Dataset& d : datasets) + if (d.dataset_name == name) + return &d; + return nullptr; + } + const Dataset* dataset_with_name(const std::string& label) const { + return const_cast(this)->dataset_with_name(label); + } + + int count(const std::string& label) const { + int n = 0; + for (const Column& col : columns) + if (col.label == label) + ++n; + return n; + } + + int count_type(char type) const { + int n = 0; + for (const Column& col : columns) + if (col.type == type) + ++n; + return n; + } + + Column* column_with_label(const std::string& label, const Dataset* 
ds=nullptr, char type='*') { + for (Column& col : columns) + if (col.label == label && (!ds || ds->id == col.dataset_id) + && (type == '*' || type == col.type)) + return &col; + return nullptr; + } + const Column* column_with_label(const std::string& label, const Dataset* ds=nullptr, + char type='*') const { + return const_cast(this)->column_with_label(label, ds, type); + } + + const Column& get_column_with_label(const std::string& label, const Dataset* ds=nullptr) const { + if (const Column* col = column_with_label(label, ds)) + return *col; + fail("Column label not found: " + label); + } + + std::vector columns_with_type(char type) const { + std::vector cols; + for (const Column& col : columns) + if (col.type == type) + cols.push_back(&col); + return cols; + } + + std::vector positions_of_columns_with_type(char col_type) const { + std::vector cols; + for (int i = 0; i < (int) columns.size(); ++i) + if (columns[i].type == col_type) + cols.push_back(i); + return cols; + } + + // F(+)/(-) pairs should have type G (and L for sigma), + // I(+)/(-) -- K (M for sigma), but E(+)/(-) has no special column type, + // so here we use column labels not types. 
+ std::vector> positions_of_plus_minus_columns() const { + std::vector> r; + for (int i = 0; i < (int) columns.size(); ++i) { + const Column& col = columns[i]; + size_t sign_pos = col.label.find("(+)"); + if (sign_pos != std::string::npos) { + std::string minus_label = columns[i].label; + minus_label[sign_pos+1] = '-'; + for (int j = 0; j < (int) columns.size(); ++j) + if (columns[j].label == minus_label && + columns[j].type == col.type && + columns[j].dataset_id == col.dataset_id) { + r.emplace_back(i, j); + break; + } + } + } + return r; + } + + /// the order of labels matters + const Column* column_with_one_of_labels(std::initializer_list labels, + char type='*') const { + for (const char* label : labels) + if (const Column* col = column_with_label(label, nullptr, type)) + return col; + return nullptr; + } + + /// the order of labels doesn't matter + Column* column_with_type_and_any_of_labels(char type, std::initializer_list labels) { + for (Column& col : columns) + if (col.type == type) { + for (const char* label : labels) + if (col.label == label) + return &col; + } + return nullptr; + } + + Column* rfree_column() { + // cf. 
MtzToCif::default_spec in mtz2cif.hpp + return column_with_type_and_any_of_labels('I', + {"FREE", "RFREE", "FREER", "FreeR_flag", "R-free-flags", "FreeRflag", "R_FREE_FLAGS"}); + } + const Column* rfree_column() const { + return const_cast(this)->rfree_column(); + } + + Column* imean_column() { + return column_with_type_and_any_of_labels('J', {"IMEAN", "I", "IOBS", "I-obs"}); + } + const Column* imean_column() const { + return const_cast(this)->imean_column(); + } + + Column* iplus_column() { + return column_with_type_and_any_of_labels('K', {"I(+)", "IOBS(+)", "I-obs(+)", "Iplus"}); + } + const Column* iplus_column() const { + return const_cast(this)->iplus_column(); + } + + Column* iminus_column() { + return column_with_type_and_any_of_labels('K', {"I(-)", "IOBS(-)", "I-obs(-)", "Iminus"}); + } + const Column* iminus_column() const { + return const_cast(this)->iminus_column(); + } + + bool has_data() const { + return data.size() == columns.size() * nreflections; + } + + bool is_merged() const { return batches.empty(); } + + /// Calculates min/max for all combinations of reflections and unit cells, + /// where unit cells are a global CELL and per-dataset DCELL. + std::array calculate_min_max_1_d2() const; + + void update_reso() { + std::array reso = calculate_min_max_1_d2(); + min_1_d2 = reso[0]; + max_1_d2 = reso[1]; + } + + // Functions for reading MTZ headers and data. 
+ + void toggle_endianness() { + same_byte_order = !same_byte_order; + swap_eight_bytes(&header_offset); + } + + void read_first_bytes(AnyStream& stream); + + /// read headers until END + void read_main_headers(AnyStream& stream, std::vector* save_headers); + + /// read the part between END and MTZENDOFHEADERS + void read_history_and_batch_headers(AnyStream& stream); + + void setup_spacegroup(); + + void read_raw_data(AnyStream& stream, bool do_read=true); + + void read_all_headers(AnyStream& stream); + + void read_stream(AnyStream& stream, bool with_data); + + void read_file(const std::string& path) { + try { + source_path = path; + FileStream stream(path.c_str(), "rb"); + read_stream(stream, true); + } catch (std::system_error&) { + throw; // system_error::what() includes path, don't add anything + } catch (std::runtime_error& e) { + fail(std::string(e.what()) + ": " + path); + } + } + + template + void read_input(Input&& input, bool with_data) { + source_path = input.path(); + read_stream(*input.create_stream(), with_data); + } + + /// the same as read_input(MaybeGzipped(path), with_data) + void read_file_gz(const std::string& path, bool with_data=true); + + std::vector sorted_row_indices(int use_first=3) const; + bool sort(int use_first=3); + + Miller get_hkl(size_t offset) const { + return {{(int)data[offset], (int)data[offset+1], (int)data[offset+2]}}; + } + void set_hkl(size_t offset, const Miller& hkl) { + for (int i = 0; i != 3; ++i) + data[offset + i] = static_cast(hkl[i]); + } + + /// Returns offset of the first hkl or (size_t)-1. Can be slow. + size_t find_offset_of_hkl(const Miller& hkl, size_t start=0) const; + + /// (for merged MTZ only) change HKL to ASU equivalent, adjust phases, etc + void ensure_asu(bool tnt_asu=false); + + /// Reindex data, usually followed by ensure_asu(). Outputs messages through logger. + void reindex(const Op& op); + + /// Change symmetry to P1 and expand reflections. Does not sort. 
+  /// Similar to command EXPAND in SFTOOLS.
+  void expand_to_p1();
+
+  /// (for unmerged MTZ only) change HKL according to M/ISYM
+  bool switch_to_original_hkl();
+
+  /// (for unmerged MTZ only) change HKL to ASU equivalent and set ISYM
+  bool switch_to_asu_hkl();
+
+  Dataset& add_dataset(const std::string& name) {
+    int id = 0;
+    for (const Dataset& d : datasets)
+      if (d.id >= id)
+        id = d.id + 1;
+    datasets.push_back({id, name, name, name, cell, 0.0});
+    return datasets.back();
+  }
+
+  Column& add_column(const std::string& label, char type,
+                     int dataset_id, int pos, bool expand_data);
+
+  // trailing_cols are columns right after src_col that are also copied.
+  Column& replace_column(size_t dest_idx, const Column& src_col,
+                         const std::vector<std::string>& trailing_cols={});
+
+  // If dest_idx < 0, the new column(s) are appended at the end,
+  // otherwise existing ones are overwritten.
+  Column& copy_column(int dest_idx, const Column& src_col,
+                      const std::vector<std::string>& trailing_cols={});
+
+  void remove_column(size_t idx);
+
+  template<typename Func>
+  void remove_rows_if(Func condition) {
+    if (!has_data() || columns.empty())  // no columns: width 0 would divide by zero below
+      fail("No data.");
+    auto out = data.begin();
+    size_t width = columns.size();
+    for (auto r = data.begin(); r < data.end(); r += width)
+      if (!condition(&*r)) {
+        if (r != out)
+          std::copy(r, r + width, out);
+        out += width;
+      }
+    data.erase(out, data.end());
+    nreflections = int(data.size() / width);
+  }
+
+  void expand_data_rows(size_t added, int pos_=-1) {
+    size_t old_row_size = columns.size() - added;
+    if (data.size() != old_row_size * nreflections)
+      fail("Internal error");
+    size_t pos = pos_ == -1 ? old_row_size : (size_t) pos_;
+    if (pos > old_row_size)
+      fail("expand_data_rows(): pos out of range");
+    vector_insert_columns(data, old_row_size, (size_t)nreflections, added, pos, NAN);
+  }
+
+  void set_data(const float* new_data, size_t n) {
+    size_t ncols = columns.size();
+    if (ncols == 0 || n % ncols != 0)  // ncols == 0 would divide by zero
+      fail("Mtz.set_data(): expected " + std::to_string(ncols) + " columns.");
+    nreflections = int(n / ncols);
+    data.assign(new_data, new_data + n);
+  }
+
+  // Function for writing MTZ file
+  void write_to_cstream(std::FILE* stream) const;
+  void write_to_string(std::string& str) const;
+  void write_to_file(const std::string& path) const;
+  size_t size_to_write() const;
+  size_t write_to_buffer(char* buf, size_t maxlen) const;
+
+private:
+  template<typename Write> void write_to_stream(Write write) const;
+};
+
+
+inline Mtz read_mtz_file(const std::string& path) {
+  Mtz mtz;
+  mtz.read_file(path);
+  return mtz;
+}
+
+template<typename Input>
+Mtz read_mtz(Input&& input, bool with_data) {
+  Mtz mtz;
+  mtz.read_input(std::forward<Input>(input), with_data);
+  return mtz;
+}
+
+// Abstraction of data source, cf. ReflnDataProxy.
+struct MtzDataProxy { + const Mtz& mtz_; + size_t stride() const { return mtz_.columns.size(); } + size_t size() const { return mtz_.data.size(); } + using num_type = float; + float get_num(size_t n) const { return mtz_.data[n]; } + const UnitCell& unit_cell() const { return mtz_.cell; } + const SpaceGroup* spacegroup() const { return mtz_.spacegroup; } + Miller get_hkl(size_t offset) const { return mtz_.get_hkl(offset); } + + size_t column_index(const std::string& label) const { + if (const Mtz::Column* col = mtz_.column_with_label(label)) + return col->idx; + fail("MTZ file has no column with label: " + label); + } +}; + +// Like above, but here the data is stored outside of the Mtz class +struct MtzExternalDataProxy : MtzDataProxy { + const float* data_; + MtzExternalDataProxy(const Mtz& mtz, const float* data) + : MtzDataProxy{mtz}, data_(data) {} + size_t size() const { return mtz_.columns.size() * mtz_.nreflections; } + float get_num(size_t n) const { return data_[n]; } + Miller get_hkl(size_t offset) const { + return {{(int)data_[offset + 0], + (int)data_[offset + 1], + (int)data_[offset + 2]}}; + } +}; + +inline MtzDataProxy data_proxy(const Mtz& mtz) { return {mtz}; } + +} // namespace gemmi + +#endif diff --git a/gemmi_gph/gemmi/sprintf.hpp b/gemmi_gph/gemmi/sprintf.hpp new file mode 100644 index 00000000..ead2ce0f --- /dev/null +++ b/gemmi_gph/gemmi/sprintf.hpp @@ -0,0 +1,80 @@ +// Copyright 2017 Global Phasing Ltd. +// +// interface to stb_sprintf: snprintf_z, to_str(float|double) + +#ifndef GEMMI_SPRINTF_HPP_ +#define GEMMI_SPRINTF_HPP_ + +#include +#ifdef __has_include +# if __has_include() && !(defined(_MSVC_LANG) && _MSVC_LANG < 201703L) +# include +# endif +#endif + +#if __cpp_lib_to_chars < 201611L +# include // for min +#endif + +#include "fail.hpp" // for GEMMI_DLL + +namespace gemmi { + +// On MinGW format(printf) doesn't support %zu. 
+#if (defined(__GNUC__) && !defined(__MINGW32__)) || defined(__clang__)
+# define GEMMI_ATTRIBUTE_FORMAT(fmt,va) __attribute__((format(printf,fmt,va)))
+#else
+# define GEMMI_ATTRIBUTE_FORMAT(fmt,va)
+#endif
+/// stb_snprintf in gemmi namespace - like snprintf, but ignores locale
+/// and is always zero-terminated (hence _z).
+GEMMI_DLL int snprintf_z(char *buf, int count, char const *fmt, ...)
+                                                     GEMMI_ATTRIBUTE_FORMAT(3,4);
+/// stb_sprintf in gemmi namespace
+GEMMI_DLL int sprintf_z(char *buf, char const *fmt, ...) GEMMI_ATTRIBUTE_FORMAT(2,3);
+
+inline std::string to_str(double d) {
+  char buf[24];
+  int len = sprintf_z(buf, "%.9g", d);
+  return std::string(buf, len > 0 ? len : 0);
+}
+
+inline std::string to_str(float d) {
+  char buf[16];
+  int len = sprintf_z(buf, "%.6g", d);
+  return std::string(buf, len > 0 ? len : 0);
+}
+
+template<int Prec>
+std::string to_str_prec(double d) {
+  static_assert(Prec >= 0 && Prec < 7, "unsupported precision");  // Prec = digits after the point
+  char buf[24];  // "-100000000.000000" takes 18 bytes with the terminator; 16 overflowed
+  int len = d > -1e8 && d < 1e8 ? sprintf_z(buf, "%.*f", Prec, d)
+                                : sprintf_z(buf, "%g", d);
+  return std::string(buf, len > 0 ? len : 0);
+}
+
+/// zero-terminated to_chars()
+inline char* to_chars_z(char* first, char* last, int value) {
+#if __cpp_lib_to_chars >= 201611L
+  auto result = std::to_chars(first, last-1, value);
+  *result.ptr = '\0';
+  return result.ptr;
+#else
+  int n = snprintf_z(first, int(last - first), "%d", value);
+  return std::min(first + n, last - 1);
+#endif
+}
+inline char* to_chars_z(char* first, char* last, size_t value) {
+#if __cpp_lib_to_chars >= 201611L
+  auto result = std::to_chars(first, last-1, value);
+  *result.ptr = '\0';
+  return result.ptr;
+#else
+  int n = snprintf_z(first, int(last - first), "%zu", value);
+  return std::min(first + n, last - 1);
+#endif
+}
+
+} // namespace gemmi
+#endif
diff --git a/symmetry/gemmi/symmetry.hpp b/gemmi_gph/gemmi/symmetry.hpp
similarity index 100%
rename from symmetry/gemmi/symmetry.hpp
rename to gemmi_gph/gemmi/symmetry.hpp
diff --git a/gemmi_gph/gemmi/third_party/fast_float.h b/gemmi_gph/gemmi/third_party/fast_float.h
new file mode 100644
index 00000000..10afb792
--- /dev/null
+++ b/gemmi_gph/gemmi/third_party/fast_float.h
@@ -0,0 +1,4933 @@
+// fast_float by Daniel Lemire
+// fast_float by João Paulo Magalhaes
+//
+//
+// with contributions from Eugene Golushkov
+// with contributions from Maksim Kita
+// with contributions from Marcin Wojdyr
+// with contributions from Neal Richardson
+// with contributions from Tim Paine
+// with contributions from Fabio Pellacini
+// with contributions from Lénárd Szolnoki
+// with contributions from Jan Pharago
+// with contributions from Maya Warrier
+// with contributions from Taha Khokhar
+// with contributions from Anders Dalvander
+//
+//
+// Licensed under the Apache License, Version 2.0, or the
+// MIT License or the Boost License. This file may not be copied,
+// modified, or distributed except according to those terms.
+// +// MIT License Notice +// +// MIT License +// +// Copyright (c) 2021 The fast_float authors +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +// +// Apache License (Version 2.0) Notice +// +// Copyright 2021 The fast_float authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// +// BOOST License Notice +// +// Boost Software License - Version 1.0 - August 17th, 2003 +// +// Permission is hereby granted, free of charge, to any person or organization +// obtaining a copy of the software and accompanying documentation covered by +// this license (the "Software") to use, reproduce, display, distribute, +// execute, and transmit the Software, and to prepare derivative works of the +// Software, and to permit third-parties to whom the Software is furnished to +// do so, all subject to the following: +// +// The copyright notices in the Software and this entire statement, including +// the above license grant, this restriction and the following disclaimer, +// must be included in all copies of the Software, in whole or in part, and +// all derivative works of the Software, unless such copies or derivative +// works are solely in the form of machine-executable object code generated by +// a source language processor. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. 
+// + +#ifndef FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H +#define FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H + +#ifdef __has_include +#if __has_include() +#include +#endif +#endif + +// Testing for https://wg21.link/N3652, adopted in C++14 +#if defined(__cpp_constexpr) && __cpp_constexpr >= 201304 +#define FASTFLOAT_CONSTEXPR14 constexpr +#else +#define FASTFLOAT_CONSTEXPR14 +#endif + +#if defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L +#define FASTFLOAT_HAS_BIT_CAST 1 +#else +#define FASTFLOAT_HAS_BIT_CAST 0 +#endif + +#if defined(__cpp_lib_is_constant_evaluated) && \ + __cpp_lib_is_constant_evaluated >= 201811L +#define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 1 +#else +#define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 0 +#endif + +#if defined(__cpp_if_constexpr) && __cpp_if_constexpr >= 201606L +#define FASTFLOAT_IF_CONSTEXPR17(x) if constexpr (x) +#else +#define FASTFLOAT_IF_CONSTEXPR17(x) if (x) +#endif + +// Testing for relevant C++20 constexpr library features +#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED && FASTFLOAT_HAS_BIT_CAST && \ + defined(__cpp_lib_constexpr_algorithms) && \ + __cpp_lib_constexpr_algorithms >= 201806L /*For std::copy and std::fill*/ +#define FASTFLOAT_CONSTEXPR20 constexpr +#define FASTFLOAT_IS_CONSTEXPR 1 +#else +#define FASTFLOAT_CONSTEXPR20 +#define FASTFLOAT_IS_CONSTEXPR 0 +#endif + +#if __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) +#define FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE 0 +#else +#define FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE 1 +#endif + +#endif // FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H + +#ifndef FASTFLOAT_FLOAT_COMMON_H +#define FASTFLOAT_FLOAT_COMMON_H + +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef __has_include +#if __has_include() && (__cplusplus > 202002L || (defined(_MSVC_LANG) && (_MSVC_LANG > 202002L))) +#include +#endif +#endif + +#define FASTFLOAT_VERSION_MAJOR 8 +#define FASTFLOAT_VERSION_MINOR 2 +#define FASTFLOAT_VERSION_PATCH 3 + 
+#define FASTFLOAT_STRINGIZE_IMPL(x) #x +#define FASTFLOAT_STRINGIZE(x) FASTFLOAT_STRINGIZE_IMPL(x) + +#define FASTFLOAT_VERSION_STR \ + FASTFLOAT_STRINGIZE(FASTFLOAT_VERSION_MAJOR) \ + "." FASTFLOAT_STRINGIZE(FASTFLOAT_VERSION_MINOR) "." FASTFLOAT_STRINGIZE( \ + FASTFLOAT_VERSION_PATCH) + +#define FASTFLOAT_VERSION \ + (FASTFLOAT_VERSION_MAJOR * 10000 + FASTFLOAT_VERSION_MINOR * 100 + \ + FASTFLOAT_VERSION_PATCH) + +namespace fast_float { + +enum class chars_format : uint64_t; + +namespace detail { +constexpr chars_format basic_json_fmt = chars_format(1 << 5); +constexpr chars_format basic_fortran_fmt = chars_format(1 << 6); +} // namespace detail + +enum class chars_format : uint64_t { + scientific = 1 << 0, + fixed = 1 << 2, + hex = 1 << 3, + no_infnan = 1 << 4, + // RFC 8259: https://datatracker.ietf.org/doc/html/rfc8259#section-6 + json = uint64_t(detail::basic_json_fmt) | fixed | scientific | no_infnan, + // Extension of RFC 8259 where, e.g., "inf" and "nan" are allowed. + json_or_infnan = uint64_t(detail::basic_json_fmt) | fixed | scientific, + fortran = uint64_t(detail::basic_fortran_fmt) | fixed | scientific, + general = fixed | scientific, + allow_leading_plus = 1 << 7, + skip_white_space = 1 << 8, +}; + +template struct from_chars_result_t { + UC const *ptr; + std::errc ec; + + // https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2023/p2497r0.html + constexpr explicit operator bool() const noexcept { + return ec == std::errc(); + } +}; + +using from_chars_result = from_chars_result_t; + +template struct parse_options_t { + constexpr explicit parse_options_t(chars_format fmt = chars_format::general, + UC dot = UC('.'), int b = 10) + : format(fmt), decimal_point(dot), base(b) {} + + /** Which number formats are accepted */ + chars_format format; + /** The character used as decimal point */ + UC decimal_point; + /** The base used for integers */ + int base; +}; + +using parse_options = parse_options_t; + +} // namespace fast_float + +#if 
FASTFLOAT_HAS_BIT_CAST +#include +#endif + +#if (defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || \ + defined(__amd64) || defined(__aarch64__) || defined(_M_ARM64) || \ + defined(__MINGW64__) || defined(__s390x__) || \ + (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || \ + defined(__PPC64LE__)) || \ + defined(__loongarch64) || (defined(__riscv) && __riscv_xlen == 64)) +#define FASTFLOAT_64BIT 1 +#elif (defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ + defined(__arm__) || defined(_M_ARM) || defined(__ppc__) || \ + defined(__MINGW32__) || defined(__EMSCRIPTEN__) || \ + (defined(__riscv) && __riscv_xlen == 32)) +#define FASTFLOAT_32BIT 1 +#else + // Need to check incrementally, since SIZE_MAX is a size_t, avoid overflow. +// We can never tell the register width, but the SIZE_MAX is a good +// approximation. UINTPTR_MAX and INTPTR_MAX are optional, so avoid them for max +// portability. +#if SIZE_MAX == 0xffff +#error Unknown platform (16-bit, unsupported) +#elif SIZE_MAX == 0xffffffff +#define FASTFLOAT_32BIT 1 +#elif SIZE_MAX == 0xffffffffffffffff +#define FASTFLOAT_64BIT 1 +#else +#error Unknown platform (not 32-bit, not 64-bit?) 
+#endif +#endif + +#if ((defined(_WIN32) || defined(_WIN64)) && !defined(__clang__)) || \ + (defined(_M_ARM64) && !defined(__MINGW32__)) +#include +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define FASTFLOAT_VISUAL_STUDIO 1 +#endif + +#if defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ +#define FASTFLOAT_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#elif defined _WIN32 +#define FASTFLOAT_IS_BIG_ENDIAN 0 +#else +#if defined(__APPLE__) || defined(__FreeBSD__) +#include +#elif defined(sun) || defined(__sun) +#include +#elif defined(__MVS__) +#include +#else +#ifdef __has_include +#if __has_include() +#include +#endif //__has_include() +#endif //__has_include +#endif +# +#ifndef __BYTE_ORDER__ +// safe choice +#define FASTFLOAT_IS_BIG_ENDIAN 0 +#endif +# +#ifndef __ORDER_LITTLE_ENDIAN__ +// safe choice +#define FASTFLOAT_IS_BIG_ENDIAN 0 +#endif +# +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define FASTFLOAT_IS_BIG_ENDIAN 0 +#else +#define FASTFLOAT_IS_BIG_ENDIAN 1 +#endif +#endif + +#if defined(__SSE2__) || (defined(FASTFLOAT_VISUAL_STUDIO) && \ + (defined(_M_AMD64) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP == 2))) +#define FASTFLOAT_SSE2 1 +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#define FASTFLOAT_NEON 1 +#endif + +#if defined(FASTFLOAT_SSE2) || defined(FASTFLOAT_NEON) +#define FASTFLOAT_HAS_SIMD 1 +#endif + +#if defined(__GNUC__) +// disable -Wcast-align=strict (GCC only) +#define FASTFLOAT_SIMD_DISABLE_WARNINGS \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") +#else +#define FASTFLOAT_SIMD_DISABLE_WARNINGS +#endif + +#if defined(__GNUC__) +#define FASTFLOAT_SIMD_RESTORE_WARNINGS _Pragma("GCC diagnostic pop") +#else +#define FASTFLOAT_SIMD_RESTORE_WARNINGS +#endif + +#ifdef FASTFLOAT_VISUAL_STUDIO +#define fastfloat_really_inline __forceinline +#else +#define fastfloat_really_inline inline __attribute__((always_inline)) +#endif + +#ifndef FASTFLOAT_ASSERT 
+#define FASTFLOAT_ASSERT(x) \ + { ((void)(x)); } +#endif + +#ifndef FASTFLOAT_DEBUG_ASSERT +#define FASTFLOAT_DEBUG_ASSERT(x) \ + { ((void)(x)); } +#endif + +// rust style `try!()` macro, or `?` operator +#define FASTFLOAT_TRY(x) \ + { \ + if (!(x)) \ + return false; \ + } + +#define FASTFLOAT_ENABLE_IF(...) \ + typename std::enable_if<(__VA_ARGS__), int>::type + +namespace fast_float { + +fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() { +#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED + return std::is_constant_evaluated(); +#else + return false; +#endif +} + +template +struct is_supported_float_type + : std::integral_constant< + bool, std::is_same::value || std::is_same::value +#ifdef __STDCPP_FLOAT64_T__ + || std::is_same::value +#endif +#ifdef __STDCPP_FLOAT32_T__ + || std::is_same::value +#endif +#ifdef __STDCPP_FLOAT16_T__ + || std::is_same::value +#endif +#ifdef __STDCPP_BFLOAT16_T__ + || std::is_same::value +#endif + > { +}; + +template +using equiv_uint_t = typename std::conditional< + sizeof(T) == 1, uint8_t, + typename std::conditional< + sizeof(T) == 2, uint16_t, + typename std::conditional::type>::type>::type; + +template struct is_supported_integer_type : std::is_integral {}; + +template +struct is_supported_char_type + : std::integral_constant::value || + std::is_same::value || + std::is_same::value || + std::is_same::value +#ifdef __cpp_char8_t + || std::is_same::value +#endif + > { +}; + +template +inline FASTFLOAT_CONSTEXPR14 bool +fastfloat_strncasecmp3(UC const *actual_mixedcase, + UC const *expected_lowercase) { + uint64_t mask{0}; + FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 1) { mask = 0x2020202020202020; } + else FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 2) { + mask = 0x0020002000200020; + } + else FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 4) { + mask = 0x0000002000000020; + } + else { + return false; + } + + uint64_t val1{0}, val2{0}; + if (cpp20_and_in_constexpr()) { + for (size_t i = 0; i < 3; i++) { + if ((actual_mixedcase[i] | 32) != 
expected_lowercase[i]) { + return false; + } + } + return true; + } else { + FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 1 || sizeof(UC) == 2) { + ::memcpy(&val1, actual_mixedcase, 3 * sizeof(UC)); + ::memcpy(&val2, expected_lowercase, 3 * sizeof(UC)); + val1 |= mask; + val2 |= mask; + return val1 == val2; + } + else FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 4) { + ::memcpy(&val1, actual_mixedcase, 2 * sizeof(UC)); + ::memcpy(&val2, expected_lowercase, 2 * sizeof(UC)); + val1 |= mask; + if (val1 != val2) { + return false; + } + return (actual_mixedcase[2] | 32) == (expected_lowercase[2]); + } + else { + return false; + } + } + + return true; +} + +template +inline FASTFLOAT_CONSTEXPR14 bool +fastfloat_strncasecmp5(UC const *actual_mixedcase, + UC const *expected_lowercase) { + uint64_t mask{0}; + uint64_t val1{0}, val2{0}; + if (cpp20_and_in_constexpr()) { + for (size_t i = 0; i < 5; i++) { + if ((actual_mixedcase[i] | 32) != expected_lowercase[i]) { + return false; + } + } + return true; + } else { + FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 1) { + mask = 0x2020202020202020; + ::memcpy(&val1, actual_mixedcase, 5 * sizeof(UC)); + ::memcpy(&val2, expected_lowercase, 5 * sizeof(UC)); + val1 |= mask; + val2 |= mask; + return val1 == val2; + } + else FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 2) { + mask = 0x0020002000200020; + ::memcpy(&val1, actual_mixedcase, 4 * sizeof(UC)); + ::memcpy(&val2, expected_lowercase, 4 * sizeof(UC)); + val1 |= mask; + if (val1 != val2) { + return false; + } + return (actual_mixedcase[4] | 32) == (expected_lowercase[4]); + } + else FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 4) { + mask = 0x0000002000000020; + ::memcpy(&val1, actual_mixedcase, 2 * sizeof(UC)); + ::memcpy(&val2, expected_lowercase, 2 * sizeof(UC)); + val1 |= mask; + if (val1 != val2) { + return false; + } + ::memcpy(&val1, actual_mixedcase + 2, 2 * sizeof(UC)); + ::memcpy(&val2, expected_lowercase + 2, 2 * sizeof(UC)); + val1 |= mask; + if (val1 != val2) { + return false; + } + return 
(actual_mixedcase[4] | 32) == (expected_lowercase[4]); + } + else { + return false; + } + } + + return true; +} + +// Compares two ASCII strings in a case insensitive manner. +template +inline FASTFLOAT_CONSTEXPR14 bool +fastfloat_strncasecmp(UC const *actual_mixedcase, UC const *expected_lowercase, + size_t length) { + uint64_t mask{0}; + FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 1) { mask = 0x2020202020202020; } + else FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 2) { + mask = 0x0020002000200020; + } + else FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 4) { + mask = 0x0000002000000020; + } + else { + return false; + } + + if (cpp20_and_in_constexpr()) { + for (size_t i = 0; i < length; i++) { + if ((actual_mixedcase[i] | 32) != expected_lowercase[i]) { + return false; + } + } + return true; + } else { + uint64_t val1{0}, val2{0}; + size_t sz{8 / (sizeof(UC))}; + for (size_t i = 0; i < length; i += sz) { + val1 = val2 = 0; + sz = std::min(sz, length - i); + ::memcpy(&val1, actual_mixedcase + i, sz * sizeof(UC)); + ::memcpy(&val2, expected_lowercase + i, sz * sizeof(UC)); + val1 |= mask; + val2 |= mask; + if (val1 != val2) { + return false; + } + } + return true; + } +} + +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." 
+#endif + +// a pointer and a length to a contiguous block of memory +template struct span { + T const *ptr; + size_t length; + + constexpr span(T const *_ptr, size_t _length) : ptr(_ptr), length(_length) {} + + constexpr span() : ptr(nullptr), length(0) {} + + constexpr size_t len() const noexcept { return length; } + + FASTFLOAT_CONSTEXPR14 const T &operator[](size_t index) const noexcept { + FASTFLOAT_DEBUG_ASSERT(index < length); + return ptr[index]; + } +}; + +struct value128 { + uint64_t low; + uint64_t high; + + constexpr value128(uint64_t _low, uint64_t _high) : low(_low), high(_high) {} + + constexpr value128() : low(0), high(0) {} +}; + +/* Helper C++14 constexpr generic implementation of leading_zeroes */ +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int +leading_zeroes_generic(uint64_t input_num, int last_bit = 0) { + if (input_num & uint64_t(0xffffffff00000000)) { + input_num >>= 32; + last_bit |= 32; + } + if (input_num & uint64_t(0xffff0000)) { + input_num >>= 16; + last_bit |= 16; + } + if (input_num & uint64_t(0xff00)) { + input_num >>= 8; + last_bit |= 8; + } + if (input_num & uint64_t(0xf0)) { + input_num >>= 4; + last_bit |= 4; + } + if (input_num & uint64_t(0xc)) { + input_num >>= 2; + last_bit |= 2; + } + if (input_num & uint64_t(0x2)) { /* input_num >>= 1; */ + last_bit |= 1; + } + return 63 - last_bit; +} + +/* result might be undefined when input_num is zero */ +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 int +leading_zeroes(uint64_t input_num) { + assert(input_num > 0); + if (cpp20_and_in_constexpr()) { + return leading_zeroes_generic(input_num); + } +#ifdef FASTFLOAT_VISUAL_STUDIO +#if defined(_M_X64) || defined(_M_ARM64) + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
+ _BitScanReverse64(&leading_zero, input_num); + return (int)(63 - leading_zero); +#else + return leading_zeroes_generic(input_num); +#endif +#else + return __builtin_clzll(input_num); +#endif +} + +/* Helper C++14 constexpr generic implementation of countr_zero for 32-bit */ +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int +countr_zero_generic_32(uint32_t input_num) { + if (input_num == 0) { + return 32; + } + int last_bit = 0; + if (!(input_num & 0x0000FFFF)) { + input_num >>= 16; + last_bit |= 16; + } + if (!(input_num & 0x00FF)) { + input_num >>= 8; + last_bit |= 8; + } + if (!(input_num & 0x0F)) { + input_num >>= 4; + last_bit |= 4; + } + if (!(input_num & 0x3)) { + input_num >>= 2; + last_bit |= 2; + } + if (!(input_num & 0x1)) { + last_bit |= 1; + } + return last_bit; +} + +/* count trailing zeroes for 32-bit integers */ +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 int +countr_zero_32(uint32_t input_num) { + if (cpp20_and_in_constexpr()) { + return countr_zero_generic_32(input_num); + } +#ifdef FASTFLOAT_VISUAL_STUDIO + unsigned long trailing_zero = 0; + if (_BitScanForward(&trailing_zero, input_num)) { + return (int)trailing_zero; + } + return 32; +#else + return input_num == 0 ? 
32 : __builtin_ctz(input_num); +#endif +} + +// slow emulation routine for 32-bit +fastfloat_really_inline constexpr uint64_t emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t +umul128_generic(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = (uint64_t)(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + (uint64_t)(lo < bd); + return lo; +} + +#ifdef FASTFLOAT_32BIT + +// slow emulation routine for 32-bit +#if !defined(__MINGW64__) +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t _umul128(uint64_t ab, + uint64_t cd, + uint64_t *hi) { + return umul128_generic(ab, cd, hi); +} +#endif // !__MINGW64__ + +#endif // FASTFLOAT_32BIT + +// compute 64-bit a*b +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 value128 +full_multiplication(uint64_t a, uint64_t b) { + if (cpp20_and_in_constexpr()) { + value128 answer; + answer.low = umul128_generic(a, b, &answer.high); + return answer; + } + value128 answer; +#if defined(_M_ARM64) && !defined(__MINGW32__) + // ARM64 has native support for 64-bit multiplications, no need to emulate + // But MinGW on ARM64 doesn't have native support for 64-bit multiplications + answer.high = __umulh(a, b); + answer.low = a * b; +#elif defined(FASTFLOAT_32BIT) || (defined(_WIN64) && !defined(__clang__) && \ + !defined(_M_ARM64) && !defined(__GNUC__)) + answer.low = _umul128(a, b, &answer.high); // _umul128 not available on ARM64 +#elif defined(FASTFLOAT_64BIT) && defined(__SIZEOF_INT128__) + __uint128_t r = ((__uint128_t)a) * b; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#else + answer.low = umul128_generic(a, b, &answer.high); +#endif + return answer; +} + +struct 
adjusted_mantissa { + uint64_t mantissa{0}; + int32_t power2{0}; // a negative value indicates an invalid result + adjusted_mantissa() = default; + + constexpr bool operator==(adjusted_mantissa const &o) const { + return mantissa == o.mantissa && power2 == o.power2; + } + + constexpr bool operator!=(adjusted_mantissa const &o) const { + return mantissa != o.mantissa || power2 != o.power2; + } +}; + +// Bias so we can get the real exponent with an invalid adjusted_mantissa. +constexpr static int32_t invalid_am_bias = -0x8000; + +// used for binary_format_lookup_tables::max_mantissa +constexpr uint64_t constant_55555 = 5 * 5 * 5 * 5 * 5; + +template struct binary_format_lookup_tables; + +template struct binary_format : binary_format_lookup_tables { + using equiv_uint = equiv_uint_t; + + static constexpr int mantissa_explicit_bits(); + static constexpr int minimum_exponent(); + static constexpr int infinite_power(); + static constexpr int sign_index(); + static constexpr int + min_exponent_fast_path(); // used when fegetround() == FE_TONEAREST + static constexpr int max_exponent_fast_path(); + static constexpr int max_exponent_round_to_even(); + static constexpr int min_exponent_round_to_even(); + static constexpr uint64_t max_mantissa_fast_path(int64_t power); + static constexpr uint64_t + max_mantissa_fast_path(); // used when fegetround() == FE_TONEAREST + static constexpr int largest_power_of_ten(); + static constexpr int smallest_power_of_ten(); + static constexpr T exact_power_of_ten(int64_t power); + static constexpr size_t max_digits(); + static constexpr equiv_uint exponent_mask(); + static constexpr equiv_uint mantissa_mask(); + static constexpr equiv_uint hidden_bit_mask(); +}; + +template struct binary_format_lookup_tables { + static constexpr double powers_of_ten[] = { + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, + 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; + + // Largest integer value v so that (5**index * v) <= 
1<<53. + // 0x20000000000000 == 1 << 53 + static constexpr uint64_t max_mantissa[] = { + 0x20000000000000, + 0x20000000000000 / 5, + 0x20000000000000 / (5 * 5), + 0x20000000000000 / (5 * 5 * 5), + 0x20000000000000 / (5 * 5 * 5 * 5), + 0x20000000000000 / (constant_55555), + 0x20000000000000 / (constant_55555 * 5), + 0x20000000000000 / (constant_55555 * 5 * 5), + 0x20000000000000 / (constant_55555 * 5 * 5 * 5), + 0x20000000000000 / (constant_55555 * 5 * 5 * 5 * 5), + 0x20000000000000 / (constant_55555 * constant_55555), + 0x20000000000000 / (constant_55555 * constant_55555 * 5), + 0x20000000000000 / (constant_55555 * constant_55555 * 5 * 5), + 0x20000000000000 / (constant_55555 * constant_55555 * 5 * 5 * 5), + 0x20000000000000 / (constant_55555 * constant_55555 * constant_55555), + 0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5), + 0x20000000000000 / + (constant_55555 * constant_55555 * constant_55555 * 5 * 5), + 0x20000000000000 / + (constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5), + 0x20000000000000 / + (constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5 * 5), + 0x20000000000000 / + (constant_55555 * constant_55555 * constant_55555 * constant_55555), + 0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 * + constant_55555 * 5), + 0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 * + constant_55555 * 5 * 5), + 0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 * + constant_55555 * 5 * 5 * 5), + 0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 * + constant_55555 * 5 * 5 * 5 * 5)}; +}; + +#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE + +template +constexpr double binary_format_lookup_tables::powers_of_ten[]; + +template +constexpr uint64_t binary_format_lookup_tables::max_mantissa[]; + +#endif + +template struct binary_format_lookup_tables { + static constexpr float powers_of_ten[] = {1e0f, 1e1f, 1e2f, 1e3f, 1e4f, 1e5f, + 1e6f, 1e7f, 1e8f, 
1e9f, 1e10f}; + + // Largest integer value v so that (5**index * v) <= 1<<24. + // 0x1000000 == 1<<24 + static constexpr uint64_t max_mantissa[] = { + 0x1000000, + 0x1000000 / 5, + 0x1000000 / (5 * 5), + 0x1000000 / (5 * 5 * 5), + 0x1000000 / (5 * 5 * 5 * 5), + 0x1000000 / (constant_55555), + 0x1000000 / (constant_55555 * 5), + 0x1000000 / (constant_55555 * 5 * 5), + 0x1000000 / (constant_55555 * 5 * 5 * 5), + 0x1000000 / (constant_55555 * 5 * 5 * 5 * 5), + 0x1000000 / (constant_55555 * constant_55555), + 0x1000000 / (constant_55555 * constant_55555 * 5)}; +}; + +#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE + +template +constexpr float binary_format_lookup_tables::powers_of_ten[]; + +template +constexpr uint64_t binary_format_lookup_tables::max_mantissa[]; + +#endif + +template <> +inline constexpr int binary_format::min_exponent_fast_path() { +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + return 0; +#else + return -22; +#endif +} + +template <> +inline constexpr int binary_format::min_exponent_fast_path() { +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + return 0; +#else + return -10; +#endif +} + +template <> +inline constexpr int binary_format::mantissa_explicit_bits() { + return 52; +} + +template <> +inline constexpr int binary_format::mantissa_explicit_bits() { + return 23; +} + +template <> +inline constexpr int binary_format::max_exponent_round_to_even() { + return 23; +} + +template <> +inline constexpr int binary_format::max_exponent_round_to_even() { + return 10; +} + +template <> +inline constexpr int binary_format::min_exponent_round_to_even() { + return -4; +} + +template <> +inline constexpr int binary_format::min_exponent_round_to_even() { + return -17; +} + +template <> inline constexpr int binary_format::minimum_exponent() { + return -1023; +} + +template <> inline constexpr int binary_format::minimum_exponent() { + return -127; +} + +template <> inline constexpr int binary_format::infinite_power() { + return 0x7FF; +} + 
+template <> inline constexpr int binary_format::infinite_power() { + return 0xFF; +} + +template <> inline constexpr int binary_format::sign_index() { + return 63; +} + +template <> inline constexpr int binary_format::sign_index() { + return 31; +} + +template <> +inline constexpr int binary_format::max_exponent_fast_path() { + return 22; +} + +template <> +inline constexpr int binary_format::max_exponent_fast_path() { + return 10; +} + +template <> +inline constexpr uint64_t binary_format::max_mantissa_fast_path() { + return uint64_t(2) << mantissa_explicit_bits(); +} + +template <> +inline constexpr uint64_t binary_format::max_mantissa_fast_path() { + return uint64_t(2) << mantissa_explicit_bits(); +} + +// credit: Jakub Jelínek +#ifdef __STDCPP_FLOAT16_T__ +template struct binary_format_lookup_tables { + static constexpr std::float16_t powers_of_ten[] = {1e0f16, 1e1f16, 1e2f16, + 1e3f16, 1e4f16}; + + // Largest integer value v so that (5**index * v) <= 1<<11. + // 0x800 == 1<<11 + static constexpr uint64_t max_mantissa[] = {0x800, + 0x800 / 5, + 0x800 / (5 * 5), + 0x800 / (5 * 5 * 5), + 0x800 / (5 * 5 * 5 * 5), + 0x800 / (constant_55555)}; +}; + +#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE + +template +constexpr std::float16_t + binary_format_lookup_tables::powers_of_ten[]; + +template +constexpr uint64_t + binary_format_lookup_tables::max_mantissa[]; + +#endif + +template <> +inline constexpr std::float16_t +binary_format::exact_power_of_ten(int64_t power) { + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)powers_of_ten[0], powers_of_ten[power]; +} + +template <> +inline constexpr binary_format::equiv_uint +binary_format::exponent_mask() { + return 0x7C00; +} + +template <> +inline constexpr binary_format::equiv_uint +binary_format::mantissa_mask() { + return 0x03FF; +} + +template <> +inline constexpr binary_format::equiv_uint +binary_format::hidden_bit_mask() { + return 0x0400; +} + +template <> +inline constexpr int 
binary_format::max_exponent_fast_path() { + return 4; +} + +template <> +inline constexpr int binary_format::mantissa_explicit_bits() { + return 10; +} + +template <> +inline constexpr uint64_t +binary_format::max_mantissa_fast_path() { + return uint64_t(2) << mantissa_explicit_bits(); +} + +template <> +inline constexpr uint64_t +binary_format::max_mantissa_fast_path(int64_t power) { + // caller is responsible to ensure that + // power >= 0 && power <= 4 + // + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)max_mantissa[0], max_mantissa[power]; +} + +template <> +inline constexpr int binary_format::min_exponent_fast_path() { + return 0; +} + +template <> +inline constexpr int +binary_format::max_exponent_round_to_even() { + return 5; +} + +template <> +inline constexpr int +binary_format::min_exponent_round_to_even() { + return -22; +} + +template <> +inline constexpr int binary_format::minimum_exponent() { + return -15; +} + +template <> +inline constexpr int binary_format::infinite_power() { + return 0x1F; +} + +template <> inline constexpr int binary_format::sign_index() { + return 15; +} + +template <> +inline constexpr int binary_format::largest_power_of_ten() { + return 4; +} + +template <> +inline constexpr int binary_format::smallest_power_of_ten() { + return -27; +} + +template <> +inline constexpr size_t binary_format::max_digits() { + return 22; +} +#endif // __STDCPP_FLOAT16_T__ + +// credit: Jakub Jelínek +#ifdef __STDCPP_BFLOAT16_T__ +template struct binary_format_lookup_tables { + static constexpr std::bfloat16_t powers_of_ten[] = {1e0bf16, 1e1bf16, 1e2bf16, + 1e3bf16}; + + // Largest integer value v so that (5**index * v) <= 1<<8. 
+ // 0x100 == 1<<8 + static constexpr uint64_t max_mantissa[] = {0x100, 0x100 / 5, 0x100 / (5 * 5), + 0x100 / (5 * 5 * 5), + 0x100 / (5 * 5 * 5 * 5)}; +}; + +#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE + +template +constexpr std::bfloat16_t + binary_format_lookup_tables::powers_of_ten[]; + +template +constexpr uint64_t + binary_format_lookup_tables::max_mantissa[]; + +#endif + +template <> +inline constexpr std::bfloat16_t +binary_format::exact_power_of_ten(int64_t power) { + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)powers_of_ten[0], powers_of_ten[power]; +} + +template <> +inline constexpr int binary_format::max_exponent_fast_path() { + return 3; +} + +template <> +inline constexpr binary_format::equiv_uint +binary_format::exponent_mask() { + return 0x7F80; +} + +template <> +inline constexpr binary_format::equiv_uint +binary_format::mantissa_mask() { + return 0x007F; +} + +template <> +inline constexpr binary_format::equiv_uint +binary_format::hidden_bit_mask() { + return 0x0080; +} + +template <> +inline constexpr int binary_format::mantissa_explicit_bits() { + return 7; +} + +template <> +inline constexpr uint64_t +binary_format::max_mantissa_fast_path() { + return uint64_t(2) << mantissa_explicit_bits(); +} + +template <> +inline constexpr uint64_t +binary_format::max_mantissa_fast_path(int64_t power) { + // caller is responsible to ensure that + // power >= 0 && power <= 3 + // + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)max_mantissa[0], max_mantissa[power]; +} + +template <> +inline constexpr int binary_format::min_exponent_fast_path() { + return 0; +} + +template <> +inline constexpr int +binary_format::max_exponent_round_to_even() { + return 3; +} + +template <> +inline constexpr int +binary_format::min_exponent_round_to_even() { + return -24; +} + +template <> +inline constexpr int binary_format::minimum_exponent() { + return -127; +} + +template <> +inline constexpr int 
binary_format::infinite_power() { + return 0xFF; +} + +template <> inline constexpr int binary_format::sign_index() { + return 15; +} + +template <> +inline constexpr int binary_format::largest_power_of_ten() { + return 38; +} + +template <> +inline constexpr int binary_format::smallest_power_of_ten() { + return -60; +} + +template <> +inline constexpr size_t binary_format::max_digits() { + return 98; +} +#endif // __STDCPP_BFLOAT16_T__ + +template <> +inline constexpr uint64_t +binary_format::max_mantissa_fast_path(int64_t power) { + // caller is responsible to ensure that + // power >= 0 && power <= 22 + // + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)max_mantissa[0], max_mantissa[power]; +} + +template <> +inline constexpr uint64_t +binary_format::max_mantissa_fast_path(int64_t power) { + // caller is responsible to ensure that + // power >= 0 && power <= 10 + // + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)max_mantissa[0], max_mantissa[power]; +} + +template <> +inline constexpr double +binary_format::exact_power_of_ten(int64_t power) { + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)powers_of_ten[0], powers_of_ten[power]; +} + +template <> +inline constexpr float binary_format::exact_power_of_ten(int64_t power) { + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)powers_of_ten[0], powers_of_ten[power]; +} + +template <> inline constexpr int binary_format::largest_power_of_ten() { + return 308; +} + +template <> inline constexpr int binary_format::largest_power_of_ten() { + return 38; +} + +template <> +inline constexpr int binary_format::smallest_power_of_ten() { + return -342; +} + +template <> inline constexpr int binary_format::smallest_power_of_ten() { + return -64; +} + +template <> inline constexpr size_t binary_format::max_digits() { + return 769; +} + +template <> inline constexpr size_t binary_format::max_digits() { + return 114; +} + +template 
<> +inline constexpr binary_format::equiv_uint +binary_format::exponent_mask() { + return 0x7F800000; +} + +template <> +inline constexpr binary_format::equiv_uint +binary_format::exponent_mask() { + return 0x7FF0000000000000; +} + +template <> +inline constexpr binary_format::equiv_uint +binary_format::mantissa_mask() { + return 0x007FFFFF; +} + +template <> +inline constexpr binary_format::equiv_uint +binary_format::mantissa_mask() { + return 0x000FFFFFFFFFFFFF; +} + +template <> +inline constexpr binary_format::equiv_uint +binary_format::hidden_bit_mask() { + return 0x00800000; +} + +template <> +inline constexpr binary_format::equiv_uint +binary_format::hidden_bit_mask() { + return 0x0010000000000000; +} + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void +to_float(bool negative, adjusted_mantissa am, T &value) { + using equiv_uint = equiv_uint_t; + equiv_uint word = equiv_uint(am.mantissa); + word = equiv_uint(word | equiv_uint(am.power2) + << binary_format::mantissa_explicit_bits()); + word = + equiv_uint(word | equiv_uint(negative) << binary_format::sign_index()); +#if FASTFLOAT_HAS_BIT_CAST + value = std::bit_cast(word); +#else + ::memcpy(&value, &word, sizeof(T)); +#endif +} + +template struct space_lut { + static constexpr bool value[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +}; + +#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE + +template constexpr bool space_lut::value[]; + +#endif + +template constexpr bool is_space(UC c) { + return c < 256 && space_lut<>::value[uint8_t(c)]; +} + +template static constexpr uint64_t int_cmp_zeros() { + static_assert((sizeof(UC) == 1) || (sizeof(UC) == 2) || (sizeof(UC) == 4), + "Unsupported character size"); + return (sizeof(UC) == 1) ? 0x3030303030303030 + : (sizeof(UC) == 2) + ? (uint64_t(UC('0')) << 48 | uint64_t(UC('0')) << 32 | + uint64_t(UC('0')) << 16 | UC('0')) + : (uint64_t(UC('0')) << 32 | UC('0')); +} + +template static constexpr int int_cmp_len() { + return sizeof(uint64_t) / sizeof(UC); +} + +template constexpr UC const *str_const_nan(); + +template <> constexpr char const *str_const_nan() { return "nan"; } + +template <> constexpr wchar_t const *str_const_nan() { return L"nan"; } + +template <> constexpr char16_t const *str_const_nan() { + return u"nan"; +} + +template <> constexpr char32_t const *str_const_nan() { + return U"nan"; +} + +#ifdef __cpp_char8_t +template <> constexpr char8_t const *str_const_nan() { + return u8"nan"; +} +#endif + +template constexpr UC const *str_const_inf(); + +template <> constexpr char const *str_const_inf() { return "infinity"; } + +template <> constexpr wchar_t const *str_const_inf() { + return L"infinity"; +} + +template <> constexpr char16_t const *str_const_inf() { + return u"infinity"; +} + +template <> constexpr char32_t const *str_const_inf() { + return U"infinity"; +} + +#ifdef __cpp_char8_t +template <> constexpr char8_t const *str_const_inf() { + return u8"infinity"; +} +#endif + +template struct int_luts { + static constexpr uint8_t chdigit[] = { + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 
255, 255, + 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, + 255, 255, 255, 255, 255, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + 35, 255, 255, 255, 255, 255, 255, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255}; + + static constexpr size_t maxdigits_u64[] = { + 64, 41, 32, 28, 25, 23, 22, 21, 20, 19, 18, 18, 17, 17, 16, 16, 16, 16, + 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13}; + + static constexpr uint64_t min_safe_u64[] = { + 9223372036854775808ull, 12157665459056928801ull, 4611686018427387904, + 7450580596923828125, 4738381338321616896, 3909821048582988049, + 9223372036854775808ull, 12157665459056928801ull, 10000000000000000000ull, + 5559917313492231481, 2218611106740436992, 8650415919381337933, + 2177953337809371136, 6568408355712890625, 1152921504606846976, + 2862423051509815793, 6746640616477458432, 15181127029874798299ull, + 1638400000000000000, 3243919932521508681, 6221821273427820544, + 11592836324538749809ull, 876488338465357824, 1490116119384765625, + 2481152873203736576, 4052555153018976267, 6502111422497947648, + 10260628712958602189ull, 15943230000000000000ull, 787662783788549761, + 1152921504606846976, 1667889514952984961, 
2386420683693101056, + 3379220508056640625, 4738381338321616896}; +}; + +#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE + +template constexpr uint8_t int_luts::chdigit[]; + +template constexpr size_t int_luts::maxdigits_u64[]; + +template constexpr uint64_t int_luts::min_safe_u64[]; + +#endif + +template +fastfloat_really_inline constexpr uint8_t ch_to_digit(UC c) { + // wchar_t and char can be signed, so we need to be careful. + using UnsignedUC = typename std::make_unsigned::type; + return int_luts<>::chdigit[static_cast( + static_cast(c) & + static_cast( + -((static_cast(c) & ~0xFFull) == 0)))]; +} + +fastfloat_really_inline constexpr size_t max_digits_u64(int base) { + return int_luts<>::maxdigits_u64[base - 2]; +} + +// If a u64 is exactly max_digits_u64() in length, this is +// the value below which it has definitely overflowed. +fastfloat_really_inline constexpr uint64_t min_safe_u64(int base) { + return int_luts<>::min_safe_u64[base - 2]; +} + +static_assert(std::is_same, uint64_t>::value, + "equiv_uint should be uint64_t for double"); +static_assert(std::numeric_limits::is_iec559, + "double must fulfill the requirements of IEC 559 (IEEE 754)"); + +static_assert(std::is_same, uint32_t>::value, + "equiv_uint should be uint32_t for float"); +static_assert(std::numeric_limits::is_iec559, + "float must fulfill the requirements of IEC 559 (IEEE 754)"); + +#ifdef __STDCPP_FLOAT64_T__ +static_assert(std::is_same, uint64_t>::value, + "equiv_uint should be uint64_t for std::float64_t"); +static_assert( + std::numeric_limits::is_iec559, + "std::float64_t must fulfill the requirements of IEC 559 (IEEE 754)"); + +template <> +struct binary_format : public binary_format {}; +#endif // __STDCPP_FLOAT64_T__ + +#ifdef __STDCPP_FLOAT32_T__ +static_assert(std::is_same, uint32_t>::value, + "equiv_uint should be uint32_t for std::float32_t"); +static_assert( + std::numeric_limits::is_iec559, + "std::float32_t must fulfill the requirements of IEC 559 (IEEE 754)"); + 
+template <> +struct binary_format : public binary_format {}; +#endif // __STDCPP_FLOAT32_T__ + +#ifdef __STDCPP_FLOAT16_T__ +static_assert( + std::is_same::equiv_uint, uint16_t>::value, + "equiv_uint should be uint16_t for std::float16_t"); +static_assert( + std::numeric_limits::is_iec559, + "std::float16_t must fulfill the requirements of IEC 559 (IEEE 754)"); +#endif // __STDCPP_FLOAT16_T__ + +#ifdef __STDCPP_BFLOAT16_T__ +static_assert( + std::is_same::equiv_uint, uint16_t>::value, + "equiv_uint should be uint16_t for std::bfloat16_t"); +static_assert( + std::numeric_limits::is_iec559, + "std::bfloat16_t must fulfill the requirements of IEC 559 (IEEE 754)"); +#endif // __STDCPP_BFLOAT16_T__ + +constexpr chars_format operator~(chars_format rhs) noexcept { + using int_type = std::underlying_type::type; + return static_cast(~static_cast(rhs)); +} + +constexpr chars_format operator&(chars_format lhs, chars_format rhs) noexcept { + using int_type = std::underlying_type::type; + return static_cast(static_cast(lhs) & + static_cast(rhs)); +} + +constexpr chars_format operator|(chars_format lhs, chars_format rhs) noexcept { + using int_type = std::underlying_type::type; + return static_cast(static_cast(lhs) | + static_cast(rhs)); +} + +constexpr chars_format operator^(chars_format lhs, chars_format rhs) noexcept { + using int_type = std::underlying_type::type; + return static_cast(static_cast(lhs) ^ + static_cast(rhs)); +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 chars_format & +operator&=(chars_format &lhs, chars_format rhs) noexcept { + return lhs = (lhs & rhs); +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 chars_format & +operator|=(chars_format &lhs, chars_format rhs) noexcept { + return lhs = (lhs | rhs); +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 chars_format & +operator^=(chars_format &lhs, chars_format rhs) noexcept { + return lhs = (lhs ^ rhs); +} + +namespace detail { +// adjust for deprecated feature macros +constexpr chars_format 
adjust_for_feature_macros(chars_format fmt) { + return fmt +#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS + | chars_format::allow_leading_plus +#endif +#ifdef FASTFLOAT_SKIP_WHITE_SPACE + | chars_format::skip_white_space +#endif + ; +} +} // namespace detail +} // namespace fast_float + +#endif + + +#ifndef FASTFLOAT_FAST_FLOAT_H +#define FASTFLOAT_FAST_FLOAT_H + + +namespace fast_float { +/** + * This function parses the character sequence [first,last) for a number. It + * parses floating-point numbers expecting a locale-indepent format equivalent + * to what is used by std::strtod in the default ("C") locale. The resulting + * floating-point value is the closest floating-point values (using either float + * or double), using the "round to even" convention for values that would + * otherwise fall right in-between two values. That is, we provide exact parsing + * according to the IEEE standard. + * + * Given a successful parse, the pointer (`ptr`) in the returned value is set to + * point right after the parsed number, and the `value` referenced is set to the + * parsed value. In case of error, the returned `ec` contains a representative + * error, otherwise the default (`std::errc()`) value is stored. + * + * The implementation does not throw and does not allocate memory (e.g., with + * `new` or `malloc`). + * + * Like the C++17 standard, the `fast_float::from_chars` functions take an + * optional last argument of the type `fast_float::chars_format`. It is a bitset + * value: we check whether `fmt & fast_float::chars_format::fixed` and `fmt & + * fast_float::chars_format::scientific` are set to determine whether we allow + * the fixed point and scientific notation respectively. The default is + * `fast_float::chars_format::general` which allows both `fixed` and + * `scientific`. 
+ */ +template ::value)> +FASTFLOAT_CONSTEXPR20 from_chars_result_t +from_chars(UC const *first, UC const *last, T &value, + chars_format fmt = chars_format::general) noexcept; + +/** + * Like from_chars, but accepts an `options` argument to govern number parsing. + * Both for floating-point types and integer types. + */ +template +FASTFLOAT_CONSTEXPR20 from_chars_result_t +from_chars_advanced(UC const *first, UC const *last, T &value, + parse_options_t options) noexcept; + +/** + * This function multiplies an integer number by a power of 10 and returns + * the result as a double precision floating-point value that is correctly + * rounded. The resulting floating-point value is the closest floating-point + * value, using the "round to nearest, tie to even" convention for values that + * would otherwise fall right in-between two values. That is, we provide exact + * conversion according to the IEEE standard. + * + * On overflow infinity is returned, on underflow 0 is returned. + * + * The implementation does not throw and does not allocate memory (e.g., with + * `new` or `malloc`). + */ +FASTFLOAT_CONSTEXPR20 inline double +integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept; +FASTFLOAT_CONSTEXPR20 inline double +integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept; + +/** + * This function is a template overload of `integer_times_pow10()` + * that returns a floating-point value of type `T` that is one of + * supported floating-point types (e.g. `double`, `float`). + */ +template +FASTFLOAT_CONSTEXPR20 + typename std::enable_if::value, T>::type + integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept; +template +FASTFLOAT_CONSTEXPR20 + typename std::enable_if::value, T>::type + integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept; + +/** + * from_chars for integer types. 
+ */ +template ::value)> +FASTFLOAT_CONSTEXPR20 from_chars_result_t +from_chars(UC const *first, UC const *last, T &value, int base = 10) noexcept; + +} // namespace fast_float + +#endif // FASTFLOAT_FAST_FLOAT_H + +#ifndef FASTFLOAT_ASCII_NUMBER_H +#define FASTFLOAT_ASCII_NUMBER_H + +#include +#include +#include +#include +#include +#include + + +#ifdef FASTFLOAT_SSE2 +#include +#endif + +#ifdef FASTFLOAT_NEON +#include +#endif + +namespace fast_float { + +template fastfloat_really_inline constexpr bool has_simd_opt() { +#ifdef FASTFLOAT_HAS_SIMD + return std::is_same::value; +#else + return false; +#endif +} + +// Next function can be micro-optimized, but compilers are entirely +// able to optimize it well. +template +fastfloat_really_inline constexpr bool is_integer(UC c) noexcept { + return (unsigned)(c - UC('0')) <= 9u; +} + +fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { + return (val & 0xFF00000000000000) >> 56 | (val & 0x00FF000000000000) >> 40 | + (val & 0x0000FF0000000000) >> 24 | (val & 0x000000FF00000000) >> 8 | + (val & 0x00000000FF000000) << 8 | (val & 0x0000000000FF0000) << 24 | + (val & 0x000000000000FF00) << 40 | (val & 0x00000000000000FF) << 56; +} + +fastfloat_really_inline constexpr uint32_t byteswap_32(uint32_t val) { + return (val >> 24) | ((val >> 8) & 0x0000FF00u) | ((val << 8) & 0x00FF0000u) | + (val << 24); +} + +// Read 8 UC into a u64. Truncates UC if not char. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t +read8_to_u64(UC const *chars) { + if (cpp20_and_in_constexpr() || !std::is_same::value) { + uint64_t val = 0; + for (int i = 0; i < 8; ++i) { + val |= uint64_t(uint8_t(*chars)) << (i * 8); + ++chars; + } + return val; + } + uint64_t val; + ::memcpy(&val, chars, sizeof(uint64_t)); +#if FASTFLOAT_IS_BIG_ENDIAN == 1 + // Need to read as-if the number was in little-endian order. + val = byteswap(val); +#endif + return val; +} + +// Read 4 UC into a u32. Truncates UC if not char. 
+template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint32_t +read4_to_u32(UC const *chars) { + if (cpp20_and_in_constexpr() || !std::is_same::value) { + uint32_t val = 0; + for (int i = 0; i < 4; ++i) { + val |= uint32_t(uint8_t(*chars)) << (i * 8); + ++chars; + } + return val; + } + uint32_t val; + ::memcpy(&val, chars, sizeof(uint32_t)); +#if FASTFLOAT_IS_BIG_ENDIAN == 1 + val = byteswap_32(val); +#endif + return val; +} +#ifdef FASTFLOAT_SSE2 + +fastfloat_really_inline uint64_t simd_read8_to_u64(__m128i const data) { + FASTFLOAT_SIMD_DISABLE_WARNINGS + __m128i const packed = _mm_packus_epi16(data, data); +#ifdef FASTFLOAT_64BIT + return uint64_t(_mm_cvtsi128_si64(packed)); +#else + uint64_t value; + // Visual Studio + older versions of GCC don't support _mm_storeu_si64 + _mm_storel_epi64(reinterpret_cast<__m128i *>(&value), packed); + return value; +#endif + FASTFLOAT_SIMD_RESTORE_WARNINGS +} + +fastfloat_really_inline uint64_t simd_read8_to_u64(char16_t const *chars) { + FASTFLOAT_SIMD_DISABLE_WARNINGS + return simd_read8_to_u64( + _mm_loadu_si128(reinterpret_cast<__m128i const *>(chars))); + FASTFLOAT_SIMD_RESTORE_WARNINGS +} + +#elif defined(FASTFLOAT_NEON) + +fastfloat_really_inline uint64_t simd_read8_to_u64(uint16x8_t const data) { + FASTFLOAT_SIMD_DISABLE_WARNINGS + uint8x8_t utf8_packed = vmovn_u16(data); + return vget_lane_u64(vreinterpret_u64_u8(utf8_packed), 0); + FASTFLOAT_SIMD_RESTORE_WARNINGS +} + +fastfloat_really_inline uint64_t simd_read8_to_u64(char16_t const *chars) { + FASTFLOAT_SIMD_DISABLE_WARNINGS + return simd_read8_to_u64( + vld1q_u16(reinterpret_cast(chars))); + FASTFLOAT_SIMD_RESTORE_WARNINGS +} + +#endif // FASTFLOAT_SSE2 + +// MSVC SFINAE is broken pre-VS2017 +#if defined(_MSC_VER) && _MSC_VER <= 1900 +template +#else +template ()) = 0> +#endif +// dummy for compile +uint64_t simd_read8_to_u64(UC const *) { + return 0; +} + +// credit @aqrit +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint32_t 
+parse_eight_digits_unrolled(uint64_t val) { + uint64_t const mask = 0x000000FF000000FF; + uint64_t const mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32) + uint64_t const mul2 = 0x0000271000000001; // 1 + (10000ULL << 32) + val -= 0x3030303030303030; + val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + return uint32_t(val); +} + +// Call this if chars are definitely 8 digits. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint32_t +parse_eight_digits_unrolled(UC const *chars) noexcept { + if (cpp20_and_in_constexpr() || !has_simd_opt()) { + return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay + } + return parse_eight_digits_unrolled(simd_read8_to_u64(chars)); +} + +// credit @aqrit +fastfloat_really_inline constexpr bool +is_made_of_eight_digits_fast(uint64_t val) noexcept { + return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & + 0x8080808080808080)); +} + +fastfloat_really_inline constexpr bool +is_made_of_four_digits_fast(uint32_t val) noexcept { + return !((((val + 0x46464646) | (val - 0x30303030)) & 0x80808080)); +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint32_t +parse_four_digits_unrolled(uint32_t val) noexcept { + val -= 0x30303030; + val = (val * 10) + (val >> 8); + return (((val & 0x00FF00FF) * 0x00640001) >> 16) & 0xFFFF; +} + +#ifdef FASTFLOAT_HAS_SIMD + +// Call this if chars might not be 8 digits. +// Using this style (instead of is_made_of_eight_digits_fast() then +// parse_eight_digits_unrolled()) ensures we don't load SIMD registers twice. 
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool +simd_parse_if_eight_digits_unrolled(char16_t const *chars, + uint64_t &i) noexcept { + if (cpp20_and_in_constexpr()) { + return false; + } +#ifdef FASTFLOAT_SSE2 + FASTFLOAT_SIMD_DISABLE_WARNINGS + __m128i const data = + _mm_loadu_si128(reinterpret_cast<__m128i const *>(chars)); + + // (x - '0') <= 9 + // http://0x80.pl/articles/simd-parsing-int-sequences.html + __m128i const t0 = _mm_add_epi16(data, _mm_set1_epi16(32720)); + __m128i const t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-32759)); + + if (_mm_movemask_epi8(t1) == 0) { + i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data)); + return true; + } else + return false; + FASTFLOAT_SIMD_RESTORE_WARNINGS +#elif defined(FASTFLOAT_NEON) + FASTFLOAT_SIMD_DISABLE_WARNINGS + uint16x8_t const data = vld1q_u16(reinterpret_cast(chars)); + + // (x - '0') <= 9 + // http://0x80.pl/articles/simd-parsing-int-sequences.html + uint16x8_t const t0 = vsubq_u16(data, vmovq_n_u16('0')); + uint16x8_t const mask = vcltq_u16(t0, vmovq_n_u16('9' - '0' + 1)); + + if (vminvq_u16(mask) == 0xFFFF) { + i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data)); + return true; + } else + return false; + FASTFLOAT_SIMD_RESTORE_WARNINGS +#else + (void)chars; + (void)i; + return false; +#endif // FASTFLOAT_SSE2 +} + +#endif // FASTFLOAT_HAS_SIMD + +// MSVC SFINAE is broken pre-VS2017 +#if defined(_MSC_VER) && _MSC_VER <= 1900 +template +#else +template ()) = 0> +#endif +// dummy for compile +bool simd_parse_if_eight_digits_unrolled(UC const *, uint64_t &) { + return 0; +} + +template ::value) = 0> +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void +loop_parse_if_eight_digits(UC const *&p, UC const *const pend, uint64_t &i) { + if (!has_simd_opt()) { + return; + } + while ((std::distance(p, pend) >= 8) && + simd_parse_if_eight_digits_unrolled( + p, i)) { // in rare cases, this will overflow, but that's ok + p += 8; + } +} + +fastfloat_really_inline 
FASTFLOAT_CONSTEXPR20 void +loop_parse_if_eight_digits(char const *&p, char const *const pend, + uint64_t &i) { + // optimizes better than parse_if_eight_digits_unrolled() for UC = char. + while ((std::distance(p, pend) >= 8) && + is_made_of_eight_digits_fast(read8_to_u64(p))) { + i = i * 100000000 + + parse_eight_digits_unrolled(read8_to_u64( + p)); // in rare cases, this will overflow, but that's ok + p += 8; + } +} + +enum class parse_error { + no_error, + // [JSON-only] The minus sign must be followed by an integer. + missing_integer_after_sign, + // A sign must be followed by an integer or dot. + missing_integer_or_dot_after_sign, + // [JSON-only] The integer part must not have leading zeros. + leading_zeros_in_integer_part, + // [JSON-only] The integer part must have at least one digit. + no_digits_in_integer_part, + // [JSON-only] If there is a decimal point, there must be digits in the + // fractional part. + no_digits_in_fractional_part, + // The mantissa must have at least one digit. + no_digits_in_mantissa, + // Scientific notation requires an exponential part. + missing_exponential_part, +}; + +template struct parsed_number_string_t { + int64_t exponent{0}; + uint64_t mantissa{0}; + UC const *lastmatch{nullptr}; + bool negative{false}; + bool valid{false}; + bool too_many_digits{false}; + // contains the range of the significant digits + span integer{}; // non-nullable + span fraction{}; // nullable + parse_error error{parse_error::no_error}; +}; + +using byte_span = span; +using parsed_number_string = parsed_number_string_t; + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t +report_parse_error(UC const *p, parse_error error) { + parsed_number_string_t answer; + answer.valid = false; + answer.lastmatch = p; + answer.error = error; + return answer; +} + +// Assuming that you use no more than 19 digits, this will +// parse an ASCII string. 
+template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t +parse_number_string(UC const *p, UC const *pend, + parse_options_t options) noexcept { + chars_format const fmt = detail::adjust_for_feature_macros(options.format); + UC const decimal_point = options.decimal_point; + + parsed_number_string_t answer; + answer.valid = false; + answer.too_many_digits = false; + // assume p < pend, so dereference without checks; + answer.negative = (*p == UC('-')); + // C++17 20.19.3.(7.1) explicitly forbids '+' sign here + if ((*p == UC('-')) || (uint64_t(fmt & chars_format::allow_leading_plus) && + !basic_json_fmt && *p == UC('+'))) { + ++p; + if (p == pend) { + return report_parse_error( + p, parse_error::missing_integer_or_dot_after_sign); + } + FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) { + if (!is_integer(*p)) { // a sign must be followed by an integer + return report_parse_error(p, + parse_error::missing_integer_after_sign); + } + } + else { + if (!is_integer(*p) && + (*p != + decimal_point)) { // a sign must be followed by an integer or the dot + return report_parse_error( + p, parse_error::missing_integer_or_dot_after_sign); + } + } + } + UC const *const start_digits = p; + + uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) + + while ((p != pend) && is_integer(*p)) { + // a multiplication by 10 is cheaper than an arbitrary integer + // multiplication + i = 10 * i + + uint64_t(*p - + UC('0')); // might overflow, we will handle the overflow later + ++p; + } + UC const *const end_of_integer_part = p; + int64_t digit_count = int64_t(end_of_integer_part - start_digits); + answer.integer = span(start_digits, size_t(digit_count)); + FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) { + // at least 1 digit in integer part, without leading zeros + if (digit_count == 0) { + return report_parse_error(p, parse_error::no_digits_in_integer_part); + } + if ((start_digits[0] == UC('0') && digit_count > 1)) { + return 
report_parse_error(start_digits, + parse_error::leading_zeros_in_integer_part); + } + } + + int64_t exponent = 0; + bool const has_decimal_point = (p != pend) && (*p == decimal_point); + if (has_decimal_point) { + ++p; + UC const *before = p; + // can occur at most twice without overflowing, but let it occur more, since + // for integers with many digits, digit parsing is the primary bottleneck. + loop_parse_if_eight_digits(p, pend, i); + + while ((p != pend) && is_integer(*p)) { + uint8_t digit = uint8_t(*p - UC('0')); + ++p; + i = i * 10 + digit; // in rare cases, this will overflow, but that's ok + } + exponent = before - p; + answer.fraction = span(before, size_t(p - before)); + digit_count -= exponent; + } + FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) { + // at least 1 digit in fractional part + if (has_decimal_point && exponent == 0) { + return report_parse_error(p, + parse_error::no_digits_in_fractional_part); + } + } + else if (digit_count == 0) { // we must have encountered at least one integer! + return report_parse_error(p, parse_error::no_digits_in_mantissa); + } + int64_t exp_number = 0; // explicit exponential part + if ((uint64_t(fmt & chars_format::scientific) && (p != pend) && + ((UC('e') == *p) || (UC('E') == *p))) || + (uint64_t(fmt & detail::basic_fortran_fmt) && (p != pend) && + ((UC('+') == *p) || (UC('-') == *p) || (UC('d') == *p) || + (UC('D') == *p)))) { + UC const *location_of_e = p; + if ((UC('e') == *p) || (UC('E') == *p) || (UC('d') == *p) || + (UC('D') == *p)) { + ++p; + } + bool neg_exp = false; + if ((p != pend) && (UC('-') == *p)) { + neg_exp = true; + ++p; + } else if ((p != pend) && + (UC('+') == + *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1) + ++p; + } + if ((p == pend) || !is_integer(*p)) { + if (!uint64_t(fmt & chars_format::fixed)) { + // The exponential part is invalid for scientific notation, so it must + // be a trailing token for fixed notation. 
However, fixed notation is + // disabled, so report a scientific notation error. + return report_parse_error(p, parse_error::missing_exponential_part); + } + // Otherwise, we will be ignoring the 'e'. + p = location_of_e; + } else { + while ((p != pend) && is_integer(*p)) { + uint8_t digit = uint8_t(*p - UC('0')); + if (exp_number < 0x10000000) { + exp_number = 10 * exp_number + digit; + } + ++p; + } + if (neg_exp) { + exp_number = -exp_number; + } + exponent += exp_number; + } + } else { + // If it scientific and not fixed, we have to bail out. + if (uint64_t(fmt & chars_format::scientific) && + !uint64_t(fmt & chars_format::fixed)) { + return report_parse_error(p, parse_error::missing_exponential_part); + } + } + answer.lastmatch = p; + answer.valid = true; + + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon. + // + // We can deal with up to 19 digits. + if (digit_count > 19) { // this is uncommon + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + // We need to be mindful of the case where we only have zeroes... + // E.g., 0.000000000...000. + UC const *start = start_digits; + while ((start != pend) && (*start == UC('0') || *start == decimal_point)) { + if (*start == UC('0')) { + digit_count--; + } + start++; + } + + if (digit_count > 19) { + answer.too_many_digits = true; + // Let us start again, this time, avoiding overflows. + // We don't need to call if is_integer, since we use the + // pre-tokenized spans from above. 
+ i = 0; + p = answer.integer.ptr; + UC const *int_end = p + answer.integer.len(); + uint64_t const minimal_nineteen_digit_integer{1000000000000000000}; + while ((i < minimal_nineteen_digit_integer) && (p != int_end)) { + i = i * 10 + uint64_t(*p - UC('0')); + ++p; + } + if (i >= minimal_nineteen_digit_integer) { // We have a big integer + exponent = end_of_integer_part - p + exp_number; + } else { // We have a value with a fractional component. + p = answer.fraction.ptr; + UC const *frac_end = p + answer.fraction.len(); + while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) { + i = i * 10 + uint64_t(*p - UC('0')); + ++p; + } + exponent = answer.fraction.ptr - p + exp_number; + } + // We have now corrected both exponent and i, to a truncated value + } + } + answer.exponent = exponent; + answer.mantissa = i; + return answer; +} + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 from_chars_result_t +parse_int_string(UC const *p, UC const *pend, T &value, + parse_options_t options) { + chars_format const fmt = detail::adjust_for_feature_macros(options.format); + int const base = options.base; + + from_chars_result_t answer; + + UC const *const first = p; + + bool const negative = (*p == UC('-')); +#ifdef FASTFLOAT_VISUAL_STUDIO +#pragma warning(push) +#pragma warning(disable : 4127) +#endif + if (!std::is_signed::value && negative) { +#ifdef FASTFLOAT_VISUAL_STUDIO +#pragma warning(pop) +#endif + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } + if ((*p == UC('-')) || + (uint64_t(fmt & chars_format::allow_leading_plus) && (*p == UC('+')))) { + ++p; + } + + UC const *const start_num = p; + + while (p != pend && *p == UC('0')) { + ++p; + } + + bool const has_leading_zeros = p > start_num; + + UC const *const start_digits = p; + + FASTFLOAT_IF_CONSTEXPR17((std::is_same::value)) { + if (base == 10) { + const size_t len = (size_t)(pend - p); + if (len == 0) { + if (has_leading_zeros) { + value = 0; + answer.ec = 
std::errc(); + answer.ptr = p; + } else { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + } + return answer; + } + + uint32_t digits; + +#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED && FASTFLOAT_HAS_BIT_CAST + if (std::is_constant_evaluated()) { + uint8_t str[4]{}; + for (size_t j = 0; j < 4 && j < len; ++j) { + str[j] = static_cast(p[j]); + } + digits = std::bit_cast(str); +#if FASTFLOAT_IS_BIG_ENDIAN + digits = byteswap_32(digits); +#endif + } +#else + if (false) { + } +#endif + else if (len >= 4) { + ::memcpy(&digits, p, 4); +#if FASTFLOAT_IS_BIG_ENDIAN + digits = byteswap_32(digits); +#endif + } else { + uint32_t b0 = static_cast(p[0]); + uint32_t b1 = (len > 1) ? static_cast(p[1]) : 0xFFu; + uint32_t b2 = (len > 2) ? static_cast(p[2]) : 0xFFu; + uint32_t b3 = 0xFFu; + digits = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24); + } + + uint32_t magic = + ((digits + 0x46464646u) | (digits - 0x30303030u)) & 0x80808080u; + uint32_t tz = (uint32_t)countr_zero_32(magic); // 7, 15, 23, 31, or 32 + uint32_t nd = (tz == 32) ? 
4 : (tz >> 3); + nd = (uint32_t)std::min((size_t)nd, len); + if (nd == 0) { + if (has_leading_zeros) { + value = 0; + answer.ec = std::errc(); + answer.ptr = p; + return answer; + } + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } + if (nd > 3) { + const UC *q = p + nd; + size_t rem = len - nd; + while (rem) { + if (*q < UC('0') || *q > UC('9')) + break; + ++q; + --rem; + } + answer.ec = std::errc::result_out_of_range; + answer.ptr = q; + return answer; + } + + digits ^= 0x30303030u; + digits <<= ((4 - nd) * 8); + + uint32_t check = ((digits >> 24) & 0xff) | ((digits >> 8) & 0xff00) | + ((digits << 8) & 0xff0000); + if (check > 0x00020505) { + answer.ec = std::errc::result_out_of_range; + answer.ptr = p + nd; + return answer; + } + value = (uint8_t)((0x640a01 * digits) >> 24); + answer.ec = std::errc(); + answer.ptr = p + nd; + return answer; + } + } + + FASTFLOAT_IF_CONSTEXPR17((std::is_same::value)) { + if (base == 10) { + const size_t len = size_t(pend - p); + if (len == 0) { + if (has_leading_zeros) { + value = 0; + answer.ec = std::errc(); + answer.ptr = p; + } else { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + } + return answer; + } + + if (len >= 4) { + uint32_t digits = read4_to_u32(p); + if (is_made_of_four_digits_fast(digits)) { + uint32_t v = parse_four_digits_unrolled(digits); + if (len >= 5 && is_integer(p[4])) { + v = v * 10 + uint32_t(p[4] - '0'); + if (len >= 6 && is_integer(p[5])) { + answer.ec = std::errc::result_out_of_range; + const UC *q = p + 5; + while (q != pend && is_integer(*q)) { + q++; + } + answer.ptr = q; + return answer; + } + if (v > 65535) { + answer.ec = std::errc::result_out_of_range; + answer.ptr = p + 5; + return answer; + } + value = uint16_t(v); + answer.ec = std::errc(); + answer.ptr = p + 5; + return answer; + } + // 4 digits + value = uint16_t(v); + answer.ec = std::errc(); + answer.ptr = p + 4; + return answer; + } + } + } + } + + uint64_t i = 0; + if (base == 10) 
{ + loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible + } + while (p != pend) { + uint8_t digit = ch_to_digit(*p); + if (digit >= base) { + break; + } + i = uint64_t(base) * i + digit; // might overflow, check this later + p++; + } + + size_t digit_count = size_t(p - start_digits); + + if (digit_count == 0) { + if (has_leading_zeros) { + value = 0; + answer.ec = std::errc(); + answer.ptr = p; + } else { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + } + return answer; + } + + answer.ptr = p; + + // check u64 overflow + size_t max_digits = max_digits_u64(base); + if (digit_count > max_digits) { + answer.ec = std::errc::result_out_of_range; + return answer; + } + // this check can be eliminated for all other types, but they will all require + // a max_digits(base) equivalent + if (digit_count == max_digits && i < min_safe_u64(base)) { + answer.ec = std::errc::result_out_of_range; + return answer; + } + + // check other types overflow + if (!std::is_same::value) { + if (i > uint64_t(std::numeric_limits::max()) + uint64_t(negative)) { + answer.ec = std::errc::result_out_of_range; + return answer; + } + } + + if (negative) { +#ifdef FASTFLOAT_VISUAL_STUDIO +#pragma warning(push) +#pragma warning(disable : 4146) +#endif + // this weird workaround is required because: + // - converting unsigned to signed when its value is greater than signed max + // is UB pre-C++23. 
+ // - reinterpret_casting (~i + 1) would work, but it is not constexpr + // this is always optimized into a neg instruction (note: T is an integer + // type) + value = T(-std::numeric_limits::max() - + T(i - uint64_t(std::numeric_limits::max()))); +#ifdef FASTFLOAT_VISUAL_STUDIO +#pragma warning(pop) +#endif + } else { + value = T(i); + } + + answer.ec = std::errc(); + return answer; +} + +} // namespace fast_float + +#endif + +#ifndef FASTFLOAT_FAST_TABLE_H +#define FASTFLOAT_FAST_TABLE_H + +#include + +namespace fast_float { + +/** + * When mapping numbers from decimal to binary, + * we go from w * 10^q to m * 2^p but we have + * 10^q = 5^q * 2^q, so effectively + * we are trying to match + * w * 2^q * 5^q to m * 2^p. Thus the powers of two + * are not a concern since they can be represented + * exactly using the binary notation, only the powers of five + * affect the binary significand. + */ + +/** + * The smallest non-zero float (binary64) is 2^-1074. + * We take as input numbers of the form w x 10^q where w < 2^64. + * We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076. + * However, we have that + * (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^-1074. + * Thus it is possible for a number of the form w * 10^-342 where + * w is a 64-bit value to be a non-zero floating-point number. + ********* + * Any number of form w * 10^309 where w>= 1 is going to be + * infinite in binary64 so we never need to worry about powers + * of 5 greater than 308. + */ +template struct powers_template { + + constexpr static int smallest_power_of_five = + binary_format::smallest_power_of_ten(); + constexpr static int largest_power_of_five = + binary_format::largest_power_of_ten(); + constexpr static int number_of_entries = + 2 * (largest_power_of_five - smallest_power_of_five + 1); + // Powers of five from 5^-342 all the way to 5^308 rounded toward one. 
+ constexpr static uint64_t power_of_five_128[number_of_entries] = { + 0xeef453d6923bd65a, 0x113faa2906a13b3f, + 0x9558b4661b6565f8, 0x4ac7ca59a424c507, + 0xbaaee17fa23ebf76, 0x5d79bcf00d2df649, + 0xe95a99df8ace6f53, 0xf4d82c2c107973dc, + 0x91d8a02bb6c10594, 0x79071b9b8a4be869, + 0xb64ec836a47146f9, 0x9748e2826cdee284, + 0xe3e27a444d8d98b7, 0xfd1b1b2308169b25, + 0x8e6d8c6ab0787f72, 0xfe30f0f5e50e20f7, + 0xb208ef855c969f4f, 0xbdbd2d335e51a935, + 0xde8b2b66b3bc4723, 0xad2c788035e61382, + 0x8b16fb203055ac76, 0x4c3bcb5021afcc31, + 0xaddcb9e83c6b1793, 0xdf4abe242a1bbf3d, + 0xd953e8624b85dd78, 0xd71d6dad34a2af0d, + 0x87d4713d6f33aa6b, 0x8672648c40e5ad68, + 0xa9c98d8ccb009506, 0x680efdaf511f18c2, + 0xd43bf0effdc0ba48, 0x212bd1b2566def2, + 0x84a57695fe98746d, 0x14bb630f7604b57, + 0xa5ced43b7e3e9188, 0x419ea3bd35385e2d, + 0xcf42894a5dce35ea, 0x52064cac828675b9, + 0x818995ce7aa0e1b2, 0x7343efebd1940993, + 0xa1ebfb4219491a1f, 0x1014ebe6c5f90bf8, + 0xca66fa129f9b60a6, 0xd41a26e077774ef6, + 0xfd00b897478238d0, 0x8920b098955522b4, + 0x9e20735e8cb16382, 0x55b46e5f5d5535b0, + 0xc5a890362fddbc62, 0xeb2189f734aa831d, + 0xf712b443bbd52b7b, 0xa5e9ec7501d523e4, + 0x9a6bb0aa55653b2d, 0x47b233c92125366e, + 0xc1069cd4eabe89f8, 0x999ec0bb696e840a, + 0xf148440a256e2c76, 0xc00670ea43ca250d, + 0x96cd2a865764dbca, 0x380406926a5e5728, + 0xbc807527ed3e12bc, 0xc605083704f5ecf2, + 0xeba09271e88d976b, 0xf7864a44c633682e, + 0x93445b8731587ea3, 0x7ab3ee6afbe0211d, + 0xb8157268fdae9e4c, 0x5960ea05bad82964, + 0xe61acf033d1a45df, 0x6fb92487298e33bd, + 0x8fd0c16206306bab, 0xa5d3b6d479f8e056, + 0xb3c4f1ba87bc8696, 0x8f48a4899877186c, + 0xe0b62e2929aba83c, 0x331acdabfe94de87, + 0x8c71dcd9ba0b4925, 0x9ff0c08b7f1d0b14, + 0xaf8e5410288e1b6f, 0x7ecf0ae5ee44dd9, + 0xdb71e91432b1a24a, 0xc9e82cd9f69d6150, + 0x892731ac9faf056e, 0xbe311c083a225cd2, + 0xab70fe17c79ac6ca, 0x6dbd630a48aaf406, + 0xd64d3d9db981787d, 0x92cbbccdad5b108, + 0x85f0468293f0eb4e, 0x25bbf56008c58ea5, + 0xa76c582338ed2621, 0xaf2af2b80af6f24e, + 
0xd1476e2c07286faa, 0x1af5af660db4aee1, + 0x82cca4db847945ca, 0x50d98d9fc890ed4d, + 0xa37fce126597973c, 0xe50ff107bab528a0, + 0xcc5fc196fefd7d0c, 0x1e53ed49a96272c8, + 0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7a, + 0x9faacf3df73609b1, 0x77b191618c54e9ac, + 0xc795830d75038c1d, 0xd59df5b9ef6a2417, + 0xf97ae3d0d2446f25, 0x4b0573286b44ad1d, + 0x9becce62836ac577, 0x4ee367f9430aec32, + 0xc2e801fb244576d5, 0x229c41f793cda73f, + 0xf3a20279ed56d48a, 0x6b43527578c1110f, + 0x9845418c345644d6, 0x830a13896b78aaa9, + 0xbe5691ef416bd60c, 0x23cc986bc656d553, + 0xedec366b11c6cb8f, 0x2cbfbe86b7ec8aa8, + 0x94b3a202eb1c3f39, 0x7bf7d71432f3d6a9, + 0xb9e08a83a5e34f07, 0xdaf5ccd93fb0cc53, + 0xe858ad248f5c22c9, 0xd1b3400f8f9cff68, + 0x91376c36d99995be, 0x23100809b9c21fa1, + 0xb58547448ffffb2d, 0xabd40a0c2832a78a, + 0xe2e69915b3fff9f9, 0x16c90c8f323f516c, + 0x8dd01fad907ffc3b, 0xae3da7d97f6792e3, + 0xb1442798f49ffb4a, 0x99cd11cfdf41779c, + 0xdd95317f31c7fa1d, 0x40405643d711d583, + 0x8a7d3eef7f1cfc52, 0x482835ea666b2572, + 0xad1c8eab5ee43b66, 0xda3243650005eecf, + 0xd863b256369d4a40, 0x90bed43e40076a82, + 0x873e4f75e2224e68, 0x5a7744a6e804a291, + 0xa90de3535aaae202, 0x711515d0a205cb36, + 0xd3515c2831559a83, 0xd5a5b44ca873e03, + 0x8412d9991ed58091, 0xe858790afe9486c2, + 0xa5178fff668ae0b6, 0x626e974dbe39a872, + 0xce5d73ff402d98e3, 0xfb0a3d212dc8128f, + 0x80fa687f881c7f8e, 0x7ce66634bc9d0b99, + 0xa139029f6a239f72, 0x1c1fffc1ebc44e80, + 0xc987434744ac874e, 0xa327ffb266b56220, + 0xfbe9141915d7a922, 0x4bf1ff9f0062baa8, + 0x9d71ac8fada6c9b5, 0x6f773fc3603db4a9, + 0xc4ce17b399107c22, 0xcb550fb4384d21d3, + 0xf6019da07f549b2b, 0x7e2a53a146606a48, + 0x99c102844f94e0fb, 0x2eda7444cbfc426d, + 0xc0314325637a1939, 0xfa911155fefb5308, + 0xf03d93eebc589f88, 0x793555ab7eba27ca, + 0x96267c7535b763b5, 0x4bc1558b2f3458de, + 0xbbb01b9283253ca2, 0x9eb1aaedfb016f16, + 0xea9c227723ee8bcb, 0x465e15a979c1cadc, + 0x92a1958a7675175f, 0xbfacd89ec191ec9, + 0xb749faed14125d36, 0xcef980ec671f667b, + 0xe51c79a85916f484, 
0x82b7e12780e7401a, + 0x8f31cc0937ae58d2, 0xd1b2ecb8b0908810, + 0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa15, + 0xdfbdcece67006ac9, 0x67a791e093e1d49a, + 0x8bd6a141006042bd, 0xe0c8bb2c5c6d24e0, + 0xaecc49914078536d, 0x58fae9f773886e18, + 0xda7f5bf590966848, 0xaf39a475506a899e, + 0x888f99797a5e012d, 0x6d8406c952429603, + 0xaab37fd7d8f58178, 0xc8e5087ba6d33b83, + 0xd5605fcdcf32e1d6, 0xfb1e4a9a90880a64, + 0x855c3be0a17fcd26, 0x5cf2eea09a55067f, + 0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481e, + 0xd0601d8efc57b08b, 0xf13b94daf124da26, + 0x823c12795db6ce57, 0x76c53d08d6b70858, + 0xa2cb1717b52481ed, 0x54768c4b0c64ca6e, + 0xcb7ddcdda26da268, 0xa9942f5dcf7dfd09, + 0xfe5d54150b090b02, 0xd3f93b35435d7c4c, + 0x9efa548d26e5a6e1, 0xc47bc5014a1a6daf, + 0xc6b8e9b0709f109a, 0x359ab6419ca1091b, + 0xf867241c8cc6d4c0, 0xc30163d203c94b62, + 0x9b407691d7fc44f8, 0x79e0de63425dcf1d, + 0xc21094364dfb5636, 0x985915fc12f542e4, + 0xf294b943e17a2bc4, 0x3e6f5b7b17b2939d, + 0x979cf3ca6cec5b5a, 0xa705992ceecf9c42, + 0xbd8430bd08277231, 0x50c6ff782a838353, + 0xece53cec4a314ebd, 0xa4f8bf5635246428, + 0x940f4613ae5ed136, 0x871b7795e136be99, + 0xb913179899f68584, 0x28e2557b59846e3f, + 0xe757dd7ec07426e5, 0x331aeada2fe589cf, + 0x9096ea6f3848984f, 0x3ff0d2c85def7621, + 0xb4bca50b065abe63, 0xfed077a756b53a9, + 0xe1ebce4dc7f16dfb, 0xd3e8495912c62894, + 0x8d3360f09cf6e4bd, 0x64712dd7abbbd95c, + 0xb080392cc4349dec, 0xbd8d794d96aacfb3, + 0xdca04777f541c567, 0xecf0d7a0fc5583a0, + 0x89e42caaf9491b60, 0xf41686c49db57244, + 0xac5d37d5b79b6239, 0x311c2875c522ced5, + 0xd77485cb25823ac7, 0x7d633293366b828b, + 0x86a8d39ef77164bc, 0xae5dff9c02033197, + 0xa8530886b54dbdeb, 0xd9f57f830283fdfc, + 0xd267caa862a12d66, 0xd072df63c324fd7b, + 0x8380dea93da4bc60, 0x4247cb9e59f71e6d, + 0xa46116538d0deb78, 0x52d9be85f074e608, + 0xcd795be870516656, 0x67902e276c921f8b, + 0x806bd9714632dff6, 0xba1cd8a3db53b6, + 0xa086cfcd97bf97f3, 0x80e8a40eccd228a4, + 0xc8a883c0fdaf7df0, 0x6122cd128006b2cd, + 0xfad2a4b13d1b5d6c, 0x796b805720085f81, + 
0x9cc3a6eec6311a63, 0xcbe3303674053bb0, + 0xc3f490aa77bd60fc, 0xbedbfc4411068a9c, + 0xf4f1b4d515acb93b, 0xee92fb5515482d44, + 0x991711052d8bf3c5, 0x751bdd152d4d1c4a, + 0xbf5cd54678eef0b6, 0xd262d45a78a0635d, + 0xef340a98172aace4, 0x86fb897116c87c34, + 0x9580869f0e7aac0e, 0xd45d35e6ae3d4da0, + 0xbae0a846d2195712, 0x8974836059cca109, + 0xe998d258869facd7, 0x2bd1a438703fc94b, + 0x91ff83775423cc06, 0x7b6306a34627ddcf, + 0xb67f6455292cbf08, 0x1a3bc84c17b1d542, + 0xe41f3d6a7377eeca, 0x20caba5f1d9e4a93, + 0x8e938662882af53e, 0x547eb47b7282ee9c, + 0xb23867fb2a35b28d, 0xe99e619a4f23aa43, + 0xdec681f9f4c31f31, 0x6405fa00e2ec94d4, + 0x8b3c113c38f9f37e, 0xde83bc408dd3dd04, + 0xae0b158b4738705e, 0x9624ab50b148d445, + 0xd98ddaee19068c76, 0x3badd624dd9b0957, + 0x87f8a8d4cfa417c9, 0xe54ca5d70a80e5d6, + 0xa9f6d30a038d1dbc, 0x5e9fcf4ccd211f4c, + 0xd47487cc8470652b, 0x7647c3200069671f, + 0x84c8d4dfd2c63f3b, 0x29ecd9f40041e073, + 0xa5fb0a17c777cf09, 0xf468107100525890, + 0xcf79cc9db955c2cc, 0x7182148d4066eeb4, + 0x81ac1fe293d599bf, 0xc6f14cd848405530, + 0xa21727db38cb002f, 0xb8ada00e5a506a7c, + 0xca9cf1d206fdc03b, 0xa6d90811f0e4851c, + 0xfd442e4688bd304a, 0x908f4a166d1da663, + 0x9e4a9cec15763e2e, 0x9a598e4e043287fe, + 0xc5dd44271ad3cdba, 0x40eff1e1853f29fd, + 0xf7549530e188c128, 0xd12bee59e68ef47c, + 0x9a94dd3e8cf578b9, 0x82bb74f8301958ce, + 0xc13a148e3032d6e7, 0xe36a52363c1faf01, + 0xf18899b1bc3f8ca1, 0xdc44e6c3cb279ac1, + 0x96f5600f15a7b7e5, 0x29ab103a5ef8c0b9, + 0xbcb2b812db11a5de, 0x7415d448f6b6f0e7, + 0xebdf661791d60f56, 0x111b495b3464ad21, + 0x936b9fcebb25c995, 0xcab10dd900beec34, + 0xb84687c269ef3bfb, 0x3d5d514f40eea742, + 0xe65829b3046b0afa, 0xcb4a5a3112a5112, + 0x8ff71a0fe2c2e6dc, 0x47f0e785eaba72ab, + 0xb3f4e093db73a093, 0x59ed216765690f56, + 0xe0f218b8d25088b8, 0x306869c13ec3532c, + 0x8c974f7383725573, 0x1e414218c73a13fb, + 0xafbd2350644eeacf, 0xe5d1929ef90898fa, + 0xdbac6c247d62a583, 0xdf45f746b74abf39, + 0x894bc396ce5da772, 0x6b8bba8c328eb783, + 0xab9eb47c81f5114f, 
0x66ea92f3f326564, + 0xd686619ba27255a2, 0xc80a537b0efefebd, + 0x8613fd0145877585, 0xbd06742ce95f5f36, + 0xa798fc4196e952e7, 0x2c48113823b73704, + 0xd17f3b51fca3a7a0, 0xf75a15862ca504c5, + 0x82ef85133de648c4, 0x9a984d73dbe722fb, + 0xa3ab66580d5fdaf5, 0xc13e60d0d2e0ebba, + 0xcc963fee10b7d1b3, 0x318df905079926a8, + 0xffbbcfe994e5c61f, 0xfdf17746497f7052, + 0x9fd561f1fd0f9bd3, 0xfeb6ea8bedefa633, + 0xc7caba6e7c5382c8, 0xfe64a52ee96b8fc0, + 0xf9bd690a1b68637b, 0x3dfdce7aa3c673b0, + 0x9c1661a651213e2d, 0x6bea10ca65c084e, + 0xc31bfa0fe5698db8, 0x486e494fcff30a62, + 0xf3e2f893dec3f126, 0x5a89dba3c3efccfa, + 0x986ddb5c6b3a76b7, 0xf89629465a75e01c, + 0xbe89523386091465, 0xf6bbb397f1135823, + 0xee2ba6c0678b597f, 0x746aa07ded582e2c, + 0x94db483840b717ef, 0xa8c2a44eb4571cdc, + 0xba121a4650e4ddeb, 0x92f34d62616ce413, + 0xe896a0d7e51e1566, 0x77b020baf9c81d17, + 0x915e2486ef32cd60, 0xace1474dc1d122e, + 0xb5b5ada8aaff80b8, 0xd819992132456ba, + 0xe3231912d5bf60e6, 0x10e1fff697ed6c69, + 0x8df5efabc5979c8f, 0xca8d3ffa1ef463c1, + 0xb1736b96b6fd83b3, 0xbd308ff8a6b17cb2, + 0xddd0467c64bce4a0, 0xac7cb3f6d05ddbde, + 0x8aa22c0dbef60ee4, 0x6bcdf07a423aa96b, + 0xad4ab7112eb3929d, 0x86c16c98d2c953c6, + 0xd89d64d57a607744, 0xe871c7bf077ba8b7, + 0x87625f056c7c4a8b, 0x11471cd764ad4972, + 0xa93af6c6c79b5d2d, 0xd598e40d3dd89bcf, + 0xd389b47879823479, 0x4aff1d108d4ec2c3, + 0x843610cb4bf160cb, 0xcedf722a585139ba, + 0xa54394fe1eedb8fe, 0xc2974eb4ee658828, + 0xce947a3da6a9273e, 0x733d226229feea32, + 0x811ccc668829b887, 0x806357d5a3f525f, + 0xa163ff802a3426a8, 0xca07c2dcb0cf26f7, + 0xc9bcff6034c13052, 0xfc89b393dd02f0b5, + 0xfc2c3f3841f17c67, 0xbbac2078d443ace2, + 0x9d9ba7832936edc0, 0xd54b944b84aa4c0d, + 0xc5029163f384a931, 0xa9e795e65d4df11, + 0xf64335bcf065d37d, 0x4d4617b5ff4a16d5, + 0x99ea0196163fa42e, 0x504bced1bf8e4e45, + 0xc06481fb9bcf8d39, 0xe45ec2862f71e1d6, + 0xf07da27a82c37088, 0x5d767327bb4e5a4c, + 0x964e858c91ba2655, 0x3a6a07f8d510f86f, + 0xbbe226efb628afea, 0x890489f70a55368b, + 
0xeadab0aba3b2dbe5, 0x2b45ac74ccea842e, + 0x92c8ae6b464fc96f, 0x3b0b8bc90012929d, + 0xb77ada0617e3bbcb, 0x9ce6ebb40173744, + 0xe55990879ddcaabd, 0xcc420a6a101d0515, + 0x8f57fa54c2a9eab6, 0x9fa946824a12232d, + 0xb32df8e9f3546564, 0x47939822dc96abf9, + 0xdff9772470297ebd, 0x59787e2b93bc56f7, + 0x8bfbea76c619ef36, 0x57eb4edb3c55b65a, + 0xaefae51477a06b03, 0xede622920b6b23f1, + 0xdab99e59958885c4, 0xe95fab368e45eced, + 0x88b402f7fd75539b, 0x11dbcb0218ebb414, + 0xaae103b5fcd2a881, 0xd652bdc29f26a119, + 0xd59944a37c0752a2, 0x4be76d3346f0495f, + 0x857fcae62d8493a5, 0x6f70a4400c562ddb, + 0xa6dfbd9fb8e5b88e, 0xcb4ccd500f6bb952, + 0xd097ad07a71f26b2, 0x7e2000a41346a7a7, + 0x825ecc24c873782f, 0x8ed400668c0c28c8, + 0xa2f67f2dfa90563b, 0x728900802f0f32fa, + 0xcbb41ef979346bca, 0x4f2b40a03ad2ffb9, + 0xfea126b7d78186bc, 0xe2f610c84987bfa8, + 0x9f24b832e6b0f436, 0xdd9ca7d2df4d7c9, + 0xc6ede63fa05d3143, 0x91503d1c79720dbb, + 0xf8a95fcf88747d94, 0x75a44c6397ce912a, + 0x9b69dbe1b548ce7c, 0xc986afbe3ee11aba, + 0xc24452da229b021b, 0xfbe85badce996168, + 0xf2d56790ab41c2a2, 0xfae27299423fb9c3, + 0x97c560ba6b0919a5, 0xdccd879fc967d41a, + 0xbdb6b8e905cb600f, 0x5400e987bbc1c920, + 0xed246723473e3813, 0x290123e9aab23b68, + 0x9436c0760c86e30b, 0xf9a0b6720aaf6521, + 0xb94470938fa89bce, 0xf808e40e8d5b3e69, + 0xe7958cb87392c2c2, 0xb60b1d1230b20e04, + 0x90bd77f3483bb9b9, 0xb1c6f22b5e6f48c2, + 0xb4ecd5f01a4aa828, 0x1e38aeb6360b1af3, + 0xe2280b6c20dd5232, 0x25c6da63c38de1b0, + 0x8d590723948a535f, 0x579c487e5a38ad0e, + 0xb0af48ec79ace837, 0x2d835a9df0c6d851, + 0xdcdb1b2798182244, 0xf8e431456cf88e65, + 0x8a08f0f8bf0f156b, 0x1b8e9ecb641b58ff, + 0xac8b2d36eed2dac5, 0xe272467e3d222f3f, + 0xd7adf884aa879177, 0x5b0ed81dcc6abb0f, + 0x86ccbb52ea94baea, 0x98e947129fc2b4e9, + 0xa87fea27a539e9a5, 0x3f2398d747b36224, + 0xd29fe4b18e88640e, 0x8eec7f0d19a03aad, + 0x83a3eeeef9153e89, 0x1953cf68300424ac, + 0xa48ceaaab75a8e2b, 0x5fa8c3423c052dd7, + 0xcdb02555653131b6, 0x3792f412cb06794d, + 0x808e17555f3ebf11, 
0xe2bbd88bbee40bd0, + 0xa0b19d2ab70e6ed6, 0x5b6aceaeae9d0ec4, + 0xc8de047564d20a8b, 0xf245825a5a445275, + 0xfb158592be068d2e, 0xeed6e2f0f0d56712, + 0x9ced737bb6c4183d, 0x55464dd69685606b, + 0xc428d05aa4751e4c, 0xaa97e14c3c26b886, + 0xf53304714d9265df, 0xd53dd99f4b3066a8, + 0x993fe2c6d07b7fab, 0xe546a8038efe4029, + 0xbf8fdb78849a5f96, 0xde98520472bdd033, + 0xef73d256a5c0f77c, 0x963e66858f6d4440, + 0x95a8637627989aad, 0xdde7001379a44aa8, + 0xbb127c53b17ec159, 0x5560c018580d5d52, + 0xe9d71b689dde71af, 0xaab8f01e6e10b4a6, + 0x9226712162ab070d, 0xcab3961304ca70e8, + 0xb6b00d69bb55c8d1, 0x3d607b97c5fd0d22, + 0xe45c10c42a2b3b05, 0x8cb89a7db77c506a, + 0x8eb98a7a9a5b04e3, 0x77f3608e92adb242, + 0xb267ed1940f1c61c, 0x55f038b237591ed3, + 0xdf01e85f912e37a3, 0x6b6c46dec52f6688, + 0x8b61313bbabce2c6, 0x2323ac4b3b3da015, + 0xae397d8aa96c1b77, 0xabec975e0a0d081a, + 0xd9c7dced53c72255, 0x96e7bd358c904a21, + 0x881cea14545c7575, 0x7e50d64177da2e54, + 0xaa242499697392d2, 0xdde50bd1d5d0b9e9, + 0xd4ad2dbfc3d07787, 0x955e4ec64b44e864, + 0x84ec3c97da624ab4, 0xbd5af13bef0b113e, + 0xa6274bbdd0fadd61, 0xecb1ad8aeacdd58e, + 0xcfb11ead453994ba, 0x67de18eda5814af2, + 0x81ceb32c4b43fcf4, 0x80eacf948770ced7, + 0xa2425ff75e14fc31, 0xa1258379a94d028d, + 0xcad2f7f5359a3b3e, 0x96ee45813a04330, + 0xfd87b5f28300ca0d, 0x8bca9d6e188853fc, + 0x9e74d1b791e07e48, 0x775ea264cf55347e, + 0xc612062576589dda, 0x95364afe032a819e, + 0xf79687aed3eec551, 0x3a83ddbd83f52205, + 0x9abe14cd44753b52, 0xc4926a9672793543, + 0xc16d9a0095928a27, 0x75b7053c0f178294, + 0xf1c90080baf72cb1, 0x5324c68b12dd6339, + 0x971da05074da7bee, 0xd3f6fc16ebca5e04, + 0xbce5086492111aea, 0x88f4bb1ca6bcf585, + 0xec1e4a7db69561a5, 0x2b31e9e3d06c32e6, + 0x9392ee8e921d5d07, 0x3aff322e62439fd0, + 0xb877aa3236a4b449, 0x9befeb9fad487c3, + 0xe69594bec44de15b, 0x4c2ebe687989a9b4, + 0x901d7cf73ab0acd9, 0xf9d37014bf60a11, + 0xb424dc35095cd80f, 0x538484c19ef38c95, + 0xe12e13424bb40e13, 0x2865a5f206b06fba, + 0x8cbccc096f5088cb, 0xf93f87b7442e45d4, + 
0xafebff0bcb24aafe, 0xf78f69a51539d749, + 0xdbe6fecebdedd5be, 0xb573440e5a884d1c, + 0x89705f4136b4a597, 0x31680a88f8953031, + 0xabcc77118461cefc, 0xfdc20d2b36ba7c3e, + 0xd6bf94d5e57a42bc, 0x3d32907604691b4d, + 0x8637bd05af6c69b5, 0xa63f9a49c2c1b110, + 0xa7c5ac471b478423, 0xfcf80dc33721d54, + 0xd1b71758e219652b, 0xd3c36113404ea4a9, + 0x83126e978d4fdf3b, 0x645a1cac083126ea, + 0xa3d70a3d70a3d70a, 0x3d70a3d70a3d70a4, + 0xcccccccccccccccc, 0xcccccccccccccccd, + 0x8000000000000000, 0x0, + 0xa000000000000000, 0x0, + 0xc800000000000000, 0x0, + 0xfa00000000000000, 0x0, + 0x9c40000000000000, 0x0, + 0xc350000000000000, 0x0, + 0xf424000000000000, 0x0, + 0x9896800000000000, 0x0, + 0xbebc200000000000, 0x0, + 0xee6b280000000000, 0x0, + 0x9502f90000000000, 0x0, + 0xba43b74000000000, 0x0, + 0xe8d4a51000000000, 0x0, + 0x9184e72a00000000, 0x0, + 0xb5e620f480000000, 0x0, + 0xe35fa931a0000000, 0x0, + 0x8e1bc9bf04000000, 0x0, + 0xb1a2bc2ec5000000, 0x0, + 0xde0b6b3a76400000, 0x0, + 0x8ac7230489e80000, 0x0, + 0xad78ebc5ac620000, 0x0, + 0xd8d726b7177a8000, 0x0, + 0x878678326eac9000, 0x0, + 0xa968163f0a57b400, 0x0, + 0xd3c21bcecceda100, 0x0, + 0x84595161401484a0, 0x0, + 0xa56fa5b99019a5c8, 0x0, + 0xcecb8f27f4200f3a, 0x0, + 0x813f3978f8940984, 0x4000000000000000, + 0xa18f07d736b90be5, 0x5000000000000000, + 0xc9f2c9cd04674ede, 0xa400000000000000, + 0xfc6f7c4045812296, 0x4d00000000000000, + 0x9dc5ada82b70b59d, 0xf020000000000000, + 0xc5371912364ce305, 0x6c28000000000000, + 0xf684df56c3e01bc6, 0xc732000000000000, + 0x9a130b963a6c115c, 0x3c7f400000000000, + 0xc097ce7bc90715b3, 0x4b9f100000000000, + 0xf0bdc21abb48db20, 0x1e86d40000000000, + 0x96769950b50d88f4, 0x1314448000000000, + 0xbc143fa4e250eb31, 0x17d955a000000000, + 0xeb194f8e1ae525fd, 0x5dcfab0800000000, + 0x92efd1b8d0cf37be, 0x5aa1cae500000000, + 0xb7abc627050305ad, 0xf14a3d9e40000000, + 0xe596b7b0c643c719, 0x6d9ccd05d0000000, + 0x8f7e32ce7bea5c6f, 0xe4820023a2000000, + 0xb35dbf821ae4f38b, 0xdda2802c8a800000, + 0xe0352f62a19e306e, 
0xd50b2037ad200000, + 0x8c213d9da502de45, 0x4526f422cc340000, + 0xaf298d050e4395d6, 0x9670b12b7f410000, + 0xdaf3f04651d47b4c, 0x3c0cdd765f114000, + 0x88d8762bf324cd0f, 0xa5880a69fb6ac800, + 0xab0e93b6efee0053, 0x8eea0d047a457a00, + 0xd5d238a4abe98068, 0x72a4904598d6d880, + 0x85a36366eb71f041, 0x47a6da2b7f864750, + 0xa70c3c40a64e6c51, 0x999090b65f67d924, + 0xd0cf4b50cfe20765, 0xfff4b4e3f741cf6d, + 0x82818f1281ed449f, 0xbff8f10e7a8921a4, + 0xa321f2d7226895c7, 0xaff72d52192b6a0d, + 0xcbea6f8ceb02bb39, 0x9bf4f8a69f764490, + 0xfee50b7025c36a08, 0x2f236d04753d5b4, + 0x9f4f2726179a2245, 0x1d762422c946590, + 0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef5, + 0xf8ebad2b84e0d58b, 0xd2e0898765a7deb2, + 0x9b934c3b330c8577, 0x63cc55f49f88eb2f, + 0xc2781f49ffcfa6d5, 0x3cbf6b71c76b25fb, + 0xf316271c7fc3908a, 0x8bef464e3945ef7a, + 0x97edd871cfda3a56, 0x97758bf0e3cbb5ac, + 0xbde94e8e43d0c8ec, 0x3d52eeed1cbea317, + 0xed63a231d4c4fb27, 0x4ca7aaa863ee4bdd, + 0x945e455f24fb1cf8, 0x8fe8caa93e74ef6a, + 0xb975d6b6ee39e436, 0xb3e2fd538e122b44, + 0xe7d34c64a9c85d44, 0x60dbbca87196b616, + 0x90e40fbeea1d3a4a, 0xbc8955e946fe31cd, + 0xb51d13aea4a488dd, 0x6babab6398bdbe41, + 0xe264589a4dcdab14, 0xc696963c7eed2dd1, + 0x8d7eb76070a08aec, 0xfc1e1de5cf543ca2, + 0xb0de65388cc8ada8, 0x3b25a55f43294bcb, + 0xdd15fe86affad912, 0x49ef0eb713f39ebe, + 0x8a2dbf142dfcc7ab, 0x6e3569326c784337, + 0xacb92ed9397bf996, 0x49c2c37f07965404, + 0xd7e77a8f87daf7fb, 0xdc33745ec97be906, + 0x86f0ac99b4e8dafd, 0x69a028bb3ded71a3, + 0xa8acd7c0222311bc, 0xc40832ea0d68ce0c, + 0xd2d80db02aabd62b, 0xf50a3fa490c30190, + 0x83c7088e1aab65db, 0x792667c6da79e0fa, + 0xa4b8cab1a1563f52, 0x577001b891185938, + 0xcde6fd5e09abcf26, 0xed4c0226b55e6f86, + 0x80b05e5ac60b6178, 0x544f8158315b05b4, + 0xa0dc75f1778e39d6, 0x696361ae3db1c721, + 0xc913936dd571c84c, 0x3bc3a19cd1e38e9, + 0xfb5878494ace3a5f, 0x4ab48a04065c723, + 0x9d174b2dcec0e47b, 0x62eb0d64283f9c76, + 0xc45d1df942711d9a, 0x3ba5d0bd324f8394, + 0xf5746577930d6500, 0xca8f44ec7ee36479, + 
0x9968bf6abbe85f20, 0x7e998b13cf4e1ecb, + 0xbfc2ef456ae276e8, 0x9e3fedd8c321a67e, + 0xefb3ab16c59b14a2, 0xc5cfe94ef3ea101e, + 0x95d04aee3b80ece5, 0xbba1f1d158724a12, + 0xbb445da9ca61281f, 0x2a8a6e45ae8edc97, + 0xea1575143cf97226, 0xf52d09d71a3293bd, + 0x924d692ca61be758, 0x593c2626705f9c56, + 0xb6e0c377cfa2e12e, 0x6f8b2fb00c77836c, + 0xe498f455c38b997a, 0xb6dfb9c0f956447, + 0x8edf98b59a373fec, 0x4724bd4189bd5eac, + 0xb2977ee300c50fe7, 0x58edec91ec2cb657, + 0xdf3d5e9bc0f653e1, 0x2f2967b66737e3ed, + 0x8b865b215899f46c, 0xbd79e0d20082ee74, + 0xae67f1e9aec07187, 0xecd8590680a3aa11, + 0xda01ee641a708de9, 0xe80e6f4820cc9495, + 0x884134fe908658b2, 0x3109058d147fdcdd, + 0xaa51823e34a7eede, 0xbd4b46f0599fd415, + 0xd4e5e2cdc1d1ea96, 0x6c9e18ac7007c91a, + 0x850fadc09923329e, 0x3e2cf6bc604ddb0, + 0xa6539930bf6bff45, 0x84db8346b786151c, + 0xcfe87f7cef46ff16, 0xe612641865679a63, + 0x81f14fae158c5f6e, 0x4fcb7e8f3f60c07e, + 0xa26da3999aef7749, 0xe3be5e330f38f09d, + 0xcb090c8001ab551c, 0x5cadf5bfd3072cc5, + 0xfdcb4fa002162a63, 0x73d9732fc7c8f7f6, + 0x9e9f11c4014dda7e, 0x2867e7fddcdd9afa, + 0xc646d63501a1511d, 0xb281e1fd541501b8, + 0xf7d88bc24209a565, 0x1f225a7ca91a4226, + 0x9ae757596946075f, 0x3375788de9b06958, + 0xc1a12d2fc3978937, 0x52d6b1641c83ae, + 0xf209787bb47d6b84, 0xc0678c5dbd23a49a, + 0x9745eb4d50ce6332, 0xf840b7ba963646e0, + 0xbd176620a501fbff, 0xb650e5a93bc3d898, + 0xec5d3fa8ce427aff, 0xa3e51f138ab4cebe, + 0x93ba47c980e98cdf, 0xc66f336c36b10137, + 0xb8a8d9bbe123f017, 0xb80b0047445d4184, + 0xe6d3102ad96cec1d, 0xa60dc059157491e5, + 0x9043ea1ac7e41392, 0x87c89837ad68db2f, + 0xb454e4a179dd1877, 0x29babe4598c311fb, + 0xe16a1dc9d8545e94, 0xf4296dd6fef3d67a, + 0x8ce2529e2734bb1d, 0x1899e4a65f58660c, + 0xb01ae745b101e9e4, 0x5ec05dcff72e7f8f, + 0xdc21a1171d42645d, 0x76707543f4fa1f73, + 0x899504ae72497eba, 0x6a06494a791c53a8, + 0xabfa45da0edbde69, 0x487db9d17636892, + 0xd6f8d7509292d603, 0x45a9d2845d3c42b6, + 0x865b86925b9bc5c2, 0xb8a2392ba45a9b2, + 0xa7f26836f282b732, 
0x8e6cac7768d7141e, + 0xd1ef0244af2364ff, 0x3207d795430cd926, + 0x8335616aed761f1f, 0x7f44e6bd49e807b8, + 0xa402b9c5a8d3a6e7, 0x5f16206c9c6209a6, + 0xcd036837130890a1, 0x36dba887c37a8c0f, + 0x802221226be55a64, 0xc2494954da2c9789, + 0xa02aa96b06deb0fd, 0xf2db9baa10b7bd6c, + 0xc83553c5c8965d3d, 0x6f92829494e5acc7, + 0xfa42a8b73abbf48c, 0xcb772339ba1f17f9, + 0x9c69a97284b578d7, 0xff2a760414536efb, + 0xc38413cf25e2d70d, 0xfef5138519684aba, + 0xf46518c2ef5b8cd1, 0x7eb258665fc25d69, + 0x98bf2f79d5993802, 0xef2f773ffbd97a61, + 0xbeeefb584aff8603, 0xaafb550ffacfd8fa, + 0xeeaaba2e5dbf6784, 0x95ba2a53f983cf38, + 0x952ab45cfa97a0b2, 0xdd945a747bf26183, + 0xba756174393d88df, 0x94f971119aeef9e4, + 0xe912b9d1478ceb17, 0x7a37cd5601aab85d, + 0x91abb422ccb812ee, 0xac62e055c10ab33a, + 0xb616a12b7fe617aa, 0x577b986b314d6009, + 0xe39c49765fdf9d94, 0xed5a7e85fda0b80b, + 0x8e41ade9fbebc27d, 0x14588f13be847307, + 0xb1d219647ae6b31c, 0x596eb2d8ae258fc8, + 0xde469fbd99a05fe3, 0x6fca5f8ed9aef3bb, + 0x8aec23d680043bee, 0x25de7bb9480d5854, + 0xada72ccc20054ae9, 0xaf561aa79a10ae6a, + 0xd910f7ff28069da4, 0x1b2ba1518094da04, + 0x87aa9aff79042286, 0x90fb44d2f05d0842, + 0xa99541bf57452b28, 0x353a1607ac744a53, + 0xd3fa922f2d1675f2, 0x42889b8997915ce8, + 0x847c9b5d7c2e09b7, 0x69956135febada11, + 0xa59bc234db398c25, 0x43fab9837e699095, + 0xcf02b2c21207ef2e, 0x94f967e45e03f4bb, + 0x8161afb94b44f57d, 0x1d1be0eebac278f5, + 0xa1ba1ba79e1632dc, 0x6462d92a69731732, + 0xca28a291859bbf93, 0x7d7b8f7503cfdcfe, + 0xfcb2cb35e702af78, 0x5cda735244c3d43e, + 0x9defbf01b061adab, 0x3a0888136afa64a7, + 0xc56baec21c7a1916, 0x88aaa1845b8fdd0, + 0xf6c69a72a3989f5b, 0x8aad549e57273d45, + 0x9a3c2087a63f6399, 0x36ac54e2f678864b, + 0xc0cb28a98fcf3c7f, 0x84576a1bb416a7dd, + 0xf0fdf2d3f3c30b9f, 0x656d44a2a11c51d5, + 0x969eb7c47859e743, 0x9f644ae5a4b1b325, + 0xbc4665b596706114, 0x873d5d9f0dde1fee, + 0xeb57ff22fc0c7959, 0xa90cb506d155a7ea, + 0x9316ff75dd87cbd8, 0x9a7f12442d588f2, + 0xb7dcbf5354e9bece, 0xc11ed6d538aeb2f, + 
0xe5d3ef282a242e81, 0x8f1668c8a86da5fa, + 0x8fa475791a569d10, 0xf96e017d694487bc, + 0xb38d92d760ec4455, 0x37c981dcc395a9ac, + 0xe070f78d3927556a, 0x85bbe253f47b1417, + 0x8c469ab843b89562, 0x93956d7478ccec8e, + 0xaf58416654a6babb, 0x387ac8d1970027b2, + 0xdb2e51bfe9d0696a, 0x6997b05fcc0319e, + 0x88fcf317f22241e2, 0x441fece3bdf81f03, + 0xab3c2fddeeaad25a, 0xd527e81cad7626c3, + 0xd60b3bd56a5586f1, 0x8a71e223d8d3b074, + 0x85c7056562757456, 0xf6872d5667844e49, + 0xa738c6bebb12d16c, 0xb428f8ac016561db, + 0xd106f86e69d785c7, 0xe13336d701beba52, + 0x82a45b450226b39c, 0xecc0024661173473, + 0xa34d721642b06084, 0x27f002d7f95d0190, + 0xcc20ce9bd35c78a5, 0x31ec038df7b441f4, + 0xff290242c83396ce, 0x7e67047175a15271, + 0x9f79a169bd203e41, 0xf0062c6e984d386, + 0xc75809c42c684dd1, 0x52c07b78a3e60868, + 0xf92e0c3537826145, 0xa7709a56ccdf8a82, + 0x9bbcc7a142b17ccb, 0x88a66076400bb691, + 0xc2abf989935ddbfe, 0x6acff893d00ea435, + 0xf356f7ebf83552fe, 0x583f6b8c4124d43, + 0x98165af37b2153de, 0xc3727a337a8b704a, + 0xbe1bf1b059e9a8d6, 0x744f18c0592e4c5c, + 0xeda2ee1c7064130c, 0x1162def06f79df73, + 0x9485d4d1c63e8be7, 0x8addcb5645ac2ba8, + 0xb9a74a0637ce2ee1, 0x6d953e2bd7173692, + 0xe8111c87c5c1ba99, 0xc8fa8db6ccdd0437, + 0x910ab1d4db9914a0, 0x1d9c9892400a22a2, + 0xb54d5e4a127f59c8, 0x2503beb6d00cab4b, + 0xe2a0b5dc971f303a, 0x2e44ae64840fd61d, + 0x8da471a9de737e24, 0x5ceaecfed289e5d2, + 0xb10d8e1456105dad, 0x7425a83e872c5f47, + 0xdd50f1996b947518, 0xd12f124e28f77719, + 0x8a5296ffe33cc92f, 0x82bd6b70d99aaa6f, + 0xace73cbfdc0bfb7b, 0x636cc64d1001550b, + 0xd8210befd30efa5a, 0x3c47f7e05401aa4e, + 0x8714a775e3e95c78, 0x65acfaec34810a71, + 0xa8d9d1535ce3b396, 0x7f1839a741a14d0d, + 0xd31045a8341ca07c, 0x1ede48111209a050, + 0x83ea2b892091e44d, 0x934aed0aab460432, + 0xa4e4b66b68b65d60, 0xf81da84d5617853f, + 0xce1de40642e3f4b9, 0x36251260ab9d668e, + 0x80d2ae83e9ce78f3, 0xc1d72b7c6b426019, + 0xa1075a24e4421730, 0xb24cf65b8612f81f, + 0xc94930ae1d529cfc, 0xdee033f26797b627, + 0xfb9b7cd9a4a7443c, 
0x169840ef017da3b1, + 0x9d412e0806e88aa5, 0x8e1f289560ee864e, + 0xc491798a08a2ad4e, 0xf1a6f2bab92a27e2, + 0xf5b5d7ec8acb58a2, 0xae10af696774b1db, + 0x9991a6f3d6bf1765, 0xacca6da1e0a8ef29, + 0xbff610b0cc6edd3f, 0x17fd090a58d32af3, + 0xeff394dcff8a948e, 0xddfc4b4cef07f5b0, + 0x95f83d0a1fb69cd9, 0x4abdaf101564f98e, + 0xbb764c4ca7a4440f, 0x9d6d1ad41abe37f1, + 0xea53df5fd18d5513, 0x84c86189216dc5ed, + 0x92746b9be2f8552c, 0x32fd3cf5b4e49bb4, + 0xb7118682dbb66a77, 0x3fbc8c33221dc2a1, + 0xe4d5e82392a40515, 0xfabaf3feaa5334a, + 0x8f05b1163ba6832d, 0x29cb4d87f2a7400e, + 0xb2c71d5bca9023f8, 0x743e20e9ef511012, + 0xdf78e4b2bd342cf6, 0x914da9246b255416, + 0x8bab8eefb6409c1a, 0x1ad089b6c2f7548e, + 0xae9672aba3d0c320, 0xa184ac2473b529b1, + 0xda3c0f568cc4f3e8, 0xc9e5d72d90a2741e, + 0x8865899617fb1871, 0x7e2fa67c7a658892, + 0xaa7eebfb9df9de8d, 0xddbb901b98feeab7, + 0xd51ea6fa85785631, 0x552a74227f3ea565, + 0x8533285c936b35de, 0xd53a88958f87275f, + 0xa67ff273b8460356, 0x8a892abaf368f137, + 0xd01fef10a657842c, 0x2d2b7569b0432d85, + 0x8213f56a67f6b29b, 0x9c3b29620e29fc73, + 0xa298f2c501f45f42, 0x8349f3ba91b47b8f, + 0xcb3f2f7642717713, 0x241c70a936219a73, + 0xfe0efb53d30dd4d7, 0xed238cd383aa0110, + 0x9ec95d1463e8a506, 0xf4363804324a40aa, + 0xc67bb4597ce2ce48, 0xb143c6053edcd0d5, + 0xf81aa16fdc1b81da, 0xdd94b7868e94050a, + 0x9b10a4e5e9913128, 0xca7cf2b4191c8326, + 0xc1d4ce1f63f57d72, 0xfd1c2f611f63a3f0, + 0xf24a01a73cf2dccf, 0xbc633b39673c8cec, + 0x976e41088617ca01, 0xd5be0503e085d813, + 0xbd49d14aa79dbc82, 0x4b2d8644d8a74e18, + 0xec9c459d51852ba2, 0xddf8e7d60ed1219e, + 0x93e1ab8252f33b45, 0xcabb90e5c942b503, + 0xb8da1662e7b00a17, 0x3d6a751f3b936243, + 0xe7109bfba19c0c9d, 0xcc512670a783ad4, + 0x906a617d450187e2, 0x27fb2b80668b24c5, + 0xb484f9dc9641e9da, 0xb1f9f660802dedf6, + 0xe1a63853bbd26451, 0x5e7873f8a0396973, + 0x8d07e33455637eb2, 0xdb0b487b6423e1e8, + 0xb049dc016abc5e5f, 0x91ce1a9a3d2cda62, + 0xdc5c5301c56b75f7, 0x7641a140cc7810fb, + 0x89b9b3e11b6329ba, 0xa9e904c87fcb0a9d, + 
0xac2820d9623bf429, 0x546345fa9fbdcd44, + 0xd732290fbacaf133, 0xa97c177947ad4095, + 0x867f59a9d4bed6c0, 0x49ed8eabcccc485d, + 0xa81f301449ee8c70, 0x5c68f256bfff5a74, + 0xd226fc195c6a2f8c, 0x73832eec6fff3111, + 0x83585d8fd9c25db7, 0xc831fd53c5ff7eab, + 0xa42e74f3d032f525, 0xba3e7ca8b77f5e55, + 0xcd3a1230c43fb26f, 0x28ce1bd2e55f35eb, + 0x80444b5e7aa7cf85, 0x7980d163cf5b81b3, + 0xa0555e361951c366, 0xd7e105bcc332621f, + 0xc86ab5c39fa63440, 0x8dd9472bf3fefaa7, + 0xfa856334878fc150, 0xb14f98f6f0feb951, + 0x9c935e00d4b9d8d2, 0x6ed1bf9a569f33d3, + 0xc3b8358109e84f07, 0xa862f80ec4700c8, + 0xf4a642e14c6262c8, 0xcd27bb612758c0fa, + 0x98e7e9cccfbd7dbd, 0x8038d51cb897789c, + 0xbf21e44003acdd2c, 0xe0470a63e6bd56c3, + 0xeeea5d5004981478, 0x1858ccfce06cac74, + 0x95527a5202df0ccb, 0xf37801e0c43ebc8, + 0xbaa718e68396cffd, 0xd30560258f54e6ba, + 0xe950df20247c83fd, 0x47c6b82ef32a2069, + 0x91d28b7416cdd27e, 0x4cdc331d57fa5441, + 0xb6472e511c81471d, 0xe0133fe4adf8e952, + 0xe3d8f9e563a198e5, 0x58180fddd97723a6, + 0x8e679c2f5e44ff8f, 0x570f09eaa7ea7648, + }; +}; + +#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE + +template +constexpr uint64_t + powers_template::power_of_five_128[number_of_entries]; + +#endif + +using powers = powers_template<>; + +} // namespace fast_float + +#endif + +#ifndef FASTFLOAT_DECIMAL_TO_BINARY_H +#define FASTFLOAT_DECIMAL_TO_BINARY_H + +#include +#include +#include +#include +#include +#include + +namespace fast_float { + +// This will compute or rather approximate w * 5**q and return a pair of 64-bit +// words approximating the result, with the "high" part corresponding to the +// most significant bits and the low part corresponding to the least significant +// bits. 
+// +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 value128 +compute_product_approximation(int64_t q, uint64_t w) { + int const index = 2 * int(q - powers::smallest_power_of_five); + // Each power of five takes two consecutive 64-bit words in the table, hence + // the factor 2: word [index] is multiplied first, word [index + 1] only when + // the first product is not precise enough (see the mask test below). + // For small values of q, e.g., q in [0,27], the answer is always exact + // because the line value128 firstproduct = full_multiplication(w, + // power_of_five_128[index]); gives the exact answer. + value128 firstproduct = + full_multiplication(w, powers::power_of_five_128[index]); + static_assert((bit_precision >= 0) && (bit_precision <= 64), + " precision should be in (0,64]"); + // precision_mask has every bit set except the top bit_precision bits (all + // bits set when bit_precision == 64). The second table word is consulted + // only when all of the masked bits of firstproduct.high are ones. + constexpr uint64_t precision_mask = + (bit_precision < 64) ? (uint64_t(0xFFFFFFFFFFFFFFFF) >> bit_precision) + : uint64_t(0xFFFFFFFFFFFFFFFF); + if ((firstproduct.high & precision_mask) == + precision_mask) { // could further guard with (lower + w < lower) + // regarding the second product, we only need secondproduct.high, but our + // expectation is that the compiler will optimize this extra work away if + // needed. + value128 secondproduct = + full_multiplication(w, powers::power_of_five_128[index + 1]); + // add the high word of the second product to the low word of the first; + // if that unsigned addition wrapped around, carry one into the high word. + firstproduct.low += secondproduct.high; + if (secondproduct.high > firstproduct.low) { + firstproduct.high++; + } + } + return firstproduct; +} + +namespace detail { +/** + * For q in (0,350), we have that + * f = (((152170 + 65536) * q ) >> 16); + * is equal to + * floor(p) + q + * where + * p = log(5**q)/log(2) = q * log(5)/log(2) + * + * For negative values of q in (-400,0), we have that + * f = (((152170 + 65536) * q ) >> 16); + * is equal to + * -ceil(p) + q + * where + * p = log(5**-q)/log(2) = -q * log(5)/log(2) + * + * power(q) returns this f plus 63. + */ +constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept { + return (((152170 + 65536) * q) >> 16) + 63; +} +} // namespace detail + +// create an adjusted mantissa, biased by the invalid power2 +// for significant digits already multiplied by 10 ** q.
+template +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 adjusted_mantissa +compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept { + int hilz = int(w >> 63) ^ 1; + adjusted_mantissa answer; + answer.mantissa = w << hilz; + int bias = binary::mantissa_explicit_bits() - binary::minimum_exponent(); + answer.power2 = int32_t(detail::power(int32_t(q)) + bias - hilz - lz - 62 + + invalid_am_bias); + return answer; +} + +// w * 10 ** q, without rounding the representation up. +// the power2 in the exponent will be adjusted by invalid_am_bias. +// w is first shifted left by its leading-zero count; only the high word of +// the product approximation is passed on to compute_error_scaled. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa +compute_error(int64_t q, uint64_t w) noexcept { + int lz = leading_zeroes(w); + w <<= lz; + value128 product = + compute_product_approximation(q, w); + return compute_error_scaled(q, product.high, lz); +} + +// Computes w * 10 ** q. +// The returned value should be a valid number that simply needs to be +// packed. However, in some very rare cases, the computation will fail. In such +// cases, we return an adjusted_mantissa with a negative power of 2: the caller +// should recompute in such cases. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa +compute_float(int64_t q, uint64_t w) noexcept { + adjusted_mantissa answer; + if ((w == 0) || (q < binary::smallest_power_of_ten())) { + answer.power2 = 0; + answer.mantissa = 0; + // result should be zero + return answer; + } + if (q > binary::largest_power_of_ten()) { + // we want to get infinity: + answer.power2 = binary::infinite_power(); + answer.mantissa = 0; + return answer; + } + // At this point in time q is in [powers::smallest_power_of_five, + // powers::largest_power_of_five]. + + // We want the most significant bit of w to be 1. Shift if needed. + int lz = leading_zeroes(w); + w <<= lz; + + // The required precision is binary::mantissa_explicit_bits() + 3 because + // 1. We need the implicit bit + // 2. We need an extra bit for rounding purposes + // 3. 
We might lose a bit due to the "upperbit" routine (result too small, + // requiring a shift) + + value128 product = + compute_product_approximation(q, w); + // The computed 'product' is always sufficient. + // Mathematical proof: + // Noble Mushtak and Daniel Lemire, Fast Number Parsing Without Fallback (to + // appear) See script/mushtak_lemire.py + + // The "compute_product_approximation" function can be slightly slower than a + // branchless approach: value128 product = compute_product(q, w); but in + // practice, we can win big with the compute_product_approximation if its + // additional branch is easily predicted. Which is best is data specific. + // upperbit is the top bit of product.high; the shift keeps + // mantissa_explicit_bits() + 3 bits when it is set and one bit fewer + // when it is clear. + int upperbit = int(product.high >> 63); + int shift = upperbit + 64 - binary::mantissa_explicit_bits() - 3; + + answer.mantissa = product.high >> shift; + + answer.power2 = int32_t(detail::power(int32_t(q)) + upperbit - lz - + binary::minimum_exponent()); + if (answer.power2 <= 0) { // we have a subnormal? + // Here we have that answer.power2 <= 0 so -answer.power2 >= 0 + if (-answer.power2 + 1 >= + 64) { // if we have more than 64 bits below the minimum exponent, you + // have a zero for sure. + answer.power2 = 0; + answer.mantissa = 0; + // result should be zero + return answer; + } + // next line is safe because -answer.power2 + 1 < 64 + answer.mantissa >>= -answer.power2 + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0 in the 32-bit + // and 64-bit case (with no more than 19 digits). + answer.mantissa += (answer.mantissa & 1); // round up + answer.mantissa >>= 1; + // There is a weird scenario where we are not subnormal, but only just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. 
Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + answer.power2 = + (answer.mantissa < (uint64_t(1) << binary::mantissa_explicit_bits())) + ? 0 + : 1; + return answer; + } + + // usually, we round *up*, but if we fall right in between and we have an + // even basis, we need to round down + // We are only concerned with the cases where 5**q fits in a single 64-bit word. + if ((product.low <= 1) && (q >= binary::min_exponent_round_to_even()) && + (q <= binary::max_exponent_round_to_even()) && + ((answer.mantissa & 3) == 1)) { // we may fall between two floats! + // To be in-between two floats we need that in doing + // answer.mantissa = product.high >> (upperbit + 64 - + // binary::mantissa_explicit_bits() - 3); + // ... we dropped out only zeroes. But if this happened, then we can go + // back!!! + if ((answer.mantissa << shift) == product.high) { + answer.mantissa &= ~uint64_t(1); // flip it so that we do not round up + } + } + + answer.mantissa += (answer.mantissa & 1); // round up + answer.mantissa >>= 1; + // rounding may have carried past the available mantissa width: fall back + // to the smallest mantissa with that bit set and bump the exponent. + if (answer.mantissa >= (uint64_t(2) << binary::mantissa_explicit_bits())) { + answer.mantissa = (uint64_t(1) << binary::mantissa_explicit_bits()); + answer.power2++; // undo previous addition + } + + // clear the bit just above the explicit mantissa bits. + answer.mantissa &= ~(uint64_t(1) << binary::mantissa_explicit_bits()); + if (answer.power2 >= binary::infinite_power()) { // infinity + answer.power2 = binary::infinite_power(); + answer.mantissa = 0; + } + return answer; +} + +} // namespace fast_float + +#endif + +#ifndef FASTFLOAT_BIGINT_H +#define FASTFLOAT_BIGINT_H + +#include +#include +#include +#include + + +namespace fast_float { + +// the limb width: we want efficient multiplication of double the bits in +// limb, or for 64-bit limbs, at least 64-bit multiplication where we can +// extract the high and low parts efficiently.
this is every 64-bit +// architecture except for sparc, which emulates 128-bit multiplication. +// we might have platforms where `CHAR_BIT` is not 8, so let's avoid +// doing `8 * sizeof(limb)`. +#if defined(FASTFLOAT_64BIT) && !defined(__sparc) +#define FASTFLOAT_64BIT_LIMB 1 +typedef uint64_t limb; +constexpr size_t limb_bits = 64; +#else +#define FASTFLOAT_32BIT_LIMB +typedef uint32_t limb; +constexpr size_t limb_bits = 32; +#endif + +// span of limbs; in this file it is built as limb_span(pointer, length) and +// read through .ptr, .len() and operator[]. +typedef span limb_span; + +// number of bits in a bigint. this needs to be at least the number +// of bits required to store the largest bigint, which is +// `log2(10**(digits + max_exp))`, or `log2(10**(767 + 342))`, or +// ~3600 bits, so we round to 4000. +constexpr size_t bigint_bits = 4000; +constexpr size_t bigint_limbs = bigint_bits / limb_bits; + +// vector-like type that is allocated on the stack. the entire +// buffer is pre-allocated, and only the length changes. +template struct stackvec { + limb data[size]; + // we never need more than 150 limbs, so a 16-bit length is enough + uint16_t length{0}; + + stackvec() = default; + // copying and moving are disabled. + stackvec(stackvec const &) = delete; + stackvec &operator=(stackvec const &) = delete; + stackvec(stackvec &&) = delete; + stackvec &operator=(stackvec &&other) = delete; + + // create stack vector from existing limb span. + // FASTFLOAT_ASSERT checks that the span fits (try_extend succeeded). + FASTFLOAT_CONSTEXPR20 stackvec(limb_span s) { + FASTFLOAT_ASSERT(try_extend(s)); + } + + // element access; the index is only checked by FASTFLOAT_DEBUG_ASSERT. + FASTFLOAT_CONSTEXPR14 limb &operator[](size_t index) noexcept { + FASTFLOAT_DEBUG_ASSERT(index < length); + return data[index]; + } + + FASTFLOAT_CONSTEXPR14 const limb &operator[](size_t index) const noexcept { + FASTFLOAT_DEBUG_ASSERT(index < length); + return data[index]; + } + + // index from the end of the container (rindex(0) is the last stored limb) + FASTFLOAT_CONSTEXPR14 const limb &rindex(size_t index) const noexcept { + FASTFLOAT_DEBUG_ASSERT(index < length); + size_t rindex = length - index - 1; + return data[rindex]; + } + + // set the length, without bounds checking. 
+ FASTFLOAT_CONSTEXPR14 void set_len(size_t len) noexcept { + length = uint16_t(len); + } + + constexpr size_t len() const noexcept { return length; } + + constexpr bool is_empty() const noexcept { return length == 0; } + + constexpr size_t capacity() const noexcept { return size; } + + // append item to vector, without bounds checking + FASTFLOAT_CONSTEXPR14 void push_unchecked(limb value) noexcept { + data[length] = value; + length++; + } + + // append item to vector, returning whether the item was added + // (false when the vector is already at capacity) + FASTFLOAT_CONSTEXPR14 bool try_push(limb value) noexcept { + if (len() < capacity()) { + push_unchecked(value); + return true; + } else { + return false; + } + } + + // add items to the vector, from a span, without bounds checking + FASTFLOAT_CONSTEXPR20 void extend_unchecked(limb_span s) noexcept { + limb *ptr = data + length; + std::copy_n(s.ptr, s.len(), ptr); + set_len(len() + s.len()); + } + + // try to add items to the vector, returning whether the items were added + // (false when the combined length would exceed the capacity) + FASTFLOAT_CONSTEXPR20 bool try_extend(limb_span s) noexcept { + if (len() + s.len() <= capacity()) { + extend_unchecked(s); + return true; + } else { + return false; + } + } + + // resize the vector, without bounds checking + // if the new size is longer than the vector, assign value to each + // appended item. + FASTFLOAT_CONSTEXPR20 + void resize_unchecked(size_t new_len, limb value) noexcept { + if (new_len > len()) { + size_t count = new_len - len(); + limb *first = data + len(); + limb *last = first + count; + ::std::fill(first, last, value); + set_len(new_len); + } else { + set_len(new_len); + } + } + + // try to resize the vector, returning whether the vector was resized + // (false when new_len exceeds the capacity). + FASTFLOAT_CONSTEXPR20 bool try_resize(size_t new_len, limb value) noexcept { + if (new_len > capacity()) { + return false; + } else { + resize_unchecked(new_len, value); + return true; + } + } + + // check if any limbs are non-zero after the given index. 
+ // this needs to be done in reverse order, since the index + // is relative to the most significant limbs. + FASTFLOAT_CONSTEXPR14 bool nonzero(size_t index) const noexcept { + while (index < len()) { + if (rindex(index) != 0) { + return true; + } + index++; + } + return false; + } + + // normalize the big integer, so most-significant zero limbs are removed. + FASTFLOAT_CONSTEXPR14 void normalize() noexcept { + while (len() > 0 && rindex(0) == 0) { + length--; + } + } +}; + +// hi64 of an empty value: returns 0 and reports no truncation. +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t +empty_hi64(bool &truncated) noexcept { + truncated = false; + return 0; +} + +// shift r0 left by its leading-zero count; no bits are dropped, so truncated +// is always false. +// NOTE(review): the result for r0 == 0 depends on leading_zeroes(0), which is +// defined elsewhere - confirm callers never pass 0. +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t +uint64_hi64(uint64_t r0, bool &truncated) noexcept { + truncated = false; + int shl = leading_zeroes(r0); + return r0 << shl; +} + +// top 64 bits of the two-word value (r0 high, r1 low) once the leading zeros +// of r0 are shifted out; truncated is set when any bit of r1 that did not fit +// in the result is non-zero. +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t +uint64_hi64(uint64_t r0, uint64_t r1, bool &truncated) noexcept { + int shl = leading_zeroes(r0); + if (shl == 0) { + truncated = r1 != 0; + return r0; + } else { + int shr = 64 - shl; + truncated = (r1 << shl) != 0; + return (r0 << shl) | (r1 >> shr); + } +} + +// 32-bit variants: the words are widened / packed into 64-bit values (r0 is +// the most significant word) and handed to uint64_hi64. +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t +uint32_hi64(uint32_t r0, bool &truncated) noexcept { + return uint64_hi64(r0, truncated); +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t +uint32_hi64(uint32_t r0, uint32_t r1, bool &truncated) noexcept { + uint64_t x0 = r0; + uint64_t x1 = r1; + return uint64_hi64((x0 << 32) | x1, truncated); +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t +uint32_hi64(uint32_t r0, uint32_t r1, uint32_t r2, bool &truncated) noexcept { + uint64_t x0 = r0; + uint64_t x1 = r1; + uint64_t x2 = r2; + return uint64_hi64(x0, (x1 << 32) | x2, truncated); +} + +// add two small integers, checking for overflow. +// we want an efficient operation. for msvc, where +// we don't have built-in intrinsics, this is still +// pretty fast.
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 limb +scalar_add(limb x, limb y, bool &overflow) noexcept { + limb z; +// gcc and clang +#if defined(__has_builtin) +#if __has_builtin(__builtin_add_overflow) + if (!cpp20_and_in_constexpr()) { + overflow = __builtin_add_overflow(x, y, &z); + return z; + } +#endif +#endif + + // generic, this still optimizes correctly on MSVC. + // unsigned addition wraps, so a sum smaller than x means it overflowed. + z = x + y; + overflow = z < x; + return z; +} + +// multiply two small integers, getting both the high and low bits. +// `carry` is in/out: its incoming value is added to the product x * y, and it +// is overwritten with the high limb of the result; the low limb is returned. +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 limb +scalar_mul(limb x, limb y, limb &carry) noexcept { +#ifdef FASTFLOAT_64BIT_LIMB +#if defined(__SIZEOF_INT128__) + // GCC and clang both define it as an extension. + __uint128_t z = __uint128_t(x) * __uint128_t(y) + __uint128_t(carry); + carry = limb(z >> limb_bits); + return limb(z); +#else + // fallback, no native 128-bit integer multiplication with carry. + // on msvc, this optimizes identically, somehow. + value128 z = full_multiplication(x, y); + bool overflow; + z.low = scalar_add(z.low, carry, overflow); + z.high += uint64_t(overflow); // cannot overflow + carry = z.high; + return z.low; +#endif +#else + // 32-bit limbs: the whole product plus carry is computed in a uint64_t. + uint64_t z = uint64_t(x) * uint64_t(y) + uint64_t(carry); + carry = limb(z >> limb_bits); + return limb(z); +#endif +} + +// add scalar value to bigint starting from offset. +// used in grade school multiplication +// the carry is propagated limb by limb from `start`; a carry that remains +// after the last limb is appended as a new limb. +// NOTE(review): FASTFLOAT_TRY is defined elsewhere; presumably it returns +// false from this function when try_push fails - confirm. +template +inline FASTFLOAT_CONSTEXPR20 bool small_add_from(stackvec &vec, limb y, + size_t start) noexcept { + size_t index = start; + limb carry = y; + bool overflow; + while (carry != 0 && index < vec.len()) { + vec[index] = scalar_add(vec[index], carry, overflow); + carry = limb(overflow); + index += 1; + } + if (carry != 0) { + FASTFLOAT_TRY(vec.try_push(carry)); + } + return true; +} + +// add scalar value to bigint (small_add_from with offset 0). +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool +small_add(stackvec &vec, limb y) noexcept { + return small_add_from(vec, y, 0); +} + +// multiply bigint by scalar value.
+template +inline FASTFLOAT_CONSTEXPR20 bool small_mul(stackvec &vec, + limb y) noexcept { + limb carry = 0; + for (size_t index = 0; index < vec.len(); index++) { + vec[index] = scalar_mul(vec[index], y, carry); + } + if (carry != 0) { + FASTFLOAT_TRY(vec.try_push(carry)); + } + return true; +} + +// add bigint to bigint starting from index. +// used in grade school multiplication +template +FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec &x, limb_span y, + size_t start) noexcept { + // the effective x buffer is `start..x.len()`; if y does not fit in + // that range, try to resize x to `y.len() + start` limbs first. + if (x.len() < start || y.len() > x.len() - start) { + FASTFLOAT_TRY(x.try_resize(y.len() + start, 0)); + } + + bool carry = false; + for (size_t index = 0; index < y.len(); index++) { + limb xi = x[index + start]; + limb yi = y[index]; + bool c1 = false; + bool c2 = false; + xi = scalar_add(xi, yi, c1); + if (carry) { + xi = scalar_add(xi, 1, c2); + } + x[index + start] = xi; + carry = c1 | c2; + } + + // propagate the final carry into the limbs above the summed range + if (carry) { + FASTFLOAT_TRY(small_add_from(x, 1, y.len() + start)); + } + return true; +} + +// add bigint to bigint. 
+template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool +large_add_from(stackvec &x, limb_span y) noexcept { + return large_add_from(x, y, 0); +} + +// grade-school multiplication algorithm +template +FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec &x, limb_span y) noexcept { + limb_span xs = limb_span(x.data, x.len()); + stackvec z(xs); + limb_span zs = limb_span(z.data, z.len()); + + if (y.len() != 0) { + limb y0 = y[0]; + FASTFLOAT_TRY(small_mul(x, y0)); + for (size_t index = 1; index < y.len(); index++) { + limb yi = y[index]; + stackvec zi; + if (yi != 0) { + // re-use the same buffer throughout + zi.set_len(0); + FASTFLOAT_TRY(zi.try_extend(zs)); + FASTFLOAT_TRY(small_mul(zi, yi)); + limb_span zis = limb_span(zi.data, zi.len()); + FASTFLOAT_TRY(large_add_from(x, zis, index)); + } + } + } + + x.normalize(); + return true; +} + +// grade-school multiplication algorithm +template +FASTFLOAT_CONSTEXPR20 bool large_mul(stackvec &x, limb_span y) noexcept { + if (y.len() == 1) { + FASTFLOAT_TRY(small_mul(x, y[0])); + } else { + FASTFLOAT_TRY(long_mul(x, y)); + } + return true; +} + +template struct pow5_tables { + static constexpr uint32_t large_step = 135; + static constexpr uint64_t small_power_of_5[] = { + 1UL, + 5UL, + 25UL, + 125UL, + 625UL, + 3125UL, + 15625UL, + 78125UL, + 390625UL, + 1953125UL, + 9765625UL, + 48828125UL, + 244140625UL, + 1220703125UL, + 6103515625UL, + 30517578125UL, + 152587890625UL, + 762939453125UL, + 3814697265625UL, + 19073486328125UL, + 95367431640625UL, + 476837158203125UL, + 2384185791015625UL, + 11920928955078125UL, + 59604644775390625UL, + 298023223876953125UL, + 1490116119384765625UL, + 7450580596923828125UL, + }; +#ifdef FASTFLOAT_64BIT_LIMB + constexpr static limb large_power_of_5[] = { + 1414648277510068013UL, 9180637584431281687UL, 4539964771860779200UL, + 10482974169319127550UL, 198276706040285095UL}; +#else + constexpr static limb large_power_of_5[] = { + 4279965485U, 329373468U, 4020270615U, 2137533757U, 4287402176U, + 
1057042919U, 1071430142U, 2440757623U, 381945767U, 46164893U}; +#endif +}; + +#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE + +template constexpr uint32_t pow5_tables::large_step; + +template constexpr uint64_t pow5_tables::small_power_of_5[]; + +template constexpr limb pow5_tables::large_power_of_5[]; + +#endif + +// big integer type. implements a small subset of big integer +// arithmetic, using simple algorithms since asymptotically +// faster algorithms are slower for a small number of limbs. +// all operations assume the big-integer is normalized. +struct bigint : pow5_tables<> { + // storage of the limbs, in little-endian order. + stackvec vec; + + FASTFLOAT_CONSTEXPR20 bigint() : vec() {} + + bigint(bigint const &) = delete; + bigint &operator=(bigint const &) = delete; + bigint(bigint &&) = delete; + bigint &operator=(bigint &&other) = delete; + + FASTFLOAT_CONSTEXPR20 bigint(uint64_t value) : vec() { +#ifdef FASTFLOAT_64BIT_LIMB + vec.push_unchecked(value); +#else + vec.push_unchecked(uint32_t(value)); + vec.push_unchecked(uint32_t(value >> 32)); +#endif + vec.normalize(); + } + + // get the high 64 bits from the vector, and if bits were truncated. + // this is to get the significant digits for the float. 
+ FASTFLOAT_CONSTEXPR20 uint64_t hi64(bool &truncated) const noexcept { +#ifdef FASTFLOAT_64BIT_LIMB + if (vec.len() == 0) { + return empty_hi64(truncated); + } else if (vec.len() == 1) { + return uint64_hi64(vec.rindex(0), truncated); + } else { + uint64_t result = uint64_hi64(vec.rindex(0), vec.rindex(1), truncated); + truncated |= vec.nonzero(2); + return result; + } +#else + if (vec.len() == 0) { + return empty_hi64(truncated); + } else if (vec.len() == 1) { + return uint32_hi64(vec.rindex(0), truncated); + } else if (vec.len() == 2) { + return uint32_hi64(vec.rindex(0), vec.rindex(1), truncated); + } else { + uint64_t result = + uint32_hi64(vec.rindex(0), vec.rindex(1), vec.rindex(2), truncated); + truncated |= vec.nonzero(3); + return result; + } +#endif + } + + // compare two big integers, reporting which one is larger. + // assumes both are normalized. if the return value is + // negative, other is larger, if the return value is + // positive, this is larger, otherwise they are equal. + // a longer limb vector always wins; for equal lengths, + // the limbs are stored in little-endian order, so we + // must compare the limbs in reverse order. + FASTFLOAT_CONSTEXPR20 int compare(bigint const &other) const noexcept { + if (vec.len() > other.vec.len()) { + return 1; + } else if (vec.len() < other.vec.len()) { + return -1; + } else { + for (size_t index = vec.len(); index > 0; index--) { + limb xi = vec[index - 1]; + limb yi = other.vec[index - 1]; + if (xi > yi) { + return 1; + } else if (xi < yi) { + return -1; + } + } + return 0; + } + } + + // shift left each limb n bits, carrying over to the new limb + // returns true if we were able to shift all the digits. + FASTFLOAT_CONSTEXPR20 bool shl_bits(size_t n) noexcept { + // Internally, for each item, we shift left by n, and add the previous + // right shifted limb-bits. 
+ // For example, we transform (for u8) shifted left 2, to: + // b10100100 b01000010 + // b10 b10010001 b00001000 + FASTFLOAT_DEBUG_ASSERT(n != 0); + FASTFLOAT_DEBUG_ASSERT(n < sizeof(limb) * 8); + + size_t shl = n; + size_t shr = limb_bits - shl; + limb prev = 0; + for (size_t index = 0; index < vec.len(); index++) { + limb xi = vec[index]; + vec[index] = (xi << shl) | (prev >> shr); + prev = xi; + } + + limb carry = prev >> shr; + if (carry != 0) { + return vec.try_push(carry); + } + return true; + } + + // move the limbs left by `n` limbs. + FASTFLOAT_CONSTEXPR20 bool shl_limbs(size_t n) noexcept { + FASTFLOAT_DEBUG_ASSERT(n != 0); + if (n + vec.len() > vec.capacity()) { + return false; + } else if (!vec.is_empty()) { + // move limbs + limb *dst = vec.data + n; + limb const *src = vec.data; + std::copy_backward(src, src + vec.len(), dst + vec.len()); + // fill in empty limbs + limb *first = vec.data; + limb *last = first + n; + ::std::fill(first, last, 0); + vec.set_len(n + vec.len()); + return true; + } else { + return true; + } + } + + // move the limbs left by `n` bits. + FASTFLOAT_CONSTEXPR20 bool shl(size_t n) noexcept { + size_t rem = n % limb_bits; + size_t div = n / limb_bits; + if (rem != 0) { + FASTFLOAT_TRY(shl_bits(rem)); + } + if (div != 0) { + FASTFLOAT_TRY(shl_limbs(div)); + } + return true; + } + + // get the number of leading zeros in the bigint. + FASTFLOAT_CONSTEXPR20 int ctlz() const noexcept { + if (vec.is_empty()) { + return 0; + } else { +#ifdef FASTFLOAT_64BIT_LIMB + return leading_zeroes(vec.rindex(0)); +#else + // no use defining a specialized leading_zeroes for a 32-bit type. + uint64_t r0 = vec.rindex(0); + return leading_zeroes(r0 << 32); +#endif + } + } + + // get the number of bits in the bigint. 
+ FASTFLOAT_CONSTEXPR20 int bit_length() const noexcept { + int lz = ctlz(); + return int(limb_bits * vec.len()) - lz; + } + + FASTFLOAT_CONSTEXPR20 bool mul(limb y) noexcept { return small_mul(vec, y); } + + FASTFLOAT_CONSTEXPR20 bool add(limb y) noexcept { return small_add(vec, y); } + + // multiply as if by 2 raised to a power. + FASTFLOAT_CONSTEXPR20 bool pow2(uint32_t exp) noexcept { return shl(exp); } + + // multiply as if by 5 raised to a power. + // the exponent is consumed in three stages: `large_step` at a time, + // then `small_step` at a time, then one table lookup for the rest. + FASTFLOAT_CONSTEXPR20 bool pow5(uint32_t exp) noexcept { + // multiply by `large_power_of_5` once per `large_step` of the exponent + size_t large_length = sizeof(large_power_of_5) / sizeof(limb); + limb_span large = limb_span(large_power_of_5, large_length); + while (exp >= large_step) { + FASTFLOAT_TRY(large_mul(vec, large)); + exp -= large_step; + } +#ifdef FASTFLOAT_64BIT_LIMB + uint32_t small_step = 27; + limb max_native = 7450580596923828125UL; +#else + uint32_t small_step = 13; + limb max_native = 1220703125U; +#endif + // multiply by `max_native` (5^small_step) once per `small_step` of the + // remaining exponent + while (exp >= small_step) { + FASTFLOAT_TRY(small_mul(vec, max_native)); + exp -= small_step; + } + if (exp != 0) { + // the remainder is below small_step, so a single lookup in + // `small_power_of_5` finishes the job. + // Work around clang bug https://godbolt.org/z/zedh7rrhc + // This is similar to https://github.com/llvm/llvm-project/issues/47746, + // except the workaround described there doesn't work here + FASTFLOAT_TRY(small_mul( + vec, limb(((void)small_power_of_5[0], small_power_of_5[exp])))); + } + + return true; + } + + // multiply as if by 10 raised to a power. 
+ FASTFLOAT_CONSTEXPR20 bool pow10(uint32_t exp) noexcept { + FASTFLOAT_TRY(pow5(exp)); + return pow2(exp); + } +}; + +} // namespace fast_float + +#endif + +#ifndef FASTFLOAT_DIGIT_COMPARISON_H +#define FASTFLOAT_DIGIT_COMPARISON_H + +#include +#include +#include +#include + + +namespace fast_float { + +// 1e0 to 1e19 +constexpr static uint64_t powers_of_ten_uint64[] = {1UL, + 10UL, + 100UL, + 1000UL, + 10000UL, + 100000UL, + 1000000UL, + 10000000UL, + 100000000UL, + 1000000000UL, + 10000000000UL, + 100000000000UL, + 1000000000000UL, + 10000000000000UL, + 100000000000000UL, + 1000000000000000UL, + 10000000000000000UL, + 100000000000000000UL, + 1000000000000000000UL, + 10000000000000000000UL}; + +// calculate the exponent, in scientific notation, of the number. +// this algorithm is not even close to optimized, but it has no practical +// effect on performance: in order to have a faster algorithm, we'd need +// to slow down performance for faster algorithms, and this is still fast. +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int32_t +scientific_exponent(uint64_t mantissa, int32_t exponent) noexcept { + while (mantissa >= 10000) { + mantissa /= 10000; + exponent += 4; + } + while (mantissa >= 100) { + mantissa /= 100; + exponent += 2; + } + while (mantissa >= 10) { + mantissa /= 10; + exponent += 1; + } + return exponent; +} + +// this converts a native floating-point number to an extended-precision float. 
+template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa +to_extended(T value) noexcept { + using equiv_uint = equiv_uint_t; + constexpr equiv_uint exponent_mask = binary_format::exponent_mask(); + constexpr equiv_uint mantissa_mask = binary_format::mantissa_mask(); + constexpr equiv_uint hidden_bit_mask = binary_format::hidden_bit_mask(); + + adjusted_mantissa am; + int32_t bias = binary_format::mantissa_explicit_bits() - + binary_format::minimum_exponent(); + equiv_uint bits; +#if FASTFLOAT_HAS_BIT_CAST + bits = std::bit_cast(value); +#else + ::memcpy(&bits, &value, sizeof(T)); +#endif + if ((bits & exponent_mask) == 0) { + // denormal + am.power2 = 1 - bias; + am.mantissa = bits & mantissa_mask; + } else { + // normal + am.power2 = int32_t((bits & exponent_mask) >> + binary_format::mantissa_explicit_bits()); + am.power2 -= bias; + am.mantissa = (bits & mantissa_mask) | hidden_bit_mask; + } + + return am; +} + +// get the extended precision value of the halfway point between b and b+u. +// we are given a native float that represents b, so we need to adjust it +// halfway between b and b+u. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa +to_extended_halfway(T value) noexcept { + adjusted_mantissa am = to_extended(value); + am.mantissa <<= 1; + am.mantissa += 1; + am.power2 -= 1; + return am; +} + +// round an extended-precision float to the nearest machine float. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am, + callback cb) noexcept { + int32_t mantissa_shift = 64 - binary_format::mantissa_explicit_bits() - 1; + if (-am.power2 >= mantissa_shift) { + // have a denormal float + int32_t shift = -am.power2 + 1; + cb(am, std::min(shift, 64)); + // check for round-up: if rounding-nearest carried us to the hidden bit. + am.power2 = (am.mantissa < + (uint64_t(1) << binary_format::mantissa_explicit_bits())) + ? 0 + : 1; + return; + } + + // have a normal float, use the default shift. 
+ cb(am, mantissa_shift); + + // check for carry + if (am.mantissa >= + (uint64_t(2) << binary_format::mantissa_explicit_bits())) { + am.mantissa = (uint64_t(1) << binary_format::mantissa_explicit_bits()); + am.power2++; + } + + // check for infinite: we could have carried to an infinite power + am.mantissa &= ~(uint64_t(1) << binary_format::mantissa_explicit_bits()); + if (am.power2 >= binary_format::infinite_power()) { + am.power2 = binary_format::infinite_power(); + am.mantissa = 0; + } +} + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void +round_nearest_tie_even(adjusted_mantissa &am, int32_t shift, + callback cb) noexcept { + uint64_t const mask = (shift == 64) ? UINT64_MAX : (uint64_t(1) << shift) - 1; + uint64_t const halfway = (shift == 0) ? 0 : uint64_t(1) << (shift - 1); + uint64_t truncated_bits = am.mantissa & mask; + bool is_above = truncated_bits > halfway; + bool is_halfway = truncated_bits == halfway; + + // shift digits into position + if (shift == 64) { + am.mantissa = 0; + } else { + am.mantissa >>= shift; + } + am.power2 += shift; + + bool is_odd = (am.mantissa & 1) == 1; + am.mantissa += uint64_t(cb(is_odd, is_halfway, is_above)); +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void +round_down(adjusted_mantissa &am, int32_t shift) noexcept { + if (shift == 64) { + am.mantissa = 0; + } else { + am.mantissa >>= shift; + } + am.power2 += shift; +} + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void +skip_zeros(UC const *&first, UC const *last) noexcept { + uint64_t val; + while (!cpp20_and_in_constexpr() && + std::distance(first, last) >= int_cmp_len()) { + ::memcpy(&val, first, sizeof(uint64_t)); + if (val != int_cmp_zeros()) { + break; + } + first += int_cmp_len(); + } + while (first != last) { + if (*first != UC('0')) { + break; + } + first++; + } +} + +// determine if any non-zero digits were truncated. +// all characters must be valid digits. 
+template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool +is_truncated(UC const *first, UC const *last) noexcept { + // do 8-bit optimizations, can just compare to 8 literal 0s. + uint64_t val; + while (!cpp20_and_in_constexpr() && + std::distance(first, last) >= int_cmp_len()) { + ::memcpy(&val, first, sizeof(uint64_t)); + if (val != int_cmp_zeros()) { + return true; + } + first += int_cmp_len(); + } + while (first != last) { + if (*first != UC('0')) { + return true; + } + ++first; + } + return false; +} + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool +is_truncated(span s) noexcept { + return is_truncated(s.ptr, s.ptr + s.len()); +} + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void +parse_eight_digits(UC const *&p, limb &value, size_t &counter, + size_t &count) noexcept { + value = value * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + counter += 8; + count += 8; +} + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void +parse_one_digit(UC const *&p, limb &value, size_t &counter, + size_t &count) noexcept { + value = value * 10 + limb(*p - UC('0')); + p++; + counter++; + count++; +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void +add_native(bigint &big, limb power, limb value) noexcept { + big.mul(power); + big.add(value); +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void +round_up_bigint(bigint &big, size_t &count) noexcept { + // need to round-up the digits, but need to avoid rounding + // ....9999 to ...10000, which could cause a false halfway point. + add_native(big, 10, 1); + count++; +} + +// parse the significant digits into a big integer +template +inline FASTFLOAT_CONSTEXPR20 void +parse_mantissa(bigint &result, parsed_number_string_t &num, + size_t max_digits, size_t &digits) noexcept { + // try to minimize the number of big integer and scalar multiplication. + // therefore, try to parse 8 digits at a time, and multiply by the largest + // scalar value (9 or 19 digits) for each step. 
+ size_t counter = 0; + digits = 0; + limb value = 0; +#ifdef FASTFLOAT_64BIT_LIMB + size_t step = 19; +#else + size_t step = 9; +#endif + + // process all integer digits. + UC const *p = num.integer.ptr; + UC const *pend = p + num.integer.len(); + skip_zeros(p, pend); + // process all digits, in increments of step per loop + while (p != pend) { + while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && + (max_digits - digits >= 8)) { + parse_eight_digits(p, value, counter, digits); + } + while (counter < step && p != pend && digits < max_digits) { + parse_one_digit(p, value, counter, digits); + } + if (digits == max_digits) { + // add the temporary value, then check if we've truncated any digits + add_native(result, limb(powers_of_ten_uint64[counter]), value); + bool truncated = is_truncated(p, pend); + if (num.fraction.ptr != nullptr) { + truncated |= is_truncated(num.fraction); + } + if (truncated) { + round_up_bigint(result, digits); + } + return; + } else { + add_native(result, limb(powers_of_ten_uint64[counter]), value); + counter = 0; + value = 0; + } + } + + // add our fraction digits, if they're available. 
+ if (num.fraction.ptr != nullptr) { + p = num.fraction.ptr; + pend = p + num.fraction.len(); + if (digits == 0) { + skip_zeros(p, pend); + } + // process all digits, in increments of step per loop + while (p != pend) { + while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && + (max_digits - digits >= 8)) { + parse_eight_digits(p, value, counter, digits); + } + while (counter < step && p != pend && digits < max_digits) { + parse_one_digit(p, value, counter, digits); + } + if (digits == max_digits) { + // add the temporary value, then check if we've truncated any digits + add_native(result, limb(powers_of_ten_uint64[counter]), value); + bool truncated = is_truncated(p, pend); + if (truncated) { + round_up_bigint(result, digits); + } + return; + } else { + add_native(result, limb(powers_of_ten_uint64[counter]), value); + counter = 0; + value = 0; + } + } + } + + if (counter != 0) { + add_native(result, limb(powers_of_ten_uint64[counter]), value); + } +} + +// digit comparison for a non-negative exponent: scale the big mantissa by +// `10^exponent`, take its high 64 bits, and round to nearest with ties to +// even. any truncated lower bits turn an apparent tie into a round-up. +template +inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa +positive_digit_comp(bigint &bigmant, int32_t exponent) noexcept { + FASTFLOAT_ASSERT(bigmant.pow10(uint32_t(exponent))); + adjusted_mantissa answer; + bool truncated; + answer.mantissa = bigmant.hi64(truncated); + int bias = binary_format::mantissa_explicit_bits() - + binary_format::minimum_exponent(); + answer.power2 = bigmant.bit_length() - 64 + bias; + + round(answer, [truncated](adjusted_mantissa &a, int32_t shift) { + round_nearest_tie_even( + a, shift, + [truncated](bool is_odd, bool is_halfway, bool is_above) -> bool { + return is_above || (is_halfway && truncated) || + (is_odd && is_halfway); + }); + }); + + return answer; +} + +// the scaling here is quite simple: we have, for the real digits `m * 10^e`, +// and for the theoretical digits `n * 2^f`. Since `e` is always negative, +// to scale them identically, we do `n * 2^f * 5^-e`, so we now have `m * 2^e`. 
+// we then need to scale by `2^(f - e)`, and then the two significant digits +// are of the same magnitude. +template +inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp( + bigint &bigmant, adjusted_mantissa am, int32_t exponent) noexcept { + bigint &real_digits = bigmant; + int32_t real_exp = exponent; + + // get the value of `b`, rounded down, and get a bigint representation of b+h + adjusted_mantissa am_b = am; + // gcc7 bug: use a lambda to remove the noexcept qualifier bug with + // -Wnoexcept-type. + round(am_b, + [](adjusted_mantissa &a, int32_t shift) { round_down(a, shift); }); + T b; + to_float(false, am_b, b); + adjusted_mantissa theor = to_extended_halfway(b); + bigint theor_digits(theor.mantissa); + int32_t theor_exp = theor.power2; + + // scale real digits and theor digits to be same power. + int32_t pow2_exp = theor_exp - real_exp; + uint32_t pow5_exp = uint32_t(-real_exp); + if (pow5_exp != 0) { + FASTFLOAT_ASSERT(theor_digits.pow5(pow5_exp)); + } + if (pow2_exp > 0) { + FASTFLOAT_ASSERT(theor_digits.pow2(uint32_t(pow2_exp))); + } else if (pow2_exp < 0) { + FASTFLOAT_ASSERT(real_digits.pow2(uint32_t(-pow2_exp))); + } + + // compare digits, and use it to direct rounding: above the halfway point + // rounds up, below rounds down, and an exact tie rounds to even. + int ord = real_digits.compare(theor_digits); + adjusted_mantissa answer = am; + round(answer, [ord](adjusted_mantissa &a, int32_t shift) { + round_nearest_tie_even( + a, shift, [ord](bool is_odd, bool _, bool __) -> bool { + (void)_; // not needed, since we've done our comparison + (void)__; // not needed, since we've done our comparison + if (ord > 0) { + return true; + } else if (ord < 0) { + return false; + } else { + return is_odd; + } + }); + }); + + return answer; +} + +// parse the significant digits as a big integer to unambiguously round +// the significant digits. here, we are trying to determine how to round +// an extended float representation close to `b+h`, halfway between `b` +// (the float rounded-down) and `b+u`, the next positive float. 
this +// algorithm is always correct, and uses one of two approaches. when +// the exponent is positive relative to the significant digits (such as +// 1234), we create a big-integer representation, get the high 64-bits, +// determine if any lower bits are truncated, and use that to direct +// rounding. in case of a negative exponent relative to the significant +// digits (such as 1.2345), we create a theoretical representation of +// `b+h` as a big-integer type, scaled to the same binary exponent as +// the actual digits. we then compare the big integer representations +// of both, and use that to direct rounding. +template +inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa +digit_comp(parsed_number_string_t &num, adjusted_mantissa am) noexcept { + // remove the invalid exponent bias + am.power2 -= invalid_am_bias; + + int32_t sci_exp = + scientific_exponent(num.mantissa, static_cast(num.exponent)); + size_t max_digits = binary_format::max_digits(); + size_t digits = 0; + bigint bigmant; + parse_mantissa(bigmant, num, max_digits, digits); + // can't underflow, since digits is at most max_digits. + int32_t exponent = sci_exp + 1 - int32_t(digits); + if (exponent >= 0) { + return positive_digit_comp(bigmant, exponent); + } else { + return negative_digit_comp(bigmant, am, exponent); + } +} + +} // namespace fast_float + +#endif + +#ifndef FASTFLOAT_PARSE_NUMBER_H +#define FASTFLOAT_PARSE_NUMBER_H + + +#include +#include +#include +#include + +namespace fast_float { + +namespace detail { +/** + * Special case +inf, -inf, nan, infinity, -infinity. + * The case comparisons could be made much faster given that we know that the + * strings are null-free and fixed. 
+ **/ +template +from_chars_result_t + FASTFLOAT_CONSTEXPR14 parse_infnan(UC const *first, UC const *last, + T &value, chars_format fmt) noexcept { + from_chars_result_t answer{}; + answer.ptr = first; + answer.ec = std::errc(); // be optimistic + // assume first < last, so dereference without checks; + bool const minusSign = (*first == UC('-')); + // C++17 20.19.3.(7.1) explicitly forbids '+' sign here + if ((*first == UC('-')) || + (uint64_t(fmt & chars_format::allow_leading_plus) && + (*first == UC('+')))) { + ++first; + } + if (last - first >= 3) { + if (fastfloat_strncasecmp3(first, str_const_nan())) { + answer.ptr = (first += 3); + value = minusSign ? -std::numeric_limits::quiet_NaN() + : std::numeric_limits::quiet_NaN(); + // Check for possible nan(n-char-seq-opt), C++17 20.19.3.7, + // C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan). + if (first != last && *first == UC('(')) { + for (UC const *ptr = first + 1; ptr != last; ++ptr) { + if (*ptr == UC(')')) { + answer.ptr = ptr + 1; // valid nan(n-char-seq-opt) + break; + } else if (!((UC('a') <= *ptr && *ptr <= UC('z')) || + (UC('A') <= *ptr && *ptr <= UC('Z')) || + (UC('0') <= *ptr && *ptr <= UC('9')) || *ptr == UC('_'))) + break; // forbidden char, not nan(n-char-seq-opt) + } + } + return answer; + } + if (fastfloat_strncasecmp3(first, str_const_inf())) { + if ((last - first >= 8) && + fastfloat_strncasecmp5(first + 3, str_const_inf() + 3)) { + answer.ptr = first + 8; + } else { + answer.ptr = first + 3; + } + value = minusSign ? -std::numeric_limits::infinity() + : std::numeric_limits::infinity(); + return answer; + } + } + answer.ec = std::errc::invalid_argument; + return answer; +} + +/** + * Returns true if the floating-point rounding mode is to 'nearest'. + * It is the default on most systems. This function is meant to be inexpensive. 
+ * Credit : @mwalcott3 + */ +fastfloat_really_inline bool rounds_to_nearest() noexcept { + // https://lemire.me/blog/2020/06/26/gcc-not-nearest/ +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + return false; +#endif + // See + // A fast function to check your floating-point rounding mode + // https://lemire.me/blog/2022/11/16/a-fast-function-to-check-your-floating-point-rounding-mode/ + // + // This function is meant to be equivalent to : + // prior: #include + // return fegetround() == FE_TONEAREST; + // However, it is expected to be much faster than the fegetround() + // function call. + // + // The volatile keyword prevents the compiler from computing the function + // at compile-time. + // There might be other ways to prevent compile-time optimizations (e.g., + // asm). The value does not need to be std::numeric_limits::min(), any + // small value so that 1 + x should round to 1 would do (after accounting for + // excess precision, as in 387 instructions). + static float volatile fmin = std::numeric_limits::min(); + float fmini = fmin; // we copy it so that it gets loaded at most once. +// +// Explanation: +// Only when fegetround() == FE_TONEAREST do we have that +// fmin + 1.0f == 1.0f - fmin. +// +// FE_UPWARD: +// fmin + 1.0f > 1 +// 1.0f - fmin == 1 +// +// FE_DOWNWARD or FE_TOWARDZERO: +// fmin + 1.0f == 1 +// 1.0f - fmin < 1 +// +// Note: This may fail to be accurate if fast-math has been +// enabled, as rounding conventions may not apply. +#ifdef FASTFLOAT_VISUAL_STUDIO +#pragma warning(push) +// todo: is there a VS warning? 
+// see +// https://stackoverflow.com/questions/46079446/is-there-a-warning-for-floating-point-equality-checking-in-visual-studio-2013 +#elif defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + return (fmini + 1.0f == 1.0f - fmini); +#ifdef FASTFLOAT_VISUAL_STUDIO +#pragma warning(pop) +#elif defined(__clang__) +#pragma clang diagnostic pop +#elif defined(__GNUC__) +#pragma GCC diagnostic pop +#endif +} + +} // namespace detail + +template struct from_chars_caller { + template + FASTFLOAT_CONSTEXPR20 static from_chars_result_t + call(UC const *first, UC const *last, T &value, + parse_options_t options) noexcept { + return from_chars_advanced(first, last, value, options); + } +}; + +#ifdef __STDCPP_FLOAT32_T__ +template <> struct from_chars_caller { + template + FASTFLOAT_CONSTEXPR20 static from_chars_result_t + call(UC const *first, UC const *last, std::float32_t &value, + parse_options_t options) noexcept { + // if std::float32_t is defined, and we are in C++23 mode; macro set for + // float32; set value to float due to equivalence between float and + // float32_t + float val; + auto ret = from_chars_advanced(first, last, val, options); + value = val; + return ret; + } +}; +#endif + +#ifdef __STDCPP_FLOAT64_T__ +template <> struct from_chars_caller { + template + FASTFLOAT_CONSTEXPR20 static from_chars_result_t + call(UC const *first, UC const *last, std::float64_t &value, + parse_options_t options) noexcept { + // if std::float64_t is defined, and we are in C++23 mode; macro set for + // float64; set value as double due to equivalence between double and + // float64_t + double val; + auto ret = from_chars_advanced(first, last, val, options); + value = val; + return ret; + } +}; +#endif + +template +FASTFLOAT_CONSTEXPR20 from_chars_result_t +from_chars(UC const *first, UC const *last, T &value, + 
chars_format fmt /*= chars_format::general*/) noexcept { + return from_chars_caller::call(first, last, value, + parse_options_t(fmt)); +} + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool +clinger_fast_path_impl(uint64_t mantissa, int64_t exponent, bool is_negative, + T &value) noexcept { + // The implementation of the Clinger's fast path is convoluted because + // we want round-to-nearest in all cases, irrespective of the rounding mode + // selected on the thread. + // We proceed optimistically, assuming that detail::rounds_to_nearest() + // returns true. + if (binary_format::min_exponent_fast_path() <= exponent && + exponent <= binary_format::max_exponent_fast_path()) { + // Unfortunately, the conventional Clinger's fast path is only possible + // when the system rounds to the nearest float. + // + // We expect the next branch to almost always be selected. + // We could check it first (before the previous branch), but + // there might be performance advantages at having the check + // be last. + if (!cpp20_and_in_constexpr() && detail::rounds_to_nearest()) { + // We have that fegetround() == FE_TONEAREST. + // Next is Clinger's fast path. + if (mantissa <= binary_format::max_mantissa_fast_path()) { + value = T(mantissa); + if (exponent < 0) { + value = value / binary_format::exact_power_of_ten(-exponent); + } else { + value = value * binary_format::exact_power_of_ten(exponent); + } + if (is_negative) { + value = -value; + } + return true; + } + } else { + // We do not have that fegetround() == FE_TONEAREST. + // Next is a modified Clinger's fast path, inspired by Jakub Jelínek's + // proposal + if (exponent >= 0 && + mantissa <= binary_format::max_mantissa_fast_path(exponent)) { +#if defined(__clang__) || defined(FASTFLOAT_32BIT) + // Clang may map 0 to -0.0 when fegetround() == FE_DOWNWARD + if (mantissa == 0) { + value = is_negative ? T(-0.) 
: T(0.); + return true; + } +#endif + value = T(mantissa) * binary_format::exact_power_of_ten(exponent); + if (is_negative) { + value = -value; + } + return true; + } + } + } + return false; +} + +/** + * This function overload takes parsed_number_string_t structure that is created + * and populated either by from_chars_advanced function taking chars range and + * parsing options or other parsing custom function implemented by user. + */ +template +FASTFLOAT_CONSTEXPR20 from_chars_result_t +from_chars_advanced(parsed_number_string_t &pns, T &value) noexcept { + static_assert(is_supported_float_type::value, + "only some floating-point types are supported"); + static_assert(is_supported_char_type::value, + "only char, wchar_t, char16_t and char32_t are supported"); + + from_chars_result_t answer; + + answer.ec = std::errc(); // be optimistic + answer.ptr = pns.lastmatch; + + if (!pns.too_many_digits && + clinger_fast_path_impl(pns.mantissa, pns.exponent, pns.negative, value)) + return answer; + + adjusted_mantissa am = + compute_float>(pns.exponent, pns.mantissa); + if (pns.too_many_digits && am.power2 >= 0) { + if (am != compute_float>(pns.exponent, pns.mantissa + 1)) { + am = compute_error>(pns.exponent, pns.mantissa); + } + } + // If we called compute_float>(pns.exponent, pns.mantissa) + // and we have an invalid power (am.power2 < 0), then we need to go the long + // way around again. This is very uncommon. + if (am.power2 < 0) { + am = digit_comp(pns, am); + } + to_float(pns.negative, am, value); + // Test for over/underflow. 
+ if ((pns.mantissa != 0 && am.mantissa == 0 && am.power2 == 0) || + am.power2 == binary_format::infinite_power()) { + answer.ec = std::errc::result_out_of_range; + } + return answer; +} + +template +FASTFLOAT_CONSTEXPR20 from_chars_result_t +from_chars_float_advanced(UC const *first, UC const *last, T &value, + parse_options_t options) noexcept { + + static_assert(is_supported_float_type::value, + "only some floating-point types are supported"); + static_assert(is_supported_char_type::value, + "only char, wchar_t, char16_t and char32_t are supported"); + + chars_format const fmt = detail::adjust_for_feature_macros(options.format); + + from_chars_result_t answer; + if (uint64_t(fmt & chars_format::skip_white_space)) { + while ((first != last) && fast_float::is_space(*first)) { + first++; + } + } + if (first == last) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } + parsed_number_string_t pns = + uint64_t(fmt & detail::basic_json_fmt) + ? parse_number_string(first, last, options) + : parse_number_string(first, last, options); + if (!pns.valid) { + if (uint64_t(fmt & chars_format::no_infnan)) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } else { + return detail::parse_infnan(first, last, value, fmt); + } + } + + // call overload that takes parsed_number_string_t directly. 
+ return from_chars_advanced(pns, value); +} + +template +FASTFLOAT_CONSTEXPR20 from_chars_result_t +from_chars(UC const *first, UC const *last, T &value, int base) noexcept { + + static_assert(is_supported_integer_type::value, + "only integer types are supported"); + static_assert(is_supported_char_type::value, + "only char, wchar_t, char16_t and char32_t are supported"); + + parse_options_t options; + options.base = base; + return from_chars_advanced(first, last, value, options); +} + +template +FASTFLOAT_CONSTEXPR20 + typename std::enable_if::value, T>::type + integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept { + T value; + if (clinger_fast_path_impl(mantissa, decimal_exponent, false, value)) + return value; + + adjusted_mantissa am = + compute_float>(decimal_exponent, mantissa); + to_float(false, am, value); + return value; +} + +template +FASTFLOAT_CONSTEXPR20 + typename std::enable_if::value, T>::type + integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept { + const bool is_negative = mantissa < 0; + const uint64_t m = static_cast(is_negative ? -mantissa : mantissa); + + T value; + if (clinger_fast_path_impl(m, decimal_exponent, is_negative, value)) + return value; + + adjusted_mantissa am = compute_float>(decimal_exponent, m); + to_float(is_negative, am, value); + return value; +} + +FASTFLOAT_CONSTEXPR20 inline double +integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept { + return integer_times_pow10(mantissa, decimal_exponent); +} + +FASTFLOAT_CONSTEXPR20 inline double +integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept { + return integer_times_pow10(mantissa, decimal_exponent); +} + +// the following overloads are here to avoid surprising ambiguity for int, +// unsigned, etc. 
+template +FASTFLOAT_CONSTEXPR20 + typename std::enable_if::value && + std::is_integral::value && + !std::is_signed::value, + T>::type + integer_times_pow10(Int mantissa, int decimal_exponent) noexcept { + return integer_times_pow10(static_cast(mantissa), + decimal_exponent); +} + +template +FASTFLOAT_CONSTEXPR20 + typename std::enable_if::value && + std::is_integral::value && + std::is_signed::value, + T>::type + integer_times_pow10(Int mantissa, int decimal_exponent) noexcept { + return integer_times_pow10(static_cast(mantissa), + decimal_exponent); +} + +template +FASTFLOAT_CONSTEXPR20 typename std::enable_if< + std::is_integral::value && !std::is_signed::value, double>::type +integer_times_pow10(Int mantissa, int decimal_exponent) noexcept { + return integer_times_pow10(static_cast(mantissa), decimal_exponent); +} + +template +FASTFLOAT_CONSTEXPR20 typename std::enable_if< + std::is_integral::value && std::is_signed::value, double>::type +integer_times_pow10(Int mantissa, int decimal_exponent) noexcept { + return integer_times_pow10(static_cast(mantissa), decimal_exponent); +} + +template +FASTFLOAT_CONSTEXPR20 from_chars_result_t +from_chars_int_advanced(UC const *first, UC const *last, T &value, + parse_options_t options) noexcept { + + static_assert(is_supported_integer_type::value, + "only integer types are supported"); + static_assert(is_supported_char_type::value, + "only char, wchar_t, char16_t and char32_t are supported"); + + chars_format const fmt = detail::adjust_for_feature_macros(options.format); + int const base = options.base; + + from_chars_result_t answer; + if (uint64_t(fmt & chars_format::skip_white_space)) { + while ((first != last) && fast_float::is_space(*first)) { + first++; + } + } + if (first == last || base < 2 || base > 36) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } + + return parse_int_string(first, last, value, options); +} + +template struct from_chars_advanced_caller { + 
static_assert(TypeIx > 0, "unsupported type"); +}; + +template <> struct from_chars_advanced_caller<1> { + template + FASTFLOAT_CONSTEXPR20 static from_chars_result_t + call(UC const *first, UC const *last, T &value, + parse_options_t options) noexcept { + return from_chars_float_advanced(first, last, value, options); + } +}; + +template <> struct from_chars_advanced_caller<2> { + template + FASTFLOAT_CONSTEXPR20 static from_chars_result_t + call(UC const *first, UC const *last, T &value, + parse_options_t options) noexcept { + return from_chars_int_advanced(first, last, value, options); + } +}; + +template +FASTFLOAT_CONSTEXPR20 from_chars_result_t +from_chars_advanced(UC const *first, UC const *last, T &value, + parse_options_t options) noexcept { + return from_chars_advanced_caller< + size_t(is_supported_float_type::value) + + 2 * size_t(is_supported_integer_type::value)>::call(first, last, value, + options); +} + +} // namespace fast_float + +#endif + diff --git a/symmetry/gemmi/unitcell.hpp b/gemmi_gph/gemmi/unitcell.hpp similarity index 100% rename from symmetry/gemmi/unitcell.hpp rename to gemmi_gph/gemmi/unitcell.hpp diff --git a/gemmi_gph/gemmi/util.hpp b/gemmi_gph/gemmi/util.hpp new file mode 100644 index 00000000..9d4e0a12 --- /dev/null +++ b/gemmi_gph/gemmi/util.hpp @@ -0,0 +1,315 @@ +// Copyright 2017 Global Phasing Ltd. +// +// Utilities. Mostly for working with strings and vectors. 
+ +#ifndef GEMMI_UTIL_HPP_ +#define GEMMI_UTIL_HPP_ + +#include +#include // for isspace +#include // for strncmp +#include // for equal, find, remove_if +#include // for begin, end, make_move_iterator +#include +#include + +namespace gemmi { + +// ##### string helpers ##### + +inline void append_to_str(std::string& out, int v) { out += std::to_string(v); } +inline void append_to_str(std::string& out, size_t v) { out += std::to_string(v); } +void append_to_str(std::string& out, double) = delete; +template +void append_to_str(std::string& out, const T& v) { out += v; } + +inline void cat_to(std::string&) {} +template +void cat_to(std::string& out, const T& value, Args const&... args) { + append_to_str(out, value); + cat_to(out, args...); +} +template +std::string cat(Args const&... args) { + std::string out; + cat_to(out, args...); + return out; +} + +inline bool starts_with(const std::string& str, const std::string& prefix) { + size_t sl = prefix.length(); + return str.length() >= sl && str.compare(0, sl, prefix) == 0; +} + +template bool starts_with(const char* a, const char (&b)[N]) { + return std::strncmp(a, b, N-1) == 0; +} + +inline bool ends_with(const std::string& str, const std::string& suffix) { + size_t sl = suffix.length(); + return str.length() >= sl && str.compare(str.length() - sl, sl, suffix) == 0; +} + +// can be faster than std::tolower() b/c it takes char not int +inline char lower(char c) { + if (c >= 'A' && c <= 'Z') + return c | 0x20; + return c; +} + +// works as expected only for a-zA-Z +inline char alpha_up(char c) { return c & ~0x20; } + +inline std::string to_lower(std::string str) { + for (char& c : str) + if (c >= 'A' && c <= 'Z') + c |= 0x20; + return str; +} + +inline std::string to_upper(std::string str) { + for (char& c : str) + if (c >= 'a' && c <= 'z') + c &= ~0x20; + return str; +} + +// case-insensitive character comparison +inline bool isame(char a, char b) { + return a == b || ((a^b) == 0x20 && (a|0x20) >= 'a' && (a|0x20) <= 
'z'); +} + +// Case-insensitive comparisons. The second arg must be lowercase. + +inline bool iequal_from(const std::string& str, size_t offset, const std::string& low) { + return str.length() == low.length() + offset && + std::equal(std::begin(low), std::end(low), str.begin() + offset, + [](char c1, char c2) { return c1 == lower(c2); }); +} + +inline bool iequal(const std::string& str, const std::string& low) { + return iequal_from(str, 0, low); +} + +inline bool istarts_with(const std::string& str, const std::string& prefix) { + return str.length() >= prefix.length() && + std::equal(std::begin(prefix), std::end(prefix), str.begin(), + [](char c1, char c2) { return c1 == lower(c2); }); +} +inline bool iends_with(const std::string& str, const std::string& suffix) { + size_t sl = suffix.length(); + return str.length() >= sl && + std::equal(std::begin(suffix), std::end(suffix), str.end() - sl, + [](char c1, char c2) { return c1 == lower(c2); }); +} + +inline bool giends_with(const std::string& str, const std::string& suffix) { + return iends_with(str, suffix) || iends_with(str, suffix + ".gz"); +} + +inline std::string trim_str(const std::string& str) { + const std::string ws = " \r\n\t"; + std::string::size_type first = str.find_first_not_of(ws); + if (first == std::string::npos) + return std::string{}; + std::string::size_type last = str.find_last_not_of(ws); + return str.substr(first, last - first + 1); +} + +inline std::string rtrim_str(const std::string& str) { + std::string::size_type last = str.find_last_not_of(" \r\n\t"); + return str.substr(0, last == std::string::npos ? 
0 : last + 1); +} + +// end is after the last character of the string (typically \0) +inline const char* rtrim_cstr(const char* start, const char* end=nullptr) { + if (!start) + return nullptr; + if (!end) { + end = start; + while (*end != '\0') + ++end; + } + while (end > start && std::isspace(end[-1])) + --end; + return end; +} + +namespace impl { +inline size_t length(char) { return 1; } +inline size_t length(const std::string& s) { return s.length(); } +} + +// takes a single separator (usually char or string); +// may return empty fields +template +void split_str_into(const std::string& str, S sep, + std::vector& result) { + std::size_t start = 0, end; + while ((end = str.find(sep, start)) != std::string::npos) { + result.emplace_back(str, start, end - start); + start = end + impl::length(sep); + } + result.emplace_back(str, start); +} + +template +std::vector split_str(const std::string& str, S sep) { + std::vector result; + split_str_into(str, sep, result); + return result; +} + +// _multi variants takes multiple 1-char separators as a string; +// discards empty fields +inline void split_str_into_multi(const std::string& str, const char* seps, + std::vector& result) { + std::size_t start = str.find_first_not_of(seps); + while (start != std::string::npos) { + std::size_t end = str.find_first_of(seps, start); + result.emplace_back(str, start, end - start); + start = str.find_first_not_of(seps, end); + } +} + +inline std::vector split_str_multi(const std::string& str, + const char* seps=" \t") { + std::vector result; + split_str_into_multi(str, seps, result); + return result; +} + +template +std::string join_str(T begin, T end, const S& sep, const F& getter) { + std::string r; + bool first = true; + for (T i = begin; i != end; ++i) { + if (!first) + r += sep; + r += getter(*i); + first = false; + } + return r; +} + +template +std::string join_str(T begin, T end, const S& sep) { + return join_str(begin, end, sep, [](const std::string& t) { return t; }); +} + 
+template +std::string join_str(const T& iterable, const S& sep, const F& getter) { + return join_str(iterable.begin(), iterable.end(), sep, getter); +} + +template +std::string join_str(const T& iterable, const S& sep) { + return join_str(iterable.begin(), iterable.end(), sep); +} + +template +void string_append_sep(std::string& str, S sep, const T& item) { + if (!str.empty()) + str += sep; + str += item; +} + +inline void replace_all(std::string &s, + const std::string &old, const std::string &new_) { + std::string::size_type pos = 0; + while ((pos = s.find(old, pos)) != std::string::npos) { + s.replace(pos, old.size(), new_); + pos += new_.size(); + } +} + +// list is a comma separated string +inline bool is_in_list(const std::string& name, const std::string& list, + char sep=',') { + if (name.length() >= list.length()) + return name == list; + for (size_t start=0, end=0; end != std::string::npos; start=end+1) { + end = list.find(sep, start); + if (list.compare(start, end - start, name) == 0) + return true; + } + return false; +} + +// ##### vector helpers ##### + +template +bool in_vector(const T& x, const std::vector& v) { + return std::find(v.begin(), v.end(), x) != v.end(); +} + +template +bool in_vector_f(F f, const std::vector& v) { + return std::find_if(v.begin(), v.end(), f) != v.end(); +} + +template +T* vector_end_ptr(std::vector& v) { return v.data() + v.size(); } +template +const T* vector_end_ptr(const std::vector& v) { return v.data() + v.size(); } + +template +void vector_move_extend(std::vector& dst, std::vector&& src) { + if (dst.empty()) + dst = std::move(src); + else + dst.insert(dst.end(), std::make_move_iterator(src.begin()), + std::make_move_iterator(src.end())); +} + +// wrapper around the erase-remove idiom +template +void vector_remove_if(std::vector& v, F&& condition) { + v.erase(std::remove_if(v.begin(), v.end(), condition), v.end()); +} + +/// \par data - 2d array (old_width x length) in a vector +/// Insert \par n new columns at 
position pos. +template +void vector_insert_columns(std::vector& data, size_t old_width, + size_t length, size_t n, size_t pos, const T& new_value) { + assert(data.size() == old_width * length); + assert(pos <= old_width); + data.resize(data.size() + n * length); + typename std::vector::iterator dst = data.end(); + for (size_t i = length; i-- != 0; ) { + for (size_t j = old_width; j-- != pos; ) + *--dst = data[i * old_width + j]; + for (size_t j = n; j-- != 0; ) + *--dst = new_value; + for (size_t j = pos; j-- != 0; ) + *--dst = data[i * old_width + j]; + } + assert(dst == data.begin()); +} +/// \par data - 2d array with new_width+1 columns, in a vector +/// Remove column at position pos. +template +void vector_remove_column(std::vector& data, size_t new_width, size_t pos) { + assert(pos <= new_width); + for (size_t source = pos + 1; source < data.size(); ++source) + for (size_t i = 0; i < new_width && source < data.size(); ++i) + data[pos++] = data[source++]; + data.resize(pos); +} + + +// ##### other helpers ##### + +// Numeric ID used for case-insensitive comparison of 4 letters. +// s must have 4 chars or 3 chars + NUL, ' ' and NUL are equivalent in s. +constexpr int ialpha4_id(const char* s) { + return (s[0] << 24 | s[1] << 16 | s[2] << 8 | s[3]) & ~0x20202020; +} +// Numeric ID used for case-insensitive comparison of 3 letters. +constexpr int ialpha3_id(const char* s) { + return (s[0] << 16 | s[1] << 8 | s[2]) & ~0x20202020; +} + +} // namespace gemmi +#endif diff --git a/gemmi_gph/gemmi/xds_ascii.hpp b/gemmi_gph/gemmi/xds_ascii.hpp new file mode 100644 index 00000000..38b7a93f --- /dev/null +++ b/gemmi_gph/gemmi/xds_ascii.hpp @@ -0,0 +1,183 @@ +// Copyright 2020 Global Phasing Ltd. +// +// Read XDS files: XDS_ASCII.HKL and INTEGRATE.HKL. 
+ +#ifndef GEMMI_XDS_ASCII_HPP_ +#define GEMMI_XDS_ASCII_HPP_ + +#include "input.hpp" // for AnyStream, FileStream +#include "unitcell.hpp" // for UnitCell +#include "util.hpp" // for starts_with + +namespace gemmi { + +// from Pointless docs: likely in-house source, in which case +// the unpolarised value is left unchanged (recognised wavelengths +// are CuKalpha 1.5418 +- 0.0019, Mo 0.7107 +- 0.0002, Cr 2.29 +- 0.01) +inline bool likely_in_house_source(double wavelength) { + return std::fabs(wavelength - 1.5418) < 0.0019 || + std::fabs(wavelength - 0.7107) < 0.0002 || + std::fabs(wavelength - 2.29) < 0.01; +} + +struct XdsAsciiMetadata { + struct Iset { + int id; + std::string input_file; + double wavelength = 0.; + std::array cell_constants = {0., 0., 0., 0., 0., 0.}; + //statistics set by gather_iset_statistics() + int frame_number_min = -1; + int frame_number_max = -1; + int frame_count = -1; + int reflection_count = -1; + + Iset(int id_) : id(id_) {} + }; + std::string source_path; + int read_columns = 0; // doesn't include ITEM_ISET from XSCALE + int spacegroup_number = 0; + double wavelength = 0.; + std::array cell_constants = {0., 0., 0., 0., 0., 0.}; + Mat33 cell_axes{0.}; + Vec3 incident_beam_dir; + double oscillation_range = 0.; + Vec3 rotation_axis; + double starting_angle = 0.; + double reflecting_range_esd = 0.; + char friedels_law = '\0'; + int starting_frame = 1; + int nx = 0; // detector size - number of pixels + int ny = 0; + double qx = 0.; // pixel size in mm + double qy = 0.; + double orgx = 0.; + double orgy = 0.; + double detector_distance = 0.; + std::string generated_by; + std::string version_str; + std::vector isets; +}; + +struct GEMMI_DLL XdsAscii : XdsAsciiMetadata { + struct Refl { + Miller hkl; + int iset = 1; + double iobs; + double sigma; + double xd; + double yd; + double zd; + double rlp; + double peak; + double corr; // is it always integer? 
+ double maxc; + + // ZD can be negative for a few reflections + int frame() const { return (int) std::floor(zd + 1); } + }; + std::vector data; + + XdsAscii() = default; + XdsAscii(const XdsAsciiMetadata& m) : XdsAsciiMetadata(m) {} + + Iset& find_or_add_iset(int id) { + for (Iset& i : isets) + if (i.id == id) + return i; + isets.emplace_back(id); + return isets.back(); + } + void read_stream(AnyStream& reader, const std::string& source); + + template + void read_input(T&& input) { + read_stream(*input.create_stream(), input.path()); + } + + bool is_merged() const { return read_columns < 8; } + + // set a few Iset properties in isets + void gather_iset_statistics(); + + double rot_angle(const Refl& refl) const { + double z = refl.zd - starting_frame + 1; + return starting_angle + oscillation_range * z; + } + + // it's already normalized, but just in case normalize it again + Vec3 get_rotation_axis() const { + double length = rotation_axis.length(); + if (length == 0) + fail("unknown rotation axis"); + return rotation_axis / length; + } + + // I'm not sure if always |incident_beam_dir| == 1/wavelength + Vec3 get_s0_direction() const { + double length = incident_beam_dir.length(); + if (length == 0) + fail("unknown incident beam direction"); + return incident_beam_dir / length; + } + + bool has_cell_axes() const { + for (int i = 0; i < 3; ++i) + if (cell_axes[i][0] == 0 && cell_axes[i][1] == 0 && cell_axes[i][2] == 0) + return false; + return true; + } + + /// Return transition matrix from "Cambridge" frame to XDS frame. 
+ /// x_xds = M x_cam + Mat33 calculate_conversion_from_cambridge() const { + // Cambridge z direction is along the principal rotation axis + Vec3 z = get_rotation_axis(); + // Cambridge z direction is along beam + Vec3 x = get_s0_direction(); + Vec3 y = z.cross(x).normalized(); + // beam and rotation axis may not be orthogonal + x = y.cross(z).normalized(); + return Mat33::from_columns(x, y, z); + } + + Mat33 get_orientation() const { + if (!has_cell_axes()) + fail("unknown unit cell axes"); + Vec3 a = cell_axes.row_copy(0); + Vec3 b = cell_axes.row_copy(1); + Vec3 c = cell_axes.row_copy(2); + Vec3 ar = b.cross(c).normalized(); + Vec3 br = c.cross(a); + Vec3 cr = ar.cross(br).normalized(); + br = cr.cross(ar); + return Mat33::from_columns(ar, br, cr); + } + + /// \par p is degree of polarization from range (0,1), as used in XDS. + void apply_polarization_correction(double p, Vec3 normal); + + /// \par overload is maximally allowed pixel value in a peak (MAXC). + void eliminate_overloads(double overload) { + vector_remove_if(data, [&](Refl& r) { return r.maxc > overload; }); + } + + /// \par batchmin lowest allowed batch number. + void eliminate_batchmin(int batchmin) { + double minz = batchmin - 1; + vector_remove_if(data, [&](Refl& r) { return r.zd < minz; }); + } +}; + +inline XdsAscii read_xds_ascii_file(const std::string& path) { + XdsAscii ret; + FileStream stream(path.c_str(), "rb"); + ret.read_stream(stream, path); + return ret; +} + +/// read possibly gzipped file +GEMMI_DLL XdsAscii read_xds_ascii(const std::string& path); + +} // namespace gemmi +#endif diff --git a/gemmi_gph/gz.cpp b/gemmi_gph/gz.cpp new file mode 100644 index 00000000..d5e4123e --- /dev/null +++ b/gemmi_gph/gz.cpp @@ -0,0 +1,189 @@ +// Copyright Global Phasing Ltd. 
+ +#include +#include +#include // fseek, ftell, fread +#include // INT_MAX +#if USE_ZLIB_NG +# define WITH_GZFILEOP 1 +# include +# define GG(name) zng_ ## name +#else +# include +# define GG(name) name +#endif +#include // file_open + +namespace gemmi { + +const char* const zlib_description = +#if USE_ZLIB_NG + "zlib-ng " ZLIBNG_VERSION; +#else + "zlib " ZLIB_VERSION; +#endif + +// Throws if the size is not found or if it is suspicious. +// Anything outside of the arbitrary limits from 1 to 10x of the compressed +// size looks suspicious to us. +// **This function should not be relied upon.** +// In particular, if the return values is >= 4GiB - it's only a guess. +size_t estimate_uncompressed_size(const std::string& path) { + fileptr_t f = file_open(path.c_str(), "rb"); + unsigned char buf[4]; + if (std::fread(buf, 1, 2, f.get()) != 2) + sys_fail("Failed to read: " + path); + if (buf[0] != 0x1f || buf[1] != 0x8b) + fail("File not in the gzip format: " + path); + if (std::fseek(f.get(), -4, SEEK_END) != 0) + sys_fail("fseek() failed (empty file?): " + path); + long pos = std::ftell(f.get()); + if (pos <= 0) + sys_fail("ftell() failed on " + path); + size_t gzipped_size = pos + 4; + if (std::fread(buf, 1, 4, f.get()) != 4) + sys_fail("Failed to read last 4 bytes of: " + path); + unsigned orig_size = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + if (orig_size + 100 < gzipped_size || orig_size > 100 * gzipped_size) { + // The size is stored as 32-bit number. If the original size exceeds 4GiB, + // the stored number is modulo 4 GiB. So we just guess... + constexpr size_t max_uint = 4294967295U; + if (gzipped_size > max_uint / 6) + return max_uint + (sizeof(size_t) > 4 ? 
orig_size : 0); + fail("Cannot determine uncompressed size of " + path + + "\nWould it be " + std::to_string(gzipped_size) + " -> " + + std::to_string(orig_size) + " bytes?"); + } + return orig_size; +} + +static size_t big_gzread(gzFile file, void* buf, size_t len) { +#if USE_ZLIB_NG + return GG(gzfread)(buf, 1, len, file); +#else + // In zlib >= 1.2.9 we could use gzfread() + size_t read_bytes = 0; + while (len > INT_MAX) { + int ret = gzread(file, buf, INT_MAX); + read_bytes += ret; + if (ret != INT_MAX) + return read_bytes; + len -= INT_MAX; + buf = (char*) buf + INT_MAX; + } + read_bytes += gzread(file, buf, (unsigned) len); + return read_bytes; +#endif +} + +char* GzStream::gets(char* line, int size) { + return GG(gzgets)((gzFile)f, line, size); +} + +int GzStream::getc() { + return GG(gzgetc)((gzFile)f); +} + +bool GzStream::read(void* buf, size_t len) { + return big_gzread((gzFile)f, buf, len) == len; +} + +bool GzStream::skip(size_t n) { + return GG(gzseek)((gzFile)f, n, SEEK_CUR) != -1; +} + +long GzStream::tell() { + return GG(gztell)((gzFile)f); +} + +std::string GzStream::read_rest() { + std::string retval; + int c = getc(); + if (c != EOF) { + retval += (char)c; + char buf[512]; + for (;;) { + size_t n = big_gzread((gzFile)f, buf, sizeof(buf)); + retval.append(buf, n); + if (n != sizeof(buf)) + break; + } + } + return retval; +} + + +MaybeGzipped::MaybeGzipped(const std::string& path) : BasicInput(path) {} + +MaybeGzipped::~MaybeGzipped() { + if (file_) +#if USE_ZLIB_NG || (ZLIB_VERNUM >= 0x1235) + GG(gzclose_r)((gzFile)file_); +#else + gzclose((gzFile)file_); +#endif +} + +size_t MaybeGzipped::gzread_checked(void* buf, size_t len) { + gzFile file = (gzFile) file_; + size_t read_bytes = big_gzread(file, buf, len); + if (read_bytes != len && !GG(gzeof)(file)) { + int errnum = 0; + std::string err_str = GG(gzerror)(file, &errnum); + if (errnum == Z_ERRNO) + sys_fail("failed to read " + path()); + if (errnum) + fail("Error reading " + path() + ": " + 
err_str); + } + if (read_bytes > len) // should never happen + fail("Error reading " + path()); + return read_bytes; +} + +CharArray MaybeGzipped::uncompress_into_buffer(size_t limit) { + if (!is_compressed()) + return BasicInput::uncompress_into_buffer(); + size_t size = (limit == 0 ? estimate_uncompressed_size(path()) : limit); + file_ = GG(gzopen)(path().c_str(), "rb"); + if (!file_) + sys_fail("Failed to gzopen " + path()); + if (size > 3221225471) + // if this exception is changed adjust prog/cif2mtz.cpp + fail("For now gz files above 3 GiB uncompressed are not supported.\n" + "To read " + path() + " first uncompress it."); + CharArray mem(size); + size_t read_bytes = gzread_checked(mem.data(), size); + // if the file is shorter than the size from header, adjust size + if (read_bytes < size) { + mem.set_size(read_bytes); // should we call resize() here + } else if (limit == 0) { // read_bytes == size + // if the file is longer than the size from header, read in the rest + int next_char; + while (!GG(gzeof)((gzFile)file_) && (next_char = GG(gzgetc)((gzFile)file_)) != -1) { + if (mem.size() > 3221225471) + fail("For now gz files above 3 GiB uncompressed are not supported.\n" + "To read " + path() + " first uncompress it."); + GG(gzungetc)(next_char, (gzFile)file_); + size_t old_size = mem.size(); + mem.resize(2 * old_size); + size_t n = gzread_checked(mem.data() + old_size, old_size); + mem.set_size(old_size + n); + } + } + return mem; +} + +std::unique_ptr MaybeGzipped::create_stream() { + if (is_compressed()) { + file_ = GG(gzopen)(path().c_str(), "rb"); + if (!file_) + sys_fail("Failed to gzopen " + path()); +#if ZLIB_VERNUM >= 0x1235 + GG(gzbuffer)((gzFile)file_, 64*1024); +#endif + return std::unique_ptr(new GzStream(file_)); + } + return BasicInput::create_stream(); +} + +} // namespace gemmi diff --git a/gemmi_gph/mtz.cpp b/gemmi_gph/mtz.cpp new file mode 100644 index 00000000..04fd5bc8 --- /dev/null +++ b/gemmi_gph/mtz.cpp @@ -0,0 +1,991 @@ +// Copyright 
2019-2023 Global Phasing Ltd. + +#include +#include // for memcpy +#include // for stable_sort +#include // for fast_atof +#include // for simple_atoi, read_word +#include +#include + +namespace gemmi { + +namespace { + +double wrap_degrees(double phi) { + if (phi >= 0 && phi < 360.) + return phi; + return phi - std::floor(phi / 360.) * 360.; +} + +void shift_phase(float& phi, double shift, bool negate=false) { + double phi_ = phi + deg(shift); + phi = float(wrap_degrees(negate ? -phi_ : phi_)); +} + +// apply phase shift to Hendrickson–Lattman coefficients HLA, HLB, HLC and HLD +void shift_hl_coefficients(float& a, float& b, float& c, float& d, + double shift, bool negate=false) { + double sinx = std::sin(shift); + double cosx = std::cos(shift); + double sin2x = 2 * sinx * cosx; + double cos2x = sq(cosx)- sq(sinx); + // a sin(x+y) + b cos(x+y) = a sin(x) cos(y) - b sin(x) sin(y) + // + a cos(x) sin(y) + b cos(x) cos(y) + float a_ = float(a * cosx - b * sinx); + float b_ = float(a * sinx + b * cosx); + float c_ = float(c * cos2x - d * sin2x); + float d_ = float(c * sin2x + d * cos2x); + a = a_; // cos(phi) + b = negate ? -b_ : b_; // sin(phi) + c = c_; // cos(2 phi) + d = negate ? 
-d_ : d_; // sin(2 phi) +} + +// this function is generic because it was used in other places in the past +template ::value_type> +std::array calculate_min_max_disregarding_nans(T begin, T end) { + std::array minmax = {{NAN, NAN}}; + T i = begin; + while (i != end && std::isnan(*i)) + ++i; + if (i != end) { + minmax[0] = minmax[1] = *i; + while (++i != end) { + if (*i < minmax[0]) + minmax[0] = *i; + else if (*i > minmax[1]) + minmax[1] = *i; + } + } + return minmax; +} + +const char* skip_word_and_space(const char* line) { + while (*line != '\0' && !std::isspace(*line)) + ++line; + while (std::isspace(*line)) + ++line; + return line; +} + +UnitCell read_cell_parameters(const char* line) { + double a = fast_atof(line, &line); + double b = fast_atof(line, &line); + double c = fast_atof(line, &line); + double alpha = fast_atof(line, &line); + double beta = fast_atof(line, &line); + double gamma = fast_atof(line, &line); + return UnitCell(a, b, c, alpha, beta, gamma); +} + +} // anonymous namespace + +UnitCellParameters Mtz::get_average_cell_from_batch_headers(double* rmsd) const { + if (rmsd) + for (int i = 0; i < 6; ++i) + rmsd[i] = 0.; + std::array avg = {0., 0., 0., 0., 0., 0.}; + for (const Batch& batch : batches) + for (int i = 0; i < 6; ++i) { + // if batch headers are not set correctly, return global cell + if (batch.floats[i] <= 0) + return cell; + avg[i] += batch.floats[i]; + } + if (avg[0] <= 0 || avg[1] <= 0 || avg[2] <= 0 || + avg[3] <= 0 || avg[4] <= 0 || avg[5] <= 0) + return UnitCellParameters(); + size_t n = batches.size(); + for (int i = 0; i < 6; ++i) + avg[i] /= n; + if (rmsd) { + for (const Batch& batch : batches) + for (int i = 0; i < 6; ++i) + rmsd[i] += sq(avg[i] - batch.floats[i]); + for (int i = 0; i < 6; ++i) + rmsd[i] = std::sqrt(rmsd[i] / n); + } + // If average parameters are almost equal to the global cell, use the latter + // to avoid 32-bit precision artifacts (58.28 -> 58.279998). 
+ if (UnitCellParameters(avg).approx(cell, 1e-4)) + return cell; + return UnitCellParameters(avg); +} + +std::array Mtz::calculate_min_max_1_d2() const { + auto extend_min_max_1_d2 = [&](const UnitCell& uc, double& min, double& max) { + for (size_t i = 0; i < data.size(); i += columns.size()) { + double res = uc.calculate_1_d2_double(data[i+0], data[i+1], data[i+2]); + if (res < min) + min = res; + if (res > max) + max = res; + } + }; + if (!has_data() || columns.size() < 3) + fail("No data."); + double min_value = INFINITY; + double max_value = 0.; + if (cell.is_crystal() && cell.a > 0) + extend_min_max_1_d2(cell, min_value, max_value); + const UnitCell* prev_cell = nullptr; + for (const Dataset& ds : datasets) + if (ds.cell.is_crystal() && ds.cell.a > 0 && ds.cell != cell && + (!prev_cell || ds.cell != *prev_cell)) { + extend_min_max_1_d2(ds.cell, min_value, max_value); + prev_cell = &ds.cell; + } + if (min_value == INFINITY) + min_value = 0; + return {{min_value, max_value}}; +} + +void Mtz::read_first_bytes(AnyStream& stream) { + char buf[20] = {0}; + + if (!stream.read(buf, 20)) + fail("Could not read the MTZ file (is it empty?)"); + if (buf[0] != 'M' || buf[1] != 'T' || buf[2] != 'Z' || buf[3] != ' ') + fail("Not an MTZ file - it does not start with 'MTZ '"); + + // Bytes 9-12 have so-called machine stamp: + // "The first 4 half-bytes represent the real, complex, integer and + // character formats". + // We don't try to handle all the combinations here, only the two most + // common: big endian (for all types) and little endian (for all types). + // BE is denoted by 1 and LE by 4. + // If we get a value different than 1 and 4 we assume the native byte order. + if ((buf[9] & 0xf0) == (is_little_endian() ? 
0x10 : 0x40)) + toggle_endianness(); + + std::int32_t tmp_header_offset; + std::memcpy(&tmp_header_offset, buf + 4, 4); + if (!same_byte_order) + swap_four_bytes(&tmp_header_offset); + + if (tmp_header_offset == -1) { + std::memcpy(&header_offset, buf + 12, 8); + if (!same_byte_order) { + swap_eight_bytes(&header_offset); + } + } else { + header_offset = (int64_t) tmp_header_offset; + } + stream.skip(60); +} + +void Mtz::read_main_headers(AnyStream& stream, std::vector* save_headers) { + char line[81] = {0}; + std::ptrdiff_t header_pos = 4 * std::ptrdiff_t(header_offset - 1); + // temporary check + long cur_pos = stream.tell(); + if (cur_pos != header_pos && cur_pos != -1) + fail(cat("wrong pos ", int(header_pos), " ", int(stream.tell()))); + int ncol = 0; + bool has_batch = false; + while (stream.read(line, 80)) { + if (save_headers) + save_headers->emplace_back(line, line+80); + if (ialpha3_id(line) == ialpha3_id("END")) + break; + const char* args = skip_word_and_space(line); + switch (ialpha4_id(line)) { + case ialpha4_id("VERS"): + version_stamp = rtrim_str(args); + break; + case ialpha4_id("TITL"): + title = rtrim_str(args); + break; + case ialpha4_id("NCOL"): { + ncol = simple_atoi(args, &args); + nreflections = simple_atoi(args, &args); + int nbatches = simple_atoi(args); + if (nbatches < 0 || nbatches > 10000000) // sanity check + fail("Wrong NCOL header"); + batches.resize(nbatches); + break; + } + case ialpha4_id("CELL"): + cell = read_cell_parameters(args); + break; + case ialpha4_id("SORT"): + for (int& n : sort_order) + n = simple_atoi(args, &args); + break; + case ialpha4_id("SYMI"): { + nsymop = simple_atoi(args, &args); + symops.reserve(nsymop); + simple_atoi(args, &args); // ignore number of primitive operations + args = skip_word_and_space(skip_blank(args)); // ignore lattice type + spacegroup_number = simple_atoi(args, &args); + args = skip_blank(args); + if (*args != '\'') + spacegroup_name = read_word(args); + else if (const char* end = 
std::strchr(++args, '\'')) + spacegroup_name.assign(args, end); + // ignore point group which is at the end of args + break; + } + case ialpha4_id("SYMM"): + symops.push_back(parse_triplet(args)); + break; + case ialpha4_id("RESO"): + min_1_d2 = fast_atof(args, &args); + max_1_d2 = fast_atof(args, &args); + break; + case ialpha4_id("VALM"): + if (*args != 'N') { + const char* endptr; + float v = (float) fast_atof(args, &endptr); + if (*endptr == '\0' || is_space(*endptr)) + valm = v; + else + logger.note("Unexpected VALM value: " + rtrim_str(args)); + } + break; + case ialpha4_id("COLU"): { + columns.emplace_back(); + Column& col = columns.back(); + col.label = read_word(args, &args); + col.type = read_word(args, &args)[0]; + col.min_value = (float) fast_atof(args, &args); + col.max_value = (float) fast_atof(args, &args); + col.dataset_id = simple_atoi(args); + col.parent = this; + col.idx = columns.size() - 1; + break; + } + case ialpha4_id("COLS"): + // COLSRC is undocumented. CMTZ (libccp4) adds it after COLUMN: + // COLUMN IMEAN J -300.600006 4619 1 + // COLSRC IMEAN CREATED_07/08/2019_11:00:23 1 + if (!columns.empty() && columns.back().label == read_word(args, &args)) + columns.back().source = read_word(args); + else + logger.note("MTZ: COLSRC is not after matching COLUMN"); + break; + case ialpha4_id("COLG"): + // Column group - not used. 
+ break; + case ialpha4_id("NDIF"): + datasets.reserve(simple_atoi(args)); + break; + case ialpha4_id("PROJ"): + datasets.emplace_back(); + datasets.back().id = simple_atoi(args, &args); + datasets.back().project_name = read_word(skip_word_and_space(args)); + datasets.back().wavelength = 0.0; + break; + case ialpha4_id("CRYS"): + if (simple_atoi(args, &args) == last_dataset().id) + datasets.back().crystal_name = read_word(args); + else + logger.note("MTZ CRYSTAL line: unusual numbering."); + break; + case ialpha4_id("DATA"): + if (simple_atoi(args, &args) == last_dataset().id) + datasets.back().dataset_name = read_word(args); + else + logger.note("MTZ DATASET line: unusual numbering."); + break; + case ialpha4_id("DCEL"): + if (simple_atoi(args, &args) == last_dataset().id) + datasets.back().cell = read_cell_parameters(args); + else + logger.note("MTZ DCELL line: unusual numbering."); + break; + // case("DRES"): not in use yet + case ialpha4_id("DWAV"): + if (simple_atoi(args, &args) == last_dataset().id) + datasets.back().wavelength = fast_atof(args); + else + logger.note("MTZ DWAV line: unusual numbering."); + break; + case ialpha4_id("BATCH"): + // We take number of batches from the NCOL record and serial numbers + // from BH. This header could be used only to check consistency. 
+ has_batch = true; + break; + default: + logger.note("Unknown header: " + rtrim_str(line)); + } + } + if (ncol != (int) columns.size()) + fail("Number of COLU records inconsistent with NCOL record."); + if (has_batch != !batches.empty()) + fail("BATCH header inconsistent with NCOL record."); + // adjust data size, if necessary + if (!data.empty()) { + size_t expected_size = columns.size() * nreflections; + if (data.size() > expected_size) + data.resize(expected_size); + else if (data.size() < expected_size) + fail("internal error, wrong data size"); + } +} + +void Mtz::read_history_and_batch_headers(AnyStream& stream) { + char buf[81] = {0}; + int n_headers = 0; + while (stream.read(buf, 80) && ialpha4_id(buf) != ialpha4_id("MTZE")) { + if (n_headers != 0) { + const char* start = skip_blank(buf); + const char* end = rtrim_cstr(start, start+80); + history.emplace_back(start, end); + --n_headers; + } else if (ialpha4_id(buf) == ialpha4_id("MTZH")) { + n_headers = simple_atoi(skip_word_and_space(buf+4)); + if (n_headers < 0 || n_headers > 30) { + logger.note("Wrong MTZ: number of headers should be between 0 and 30"); + return; + } + history.reserve(n_headers); + } else if (ialpha4_id(buf) == ialpha4_id("MTZB")) { + for (Batch& batch : batches) { + stream.read(buf, 80); + if (ialpha3_id(buf) != ialpha3_id("BH ")) + fail("Missing BH header"); + const char* args = skip_blank(buf + 2); + batch.number = simple_atoi(args, &args); + int total_words = simple_atoi(args, &args); + int int_words = simple_atoi(args, &args); + int float_words = simple_atoi(args); + if (total_words != int_words + float_words || total_words > 1000) + fail("Wrong BH header"); + stream.read(buf, 80); // TITLE + const char* end = rtrim_cstr(buf + 6, buf+76); + batch.title.assign(buf, end - buf); + batch.ints.resize(int_words); + stream.read(batch.ints.data(), int_words * 4); + batch.floats.resize(float_words); + stream.read(batch.floats.data(), float_words * 4); + stream.read(buf, 80); + if 
(ialpha4_id(buf) != ialpha4_id("BHCH")) + fail("Missing BHCH header"); + split_str_into_multi(buf + 5, " \t", batch.axes); + } + } + } + appended_text = stream.read_rest(); +} + +void Mtz::setup_spacegroup() { + spacegroup = find_spacegroup_by_name(spacegroup_name, cell.alpha, cell.gamma); + if (!spacegroup) { + logger.note("MTZ: unrecognized spacegroup name: " + spacegroup_name); + return; + } + if (spacegroup->ccp4 != spacegroup_number) + logger.note("MTZ: inconsistent spacegroup name and number"); + cell.set_cell_images_from_spacegroup(spacegroup); + for (Dataset& d : datasets) + d.cell.set_cell_images_from_spacegroup(spacegroup); +} + +// we should be at byte 80 +void Mtz::read_raw_data(AnyStream& stream, bool do_read) { + size_t n = size_t(header_offset - 1 - 20); + if (!do_read) { + if (!stream.skip(4 * n)) + fail("ignoring mtz data segment failed"); + return; + } + data.resize(n); + if (!stream.read(data.data(), 4 * n)) + fail("Error when reading MTZ data"); + if (!same_byte_order) + for (float& f : data) + swap_four_bytes(&f); +} + +void Mtz::read_stream(AnyStream& stream, bool with_data) { + read_first_bytes(stream); + // The older implementation of MTZ reading first read the headers, + // then the data. This required jumping to the headers at the end, + // then back to the beginning of the data (byte 80). + // The current implementation avoids calling seek(), allowing + // incremental reading of streams (stdin, gzipped files, etc). 
+ read_raw_data(stream, with_data); + read_main_headers(stream, nullptr); + read_history_and_batch_headers(stream); + setup_spacegroup(); + if (datasets.empty()) + datasets.push_back({0, "HKL_base", "HKL_base", "HKL_base", cell, 0.}); +} + +// for probing/testing individual reflections, no need to optimize it +size_t Mtz::find_offset_of_hkl(const Miller& hkl, size_t start) const { + if (!has_data() || columns.size() < 3) + fail("No data."); + if (start != 0) + start -= (start % columns.size()); + for (size_t n = start; n + 2 < data.size(); n += columns.size()) + if (get_hkl(n) == hkl) + return n; + return (size_t)-1; +} + +void Mtz::ensure_asu(bool tnt_asu) { + if (!is_merged()) + fail("Mtz::ensure_asu() is for merged MTZ only"); + if (!spacegroup) + return; + GroupOps gops = spacegroup->operations(); + ReciprocalAsu asu(spacegroup, tnt_asu); + std::vector phase_columns = positions_of_columns_with_type('P'); + std::vector abcd_columns = positions_of_columns_with_type('A'); + std::vector dano_columns = positions_of_columns_with_type('D'); + std::vector> plus_minus_columns = positions_of_plus_minus_columns(); + bool no_special_columns = phase_columns.empty() && abcd_columns.empty() && + plus_minus_columns.empty() && dano_columns.empty(); + bool centric = no_special_columns || gops.is_centrosymmetric(); + for (size_t n = 0; n < data.size(); n += columns.size()) { + Miller hkl = get_hkl(n); + if (asu.is_in(hkl)) + continue; + auto result = asu.to_asu(hkl, gops); + // cf. 
impl::move_to_asu() in asudata.hpp + set_hkl(n, result.first); + if (no_special_columns) + continue; + int isym = result.second; + if (!phase_columns.empty() || !abcd_columns.empty()) { + const Op& op = gops.sym_ops[(isym - 1) / 2]; + double shift = op.phase_shift(hkl); + bool negate = (isym % 2 == 0); + for (int col : phase_columns) + shift_phase(data[n + col], shift, negate); + for (auto i = abcd_columns.begin(); i+3 < abcd_columns.end(); i += 4) + // we expect coefficients HLA, HLB, HLC and HLD - in this order + shift_hl_coefficients(data[n + *(i+0)], data[n + *(i+1)], + data[n + *(i+2)], data[n + *(i+3)], + shift, negate); + } + if (isym % 2 == 0 && !centric && + // usually, centric reflections have empty F(-), so avoid swapping it + !gops.is_reflection_centric(hkl)) { + for (std::pair cols : plus_minus_columns) + std::swap(data[n + cols.first], data[n + cols.second]); + for (int col : dano_columns) + data[n + col] = -data[n + col]; + } + } +} + +void Mtz::reindex(const Op& op) { + if (op.tran != Op::Tran{0, 0, 0}) + gemmi::fail("reindexing operator must not have a translation"); + if (op.det_rot() < 0) + gemmi::fail("reindexing operator must preserve the hand of the axes"); + switch_to_original_hkl(); // changes hkl for unmerged data only + Op xyz_op = op.as_xyz(); + logger.mesg("Real space transformation: ", op.as_xyz().triplet()); + bool row_removal = false; + // change Miller indices + for (size_t n = 0; n < data.size(); n += columns.size()) { + Miller hkl_den = op.apply_to_hkl_without_division(get_hkl(n)); + Miller hkl = Op::divide_hkl_by_DEN(hkl_den); + if (hkl[0] * Op::DEN == hkl_den[0] && + hkl[1] * Op::DEN == hkl_den[1] && + hkl[2] * Op::DEN == hkl_den[2]) { + set_hkl(n, hkl); + } else { // fractional hkl - remove + row_removal = true; + data[n] = NAN; // mark for removal + } + } + + // remove reflections marked for removal + if (row_removal) { + int n_before = nreflections; + remove_rows_if([](const float* h) { return std::isnan(*h); }); + 
logger.mesg("Reflections removed (because of fractional indices): ", n_before - nreflections); + } + + switch_to_asu_hkl(); // revert switch_to_original_hkl() for unmerged data + + // change space group + if (spacegroup) { + GroupOps gops = spacegroup->operations(); + gops.change_basis_backward(xyz_op); + const SpaceGroup* new_sg = find_spacegroup_by_ops(gops); + if (!new_sg) + fail("reindexing: failed to determine new space group name"); + if (new_sg != spacegroup) { + logger.mesg("Space group changed from ", spacegroup->xhm(), " to ", new_sg->xhm(), '.'); + set_spacegroup(new_sg); + } else { + logger.mesg("Space group stays the same:", spacegroup->xhm(), '.'); + } + } + + // change unit cell parameters + cell = cell.changed_basis_backward(xyz_op, false); + for (Mtz::Dataset& ds : datasets) + ds.cell = ds.cell.changed_basis_backward(xyz_op, false); + for (Mtz::Batch& batch : batches) + batch.set_cell(batch.get_cell().changed_basis_backward(xyz_op, false)); +} + +void Mtz::expand_to_p1() { + if (!spacegroup || !has_data()) + return; + std::vector phase_columns = positions_of_columns_with_type('P'); + std::vector abcd_columns = positions_of_columns_with_type('A'); + bool has_phases = (!phase_columns.empty() || !abcd_columns.empty()); + GroupOps gops = spacegroup->operations(); + data.reserve(gops.sym_ops.size() * data.size()); + size_t orig_size = data.size(); + std::vector hkl_copies; + for (size_t n = 0; n < orig_size; n += columns.size()) { + hkl_copies.clear(); + Miller hkl = get_hkl(n); + // no reallocations because of reserve() above + auto orig_iter = data.begin() + n; + for (auto op = gops.sym_ops.begin() + 1; op < gops.sym_ops.end(); ++op) { + Miller new_hkl = op->apply_to_hkl(hkl); + Op::Miller negated{{-new_hkl[0], -new_hkl[1], -new_hkl[2]}}; + if (new_hkl != hkl && !in_vector(new_hkl, hkl_copies) && + negated != hkl && !in_vector(negated, hkl_copies)) { + hkl_copies.push_back(new_hkl); + size_t offset = data.size(); + data.insert(data.end(), orig_iter, 
orig_iter + columns.size()); + set_hkl(offset, new_hkl); + if (has_phases) { + double shift = op->phase_shift(hkl); + if (shift != 0) { + for (int col : phase_columns) + shift_phase(data[offset + col], shift); + for (auto i = abcd_columns.begin(); i+3 < abcd_columns.end(); i += 4) + // we expect coefficients HLA, HLB, HLC and HLD - in this order + shift_hl_coefficients(data[offset + *(i+0)], data[offset + *(i+1)], + data[offset + *(i+2)], data[offset + *(i+3)], shift); + } + } + } + } + } + nreflections = int(data.size() / columns.size()); + sort_order = {{0, 0, 0, 0, 0}}; + set_spacegroup(&get_spacegroup_p1()); +} + +bool Mtz::switch_to_original_hkl() { + if (indices_switched_to_original) + return false; + if (!has_data()) + fail("switch_to_original_hkl(): data not read yet"); + if (nreflections == 0) { + // This function can be called before the data is populated + // to set indices_switched_to_original, which is not exposed in Python. + indices_switched_to_original = true; + return true; + } + const Column* col = column_with_label("M/ISYM"); + if (col == nullptr || col->type != 'Y' || col->idx < 3) + return false; + std::vector inv_symops; + inv_symops.reserve(symops.size()); + for (const Op& op : symops) + inv_symops.push_back(op.inverse()); + for (size_t n = 0; n + col->idx < data.size(); n += columns.size()) { + int isym = static_cast(data[n + col->idx]) & 0xFF; + const Op& op = inv_symops.at((isym - 1) / 2); + Miller hkl = op.apply_to_hkl(get_hkl(n)); + int sign = (isym & 1) ? 
1 : -1; + for (int i = 0; i < 3; ++i) + data[n+i] = static_cast(sign * hkl[i]); + } + indices_switched_to_original = true; + return true; +} + +bool Mtz::switch_to_asu_hkl() { + if (!indices_switched_to_original) + return false; + if (!has_data()) + fail("switch_to_asu_hkl(): data not read yet"); + const Column* col = column_with_label("M/ISYM"); + if (col == nullptr || col->type != 'Y' || col->idx < 3 || !spacegroup) + return false; + size_t misym_idx = col->idx; + UnmergedHklMover hkl_mover(spacegroup); + for (size_t n = 0; n + col->idx < data.size(); n += columns.size()) { + Miller hkl = get_hkl(n); + int isym = hkl_mover.move_to_asu(hkl); // modifies hkl + set_hkl(n, hkl); + float& misym = data[n + misym_idx]; + misym = float(((int)misym & ~0xff) | isym); + } + indices_switched_to_original = false; + return true; +} + +void Mtz::read_file_gz(const std::string& path, bool with_data) { + try { + read_input(MaybeGzipped(path), with_data); + } catch (std::runtime_error& e) { + // append path to the error like in read_file(), but shouldn't the path go first? 
+ fail(std::string(e.what()) + ": " + path); + } +} + +std::vector Mtz::sorted_row_indices(int use_first) const { + if (!has_data()) + fail("No data."); + if (use_first <= 0 || use_first >= (int) columns.size()) + fail("Wrong use_first arg in Mtz::sort."); + std::vector indices(nreflections); + for (int i = 0; i != nreflections; ++i) + indices[i] = i; + std::stable_sort(indices.begin(), indices.end(), [&](int i, int j) { + int a = i * (int) columns.size(); + int b = j * (int) columns.size(); + for (int n = 0; n < use_first; ++n) + if (data[a+n] != data[b+n]) + return data[a+n] < data[b+n]; + return false; + }); + return indices; +} + +bool Mtz::sort(int use_first) { + std::vector indices = sorted_row_indices(use_first); + sort_order = {{0, 0, 0, 0, 0}}; + for (int i = 0; i < use_first; ++i) + sort_order[i] = i + 1; + if (std::is_sorted(indices.begin(), indices.end())) + return false; + std::vector new_data(data.size()); + size_t w = columns.size(); + for (size_t i = 0; i != indices.size(); ++i) + std::memcpy(&new_data[i * w], &data[indices[i] * w], w * sizeof(float)); + data.swap(new_data); + return true; +} + +Mtz::Column& Mtz::add_column(const std::string& label, char type, + int dataset_id, int pos, bool expand_data) { + if (datasets.empty()) + fail("No datasets."); + if (dataset_id < 0) + dataset_id = datasets.back().id; + else + dataset(dataset_id); // check if such dataset exist + if (pos > (int) columns.size()) + fail("Requested column position after the end."); + if (pos < 0) + pos = (int) columns.size(); + auto col = columns.emplace(columns.begin() + pos); + for (auto i = col + 1; i != columns.end(); ++i) + i->idx++; + col->dataset_id = dataset_id; + col->type = type; + col->label = label; + col->parent = this; + col->idx = pos; + if (expand_data) + expand_data_rows(1, pos); + return *col; +} + + +namespace { // helper functions for copying, replacing and removing columns + +void check_column(const Mtz& mtz, size_t idx, const char* msg) { + if 
(!mtz.has_data()) + fail(msg, ": data not read yet"); + if (idx >= mtz.columns.size()) + fail(msg, ": no column with 0-based index ", std::to_string(idx)); +} + +void check_trailing_cols(const Mtz& mtz, const Mtz::Column& src_col, + const std::vector& trailing_cols) { + assert(src_col.parent == &mtz); + if (!mtz.has_data()) + fail("data in source mtz not read yet"); + if (src_col.idx + trailing_cols.size() >= mtz.columns.size()) + fail("Not enough columns after " + src_col.label); + for (size_t i = 0; i < trailing_cols.size(); ++i) + if (!trailing_cols[i].empty() && + trailing_cols[i] != mtz.columns[src_col.idx + i + 1].label) + fail("expected trailing column ", trailing_cols[i], ", found ", src_col.label); +} + +void do_replace_column(Mtz& mtz, size_t dest_idx, const Mtz::Column& src_col, + const std::vector& trailing_cols) { + const Mtz* src_mtz = src_col.parent; + for (size_t i = 0; i <= trailing_cols.size(); ++i) { + Mtz::Column& dst = mtz.columns[dest_idx + i]; + const Mtz::Column& src = src_mtz->columns[src_col.idx + i]; + dst.type = src.type; + dst.label = src.label; + dst.min_value = src.min_value; + dst.max_value = src.max_value; + dst.source = src.source; + dst.dataset_id = src.dataset_id; + } + if (src_mtz == &mtz) { + // internal copying + for (size_t n = 0; n < mtz.data.size(); n += mtz.columns.size()) + for (size_t i = 0; i <= trailing_cols.size(); ++i) + mtz.data[n + dest_idx + i] = mtz.data[n + src_col.idx + i]; + } else { + // external copying - need to match indices + std::vector dst_indices = mtz.sorted_row_indices(); + std::vector src_indices = src_mtz->sorted_row_indices(); + // cf. 
for_matching_reflections() + size_t dst_stride = mtz.columns.size(); + size_t src_stride = src_mtz->columns.size(); + auto dst = dst_indices.begin(); + auto src = src_indices.begin(); + while (dst != dst_indices.end() && src != src_indices.end()) { + Miller dst_hkl = mtz.get_hkl(*dst * dst_stride); + Miller src_hkl = src_mtz->get_hkl(*src * src_stride); + if (dst_hkl == src_hkl) { + // copy values + for (size_t i = 0; i <= trailing_cols.size(); ++i) + mtz.data[*dst * dst_stride + dest_idx + i] = + src_mtz->data[*src * src_stride + src_col.idx + i]; + ++dst; + ++src; + } else if (dst_hkl < src_hkl) { + ++dst; + } else { + ++src; + } + } + } +} + +} // anonymous namespace + +Mtz::Column& Mtz::replace_column(size_t dest_idx, const Mtz::Column& src_col, + const std::vector& trailing_cols) { + check_trailing_cols(*src_col.parent, src_col, trailing_cols); + check_column(*this, dest_idx + trailing_cols.size(), "replace_column()"); + do_replace_column(*this, dest_idx, src_col, trailing_cols); + return columns[dest_idx]; +} + +Mtz::Column& Mtz::copy_column(int dest_idx, const Mtz::Column& src_col, + const std::vector& trailing_cols) { + // check input consistency + if (!has_data()) + fail("copy_column(): data not read yet"); + check_trailing_cols(*src_col.parent, src_col, trailing_cols); + // add new columns + if (dest_idx < 0) + dest_idx = (int) columns.size(); + // if src_col is from this Mtz it may get invalidated when adding columns + int col_idx = -1; + if (src_col.parent == this) { + col_idx = (int) src_col.idx; + if (col_idx >= dest_idx) + col_idx += 1 + (int)trailing_cols.size(); + } + for (int i = 0; i <= (int) trailing_cols.size(); ++i) + add_column("", ' ', -1, dest_idx + i, false); + expand_data_rows(1 + trailing_cols.size(), dest_idx); + // copy the data + const Column& src_col_now = col_idx < 0 ? 
src_col : columns[col_idx]; + // most of the work (hkl-based row matching and data copying) is done here: + do_replace_column(*this, dest_idx, src_col_now, trailing_cols); + return columns[dest_idx]; +} + +void Mtz::remove_column(size_t idx) { + check_column(*this, idx, "remove_column()"); + columns.erase(columns.begin() + idx); + for (size_t i = idx; i < columns.size(); ++i) + --columns[i].idx; + vector_remove_column(data, columns.size(), idx); + assert(columns.size() * nreflections == data.size()); +} + + +#define WRITE(...) do { \ + int len = snprintf_z(buf, 81, __VA_ARGS__); \ + if (len < 80) \ + std::memset(buf + len, ' ', 80 - len); \ + if (write(buf, 80, 1) != 1) \ + sys_fail("Writing MTZ file failed"); \ + } while(0) + +template +void Mtz::write_to_stream(Write write) const { + // uses: data, spacegroup, nreflections, batches, cell, sort_order, + // valm, columns, datasets, history + if (!has_data()) + fail("Cannot write Mtz which has no data"); + if (!spacegroup) + fail("Cannot write Mtz which has no space group"); + char buf[81] = {'M', 'T', 'Z', ' ', '\0'}; + std::int64_t real_header_start = (int64_t) columns.size() * nreflections + 21; + std::int32_t header_start = (int32_t) real_header_start; + if (real_header_start > std::numeric_limits::max()) { + header_start = -1; + } else { + real_header_start = 0; + } + std::memcpy(buf + 4, &header_start, 4); + std::int32_t machst = is_little_endian() ? 
0x00004144 : 0x11110000; + std::memcpy(buf + 8, &machst, 4); + std::memcpy(buf + 12, &real_header_start, 8); + if (write(buf, 80, 1) != 1 || + write(data.data(), 4, data.size()) != data.size()) + fail("Writing MTZ file failed"); + WRITE("VERS MTZ:V1.1"); + WRITE("TITLE %s", title.c_str()); + WRITE("NCOL %8zu %12d %8zu", columns.size(), nreflections, batches.size()); + if (cell.is_crystal()) + WRITE("CELL %9.4f %9.4f %9.4f %9.4f %9.4f %9.4f", + cell.a, cell.b, cell.c, cell.alpha, cell.beta, cell.gamma); + WRITE("SORT %3d %3d %3d %3d %3d", sort_order[0], sort_order[1], + sort_order[2], sort_order[3], sort_order[4]); + GroupOps ops = spacegroup->operations(); + char lat_type = spacegroup->ccp4_lattice_type(); + WRITE("SYMINF %3d %2d %c %5d %*s'%c%s' PG%s", + ops.order(), // number of symmetry operations + (int) ops.sym_ops.size(), // number of primitive operations + lat_type, // lattice type + spacegroup->ccp4, // space group number + 20 - (int) std::strlen(spacegroup->hm), "", + lat_type, // space group name (first letter) + spacegroup->hm + 1, // space group name (the rest) + spacegroup->point_group_hm()); // point group name + // If we have symops that are the same as spacegroup->operations(), + // write symops to preserve the order of SYMM records. + if (!symops.empty() && ops.is_same_as(split_centering_vectors(symops))) + for (Op op : symops) + WRITE("SYMM %s", to_upper(op.triplet()).c_str()); + else + for (Op op : ops) + WRITE("SYMM %s", to_upper(op.triplet()).c_str()); + auto reso = calculate_min_max_1_d2(); + WRITE("RESO %-20.12f %-20.12f", reso[0], reso[1]); + if (std::isnan(valm)) + WRITE("VALM NAN"); + else + WRITE("VALM %f", valm); + auto format17 = [](float f) { + char buffer[18]; + int len = snprintf_z(buffer, 18, "%.9f", f); + return std::string(buffer, len > 0 ? std::min(len, 17) : 0); + }; + for (const Column& col : columns) { + auto minmax = calculate_min_max_disregarding_nans(col.begin(), col.end()); + const char* label = !col.label.empty() ? 
col.label.c_str() : "_"; + WRITE("COLUMN %-30s %c %17s %17s %4d", + label, col.type, + format17(minmax[0]).c_str(), format17(minmax[1]).c_str(), + col.dataset_id); + if (!col.source.empty()) + WRITE("COLSRC %-30s %-36s %4d", label, col.source.c_str(), col.dataset_id); + } + WRITE("NDIF %8zu", datasets.size()); + for (const Dataset& ds : datasets) { + WRITE("PROJECT %7d %s", ds.id, ds.project_name.c_str()); + WRITE("CRYSTAL %7d %s", ds.id, ds.crystal_name.c_str()); + WRITE("DATASET %7d %s", ds.id, ds.dataset_name.c_str()); + const UnitCell& uc = (ds.cell.is_crystal() && ds.cell.a > 0 ? ds.cell : cell); + WRITE("DCELL %9d %10.4f%10.4f%10.4f%10.4f%10.4f%10.4f", + ds.id, uc.a, uc.b, uc.c, uc.alpha, uc.beta, uc.gamma); + WRITE("DWAVEL %8d %10.5f", ds.id, ds.wavelength); + } + // BATCH records: the keyword followed by up to 12 six-character batch numbers + int pos = 0; + for (const Batch& batch : batches) { + if (pos == 0) { + std::memcpy(buf, "BATCH ", 6); // NOLINT(bugprone-not-null-terminated-result) + pos = 6; + } + snprintf_z(buf + pos, 7, "%6d", batch.number); + // advance past the number first, so that the padding below does not erase it + pos += 6; + // flush when the record is full (12 numbers end at offset 78) or at the last batch + if (pos > 72 || &batch == &batches.back()) { + std::memset(buf + pos, ' ', 80 - pos); + if (write(buf, 80, 1) != 1) + fail("Writing MTZ file failed"); + pos = 0; + } + } + WRITE("END"); + if (!history.empty()) { + // According to mtzformat.html the file can have only up to 30 history + // lines, but we don't enforce it here.
+ WRITE("MTZHIST %3zu", history.size()); + for (const std::string& line : history) + WRITE("%s", line.c_str()); + } + if (!batches.empty()) { + WRITE("MTZBATS"); + for (const Batch& batch : batches) { + // keep the numbers the same as in files written by libccp4 + WRITE("BH %8d %7zu %7zu %7zu", + batch.number, batch.ints.size() + batch.floats.size(), + batch.ints.size(), batch.floats.size()); + WRITE("TITLE %.70s", batch.title.c_str()); + if (batch.ints.size() != 29 || batch.floats.size() != 156) + fail("wrong size of binaries batch headers"); + write(batch.ints.data(), 4, batch.ints.size()); + write(batch.floats.data(), 4, batch.floats.size()); + WRITE("BHCH %7.7s %7.7s %7.7s", + batch.axes.size() > 0 ? batch.axes[0].c_str() : "", + batch.axes.size() > 1 ? batch.axes[1].c_str() : "", + batch.axes.size() > 2 ? batch.axes[2].c_str() : ""); + } + } + WRITE("MTZENDOFHEADERS"); + if (!appended_text.empty()) { + if (write(appended_text.data(), appended_text.size(), 1) != 1) + fail("Writing MTZ file failed"); + } +} + +#undef WRITE + +void Mtz::write_to_cstream(std::FILE* stream) const { + write_to_stream([&](const void *ptr, size_t size, size_t nmemb) { + return std::fwrite(ptr, size, nmemb, stream); + }); +} + +void Mtz::write_to_string(std::string& str) const { + // Calculate the size beforehand to avoid memory re-allocations + // and minimize memory usage. It hasn't been benchmarked against + // a single-pass writing. 
+ size_t nbytes = size_to_write(); + str.resize(nbytes); + write_to_buffer(&str[0], nbytes); +} + +void Mtz::write_to_file(const std::string& path) const { + fileptr_t f = file_open(path.c_str(), "wb"); + try { + write_to_cstream(f.get()); + } catch (std::runtime_error& e) { + fail(std::string(e.what()) + ": " + path); + } +} + +size_t Mtz::size_to_write() const { + size_t nbytes = 0; + write_to_stream([&](const void *, size_t size, size_t nmemb) { + nbytes += size * nmemb; + return nmemb; + }); + return nbytes; +} + +size_t Mtz::write_to_buffer(char* buf, size_t maxlen) const { + size_t len = 0; + write_to_stream([&](const void *ptr, size_t size, size_t nmemb) { + len += size * nmemb; + if (len > maxlen) + fail("Mtz::write_to_buffer: size too small"); + memcpy(buf, ptr, size * nmemb); + buf += size * nmemb; + return nmemb; + }); + return len; +} + +} // namespace gemmi diff --git a/gemmi_gph/sprintf.cpp b/gemmi_gph/sprintf.cpp new file mode 100644 index 00000000..68b32b67 --- /dev/null +++ b/gemmi_gph/sprintf.cpp @@ -0,0 +1,68 @@ +// Copyright 2017 Global Phasing Ltd. + +#include +#include // for va_list + +#ifdef USE_STD_SNPRINTF // useful for benchmarking and testing only +# include +# include // for min +#else +# define STB_SPRINTF_IMPLEMENTATION +# define STB_SPRINTF_STATIC +# define STB_SPRINTF_NOUNALIGNED 1 +// Making functions from stb_sprintf static may trigger warnings. +# if defined(__GNUC__) +# pragma GCC diagnostic ignored "-Wunused-function" +# endif +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wunused-function" +# endif + +// To use system stb_sprintf.h (not recommended, but some Linux distros +// don't like bundled libraries) define GEMMI_USE_SYSTEM_STB or remove +// third_party/stb_sprintf.h. +# if defined(__has_include) +# if !__has_include("../third_party/stb_sprintf.h") +# define GEMMI_USE_SYSTEM_STB 1 +# endif +# endif +# ifdef GEMMI_USE_SYSTEM_STB +# pragma message("Using system stb_sprintf.h, not the bundled one. 
It may not work.") +# include "stb/stb_sprintf.h" +# else +# include "../third_party/stb_sprintf.h" +# endif +#endif // USE_STD_SNPRINTF + +namespace gemmi { + +// We copy functions from sprintf.h only to have them declared with GEMMI_DLL. +int sprintf_z(char *buf, char const *fmt, ...) { + int result; + va_list va; + va_start(va, fmt); +#ifdef USE_STD_SNPRINTF + result = std::vsprintf(buf, fmt, va); +#else + result = STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, va); +#endif + va_end(va); + return result; +} + +int snprintf_z(char *buf, int count, char const *fmt, ...) { + int result; + va_list va; + va_start(va, fmt); +#ifdef USE_STD_SNPRINTF + result = std::vsnprintf(buf, count, fmt, va); + // stbsp_snprintf always returns a zero-terminated string + buf[std::min(result, count-1)] = '\0'; +#else + result = STB_SPRINTF_DECORATE(vsnprintf)(buf, count, fmt, va); +#endif + va_end(va); + return result; +} + +} // namespace gemmi diff --git a/gemmi_gph/stb/stb_sprintf.h b/gemmi_gph/stb/stb_sprintf.h new file mode 100644 index 00000000..28e9d64b --- /dev/null +++ b/gemmi_gph/stb/stb_sprintf.h @@ -0,0 +1,1906 @@ +// stb_sprintf - v1.10 - public domain snprintf() implementation +// originally by Jeff Roberts / RAD Game Tools, 2015/10/20 +// http://github.com/nothings/stb +// +// allowed types: sc uidBboXx p AaGgEef n +// lengths : hh h ll j z t I64 I32 I +// +// Contributors: +// Fabian "ryg" Giesen (reformatting) +// github:aganm (attribute format) +// +// Contributors (bugfixes): +// github:d26435 +// github:trex78 +// github:account-login +// Jari Komppa (SI suffixes) +// Rohit Nirmal +// Marcin Wojdyr +// Leonard Ritter +// Stefano Zanotti +// Adam Allison +// Arvid Gerstmann +// Markus Kolb +// +// LICENSE: +// +// See end of file for license information. + +#ifndef STB_SPRINTF_H_INCLUDE +#define STB_SPRINTF_H_INCLUDE + +/* +Single file sprintf replacement. + +Originally written by Jeff Roberts at RAD Game Tools - 2015/10/20. +Hereby placed in public domain. 
+ +This is a full sprintf replacement that supports everything that +the C runtime sprintfs support, including float/double, 64-bit integers, +hex floats, field parameters (%*.*d stuff), length read-backs, etc. + +Why would you need this if sprintf already exists? Well, first off, +it's *much* faster (see below). It's also much smaller than the CRT +versions code-space-wise. We've also added some simple improvements +that are super handy (commas in thousands, callbacks at buffer full, +for example). Finally, the format strings for MSVC and GCC differ +for 64-bit integers (among other small things), so this lets you use +the same format strings in cross platform code. + +It uses the standard single file trick of being both the header file +and the source itself. If you just include it normally, you just get +the header file function declarations. To get the code, you include +it from a C or C++ file and define STB_SPRINTF_IMPLEMENTATION first. + +It only uses va_args macros from the C runtime to do its work. It +does cast doubles to S64s and shifts and divides U64s, which does +drag in CRT code on most platforms. + +It compiles to roughly 8K with float support, and 4K without. +As a comparison, when using MSVC static libs, calling sprintf drags +in 16K. + +API: +==== +int stbsp_sprintf( char * buf, char const * fmt, ... ) +int stbsp_snprintf( char * buf, int count, char const * fmt, ... ) + Convert an arg list into a buffer. stbsp_snprintf always returns + a zero-terminated string (unlike regular snprintf). + +int stbsp_vsprintf( char * buf, char const * fmt, va_list va ) +int stbsp_vsnprintf( char * buf, int count, char const * fmt, va_list va ) + Convert a va_list arg list into a buffer. stbsp_vsnprintf always returns + a zero-terminated string (unlike regular snprintf).
+ +int stbsp_vsprintfcb( STBSP_SPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va ) + typedef char * STBSP_SPRINTFCB( char const * buf, void * user, int len ); + Convert into a buffer, calling back every STB_SPRINTF_MIN chars. + Your callback can then copy the chars out, print them or whatever. + This function is actually the workhorse for everything else. + The buffer you pass in must hold at least STB_SPRINTF_MIN characters. + // you return the next buffer to use or 0 to stop converting + +void stbsp_set_separators( char comma, char period ) + Set the comma and period characters to use. + +FLOATS/DOUBLES: +=============== +This code uses an internal float->ascii conversion method that uses +doubles with error correction (double-doubles, for ~105 bits of +precision). This conversion is round-trip perfect - that is, an atof +of the values output here will give you the bit-exact double back. + +One difference is that our insignificant digits will be different than +with MSVC or GCC (but they don't match each other either). We also +don't attempt to find the minimum length matching float (pre-MSVC15 +doesn't either). + +If you don't need float or doubles at all, define STB_SPRINTF_NOFLOAT +and you'll save 4K of code space. + +64-BIT INTS: +============ +This library also supports 64-bit integers and you can use MSVC style or +GCC style indicators (%I64d or %lld). It supports the C99 specifiers +for size_t and ptrdiff_t (%zd %td) as well. + +EXTRAS: +======= +Like some GCCs, for integers and floats, you can use a ' (single quote) +specifier and commas will be inserted on the thousands: "%'d" on 12345 +would print 12,345. + +For integers and floats, you can use a "$" specifier and the number +will be converted to float and then divided to get kilo, mega, giga or +tera and then printed, so "%$d" 1000 is "1.0 k", "%$.2d" 2536000 is +"2.53 M", etc. For byte values, use two $:s, like "%$$d" to turn +2536000 to "2.42 Mi".
If you prefer JEDEC suffixes to SI ones, use three +$:s: "%$$$d" -> "2.42 M". To remove the space between the number and the +suffix, add "_" specifier: "%_$d" -> "2.53M". + +In addition to octal and hexadecimal conversions, you can print +integers in binary: "%b" for 4 would print 100. + +PERFORMANCE vs MSVC 2008 32-/64-bit (GCC is even slower than MSVC): +=================================================================== +"%d" across all 32-bit ints (4.8x/4.0x faster than 32-/64-bit MSVC) +"%24d" across all 32-bit ints (4.5x/4.2x faster) +"%x" across all 32-bit ints (4.5x/3.8x faster) +"%08x" across all 32-bit ints (4.3x/3.8x faster) +"%f" across e-10 to e+10 floats (7.3x/6.0x faster) +"%e" across e-10 to e+10 floats (8.1x/6.0x faster) +"%g" across e-10 to e+10 floats (10.0x/7.1x faster) +"%f" for values near e-300 (7.9x/6.5x faster) +"%f" for values near e+300 (10.0x/9.1x faster) +"%e" for values near e-300 (10.1x/7.0x faster) +"%e" for values near e+300 (9.2x/6.0x faster) +"%.320f" for values near e-300 (12.6x/11.2x faster) +"%a" for random values (8.6x/4.3x faster) +"%I64d" for 64-bits with 32-bit values (4.8x/3.4x faster) +"%I64d" for 64-bits > 32-bit values (4.9x/5.5x faster) +"%s%s%s" for 64 char strings (7.1x/7.3x faster) +"...512 char string..." ( 35.0x/32.5x faster!)
+*/ + +#if defined(__clang__) + #if defined(__has_feature) && defined(__has_attribute) + #if __has_feature(address_sanitizer) + #if __has_attribute(__no_sanitize__) + #define STBSP__ASAN __attribute__((__no_sanitize__("address"))) + #elif __has_attribute(__no_sanitize_address__) + #define STBSP__ASAN __attribute__((__no_sanitize_address__)) + #elif __has_attribute(__no_address_safety_analysis__) + #define STBSP__ASAN __attribute__((__no_address_safety_analysis__)) + #endif + #endif + #endif +#elif defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) + #if defined(__SANITIZE_ADDRESS__) && __SANITIZE_ADDRESS__ + #define STBSP__ASAN __attribute__((__no_sanitize_address__)) + #endif +#endif + +#ifndef STBSP__ASAN +#define STBSP__ASAN +#endif + +#ifdef STB_SPRINTF_STATIC +#define STBSP__PUBLICDEC static +#define STBSP__PUBLICDEF static STBSP__ASAN +#else +#ifdef __cplusplus +#define STBSP__PUBLICDEC extern "C" +#define STBSP__PUBLICDEF extern "C" STBSP__ASAN +#else +#define STBSP__PUBLICDEC extern +#define STBSP__PUBLICDEF STBSP__ASAN +#endif +#endif + +#if defined(__has_attribute) && !defined(__MINGW32__) + #if __has_attribute(format) + #define STBSP__ATTRIBUTE_FORMAT(fmt,va) __attribute__((format(printf,fmt,va))) + #endif +#endif + +#ifndef STBSP__ATTRIBUTE_FORMAT +#define STBSP__ATTRIBUTE_FORMAT(fmt,va) +#endif + +#ifdef _MSC_VER +#define STBSP__NOTUSED(v) (void)(v) +#else +#define STBSP__NOTUSED(v) (void)sizeof(v) +#endif + +#include // for va_arg(), va_list() +#include // size_t, ptrdiff_t + +#ifndef STB_SPRINTF_MIN +#define STB_SPRINTF_MIN 512 // how many characters per callback +#endif +typedef char *STBSP_SPRINTFCB(const char *buf, void *user, int len); + +#ifndef STB_SPRINTF_DECORATE +#define STB_SPRINTF_DECORATE(name) stbsp_##name // define this before including if you want to change the names +#endif + +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va); +STBSP__PUBLICDEC int 
STB_SPRINTF_DECORATE(vsnprintf)(char *buf, int count, char const *fmt, va_list va); +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...) STBSP__ATTRIBUTE_FORMAT(2,3); +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...) STBSP__ATTRIBUTE_FORMAT(3,4); + +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va); +STBSP__PUBLICDEC void STB_SPRINTF_DECORATE(set_separators)(char comma, char period); + +#endif // STB_SPRINTF_H_INCLUDE + +#ifdef STB_SPRINTF_IMPLEMENTATION + +#define stbsp__uint32 unsigned int +#define stbsp__int32 signed int + +#ifdef _MSC_VER +#define stbsp__uint64 unsigned __int64 +#define stbsp__int64 signed __int64 +#else +#define stbsp__uint64 unsigned long long +#define stbsp__int64 signed long long +#endif +#define stbsp__uint16 unsigned short + +#ifndef stbsp__uintptr +#if defined(__ppc64__) || defined(__powerpc64__) || defined(__aarch64__) || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64) || defined(__s390x__) +#define stbsp__uintptr stbsp__uint64 +#else +#define stbsp__uintptr stbsp__uint32 +#endif +#endif + +#ifndef STB_SPRINTF_MSVC_MODE // used for MSVC2013 and earlier (MSVC2015 matches GCC) +#if defined(_MSC_VER) && (_MSC_VER < 1900) +#define STB_SPRINTF_MSVC_MODE +#endif +#endif + +#ifdef STB_SPRINTF_NOUNALIGNED // define this before inclusion to force stbsp_sprintf to always use aligned accesses +#define STBSP__UNALIGNED(code) +#else +#define STBSP__UNALIGNED(code) code +#endif + +#ifndef STB_SPRINTF_NOFLOAT +// internal float utility functions +static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits); +static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value); +#define STBSP__SPECIAL 0x7000 +#endif + +static char stbsp__period = '.'; +static 
char stbsp__comma = ','; +static struct +{ + short temp; // force next field to be 2-byte aligned + char pair[201]; +} stbsp__digitpair = +{ + 0, + "00010203040506070809101112131415161718192021222324" + "25262728293031323334353637383940414243444546474849" + "50515253545556575859606162636465666768697071727374" + "75767778798081828384858687888990919293949596979899" +}; + +STBSP__PUBLICDEF void STB_SPRINTF_DECORATE(set_separators)(char pcomma, char pperiod) +{ + stbsp__period = pperiod; + stbsp__comma = pcomma; +} + +#define STBSP__LEFTJUST 1 +#define STBSP__LEADINGPLUS 2 +#define STBSP__LEADINGSPACE 4 +#define STBSP__LEADING_0X 8 +#define STBSP__LEADINGZERO 16 +#define STBSP__INTMAX 32 +#define STBSP__TRIPLET_COMMA 64 +#define STBSP__NEGATIVE 128 +#define STBSP__METRIC_SUFFIX 256 +#define STBSP__HALFWIDTH 512 +#define STBSP__METRIC_NOSPACE 1024 +#define STBSP__METRIC_1024 2048 +#define STBSP__METRIC_JEDEC 4096 + +static void stbsp__lead_sign(stbsp__uint32 fl, char *sign) +{ + sign[0] = 0; + if (fl & STBSP__NEGATIVE) { + sign[0] = 1; + sign[1] = '-'; + } else if (fl & STBSP__LEADINGSPACE) { + sign[0] = 1; + sign[1] = ' '; + } else if (fl & STBSP__LEADINGPLUS) { + sign[0] = 1; + sign[1] = '+'; + } +} + +static STBSP__ASAN stbsp__uint32 stbsp__strlen_limited(char const *s, stbsp__uint32 limit) +{ + char const * sn = s; + + // get up to 4-byte alignment + for (;;) { + if (((stbsp__uintptr)sn & 3) == 0) + break; + + if (!limit || *sn == 0) + return (stbsp__uint32)(sn - s); + + ++sn; + --limit; + } + + // scan over 4 bytes at a time to find terminating 0 + // this will intentionally scan up to 3 bytes past the end of buffers, + // but becase it works 4B aligned, it will never cross page boundaries + // (hence the STBSP__ASAN markup; the over-read here is intentional + // and harmless) + while (limit >= 4) { + stbsp__uint32 v = *(stbsp__uint32 *)sn; + // bit hack to find if there's a 0 byte in there + if ((v - 0x01010101) & (~v) & 0x80808080UL) + break; + + sn += 4; + limit 
-= 4; + } + + // handle the last few characters to find actual size + while (limit && *sn) { + ++sn; + --limit; + } + + return (stbsp__uint32)(sn - s); +} + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va) +{ + static char hex[] = "0123456789abcdefxp"; + static char hexu[] = "0123456789ABCDEFXP"; + char *bf; + char const *f; + int tlen = 0; + + bf = buf; + f = fmt; + for (;;) { + stbsp__int32 fw, pr, tz; + stbsp__uint32 fl; + + // macros for the callback buffer stuff + #define stbsp__chk_cb_bufL(bytes) \ + { \ + int len = (int)(bf - buf); \ + if ((len + (bytes)) >= STB_SPRINTF_MIN) { \ + tlen += len; \ + if (0 == (bf = buf = callback(buf, user, len))) \ + goto done; \ + } \ + } + #define stbsp__chk_cb_buf(bytes) \ + { \ + if (callback) { \ + stbsp__chk_cb_bufL(bytes); \ + } \ + } + #define stbsp__flush_cb() \ + { \ + stbsp__chk_cb_bufL(STB_SPRINTF_MIN - 1); \ + } // flush if there is even one byte in the buffer + #define stbsp__cb_buf_clamp(cl, v) \ + cl = v; \ + if (callback) { \ + int lg = STB_SPRINTF_MIN - (int)(bf - buf); \ + if (cl > lg) \ + cl = lg; \ + } + + // fast copy everything up to the next % (or end of string) + for (;;) { + while (((stbsp__uintptr)f) & 3) { + schk1: + if (f[0] == '%') + goto scandd; + schk2: + if (f[0] == 0) + goto endfmt; + stbsp__chk_cb_buf(1); + *bf++ = f[0]; + ++f; + } + for (;;) { + // Check if the next 4 bytes contain %(0x25) or end of string. 
+ // Using the 'hasless' trick: + // https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord + stbsp__uint32 v, c; + v = *(stbsp__uint32 *)f; + c = (~v) & 0x80808080; + if (((v ^ 0x25252525) - 0x01010101) & c) + goto schk1; + if ((v - 0x01010101) & c) + goto schk2; + if (callback) + if ((STB_SPRINTF_MIN - (int)(bf - buf)) < 4) + goto schk1; + #ifdef STB_SPRINTF_NOUNALIGNED + if(((stbsp__uintptr)bf) & 3) { + bf[0] = f[0]; + bf[1] = f[1]; + bf[2] = f[2]; + bf[3] = f[3]; + } else + #endif + { + *(stbsp__uint32 *)bf = v; + } + bf += 4; + f += 4; + } + } + scandd: + + ++f; + + // ok, we have a percent, read the modifiers first + fw = 0; + pr = -1; + fl = 0; + tz = 0; + + // flags + for (;;) { + switch (f[0]) { + // if we have left justify + case '-': + fl |= STBSP__LEFTJUST; + ++f; + continue; + // if we have leading plus + case '+': + fl |= STBSP__LEADINGPLUS; + ++f; + continue; + // if we have leading space + case ' ': + fl |= STBSP__LEADINGSPACE; + ++f; + continue; + // if we have leading 0x + case '#': + fl |= STBSP__LEADING_0X; + ++f; + continue; + // if we have thousand commas + case '\'': + fl |= STBSP__TRIPLET_COMMA; + ++f; + continue; + // if we have kilo marker (none->kilo->kibi->jedec) + case '$': + if (fl & STBSP__METRIC_SUFFIX) { + if (fl & STBSP__METRIC_1024) { + fl |= STBSP__METRIC_JEDEC; + } else { + fl |= STBSP__METRIC_1024; + } + } else { + fl |= STBSP__METRIC_SUFFIX; + } + ++f; + continue; + // if we don't want space between metric suffix and number + case '_': + fl |= STBSP__METRIC_NOSPACE; + ++f; + continue; + // if we have leading zero + case '0': + fl |= STBSP__LEADINGZERO; + ++f; + goto flags_done; + default: goto flags_done; + } + } + flags_done: + + // get the field width + if (f[0] == '*') { + fw = va_arg(va, stbsp__uint32); + ++f; + } else { + while ((f[0] >= '0') && (f[0] <= '9')) { + fw = fw * 10 + f[0] - '0'; + f++; + } + } + // get the precision + if (f[0] == '.') { + ++f; + if (f[0] == '*') { + pr = va_arg(va, stbsp__uint32); 
+ ++f; + } else { + pr = 0; + while ((f[0] >= '0') && (f[0] <= '9')) { + pr = pr * 10 + f[0] - '0'; + f++; + } + } + } + + // handle integer size overrides + switch (f[0]) { + // are we halfwidth? + case 'h': + fl |= STBSP__HALFWIDTH; + ++f; + if (f[0] == 'h') + ++f; // QUARTERWIDTH + break; + // are we 64-bit (unix style) + case 'l': + fl |= ((sizeof(long) == 8) ? STBSP__INTMAX : 0); + ++f; + if (f[0] == 'l') { + fl |= STBSP__INTMAX; + ++f; + } + break; + // are we 64-bit on intmax? (c99) + case 'j': + fl |= (sizeof(size_t) == 8) ? STBSP__INTMAX : 0; + ++f; + break; + // are we 64-bit on size_t or ptrdiff_t? (c99) + case 'z': + fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0; + ++f; + break; + case 't': + fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0; + ++f; + break; + // are we 64-bit (msft style) + case 'I': + if ((f[1] == '6') && (f[2] == '4')) { + fl |= STBSP__INTMAX; + f += 3; + } else if ((f[1] == '3') && (f[2] == '2')) { + f += 3; + } else { + fl |= ((sizeof(void *) == 8) ? STBSP__INTMAX : 0); + ++f; + } + break; + default: break; + } + + // handle each replacement + switch (f[0]) { + #define STBSP__NUMSZ 512 // big enough for e308 (with commas) or e-307 + char num[STBSP__NUMSZ]; + char lead[8]; + char tail[8]; + char *s; + char const *h; + stbsp__uint32 l, n, cs; + stbsp__uint64 n64; +#ifndef STB_SPRINTF_NOFLOAT + double fv; +#endif + stbsp__int32 dp; + char const *sn; + + case 's': + // get the string + s = va_arg(va, char *); + if (s == 0) + s = (char *)"null"; + // get the length, limited to desired precision + // always limit to ~0u chars since our counts are 32b + l = stbsp__strlen_limited(s, (pr >= 0) ? 
pr : ~0u); + lead[0] = 0; + tail[0] = 0; + pr = 0; + dp = 0; + cs = 0; + // copy the string in + goto scopy; + + case 'c': // char + // get the character + s = num + STBSP__NUMSZ - 1; + *s = (char)va_arg(va, int); + l = 1; + lead[0] = 0; + tail[0] = 0; + pr = 0; + dp = 0; + cs = 0; + goto scopy; + + case 'n': // weird write-bytes specifier + { + int *d = va_arg(va, int *); + *d = tlen + (int)(bf - buf); + } break; + +#ifdef STB_SPRINTF_NOFLOAT + case 'A': // float + case 'a': // hex float + case 'G': // float + case 'g': // float + case 'E': // float + case 'e': // float + case 'f': // float + va_arg(va, double); // eat it + s = (char *)"No float"; + l = 8; + lead[0] = 0; + tail[0] = 0; + pr = 0; + cs = 0; + STBSP__NOTUSED(dp); + goto scopy; +#else + case 'A': // hex float + case 'a': // hex float + h = (f[0] == 'A') ? hexu : hex; + fv = va_arg(va, double); + if (pr == -1) + pr = 6; // default is 6 + // read the double into a string + if (stbsp__real_to_parts((stbsp__int64 *)&n64, &dp, fv)) + fl |= STBSP__NEGATIVE; + + s = num + 64; + + stbsp__lead_sign(fl, lead); + + if (dp == -1023) + dp = (n64) ? -1022 : 0; + else + n64 |= (((stbsp__uint64)1) << 52); + n64 <<= (64 - 56); + if (pr < 15) + n64 += ((((stbsp__uint64)8) << 56) >> (pr * 4)); +// add leading chars + +#ifdef STB_SPRINTF_MSVC_MODE + *s++ = '0'; + *s++ = 'x'; +#else + lead[1 + lead[0]] = '0'; + lead[2 + lead[0]] = 'x'; + lead[0] += 2; +#endif + *s++ = h[(n64 >> 60) & 15]; + n64 <<= 4; + if (pr) + *s++ = stbsp__period; + sn = s; + + // print the bits + n = pr; + if (n > 13) + n = 13; + if (pr > (stbsp__int32)n) + tz = pr - n; + pr = 0; + while (n--) { + *s++ = h[(n64 >> 60) & 15]; + n64 <<= 4; + } + + // print the expo + tail[1] = h[17]; + if (dp < 0) { + tail[2] = '-'; + dp = -dp; + } else + tail[2] = '+'; + n = (dp >= 1000) ? 6 : ((dp >= 100) ? 5 : ((dp >= 10) ? 
4 : 3)); + tail[0] = (char)n; + for (;;) { + tail[n] = '0' + dp % 10; + if (n <= 3) + break; + --n; + dp /= 10; + } + + dp = (int)(s - sn); + l = (int)(s - (num + 64)); + s = num + 64; + cs = 1 + (3 << 24); + goto scopy; + + case 'G': // float + case 'g': // float + h = (f[0] == 'G') ? hexu : hex; + fv = va_arg(va, double); + if (pr == -1) + pr = 6; + else if (pr == 0) + pr = 1; // default is 6 + // read the double into a string + if (stbsp__real_to_str(&sn, &l, num, &dp, fv, (pr - 1) | 0x80000000)) + fl |= STBSP__NEGATIVE; + + // clamp the precision and delete extra zeros after clamp + n = pr; + if (l > (stbsp__uint32)pr) + l = pr; + while ((l > 1) && (pr) && (sn[l - 1] == '0')) { + --pr; + --l; + } + + // should we use %e + if ((dp <= -4) || (dp > (stbsp__int32)n)) { + if (pr > (stbsp__int32)l) + pr = l - 1; + else if (pr) + --pr; // when using %e, there is one digit before the decimal + goto doexpfromg; + } + // this is the insane action to get the pr to match %g semantics for %f + if (dp > 0) { + pr = (dp < (stbsp__int32)l) ? l - dp : 0; + } else { + pr = -dp + ((pr > (stbsp__int32)l) ? (stbsp__int32) l : pr); + } + goto dofloatfromg; + + case 'E': // float + case 'e': // float + h = (f[0] == 'E') ? 
hexu : hex; + fv = va_arg(va, double); + if (pr == -1) + pr = 6; // default is 6 + // read the double into a string + if (stbsp__real_to_str(&sn, &l, num, &dp, fv, pr | 0x80000000)) + fl |= STBSP__NEGATIVE; + doexpfromg: + tail[0] = 0; + stbsp__lead_sign(fl, lead); + if (dp == STBSP__SPECIAL) { + s = (char *)sn; + cs = 0; + pr = 0; + goto scopy; + } + s = num + 64; + // handle leading chars + *s++ = sn[0]; + + if (pr) + *s++ = stbsp__period; + + // handle after decimal + if ((l - 1) > (stbsp__uint32)pr) + l = pr + 1; + for (n = 1; n < l; n++) + *s++ = sn[n]; + // trailing zeros + tz = pr - (l - 1); + pr = 0; + // dump expo + tail[1] = h[0xe]; + dp -= 1; + if (dp < 0) { + tail[2] = '-'; + dp = -dp; + } else + tail[2] = '+'; +#ifdef STB_SPRINTF_MSVC_MODE + n = 5; +#else + n = (dp >= 100) ? 5 : 4; +#endif + tail[0] = (char)n; + for (;;) { + tail[n] = '0' + dp % 10; + if (n <= 3) + break; + --n; + dp /= 10; + } + cs = 1 + (3 << 24); // how many tens + goto flt_lead; + + case 'f': // float + fv = va_arg(va, double); + doafloat: + // do kilos + if (fl & STBSP__METRIC_SUFFIX) { + double divisor; + divisor = 1000.0f; + if (fl & STBSP__METRIC_1024) + divisor = 1024.0; + while (fl < 0x4000000) { + if ((fv < divisor) && (fv > -divisor)) + break; + fv /= divisor; + fl += 0x1000000; + } + } + if (pr == -1) + pr = 6; // default is 6 + // read the double into a string + if (stbsp__real_to_str(&sn, &l, num, &dp, fv, pr)) + fl |= STBSP__NEGATIVE; + dofloatfromg: + tail[0] = 0; + stbsp__lead_sign(fl, lead); + if (dp == STBSP__SPECIAL) { + s = (char *)sn; + cs = 0; + pr = 0; + goto scopy; + } + s = num + 64; + + // handle the three decimal varieties + if (dp <= 0) { + stbsp__int32 i; + // handle 0.000*000xxxx + *s++ = '0'; + if (pr) + *s++ = stbsp__period; + n = -dp; + if ((stbsp__int32)n > pr) + n = pr; + i = n; + while (i) { + if ((((stbsp__uintptr)s) & 3) == 0) + break; + *s++ = '0'; + --i; + } + while (i >= 4) { + *(stbsp__uint32 *)s = 0x30303030; + s += 4; + i -= 4; + } + while 
(i) { + *s++ = '0'; + --i; + } + if ((stbsp__int32)(l + n) > pr) + l = pr - n; + i = l; + while (i) { + *s++ = *sn++; + --i; + } + tz = pr - (n + l); + cs = 1 + (3 << 24); // how many tens did we write (for commas below) + } else { + cs = (fl & STBSP__TRIPLET_COMMA) ? ((600 - (stbsp__uint32)dp) % 3) : 0; + if ((stbsp__uint32)dp >= l) { + // handle xxxx000*000.0 + n = 0; + for (;;) { + if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) { + cs = 0; + *s++ = stbsp__comma; + } else { + *s++ = sn[n]; + ++n; + if (n >= l) + break; + } + } + if (n < (stbsp__uint32)dp) { + n = dp - n; + if ((fl & STBSP__TRIPLET_COMMA) == 0) { + while (n) { + if ((((stbsp__uintptr)s) & 3) == 0) + break; + *s++ = '0'; + --n; + } + while (n >= 4) { + *(stbsp__uint32 *)s = 0x30303030; + s += 4; + n -= 4; + } + } + while (n) { + if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) { + cs = 0; + *s++ = stbsp__comma; + } else { + *s++ = '0'; + --n; + } + } + } + cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens + if (pr) { + *s++ = stbsp__period; + tz = pr; + } + } else { + // handle xxxxx.xxxx000*000 + n = 0; + for (;;) { + if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) { + cs = 0; + *s++ = stbsp__comma; + } else { + *s++ = sn[n]; + ++n; + if (n >= (stbsp__uint32)dp) + break; + } + } + cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens + if (pr) + *s++ = stbsp__period; + if ((l - dp) > (stbsp__uint32)pr) + l = pr + dp; + while (n < l) { + *s++ = sn[n]; + ++n; + } + tz = pr - (l - dp); + } + } + pr = 0; + + // handle k,m,g,t + if (fl & STBSP__METRIC_SUFFIX) { + char idx; + idx = 1; + if (fl & STBSP__METRIC_NOSPACE) + idx = 0; + tail[0] = idx; + tail[1] = ' '; + { + if (fl >> 24) { // SI kilo is 'k', JEDEC and SI kibits are 'K'. + if (fl & STBSP__METRIC_1024) + tail[idx + 1] = "_KMGT"[fl >> 24]; + else + tail[idx + 1] = "_kMGT"[fl >> 24]; + idx++; + // If printing kibits and not in jedec, add the 'i'. 
+ if (fl & STBSP__METRIC_1024 && !(fl & STBSP__METRIC_JEDEC)) { + tail[idx + 1] = 'i'; + idx++; + } + tail[0] = idx; + } + } + }; + + flt_lead: + // get the length that we copied + l = (stbsp__uint32)(s - (num + 64)); + s = num + 64; + goto scopy; +#endif + + case 'B': // upper binary + case 'b': // lower binary + h = (f[0] == 'B') ? hexu : hex; + lead[0] = 0; + if (fl & STBSP__LEADING_0X) { + lead[0] = 2; + lead[1] = '0'; + lead[2] = h[0xb]; + } + l = (8 << 4) | (1 << 8); + goto radixnum; + + case 'o': // octal + h = hexu; + lead[0] = 0; + if (fl & STBSP__LEADING_0X) { + lead[0] = 1; + lead[1] = '0'; + } + l = (3 << 4) | (3 << 8); + goto radixnum; + + case 'p': // pointer + fl |= (sizeof(void *) == 8) ? STBSP__INTMAX : 0; + pr = sizeof(void *) * 2; + fl &= ~STBSP__LEADINGZERO; // 'p' only prints the pointer with zeros + // fall through - to X + + case 'X': // upper hex + case 'x': // lower hex + h = (f[0] == 'X') ? hexu : hex; + l = (4 << 4) | (4 << 8); + lead[0] = 0; + if (fl & STBSP__LEADING_0X) { + lead[0] = 2; + lead[1] = '0'; + lead[2] = h[16]; + } + radixnum: + // get the number + if (fl & STBSP__INTMAX) + n64 = va_arg(va, stbsp__uint64); + else + n64 = va_arg(va, stbsp__uint32); + + s = num + STBSP__NUMSZ; + dp = 0; + // clear tail, and clear leading if value is zero + tail[0] = 0; + if (n64 == 0) { + lead[0] = 0; + if (pr == 0) { + l = 0; + cs = 0; + goto scopy; + } + } + // convert to string + for (;;) { + *--s = h[n64 & ((1 << (l >> 8)) - 1)]; + n64 >>= (l >> 8); + if (!((n64) || ((stbsp__int32)((num + STBSP__NUMSZ) - s) < pr))) + break; + if (fl & STBSP__TRIPLET_COMMA) { + ++l; + if ((l & 15) == ((l >> 4) & 15)) { + l &= ~15; + *--s = stbsp__comma; + } + } + }; + // get the tens and the comma pos + cs = (stbsp__uint32)((num + STBSP__NUMSZ) - s) + ((((l >> 4) & 15)) << 24); + // get the length that we copied + l = (stbsp__uint32)((num + STBSP__NUMSZ) - s); + // copy it + goto scopy; + + case 'u': // unsigned + case 'i': + case 'd': // integer + // get 
the integer and abs it + if (fl & STBSP__INTMAX) { + stbsp__int64 i64 = va_arg(va, stbsp__int64); + n64 = (stbsp__uint64)i64; + if ((f[0] != 'u') && (i64 < 0)) { + n64 = (stbsp__uint64)-i64; + fl |= STBSP__NEGATIVE; + } + } else { + stbsp__int32 i = va_arg(va, stbsp__int32); + n64 = (stbsp__uint32)i; + if ((f[0] != 'u') && (i < 0)) { + n64 = (stbsp__uint32)-i; + fl |= STBSP__NEGATIVE; + } + } + +#ifndef STB_SPRINTF_NOFLOAT + if (fl & STBSP__METRIC_SUFFIX) { + if (n64 < 1024) + pr = 0; + else if (pr == -1) + pr = 1; + fv = (double)(stbsp__int64)n64; + goto doafloat; + } +#endif + + // convert to string + s = num + STBSP__NUMSZ; + l = 0; + + for (;;) { + // do in 32-bit chunks (avoid lots of 64-bit divides even with constant denominators) + char *o = s - 8; + if (n64 >= 100000000) { + n = (stbsp__uint32)(n64 % 100000000); + n64 /= 100000000; + } else { + n = (stbsp__uint32)n64; + n64 = 0; + } + if ((fl & STBSP__TRIPLET_COMMA) == 0) { + do { + s -= 2; + *(stbsp__uint16 *)s = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2]; + n /= 100; + } while (n); + } + while (n) { + if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) { + l = 0; + *--s = stbsp__comma; + --o; + } else { + *--s = (char)(n % 10) + '0'; + n /= 10; + } + } + if (n64 == 0) { + if ((s[0] == '0') && (s != (num + STBSP__NUMSZ))) + ++s; + break; + } + while (s != o) + if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) { + l = 0; + *--s = stbsp__comma; + --o; + } else { + *--s = '0'; + } + } + + tail[0] = 0; + stbsp__lead_sign(fl, lead); + + // get the length that we copied + l = (stbsp__uint32)((num + STBSP__NUMSZ) - s); + if (l == 0) { + *--s = '0'; + l = 1; + } + cs = l + (3 << 24); + if (pr < 0) + pr = 0; + + scopy: + // get fw=leading/trailing space, pr=leading zeros + if (pr < (stbsp__int32)l) + pr = l; + n = pr + lead[0] + tail[0] + tz; + if (fw < (stbsp__int32)n) + fw = n; + fw -= n; + pr -= l; + + // handle right justify and leading zeros + if ((fl & STBSP__LEFTJUST) == 0) { + if (fl & 
STBSP__LEADINGZERO) // if leading zeros, everything is in pr + { + pr = (fw > pr) ? fw : pr; + fw = 0; + } else { + fl &= ~STBSP__TRIPLET_COMMA; // if no leading zeros, then no commas + } + } + + // copy the spaces and/or zeros + if (fw + pr) { + stbsp__int32 i; + stbsp__uint32 c; + + // copy leading spaces (or when doing %8.4d stuff) + if ((fl & STBSP__LEFTJUST) == 0) + while (fw > 0) { + stbsp__cb_buf_clamp(i, fw); + fw -= i; + while (i) { + if ((((stbsp__uintptr)bf) & 3) == 0) + break; + *bf++ = ' '; + --i; + } + while (i >= 4) { + *(stbsp__uint32 *)bf = 0x20202020; + bf += 4; + i -= 4; + } + while (i) { + *bf++ = ' '; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy leader + sn = lead + 1; + while (lead[0]) { + stbsp__cb_buf_clamp(i, lead[0]); + lead[0] -= (char)i; + while (i) { + *bf++ = *sn++; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy leading zeros + c = cs >> 24; + cs &= 0xffffff; + cs = (fl & STBSP__TRIPLET_COMMA) ? ((stbsp__uint32)(c - ((pr + cs) % (c + 1)))) : 0; + while (pr > 0) { + stbsp__cb_buf_clamp(i, pr); + pr -= i; + if ((fl & STBSP__TRIPLET_COMMA) == 0) { + while (i) { + if ((((stbsp__uintptr)bf) & 3) == 0) + break; + *bf++ = '0'; + --i; + } + while (i >= 4) { + *(stbsp__uint32 *)bf = 0x30303030; + bf += 4; + i -= 4; + } + } + while (i) { + if ((fl & STBSP__TRIPLET_COMMA) && (cs++ == c)) { + cs = 0; + *bf++ = stbsp__comma; + } else + *bf++ = '0'; + --i; + } + stbsp__chk_cb_buf(1); + } + } + + // copy leader if there is still one + sn = lead + 1; + while (lead[0]) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, lead[0]); + lead[0] -= (char)i; + while (i) { + *bf++ = *sn++; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy the string + n = l; + while (n) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, n); + n -= i; + STBSP__UNALIGNED(while (i >= 4) { + *(stbsp__uint32 volatile *)bf = *(stbsp__uint32 volatile *)s; + bf += 4; + s += 4; + i -= 4; + }) + while (i) { + *bf++ = *s++; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy trailing zeros 
+ while (tz) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, tz); + tz -= i; + while (i) { + if ((((stbsp__uintptr)bf) & 3) == 0) + break; + *bf++ = '0'; + --i; + } + while (i >= 4) { + *(stbsp__uint32 *)bf = 0x30303030; + bf += 4; + i -= 4; + } + while (i) { + *bf++ = '0'; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy tail if there is one + sn = tail + 1; + while (tail[0]) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, tail[0]); + tail[0] -= (char)i; + while (i) { + *bf++ = *sn++; + --i; + } + stbsp__chk_cb_buf(1); + } + + // handle the left justify + if (fl & STBSP__LEFTJUST) + if (fw > 0) { + while (fw) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, fw); + fw -= i; + while (i) { + if ((((stbsp__uintptr)bf) & 3) == 0) + break; + *bf++ = ' '; + --i; + } + while (i >= 4) { + *(stbsp__uint32 *)bf = 0x20202020; + bf += 4; + i -= 4; + } + while (i--) + *bf++ = ' '; + stbsp__chk_cb_buf(1); + } + } + break; + + default: // unknown, just copy code + s = num + STBSP__NUMSZ - 1; + *s = f[0]; + l = 1; + fw = fl = 0; + lead[0] = 0; + tail[0] = 0; + pr = 0; + dp = 0; + cs = 0; + goto scopy; + } + ++f; + } +endfmt: + + if (!callback) + *bf = 0; + else + stbsp__flush_cb(); + +done: + return tlen + (int)(bf - buf); +} + +// cleanup +#undef STBSP__LEFTJUST +#undef STBSP__LEADINGPLUS +#undef STBSP__LEADINGSPACE +#undef STBSP__LEADING_0X +#undef STBSP__LEADINGZERO +#undef STBSP__INTMAX +#undef STBSP__TRIPLET_COMMA +#undef STBSP__NEGATIVE +#undef STBSP__METRIC_SUFFIX +#undef STBSP__NUMSZ +#undef stbsp__chk_cb_bufL +#undef stbsp__chk_cb_buf +#undef stbsp__flush_cb +#undef stbsp__cb_buf_clamp + +// ============================================================================ +// wrapper functions + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...) 
+{
+   int result;
+   va_list va;
+   va_start(va, fmt);
+   result = STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, va);
+   va_end(va);
+   return result;
+}
+
+// bookkeeping shared between vsnprintf and its two output callbacks
+typedef struct stbsp__context {
+   char *buf;                 // next write position in the caller's buffer
+   int count;                 // bytes still free in the caller's buffer
+   int length;                // total characters produced so far (stored or not)
+   char tmp[STB_SPRINTF_MIN]; // scratch chunk used when the caller's buffer cannot take a full chunk
+} stbsp__context;
+
+// vsprintfcb callback for bounded output: counts every character of the chunk
+// in c->length, stores at most c->count of them in the caller's buffer, and
+// returns the buffer the formatter should fill next
+static char *stbsp__clamp_callback(const char *buf, void *user, int len)
+{
+   stbsp__context *c = (stbsp__context *)user;
+   c->length += len;
+
+   // keep only what still fits
+   if (len > c->count)
+      len = c->count;
+
+   if (len) {
+      // the chunk was formatted into the scratch buffer: copy it out
+      if (buf != c->buf) {
+         const char *s, *se;
+         char *d;
+         d = c->buf;
+         s = buf;
+         se = buf + len;
+         do {
+            *d++ = *s++;
+         } while (s < se);
+      }
+      c->buf += len;
+      c->count -= len;
+   }
+
+   if (c->count <= 0)
+      return c->tmp;
+   return (c->count >= STB_SPRINTF_MIN) ? c->buf : c->tmp; // go direct into buffer if you can
+}
+
+// vsprintfcb callback for measure-only mode: adds the chunk length to
+// c->length and discards the characters
+static char * stbsp__count_clamp_callback( const char * buf, void * user, int len )
+{
+   stbsp__context * c = (stbsp__context*)user;
+   (void) sizeof(buf);
+
+   c->length += len;
+   return c->tmp; // nothing is stored, so the scratch buffer is reused for every chunk
+}
+
+// Formats fmt/va into buf, storing at most count bytes including the
+// terminating zero. Returns the number of characters the complete output
+// needs (excluding the terminator), whether or not it all fitted.
+// When count <= 0 or buf is NULL nothing is written at all and the call only
+// measures the output.
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE( vsnprintf )( char * buf, int count, char const * fmt, va_list va )
+{
+   stbsp__context c;
+
+   c.length = 0;
+
+   // With no room for even the terminator, the store path would compute
+   // l = count - 1 < 0 and write in front of buf, so only measure instead.
+   if ( (count <= 0) || !buf )
+   {
+      STB_SPRINTF_DECORATE( vsprintfcb )( stbsp__count_clamp_callback, &c, c.tmp, fmt, va );
+   }
+   else
+   {
+      int l;
+
+      c.buf = buf;
+      c.count = count;
+
+      STB_SPRINTF_DECORATE( vsprintfcb )( stbsp__clamp_callback, &c, stbsp__clamp_callback(0,&c,0), fmt, va );
+
+      // zero-terminate
+      l = (int)( c.buf - buf );
+      if ( l >= count ) // should never be greater, only equal (or less) than count
+         l = count - 1;
+      buf[l] = 0;
+   }
+
+   return c.length;
+}
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...)
+{ + int result; + va_list va; + va_start(va, fmt); + + result = STB_SPRINTF_DECORATE(vsnprintf)(buf, count, fmt, va); + va_end(va); + + return result; +} + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va) +{ + return STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, va); +} + +// ======================================================================= +// low level float utility functions + +#ifndef STB_SPRINTF_NOFLOAT + +// copies d to bits w/ strict aliasing (this compiles to nothing on /Ox) +#define STBSP__COPYFP(dest, src) \ + { \ + int cn; \ + for (cn = 0; cn < 8; cn++) \ + ((char *)&dest)[cn] = ((char *)&src)[cn]; \ + } + +// get float info +static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value) +{ + double d; + stbsp__int64 b = 0; + + // load value and round at the frac_digits + d = value; + + STBSP__COPYFP(b, d); + + *bits = b & ((((stbsp__uint64)1) << 52) - 1); + *expo = (stbsp__int32)(((b >> 52) & 2047) - 1023); + + return (stbsp__int32)((stbsp__uint64) b >> 63); +} + +static double const stbsp__bot[23] = { + 1e+000, 1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010, 1e+011, + 1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019, 1e+020, 1e+021, 1e+022 +}; +static double const stbsp__negbot[22] = { + 1e-001, 1e-002, 1e-003, 1e-004, 1e-005, 1e-006, 1e-007, 1e-008, 1e-009, 1e-010, 1e-011, + 1e-012, 1e-013, 1e-014, 1e-015, 1e-016, 1e-017, 1e-018, 1e-019, 1e-020, 1e-021, 1e-022 +}; +static double const stbsp__negboterr[22] = { + -5.551115123125783e-018, -2.0816681711721684e-019, -2.0816681711721686e-020, -4.7921736023859299e-021, -8.1803053914031305e-022, 4.5251888174113741e-023, + 4.5251888174113739e-024, -2.0922560830128471e-025, -6.2281591457779853e-026, -3.6432197315497743e-027, 6.0503030718060191e-028, 2.0113352370744385e-029, + -3.0373745563400371e-030, 1.1806906454401013e-032, -7.7705399876661076e-032, 2.0902213275965398e-033, 
-7.1542424054621921e-034, -7.1542424054621926e-035, + 2.4754073164739869e-036, 5.4846728545790429e-037, 9.2462547772103625e-038, -4.8596774326570872e-039 +}; +static double const stbsp__top[13] = { + 1e+023, 1e+046, 1e+069, 1e+092, 1e+115, 1e+138, 1e+161, 1e+184, 1e+207, 1e+230, 1e+253, 1e+276, 1e+299 +}; +static double const stbsp__negtop[13] = { + 1e-023, 1e-046, 1e-069, 1e-092, 1e-115, 1e-138, 1e-161, 1e-184, 1e-207, 1e-230, 1e-253, 1e-276, 1e-299 +}; +static double const stbsp__toperr[13] = { + 8388608, + 6.8601809640529717e+028, + -7.253143638152921e+052, + -4.3377296974619174e+075, + -1.5559416129466825e+098, + -3.2841562489204913e+121, + -3.7745893248228135e+144, + -1.7356668416969134e+167, + -3.8893577551088374e+190, + -9.9566444326005119e+213, + 6.3641293062232429e+236, + -5.2069140800249813e+259, + -5.2504760255204387e+282 +}; +static double const stbsp__negtoperr[13] = { + 3.9565301985100693e-040, -2.299904345391321e-063, 3.6506201437945798e-086, 1.1875228833981544e-109, + -5.0644902316928607e-132, -6.7156837247865426e-155, -2.812077463003139e-178, -5.7778912386589953e-201, + 7.4997100559334532e-224, -4.6439668915134491e-247, -6.3691100762962136e-270, -9.436808465446358e-293, + 8.0970921678014997e-317 +}; + +#if defined(_MSC_VER) && (_MSC_VER <= 1200) +static stbsp__uint64 const stbsp__powten[20] = { + 1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000, + 10000000000, + 100000000000, + 1000000000000, + 10000000000000, + 100000000000000, + 1000000000000000, + 10000000000000000, + 100000000000000000, + 1000000000000000000, + 10000000000000000000U +}; +#define stbsp__tento19th ((stbsp__uint64)1000000000000000000) +#else +static stbsp__uint64 const stbsp__powten[20] = { + 1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000, + 10000000000ULL, + 100000000000ULL, + 1000000000000ULL, + 10000000000000ULL, + 100000000000000ULL, + 1000000000000000ULL, + 10000000000000000ULL, + 
100000000000000000ULL, + 1000000000000000000ULL, + 10000000000000000000ULL +}; +#define stbsp__tento19th (1000000000000000000ULL) +#endif + +#define stbsp__ddmulthi(oh, ol, xh, yh) \ + { \ + double ahi = 0, alo, bhi = 0, blo; \ + stbsp__int64 bt; \ + oh = xh * yh; \ + STBSP__COPYFP(bt, xh); \ + bt &= ((~(stbsp__uint64)0) << 27); \ + STBSP__COPYFP(ahi, bt); \ + alo = xh - ahi; \ + STBSP__COPYFP(bt, yh); \ + bt &= ((~(stbsp__uint64)0) << 27); \ + STBSP__COPYFP(bhi, bt); \ + blo = yh - bhi; \ + ol = ((ahi * bhi - oh) + ahi * blo + alo * bhi) + alo * blo; \ + } + +#define stbsp__ddtoS64(ob, xh, xl) \ + { \ + double ahi = 0, alo, vh, t; \ + ob = (stbsp__int64)xh; \ + vh = (double)ob; \ + ahi = (xh - vh); \ + t = (ahi - xh); \ + alo = (xh - (ahi - t)) - (vh + t); \ + ob += (stbsp__int64)(ahi + alo + xl); \ + } + +#define stbsp__ddrenorm(oh, ol) \ + { \ + double s; \ + s = oh + ol; \ + ol = ol - (s - oh); \ + oh = s; \ + } + +#define stbsp__ddmultlo(oh, ol, xh, xl, yh, yl) ol = ol + (xh * yl + xl * yh); + +#define stbsp__ddmultlos(oh, ol, xh, yl) ol = ol + (xh * yl); + +static void stbsp__raise_to_power10(double *ohi, double *olo, double d, stbsp__int32 power) // power can be -323 to +350 +{ + double ph, pl; + if ((power >= 0) && (power <= 22)) { + stbsp__ddmulthi(ph, pl, d, stbsp__bot[power]); + } else { + stbsp__int32 e, et, eb; + double p2h, p2l; + + e = power; + if (power < 0) + e = -e; + et = (e * 0x2c9) >> 14; /* %23 */ + if (et > 13) + et = 13; + eb = e - (et * 23); + + ph = d; + pl = 0.0; + if (power < 0) { + if (eb) { + --eb; + stbsp__ddmulthi(ph, pl, d, stbsp__negbot[eb]); + stbsp__ddmultlos(ph, pl, d, stbsp__negboterr[eb]); + } + if (et) { + stbsp__ddrenorm(ph, pl); + --et; + stbsp__ddmulthi(p2h, p2l, ph, stbsp__negtop[et]); + stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__negtop[et], stbsp__negtoperr[et]); + ph = p2h; + pl = p2l; + } + } else { + if (eb) { + e = eb; + if (eb > 22) + eb = 22; + e -= eb; + stbsp__ddmulthi(ph, pl, d, stbsp__bot[eb]); + if (e) { + 
stbsp__ddrenorm(ph, pl); + stbsp__ddmulthi(p2h, p2l, ph, stbsp__bot[e]); + stbsp__ddmultlos(p2h, p2l, stbsp__bot[e], pl); + ph = p2h; + pl = p2l; + } + } + if (et) { + stbsp__ddrenorm(ph, pl); + --et; + stbsp__ddmulthi(p2h, p2l, ph, stbsp__top[et]); + stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__top[et], stbsp__toperr[et]); + ph = p2h; + pl = p2l; + } + } + } + stbsp__ddrenorm(ph, pl); + *ohi = ph; + *olo = pl; +} + +// given a float value, returns the significant bits in bits, and the position of the +// decimal point in decimal_pos. +/-INF and NAN are specified by special values +// returned in the decimal_pos parameter. +// frac_digits is absolute normally, but if you want from first significant digits (got %g and %e), or in 0x80000000 +static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits) +{ + double d; + stbsp__int64 bits = 0; + stbsp__int32 expo, e, ng, tens; + + d = value; + STBSP__COPYFP(bits, d); + expo = (stbsp__int32)((bits >> 52) & 2047); + ng = (stbsp__int32)((stbsp__uint64) bits >> 63); + if (ng) + d = -d; + + if (expo == 2047) // is nan or inf? + { + *start = (bits & ((((stbsp__uint64)1) << 52) - 1)) ? "NaN" : "Inf"; + *decimal_pos = STBSP__SPECIAL; + *len = 3; + return ng; + } + + if (expo == 0) // is zero or denormal + { + if (((stbsp__uint64) bits << 1) == 0) // do zero + { + *decimal_pos = 1; + *start = out; + out[0] = '0'; + *len = 1; + return ng; + } + // find the right expo for denormals + { + stbsp__int64 v = ((stbsp__uint64)1) << 51; + while ((bits & v) == 0) { + --expo; + v >>= 1; + } + } + } + + // find the decimal exponent as well as the decimal bits of the value + { + double ph, pl; + + // log10 estimate - very specifically tweaked to hit or undershoot by no more than 1 of log10 of all expos 1..2046 + tens = expo - 1023; + tens = (tens < 0) ? 
((tens * 617) / 2048) : (((tens * 1233) / 4096) + 1); + + // move the significant bits into position and stick them into an int + stbsp__raise_to_power10(&ph, &pl, d, 18 - tens); + + // get full as much precision from double-double as possible + stbsp__ddtoS64(bits, ph, pl); + + // check if we undershot + if (((stbsp__uint64)bits) >= stbsp__tento19th) + ++tens; + } + + // now do the rounding in integer land + frac_digits = (frac_digits & 0x80000000) ? ((frac_digits & 0x7ffffff) + 1) : (tens + frac_digits); + if ((frac_digits < 24)) { + stbsp__uint32 dg = 1; + if ((stbsp__uint64)bits >= stbsp__powten[9]) + dg = 10; + while ((stbsp__uint64)bits >= stbsp__powten[dg]) { + ++dg; + if (dg == 20) + goto noround; + } + if (frac_digits < dg) { + stbsp__uint64 r; + // add 0.5 at the right position and round + e = dg - frac_digits; + if ((stbsp__uint32)e >= 24) + goto noround; + r = stbsp__powten[e]; + bits = bits + (r / 2); + if ((stbsp__uint64)bits >= stbsp__powten[dg]) + ++tens; + bits /= r; + } + noround:; + } + + // kill long trailing runs of zeros + if (bits) { + stbsp__uint32 n; + for (;;) { + if (bits <= 0xffffffff) + break; + if (bits % 1000) + goto donez; + bits /= 1000; + } + n = (stbsp__uint32)bits; + while ((n % 1000) == 0) + n /= 1000; + bits = n; + donez:; + } + + // convert to string + out += 64; + e = 0; + for (;;) { + stbsp__uint32 n; + char *o = out - 8; + // do the conversion in chunks of U32s (avoid most 64-bit divides, worth it, constant denomiators be damned) + if (bits >= 100000000) { + n = (stbsp__uint32)(bits % 100000000); + bits /= 100000000; + } else { + n = (stbsp__uint32)bits; + bits = 0; + } + while (n) { + out -= 2; + *(stbsp__uint16 *)out = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2]; + n /= 100; + e += 2; + } + if (bits == 0) { + if ((e) && (out[0] == '0')) { + ++out; + --e; + } + break; + } + while (out != o) { + *--out = '0'; + ++e; + } + } + + *decimal_pos = tens; + *start = out; + *len = e; + return ng; +} + +#undef 
stbsp__ddmulthi +#undef stbsp__ddrenorm +#undef stbsp__ddmultlo +#undef stbsp__ddmultlos +#undef STBSP__SPECIAL +#undef STBSP__COPYFP + +#endif // STB_SPRINTF_NOFLOAT + +// clean up +#undef stbsp__uint16 +#undef stbsp__uint32 +#undef stbsp__int32 +#undef stbsp__uint64 +#undef stbsp__int64 +#undef STBSP__UNALIGNED + +#endif // STB_SPRINTF_IMPLEMENTATION + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. +------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. 
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/symmetry/symmetry.cpp b/gemmi_gph/symmetry.cpp similarity index 100% rename from symmetry/symmetry.cpp rename to gemmi_gph/symmetry.cpp diff --git a/gemmi_gph/xds_ascii.cpp b/gemmi_gph/xds_ascii.cpp new file mode 100644 index 00000000..373a1480 --- /dev/null +++ b/gemmi_gph/xds_ascii.cpp @@ -0,0 +1,306 @@ +// Copyright 2023 Global Phasing Ltd. 
+ +#include +#include // for fast_from_chars +#include // for skip_blank, read_word +#include // for trim_str +#include +#include + +namespace gemmi { + +void XdsAscii::gather_iset_statistics() { + for (Iset& iset : isets) { + iset.frame_number_min = INT_MAX; + iset.frame_number_max = 0; + for (const XdsAscii::Refl& refl : data) + if (refl.iset == iset.id) { + ++iset.reflection_count; + int frame = refl.frame(); + iset.frame_number_min = std::min(iset.frame_number_min, frame); + iset.frame_number_max = std::max(iset.frame_number_max, frame); + } + if (iset.frame_number_min > iset.frame_number_max) + continue; + std::vector frames(iset.frame_number_max - iset.frame_number_min + 1); + for (const XdsAscii::Refl& refl : data) + if (refl.iset == iset.id) + frames[refl.frame() - iset.frame_number_min] = 1; + iset.frame_count = 0; + for (uint8_t f : frames) + iset.frame_count += f; + } +} + +/// Based on Phil Evans' notes and the literature, see: +/// https://github.com/project-gemmi/gemmi/discussions/248 +/// \par p is defined as in XDS (p=0.5 for unpolarized beam). +void XdsAscii::apply_polarization_correction(double p, Vec3 normal) { + if (!has_cell_axes()) + fail("unknown unit cell axes"); + Mat33 UB = cell_axes.inverse(); + Vec3 rot_axis = get_rotation_axis(); + Vec3 s0_dir = get_s0_direction(); + normal = normal.normalized(); + // The polarization normal is expected to be approx. orthogonal to the beam. + // dot() is the same as cos_angle() for normalized vectors. + if (normal.dot(s0_dir) > std::cos(rad(5.0))) + fail("polarization normal is far from orthogonal to the incident beam"); + // make normal exactly orthogonal to the beam + normal = s0_dir.cross(normal).cross(s0_dir).normalized(); + // wavevector + Vec3 s0 = s0_dir / wavelength; + double s0_m2 = 1. 
/ s0.length_sq(); // s0^-2 + + for (Refl& refl : data) { + double phi = rad(rot_angle(refl)); + Vec3 h(refl.hkl[0], refl.hkl[1], refl.hkl[2]); + Vec3 r0 = UB.multiply(h); + Vec3 r = rotate_about_axis(r0, rot_axis, phi); + Vec3 s = s0 + r; +#if 0 + double two_theta = s0.angle(s); + // 2d sin(theta) = lambda + double bragg_angle = std::asin(wavelength / (2 * unit_cell.calculate_d(refl.hkl))); + printf("(%d %d %d) two-theta %g %g\n", + refl.hkl[0], refl.hkl[1], refl.hkl[2], deg(two_theta), deg(2 * bragg_angle)); +#endif + // we should have |s| == |s0|, but just in case calculate it separately + double s_m2 = 1. / s.length_sq(); + // 1 + cos^2(2theta) = 2 * correction for unpolarized beam + double t = 1 + sq(s.dot(s0)) * s_m2 * s0_m2; + double polariz_factor = (1 - 2*p) * (1 - sq(normal.dot(s)) * s_m2) + p * t; + // We assume that the XDS files has polarization correction applied, + // but for non-polarized beam. So we multiply intensities by P0=t/2 + // and divide by a hopefully more accurate polarization factor. 
+ double mult = 0.5 * t / polariz_factor; + refl.iobs *= mult; + refl.sigma *= mult; + refl.rlp *= mult; + } +} + +namespace { + +template +bool starts_with_ptr(const char* a, const char (&b)[N], const char** endptr) { + if (std::strncmp(a, b, N-1) != 0) + return false; + *endptr = a + N - 1; + return true; +} + +template +bool starts_with_ptr_b(const char* a, const char (&b)[N], const char** endptr) { + return starts_with_ptr(skip_blank(a), b, endptr); +} + +inline const char* parse_number_into(const char* start, const char* end, + double& val, const char* line) { + auto result = fast_from_chars(start, end, val); + if (result.ec != std::errc()) + fail("failed to parse a number in:\n", line); + return result.ptr; +} + +template +void parse_numbers_into_array(const char* start, const char* end, + double (&arr)[N], const char* line) { + for (double& val : arr) + start = parse_number_into(start, end, val, line); +} + +template +void parse_numbers_into_array(const char* start, const char* end, + std::array& arr, const char* line) { + for (double& val : arr) + start = parse_number_into(start, end, val, line); +} + +void parse_numbers_into_vec3(const char* start, const char* end, + Vec3& vec, const char* line) { + for (double* val : {&vec.x, &vec.y, &vec.z}) + start = parse_number_into(start, end, *val, line); +} + + +} // anonymous namespace + +void XdsAscii::read_stream(AnyStream& line_reader, const std::string& source) { + source_path = source; + read_columns = 12; + char line[256]; + size_t len0 = line_reader.copy_line(line, 255); + if (len0 == 0) + fail("empty file"); + int iset_col = 0; + const char xds_ascii_header[] = "!FORMAT=XDS_ASCII MERGE="; + char xds_ascii_type = '\0'; + if (starts_with(line, xds_ascii_header)) { + size_t n = sizeof(xds_ascii_header)-1; + xds_ascii_type = line[n]; + // !FORMAT=XDS_ASCII MERGE=FALSE FRIEDEL'S_LAW= + if (strncmp(line + n + 5, " FRIEDEL'S_LAW=", 18) == 0) + friedels_law = line[50]; + } + if (!xds_ascii_type && 
!starts_with(line, "!OUTPUT_FILE=INTEGRATE.HKL")) + fail("not an XDS_ASCII nor INTEGRATE.HKL file: " + source_path); + const char* rhs; + while (size_t len = line_reader.copy_line(line, 255)) { + if (line[0] == '!') { + if (starts_with_ptr(line+1, "Generated by ", &rhs)) { + generated_by = read_word(rhs, &rhs); + version_str = trim_str(rhs); + } else if (starts_with_ptr(line+1, "SPACE_GROUP_NUMBER=", &rhs)) { + spacegroup_number = simple_atoi(rhs); + } else if (starts_with_ptr(line+1, "UNIT_CELL_", &rhs)) { + if (starts_with_ptr(rhs, "CONSTANTS=", &rhs)) { // UNIT_CELL_CONSTANTS= + parse_numbers_into_array(rhs, line+len, cell_constants, line); + } else if (starts_with_ptr(rhs, "A-AXIS=", &rhs)) { // UNIT_CELL_A-AXIS= + parse_numbers_into_array(rhs, line+len, cell_axes.a[0], line); + } else if (starts_with_ptr(rhs, "B-AXIS=", &rhs)) { // UNIT_CELL_B-AXIS= + parse_numbers_into_array(rhs, line+len, cell_axes.a[1], line); + } else if (starts_with_ptr(rhs, "C-AXIS=", &rhs)) { // UNIT_CELL_C-AXIS= + parse_numbers_into_array(rhs, line+len, cell_axes.a[2], line); + } + } else if (starts_with_ptr(line+1, "REFLECTING_RANGE_E.S.D.=", &rhs)) { + auto result = fast_from_chars(rhs, line+len, reflecting_range_esd); + if (result.ec != std::errc()) + fail("failed to parse mosaicity:\n", line); + } else if (starts_with_ptr(line+1, "X-RAY_WAVELENGTH=", &rhs)) { + auto result = fast_from_chars(rhs, line+len, wavelength); + if (result.ec != std::errc()) + fail("failed to parse wavelength:\n", line); + } else if (starts_with_ptr(line+1, "INCIDENT_BEAM_DIRECTION=", &rhs)) { + parse_numbers_into_vec3(rhs, line+len, incident_beam_dir, line); + } else if (starts_with_ptr(line+1, "OSCILLATION_RANGE=", &rhs)) { + auto result = fast_from_chars(rhs, line+len, oscillation_range); + if (result.ec != std::errc()) + fail("failed to parse:\n", line); + } else if (starts_with_ptr(line+1, "ROTATION_AXIS=", &rhs)) { + parse_numbers_into_vec3(rhs, line+len, rotation_axis, line); + } else if 
(starts_with_ptr(line+1, "STARTING_ANGLE=", &rhs)) { + auto result = fast_from_chars(rhs, line+len, starting_angle); + if (result.ec != std::errc()) + fail("failed to parse:\n", line); + } else if (starts_with_ptr(line+1, "STARTING_FRAME=", &rhs)) { + starting_frame = simple_atoi(rhs); + } else if (starts_with_ptr(line+1, " ISET= ", &rhs)) { + const char* endptr; + int id = simple_atoi(rhs, &endptr); + XdsAscii::Iset& iset = find_or_add_iset(id); + endptr = skip_blank(endptr); + if (starts_with_ptr(endptr, "INPUT_FILE=", &rhs)) { + iset.input_file = read_word(rhs); + } else if (starts_with_ptr(endptr, "X-RAY_WAVELENGTH=", &rhs)) { + double w; + auto result = fast_from_chars(rhs, line+len, w); + if (result.ec != std::errc()) + fail("failed to parse iset wavelength:\n", line); + iset.wavelength = w; + } else if (starts_with_ptr(endptr, "UNIT_CELL_CONSTANTS=", &rhs)) { + parse_numbers_into_array(rhs, line+len, iset.cell_constants, line); + } + } else if (starts_with_ptr(line+1, "NX=", &rhs)) { + const char* endptr; + nx = simple_atoi(rhs, &endptr); + if (starts_with_ptr_b(endptr, "NY=", &rhs)) + ny = simple_atoi(rhs, &endptr); + if (starts_with_ptr_b(endptr, "QX=", &rhs)) + endptr = parse_number_into(rhs, line+len, qx, line); + if (starts_with_ptr_b(endptr, "QY=", &rhs)) + parse_number_into(rhs, line+len, qy, line); + } else if (starts_with_ptr(line+1, "ORGX=", &rhs)) { + const char* endptr = parse_number_into(rhs, line+len, orgx, line); + if (starts_with_ptr_b(endptr, "ORGY=", &rhs)) + endptr = parse_number_into(rhs, line+len, orgy, line); + if (starts_with_ptr_b(endptr, "DETECTOR_DISTANCE=", &rhs)) + parse_number_into(rhs, line+len, detector_distance, line); + } else if (starts_with_ptr(line+1, "NUMBER_OF_ITEMS_IN_EACH_DATA_RECORD=", &rhs)) { + int num = simple_atoi(rhs); + // INTEGRATE.HKL has read_columns=12, as set above + if (xds_ascii_type == 'T') // merged file + read_columns = 5; + else if (generated_by == "XSCALE") + read_columns = 8; + else if (generated_by 
== "CORRECT") + read_columns = 11; + // check if the columns are what they always are + if (num < read_columns) + fail("expected ", std::to_string(read_columns), "+ columns, got:\n", line); + if (generated_by == "INTEGRATE") { + line_reader.copy_line(line, 52); + if (!starts_with(line, "!H,K,L,IOBS,SIGMA,XCAL,YCAL,ZCAL,RLP,PEAK,CORR,MAXC")) + fail("unexpected column order in INTEGRATE.HKL"); + } else { + const char* expected_columns[12] = { + "H=1", "K=2", "L=3", "IOBS=4", "SIGMA(IOBS)=5", + "XD=6", "YD=7", "ZD=8", "RLP=9", "PEAK=10", "CORR=11", "MAXC=12" + }; + for (int i = 0; i < read_columns; ++i) { + const char* col = expected_columns[i]; + line_reader.copy_line(line, 42); + if (std::strncmp(line, "!ITEM_", 6) != 0 || + std::strncmp(line+6, col, std::strlen(col)) != 0) + fail("column !ITEM_" + std::string(col), " not found."); + } + } + } else if (starts_with_ptr(line+1, "ITEM_ISET=", &rhs)) { + iset_col = simple_atoi(rhs); + } else if (starts_with(line+1, "END_OF_DATA")) { + if (isets.empty()) { + isets.emplace_back(1); + isets.back().wavelength = wavelength; + } + for (XdsAscii::Refl& refl : data) + if (size_t(refl.iset - 1) >= isets.size()) + fail("unexpected ITEM_ISET " + std::to_string(refl.iset)); + return; + } + } else { + data.emplace_back(); + XdsAscii::Refl& r = data.back(); + const char* p = line; + for (int i = 0; i < 3; ++i) + r.hkl[i] = simple_atoi(p, &p); + auto result = fast_from_chars(p, line+len, r.iobs); // 4 + result = fast_from_chars(result.ptr, line+len, r.sigma); // 5 + if (read_columns >= 8) { + result = fast_from_chars(result.ptr, line+len, r.xd); // 6 + result = fast_from_chars(result.ptr, line+len, r.yd); // 7 + result = fast_from_chars(result.ptr, line+len, r.zd); // 8 + if (read_columns >= 11) { + result = fast_from_chars(result.ptr, line+len, r.rlp); // 9 + result = fast_from_chars(result.ptr, line+len, r.peak); // 10 + result = fast_from_chars(result.ptr, line+len, r.corr); // 11 + if (read_columns >= 12) { + result = 
fast_from_chars(result.ptr, line+len, r.maxc); // 12 + } else { + r.maxc = 0; // 12 + } + } else { + r.rlp = r.peak = r.corr = r.maxc = 0; // 9-11 + } + } else { + r.xd = r.yd = r.zd = 0; // 6-8 + } + if (result.ec != std::errc()) + fail("failed to parse data line:\n", line); + if (iset_col >= read_columns) { + const char* iset_ptr = result.ptr; + for (int j = read_columns+1; j < iset_col; ++j) + iset_ptr = skip_word(skip_blank(iset_ptr)); + r.iset = simple_atoi(iset_ptr); + } + } + } + fail("incorrect or unfinished file: " + source_path); +} + +XdsAscii read_xds_ascii(const std::string& path) { + XdsAscii xds_ascii; + xds_ascii.read_input(gemmi::MaybeGzipped(path)); + return xds_ascii; +} + +} // namespace gemmi diff --git a/symmetry/CMakeLists.txt b/symmetry/CMakeLists.txt deleted file mode 100644 index fed3f792..00000000 --- a/symmetry/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -ADD_LIBRARY(gemmi STATIC symmetry.cpp gemmi/symmetry.hpp gemmi/fail.hpp) -TARGET_INCLUDE_DIRECTORIES(gemmi PUBLIC .) 
\ No newline at end of file -- 2.52.0 From 6aa6890a8b374b0792c492f40dbf9c85f34e94cb Mon Sep 17 00:00:00 2001 From: Filip Leonarski Date: Wed, 13 May 2026 14:22:39 +0200 Subject: [PATCH 051/132] Enable reading MTZ files with reference Fcalc --- CMakeLists.txt | 2 ++ common/CrystalLattice.h | 4 +-- common/DiffractionExperiment.h | 2 +- common/JFJochMessages.h | 2 +- gemmi_gph/CMakeLists.txt | 2 +- image_analysis/CMakeLists.txt | 4 ++- image_analysis/IndexAndRefine.cpp | 10 +++--- image_analysis/IndexAndRefine.h | 5 +-- image_analysis/LoadFCalcFromMtz.cpp | 47 +++++++++++++++++++++++++++++ image_analysis/LoadFCalcFromMtz.h | 8 +++++ image_analysis/WriteMmcif.h | 2 +- image_puller/CMakeLists.txt | 2 +- jungfrau/CMakeLists.txt | 2 ++ tools/jfjoch_process.cpp | 28 ++++++++++++++--- 14 files changed, 100 insertions(+), 20 deletions(-) create mode 100644 image_analysis/LoadFCalcFromMtz.cpp create mode 100644 image_analysis/LoadFCalcFromMtz.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 92e97835..02668e08 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,8 @@ SET(JFJOCH_INSTALL_DRIVER_SOURCE OFF CACHE BOOL "Install kernel driver source (i SET(JFJOCH_USE_CUDA ON CACHE BOOL "Compile Jungfraujoch with CUDA") SET(JFJOCH_VIEWER_BUILD OFF CACHE BOOL "Compile Jungfraujoch viewer") +FIND_PACKAGE(ZLIB REQUIRED) + SET (ZLIB_USE_STATIC_LIBS TRUE) OPTION(SLS9 "Build with sls_detector_package v9.2.0" OFF) diff --git a/common/CrystalLattice.h b/common/CrystalLattice.h index c5cce66a..45952cd3 100644 --- a/common/CrystalLattice.h +++ b/common/CrystalLattice.h @@ -4,12 +4,12 @@ #ifndef JUNGFRAUJOCH_CRYSTALLATTICE_H #define JUNGFRAUJOCH_CRYSTALLATTICE_H -#include "../symmetry/gemmi/math.hpp" +#include +#include #include #include #include "Coord.h" #include "UnitCell.h" -#include "../symmetry/gemmi/symmetry.hpp" class CrystalLattice { Coord vec[3]; diff --git a/common/DiffractionExperiment.h b/common/DiffractionExperiment.h index 95cc74c8..11e82816 100644 --- 
a/common/DiffractionExperiment.h +++ b/common/DiffractionExperiment.h @@ -29,7 +29,7 @@ #include "BraggIntegrationSettings.h" #include "ScalingSettings.h" -#include "../symmetry/gemmi/symmetry.hpp" +#include enum class DetectorMode { Standard, PedestalG0, PedestalG1, PedestalG2, DarkMask diff --git a/common/JFJochMessages.h b/common/JFJochMessages.h index 024a19bc..2a2477bd 100644 --- a/common/JFJochMessages.h +++ b/common/JFJochMessages.h @@ -22,7 +22,7 @@ #include "CrystalLattice.h" #include "IndexingSettings.h" #include "XrayFluorescenceSpectrum.h" -#include "../symmetry/gemmi/symmetry.hpp" +#include constexpr const uint64_t user_data_release = 6; constexpr const uint64_t user_data_magic_number = 0x52320000UL | user_data_release; diff --git a/gemmi_gph/CMakeLists.txt b/gemmi_gph/CMakeLists.txt index 66d88819..c7353257 100644 --- a/gemmi_gph/CMakeLists.txt +++ b/gemmi_gph/CMakeLists.txt @@ -5,4 +5,4 @@ ADD_LIBRARY(gemmi STATIC symmetry.cpp gz.cpp mtz.cpp sprintf.cpp xds_ascii.cpp gemmi/unitcell.hpp gemmi/math.hpp) TARGET_INCLUDE_DIRECTORIES(gemmi PUBLIC .) 
-TARGET_LINK_LIBRARIES(gemmi ) \ No newline at end of file +TARGET_LINK_LIBRARIES(gemmi PRIVATE ZLIB::ZLIB) \ No newline at end of file diff --git a/image_analysis/CMakeLists.txt b/image_analysis/CMakeLists.txt index 394dbaed..e8bc48a8 100644 --- a/image_analysis/CMakeLists.txt +++ b/image_analysis/CMakeLists.txt @@ -30,7 +30,9 @@ ADD_LIBRARY(JFJochImageAnalysis STATIC RotationParameters.cpp RotationParameters.h WriteMmcif.cpp - WriteMmcif.h) + WriteMmcif.h + LoadFCalcFromMtz.cpp + LoadFCalcFromMtz.h) FIND_PACKAGE(Eigen3 3.4 REQUIRED NO_MODULE) # provides Eigen3::Eigen diff --git a/image_analysis/IndexAndRefine.cpp b/image_analysis/IndexAndRefine.cpp index 9b749e89..6a0e62e1 100644 --- a/image_analysis/IndexAndRefine.cpp +++ b/image_analysis/IndexAndRefine.cpp @@ -280,15 +280,15 @@ void IndexAndRefine::ScaleImage(size_t n, ScaleOnTheFly &scaling, ScalingResult result.rotation_wedge_deg[n] = res.wedge; } -ScalingResult IndexAndRefine::ScaleAllImages(size_t nthreads) { - auto merge_result = MergeAll(experiment, reflections); - ScaleOnTheFly scaling(merge_result, experiment); +ScalingResult IndexAndRefine::ScaleAllImages(const std::vector &reference, size_t nthreads) { + ScaleOnTheFly scaling(reference, experiment); return scaling.Scale(reflections, mosaicity, nthreads); } -MergeResult IndexAndRefine::Merge() const { +MergeResult IndexAndRefine::Merge(bool calc_statistics) const { MergeResult out; out.merged = MergeAll(experiment, reflections); - out.statistics = MergeStats(experiment, out.merged, reflections); + if (calc_statistics) + out.statistics = MergeStats(experiment, out.merged, reflections); return out; } diff --git a/image_analysis/IndexAndRefine.h b/image_analysis/IndexAndRefine.h index f80335e3..9acca8a1 100644 --- a/image_analysis/IndexAndRefine.h +++ b/image_analysis/IndexAndRefine.h @@ -63,9 +63,10 @@ class IndexAndRefine { public: IndexAndRefine(const DiffractionExperiment &x, IndexerThreadPool *indexer); void ProcessImage(DataMessage &msg, const 
SpotFindingSettings &settings, const CompressedImage &image, BraggPrediction &prediction); + IndexAndRefine& ReferenceIntensities(std::vector &reference); - ScalingResult ScaleAllImages(size_t nthreads = 0); - MergeResult Merge() const; + ScalingResult ScaleAllImages(const std::vector &reference, size_t nthreads = 0); + MergeResult Merge(bool statistics) const; std::optional Finalize(); }; diff --git a/image_analysis/LoadFCalcFromMtz.cpp b/image_analysis/LoadFCalcFromMtz.cpp new file mode 100644 index 00000000..b7153f32 --- /dev/null +++ b/image_analysis/LoadFCalcFromMtz.cpp @@ -0,0 +1,47 @@ +// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#include "LoadFCalcFromMtz.h" + +#include +#include +#include +#include + +#include + +#include "../common/Reflection.h" + +std::vector LoadFCalcFromMtz(const std::string& path) { + gemmi::Mtz mtz; + mtz.read_file_gz(path, true); + + const gemmi::Mtz::Column* fc = mtz.column_with_label("F-model", nullptr, 'F'); + if (fc == nullptr) + throw std::runtime_error("MTZ does not contain F-model column"); + + std::vector result; + result.reserve(static_cast(mtz.nreflections)); + + const std::size_t stride = mtz.columns.size(); + + for (int i = 0; i < mtz.nreflections; ++i) { + const std::size_t row = static_cast(i) * stride; + const float f = (*fc)[static_cast(i)]; + + if (std::isnan(f)) + continue; + + MergedReflection r; + r.h = static_cast(mtz.data[row + 0]); + r.k = static_cast(mtz.data[row + 1]); + r.l = static_cast(mtz.data[row + 2]); + r.I = f * f; + r.sigma = NAN; + r.d = 0.0f; + + result.emplace_back(r); + } + + return result; +} diff --git a/image_analysis/LoadFCalcFromMtz.h b/image_analysis/LoadFCalcFromMtz.h new file mode 100644 index 00000000..ad2f29c9 --- /dev/null +++ b/image_analysis/LoadFCalcFromMtz.h @@ -0,0 +1,8 @@ +// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#pragma once + 
+#include "../common/Reflection.h" + +std::vector LoadFCalcFromMtz(const std::string& path); diff --git a/image_analysis/WriteMmcif.h b/image_analysis/WriteMmcif.h index 10e05b22..057ee56b 100644 --- a/image_analysis/WriteMmcif.h +++ b/image_analysis/WriteMmcif.h @@ -10,7 +10,7 @@ #include "scale_merge/FrenchWilson.h" #include "../common/UnitCell.h" -#include "../symmetry/gemmi/symmetry.hpp" +#include /// Metadata needed to write a meaningful mmCIF reflection file. struct MmcifMetadata { diff --git a/image_puller/CMakeLists.txt b/image_puller/CMakeLists.txt index 51f875a6..24d01ab1 100644 --- a/image_puller/CMakeLists.txt +++ b/image_puller/CMakeLists.txt @@ -5,4 +5,4 @@ ADD_LIBRARY(JFJochImagePuller ZMQImagePuller.cpp ZMQImagePuller.h TestImagePuller.h TCPImagePuller.cpp TCPImagePuller.h) -TARGET_LINK_LIBRARIES(JFJochImagePuller JFJochZMQ JFJochLogger) +TARGET_LINK_LIBRARIES(JFJochImagePuller JFJochCommon JFJochZMQ JFJochLogger) diff --git a/jungfrau/CMakeLists.txt b/jungfrau/CMakeLists.txt index 072a6900..adc83356 100644 --- a/jungfrau/CMakeLists.txt +++ b/jungfrau/CMakeLists.txt @@ -5,4 +5,6 @@ ADD_LIBRARY(JFCalibration STATIC JFModuleGainCalibration.cpp JFModuleGainCalibration.h JFPedestalCalc.cpp JFPedestalCalc.h) +TARGET_LINK_LIBRARIES(JFCalibration JFJochCommon) + SET_SOURCE_FILES_PROPERTIES(JFPedestalCalc.cpp JFConversionFloatingPoint.cpp PROPERTIES COMPILE_FLAGS -Ofast) diff --git a/tools/jfjoch_process.cpp b/tools/jfjoch_process.cpp index 62343d27..9c2d5769 100644 --- a/tools/jfjoch_process.cpp +++ b/tools/jfjoch_process.cpp @@ -25,6 +25,7 @@ #include "../image_analysis/IndexAndRefine.h" #include "../receiver/JFJochReceiverPlots.h" #include "../compression/JFJochCompressor.h" +#include "../image_analysis/LoadFCalcFromMtz.h" #include "../image_analysis/scale_merge/FrenchWilson.h" #include "../image_analysis/scale_merge/SearchSpaceGroup.h" #include "../image_analysis/WriteMmcif.h" @@ -149,6 +150,7 @@ int main(int argc, char **argv) { bool refine_bfactor = 
false; bool refine_wedge = false; std::optional wedge_for_scaling; + std::string ref_mtz; IndexingAlgorithmEnum indexing_algorithm = IndexingAlgorithmEnum::Auto; @@ -163,11 +165,14 @@ int main(int argc, char **argv) { } int opt; - while ((opt = getopt(argc, argv, "o:N:s:e:vc:R::FX:xd:S:MP:AD:C:T:t:Bw::")) != -1) { + while ((opt = getopt(argc, argv, "o:N:s:e:vc:R::FX:xd:S:MP:AD:C:T:t:Bw::z:")) != -1) { switch (opt) { case 'o': output_prefix = optarg; break; + case 'z': + ref_mtz = optarg; + break; case 'N': nthreads = atoi(optarg); break; @@ -319,6 +324,12 @@ int main(int argc, char **argv) { logger.Info("Loaded dataset from {}", input_file); + std::vector reference_data; + if (!ref_mtz.empty()) { + reference_data = LoadFCalcFromMtz(ref_mtz); + logger.Info("Loaded {} reflections from {} MTZ file", reference_data.size(), ref_mtz); + } + // 2. Setup Experiment & Components DiffractionExperiment experiment(dataset->experiment); experiment.BitDepthImage(32).Compression(CompressionAlgorithm::BSHUF_LZ4); @@ -579,10 +590,17 @@ int main(int argc, char **argv) { auto scale_start = std::chrono::steady_clock::now(); for (int i = 0; i < 3; i++) { auto iter_start = std::chrono::steady_clock::now(); - auto scale_result = indexer.ScaleAllImages(); - end_msg.image_scale_factor = scale_result.image_scale_g; - scale_result.SaveToFile(output_prefix + "_iter" + std::to_string(i) + "_scale.dat"); + if (reference_data.empty()) { + auto merge_result = indexer.Merge(false); + auto scale_result = indexer.ScaleAllImages(merge_result.merged); + end_msg.image_scale_factor = scale_result.image_scale_g; + scale_result.SaveToFile(output_prefix + "_iter" + std::to_string(i) + "_scale.dat"); + } else { + auto scale_result = indexer.ScaleAllImages(reference_data); + end_msg.image_scale_factor = scale_result.image_scale_g; + scale_result.SaveToFile(output_prefix + "_iter" + std::to_string(i) + "_scale.dat"); + } auto iter_end = std::chrono::steady_clock::now(); double iter_time = 
std::chrono::duration(iter_end - iter_start).count(); @@ -592,7 +610,7 @@ int main(int argc, char **argv) { double scale_time = std::chrono::duration(scale_end - scale_start).count(); auto merge_start = std::chrono::steady_clock::now(); - auto merge_result = indexer.Merge(); + auto merge_result = indexer.Merge(true); auto merge_end = std::chrono::steady_clock::now(); double merge_time = std::chrono::duration(merge_end - merge_start).count(); -- 2.52.0 From e70d71912b68afb5fbe4ca0f205234dc8a2e9f5a Mon Sep 17 00:00:00 2001 From: Filip Leonarski Date: Wed, 13 May 2026 14:36:41 +0200 Subject: [PATCH 052/132] Merge: Add partiality limit --- common/ScalingSettings.cpp | 4 +++- common/ScalingSettings.h | 3 +++ image_analysis/scale_merge/Merge.cpp | 6 ++++++ image_analysis/scale_merge/Merge.h | 6 +++--- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/common/ScalingSettings.cpp b/common/ScalingSettings.cpp index 04f0f8b1..16d5e9de 100644 --- a/common/ScalingSettings.cpp +++ b/common/ScalingSettings.cpp @@ -97,4 +97,6 @@ std::optional ScalingSettings::GetRotationWedgeForScaling() const { return wedge_for_scaling; } - +double ScalingSettings::GetMinPartiality() const { + return min_partiality; +} diff --git a/common/ScalingSettings.h b/common/ScalingSettings.h index 941b6dea..f86c5a2b 100644 --- a/common/ScalingSettings.h +++ b/common/ScalingSettings.h @@ -20,6 +20,7 @@ class ScalingSettings { bool merge_friedel = true; std::optional high_resolution_limit_A; std::optional wedge_for_scaling; + constexpr static double min_partiality = 0.02; public: ScalingSettings& SetPartialityModel(PartialityModel mode); ScalingSettings& RefineB(bool input); @@ -46,4 +47,6 @@ public: [[nodiscard]] std::optional GetPartialityModel() const; [[nodiscard]] std::optional GetHighResolutionLimit_A() const; + + [[nodiscard]] double GetMinPartiality() const; }; diff --git a/image_analysis/scale_merge/Merge.cpp b/image_analysis/scale_merge/Merge.cpp index 2cad3d64..a54ffdd7 100644 --- 
a/image_analysis/scale_merge/Merge.cpp +++ b/image_analysis/scale_merge/Merge.cpp @@ -15,6 +15,7 @@ std::vector MergeAll(const DiffractionExperiment &x, const std auto scaling_settings = x.GetScalingSettings(); HKLKeyGenerator key_generator(scaling_settings.GetMergeFriedel(), x.GetSpaceGroupNumber().value_or(1)); const std::optional high_resolution_limit = scaling_settings.GetHighResolutionLimit_A(); + auto min_partiality = scaling_settings.GetMinPartiality(); struct Accum { // Keep anomalous + / - together, but separate @@ -37,6 +38,8 @@ std::vector MergeAll(const DiffractionExperiment &x, const std continue; if (!AcceptReflection(r, high_resolution_limit)) continue; + if (r.partiality < min_partiality) + continue; const float I_corr = r.I * r.scaling_correction; const float sigma_corr = r.sigma * r.scaling_correction; @@ -106,6 +109,7 @@ MergeStatistics MergeStats(const DiffractionExperiment &x, float d_min = std::numeric_limits::max(); float d_max = 0.0f; + auto min_partiality = x.GetScalingSettings().GetMinPartiality(); auto d_min_limit_A = x.GetScalingSettings().GetHighResolutionLimit_A(); for (const auto &m: merged) { if (!std::isfinite(m.d) || m.d <= 0.0f) @@ -159,6 +163,8 @@ MergeStatistics MergeStats(const DiffractionExperiment &x, if (!AcceptReflection(r, d_min_limit_A)) continue; + if (r.partiality < min_partiality) + continue;; const auto shell = shells.GetShell(r.d); if (!shell.has_value()) diff --git a/image_analysis/scale_merge/Merge.h b/image_analysis/scale_merge/Merge.h index 3ed79f83..a40fb83a 100644 --- a/image_analysis/scale_merge/Merge.h +++ b/image_analysis/scale_merge/Merge.h @@ -33,8 +33,8 @@ struct MergeResult { }; std::vector MergeAll(const DiffractionExperiment &x, - const std::vector > &reflections); + const std::vector > &reflections); MergeStatistics MergeStats(const DiffractionExperiment &x, - const std::vector &merged, - const std::vector > &reflections); \ No newline at end of file + const std::vector &merged, + const std::vector > 
&reflections); -- 2.52.0 From 2397a108dd7185c8347667b26b9cbdfc3a4903d7 Mon Sep 17 00:00:00 2001 From: Filip Leonarski Date: Wed, 13 May 2026 15:23:32 +0200 Subject: [PATCH 053/132] ScaleOnTheFly: Add CC image/ref --- image_analysis/scale_merge/ScaleOnTheFly.cpp | 59 ++++++++++++++++++++ image_analysis/scale_merge/ScaleOnTheFly.h | 3 + image_analysis/scale_merge/ScalingResult.cpp | 11 ++-- image_analysis/scale_merge/ScalingResult.h | 3 +- tools/jfjoch_process.cpp | 16 +++--- 5 files changed, 78 insertions(+), 14 deletions(-) diff --git a/image_analysis/scale_merge/ScaleOnTheFly.cpp b/image_analysis/scale_merge/ScaleOnTheFly.cpp index d2057fbe..b1e63ac4 100644 --- a/image_analysis/scale_merge/ScaleOnTheFly.cpp +++ b/image_analysis/scale_merge/ScaleOnTheFly.cpp @@ -115,6 +115,57 @@ bool ScaleOnTheFly::Accept(const Reflection &r) { return true; } +std::pair ScaleOnTheFly::CalculateGlobalCC(const std::vector &reflections) const { + double sum_x = 0.0; + double sum_y = 0.0; + double sum_x2 = 0.0; + double sum_y2 = 0.0; + double sum_xy = 0.0; + size_t n = 0; + + for (const auto &r: reflections) { + if (!AcceptReflection(r, s.GetHighResolutionLimit_A())) + continue; + if (r.partiality < s.GetMinPartiality()) + continue; + if (!std::isfinite(r.I) || !std::isfinite(r.scaling_correction) || r.scaling_correction <= 0.0f) + continue; + if (!std::isfinite(r.sigma) || r.sigma <= 0.0f) + continue; + + const HKLKey key = hkl_key_generator(r); + const auto it = reference_data.find(key); + if (it == reference_data.end()) + continue; + + const double image_i = static_cast(r.I) * static_cast(r.scaling_correction); + const double ref_i = it->second; + + if (!std::isfinite(image_i) || !std::isfinite(ref_i)) + continue; + + sum_x += image_i; + sum_y += ref_i; + sum_x2 += image_i * image_i; + sum_y2 += ref_i * ref_i; + sum_xy += image_i * ref_i; + ++n; + } + + if (n < MIN_REFLECTIONS) + return {NAN, n}; + + const double nd = static_cast(n); + const double cov = sum_xy - sum_x * sum_y / nd; 
+ const double var_x = sum_x2 - sum_x * sum_x / nd; + const double var_y = sum_y2 - sum_y * sum_y / nd; + + if (!(var_x > 0.0 && var_y > 0.0)) + return {NAN, n}; + + return {cov / std::sqrt(var_x * var_y), n}; +} + ScaleOnTheFlyResult ScaleOnTheFly::Scale(std::vector &reflections, std::optional mosaicity_deg) { auto start = std::chrono::steady_clock::now(); @@ -235,6 +286,10 @@ ScaleOnTheFlyResult ScaleOnTheFly::Scale(std::vector &reflections, s } } + const auto [cc, cc_n] = CalculateGlobalCC(reflections); + result.cc = cc; + result.cc_n = cc_n; + auto end = std::chrono::steady_clock::now(); result.time_s = std::chrono::duration(end - start).count(); return result; @@ -262,6 +317,8 @@ ScalingResult ScaleOnTheFly::Scale(std::vector > &reflec result.image_bfactor_Ang2[i] = local_result.B; result.image_scale_g[i] = local_result.G; result.rotation_wedge_deg[i] = local_result.wedge; + result.image_cc[i] = local_result.cc; + result.image_cc_n[i] = local_result.cc_n; } } else { auto local_nthreads = std::min(nthreads, reflections.size()); @@ -285,6 +342,8 @@ ScalingResult ScaleOnTheFly::Scale(std::vector > &reflec result.image_bfactor_Ang2[i] = local_result.B; result.image_scale_g[i] = local_result.G; result.rotation_wedge_deg[i] = local_result.wedge; + result.image_cc[i] = local_result.cc; + result.image_cc_n[i] = local_result.cc_n; i = curr_image.fetch_add(1); } })); diff --git a/image_analysis/scale_merge/ScaleOnTheFly.h b/image_analysis/scale_merge/ScaleOnTheFly.h index c491c4a5..fac68f3b 100644 --- a/image_analysis/scale_merge/ScaleOnTheFly.h +++ b/image_analysis/scale_merge/ScaleOnTheFly.h @@ -16,6 +16,8 @@ struct ScaleOnTheFlyResult { double G = 1.0; double mos = 0.1; double wedge = 0.1; + double cc = NAN; + size_t cc_n = 0; float time_s = 0.0; bool succesful = false; }; @@ -32,6 +34,7 @@ class ScaleOnTheFly { std::map reference_data; bool Accept(const Reflection &r); + [[nodiscard]] std::pair CalculateGlobalCC(const std::vector &reflections) const; public: 
ScaleOnTheFly(const std::vector &ref, const DiffractionExperiment &x); ScaleOnTheFlyResult Scale(std::vector &r, std::optional mosaicity_deg); diff --git a/image_analysis/scale_merge/ScalingResult.cpp b/image_analysis/scale_merge/ScalingResult.cpp index 8c683da9..a5dc4aae 100644 --- a/image_analysis/scale_merge/ScalingResult.cpp +++ b/image_analysis/scale_merge/ScalingResult.cpp @@ -11,25 +11,26 @@ ScalingResult::ScalingResult(size_t n) : image_scale_g(n, NAN), mosaicity_deg(n, NAN), image_bfactor_Ang2(n, NAN), - rotation_wedge_deg(n, NAN) { -} + rotation_wedge_deg(n, NAN), + image_cc(n, NAN), + image_cc_n(n, 0) {} void ScalingResult::SaveToFile(const std::string &filename) { const std::string img_path = filename + "_image.dat"; - std::ofstream img_file(img_path); + std::ofstream img_file(img_path, std::ofstream::out | std::ofstream::trunc); if (!img_file) { throw JFJochException(JFJochExceptionCategory::FileWriteError , "Cannot open {} for writing"); } - img_file << "# image_id G B mosaicity_deg wedge_deg\n"; - for (size_t i = 0; i < image_scale_g.size(); ++i) { img_file << i << " " << image_scale_g[i] << " " << image_bfactor_Ang2[i] << " " << mosaicity_deg[i] << " " << rotation_wedge_deg[i] + << " " << image_cc[i] + << " " << image_cc_n[i] << "\n"; } diff --git a/image_analysis/scale_merge/ScalingResult.h b/image_analysis/scale_merge/ScalingResult.h index 880f5c55..91bc8c2a 100644 --- a/image_analysis/scale_merge/ScalingResult.h +++ b/image_analysis/scale_merge/ScalingResult.h @@ -11,7 +11,8 @@ struct ScalingResult { std::vector mosaicity_deg; std::vector image_bfactor_Ang2; std::vector rotation_wedge_deg; - + std::vector image_cc; + std::vector image_cc_n; explicit ScalingResult(size_t n); void SaveToFile(const std::string &filename); }; diff --git a/tools/jfjoch_process.cpp b/tools/jfjoch_process.cpp index 9c2d5769..5eceeed0 100644 --- a/tools/jfjoch_process.cpp +++ b/tools/jfjoch_process.cpp @@ -587,20 +587,20 @@ int main(int argc, char **argv) { const bool 
fixed_space_group = space_group || experiment.GetGemmiSpaceGroup().has_value(); + ScalingResult scale_result(0); + auto scale_start = std::chrono::steady_clock::now(); for (int i = 0; i < 3; i++) { auto iter_start = std::chrono::steady_clock::now(); if (reference_data.empty()) { auto merge_result = indexer.Merge(false); - auto scale_result = indexer.ScaleAllImages(merge_result.merged); - end_msg.image_scale_factor = scale_result.image_scale_g; - scale_result.SaveToFile(output_prefix + "_iter" + std::to_string(i) + "_scale.dat"); - } else { - auto scale_result = indexer.ScaleAllImages(reference_data); - end_msg.image_scale_factor = scale_result.image_scale_g; - scale_result.SaveToFile(output_prefix + "_iter" + std::to_string(i) + "_scale.dat"); - } + scale_result = indexer.ScaleAllImages(merge_result.merged); + } else + scale_result = indexer.ScaleAllImages(reference_data); + + end_msg.image_scale_factor = scale_result.image_scale_g; + scale_result.SaveToFile(output_prefix + "_iter" + std::to_string(i) + "_scale.dat"); auto iter_end = std::chrono::steady_clock::now(); double iter_time = std::chrono::duration(iter_end - iter_start).count(); -- 2.52.0 From 1afbaf6f116295411e5f2d6d962ec53001b87a79 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Wed, 13 May 2026 21:21:14 +0200 Subject: [PATCH 054/132] CrystalLattice: Need to include gemmi via exact path --- common/CrystalLattice.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/common/CrystalLattice.h b/common/CrystalLattice.h index 45952cd3..c7b8d358 100644 --- a/common/CrystalLattice.h +++ b/common/CrystalLattice.h @@ -4,8 +4,9 @@ #ifndef JUNGFRAUJOCH_CRYSTALLATTICE_H #define JUNGFRAUJOCH_CRYSTALLATTICE_H -#include -#include +#include "../gemmi_gph/gemmi/math.hpp" +#include "../gemmi_gph/gemmi/symmetry.hpp" + #include #include #include "Coord.h" -- 2.52.0 From 1bccbda3709b752c6e5bf238eb2c8e59484b847b Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Wed, 13 May 2026 21:31:05 +0200 Subject: 
[PATCH 055/132] JFJochMessages: Need to include gemmi via exact path --- common/JFJochMessages.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/JFJochMessages.h b/common/JFJochMessages.h index 2a2477bd..43ebf67b 100644 --- a/common/JFJochMessages.h +++ b/common/JFJochMessages.h @@ -22,7 +22,7 @@ #include "CrystalLattice.h" #include "IndexingSettings.h" #include "XrayFluorescenceSpectrum.h" -#include +#include "../gemmi_gph/gemmi/symmetry.hpp" constexpr const uint64_t user_data_release = 6; constexpr const uint64_t user_data_magic_number = 0x52320000UL | user_data_release; -- 2.52.0 From 0e9f7cc956992161ae4963a1df51e616fab59ad9 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Thu, 14 May 2026 15:45:58 +0200 Subject: [PATCH 056/132] Save/transfer/read/display image scale results --- broker/OpenAPIConvert.cpp | 2 + broker/jfjoch_api.yaml | 2 + broker/redoc-static.html | 6 +- common/JFJochMessages.h | 13 ++- common/Plot.h | 3 +- common/Reflection.h | 2 +- common/ScanResult.h | 3 +- common/ScanResultGenerator.cpp | 5 +- docs/CBOR.md | 95 ++++++++++--------- frame_serialize/CBORStream2Deserializer.cpp | 20 ++++ frame_serialize/CBORStream2Serializer.cpp | 11 ++- frontend/package-lock.json | 4 +- frontend/src/openapi/models/plot_type.ts | 2 + .../src/openapi/services/DefaultService.ts | 4 +- image_analysis/IndexAndRefine.cpp | 24 +++-- image_analysis/IndexAndRefine.h | 3 +- .../bragg_integration/BraggIntegrate2D.cpp | 2 +- .../bragg_prediction/BraggPrediction.cpp | 2 +- .../bragg_prediction/BraggPredictionGPU.cu | 2 +- .../bragg_prediction/BraggPredictionRot.cpp | 2 +- .../bragg_prediction/BraggPredictionRotGPU.cu | 2 +- image_analysis/scale_merge/Merge.cpp | 8 +- image_analysis/scale_merge/ScaleOnTheFly.cpp | 8 +- reader/JFJochHDF5Reader.cpp | 8 +- reader/JFJochHttpReader.cpp | 2 + reader/JFJochReaderDataset.h | 4 +- receiver/JFJochReceiverPlots.cpp | 23 +++++ receiver/JFJochReceiverPlots.h | 3 + tools/jfjoch_process.cpp | 39 ++++---- 
viewer/JFJochViewerDatasetInfo.cpp | 10 +- .../windows/JFJochViewerImageListWindow.cpp | 35 ++++++- viewer/windows/JFJochViewerImageListWindow.h | 7 +- writer/HDF5DataFilePluginMX.cpp | 20 ++++ writer/HDF5DataFilePluginMX.h | 7 ++ writer/HDF5DataFilePluginPerformance.cpp | 4 + writer/HDF5DataFilePluginPerformance.h | 1 + writer/HDF5NXmx.cpp | 5 +- 37 files changed, 283 insertions(+), 110 deletions(-) diff --git a/broker/OpenAPIConvert.cpp b/broker/OpenAPIConvert.cpp index b41fb4b8..cf684946 100644 --- a/broker/OpenAPIConvert.cpp +++ b/broker/OpenAPIConvert.cpp @@ -883,6 +883,8 @@ PlotType ConvertPlotType(const std::optional& input) { if (input == "beam_center_x") return PlotType::RefinementBeamX; if (input == "beam_center_y") return PlotType::RefinementBeamY; if (input == "integrated_reflections") return PlotType::IntegratedReflections; + if (input == "image_scale_factor") return PlotType::ImageScaleFactor; + if (input == "image_scale_cc") return PlotType::ImageScaleCC; throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Plot type not recognized"); diff --git a/broker/jfjoch_api.yaml b/broker/jfjoch_api.yaml index 2908599e..a45e852f 100644 --- a/broker/jfjoch_api.yaml +++ b/broker/jfjoch_api.yaml @@ -116,6 +116,8 @@ components: - beam_center_x - beam_center_y - integrated_reflections + - image_scale_factor + - image_scale_cc roi: in: query name: roi diff --git a/broker/redoc-static.html b/broker/redoc-static.html index 94fae561..f00c23f0 100644 --- a/broker/redoc-static.html +++ b/broker/redoc-static.html @@ -867,7 +867,7 @@ User mask is not automatically applied - i.e. pixels with user mask will have a

Generate 1D plot from Jungfraujoch

query Parameters
binning
integer
Default: 1

Binning of frames for the plot (0 = default binning)

-
type
required
string
Enum: "bkg_estimate" "azint" "azint_1d" "spot_count" "spot_count_low_res" "spot_count_indexed" "spot_count_ice" "indexing_rate" "indexing_unit_cell_length" "indexing_unit_cell_angle" "profile_radius" "mosaicity" "b_factor" "error_pixels" "saturated_pixels" "image_collection_efficiency" "receiver_delay" "receiver_free_send_buf" "strong_pixels" "roi_sum" "roi_mean" "roi_max_count" "roi_pixels" "roi_weighted_x" "roi_weighted_y" "packets_received" "max_pixel_value" "resolution_estimate" "pixel_sum" "processing_time" "beam_center_x" "beam_center_y" "integrated_reflections"

Type of requested plot

+
type
required
string
Enum: "bkg_estimate" "azint" "azint_1d" "spot_count" "spot_count_low_res" "spot_count_indexed" "spot_count_ice" "indexing_rate" "indexing_unit_cell_length" "indexing_unit_cell_angle" "profile_radius" "mosaicity" "b_factor" "error_pixels" "saturated_pixels" "image_collection_efficiency" "receiver_delay" "receiver_free_send_buf" "strong_pixels" "roi_sum" "roi_mean" "roi_max_count" "roi_pixels" "roi_weighted_x" "roi_weighted_y" "packets_received" "max_pixel_value" "resolution_estimate" "pixel_sum" "processing_time" "beam_center_x" "beam_center_y" "integrated_reflections" "image_scale_factor" "image_scale_cc"

Type of requested plot

fill
number <float>

Fill value for elements that were missed during data collection

experimental_coord
boolean
Default: false

If measurement has goniometer axis defined, plot X-axis will represent rotation angle If measurement has grid scan defined, plot X-axis and Y-axis will represent grid position, Z will be used as the final value @@ -879,7 +879,7 @@ For still measurement the number is ignored

http://localhost:5232/preview/plot

Response samples

Content type
application/json
{
  • "title": "string",
  • "unit_x": "image_number",
  • "size_x": 0.1,
  • "size_y": 0.1,
  • "plot": [
    ]
}

Generate 1D plot from Jungfraujoch and send in raw binary format. Data are provided as (32-bit) float binary array. This format doesn't transmit information about X-axis, only values, so it is of limited use for azimuthal integration. -

query Parameters
type
required
string
Enum: "bkg_estimate" "azint" "azint_1d" "spot_count" "spot_count_low_res" "spot_count_indexed" "spot_count_ice" "indexing_rate" "indexing_unit_cell_length" "indexing_unit_cell_angle" "profile_radius" "mosaicity" "b_factor" "error_pixels" "saturated_pixels" "image_collection_efficiency" "receiver_delay" "receiver_free_send_buf" "strong_pixels" "roi_sum" "roi_mean" "roi_max_count" "roi_pixels" "roi_weighted_x" "roi_weighted_y" "packets_received" "max_pixel_value" "resolution_estimate" "pixel_sum" "processing_time" "beam_center_x" "beam_center_y" "integrated_reflections"

Type of requested plot

+
query Parameters
type
required
string
Enum: "bkg_estimate" "azint" "azint_1d" "spot_count" "spot_count_low_res" "spot_count_indexed" "spot_count_ice" "indexing_rate" "indexing_unit_cell_length" "indexing_unit_cell_angle" "profile_radius" "mosaicity" "b_factor" "error_pixels" "saturated_pixels" "image_collection_efficiency" "receiver_delay" "receiver_free_send_buf" "strong_pixels" "roi_sum" "roi_mean" "roi_max_count" "roi_pixels" "roi_weighted_x" "roi_weighted_y" "packets_received" "max_pixel_value" "resolution_estimate" "pixel_sum" "processing_time" "beam_center_x" "beam_center_y" "integrated_reflections" "image_scale_factor" "image_scale_cc"

Type of requested plot

roi
string non-empty

Name of ROI for which plot is requested

Responses