Gemmi: Add more functionality from gemmi 0.7.5

This commit is contained in:
2026-05-13 13:44:02 +02:00
parent b27b140bf0
commit 08bf186766
27 changed files with 10507 additions and 3 deletions
+1 -1
View File
@@ -153,7 +153,7 @@ ADD_SUBDIRECTORY(reader)
ADD_SUBDIRECTORY(detector_control)
ADD_SUBDIRECTORY(image_puller)
ADD_SUBDIRECTORY(preview)
ADD_SUBDIRECTORY(symmetry)
ADD_SUBDIRECTORY(gemmi_gph)
ADD_SUBDIRECTORY(xds-plugin)
IF (JFJOCH_WRITER_ONLY)
+8
View File
@@ -0,0 +1,8 @@
# Static library built from the gemmi sources kept in this directory.
# The listed headers are attached to the target as sources next to the .cpp files.
ADD_LIBRARY(gemmi STATIC symmetry.cpp gz.cpp mtz.cpp sprintf.cpp xds_ascii.cpp
gemmi/cellred.hpp
gemmi/symmetry.hpp
gemmi/fail.hpp
gemmi/unitcell.hpp
gemmi/math.hpp)
# PUBLIC: this directory is on the include path of the target and of everything linking to it.
TARGET_INCLUDE_DIRECTORIES(gemmi PUBLIC .)
# No libraries are linked here.
# NOTE(review): gz.cpp is part of this target and gz.hpp says it uses zlib - confirm zlib is linked elsewhere.
TARGET_LINK_LIBRARIES(gemmi )
+41
View File
@@ -0,0 +1,41 @@
// Copyright 2020 Global Phasing Ltd.
//
// Functions that convert strings to floating-point numbers ignoring locale.
// Simple wrappers around fastfloat::from_chars().
#ifndef GEMMI_ATOF_HPP_
#define GEMMI_ATOF_HPP_
#include "atox.hpp" // for is_space
#include "third_party/fast_float.h"
namespace gemmi {
using fast_float::from_chars_result;
// Parses a double from the range [start, end).
// Leading whitespace (is_space) and at most one '+' are stepped over here,
// the rest is handed to fast_float::from_chars() whose result is returned.
inline from_chars_result fast_from_chars(const char* start, const char* end, double& d) {
  const char* pos = start;
  for (; pos < end && is_space(*pos); ++pos) {
  }
  if (pos < end && *pos == '+')
    ++pos;
  return fast_float::from_chars(pos, end, d);
}
// Same as the range overload, but for a NUL-terminated string:
// the end of input is found with strlen() after the leading
// whitespace and optional '+' have been skipped.
inline from_chars_result fast_from_chars(const char* start, double& d) {
  const char* pos = start;
  for (; is_space(*pos); ++pos) {
  }
  if (*pos == '+')
    ++pos;
  const char* const stop = pos + std::strlen(pos);
  return fast_float::from_chars(pos, stop, d);
}
inline double fast_atof(const char* p, const char** endptr=nullptr) {
double d = 0;
auto result = fast_from_chars(p, d);
if (endptr)
*endptr = result.ptr;
return d;
}
} // namespace gemmi
#endif
+135
View File
@@ -0,0 +1,135 @@
// Copyright 2018 Global Phasing Ltd.
//
// Locale-independent functions that convert strings to integers,
// equivalents of standard isspace and isdigit, and a few helper functions.
//
// This file is named similarly to the standard functions atoi() and atof().
// But the functions here are not meant to be equivalent to the standard
// library functions. They are locale-independent (a good thing when reading
// numbers from files). They don't set errno, don't signal overflow and
// underflow. Due to the limited scope these functions tend to be faster
// than the standard-library ones.
#ifndef GEMMI_ATOX_HPP_
#define GEMMI_ATOX_HPP_
#include <cstdint>
#include <stdexcept> // for invalid_argument
#include <string>
namespace gemmi {
// Equivalent of std::isspace for the C locale (no handling of EOF).
// True for character codes 9-13 and 32, false for everything else;
// the char is viewed as an unsigned byte, so negative chars are never spaces.
inline bool is_space(char c) {
  const std::uint8_t code = static_cast<std::uint8_t>(c);
  if (code == 32)
    return true;
  return code >= 9 && code <= 13;
}
// Equivalent of std::isblank for the C locale (no handling of EOF):
// only the space and the horizontal tab count as blank.
inline bool is_blank(char c) {
  switch (c) {
    case ' ':
    case '\t':
      return true;
    default:
      return false;
  }
}
// Equivalent of std::isdigit for the C locale (no handling of EOF).
// A single unsigned comparison: anything below '0' wraps around to a huge value.
inline bool is_digit(char c) {
  return static_cast<unsigned>(c - '0') < 10u;
}
// Returns a pointer to the first non-blank character at or after p.
// A null pointer is passed through unchanged.
inline const char* skip_blank(const char* p) {
  if (p == nullptr)
    return p;
  const char* cur = p;
  for (; is_blank(*cur); ++cur) {
  }
  return cur;
}
// Returns a pointer to the first whitespace character (is_space) or the
// terminating NUL at or after p. A null pointer is passed through unchanged.
inline const char* skip_word(const char* p) {
  if (p == nullptr)
    return p;
  const char* cur = p;
  for (;;) {
    if (*cur == '\0' || is_space(*cur))
      break;
    ++cur;
  }
  return cur;
}
// Returns the first word of line: leading blanks are skipped (skip_blank)
// and the word extends to the next whitespace or NUL (skip_word).
inline std::string read_word(const char* line) {
  const char* const first = skip_blank(line);
  const char* const last = skip_word(first);
  return std::string(first, last);
}
// Like read_word(line), additionally storing the position just past
// the word in *endptr (endptr must not be null).
inline std::string read_word(const char* line, const char** endptr) {
  const char* const first = skip_blank(line);
  const char* const last = skip_word(first);
  *endptr = last;
  return std::string(first, last);
}
// Converts text to int. No checking for overflow.
//  p       - start of the text
//  checked - if true, throw std::invalid_argument unless the text is an
//            optionally signed run of digits surrounded only by whitespace
//  length  - 0 means p is NUL-terminated; otherwise only p[0..length) is
//            examined (a NUL inside the field still ends the number)
// Characters at or beyond p[length] are never read, so a fixed-width field
// embedded in a longer line is handled correctly in checked mode.
inline int string_to_int(const char* p, bool checked, size_t length=0) {
  // no index limit for NUL-terminated input
  const size_t limit = length != 0 ? length : static_cast<size_t>(-1);
  int mult = -1;
  int n = 0;
  size_t i = 0;
  while (i < limit && is_space(p[i]))
    ++i;
  if (i < limit) {
    if (p[i] == '-') {
      mult = 1;
      ++i;
    } else if (p[i] == '+') {
      ++i;
    }
  }
  bool has_digits = false;
  // use negative numbers because INT_MIN < -INT_MAX
  for (; i < limit && is_digit(p[i]); ++i) {
    n = n * 10 - (p[i] - '0');
    has_digits = true;
  }
  if (checked) {
    while (i < limit && is_space(p[i]))
      ++i;
    // the number is complete if we consumed the whole field or hit NUL
    const bool at_end = i >= limit || p[i] == '\0';
    if (!has_digits || !at_end) {
      // For NUL-terminated input show the text up to and including the
      // offending character; for a field show it up to its end or a NUL.
      size_t shown = i + 1;
      if (length != 0)
        for (shown = 0; shown < length && p[shown] != '\0'; ++shown) {
        }
      throw std::invalid_argument("not an integer: " + std::string(p, shown));
    }
  }
  return mult * n;
}
// std::string convenience overload; the text is parsed as a
// NUL-terminated string (so parsing stops at an embedded NUL).
inline int string_to_int(const std::string& str, bool checked) {
  const char* const text = str.c_str();
  return string_to_int(text, checked);
}
// atoi-like conversion: skips leading whitespace, accepts one optional
// '+' or '-', then reads decimal digits. No overflow checking.
// If endptr is given, it receives the position after the last digit read.
inline int simple_atoi(const char* p, const char** endptr=nullptr) {
  const char* cur = p;
  while (is_space(*cur))
    ++cur;
  int sign = -1;  // digits are accumulated negated, see below
  switch (*cur) {
    case '-':
      sign = 1;
      ++cur;
      break;
    case '+':
      ++cur;
      break;
    default:
      break;
  }
  // use negative numbers because INT_MIN < -INT_MAX
  int negated = 0;
  while (is_digit(*cur)) {
    negated = negated * 10 - (*cur - '0');
    ++cur;
  }
  if (endptr != nullptr)
    *endptr = cur;
  return sign * negated;
}
// Like simple_atoi(), but no sign character is accepted: after leading
// whitespace only decimal digits are read. No overflow checking.
// If endptr is given, it receives the position after the last digit read.
inline int no_sign_atoi(const char* p, const char** endptr=nullptr) {
  const char* cur = p;
  while (is_space(*cur))
    ++cur;
  int value = 0;
  while (is_digit(*cur)) {
    value = value * 10 + (*cur - '0');
    ++cur;
  }
  if (endptr != nullptr)
    *endptr = cur;
  return value;
}
} // namespace gemmi
#endif
+173
View File
@@ -0,0 +1,173 @@
// Copyright 2018 Global Phasing Ltd.
//
// File-related utilities.
#ifndef GEMMI_FILEUTIL_HPP_
#define GEMMI_FILEUTIL_HPP_
#include <cassert>
#include <cstdio> // for FILE, fopen, fclose
#include <cstdint>
#include <cstdlib> // for malloc, realloc
#include <cstring> // for strlen
#include <initializer_list>
#include <memory> // for unique_ptr
#include "fail.hpp" // for sys_fail
#if defined(_WIN32) && !defined(GEMMI_USE_FOPEN)
#include "utf.hpp"
#endif
namespace gemmi {
// Strips the directory part (everything up to the last '/' or '\') and then,
// in the order given, each extension from exts that the name ends with.
// An extension is removed only if the name is longer than the extension.
inline std::string path_basename(const std::string& path,
                                 std::initializer_list<const char*> exts) {
  const std::string::size_type sep = path.find_last_of("\\/");
  std::string name = (sep != std::string::npos) ? path.substr(sep + 1) : path;
  for (auto it = exts.begin(); it != exts.end(); ++it) {
    const size_t ext_len = std::strlen(*it);
    if (name.size() <= ext_len)
      continue;
    const size_t cut = name.size() - ext_len;
    if (name.compare(cut, ext_len, *it, ext_len) == 0)
      name.erase(cut);
  }
  return name;
}
// file operations
/// Deleter for fileptr_t. The stream is closed with fclose() only when
/// use_fclose is set, so that borrowed streams can share the same pointer type.
struct needs_fclose {
  bool use_fclose;
  void operator()(std::FILE* f) const noexcept {
    if (!use_fclose)
      return;
    std::fclose(f);
  }
};
/// FILE handle that may or may not own the stream (see needs_fclose).
using fileptr_t = std::unique_ptr<std::FILE, needs_fclose>;
/// Opens `path` with the fopen-style `mode` and returns an owning handle
/// (the deleter is needs_fclose{true}).
/// On Windows, unless GEMMI_USE_FOPEN is defined, path and mode are converted
/// with UTF8_to_wchar() and the file is opened with _wfopen(); otherwise
/// std::fopen() is used.
/// If opening fails, sys_fail() is called with "Failed to open <path>",
/// followed by " for writing" when mode starts with 'w'.
inline fileptr_t file_open(const char* path, const char* mode) {
std::FILE* file;
#if defined(_WIN32) && !defined(GEMMI_USE_FOPEN)
std::wstring wpath = UTF8_to_wchar(path);
std::wstring wmode = UTF8_to_wchar(mode);
if ((file = ::_wfopen(wpath.c_str(), wmode.c_str())) == nullptr)
#else
if ((file = std::fopen(path, mode)) == nullptr)
#endif
// this statement is the body of whichever `if` the preprocessor kept above
sys_fail(std::string("Failed to open ") + path +
(*mode == 'w' ? " for writing" : ""));
return fileptr_t(file, needs_fclose{true});
}
// Helper for treating "-" as stdin or stdout: when path is exactly "-",
// dash_stream is returned wrapped in a non-owning handle (it won't be closed);
// any other path is opened with file_open().
inline fileptr_t file_open_or(const char* path, const char* mode,
                              std::FILE* dash_stream) {
  const bool is_dash = std::strcmp(path, "-") == 0;
  if (!is_dash)
    return file_open(path, mode);
  return fileptr_t(dash_stream, needs_fclose{false});
}
// Returns the size of an open file: seeks to the end, reads the position
// with ftell(), then seeks back to the beginning.
// `path` is used only in the message passed to sys_fail() when a call fails.
inline std::size_t file_size(std::FILE* f, const std::string& path) {
  const bool moved_to_end = std::fseek(f, 0, SEEK_END) == 0;
  if (!moved_to_end)
    sys_fail(path + ": fseek failed");
  const long end_pos = std::ftell(f);
  if (end_pos < 0)
    sys_fail(path + ": ftell failed");
  const bool rewound = std::fseek(f, 0, SEEK_SET) == 0;
  if (!rewound)
    sys_fail(path + ": fseek failed");
  return end_pos;
}
// Helper for working with binary files: true if the byte at the lowest
// address of a 32-bit integer holding 1 is the one that equals 1.
inline bool is_little_endian() {
  const std::uint32_t probe = 1;
  char lowest_byte;
  std::memcpy(&lowest_byte, &probe, 1);
  return lowest_byte == 1;
}
// Reverses, in place, the order of the 2 bytes starting at `start`.
inline void swap_two_bytes(void* start) {
  char* b = static_cast<char*>(start);
  const char first = b[0];
  b[0] = b[1];
  b[1] = first;
}
// Reverses, in place, the order of the 4 bytes starting at `start`.
inline void swap_four_bytes(void* start) {
  char* b = static_cast<char*>(start);
  for (int lo = 0, hi = 3; lo < hi; ++lo, --hi) {
    const char tmp = b[lo];
    b[lo] = b[hi];
    b[hi] = tmp;
  }
}
// Reverses, in place, the order of the 8 bytes starting at `start`.
inline void swap_eight_bytes(void* start) {
  char* b = static_cast<char*>(start);
  for (int lo = 0, hi = 7; lo < hi; ++lo, --hi) {
    const char tmp = b[lo];
    b[lo] = b[hi];
    b[hi] = tmp;
  }
}
/// Owning byte buffer backed by malloc()/realloc()/free().
/// The recorded size is independent of the allocation: set_size() changes
/// only the number, resize() reallocates.
class CharArray {
  std::unique_ptr<char, decltype(&std::free)> ptr_;
  size_t size_;
public:
  /// Empty buffer, nothing allocated.
  CharArray() : ptr_(nullptr, &std::free), size_(0) {}
  /// Allocates n uninitialized bytes. Allocation failure is not reported
  /// here; callers can detect it with operator bool.
  explicit CharArray(size_t n) : ptr_((char*)std::malloc(n), &std::free), size_(n) {}
  /// True if the buffer holds a non-null pointer.
  explicit operator bool() const { return (bool)ptr_; }
  char* data() { return ptr_.get(); }
  const char* data() const { return ptr_.get(); }
  size_t size() const { return size_; }
  /// Changes the recorded size without touching the allocation.
  void set_size(size_t n) { size_ = n; }
  /// Reallocates to n bytes, keeping the content; calls fail() when
  /// realloc() returns null for a non-zero n.
  void resize(size_t n) {
    char* new_ptr = (char*) std::realloc(ptr_.get(), n);
    if (!new_ptr && n != 0)
      fail("Out of memory.");
    // realloc() has taken over the old block, so it must not be freed again
    (void) ptr_.release();  // NOLINT(bugprone-unused-return-value)
    ptr_.reset(new_ptr);
    size_ = n;
  }
  /// Removes the first n bytes: the remaining size()-n bytes are moved to the
  /// front of the buffer and a pointer just past them is returned, which is
  /// where more text can be written. The recorded size is not changed.
  char* roll(size_t n) {
    assert(n <= size());
    const size_t kept = size() - n;
    if (kept != 0)
      std::memmove(data(), data() + n, kept);
    return data() + kept;
  }
};
/// Reads the whole file into a memory buffer (uses fseek to determine the
/// file size, so the file is read with a single fread call).
/// Calls fail() if the buffer cannot be allocated and sys_fail() if the file
/// cannot be opened, measured or read. Note that fread() of a zero-size item
/// returns 0, so an empty file is reported as "fread failed".
inline CharArray read_file_into_buffer(const std::string& path) {
  fileptr_t f = file_open(path.c_str(), "rb");
  const size_t size = file_size(f.get(), path);
  CharArray buffer(size);
  // CharArray does not report malloc() failure; reading into a null
  // buffer would be undefined behavior.
  if (size != 0 && !buffer)
    fail("Out of memory.");
  if (std::fread(buffer.data(), size, 1, f.get()) != 1)
    sys_fail(path + ": fread failed");
  return buffer;
}
/// Reads stdin until fread() returns less than requested.
/// Starts with a 16 KiB buffer and doubles it every time it gets full;
/// on return the buffer's size is the number of bytes read.
inline CharArray read_stdin_into_buffer() {
  CharArray buffer(16 * 1024);
  size_t filled = 0;
  while (true) {
    const size_t room = buffer.size() - filled;
    filled += std::fread(buffer.data() + filled, 1, room, stdin);
    if (filled == buffer.size()) {
      // buffer is full, there may be more input
      buffer.resize(2 * filled);
      continue;
    }
    buffer.set_size(filled);
    return buffer;
  }
}
/// Reads all the content of `input` into memory. T must provide
/// is_compressed(), uncompress_into_buffer(), is_stdin() and path().
/// Compressed input takes precedence, then stdin, then a regular file.
template<typename T>
inline CharArray read_into_buffer(T&& input) {
  if (input.is_compressed())
    return input.uncompress_into_buffer();
  return input.is_stdin() ? read_stdin_into_buffer()
                          : read_file_into_buffer(input.path());
}
} // namespace gemmi
#endif
+52
View File
@@ -0,0 +1,52 @@
// Copyright 2017 Global Phasing Ltd.
//
// Functions for transparent reading of gzipped files. Uses zlib.
#ifndef GEMMI_GZ_HPP_
#define GEMMI_GZ_HPP_
#include <string>
#include "fail.hpp" // GEMMI_DLL
#include "input.hpp" // BasicInput
#include "util.hpp" // iends_with
namespace gemmi {
// Declared here, defined in another translation unit.
// NOTE(review): presumably a text describing the zlib implementation in use - confirm in gz.cpp.
GEMMI_DLL extern const char* const zlib_description;
// Declared here, defined in another translation unit.
// NOTE(review): presumably estimates the size of the gzipped file at `path` after decompression - confirm in gz.cpp.
GEMMI_DLL size_t estimate_uncompressed_size(const std::string& path);
// the same interface as FileStream and MemoryStream
// All AnyStream operations are only declared here; they are implemented
// outside this header.
struct GEMMI_DLL GzStream final : public AnyStream {
// Stores the handle; no other work is done in the constructor.
GzStream(void* f_) : f(f_) {}
char* gets(char* line, int size) override;
int getc() override;
bool read(void* buf, size_t len) override;
bool skip(size_t n) override;
long tell() override;
std::string read_rest() override;
private:
// type-erased handle, so that this header does not need zlib declarations
void* f; // implementation detail
};
// Input that may be gzip-compressed; extends BasicInput and replaces
// (by name hiding, the functions are not virtual) is_compressed(),
// basepath(), uncompress_into_buffer() and create_stream().
class GEMMI_DLL MaybeGzipped : public BasicInput {
public:
explicit MaybeGzipped(const std::string& path);
~MaybeGzipped();
// implemented outside this header
size_t gzread_checked(void* buf, size_t len);
// The decision is based on the file name only (path ending tested with iends_with).
bool is_compressed() const { return iends_with(path(), ".gz"); }
// Path without the 3-character ".gz" ending if is_compressed(), otherwise the path itself.
// Returns by value, unlike BasicInput::basepath().
std::string basepath() const {
return is_compressed() ? path().substr(0, path().size() - 3) : path();
}
// implemented outside this header
// NOTE(review): the meaning of `limit` (0 by default) is not visible here - confirm in gz.cpp.
CharArray uncompress_into_buffer(size_t limit=0);
std::unique_ptr<AnyStream> create_stream();
private:
// type-erased handle, null until set by the implementation
void* file_ = nullptr;
};
} // namespace gemmi
#endif
+168
View File
@@ -0,0 +1,168 @@
// Copyright 2018 Global Phasing Ltd.
//
// Input abstraction.
// Used to decouple file reading and decompression.
#ifndef GEMMI_INPUT_HPP_
#define GEMMI_INPUT_HPP_
#include <cstddef> // for ptrdiff_t
#include <cstdio> // for FILE, fseek, fread
#include <cstring> // for memchr
#include <string>
#include "fileutil.hpp" // for fileptr_t
namespace gemmi {
// Base class for FileStream, MemoryStream and GzStream:
// a minimal interface for sequential reading.
struct AnyStream {
  virtual ~AnyStream() = default;

  // fgets-like line reading; for pdb, copy_line()
  virtual char* gets(char* line, int size) = 0;
  // fgetc-like reading of a single character; for copy_line()
  virtual int getc() = 0;
  // reads exactly len bytes, returns false on failure; for ccp4, mtz
  virtual bool read(void* buf, size_t len) = 0;

  // these are not used in GzStream because MemoryStream is used for mtz
  virtual long tell() = 0;  // temporary, for testing
  virtual bool skip(size_t n) = 0;  // for reading mtz without data
  virtual std::string read_rest() { return {}; }  // for mtz (appendix)

  // Reads one line with gets() and returns its length (0 if gets() failed).
  // If the line did not fit into `size`, the rest of it is read with getc()
  // and discarded, so the next call starts at the next line.
  // Used for pdb, xds_ascii.
  size_t copy_line(char* line, int size) {
    if (gets(line, size) == nullptr)
      return 0;
    const size_t len = std::strlen(line);
    const bool truncated = len > 0 && line[len-1] != '\n';
    if (truncated) {
      int c;
      do {
        c = getc();
      } while (c > 0 /* not 0 nor EOF */ && c != '\n');
    }
    return len;
  }
};
// AnyStream implemented on top of a C FILE stream.
struct FileStream final : public AnyStream {
// Wraps an already open stream; it is not closed by FileStream.
FileStream(std::FILE* f_) : f(f_, needs_fclose{false}) {}
// Opens the file with file_open_or(); the path "-" stands for stdin.
FileStream(const char* path, const char* mode) : f(file_open_or(path, mode, stdin)) {}
char* gets(char* line, int size) override { return std::fgets(line, size, f.get()); }
int getc() override { return std::fgetc(f.get()); }
// True only if all len bytes were read.
bool read(void* buf, size_t len) override { return std::fread(buf, len, 1, f.get()) == 1; }
// Returns everything from the current position to the end of the stream.
std::string read_rest() override {
std::string ret;
// a single character is read first, so nothing more is done when already at EOF
int c = std::fgetc(f.get());
if (c != EOF) {
ret += (char)c;
char buf[512];
for (;;) {
size_t n = std::fread(buf, 1, sizeof(buf), f.get());
ret.append(buf, n);
// a short read ends the loop
if (n != sizeof(buf))
break;
}
}
return ret;
}
long tell() override {
return std::ftell(f.get());
}
// Moves n bytes forward. A platform-specific seek is tried first;
// if it fails, the bytes are read and discarded instead.
// Returns false only if that fallback reading fails.
bool skip(size_t n) override {
#if defined(_MSC_VER)
int result = _fseeki64(f.get(), (std::ptrdiff_t)n, SEEK_CUR);
#elif defined(__MINGW32__)
int result = fseeko(f.get(), (_off_t)n, SEEK_CUR);
#else
int result = std::fseek(f.get(), (long)n, SEEK_CUR);
#endif
if (result != 0) {
char buf[512];
// full 512-byte chunks first, then the remainder
while (n >= sizeof(buf)) {
if (std::fread(buf, sizeof(buf), 1, f.get()) != 1)
return false;
n -= sizeof(buf);
}
if (n > 0 && std::fread(buf, n, 1, f.get()) != 1)
return false;
}
return true;
}
private:
// owning or non-owning, depending on the constructor used
fileptr_t f;
};
// AnyStream that reads from a memory block [start_, start_ + size).
// The memory is not owned and must outlive the stream.
// Invariant: start <= cur <= end.
struct MemoryStream final : public AnyStream {
  MemoryStream(const char* start_, size_t size)
    : start(start_), end(start_ + size), cur(start_) {}

  // fgets-like: copies at most size-1 characters, stopping after a newline,
  // and NUL-terminates the result. Returns nullptr at the end of the data
  // or when size leaves no room for the terminating NUL.
  char* gets(char* line, int size) override {
    if (size <= 0)
      return nullptr;
    --size; // fgets reads in at most one less than size characters
    if (cur >= end)
      return nullptr;
    if (size > end - cur)
      size = int(end - cur);
    const char* nl = (const char*) std::memchr(cur, '\n', size);
    size_t len = nl ? nl - cur + 1 : size;
    std::memcpy(line, cur, len);
    line[len] = '\0';
    cur += len;
    return line;
  }

  // fgetc-like: the byte is returned as unsigned char converted to int,
  // so bytes >= 0x80 are positive and 0xFF cannot be mistaken for EOF.
  int getc() override {
    return cur < end ? static_cast<unsigned char>(*cur++) : EOF;
  }

  // Copies len bytes and advances; returns false (reading nothing)
  // if fewer than len bytes are left.
  bool read(void* buf, size_t len) override {
    if (len > static_cast<size_t>(end - cur))
      return false;
    std::memcpy(buf, cur, len);
    cur += len;
    return true;
  }

  // Returns all the remaining data and moves to the end.
  std::string read_rest() override {
    const char* last = cur;
    cur = end;
    return std::string(last, end);
  }

  long tell() override {
    return cur - start;
  }

  // Moves n bytes forward. Returns true if there is still data to read
  // afterwards; when n reaches or exceeds the remaining size, the position
  // is set to the end (never beyond it) and false is returned.
  bool skip(size_t n) override {
    const size_t left = static_cast<size_t>(end - cur);
    if (n >= left) {
      cur = end;
      return false;
    }
    cur += n;
    return true;
  }

private:
  const char* const start;
  const char* const end;
  const char* cur;
};
// Input given as a file path, without any decompression.
// It has the same interface as MaybeGzipped.
class BasicInput {
public:
  explicit BasicInput(const std::string& path) : path_(path) {}

  const std::string& path() const { return path_; }
  // there is no suffix to strip here, so it is the same as path()
  const std::string& basepath() const { return path_; }

  // Does the path stand for stdin?
  // Each reading function needs to call it (some functions use stdin
  // and some std::cin, so we don't try to unify it here).
  bool is_stdin() const { return path_.compare("-") == 0; }

  // providing the same interface as MaybeGzipped
  bool is_compressed() const { return false; }
  // for reading (uncompressing into memory) the whole file at once;
  // nothing is compressed here, so an empty buffer is returned
  CharArray uncompress_into_buffer(size_t=0) { return CharArray(); }

  // Returns a FileStream opened for binary reading.
  std::unique_ptr<AnyStream> create_stream() {
    std::unique_ptr<AnyStream> stream(new FileStream(path_.c_str(), "rb"));
    return stream;
  }

private:
  std::string path_;
};
} // namespace gemmi
#endif
+287
View File
@@ -0,0 +1,287 @@
// Copyright 2018 Global Phasing Ltd.
//
// Bidirectional iterators (over elements of any container) that can filter,
// uniquify, group, or iterate with a stride.
#ifndef GEMMI_ITERATOR_HPP_
#define GEMMI_ITERATOR_HPP_
#include <iterator> // for bidirectional_iterator_tag
#include <type_traits> // for remove_cv
#include <vector>
namespace gemmi {
// Disable warning "X<T>::operator X<T>() const will not be called for
// implicit or explicit conversions", which is triggered when templates
// StrideIter, IndirectIter and others are expanded with const Value.
#if defined(__INTEL_COMPILER) || defined(__NVCOMPILER)
#pragma diagnostic push
#pragma diag_suppress = conversion_function_not_usable
#elif defined(__NVCC__)
#pragma nv_diagnostic push
#pragma nv_diag_suppress = conversion_function_not_usable
#endif
// Implements concept BidirectionalIterator on top of a Policy class.
// Policy provides: value_type, reference, const_policy, increment(),
// decrement(), equal() and dereference().
template <typename Policy>
struct BidirIterator : Policy {
  using value_type = typename std::remove_cv<typename Policy::value_type>::type;
  using difference_type = std::ptrdiff_t;
  using pointer = typename Policy::value_type*;
  using reference = typename Policy::reference;
  using iterator_category = std::bidirectional_iterator_tag;
  // the same iterator, but over const values
  using const_variant = BidirIterator<typename Policy::const_policy>;

  BidirIterator() = default;
  BidirIterator(Policy&& p) : Policy(p) {}

  BidirIterator& operator++() {
    Policy::increment();
    return *this;
  }
  BidirIterator operator++(int) {
    BidirIterator before = *this;
    Policy::increment();
    return before;
  }
  BidirIterator& operator--() {
    Policy::decrement();
    return *this;
  }
  BidirIterator operator--(int) {
    BidirIterator before = *this;
    Policy::decrement();
    return before;
  }

  bool operator==(const BidirIterator &o) const { return Policy::equal(o); }
  bool operator!=(const BidirIterator &o) const { return !(*this == o); }

  reference operator*() { return Policy::dereference(); }
  pointer operator->() { return &Policy::dereference(); }

  // conversion to the const variant goes through Policy's own conversion
  operator const_variant() const {
    const Policy& policy = *this;
    return const_variant(policy);
  }
};
// Policy for iterating with a fixed stride over contiguous data:
// each step moves the pointer by `stride` elements and dereferencing
// returns the element `offset` positions after the pointer.
template<typename Value>
class StrideIterPolicy {
public:
  using value_type = Value;
  using reference = Value&;
  using const_policy = StrideIterPolicy<Value const>;

  StrideIterPolicy() : cur_(nullptr), offset_(0), stride_(0) {}
  StrideIterPolicy(Value* ptr, std::size_t offset, size_t stride)
    : cur_(ptr), offset_(offset), stride_((unsigned)stride) {}

  void increment() { cur_ = cur_ + stride_; }
  void decrement() { cur_ = cur_ - stride_; }
  // only the pointers are compared, not the offsets
  bool equal(const StrideIterPolicy& o) const { return o.cur_ == cur_; }
  Value& dereference() { return *(cur_ + offset_); }

  operator const_policy() const { return const_policy(cur_, offset_, stride_); }

private:
  Value* cur_;
  std::size_t offset_;
  unsigned stride_;
};
// Bidirectional iterator built from StrideIterPolicy.
template<typename Value>
using StrideIter = BidirIterator<StrideIterPolicy<Value>>;
// Policy for iterating over a list of int positions; each position is
// translated to a value by calling Redirect::value_at().
template<typename Redirect, typename Value>
class IndirectIterPolicy {
public:
  using value_type = Value;
  using reference = Value&;
  using const_policy = IndirectIterPolicy<Redirect const, Value const>;

  IndirectIterPolicy() : redir_(nullptr) {}
  IndirectIterPolicy(Redirect* redir, std::vector<int>::const_iterator cur)
    : redir_(redir), cur_(cur) {}

  void increment() { ++cur_; }
  void decrement() { --cur_; }
  // only the positions are compared, not the Redirect objects
  bool equal(const IndirectIterPolicy& o) const { return o.cur_ == cur_; }
  Value& dereference() {
    const int position = *cur_;
    return redir_->value_at(position);
  }

  operator const_policy() const { return const_policy(redir_, cur_); }
  // TODO: what should be done with absent optional tags (*cur_ < 0)?

private:
  Redirect* redir_;
  std::vector<int>::const_iterator cur_; // points into positions
};
// Bidirectional iterator built from IndirectIterPolicy.
template<typename Redirect, typename Value>
using IndirectIter = BidirIterator<IndirectIterPolicy<Redirect, Value>>;
// Policy for visiting the first element of each run of consecutive
// elements that have equal group_key().
template<typename Vector, typename Value>
class UniqIterPolicy {
public:
  using value_type = Value;
  using reference = Value&;
  using const_policy = UniqIterPolicy<Vector const, Value const>;

  UniqIterPolicy() : vec_(nullptr), pos_(0) {}
  UniqIterPolicy(Vector* vec, std::size_t pos) : vec_(vec), pos_(pos) {}

  // move to the first element of the next group
  void increment() {
    const auto& key = (*vec_)[pos_].group_key();
    do {
      ++pos_;
    } while (pos_ != vec_->size() && (*vec_)[pos_].group_key() == key);
  }

  void decrement() {
    --pos_;  // now we are at the last element of the previous group
    const auto& key = (*vec_)[pos_].group_key();
    // move to the group beginning
    for (; pos_ != 0 && (*vec_)[pos_-1].group_key() == key; --pos_) {
    }
  }

  // only the positions are compared, not the vectors
  bool equal(const UniqIterPolicy& o) const { return o.pos_ == pos_; }
  Value& dereference() { return (*vec_)[pos_]; }

  operator const_policy() const { return const_policy(vec_, pos_); }

private:
  Vector* vec_;
  std::size_t pos_;
};
// Bidirectional iterator built from UniqIterPolicy.
template<typename Vector, typename Value>
using UniqIter = BidirIterator<UniqIterPolicy<Vector, Value>>;
// Range (for use in range-based for) over the first elements of the groups
// in a vector; see UniqIterPolicy. The vector is held by reference.
template<typename Value, typename Vector=std::vector<Value>>
struct UniqProxy {
  Vector& vec;
  using iterator = UniqIter<Vector, Value>;
  iterator begin() {
    return iterator(UniqIterPolicy<Vector, Value>(&vec, 0));
  }
  iterator end() {
    return iterator(UniqIterPolicy<Vector, Value>(&vec, vec.size()));
  }
};
// Read-only counterpart of UniqProxy: the vector is held by const
// reference and the iterators give access to const values.
template<typename Value, typename Vector=std::vector<Value>>
struct ConstUniqProxy {
  const Vector& vec;
  using iterator = UniqIter<const Vector, const Value>;
  iterator begin() const {
    return iterator(UniqIterPolicy<const Vector, const Value>(&vec, 0));
  }
  iterator end() const {
    return iterator(UniqIterPolicy<const Vector, const Value>(&vec, vec.size()));
  }
};
// Policy for iterating over groups of consecutive elements with equal
// group_key(). Value is a span-like type (begin(), end(), size(),
// set_begin(), set_size(), is_beginning(), is_ending()); the policy keeps
// one such span and makes it cover the current group.
template<typename Vector, typename Value>
class GroupingIterPolicy {
public:
  using value_type = Value;
  using reference = Value&;
  using const_policy = GroupingIterPolicy<Vector const, Value const>;

  GroupingIterPolicy() = default;
  GroupingIterPolicy(const Value& span) : span_(span) {}

  // The span is restarted, empty, at the end of the current group
  // and then grown as long as the next element has the same key.
  void increment() {
    span_.set_begin(span_.end());
    span_.set_size(0);
    for (;;) {
      if (span_.is_ending())
        break;
      if (!(span_.begin()->group_key() == span_.end()->group_key()))
        break;
      span_.set_size(span_.size() + 1);
    }
  }

  // The span is restarted at the last element of the previous group
  // and then grown backwards as long as the preceding element has the same key.
  void decrement() {
    span_.set_begin(span_.begin() - 1);
    span_.set_size(1);
    for (;;) {
      if (span_.is_beginning())
        break;
      if (!(span_.begin()->group_key() == (span_.begin() - 1)->group_key()))
        break;
      span_.set_begin(span_.begin() - 1);
      span_.set_size(span_.size() + 1);
    }
  }

  // groups are identified by where they begin
  bool equal(const GroupingIterPolicy& o) const {
    return span_.begin() == o.span_.begin();
  }
  Value& dereference() { return span_; }

  operator const_policy() const { return const_policy(span_); }

private:
  Value span_;
};
// Bidirectional iterator built from GroupingIterPolicy.
template<typename Vector, typename Value>
using GroupingIter = BidirIterator<GroupingIterPolicy<Vector, Value>>;
// Policy for iterating over those elements of a vector for which
// Filter::matches(element) is true. The filter and the vector are not
// owned; both must outlive the policy.
template<typename Filter, typename Vector, typename Value>
class FilterIterPolicy {
public:
  using value_type = Value;
  using reference = Value&;
  using const_policy = FilterIterPolicy<Filter, Vector const, Value const>;

  FilterIterPolicy() : filter_(nullptr), vec_(nullptr), pos_(0) {}
  // Starts at the first matching element at or after pos
  // (or at vec->size() if there is none).
  FilterIterPolicy(const Filter* filter, Vector* vec, std::size_t pos)
    : filter_(filter), vec_(vec), pos_(pos) {
    while (pos_ != vec_->size() && !matches(pos_))
      ++pos_;
  }

  bool matches(std::size_t p) const { return filter_->matches((*vec_)[p]); }
  // move to the next matching element or to the end
  void increment() { while (++pos_ < vec_->size() && !matches(pos_)) {} }
  // move to the previous matching element (stops at position 0 in any case)
  void decrement() { while (pos_ != 0 && !matches(--pos_)) {} }
  // only the positions are compared, not the filters or vectors
  bool equal(const FilterIterPolicy& o) const { return pos_ == o.pos_; }
  Value& dereference() { return (*vec_)[pos_]; }

  // The filter must be passed along: const_policy has no constructor
  // taking only the vector and the position.
  operator const_policy() const { return const_policy(filter_, vec_, pos_); }

private:
  const Filter* filter_;
  Vector* vec_;
  std::size_t pos_;
};
// Bidirectional iterator built from FilterIterPolicy.
template<typename Filter, typename Vector, typename Value>
using FilterIter = BidirIterator<FilterIterPolicy<Filter, Vector, Value>>;
// Range (for use in range-based for) over the elements of a vector
// that match the filter. Both are held by reference.
template<typename Filter, typename Value>
struct FilterProxy {
  const Filter& filter;
  std::vector<Value>& vec;
  using iterator = FilterIter<Filter, std::vector<Value>, Value>;
  iterator begin() {
    return iterator(FilterIterPolicy<Filter, std::vector<Value>, Value>(&filter, &vec, 0));
  }
  iterator end() {
    return iterator(FilterIterPolicy<Filter, std::vector<Value>, Value>(&filter, &vec, vec.size()));
  }
};
// Read-only counterpart of FilterProxy: the vector is held by const
// reference and the iterators give access to const values.
template<typename Filter, typename Value>
struct ConstFilterProxy {
  const Filter& filter;
  const std::vector<Value>& vec;
  using iterator = FilterIter<Filter, const std::vector<Value>, const Value>;
  iterator begin() const {
    return iterator(FilterIterPolicy<Filter, const std::vector<Value>, const Value>(&filter, &vec, 0));
  }
  iterator end() const {
    return iterator(FilterIterPolicy<Filter, const std::vector<Value>, const Value>(&filter, &vec, vec.size()));
  }
};
// A group of items in the range [start, end) that share the group_key()
// of the first item. Items with other keys may be interleaved in the range
// (the group is then "sparse"); they are skipped by the iterator and by
// operator[]. The items are not owned.
template<typename Item>
struct ItemGroup {
  using element_type = Item;

  // size_ starts as the length of the range and is decreased for every
  // item with a different key. The loop starts at `start` (which trivially
  // matches itself) so that an empty range is handled correctly.
  ItemGroup(Item* start, const Item* end)
    : size_(int(end - start)), extent_(int(end - start)), start_(start) {
    for (const Item* i = start; i != end; ++i)
      if (i->group_key() != start->group_key())
        --size_;
  }

  // Forward iterator that steps over items with a different key.
  struct iterator {
    Item* ptr;
    const Item* end;
    bool operator==(const iterator& o) const { return ptr == o.ptr; }
    bool operator!=(const iterator& o) const { return ptr != o.ptr; }
    iterator& operator++() {
      const Item* prev = ptr++;
      while (ptr != end && ptr->group_key() != prev->group_key())
        ++ptr;
      return *this;
    }
    Item& operator*() { return *ptr; }
    Item* operator->() { return ptr; }
  };
  iterator begin() { return iterator{start_, start_+extent_}; }
  iterator end() { return iterator{start_+extent_, start_+extent_}; }

  // number of items in the group
  size_t size() const { return (size_t) size_; }
  // length of the range that contains the group
  int extent() const { return extent_; }
  bool empty() const { return size_ == 0; }
  Item& front() { return *start_; }
  const Item& front() const { return *start_; }
  Item& back() { return start_[extent_ - 1]; }
  const Item& back() const { return start_[extent_ - 1]; }

  // constant time unless sparse (extent_ > size_);
  // as with arrays, i must be less than size()
  Item& operator[](std::size_t i) {
    if (size_ == extent_ || i == 0)
      return start_[i];
    for (Item* ptr = start_ + 1; ; ++ptr)
      if (ptr->group_key() == start_->group_key())
        if (--i == 0)
          return *ptr;
  }
  const Item& operator[](std::size_t i) const {
    return const_cast<ItemGroup*>(this)->operator[](i);
  }

private:
  int size_ = 0;
  int extent_ = 0;
  Item* start_ = nullptr;
};
#if defined(__INTEL_COMPILER) || defined(__NVCOMPILER)
#pragma diagnostic pop
#elif defined(__NVCC__)
#pragma nv_diagnostic pop
#endif
} // namespace gemmi
#endif
+71
View File
@@ -0,0 +1,71 @@
// Copyright Global Phasing Ltd.
//
// Logger - a tiny utility for passing messages through a callback.
#ifndef GEMMI_LOGGER_HPP_
#define GEMMI_LOGGER_HPP_
#include <cstdio> // for fprintf
#include <functional> // for function
#include "fail.hpp" // for GEMMI_COLD
#include "util.hpp" // for cat
namespace gemmi {
/// Passes messages (including warnings/errors) to a callback function.
/// Messages are passed as strings without a trailing newline.
/// They have syslog-like severity levels: 8=debug, 6=info, 5=notice, 3=error,
/// allowing the use of a threshold to filter them.
/// Quirk: Errors double as both errors and warnings. Unrecoverable errors
/// don't go through this class; Logger only handles errors that can
/// be downgraded to warnings. If a callback is set, the error is passed
/// as a warning message. Otherwise, it's thrown as std::runtime_error.
struct Logger {
  /// A function that handles messages.
  std::function<void(const std::string&)> callback;
  /// Pass messages of this level and all lower (more severe) levels:
  /// 8=all, 6=all but debug, 5=notes and warnings, 3=warnings, 0=none
  int threshold = 6;

  /// suspend() and resume() are used internally to avoid duplicate messages
  /// when the same function is called (internally) multiple times.
  /// The threshold is shifted by 100, which puts it below every level.
  void suspend() { threshold = threshold - 100; }
  void resume() { threshold = threshold + 100; }

  /// Send a message without any prefix, with a numeric severity level N.
  /// Nothing is done if N is above the threshold or no callback is set.
  template<int N, class... Args> void level(Args const&... args) const {
    if (threshold < N || !callback)
      return;
    callback(cat(args...));
  }
  /// Send a debug message.
  template<class... Args> void debug(Args const&... args) const {
    level<8>("Debug: ", args...);
  }
  /// Send a message without any prefix.
  template<class... Args> void mesg(Args const&... args) const {
    level<6>(args...);
  }
  /// Send a note (a notice, a significant message).
  template<class... Args> void note(Args const&... args) const {
    level<5>("Note: ", args...);
  }
  /// Send a warning/error (see Quirk above).
  template<class... Args> GEMMI_COLD void err(Args const&... args) const {
    if (threshold < 3)
      return;
    std::string msg = cat(args...);
    // without a callback the message goes to fail()
    if (!callback)
      fail(msg);
    callback("Warning: " + msg);
  }

  // predefined callbacks

  /// to be used as: logger.callback = Logger::to_stderr;
  static void to_stderr(const std::string& s) {
    std::fprintf(stderr, "%s\n", s.c_str());
  }
  /// to be used as: logger.callback = Logger::to_stdout;
  static void to_stdout(const std::string& s) {
    std::fprintf(stdout, "%s\n", s.c_str());
  }
};
} // namespace gemmi
#endif
+600
View File
@@ -0,0 +1,600 @@
// Copyright 2019 Global Phasing Ltd.
//
// MTZ reflection file format.
#ifndef GEMMI_MTZ_HPP_
#define GEMMI_MTZ_HPP_
#include <cassert>
#include <cmath> // for isnan
#include <cstdint> // for int32_t
#include <algorithm> // for copy
#include <array>
#include <initializer_list>
#include <string>
#include <vector>
#include "fail.hpp" // for fail
#include "input.hpp" // for AnyStream, FileStream, CharArray
#include "iterator.hpp" // for StrideIter
#include "logger.hpp" // for Logger
#include "math.hpp" // for rad, Mat33
#include "symmetry.hpp" // for find_spacegroup_by_name, SpaceGroup
#include "unitcell.hpp" // for UnitCell
#include "util.hpp" // for ialpha4_id, rtrim_str, ialpha3_id, ...
namespace gemmi {
// Unmerged MTZ files always store in-asu hkl indices and symmetry operation
// encoded in the M/ISYM column. Here is a helper for writing such files.
struct UnmergedHklMover {
UnmergedHklMover(const SpaceGroup* spacegroup) : asu_(spacegroup) {
if (spacegroup)
group_ops_ = spacegroup->operations();
}
// Modifies hkl and returns ISYM value for M/ISYM
int move_to_asu(std::array<int, 3>& hkl) {
std::pair<Miller, int> hkl_isym = asu_.to_asu(hkl, group_ops_);
hkl = hkl_isym.first;
return hkl_isym.second;
}
private:
ReciprocalAsu asu_;
GroupOps group_ops_;
};
// File-level data of an MTZ file, kept separately from the datasets,
// columns, batches and reflection data; Mtz derives from this struct.
// All members have defaults; floating-point members start as NAN.
struct MtzMetadata {
std::string source_path; // input file path, if known
// NOTE(review): presumably false when the file's byte order differs from the machine's - confirm in mtz.cpp
bool same_byte_order = true;
bool indices_switched_to_original = false;
// NOTE(review): presumably the header position as stored in the file - confirm units in mtz.cpp
std::int64_t header_offset = 0;
std::string version_stamp;
std::string title;
int nreflections = 0;
std::array<int, 5> sort_order = {};
// used by Mtz::resolution_low() and resolution_high() as sqrt(1 / value)
double min_1_d2 = NAN;
double max_1_d2 = NAN;
float valm = NAN;
int nsymop = 0;
UnitCell cell;
int spacegroup_number = 0;
std::string spacegroup_name;
std::vector<Op> symops;
// non-owning pointer, null by default
const SpaceGroup* spacegroup = nullptr;
std::vector<std::string> history;
std::string appended_text;
// used to report non-critical problems when reading a file (also used in mtz2cif)
Logger logger;
};
struct GEMMI_DLL Mtz : public MtzMetadata {
struct Dataset {
int id;
std::string project_name;
std::string crystal_name;
std::string dataset_name;
UnitCell cell;
double wavelength; // 0 means not set
};
struct Column {
int dataset_id;
char type;
std::string label;
float min_value = NAN;
float max_value = NAN;
std::string source; // from COLSRC
Mtz* parent;
std::size_t idx;
Dataset& dataset() { return parent->dataset(dataset_id); }
const Dataset& dataset() const { return parent->dataset(dataset_id); }
bool has_data() const { return parent->has_data(); }
int size() const { return has_data() ? parent->nreflections : 0; }
size_t stride() const { return parent->columns.size(); }
float& operator[](std::size_t n) { return parent->data[idx + n * stride()]; }
float operator[](std::size_t n) const { return parent->data[idx + n * stride()]; }
float& at(std::size_t n) { return parent->data.at(idx + n * stride()); }
float at(std::size_t n) const { return parent->data.at(idx + n * stride()); }
bool is_integer() const {
return type == 'H' || type == 'B' || type == 'Y' || type == 'I';
}
const Column* get_next_column_if_type(char next_type) const {
if (idx + 1 < parent->columns.size()) {
const Column& next_col = parent->columns[idx + 1];
if (next_col.dataset_id == dataset_id && next_col.type == next_type)
return &next_col;
}
return nullptr;
}
using iterator = StrideIter<float>;
iterator begin() {
assert(parent);
assert(&parent->columns[idx] == this);
return iterator({parent->data.data(), idx, stride()});
}
iterator end() {
return iterator({parent->data.data() + parent->data.size(), idx,
stride()});
}
using const_iterator = StrideIter<const float>;
const_iterator begin() const { return const_cast<Column*>(this)->begin(); }
const_iterator end() const { return const_cast<Column*>(this)->end(); }
};
/// Batch header: raw integer and float words (29 and 156 of them by default)
/// together with typed accessors for a few of the words.
struct Batch {
  Batch() : ints(29, 0), floats(156, 0.f) {
    // write the same values that are written by CCP4 progs such as COMBAT
    ints[0] = 29 + 156;
    ints[1] = 29;
    ints[2] = 156;
    // COMBAT sets BSCALE=1, but Pointless sets it to 0.
    //floats[43] = 1.f;  // batch scale
  }

  int number = 0;
  std::string title;
  std::vector<int> ints;
  std::vector<float> floats;
  std::vector<std::string> axes;

  /// Unit cell built from floats[0..5].
  UnitCell get_cell() const {
    return UnitCell(floats[0], floats[1], floats[2],
                    floats[3], floats[4], floats[5]);
  }
  /// Stores the six cell parameters (narrowed to float) in floats[0..5].
  void set_cell(const UnitCell& uc) {
    floats[0] = static_cast<float>(uc.a);
    floats[1] = static_cast<float>(uc.b);
    floats[2] = static_cast<float>(uc.c);
    floats[3] = static_cast<float>(uc.alpha);
    floats[4] = static_cast<float>(uc.beta);
    floats[5] = static_cast<float>(uc.gamma);
  }

  // dataset ID is kept in ints[20]
  int dataset_id() const {
    return ints[20];
  }
  void set_dataset_id(int id) {
    ints[20] = id;
  }

  // wavelength is kept in floats[86]
  float wavelength() const {
    return floats[86];
  }
  void set_wavelength(float lambda) {
    floats[86] = lambda;
  }

  // start and end of phi are kept in floats[36] and floats[37]
  float phi_start() const {
    return floats[36];
  }
  float phi_end() const {
    return floats[37];
  }

  /// Matrix assembled from floats[6..14]; consecutive words go to
  /// consecutive positions within a column of the argument list below.
  Mat33 matrix_U() const {
    return Mat33(floats[6], floats[9],  floats[12],
                 floats[7], floats[10], floats[13],
                 floats[8], floats[11], floats[14]);
  }
};
std::vector<Dataset> datasets;
std::vector<Column> columns;
std::vector<Batch> batches;
std::vector<float> data;
explicit Mtz(bool with_base=false) {
if (with_base)
add_base();
}
// Move construction is delegated to move assignment, which also re-points
// the columns' parent pointers to the new object.
Mtz(Mtz&& o) noexcept { *this = std::move(o); }
/// Move assignment: takes over metadata and all containers of `o`.
Mtz& operator=(Mtz&& o) noexcept {
  MtzMetadata::operator=(std::move(o));
  datasets = std::move(o.datasets);
  columns = std::move(o.columns);
  batches = std::move(o.batches);
  data = std::move(o.data);
  // each column keeps a pointer to its owner; make it point to this object
  for (std::size_t i = 0; i != columns.size(); ++i)
    columns[i].parent = this;
  return *this;
}
// explicit to be aware where we make copies
explicit Mtz(const Mtz& o) : MtzMetadata(o) {
datasets = o.datasets;
columns = o.columns;
batches = o.batches;
data = o.data;
for (Mtz::Column& col : columns)
col.parent = this;
}
Mtz& operator=(Mtz const&) = delete;
/// Adds the dataset named HKL_base (ID 0) and three columns of type 'H'
/// labelled H, K and L at positions 0, 1 and 2.
void add_base() {
  datasets.push_back({0, "HKL_base", "HKL_base", "HKL_base", cell, 0.});
  const char axis_labels[3] = {'H', 'K', 'L'};
  int pos = 0;
  for (char axis : axis_labels) {
    add_column(std::string(1, axis), 'H', 0, pos, false);
    ++pos;
  }
}
// Functions to use after MTZ headers (and data) are read.
/// High-resolution limit: 1/sqrt(max_1_d2).
double resolution_high() const {
  const double inv_d2 = max_1_d2;
  return std::sqrt(1.0 / inv_d2);
}
double resolution_low() const { return std::sqrt(1.0 / min_1_d2); }
/// Returns the cell of the first dataset with the given ID whose cell
/// is_crystal() and has a > 0; falls back to the global cell.
UnitCell& get_cell(int dataset=-1) {
  for (Dataset& candidate : datasets) {
    if (candidate.id != dataset)
      continue;
    if (candidate.cell.is_crystal() && candidate.cell.a > 0)
      return candidate.cell;
  }
  return cell;
}
const UnitCell& get_cell(int dataset=-1) const {
  Mtz* self = const_cast<Mtz*>(this);
  return self->get_cell(dataset);
}
void set_cell_for_all(const UnitCell& new_cell) {
cell = new_cell;
cell.set_cell_images_from_spacegroup(spacegroup); // probably not needed
for (Dataset& ds : datasets)
ds.cell = cell;
}
UnitCellParameters get_average_cell_from_batch_headers(double* rmsd) const;
void set_spacegroup(const SpaceGroup* new_sg) {
spacegroup = new_sg;
spacegroup_number = new_sg ? spacegroup->ccp4 : 0;
spacegroup_name = new_sg ? spacegroup->hm : "";
}
/// Returns the most recently added dataset; fails if there is none.
Dataset& last_dataset() {
  if (!datasets.empty())
    return datasets.back();
  fail("MTZ dataset not found (missing DATASET header line?).");
}
/// Returns the dataset with the given ID; fails if there is none.
Dataset& dataset(int id) {
  // fast path: the dataset is often stored at the index equal to its ID
  const size_t pos = (size_t)id;
  if (pos < datasets.size() && datasets[pos].id == id)
    return datasets[pos];
  for (size_t i = 0; i != datasets.size(); ++i)
    if (datasets[i].id == id)
      return datasets[i];
  fail("MTZ file has no dataset with ID " + std::to_string(id));
}
const Dataset& dataset(int id) const {
  Mtz* self = const_cast<Mtz*>(this);
  return self->dataset(id);
}
/// Returns the first dataset whose dataset_name equals `name`, or nullptr.
Dataset* dataset_with_name(const std::string& name) {
  for (size_t i = 0; i != datasets.size(); ++i) {
    Dataset& candidate = datasets[i];
    if (candidate.dataset_name == name)
      return &candidate;
  }
  return nullptr;
}
const Dataset* dataset_with_name(const std::string& label) const {
  Mtz* self = const_cast<Mtz*>(this);
  return self->dataset_with_name(label);
}
int count(const std::string& label) const {
int n = 0;
for (const Column& col : columns)
if (col.label == label)
++n;
return n;
}
int count_type(char type) const {
int n = 0;
for (const Column& col : columns)
if (col.type == type)
++n;
return n;
}
/// Returns the first column with the given label, or nullptr.
/// A non-null `ds` restricts the search to that dataset;
/// a type other than '*' restricts it to that column type.
Column* column_with_label(const std::string& label, const Dataset* ds=nullptr, char type='*') {
  for (Column& col : columns) {
    if (col.label != label)
      continue;
    if (ds && ds->id != col.dataset_id)
      continue;
    if (type != '*' && type != col.type)
      continue;
    return &col;
  }
  return nullptr;
}
const Column* column_with_label(const std::string& label, const Dataset* ds=nullptr,
                                char type='*') const {
  Mtz* self = const_cast<Mtz*>(this);
  return self->column_with_label(label, ds, type);
}
/// Like column_with_label(), but fails instead of returning nullptr.
const Column& get_column_with_label(const std::string& label, const Dataset* ds=nullptr) const {
  const Column* found = column_with_label(label, ds);
  if (!found)
    fail("Column label not found: " + label);
  return *found;
}
std::vector<const Column*> columns_with_type(char type) const {
std::vector<const Column*> cols;
for (const Column& col : columns)
if (col.type == type)
cols.push_back(&col);
return cols;
}
std::vector<int> positions_of_columns_with_type(char col_type) const {
std::vector<int> cols;
for (int i = 0; i < (int) columns.size(); ++i)
if (columns[i].type == col_type)
cols.push_back(i);
return cols;
}
// F(+)/(-) pairs should have type G (and L for sigma),
// I(+)/(-) -- K (M for sigma), but E(+)/(-) has no special column type,
// so here we use column labels not types.
std::vector<std::pair<int,int>> positions_of_plus_minus_columns() const {
std::vector<std::pair<int,int>> r;
for (int i = 0; i < (int) columns.size(); ++i) {
const Column& col = columns[i];
size_t sign_pos = col.label.find("(+)");
if (sign_pos != std::string::npos) {
std::string minus_label = columns[i].label;
minus_label[sign_pos+1] = '-';
for (int j = 0; j < (int) columns.size(); ++j)
if (columns[j].label == minus_label &&
columns[j].type == col.type &&
columns[j].dataset_id == col.dataset_id) {
r.emplace_back(i, j);
break;
}
}
}
return r;
}
/// the order of labels matters: labels are tried one by one and the first
/// label that names an existing column (of the given type, unless it is '*')
/// decides the result.
const Column* column_with_one_of_labels(std::initializer_list<const char*> labels,
                                        char type='*') const {
  for (auto it = labels.begin(); it != labels.end(); ++it) {
    const Column* found = column_with_label(*it, nullptr, type);
    if (found)
      return found;
  }
  return nullptr;
}
/// the order of labels doesn't matter: columns are scanned in their own
/// order and the first one of the given type carrying any of the labels
/// is returned (nullptr if there is none).
Column* column_with_type_and_any_of_labels(char type, std::initializer_list<const char*> labels) {
  for (Column& col : columns) {
    if (col.type != type)
      continue;
    for (auto it = labels.begin(); it != labels.end(); ++it)
      if (col.label == *it)
        return &col;
  }
  return nullptr;
}
// Returns the first column of type 'I' with one of the labels listed below,
// or nullptr if there is none.
Column* rfree_column() {
// cf. MtzToCif::default_spec in mtz2cif.hpp
return column_with_type_and_any_of_labels('I',
{"FREE", "RFREE", "FREER", "FreeR_flag", "R-free-flags", "FreeRflag", "R_FREE_FLAGS"});
}
const Column* rfree_column() const {
return const_cast<Mtz*>(this)->rfree_column();
}
// Returns the first column of type 'J' labelled IMEAN, I, IOBS or I-obs,
// or nullptr if there is none.
Column* imean_column() {
return column_with_type_and_any_of_labels('J', {"IMEAN", "I", "IOBS", "I-obs"});
}
const Column* imean_column() const {
return const_cast<Mtz*>(this)->imean_column();
}
// Returns the first column of type 'K' with one of the I(+)-like labels
// listed below, or nullptr if there is none.
Column* iplus_column() {
return column_with_type_and_any_of_labels('K', {"I(+)", "IOBS(+)", "I-obs(+)", "Iplus"});
}
const Column* iplus_column() const {
return const_cast<Mtz*>(this)->iplus_column();
}
// Returns the first column of type 'K' with one of the I(-)-like labels
// listed below, or nullptr if there is none.
Column* iminus_column() {
return column_with_type_and_any_of_labels('K', {"I(-)", "IOBS(-)", "I-obs(-)", "Iminus"});
}
const Column* iminus_column() const {
return const_cast<Mtz*>(this)->iminus_column();
}
/// True when the data array holds exactly columns.size() * nreflections
/// values.
bool has_data() const {
  const auto expected = columns.size() * nreflections;
  return data.size() == expected;
}
// The file is regarded as merged when it has no batch headers.
bool is_merged() const { return batches.empty(); }
/// Calculates min/max for all combinations of reflections and unit cells,
/// where unit cells are a global CELL and per-dataset DCELL.
std::array<double,2> calculate_min_max_1_d2() const;
// Recomputes min_1_d2 and max_1_d2 from calculate_min_max_1_d2()
// (element 0 is stored as the minimum, element 1 as the maximum).
void update_reso() {
std::array<double,2> reso = calculate_min_max_1_d2();
min_1_d2 = reso[0];
max_1_d2 = reso[1];
}
// Functions for reading MTZ headers and data.
// Flips the same_byte_order flag and byte-swaps header_offset, which was
// read under the previous byte-order assumption.
void toggle_endianness() {
same_byte_order = !same_byte_order;
swap_eight_bytes(&header_offset);
}
void read_first_bytes(AnyStream& stream);
/// read headers until END
void read_main_headers(AnyStream& stream, std::vector<std::string>* save_headers);
/// read the part between END and MTZENDOFHEADERS
void read_history_and_batch_headers(AnyStream& stream);
void setup_spacegroup();
void read_raw_data(AnyStream& stream, bool do_read=true);
void read_all_headers(AnyStream& stream);
void read_stream(AnyStream& stream, bool with_data);
// Reads headers and data from an uncompressed file at `path`.
// std::system_error is rethrown unchanged; any other std::runtime_error is
// re-reported through fail() with the path appended to the message.
void read_file(const std::string& path) {
try {
source_path = path;
FileStream stream(path.c_str(), "rb");
read_stream(stream, true);
} catch (std::system_error&) {
// this handler must come first: system_error derives from runtime_error
throw; // system_error::what() includes path, don't add anything
} catch (std::runtime_error& e) {
fail(std::string(e.what()) + ": " + path);
}
}
// Reads from any input object that provides path() and create_stream();
// with_data is passed on to read_stream().
template<typename Input>
void read_input(Input&& input, bool with_data) {
source_path = input.path();
read_stream(*input.create_stream(), with_data);
}
/// the same as read_input(MaybeGzipped(path), with_data)
void read_file_gz(const std::string& path, bool with_data=true);
std::vector<int> sorted_row_indices(int use_first=3) const;
bool sort(int use_first=3);
/// Miller indices read from three consecutive values of `data` starting at
/// `offset`, each converted to int.
Miller get_hkl(size_t offset) const {
  const float* row = data.data() + offset;
  return {{(int)row[0], (int)row[1], (int)row[2]}};
}
/// Writes the three Miller indices as floats into `data` starting at
/// `offset`.
void set_hkl(size_t offset, const Miller& hkl) {
  float* row = data.data() + offset;
  row[0] = static_cast<float>(hkl[0]);
  row[1] = static_cast<float>(hkl[1]);
  row[2] = static_cast<float>(hkl[2]);
}
/// Returns offset of the first hkl or (size_t)-1. Can be slow.
size_t find_offset_of_hkl(const Miller& hkl, size_t start=0) const;
/// (for merged MTZ only) change HKL to ASU equivalent, adjust phases, etc
void ensure_asu(bool tnt_asu=false);
/// Reindex data, usually followed by ensure_asu(). Outputs messages through logger.
void reindex(const Op& op);
/// Change symmetry to P1 and expand reflections. Does not sort.
/// Similar to command EXPAND in SFTOOLS.
void expand_to_p1();
/// (for unmerged MTZ only) change HKL according to M/ISYM
bool switch_to_original_hkl();
/// (for unmerged MTZ only) change HKL to ASU equivalent and set ISYM
bool switch_to_asu_hkl();
/// Appends a dataset that uses `name` for all three of its name fields and
/// the global cell; its ID is one more than the largest existing ID
/// (0 for the first dataset).
Dataset& add_dataset(const std::string& name) {
  int next_id = 0;
  for (const Dataset& existing : datasets)
    next_id = existing.id >= next_id ? existing.id + 1 : next_id;
  datasets.push_back({next_id, name, name, name, cell, 0.0});
  return datasets.back();
}
Column& add_column(const std::string& label, char type,
int dataset_id, int pos, bool expand_data);
// trailing_cols lists columns right after src_col that are also copied.
Column& replace_column(size_t dest_idx, const Column& src_col,
const std::vector<std::string>& trailing_cols={});
// If dest_idx < 0, the new column(s) are appended at the end;
// otherwise existing columns are overwritten.
Column& copy_column(int dest_idx, const Column& src_col,
const std::vector<std::string>& trailing_cols={});
void remove_column(size_t idx);
/// Removes every row for which `condition` returns true.
/// `condition` is called with a pointer to the first value of the row;
/// the remaining rows keep their relative order and nreflections is updated.
/// Fails if the Mtz has no data.
template <typename Func>
void remove_rows_if(Func condition) {
  if (!has_data())
    fail("No data.");
  const size_t width = columns.size();
  // An Mtz without columns passes has_data() with an empty data array.
  // There is nothing to filter then, and dividing by width would be
  // a division by zero.
  if (width == 0)
    return;
  auto out = data.begin();
  for (auto r = data.begin(); r < data.end(); r += width)
    if (!condition(&*r)) {
      // compact kept rows towards the front; skip the copy while nothing
      // has been removed yet
      if (r != out)
        std::copy(r, r + width, out);
      out += width;
    }
  data.erase(out, data.end());
  nreflections = int(data.size() / width);
}
/// Widens every data row by `added` values set to NAN, inserted at column
/// position pos_ (-1 means after the last old column). It expects that
/// `columns` already contains the added columns while `data` still has the
/// old width.
void expand_data_rows(size_t added, int pos_=-1) {
  const size_t old_width = columns.size() - added;
  if (old_width * nreflections != data.size())
    fail("Internal error");
  size_t pos = old_width;
  if (pos_ != -1)
    pos = (size_t) pos_;
  if (pos > old_width)
    fail("expand_data_rows(): pos out of range");
  vector_insert_columns(data, old_width, (size_t)nreflections, added, pos, NAN);
}
/// Replaces the whole data array with a copy of new_data[0..n) and sets
/// nreflections to n / columns.size().
/// Fails if the Mtz has no columns or if n is not a multiple of the number
/// of columns.
void set_data(const float* new_data, size_t n) {
  const size_t ncols = columns.size();
  // without this check n % ncols below would divide by zero
  if (ncols == 0)
    fail("Mtz.set_data(): no columns.");
  if (n % ncols != 0)
    fail("Mtz.set_data(): expected " + std::to_string(ncols) + " columns.");
  nreflections = int(n / ncols);
  data.assign(new_data, new_data + n);
}
// Functions for writing an MTZ file
void write_to_cstream(std::FILE* stream) const;
void write_to_string(std::string& str) const;
void write_to_file(const std::string& path) const;
size_t size_to_write() const;
size_t write_to_buffer(char* buf, size_t maxlen) const;
private:
template<typename Write> void write_to_stream(Write write) const;
};
// Convenience wrapper: creates an Mtz and fills it with Mtz::read_file(path).
inline Mtz read_mtz_file(const std::string& path) {
Mtz mtz;
mtz.read_file(path);
return mtz;
}
// Convenience wrapper: creates an Mtz and fills it with
// Mtz::read_input(input, with_data).
template<typename Input>
Mtz read_mtz(Input&& input, bool with_data) {
Mtz mtz;
mtz.read_input(std::forward<Input>(input), with_data);
return mtz;
}
// Abstraction of data source, cf. ReflnDataProxy.
// Read-only view of the values stored inside an Mtz object.
struct MtzDataProxy {
  const Mtz& mtz_;

  using num_type = float;

  // number of values per row
  size_t stride() const {
    return mtz_.columns.size();
  }
  // total number of values in the data array
  size_t size() const {
    return mtz_.data.size();
  }
  float get_num(size_t n) const {
    return mtz_.data[n];
  }
  const UnitCell& unit_cell() const {
    return mtz_.cell;
  }
  const SpaceGroup* spacegroup() const {
    return mtz_.spacegroup;
  }
  Miller get_hkl(size_t offset) const {
    return mtz_.get_hkl(offset);
  }
  // index of the first column with the given label; fails if there is none
  size_t column_index(const std::string& label) const {
    const Mtz::Column* col = mtz_.column_with_label(label);
    if (col == nullptr)
      fail("MTZ file has no column with label: " + label);
    return col->idx;
  }
};
// Like above, but here the data is stored outside of the Mtz class
// (the Mtz object supplies only the column count and nreflections).
struct MtzExternalDataProxy : MtzDataProxy {
  const float* data_;

  MtzExternalDataProxy(const Mtz& mtz, const float* data)
    : MtzDataProxy{mtz}, data_(data) {}

  size_t size() const {
    return mtz_.columns.size() * mtz_.nreflections;
  }
  float get_num(size_t n) const {
    return data_[n];
  }
  Miller get_hkl(size_t offset) const {
    const float* row = data_ + offset;
    return {{(int)row[0], (int)row[1], (int)row[2]}};
  }
};
// Wraps an Mtz in the proxy defined above; the proxy only holds a reference,
// so `mtz` must outlive it.
inline MtzDataProxy data_proxy(const Mtz& mtz) { return {mtz}; }
} // namespace gemmi
#endif
+80
View File
@@ -0,0 +1,80 @@
// Copyright 2017 Global Phasing Ltd.
//
// interface to stb_sprintf: snprintf_z, to_str(float|double)
#ifndef GEMMI_SPRINTF_HPP_
#define GEMMI_SPRINTF_HPP_
#include <string>
#ifdef __has_include
# if __has_include(<charconv>) && !(defined(_MSVC_LANG) && _MSVC_LANG < 201703L)
# include <charconv>
# endif
#endif
#if __cpp_lib_to_chars < 201611L
# include <algorithm> // for min
#endif
#include "fail.hpp" // for GEMMI_DLL
namespace gemmi {
// On MinGW format(printf) doesn't support %zu.
#if (defined(__GNUC__) && !defined(__MINGW32__)) || defined(__clang__)
# define GEMMI_ATTRIBUTE_FORMAT(fmt,va) __attribute__((format(printf,fmt,va)))
#else
# define GEMMI_ATTRIBUTE_FORMAT(fmt,va)
#endif
/// stb_snprintf in gemmi namespace - like snprintf, but ignores locale
/// and is always zero-terminated (hence _z).
GEMMI_DLL int snprintf_z(char *buf, int count, char const *fmt, ...)
GEMMI_ATTRIBUTE_FORMAT(3,4);
/// stb_sprintf in gemmi namespace
GEMMI_DLL int sprintf_z(char *buf, char const *fmt, ...) GEMMI_ATTRIBUTE_FORMAT(2,3);
/// Formats a double with the "%.9g" format using sprintf_z.
inline std::string to_str(double d) {
  char text[24];
  const int written = sprintf_z(text, "%.9g", d);
  if (written <= 0)
    return std::string();
  return std::string(text, written);
}
/// Formats a float with the "%.6g" format using sprintf_z.
inline std::string to_str(float d) {
  char text[16];
  const int written = sprintf_z(text, "%.6g", d);
  if (written <= 0)
    return std::string();
  return std::string(text, written);
}
/// Formats `d` with exactly Prec digits after the decimal point ("%.*f")
/// when -1e8 < d < 1e8, and with "%g" otherwise.
/// Prec must be in the range 0..6.
template<int Prec>
std::string to_str_prec(double d) {
  static_assert(Prec >= 0 && Prec < 7, "unsupported precision");
  // The longest fixed-point output is a minus sign, up to 9 integer digits
  // (a value just above -1e8 may round up), the decimal point and
  // 6 fractional digits: 17 characters plus the terminating NUL.
  // The previous 16-byte buffer was too small for such values.
  char buf[24];
  int len = d > -1e8 && d < 1e8 ? sprintf_z(buf, "%.*f", Prec, d)
                                : sprintf_z(buf, "%g", d);
  return std::string(buf, len > 0 ? len : 0);
}
/// zero-terminated to_chars()
/// Writes `value` in decimal into [first, last) and returns a pointer to the
/// terminating NUL. One byte is always kept for the terminator: with
/// std::to_chars only [first, last-1) is offered for digits, and in the
/// fallback the returned pointer is capped at last-1.
inline char* to_chars_z(char* first, char* last, int value) {
#if __cpp_lib_to_chars >= 201611L
auto result = std::to_chars(first, last-1, value);
*result.ptr = '\0';
return result.ptr;
#else
int n = snprintf_z(first, int(last - first), "%d", value);
return std::min(first + n, last - 1);
#endif
}
// Overload for size_t; same contract as the int overload: the result is
// NUL-terminated and the returned pointer points at the terminator.
inline char* to_chars_z(char* first, char* last, size_t value) {
#if __cpp_lib_to_chars >= 201611L
auto result = std::to_chars(first, last-1, value);
*result.ptr = '\0';
return result.ptr;
#else
int n = snprintf_z(first, int(last - first), "%zu", value);
return std::min(first + n, last - 1);
#endif
}
} // namespace gemmi
#endif
File diff suppressed because it is too large Load Diff
+315
View File
@@ -0,0 +1,315 @@
// Copyright 2017 Global Phasing Ltd.
//
// Utilities. Mostly for working with strings and vectors.
#ifndef GEMMI_UTIL_HPP_
#define GEMMI_UTIL_HPP_
#include <cassert>
#include <cctype> // for isspace
#include <cstring> // for strncmp
#include <algorithm> // for equal, find, remove_if
#include <iterator> // for begin, end, make_move_iterator
#include <string>
#include <vector>
namespace gemmi {
// ##### string helpers #####
// Appends a value to a string: integers are converted with std::to_string,
// anything else is appended with operator+=. Appending a double is
// deliberately disabled.
inline void append_to_str(std::string& out, int v) {
  out.append(std::to_string(v));
}
inline void append_to_str(std::string& out, size_t v) {
  out.append(std::to_string(v));
}
void append_to_str(std::string& out, double) = delete;
template<typename T>
void append_to_str(std::string& out, const T& v) {
  out += v;
}
// Appends all arguments, in order, to `out` using append_to_str().
// The overload without values ends the recursion.
inline void cat_to(std::string&) {}
template <typename T, typename... Args>
void cat_to(std::string& out, const T& head, Args const&... tail) {
  append_to_str(out, head);
  cat_to(out, tail...);
}
// Concatenates all arguments into a new string (see cat_to()).
template <class... Args>
std::string cat(Args const&... args) {
std::string out;
cat_to(out, args...);
return out;
}
// True if `str` begins with `prefix` (an empty prefix always matches).
inline bool starts_with(const std::string& str, const std::string& prefix) {
  const size_t n = prefix.length();
  if (str.length() < n)
    return false;
  return str.compare(0, n, prefix) == 0;
}
// Variant for a C string and a character array of known size (typically
// a string literal): compares the first N-1 characters.
template<size_t N> bool starts_with(const char* a, const char (&b)[N]) {
  const size_t prefix_len = N - 1;
  const int diff = std::strncmp(a, b, prefix_len);
  return diff == 0;
}
// True if `str` ends with `suffix` (an empty suffix always matches).
inline bool ends_with(const std::string& str, const std::string& suffix) {
  const size_t n = suffix.length();
  if (str.length() < n)
    return false;
  return str.compare(str.length() - n, n, suffix) == 0;
}
// can be faster than std::tolower() b/c it takes char not int
// Only A-Z are changed; every other character is returned as is.
inline char lower(char c) {
  const bool is_upper = (c >= 'A' && c <= 'Z');
  return is_upper ? static_cast<char>(c | 0x20) : c;
}
// works as expected only for a-zA-Z
// (it clears bit 0x20 unconditionally, so other characters may be altered)
inline char alpha_up(char c) { return c & ~0x20; }
// Returns a copy of `str` with A-Z converted to lower case; other characters
// are left untouched.
inline std::string to_lower(std::string str) {
  for (size_t i = 0; i != str.size(); ++i) {
    const char ch = str[i];
    if (ch >= 'A' && ch <= 'Z')
      str[i] = static_cast<char>(ch | 0x20);
  }
  return str;
}
// Returns a copy of `str` with a-z converted to upper case; other characters
// are left untouched.
inline std::string to_upper(std::string str) {
  for (size_t i = 0; i != str.size(); ++i) {
    const char ch = str[i];
    if (ch >= 'a' && ch <= 'z')
      str[i] = static_cast<char>(ch & ~0x20);
  }
  return str;
}
// case-insensitive character comparison
// Two different characters are considered the same only if they differ
// exactly in bit 0x20 and are letters a-z / A-Z.
inline bool isame(char a, char b) {
  if (a == b)
    return true;
  if ((a ^ b) != 0x20)
    return false;
  const int folded = a | 0x20;
  return folded >= 'a' && folded <= 'z';
}
// Case-insensitive comparisons. The second arg must be lowercase.
// True if the part of `str` starting at `offset` equals `low`, ignoring the
// case of the characters in `str`.
inline bool iequal_from(const std::string& str, size_t offset, const std::string& low) {
  if (str.length() != low.length() + offset)
    return false;
  auto same = [](char expected, char actual) { return expected == lower(actual); };
  return std::equal(std::begin(low), std::end(low), str.begin() + offset, same);
}
// Case-insensitive equality of whole strings; `low` must be lowercase.
inline bool iequal(const std::string& str, const std::string& low) {
return iequal_from(str, 0, low);
}
// Case-insensitive starts_with(): each prefix character is compared with
// the lowercased character of `str`, so `prefix` is expected to be lowercase.
inline bool istarts_with(const std::string& str, const std::string& prefix) {
  if (str.length() < prefix.length())
    return false;
  auto same = [](char expected, char actual) { return expected == lower(actual); };
  return std::equal(std::begin(prefix), std::end(prefix), str.begin(), same);
}
// Case-insensitive ends_with(): each suffix character is compared with the
// lowercased character of `str`, so `suffix` is expected to be lowercase.
inline bool iends_with(const std::string& str, const std::string& suffix) {
  const size_t n = suffix.length();
  if (str.length() < n)
    return false;
  auto same = [](char expected, char actual) { return expected == lower(actual); };
  return std::equal(std::begin(suffix), std::end(suffix), str.end() - n, same);
}
// Like iends_with(), but also accepts the suffix followed by ".gz".
inline bool giends_with(const std::string& str, const std::string& suffix) {
return iends_with(str, suffix) || iends_with(str, suffix + ".gz");
}
// Returns `str` without leading and trailing spaces, CRs, LFs and tabs.
inline std::string trim_str(const std::string& str) {
  const char* const blanks = " \r\n\t";
  const std::string::size_type head = str.find_first_not_of(blanks);
  if (head == std::string::npos)
    return std::string();
  const std::string::size_type tail = str.find_last_not_of(blanks);
  return str.substr(head, tail + 1 - head);
}
// Returns `str` without trailing spaces, CRs, LFs and tabs.
inline std::string rtrim_str(const std::string& str) {
  const std::string::size_type tail = str.find_last_not_of(" \r\n\t");
  if (tail == std::string::npos)
    return str.substr(0, 0);
  return str.substr(0, tail + 1);
}
// Returns a pointer just past the last non-whitespace character of the
// string that begins at `start`.
// end is after the last character of the string (typically \0);
// if it is null, the end is found by searching for the terminating NUL.
// Returns nullptr if `start` is null.
inline const char* rtrim_cstr(const char* start, const char* end=nullptr) {
  if (!start)
    return nullptr;
  if (!end) {
    end = start;
    while (*end != '\0')
      ++end;
  }
  // std::isspace() requires a value representable as unsigned char (or EOF);
  // passing a negative plain char (e.g. a UTF-8 byte) is undefined behavior.
  while (end > start && std::isspace(static_cast<unsigned char>(end[-1])))
    --end;
  return end;
}
namespace impl {
// Length of a separator: 1 for a single char, length() for a string.
inline size_t length(char) { return 1; }
inline size_t length(const std::string& s) { return s.length(); }
}
// takes a single separator (usually char or string);
// may return empty fields
// The fields are appended to `result`; at least one field is always added.
template<typename S>
void split_str_into(const std::string& str, S sep,
                    std::vector<std::string>& result) {
  std::size_t field_start = 0;
  for (;;) {
    const std::size_t sep_pos = str.find(sep, field_start);
    if (sep_pos == std::string::npos)
      break;
    result.emplace_back(str, field_start, sep_pos - field_start);
    field_start = sep_pos + impl::length(sep);
  }
  result.emplace_back(str, field_start);
}
// Returns the fields of `str` separated by `sep` (see split_str_into()).
template<typename S>
std::vector<std::string> split_str(const std::string& str, S sep) {
std::vector<std::string> result;
split_str_into(str, sep, result);
return result;
}
// _multi variants take multiple 1-char separators as a string;
// discard empty fields
// The fields are appended to `result`.
inline void split_str_into_multi(const std::string& str, const char* seps,
                                 std::vector<std::string>& result) {
  for (std::size_t field_start = str.find_first_not_of(seps);
       field_start != std::string::npos; ) {
    const std::size_t field_end = str.find_first_of(seps, field_start);
    // for the last field field_end is npos and the count is clamped
    result.emplace_back(str, field_start, field_end - field_start);
    field_start = str.find_first_not_of(seps, field_end);
  }
}
// Returns the non-empty fields of `str` separated by any character from
// `seps` (space and tab by default).
inline std::vector<std::string> split_str_multi(const std::string& str,
const char* seps=" \t") {
std::vector<std::string> result;
split_str_into_multi(str, seps, result);
return result;
}
// Joins the elements of [begin, end), converted with `getter`, into one
// string with `sep` placed between consecutive elements.
template<typename T, typename S, typename F>
std::string join_str(T begin, T end, const S& sep, const F& getter) {
  std::string joined;
  T it = begin;
  if (it == end)
    return joined;
  joined += getter(*it);
  for (++it; it != end; ++it) {
    joined += sep;
    joined += getter(*it);
  }
  return joined;
}

// As above, for a range of strings that are joined unchanged.
template<typename T, typename S>
std::string join_str(T begin, T end, const S& sep) {
  auto identity = [](const std::string& t) { return t; };
  return join_str(begin, end, sep, identity);
}

// Overloads taking a whole container instead of a pair of iterators.
template<typename T, typename S, typename F>
std::string join_str(const T& iterable, const S& sep, const F& getter) {
  return join_str(iterable.begin(), iterable.end(), sep, getter);
}

template<typename T, typename S>
std::string join_str(const T& iterable, const S& sep) {
  return join_str(iterable.begin(), iterable.end(), sep);
}
// Appends `item` to `str`, preceded by `sep` unless `str` is still empty.
template<typename T, typename S>
void string_append_sep(std::string& str, S sep, const T& item) {
  const bool needs_sep = !str.empty();
  if (needs_sep)
    str += sep;
  str += item;
}
// Replaces, in place, every non-overlapping occurrence of `old` in `s` with
// `new_`. Text inserted by a replacement is not searched again.
// An empty `old` leaves `s` unchanged.
inline void replace_all(std::string &s,
                        const std::string &old, const std::string &new_) {
  // find("") matches at every position, so without this guard the loop
  // below would never terminate
  if (old.empty())
    return;
  std::string::size_type pos = 0;
  while ((pos = s.find(old, pos)) != std::string::npos) {
    s.replace(pos, old.size(), new_);
    pos += new_.size();
  }
}
// list is a comma separated string
// True if `name` equals one of the items of `list` separated by `sep`.
inline bool is_in_list(const std::string& name, const std::string& list,
                       char sep=',') {
  // a name at least as long as the whole list can only match all of it
  if (name.length() >= list.length())
    return name == list;
  size_t item_start = 0;
  for (;;) {
    const size_t item_end = list.find(sep, item_start);
    // for the last item item_end is npos and the count is clamped
    if (list.compare(item_start, item_end - item_start, name) == 0)
      return true;
    if (item_end == std::string::npos)
      return false;
    item_start = item_end + 1;
  }
}
// ##### vector helpers #####

// True if `v` contains an element equal to `x`.
template <class T>
bool in_vector(const T& x, const std::vector<T>& v) {
  const auto last = v.end();
  return std::find(v.begin(), last, x) != last;
}
// True if the predicate `f` holds for at least one element of `v`.
template <typename F, typename T>
bool in_vector_f(F f, const std::vector<T>& v) {
  const auto last = v.end();
  return std::find_if(v.begin(), last, f) != last;
}
// Pointer one past the last element of the vector's storage.
template <class T>
T* vector_end_ptr(std::vector<T>& v) {
  T* first = v.data();
  return first + v.size();
}
template <class T>
const T* vector_end_ptr(const std::vector<T>& v) {
  const T* first = v.data();
  return first + v.size();
}
// Moves the elements of `src` to the end of `dst`. An empty `dst` simply
// takes over the whole `src` vector.
template <class T>
void vector_move_extend(std::vector<T>& dst, std::vector<T>&& src) {
  if (dst.empty()) {
    dst = std::move(src);
    return;
  }
  auto first = std::make_move_iterator(src.begin());
  auto last = std::make_move_iterator(src.end());
  dst.insert(dst.end(), first, last);
}
// wrapper around the erase-remove idiom:
// removes from `v` all elements for which `condition` returns true
template <class T, typename F>
void vector_remove_if(std::vector<T>& v, F&& condition) {
  auto new_end = std::remove_if(v.begin(), v.end(), condition);
  v.erase(new_end, v.end());
}
/// \par data - 2d array (old_width x length) in a vector
/// Insert \par n new columns at position pos.
/// The new elements are set to \par new_value. The vector is first enlarged
/// and then filled in place, from the last row to the first.
template <class T>
void vector_insert_columns(std::vector<T>& data, size_t old_width,
size_t length, size_t n, size_t pos, const T& new_value) {
assert(data.size() == old_width * length);
assert(pos <= old_width);
data.resize(data.size() + n * length);
typename std::vector<T>::iterator dst = data.end();
// Rows are processed backwards so that each write position is never before
// the element that still has to be read from the old layout.
for (size_t i = length; i-- != 0; ) {
// old columns located after the insertion point
for (size_t j = old_width; j-- != pos; )
*--dst = data[i * old_width + j];
// the inserted columns
for (size_t j = n; j-- != 0; )
*--dst = new_value;
// old columns located before the insertion point
for (size_t j = pos; j-- != 0; )
*--dst = data[i * old_width + j];
}
assert(dst == data.begin());
}
/// \par data - 2d array with new_width+1 columns, in a vector
/// Remove column at position pos.
/// The remaining elements are shifted towards the front in place and the
/// vector is shrunk. An empty vector is left unchanged.
template <class T>
void vector_remove_column(std::vector<T>& data, size_t new_width, size_t pos) {
  assert(pos <= new_width);
  // With no rows there is nothing to remove. Without this check the final
  // resize(pos) would enlarge an empty vector to `pos` elements.
  if (data.empty())
    return;
  // `pos` is the write position and `source` the read position. After each
  // run of new_width copied elements the outer loop skips one element:
  // the removed column of the next row.
  for (size_t source = pos + 1; source < data.size(); ++source)
    for (size_t i = 0; i < new_width && source < data.size(); ++i)
      data[pos++] = data[source++];
  data.resize(pos);
}
// ##### other helpers #####
// Numeric ID used for case-insensitive comparison of 4 letters.
// s must have 4 chars or 3 chars + NUL, ' ' and NUL are equivalent in s.
// The four bytes are packed into one int (s[0] in the highest byte) and
// bit 0x20 of every byte is cleared.
constexpr int ialpha4_id(const char* s) {
return (s[0] << 24 | s[1] << 16 | s[2] << 8 | s[3]) & ~0x20202020;
}
// Numeric ID used for case-insensitive comparison of 3 letters.
// The three bytes are packed into one int (s[0] in the highest of the three
// bytes) and bit 0x20 of every byte is cleared.
constexpr int ialpha3_id(const char* s) {
return (s[0] << 16 | s[1] << 8 | s[2]) & ~0x20202020;
}
} // namespace gemmi
#endif
+183
View File
@@ -0,0 +1,183 @@
// Copyright 2020 Global Phasing Ltd.
//
// Read XDS files: XDS_ASCII.HKL and INTEGRATE.HKL.
#ifndef GEMMI_XDS_ASCII_HPP_
#define GEMMI_XDS_ASCII_HPP_
#include "input.hpp" // for AnyStream, FileStream
#include "unitcell.hpp" // for UnitCell
#include "util.hpp" // for starts_with
namespace gemmi {
// from Pointless docs: likely in-house source, in which case
// the unpolarised value is left unchanged (recognised wavelengths
// are CuKalpha 1.5418 +- 0.0019, Mo 0.7107 +- 0.0002, Cr 2.29 +- 0.01)
inline bool likely_in_house_source(double wavelength) {
  struct EmissionLine {
    double lambda;
    double tolerance;
  };
  const EmissionLine known_lines[] = {
    {1.5418, 0.0019},
    {0.7107, 0.0002},
    {2.29, 0.01},
  };
  for (const EmissionLine& line : known_lines)
    if (std::fabs(wavelength - line.lambda) < line.tolerance)
      return true;
  return false;
}
// Header-level information about an XDS reflection file, kept separately
// from the reflection list (see XdsAscii below, which derives from it).
struct XdsAsciiMetadata {
// Per-ISET information; `id` is the key used to look up an Iset.
struct Iset {
int id;
std::string input_file;
double wavelength = 0.;
std::array<double,6> cell_constants = {0., 0., 0., 0., 0., 0.};
//statistics set by gather_iset_statistics()
int frame_number_min = -1;
int frame_number_max = -1;
int frame_count = -1;
int reflection_count = -1;
Iset(int id_) : id(id_) {}
};
std::string source_path;
int read_columns = 0; // doesn't include ITEM_ISET from XSCALE
int spacegroup_number = 0;
double wavelength = 0.;
std::array<double,6> cell_constants = {0., 0., 0., 0., 0., 0.};
Mat33 cell_axes{0.};
Vec3 incident_beam_dir;
double oscillation_range = 0.;
Vec3 rotation_axis;
double starting_angle = 0.;
double reflecting_range_esd = 0.;
char friedels_law = '\0';
int starting_frame = 1;
int nx = 0; // detector size - number of pixels
int ny = 0;
double qx = 0.; // pixel size in mm
double qy = 0.;
double orgx = 0.;
double orgy = 0.;
double detector_distance = 0.;
std::string generated_by;
std::string version_str;
std::vector<Iset> isets;
};
// Content of an XDS reflection file: the metadata inherited from
// XdsAsciiMetadata plus the list of reflections in `data`.
struct GEMMI_DLL XdsAscii : XdsAsciiMetadata {
// One reflection record. Only `iset` has a default value; the other
// members are left uninitialized by default construction.
struct Refl {
Miller hkl;
int iset = 1;
double iobs;
double sigma;
double xd;
double yd;
double zd;
double rlp;
double peak;
double corr; // is it always integer?
double maxc;
// ZD can be negative for a few reflections
// (hence floor() is used rather than truncation towards zero)
int frame() const { return (int) std::floor(zd + 1); }
};
std::vector<Refl> data;
XdsAscii() = default;
// Starts with a copy of the given metadata and an empty reflection list.
XdsAscii(const XdsAsciiMetadata& m) : XdsAsciiMetadata(m) {}
// Returns the Iset with the given id, appending a new one if it is missing.
Iset& find_or_add_iset(int id) {
for (Iset& i : isets)
if (i.id == id)
return i;
isets.emplace_back(id);
return isets.back();
}
void read_stream(AnyStream& reader, const std::string& source);
// Reads from any input object that provides create_stream() and path().
template<typename T>
void read_input(T&& input) {
read_stream(*input.create_stream(), input.path());
}
// The data is regarded as merged when fewer than 8 columns were read.
bool is_merged() const { return read_columns < 8; }
// set a few Iset properties in isets
void gather_iset_statistics();
// Rotation angle of a reflection: starting_angle plus oscillation_range
// times the position (zd) counted from starting_frame - 1.
double rot_angle(const Refl& refl) const {
double z = refl.zd - starting_frame + 1;
return starting_angle + oscillation_range * z;
}
// it's already normalized, but just in case normalize it again
Vec3 get_rotation_axis() const {
double length = rotation_axis.length();
if (length == 0)
fail("unknown rotation axis");
return rotation_axis / length;
}
// I'm not sure if always |incident_beam_dir| == 1/wavelength
Vec3 get_s0_direction() const {
double length = incident_beam_dir.length();
if (length == 0)
fail("unknown incident beam direction");
return incident_beam_dir / length;
}
// True if none of the three rows of cell_axes is all zeros.
bool has_cell_axes() const {
for (int i = 0; i < 3; ++i)
if (cell_axes[i][0] == 0 && cell_axes[i][1] == 0 && cell_axes[i][2] == 0)
return false;
return true;
}
/// Return transition matrix from "Cambridge" frame to XDS frame.
/// x_xds = M x_cam
Mat33 calculate_conversion_from_cambridge() const {
// Cambridge z direction is along the principal rotation axis
Vec3 z = get_rotation_axis();
// Cambridge x direction is along beam
Vec3 x = get_s0_direction();
Vec3 y = z.cross(x).normalized();
// beam and rotation axis may not be orthogonal
x = y.cross(z).normalized();
return Mat33::from_columns(x, y, z);
}
// Builds a matrix from the rows of cell_axes (taken as a, b, c):
// its columns are derived from the cross products b x c and c x a.
// Fails if the cell axes are not known.
Mat33 get_orientation() const {
if (!has_cell_axes())
fail("unknown unit cell axes");
Vec3 a = cell_axes.row_copy(0);
Vec3 b = cell_axes.row_copy(1);
Vec3 c = cell_axes.row_copy(2);
Vec3 ar = b.cross(c).normalized();
Vec3 br = c.cross(a);
Vec3 cr = ar.cross(br).normalized();
br = cr.cross(ar);
return Mat33::from_columns(ar, br, cr);
}
/// \par p is degree of polarization from range (0,1), as used in XDS.
void apply_polarization_correction(double p, Vec3 normal);
/// \par overload is maximally allowed pixel value in a peak (MAXC).
void eliminate_overloads(double overload) {
vector_remove_if(data, [&](Refl& r) { return r.maxc > overload; });
}
/// \par batchmin lowest allowed batch number.
void eliminate_batchmin(int batchmin) {
double minz = batchmin - 1;
vector_remove_if(data, [&](Refl& r) { return r.zd < minz; });
}
};
// Reads an XDS file from `path` through a FileStream opened in "rb" mode
// (no decompression here; see read_xds_ascii() below for gzipped files).
inline XdsAscii read_xds_ascii_file(const std::string& path) {
XdsAscii ret;
FileStream stream(path.c_str(), "rb");
ret.read_stream(stream, path);
return ret;
}
/// read possibly gzipped file
GEMMI_DLL XdsAscii read_xds_ascii(const std::string& path);
} // namespace gemmi
#endif
+189
View File
@@ -0,0 +1,189 @@
// Copyright Global Phasing Ltd.
#include <gemmi/gz.hpp>
#include <cassert>
#include <cstdio> // fseek, ftell, fread
#include <climits> // INT_MAX
#if USE_ZLIB_NG
# define WITH_GZFILEOP 1
# include <zlib-ng.h>
# define GG(name) zng_ ## name
#else
# include <zlib.h>
# define GG(name) name
#endif
#include <gemmi/fileutil.hpp> // file_open
namespace gemmi {
// Name and version of the compression library selected at compile time
// (zlib-ng when USE_ZLIB_NG is set, zlib otherwise).
const char* const zlib_description =
#if USE_ZLIB_NG
"zlib-ng " ZLIBNG_VERSION;
#else
"zlib " ZLIB_VERSION;
#endif
// Estimates the uncompressed size of a gzip file from the 32-bit size field
// stored in its last 4 bytes.
// Throws if the size is not found or if it is suspicious.
// The stored size is accepted only if it is not more than 100 bytes smaller
// than the compressed size and not more than 100x the compressed size;
// these limits are arbitrary.
// **This function should not be relied upon.**
// In particular, if the return value is >= 4GiB - it's only a guess.
size_t estimate_uncompressed_size(const std::string& path) {
  fileptr_t f = file_open(path.c_str(), "rb");
  unsigned char buf[4];
  // check the two magic bytes of the gzip format
  if (std::fread(buf, 1, 2, f.get()) != 2)
    sys_fail("Failed to read: " + path);
  if (buf[0] != 0x1f || buf[1] != 0x8b)
    fail("File not in the gzip format: " + path);
  if (std::fseek(f.get(), -4, SEEK_END) != 0)
    sys_fail("fseek() failed (empty file?): " + path);
  long pos = std::ftell(f.get());
  if (pos <= 0)
    sys_fail("ftell() failed on " + path);
  size_t gzipped_size = pos + 4;
  if (std::fread(buf, 1, 4, f.get()) != 4)
    sys_fail("Failed to read last 4 bytes of: " + path);
  // Assemble the little-endian value in unsigned arithmetic: shifting the
  // promoted (signed) int left by 24 would overflow for buf[3] >= 0x80.
  const unsigned orig_size = ((unsigned) buf[3] << 24) | ((unsigned) buf[2] << 16) |
                             ((unsigned) buf[1] << 8) | (unsigned) buf[0];
  // Compare in 64 bits, so that orig_size + 100 cannot wrap around 2^32
  // when the stored size is close to 4 GiB.
  const unsigned long long orig64 = orig_size;
  const unsigned long long gzipped64 = gzipped_size;
  if (orig64 + 100 < gzipped64 || orig64 > 100 * gzipped64) {
    // The size is stored as 32-bit number. If the original size exceeds 4GiB,
    // the stored number is modulo 4 GiB. So we just guess...
    constexpr size_t max_uint = 4294967295U;
    if (gzipped_size > max_uint / 6)
      return max_uint + (sizeof(size_t) > 4 ? orig_size : 0);
    fail("Cannot determine uncompressed size of " + path +
         "\nWould it be " + std::to_string(gzipped_size) + " -> " +
         std::to_string(orig_size) + " bytes?");
  }
  return orig_size;
}
// Reads up to `len` bytes into `buf` and returns the number of bytes read.
// With plain zlib the request is split into pieces of at most INT_MAX bytes,
// because gzread() reports the count as int; reading stops at the first
// piece that comes back short.
// NOTE(review): the int returned by gzread() is added to a size_t without
// checking its sign. If gzread() can return a negative value here, the sum
// wraps around; callers such as gzread_checked() appear to rely on their
// own checks of the returned count -- confirm before changing.
static size_t big_gzread(gzFile file, void* buf, size_t len) {
#if USE_ZLIB_NG
return GG(gzfread)(buf, 1, len, file);
#else
// In zlib >= 1.2.9 we could use gzfread()
size_t read_bytes = 0;
while (len > INT_MAX) {
int ret = gzread(file, buf, INT_MAX);
read_bytes += ret;
if (ret != INT_MAX)
return read_bytes;
len -= INT_MAX;
buf = (char*) buf + INT_MAX;
}
read_bytes += gzread(file, buf, (unsigned) len);
return read_bytes;
#endif
}
// Reads a line into `line` (buffer of `size` bytes); forwards to gzgets().
char* GzStream::gets(char* line, int size) {
return GG(gzgets)((gzFile)f, line, size);
}
// Reads one byte; forwards to gzgetc() and returns its result.
int GzStream::getc() {
return GG(gzgetc)((gzFile)f);
}
// Reads exactly `len` bytes into `buf`; returns false if fewer (or a
// different number of) bytes were reported as read.
bool GzStream::read(void* buf, size_t len) {
return big_gzread((gzFile)f, buf, len) == len;
}
// Moves the read position forward by `n` bytes using gzseek();
// returns false if gzseek() reports failure (-1).
bool GzStream::skip(size_t n) {
return GG(gzseek)((gzFile)f, n, SEEK_CUR) != -1;
}
// Current position as reported by gztell(), converted to long.
long GzStream::tell() {
return GG(gztell)((gzFile)f);
}
// Reads everything that is left in the stream and returns it as a string.
// A single byte is read first, so that nothing more is attempted when the
// stream is already at its end; the rest is read in 512-byte chunks until
// a chunk comes back incomplete.
std::string GzStream::read_rest() {
  std::string rest;
  const int first = getc();
  if (first == EOF)
    return rest;
  rest += (char)first;
  char chunk[512];
  size_t got;
  do {
    got = big_gzread((gzFile)f, chunk, sizeof(chunk));
    rest.append(chunk, got);
  } while (got == sizeof(chunk));
  return rest;
}
// Passes the path to the BasicInput base; nothing else is done here.
MaybeGzipped::MaybeGzipped(const std::string& path)
  : BasicInput(path) {
}
// Closes the gz handle if one was opened (gzclose_r() when available).
MaybeGzipped::~MaybeGzipped() {
  if (!file_)
    return;
#if USE_ZLIB_NG || (ZLIB_VERNUM >= 0x1235)
  GG(gzclose_r)((gzFile)file_);
#else
  gzclose((gzFile)file_);
#endif
}
size_t MaybeGzipped::gzread_checked(void* buf, size_t len) {
gzFile file = (gzFile) file_;
size_t read_bytes = big_gzread(file, buf, len);
if (read_bytes != len && !GG(gzeof)(file)) {
int errnum = 0;
std::string err_str = GG(gzerror)(file, &errnum);
if (errnum == Z_ERRNO)
sys_fail("failed to read " + path());
if (errnum)
fail("Error reading " + path() + ": " + err_str);
}
if (read_bytes > len) // should never happen
fail("Error reading " + path());
return read_bytes;
}
// Reads the (possibly gzipped) file into a memory buffer.
//
// limit - if non-zero, read at most this many uncompressed bytes;
//         if zero, read everything, starting from the size estimated
//         from the gzip trailer and growing the buffer if more data follows.
// Uncompressed files are delegated to BasicInput::uncompress_into_buffer().
// Fails if the file cannot be opened or read, or if the uncompressed data
// would exceed 3 GiB.
CharArray MaybeGzipped::uncompress_into_buffer(size_t limit) {
  if (!is_compressed())
    return BasicInput::uncompress_into_buffer();
  constexpr size_t max_size = 3221225471;  // 3 GiB - 1
  size_t size = (limit == 0 ? estimate_uncompressed_size(path()) : limit);
  file_ = GG(gzopen)(path().c_str(), "rb");
  if (!file_)
    sys_fail("Failed to gzopen " + path());
  if (size > max_size)
    // if this exception is changed adjust prog/cif2mtz.cpp
    fail("For now gz files above 3 GiB uncompressed are not supported.\n"
         "To read " + path() + " first uncompress it.");
  CharArray mem(size);
  size_t read_bytes = gzread_checked(mem.data(), size);
  // if the file is shorter than the size from header, adjust size
  if (read_bytes < size) {
    mem.set_size(read_bytes);  // should we call resize() here
  } else if (limit == 0) { // read_bytes == size
    // if the file is longer than the size from header, read in the rest
    int next_char;
    while (!GG(gzeof)((gzFile)file_) && (next_char = GG(gzgetc)((gzFile)file_)) != -1) {
      if (mem.size() > max_size)
        fail("For now gz files above 3 GiB uncompressed are not supported.\n"
             "To read " + path() + " first uncompress it.");
      GG(gzungetc)(next_char, (gzFile)file_);
      size_t old_size = mem.size();
      // Double the buffer, but always grow by a non-zero amount: if the
      // header reported size 0, doubling would never make progress and
      // this loop would not terminate.
      size_t extra = (old_size != 0 ? old_size : 4096);
      mem.resize(old_size + extra);
      size_t n = gzread_checked(mem.data() + old_size, extra);
      mem.set_size(old_size + n);
    }
  }
  return mem;
}
// Returns a stream for reading the file: a GzStream over a freshly opened
// gz handle for compressed input, otherwise whatever BasicInput provides.
std::unique_ptr<AnyStream> MaybeGzipped::create_stream() {
  if (!is_compressed())
    return BasicInput::create_stream();
  file_ = GG(gzopen)(path().c_str(), "rb");
  if (!file_)
    sys_fail("Failed to gzopen " + path());
#if ZLIB_VERNUM >= 0x1235
  GG(gzbuffer)((gzFile)file_, 64*1024);
#endif
  return std::unique_ptr<AnyStream>(new GzStream(file_));
}
} // namespace gemmi
+991
View File
@@ -0,0 +1,991 @@
// Copyright 2019-2023 Global Phasing Ltd.
#include <gemmi/mtz.hpp>
#include <cstring> // for memcpy
#include <algorithm> // for stable_sort
#include <gemmi/atof.hpp> // for fast_atof
#include <gemmi/atox.hpp> // for simple_atoi, read_word
#include <gemmi/gz.hpp>
#include <gemmi/sprintf.hpp>
namespace gemmi {
namespace {
// Brings an angle in degrees into one turn by subtracting a whole number
// of 360-degree turns; values already in [0, 360) are returned unchanged.
double wrap_degrees(double phi) {
  const bool already_wrapped = (phi >= 0 && phi < 360.);
  if (already_wrapped)
    return phi;
  const double turns = std::floor(phi / 360.);
  return phi - turns * 360.;
}
// Adds deg(shift) to the phase phi, optionally negates the sum,
// and stores the result wrapped with wrap_degrees() back into phi.
void shift_phase(float& phi, double shift, bool negate=false) {
  double shifted = phi + deg(shift);
  if (negate)
    shifted = -shifted;
  phi = float(wrap_degrees(shifted));
}
// Applies a phase shift to Hendrickson-Lattman coefficients HLA, HLB, HLC
// and HLD in place: (a, b) is rotated by `shift`, (c, d) by 2*shift.
// If negate is set, the sine-like terms b and d change sign afterwards.
void shift_hl_coefficients(float& a, float& b, float& c, float& d,
                           double shift, bool negate=false) {
  const double sin1 = std::sin(shift);
  const double cos1 = std::cos(shift);
  // double-angle values from the single-angle ones
  const double sin2 = 2 * sin1 * cos1;
  const double cos2 = sq(cos1) - sq(sin1);
  // a sin(x+y) + b cos(x+y) = a sin(x) cos(y) - b sin(x) sin(y)
  //                         + a cos(x) sin(y) + b cos(x) cos(y)
  const float rot_a = float(a * cos1 - b * sin1);
  const float rot_b = float(a * sin1 + b * cos1);
  const float rot_c = float(c * cos2 - d * sin2);
  const float rot_d = float(c * sin2 + d * cos2);
  a = rot_a;                    // cos(phi)
  if (negate)
    b = -rot_b;                 // sin(phi)
  else
    b = rot_b;
  c = rot_c;                    // cos(2 phi)
  if (negate)
    d = -rot_d;                 // sin(2 phi)
  else
    d = rot_d;
}
// Returns {min, max} of the range [begin, end), ignoring NaN values.
// If the range is empty or contains only NaNs, both values are NAN.
// (Generic because it was used in other places in the past.)
template <typename T, typename FP=typename std::iterator_traits<T>::value_type>
std::array<FP,2> calculate_min_max_disregarding_nans(T begin, T end) {
  std::array<FP,2> result = {{NAN, NAN}};
  // find the first value that is not NaN
  T cur = begin;
  for (; cur != end; ++cur)
    if (!std::isnan(*cur))
      break;
  if (cur == end)
    return result;
  result[0] = result[1] = *cur;
  // comparisons with NaN are false, so later NaNs are skipped automatically
  for (++cur; cur != end; ++cur) {
    if (*cur < result[0])
      result[0] = *cur;
    else if (*cur > result[1])
      result[1] = *cur;
  }
  return result;
}
// Skips the word at the start of `line` and the whitespace that follows it.
// Returns a pointer to the next word, or to the terminating NUL if there
// is none. `line` must be NUL-terminated.
const char* skip_word_and_space(const char* line) {
  // std::isspace() requires a value representable as unsigned char (or EOF);
  // passing a plain char is undefined for bytes >= 0x80 where char is signed.
  while (*line != '\0' && !std::isspace(static_cast<unsigned char>(*line)))
    ++line;
  while (std::isspace(static_cast<unsigned char>(*line)))
    ++line;
  return line;
}
// Parses six consecutive numbers from `line` with fast_atof() and builds
// a UnitCell from them in the order a, b, c, alpha, beta, gamma.
UnitCell read_cell_parameters(const char* line) {
  double par[6];
  for (double& value : par)
    value = fast_atof(line, &line);
  return UnitCell(par[0], par[1], par[2], par[3], par[4], par[5]);
}
} // anonymous namespace
// Averages the first six floats of every batch header (used here as the
// six cell parameters) and returns them as cell parameters.
//
// rmsd - optional array of 6 doubles; zeroed on entry and, when an average
//        is computed, filled with the root-mean-square deviations.
// Returns the global cell if any batch has a non-positive parameter or if
// the average is within 1e-4 of the global cell; returns default-constructed
// parameters if any sum is not positive (e.g. there are no batches).
UnitCellParameters Mtz::get_average_cell_from_batch_headers(double* rmsd) const {
  if (rmsd)
    for (int k = 0; k < 6; ++k)
      rmsd[k] = 0.;
  std::array<double, 6> mean = {0., 0., 0., 0., 0., 0.};
  for (const Batch& batch : batches)
    for (int k = 0; k < 6; ++k) {
      const auto value = batch.floats[k];
      // if batch headers are not set correctly, return global cell
      if (value <= 0)
        return cell;
      mean[k] += value;
    }
  for (double total : mean)
    if (total <= 0)
      return UnitCellParameters();
  const size_t count = batches.size();
  for (double& m : mean)
    m /= count;
  if (rmsd) {
    for (const Batch& batch : batches)
      for (int k = 0; k < 6; ++k)
        rmsd[k] += sq(mean[k] - batch.floats[k]);
    for (int k = 0; k < 6; ++k)
      rmsd[k] = std::sqrt(rmsd[k] / count);
  }
  // If average parameters are almost equal to the global cell, use the latter
  // to avoid 32-bit precision artifacts (58.28 -> 58.279998).
  UnitCellParameters averaged(mean);
  if (averaged.approx(cell, 1e-4))
    return cell;
  return averaged;
}
// Returns {min, max} of 1/d^2 over all rows, evaluated with the global cell
// and with each dataset cell that differs from the global cell and from the
// previously used dataset cell. Fails if there is no data or fewer than
// three columns. The minimum is reported as 0 if nothing was evaluated.
std::array<double,2> Mtz::calculate_min_max_1_d2() const {
  if (!has_data() || columns.size() < 3)
    fail("No data.");
  double lowest = INFINITY;
  double highest = 0.;
  // extends [lowest, highest] with values calculated for the given cell
  auto scan_with_cell = [&](const UnitCell& uc) {
    const size_t stride = columns.size();
    for (size_t row = 0; row < data.size(); row += stride) {
      double value = uc.calculate_1_d2_double(data[row+0], data[row+1], data[row+2]);
      if (value < lowest)
        lowest = value;
      if (value > highest)
        highest = value;
    }
  };
  if (cell.is_crystal() && cell.a > 0)
    scan_with_cell(cell);
  const UnitCell* last_used = nullptr;
  for (const Dataset& ds : datasets) {
    const bool usable = ds.cell.is_crystal() && ds.cell.a > 0 && ds.cell != cell;
    if (usable && (!last_used || ds.cell != *last_used)) {
      scan_with_cell(ds.cell);
      last_used = &ds.cell;
    }
  }
  if (lowest == INFINITY)
    lowest = 0;
  return {{lowest, highest}};
}
// Reads the first 20 bytes of an MTZ stream: checks the "MTZ " signature,
// decides whether the byte order must be swapped and stores header_offset
// (32-bit value, or the 64-bit one at byte 12 when the 32-bit value is -1).
// Finally skips the next 60 bytes of the stream.
void Mtz::read_first_bytes(AnyStream& stream) {
  char head[20] = {0};
  if (!stream.read(head, 20))
    fail("Could not read the MTZ file (is it empty?)");
  const bool has_signature =
      head[0] == 'M' && head[1] == 'T' && head[2] == 'Z' && head[3] == ' ';
  if (!has_signature)
    fail("Not an MTZ file - it does not start with 'MTZ '");
  // Bytes 9-12 have so-called machine stamp:
  // "The first 4 half-bytes represent the real, complex, integer and
  // character formats".
  // We don't try to handle all the combinations here, only the two most
  // common: big endian (for all types) and little endian (for all types).
  // BE is denoted by 1 and LE by 4.
  // If we get a value different than 1 and 4 we assume the native byte order.
  const int foreign_stamp = is_little_endian() ? 0x10 : 0x40;
  if ((head[9] & 0xf0) == foreign_stamp)
    toggle_endianness();
  std::int32_t offset32;
  std::memcpy(&offset32, head + 4, 4);
  if (!same_byte_order)
    swap_four_bytes(&offset32);
  if (offset32 != -1) {
    header_offset = (int64_t) offset32;
  } else {
    // -1 marks a file that stores the offset as a 64-bit number
    std::memcpy(&header_offset, head + 12, 8);
    if (!same_byte_order)
      swap_eight_bytes(&header_offset);
  }
  stream.skip(60);
}
// Reads the main MTZ header records (80-character lines) from `stream`
// until a record starting with "END" or until reading fails.
//
// stream       - positioned at the start of the header records
// save_headers - if not null, every line that is read (including the END
//                line) is appended to this vector as an 80-character string
//
// Each record is dispatched on the id of its keyword (ialpha4_id) and the
// corresponding members are filled in. Fails if the stream position does
// not match header_offset, if the NCOL batch count is out of range, if the
// number of COLUMN records differs from NCOL, or if the presence of a BATCH
// record disagrees with the number of batches from NCOL.
void Mtz::read_main_headers(AnyStream& stream, std::vector<std::string>* save_headers) {
char line[81] = {0};
// header_offset is used here as a 1-based count of 4-byte words
std::ptrdiff_t header_pos = 4 * std::ptrdiff_t(header_offset - 1);
// temporary check
long cur_pos = stream.tell();
// a position of -1 is not treated as an error
if (cur_pos != header_pos && cur_pos != -1)
fail(cat("wrong pos ", int(header_pos), " ", int(stream.tell())));
int ncol = 0;
bool has_batch = false;
while (stream.read(line, 80)) {
if (save_headers)
save_headers->emplace_back(line, line+80);
if (ialpha3_id(line) == ialpha3_id("END"))
break;
// args points past the keyword and the whitespace after it
const char* args = skip_word_and_space(line);
switch (ialpha4_id(line)) {
case ialpha4_id("VERS"):
version_stamp = rtrim_str(args);
break;
case ialpha4_id("TITL"):
title = rtrim_str(args);
break;
case ialpha4_id("NCOL"): {
// three numbers: columns, reflections, batches
ncol = simple_atoi(args, &args);
nreflections = simple_atoi(args, &args);
int nbatches = simple_atoi(args);
if (nbatches < 0 || nbatches > 10000000) // sanity check
fail("Wrong NCOL header");
batches.resize(nbatches);
break;
}
case ialpha4_id("CELL"):
cell = read_cell_parameters(args);
break;
case ialpha4_id("SORT"):
for (int& n : sort_order)
n = simple_atoi(args, &args);
break;
case ialpha4_id("SYMI"): {
nsymop = simple_atoi(args, &args);
symops.reserve(nsymop);
simple_atoi(args, &args); // ignore number of primitive operations
args = skip_word_and_space(skip_blank(args)); // ignore lattice type
spacegroup_number = simple_atoi(args, &args);
args = skip_blank(args);
// the space group name is either a single word or a quoted string
if (*args != '\'')
spacegroup_name = read_word(args);
else if (const char* end = std::strchr(++args, '\''))
spacegroup_name.assign(args, end);
// ignore point group which is at the end of args
break;
}
case ialpha4_id("SYMM"):
symops.push_back(parse_triplet(args));
break;
case ialpha4_id("RESO"):
min_1_d2 = fast_atof(args, &args);
max_1_d2 = fast_atof(args, &args);
break;
case ialpha4_id("VALM"):
// a value starting with 'N' is skipped and valm is left unchanged
if (*args != 'N') {
const char* endptr;
float v = (float) fast_atof(args, &endptr);
// accept the number only if nothing but whitespace follows it
if (*endptr == '\0' || is_space(*endptr))
valm = v;
else
logger.note("Unexpected VALM value: " + rtrim_str(args));
}
break;
case ialpha4_id("COLU"): {
// fields: label, type (first character is used), min, max, dataset id
columns.emplace_back();
Column& col = columns.back();
col.label = read_word(args, &args);
col.type = read_word(args, &args)[0];
col.min_value = (float) fast_atof(args, &args);
col.max_value = (float) fast_atof(args, &args);
col.dataset_id = simple_atoi(args);
col.parent = this;
col.idx = columns.size() - 1;
break;
}
case ialpha4_id("COLS"):
// COLSRC is undocumented. CMTZ (libccp4) adds it after COLUMN:
// COLUMN IMEAN J -300.600006 4619 1
// COLSRC IMEAN CREATED_07/08/2019_11:00:23 1
if (!columns.empty() && columns.back().label == read_word(args, &args))
columns.back().source = read_word(args);
else
logger.note("MTZ: COLSRC is not after matching COLUMN");
break;
case ialpha4_id("COLG"):
// Column group - not used.
break;
case ialpha4_id("NDIF"):
datasets.reserve(simple_atoi(args));
break;
case ialpha4_id("PROJ"):
// PROJECT starts a new dataset; the records below refer to the last one
datasets.emplace_back();
datasets.back().id = simple_atoi(args, &args);
datasets.back().project_name = read_word(skip_word_and_space(args));
datasets.back().wavelength = 0.0;
break;
case ialpha4_id("CRYS"):
if (simple_atoi(args, &args) == last_dataset().id)
datasets.back().crystal_name = read_word(args);
else
logger.note("MTZ CRYSTAL line: unusual numbering.");
break;
case ialpha4_id("DATA"):
if (simple_atoi(args, &args) == last_dataset().id)
datasets.back().dataset_name = read_word(args);
else
logger.note("MTZ DATASET line: unusual numbering.");
break;
case ialpha4_id("DCEL"):
if (simple_atoi(args, &args) == last_dataset().id)
datasets.back().cell = read_cell_parameters(args);
else
logger.note("MTZ DCELL line: unusual numbering.");
break;
// case("DRES"): not in use yet
case ialpha4_id("DWAV"):
if (simple_atoi(args, &args) == last_dataset().id)
datasets.back().wavelength = fast_atof(args);
else
logger.note("MTZ DWAV line: unusual numbering.");
break;
case ialpha4_id("BATCH"):
// We take number of batches from the NCOL record and serial numbers
// from BH. This header could be used only to check consistency.
has_batch = true;
break;
default:
logger.note("Unknown header: " + rtrim_str(line));
}
}
// consistency checks between the records that were read
if (ncol != (int) columns.size())
fail("Number of COLU records inconsistent with NCOL record.");
if (has_batch != !batches.empty())
fail("BATCH header inconsistent with NCOL record.");
// adjust data size, if necessary
if (!data.empty()) {
size_t expected_size = columns.size() * nreflections;
// data that is longer than expected is truncated; shorter is an error
if (data.size() > expected_size)
data.resize(expected_size);
else if (data.size() < expected_size)
fail("internal error, wrong data size");
}
}
// Reads the records that follow the main headers: history lines (after
// an MTZHIST record) and batch headers (after an MTZBATS record), until
// a record starting with "MTZE" or until reading fails. Whatever is left
// in the stream afterwards is stored in appended_text.
//
// Fails on a missing or malformed BH / BHCH record. If the history line
// count is out of range, a note is logged and the function returns early
// (appended_text is then not set).
void Mtz::read_history_and_batch_headers(AnyStream& stream) {
  char buf[81] = {0};
  int n_headers = 0;
  while (stream.read(buf, 80) && ialpha4_id(buf) != ialpha4_id("MTZE")) {
    if (n_headers != 0) {
      const char* start = skip_blank(buf);
      // Trim relative to the end of the buffer, not start+80: when the line
      // has leading blanks, start+80 points past the end of buf.
      const char* end = rtrim_cstr(start, buf + 80);
      history.emplace_back(start, end);
      --n_headers;
    } else if (ialpha4_id(buf) == ialpha4_id("MTZH")) {
      n_headers = simple_atoi(skip_word_and_space(buf+4));
      if (n_headers < 0 || n_headers > 30) {
        logger.note("Wrong MTZ: number of headers should be between 0 and 30");
        return;
      }
      history.reserve(n_headers);
    } else if (ialpha4_id(buf) == ialpha4_id("MTZB")) {
      for (Batch& batch : batches) {
        stream.read(buf, 80);
        if (ialpha3_id(buf) != ialpha3_id("BH "))
          fail("Missing BH header");
        const char* args = skip_blank(buf + 2);
        batch.number = simple_atoi(args, &args);
        int total_words = simple_atoi(args, &args);
        int int_words = simple_atoi(args, &args);
        int float_words = simple_atoi(args);
        // Negative counts must be rejected here: they could still add up
        // to total_words and would then be passed to resize() below.
        if (int_words < 0 || float_words < 0 ||
            total_words != int_words + float_words || total_words > 1000)
          fail("Wrong BH header");
        stream.read(buf, 80); // TITLE
        // The title text starts after the 6-character "TITLE " keyword.
        // The keyword itself is not stored, because it is added again
        // when the batch header is written.
        const char* title_start = buf + 6;
        const char* title_end = rtrim_cstr(title_start, buf+76);
        batch.title.assign(title_start, title_end);
        // binary part: int_words 4-byte integers, then float_words floats
        batch.ints.resize(int_words);
        stream.read(batch.ints.data(), int_words * 4);
        batch.floats.resize(float_words);
        stream.read(batch.floats.data(), float_words * 4);
        stream.read(buf, 80);
        if (ialpha4_id(buf) != ialpha4_id("BHCH"))
          fail("Missing BHCH header");
        split_str_into_multi(buf + 5, " \t", batch.axes);
      }
    }
  }
  appended_text = stream.read_rest();
}
void Mtz::setup_spacegroup() {
spacegroup = find_spacegroup_by_name(spacegroup_name, cell.alpha, cell.gamma);
if (!spacegroup) {
logger.note("MTZ: unrecognized spacegroup name: " + spacegroup_name);
return;
}
if (spacegroup->ccp4 != spacegroup_number)
logger.note("MTZ: inconsistent spacegroup name and number");
cell.set_cell_images_from_spacegroup(spacegroup);
for (Dataset& d : datasets)
d.cell.set_cell_images_from_spacegroup(spacegroup);
}
// Reads (or skips) the reflection data that lies between the 80-byte file
// header and the header records. We should be at byte 80 when this is called.
//
// do_read - if true, the data is read into `data` (and byte-swapped when the
//           file has a different byte order); if false, it is only skipped.
// Fails if header_offset is too small to be valid or on a read/skip error.
void Mtz::read_raw_data(AnyStream& stream, bool do_read) {
  // header_offset counts 4-byte words from 1; the first 20 words are the
  // file header, so anything below 21 would make the count below wrap
  // around to a huge unsigned value.
  if (header_offset < 21)
    fail("Wrong header offset in the MTZ file");
  size_t n = size_t(header_offset - 1 - 20);
  if (!do_read) {
    if (!stream.skip(4 * n))
      fail("ignoring mtz data segment failed");
    return;
  }
  data.resize(n);
  if (!stream.read(data.data(), 4 * n))
    fail("Error when reading MTZ data");
  if (!same_byte_order)
    for (float& f : data)
      swap_four_bytes(&f);
}
// Reads a whole MTZ file from a stream in a single forward pass.
// with_data - whether the reflection data is stored or only skipped.
void Mtz::read_stream(AnyStream& stream, bool with_data) {
  // The older implementation of MTZ reading first read the headers,
  // then the data. This required jumping to the headers at the end,
  // then back to the beginning of the data (byte 80).
  // The current implementation avoids calling seek(), allowing
  // incremental reading of streams (stdin, gzipped files, etc).
  read_first_bytes(stream);
  read_raw_data(stream, with_data);
  read_main_headers(stream, nullptr);
  read_history_and_batch_headers(stream);
  setup_spacegroup();
  // make sure there is at least one dataset
  const bool no_datasets = datasets.empty();
  if (no_datasets)
    datasets.push_back({0, "HKL_base", "HKL_base", "HKL_base", cell, 0.});
}
// Linear search for a reflection with the given Miller indices.
// Meant for probing/testing individual reflections, so it is not optimized.
//
// start - offset in `data` to start from; rounded down to a row boundary.
// Returns the offset of the matching row in `data`, or (size_t)-1 if no row
// matches. Fails if there is no data or fewer than three columns.
size_t Mtz::find_offset_of_hkl(const Miller& hkl, size_t start) const {
  if (!has_data() || columns.size() < 3)
    fail("No data.");
  const size_t stride = columns.size();
  size_t offset = start - (start % stride);
  while (offset + 2 < data.size()) {
    if (get_hkl(offset) == hkl)
      return offset;
    offset += stride;
  }
  return (size_t)-1;
}
// Moves every reflection that is outside the reciprocal-space ASU into it,
// adjusting the columns whose values depend on the indices.
//
// tnt_asu - passed to ReciprocalAsu to select the ASU definition.
// Fails for unmerged data; does nothing if the space group is not set.
// For each moved reflection:
//  - phase columns (type 'P') and Hendrickson-Lattman coefficients (type 'A',
//    taken in groups of four) are shifted by the phase shift of the operation,
//  - if the reflection was mapped with an even isym and is not centric,
//    the (+)/(-) column pairs are swapped and columns of type 'D' are negated.
void Mtz::ensure_asu(bool tnt_asu) {
if (!is_merged())
fail("Mtz::ensure_asu() is for merged MTZ only");
if (!spacegroup)
return;
GroupOps gops = spacegroup->operations();
ReciprocalAsu asu(spacegroup, tnt_asu);
// positions of the columns that need special handling
std::vector<int> phase_columns = positions_of_columns_with_type('P');
std::vector<int> abcd_columns = positions_of_columns_with_type('A');
std::vector<int> dano_columns = positions_of_columns_with_type('D');
std::vector<std::pair<int,int>> plus_minus_columns = positions_of_plus_minus_columns();
bool no_special_columns = phase_columns.empty() && abcd_columns.empty() &&
plus_minus_columns.empty() && dano_columns.empty();
// when true, the swapping/negation step below is skipped for all rows
bool centric = no_special_columns || gops.is_centrosymmetric();
for (size_t n = 0; n < data.size(); n += columns.size()) {
Miller hkl = get_hkl(n);
if (asu.is_in(hkl))
continue;
// result.first is the new hkl, result.second is isym
auto result = asu.to_asu(hkl, gops);
// cf. impl::move_to_asu() in asudata.hpp
set_hkl(n, result.first);
if (no_special_columns)
continue;
int isym = result.second;
if (!phase_columns.empty() || !abcd_columns.empty()) {
// two consecutive isym values share one symmetry operation
const Op& op = gops.sym_ops[(isym - 1) / 2];
double shift = op.phase_shift(hkl);
bool negate = (isym % 2 == 0);
for (int col : phase_columns)
shift_phase(data[n + col], shift, negate);
for (auto i = abcd_columns.begin(); i+3 < abcd_columns.end(); i += 4)
// we expect coefficients HLA, HLB, HLC and HLD - in this order
shift_hl_coefficients(data[n + *(i+0)], data[n + *(i+1)],
data[n + *(i+2)], data[n + *(i+3)],
shift, negate);
}
if (isym % 2 == 0 && !centric &&
// usually, centric reflections have empty F(-), so avoid swapping it
!gops.is_reflection_centric(hkl)) {
for (std::pair<int,int> cols : plus_minus_columns)
std::swap(data[n + cols.first], data[n + cols.second]);
for (int col : dano_columns)
data[n + col] = -data[n + col];
}
}
}
// Reindexes the reflections with the given operator and updates the space
// group and all unit cells accordingly.
//
// op - reindexing operator; fails if it has a translation part or if the
//      determinant of its rotation part is negative.
// Reflections whose new indices would be fractional are removed.
// Fails if the space group is set and the new space group cannot be
// determined.
void Mtz::reindex(const Op& op) {
if (op.tran != Op::Tran{0, 0, 0})
gemmi::fail("reindexing operator must not have a translation");
if (op.det_rot() < 0)
gemmi::fail("reindexing operator must preserve the hand of the axes");
switch_to_original_hkl(); // changes hkl for unmerged data only
Op xyz_op = op.as_xyz();
logger.mesg("Real space transformation: ", op.as_xyz().triplet());
bool row_removal = false;
// change Miller indices
for (size_t n = 0; n < data.size(); n += columns.size()) {
Miller hkl_den = op.apply_to_hkl_without_division(get_hkl(n));
Miller hkl = Op::divide_hkl_by_DEN(hkl_den);
// the new indices are kept only if multiplying back by DEN reproduces hkl_den
if (hkl[0] * Op::DEN == hkl_den[0] &&
hkl[1] * Op::DEN == hkl_den[1] &&
hkl[2] * Op::DEN == hkl_den[2]) {
set_hkl(n, hkl);
} else { // fractional hkl - remove
row_removal = true;
data[n] = NAN; // mark for removal
}
}
// remove reflections marked for removal
if (row_removal) {
int n_before = nreflections;
remove_rows_if([](const float* h) { return std::isnan(*h); });
logger.mesg("Reflections removed (because of fractional indices): ", n_before - nreflections);
}
switch_to_asu_hkl(); // revert switch_to_original_hkl() for unmerged data
// change space group
if (spacegroup) {
GroupOps gops = spacegroup->operations();
gops.change_basis_backward(xyz_op);
const SpaceGroup* new_sg = find_spacegroup_by_ops(gops);
if (!new_sg)
fail("reindexing: failed to determine new space group name");
if (new_sg != spacegroup) {
logger.mesg("Space group changed from ", spacegroup->xhm(), " to ", new_sg->xhm(), '.');
set_spacegroup(new_sg);
} else {
logger.mesg("Space group stays the same:", spacegroup->xhm(), '.');
}
}
// change unit cell parameters
// (global cell, cells of all datasets and cells stored in batch headers)
cell = cell.changed_basis_backward(xyz_op, false);
for (Mtz::Dataset& ds : datasets)
ds.cell = ds.cell.changed_basis_backward(xyz_op, false);
for (Mtz::Batch& batch : batches)
batch.set_cell(batch.get_cell().changed_basis_backward(xyz_op, false));
}
// Expands the reflection data to space group P1: for every row, appends
// copies for symmetry-equivalent indices that are not already present
// (neither directly nor as a Friedel mate), shifting phases and
// Hendrickson-Lattman coefficients of the copies. Afterwards the number of
// reflections is updated, sort_order is reset and the space group is set
// to P1. Does nothing if the space group is not set or there is no data.
void Mtz::expand_to_p1() {
if (!spacegroup || !has_data())
return;
std::vector<int> phase_columns = positions_of_columns_with_type('P');
std::vector<int> abcd_columns = positions_of_columns_with_type('A');
bool has_phases = (!phase_columns.empty() || !abcd_columns.empty());
GroupOps gops = spacegroup->operations();
// reserve room for the largest possible result, so that appending rows
// below does not reallocate (orig_iter must stay valid)
data.reserve(gops.sym_ops.size() * data.size());
size_t orig_size = data.size();
// indices already generated for the current row
std::vector<Miller> hkl_copies;
for (size_t n = 0; n < orig_size; n += columns.size()) {
hkl_copies.clear();
Miller hkl = get_hkl(n);
// no reallocations because of reserve() above
auto orig_iter = data.begin() + n;
// the first operation in sym_ops is skipped
for (auto op = gops.sym_ops.begin() + 1; op < gops.sym_ops.end(); ++op) {
Miller new_hkl = op->apply_to_hkl(hkl);
Op::Miller negated{{-new_hkl[0], -new_hkl[1], -new_hkl[2]}};
// skip indices equal to the original or to an earlier copy,
// also when compared after negation
if (new_hkl != hkl && !in_vector(new_hkl, hkl_copies) &&
negated != hkl && !in_vector(negated, hkl_copies)) {
hkl_copies.push_back(new_hkl);
size_t offset = data.size();
// append a copy of the whole row, then overwrite its indices
data.insert(data.end(), orig_iter, orig_iter + columns.size());
set_hkl(offset, new_hkl);
if (has_phases) {
double shift = op->phase_shift(hkl);
if (shift != 0) {
for (int col : phase_columns)
shift_phase(data[offset + col], shift);
for (auto i = abcd_columns.begin(); i+3 < abcd_columns.end(); i += 4)
// we expect coefficients HLA, HLB, HLC and HLD - in this order
shift_hl_coefficients(data[offset + *(i+0)], data[offset + *(i+1)],
data[offset + *(i+2)], data[offset + *(i+3)], shift);
}
}
}
}
}
nreflections = int(data.size() / columns.size());
sort_order = {{0, 0, 0, 0, 0}};
set_spacegroup(&get_spacegroup_p1());
}
bool Mtz::switch_to_original_hkl() {
if (indices_switched_to_original)
return false;
if (!has_data())
fail("switch_to_original_hkl(): data not read yet");
if (nreflections == 0) {
// This function can be called before the data is populated
// to set indices_switched_to_original, which is not exposed in Python.
indices_switched_to_original = true;
return true;
}
const Column* col = column_with_label("M/ISYM");
if (col == nullptr || col->type != 'Y' || col->idx < 3)
return false;
std::vector<Op> inv_symops;
inv_symops.reserve(symops.size());
for (const Op& op : symops)
inv_symops.push_back(op.inverse());
for (size_t n = 0; n + col->idx < data.size(); n += columns.size()) {
int isym = static_cast<int>(data[n + col->idx]) & 0xFF;
const Op& op = inv_symops.at((isym - 1) / 2);
Miller hkl = op.apply_to_hkl(get_hkl(n));
int sign = (isym & 1) ? 1 : -1;
for (int i = 0; i < 3; ++i)
data[n+i] = static_cast<float>(sign * hkl[i]);
}
indices_switched_to_original = true;
return true;
}
bool Mtz::switch_to_asu_hkl() {
if (!indices_switched_to_original)
return false;
if (!has_data())
fail("switch_to_asu_hkl(): data not read yet");
const Column* col = column_with_label("M/ISYM");
if (col == nullptr || col->type != 'Y' || col->idx < 3 || !spacegroup)
return false;
size_t misym_idx = col->idx;
UnmergedHklMover hkl_mover(spacegroup);
for (size_t n = 0; n + col->idx < data.size(); n += columns.size()) {
Miller hkl = get_hkl(n);
int isym = hkl_mover.move_to_asu(hkl); // modifies hkl
set_hkl(n, hkl);
float& misym = data[n + misym_idx];
misym = float(((int)misym & ~0xff) | isym);
}
indices_switched_to_original = false;
return true;
}
// Reads an MTZ file that may be gzipped. A std::runtime_error raised while
// reading is re-reported through fail() with ": <path>" appended.
void Mtz::read_file_gz(const std::string& path, bool with_data) {
  try {
    read_input(MaybeGzipped(path), with_data);
  } catch (std::runtime_error& err) {
    // append path to the error like in read_file(), but shouldn't the path go first?
    std::string message = err.what();
    message += ": ";
    message += path;
    fail(message);
  }
}
// Returns row indices (0 .. nreflections-1) ordered by the values in the
// first `use_first` columns, compared column by column. The sort is stable,
// so rows with equal keys keep their relative order.
//
// Fails if there is no data or if use_first is not in [1, number of columns).
std::vector<int> Mtz::sorted_row_indices(int use_first) const {
  if (!has_data())
    fail("No data.");
  if (use_first <= 0 || use_first >= (int) columns.size())
    fail("Wrong use_first arg in Mtz::sort.");
  std::vector<int> indices(nreflections);
  for (int i = 0; i != nreflections; ++i)
    indices[i] = i;
  const size_t stride = columns.size();
  std::stable_sort(indices.begin(), indices.end(), [&](int i, int j) {
    // Offsets are computed as size_t: row * stride can exceed the range
    // of int for large tables.
    const size_t a = (size_t) i * stride;
    const size_t b = (size_t) j * stride;
    for (int n = 0; n < use_first; ++n)
      if (data[a+n] != data[b+n])
        return data[a+n] < data[b+n];
    return false;
  });
  return indices;
}
// Sorts the reflection rows by the first `use_first` columns and records
// the sort keys in sort_order (1-based column numbers, 0 for unused slots).
//
// Returns false if the rows were already in order (data is not touched),
// true if the data was rearranged.
bool Mtz::sort(int use_first) {
  std::vector<int> indices = sorted_row_indices(use_first);
  sort_order = {{0, 0, 0, 0, 0}};
  // sort_order has a fixed number of slots; use_first may be larger
  // (it is only limited by the number of columns), so do not write past it.
  const int n_keys = std::min(use_first, (int) sort_order.size());
  for (int i = 0; i < n_keys; ++i)
    sort_order[i] = i + 1;
  if (std::is_sorted(indices.begin(), indices.end()))
    return false;
  std::vector<float> new_data(data.size());
  size_t w = columns.size();
  // copy whole rows into their new positions
  for (size_t i = 0; i != indices.size(); ++i)
    std::memcpy(&new_data[i * w], &data[indices[i] * w], w * sizeof(float));
  data.swap(new_data);
  return true;
}
// Inserts a new column and returns a reference to it.
//
// label, type - label and type character of the new column
// dataset_id  - dataset of the column; negative means the last dataset,
//               otherwise the id is checked with dataset()
// pos         - position of the new column; negative means at the end
// expand_data - if true, expand_data_rows(1, pos) is called to make room
//               for the column in the data
// Fails if there are no datasets or if pos is past the end.
Mtz::Column& Mtz::add_column(const std::string& label, char type,
                             int dataset_id, int pos, bool expand_data) {
  if (datasets.empty())
    fail("No datasets.");
  if (dataset_id >= 0)
    dataset(dataset_id);  // check if such dataset exist
  else
    dataset_id = datasets.back().id;
  const int n_columns = (int) columns.size();
  if (pos > n_columns)
    fail("Requested column position after the end.");
  if (pos < 0)
    pos = n_columns;
  auto inserted = columns.emplace(columns.begin() + pos);
  // columns that follow the new one moved by one position
  for (auto later = inserted + 1; later != columns.end(); ++later)
    later->idx++;
  Column& added = *inserted;
  added.dataset_id = dataset_id;
  added.type = type;
  added.label = label;
  added.parent = this;
  added.idx = pos;
  if (expand_data)
    expand_data_rows(1, pos);
  return added;
}
namespace { // helper functions for copying, replacing and removing columns
// Fails (with messages prefixed by msg) if the data of mtz has not been
// read or if idx is not a valid 0-based column index.
void check_column(const Mtz& mtz, size_t idx, const char* msg) {
  const bool data_loaded = mtz.has_data();
  if (!data_loaded)
    fail(msg, ": data not read yet");
  const size_t n_columns = mtz.columns.size();
  if (n_columns <= idx)
    fail(msg, ": no column with 0-based index ", std::to_string(idx));
}
// Verifies that src_col (a column of mtz) is followed by the columns named
// in trailing_cols. An empty name matches any label.
// Fails if the data has not been read, if there are not enough columns
// after src_col, or if a label does not match.
void check_trailing_cols(const Mtz& mtz, const Mtz::Column& src_col,
                         const std::vector<std::string>& trailing_cols) {
  assert(src_col.parent == &mtz);
  if (!mtz.has_data())
    fail("data in source mtz not read yet");
  if (src_col.idx + trailing_cols.size() >= mtz.columns.size())
    fail("Not enough columns after " + src_col.label);
  for (size_t i = 0; i < trailing_cols.size(); ++i) {
    const std::string& found = mtz.columns[src_col.idx + i + 1].label;
    // report the label that was actually found at this position,
    // not the label of the source column
    if (!trailing_cols[i].empty() && trailing_cols[i] != found)
      fail("expected trailing column ", trailing_cols[i], ", found ", found);
  }
}
// Overwrites 1 + trailing_cols.size() consecutive columns of mtz, starting
// at dest_idx, with src_col and the columns that follow it in its own Mtz.
// Both the column metadata and the values are copied. No range checking is
// done here.
// If the source is another Mtz object, the values are copied only for
// reflections whose Miller indices are found in both objects.
void do_replace_column(Mtz& mtz, size_t dest_idx, const Mtz::Column& src_col,
const std::vector<std::string>& trailing_cols) {
const Mtz* src_mtz = src_col.parent;
// copy column metadata (parent and idx are left as they are)
for (size_t i = 0; i <= trailing_cols.size(); ++i) {
Mtz::Column& dst = mtz.columns[dest_idx + i];
const Mtz::Column& src = src_mtz->columns[src_col.idx + i];
dst.type = src.type;
dst.label = src.label;
dst.min_value = src.min_value;
dst.max_value = src.max_value;
dst.source = src.source;
dst.dataset_id = src.dataset_id;
}
if (src_mtz == &mtz) {
// internal copying
for (size_t n = 0; n < mtz.data.size(); n += mtz.columns.size())
for (size_t i = 0; i <= trailing_cols.size(); ++i)
mtz.data[n + dest_idx + i] = mtz.data[n + src_col.idx + i];
} else {
// external copying - need to match indices
std::vector<int> dst_indices = mtz.sorted_row_indices();
std::vector<int> src_indices = src_mtz->sorted_row_indices();
// cf. for_matching_reflections()
size_t dst_stride = mtz.columns.size();
size_t src_stride = src_mtz->columns.size();
auto dst = dst_indices.begin();
auto src = src_indices.begin();
// walk both sorted index lists in parallel, advancing the side
// with the smaller hkl until the indices match
while (dst != dst_indices.end() && src != src_indices.end()) {
Miller dst_hkl = mtz.get_hkl(*dst * dst_stride);
Miller src_hkl = src_mtz->get_hkl(*src * src_stride);
if (dst_hkl == src_hkl) {
// copy values
for (size_t i = 0; i <= trailing_cols.size(); ++i)
mtz.data[*dst * dst_stride + dest_idx + i] =
src_mtz->data[*src * src_stride + src_col.idx + i];
++dst;
++src;
} else if (dst_hkl < src_hkl) {
++dst;
} else {
++src;
}
}
}
}
} // anonymous namespace
// Replaces the column at dest_idx (and the trailing_cols.size() columns
// after it) with src_col and the columns that follow it in its Mtz.
// Returns the column at dest_idx. Fails if the trailing columns of the
// source do not match or if the destination range is not valid.
Mtz::Column& Mtz::replace_column(size_t dest_idx, const Mtz::Column& src_col,
                                 const std::vector<std::string>& trailing_cols) {
  const Mtz& src_mtz = *src_col.parent;
  check_trailing_cols(src_mtz, src_col, trailing_cols);
  // checking the last destination column is enough
  const size_t last_dest = dest_idx + trailing_cols.size();
  check_column(*this, last_dest, "replace_column()");
  do_replace_column(*this, dest_idx, src_col, trailing_cols);
  return columns[dest_idx];
}
// Inserts copies of src_col and of the trailing_cols.size() columns that
// follow it, at position dest_idx (negative means at the end).
// Returns the first of the new columns. Fails if the data has not been
// read or if the trailing columns of the source do not match.
Mtz::Column& Mtz::copy_column(int dest_idx, const Mtz::Column& src_col,
                              const std::vector<std::string>& trailing_cols) {
  // check input consistency
  if (!has_data())
    fail("copy_column(): data not read yet");
  check_trailing_cols(*src_col.parent, src_col, trailing_cols);
  const int n_new = 1 + (int) trailing_cols.size();
  if (dest_idx < 0)
    dest_idx = (int) columns.size();
  // if src_col is from this Mtz it may get invalidated when adding columns,
  // so remember its index (as it will be after the insertion) instead
  int src_idx_after = -1;
  if (src_col.parent == this) {
    src_idx_after = (int) src_col.idx;
    if (src_idx_after >= dest_idx)
      src_idx_after += n_new;
  }
  // add new columns, then make room for them in the data in one go
  for (int k = 0; k < n_new; ++k)
    add_column("", ' ', -1, dest_idx + k, false);
  expand_data_rows(1 + trailing_cols.size(), dest_idx);
  // copy the data
  const Column& src_col_now = src_idx_after < 0 ? src_col : columns[src_idx_after];
  // most of the work (hkl-based row matching and data copying) is done here:
  do_replace_column(*this, dest_idx, src_col_now, trailing_cols);
  return columns[dest_idx];
}
// Removes the column with 0-based index idx together with its values.
// Fails if the data has not been read or if idx is out of range.
void Mtz::remove_column(size_t idx) {
  check_column(*this, idx, "remove_column()");
  auto next = columns.erase(columns.begin() + idx);
  // columns after the removed one moved by one position
  for (; next != columns.end(); ++next)
    --next->idx;
  vector_remove_column(data, columns.size(), idx);
  assert(columns.size() * nreflections == data.size());
}
// Helper for write_to_stream(): formats one header record into `buf`,
// pads it with spaces to 80 characters and writes it with `write`.
// Expects `buf` (at least 81 chars) and `write` to be in scope.
#define WRITE(...) do { \
    int len = snprintf_z(buf, 81, __VA_ARGS__); \
    if (len < 80) \
      std::memset(buf + len, ' ', 80 - len); \
    if (write(buf, 80, 1) != 1) \
      sys_fail("Writing MTZ file failed"); \
  } while(0)

// Writes the whole MTZ file (80-byte file header, reflection data, header
// records, history, batch headers and appended text) through `write`.
//
// write - callable with the signature of fwrite without the stream argument:
//         write(ptr, size, nmemb) returning the number of items written.
// Fails if there is no data, no space group, if a batch header has
// an unexpected size, or if writing fails.
template<typename Write>
void Mtz::write_to_stream(Write write) const {
  // uses: data, spacegroup, nreflections, batches, cell, sort_order,
  //       valm, columns, datasets, history
  if (!has_data())
    fail("Cannot write Mtz which has no data");
  if (!spacegroup)
    fail("Cannot write Mtz which has no space group");
  char buf[81] = {'M', 'T', 'Z', ' ', '\0'};
  // Position of the header records, in 4-byte words counted from 1.
  // If it does not fit in 32 bits, -1 is stored in the 32-bit field
  // and the real value in the 64-bit field; otherwise the latter is 0.
  std::int64_t real_header_start = (int64_t) columns.size() * nreflections + 21;
  std::int32_t header_start = (int32_t) real_header_start;
  if (real_header_start > std::numeric_limits<int32_t>::max()) {
    header_start = -1;
  } else {
    real_header_start = 0;
  }
  std::memcpy(buf + 4, &header_start, 4);
  // machine stamp
  std::int32_t machst = is_little_endian() ? 0x00004144 : 0x11110000;
  std::memcpy(buf + 8, &machst, 4);
  std::memcpy(buf + 12, &real_header_start, 8);
  if (write(buf, 80, 1) != 1 ||
      write(data.data(), 4, data.size()) != data.size())
    fail("Writing MTZ file failed");
  WRITE("VERS MTZ:V1.1");
  WRITE("TITLE %s", title.c_str());
  WRITE("NCOL %8zu %12d %8zu", columns.size(), nreflections, batches.size());
  if (cell.is_crystal())
    WRITE("CELL %9.4f %9.4f %9.4f %9.4f %9.4f %9.4f",
          cell.a, cell.b, cell.c, cell.alpha, cell.beta, cell.gamma);
  WRITE("SORT %3d %3d %3d %3d %3d", sort_order[0], sort_order[1],
        sort_order[2], sort_order[3], sort_order[4]);
  GroupOps ops = spacegroup->operations();
  char lat_type = spacegroup->ccp4_lattice_type();
  WRITE("SYMINF %3d %2d %c %5d %*s'%c%s' PG%s",
        ops.order(),               // number of symmetry operations
        (int) ops.sym_ops.size(),  // number of primitive operations
        lat_type,                  // lattice type
        spacegroup->ccp4,          // space group number
        20 - (int) std::strlen(spacegroup->hm), "",
        lat_type,                  // space group name (first letter)
        spacegroup->hm + 1,        // space group name (the rest)
        spacegroup->point_group_hm());  // point group name
  // If we have symops that are the same as spacegroup->operations(),
  // write symops to preserve the order of SYMM records.
  if (!symops.empty() && ops.is_same_as(split_centering_vectors(symops)))
    for (Op op : symops)
      WRITE("SYMM %s", to_upper(op.triplet()).c_str());
  else
    for (Op op : ops)
      WRITE("SYMM %s", to_upper(op.triplet()).c_str());
  auto reso = calculate_min_max_1_d2();
  WRITE("RESO %-20.12f %-20.12f", reso[0], reso[1]);
  if (std::isnan(valm))
    WRITE("VALM NAN");
  else
    WRITE("VALM %f", valm);
  // formats a number with 9 decimal places, cut to at most 17 characters
  auto format17 = [](float f) {
    char buffer[18];
    int len = snprintf_z(buffer, 18, "%.9f", f);
    return std::string(buffer, len > 0 ? std::min(len, 17) : 0);
  };
  for (const Column& col : columns) {
    // min/max are recalculated from the data, not taken from the Column
    auto minmax = calculate_min_max_disregarding_nans(col.begin(), col.end());
    const char* label = !col.label.empty() ? col.label.c_str() : "_";
    WRITE("COLUMN %-30s %c %17s %17s %4d",
          label, col.type,
          format17(minmax[0]).c_str(), format17(minmax[1]).c_str(),
          col.dataset_id);
    if (!col.source.empty())
      WRITE("COLSRC %-30s %-36s %4d", label, col.source.c_str(), col.dataset_id);
  }
  WRITE("NDIF %8zu", datasets.size());
  for (const Dataset& ds : datasets) {
    WRITE("PROJECT %7d %s", ds.id, ds.project_name.c_str());
    WRITE("CRYSTAL %7d %s", ds.id, ds.crystal_name.c_str());
    WRITE("DATASET %7d %s", ds.id, ds.dataset_name.c_str());
    // fall back to the global cell if the dataset has no usable cell
    const UnitCell& uc = (ds.cell.is_crystal() && ds.cell.a > 0 ? ds.cell : cell);
    WRITE("DCELL %9d %10.4f%10.4f%10.4f%10.4f%10.4f%10.4f",
          ds.id, uc.a, uc.b, uc.c, uc.alpha, uc.beta, uc.gamma);
    WRITE("DWAVEL %8d %10.5f", ds.id, ds.wavelength);
  }
  // BATCH records: the keyword followed by up to 12 batch numbers,
  // 6 characters each. `pos` is the offset in buf of the next number.
  int pos = 0;
  for (const Batch& batch : batches) {
    if (pos == 0) {
      std::memcpy(buf, "BATCH ", 6);  // NOLINT(bugprone-not-null-terminated-result)
      pos = 6;
    }
    snprintf_z(buf + pos, 7, "%6d", batch.number);
    // advance past the number before padding, so that the padding
    // does not overwrite it and the next number cannot run past buf
    pos += 6;
    if (pos > 72 || &batch == &batches.back()) {
      std::memset(buf + pos, ' ', 80 - pos);
      if (write(buf, 80, 1) != 1)
        fail("Writing MTZ file failed");
      pos = 0;
    }
  }
  WRITE("END");
  if (!history.empty()) {
    // According to mtzformat.html the file can have only up to 30 history
    // lines, but we don't enforce it here.
    WRITE("MTZHIST %3zu", history.size());
    for (const std::string& line : history)
      WRITE("%s", line.c_str());
  }
  if (!batches.empty()) {
    WRITE("MTZBATS");
    for (const Batch& batch : batches) {
      // keep the numbers the same as in files written by libccp4
      WRITE("BH %8d %7zu %7zu %7zu",
            batch.number, batch.ints.size() + batch.floats.size(),
            batch.ints.size(), batch.floats.size());
      WRITE("TITLE %.70s", batch.title.c_str());
      if (batch.ints.size() != 29 || batch.floats.size() != 156)
        fail("wrong size of binaries batch headers");
      // binary part of the batch header; check the results like
      // for all the other writes
      if (write(batch.ints.data(), 4, batch.ints.size()) != batch.ints.size() ||
          write(batch.floats.data(), 4, batch.floats.size()) != batch.floats.size())
        fail("Writing MTZ file failed");
      WRITE("BHCH %7.7s %7.7s %7.7s",
            batch.axes.size() > 0 ? batch.axes[0].c_str() : "",
            batch.axes.size() > 1 ? batch.axes[1].c_str() : "",
            batch.axes.size() > 2 ? batch.axes[2].c_str() : "");
    }
  }
  WRITE("MTZENDOFHEADERS");
  if (!appended_text.empty()) {
    if (write(appended_text.data(), appended_text.size(), 1) != 1)
      fail("Writing MTZ file failed");
  }
}
#undef WRITE
// Writes the MTZ content to an already opened C stream.
// Every chunk handed over by write_to_stream() is forwarded to std::fwrite()
// and fwrite's result (the number of items written) is passed back.
void Mtz::write_to_cstream(std::FILE* stream) const {
  auto fwrite_chunk = [stream](const void* data, size_t item_size, size_t item_count) {
    return std::fwrite(data, item_size, item_count, stream);
  };
  write_to_stream(fwrite_chunk);
}
// Serializes the MTZ content into `str`; the string is resized to exactly
// the number of bytes reported by size_to_write().
void Mtz::write_to_string(std::string& str) const {
  // Two passes: the first one only measures the output, so the string is
  // allocated once and no re-allocations happen while writing.
  // This approach hasn't been benchmarked against single-pass writing.
  const size_t total = size_to_write();
  str.resize(total);
  char* out = &str[0];
  write_to_buffer(out, total);
}
void Mtz::write_to_file(const std::string& path) const {
fileptr_t f = file_open(path.c_str(), "wb");
try {
write_to_cstream(f.get());
} catch (std::runtime_error& e) {
fail(std::string(e.what()) + ": " + path);
}
}
// Returns the number of bytes that writing this Mtz would produce.
// Runs write_to_stream() with a writer that stores nothing: it only adds up
// the chunk sizes and reports every chunk as fully written.
size_t Mtz::size_to_write() const {
  size_t total = 0;
  auto count_only = [&total](const void*, size_t item_size, size_t item_count) {
    total += item_size * item_count;
    return item_count;
  };
  write_to_stream(count_only);
  return total;
}
// Writes the MTZ content into the memory starting at `buf`.
// `maxlen` is the capacity of that memory in bytes; fail() is called as soon
// as the accumulated output would exceed it (before the offending chunk is
// copied). Returns the total number of bytes written.
size_t Mtz::write_to_buffer(char* buf, size_t maxlen) const {
  size_t written = 0;
  char* dest = buf;
  auto copy_chunk = [&](const void* src, size_t item_size, size_t item_count) {
    const size_t chunk = item_size * item_count;
    written += chunk;
    if (written > maxlen)
      fail("Mtz::write_to_buffer: size too small");
    memcpy(dest, src, chunk);
    dest += chunk;
    return item_count;
  };
  write_to_stream(copy_chunk);
  return written;
}
} // namespace gemmi
+68
View File
@@ -0,0 +1,68 @@
// Copyright 2017 Global Phasing Ltd.
#include <gemmi/sprintf.hpp>
#include <stdarg.h> // for va_list
#ifdef USE_STD_SNPRINTF // useful for benchmarking and testing only
# include <cstdio>
# include <algorithm> // for min
#else
# define STB_SPRINTF_IMPLEMENTATION
# define STB_SPRINTF_STATIC
# define STB_SPRINTF_NOUNALIGNED 1
// Making functions from stb_sprintf static may trigger warnings.
# if defined(__GNUC__)
# pragma GCC diagnostic ignored "-Wunused-function"
# endif
# if defined(__clang__)
# pragma clang diagnostic ignored "-Wunused-function"
# endif
// To use system stb_sprintf.h (not recommended, but some Linux distros
// don't like bundled libraries) define GEMMI_USE_SYSTEM_STB or remove
// third_party/stb_sprintf.h.
# if defined(__has_include)
# if !__has_include("../third_party/stb_sprintf.h")
# define GEMMI_USE_SYSTEM_STB 1
# endif
# endif
# ifdef GEMMI_USE_SYSTEM_STB
# pragma message("Using system stb_sprintf.h, not the bundled one. It may not work.")
# include "stb/stb_sprintf.h"
# else
# include "../third_party/stb_sprintf.h"
# endif
#endif // USE_STD_SNPRINTF
namespace gemmi {
// We copy functions from sprintf.h only to have them declared with GEMMI_DLL.
// Unbounded formatted write into buf (the caller guarantees enough room).
// Delegates to std::vsprintf when USE_STD_SNPRINTF is defined, otherwise to
// the bundled stb_sprintf implementation, and returns that call's result.
int sprintf_z(char *buf, char const *fmt, ...) {
  va_list args;
  va_start(args, fmt);
#ifdef USE_STD_SNPRINTF
  const int written = std::vsprintf(buf, fmt, args);
#else
  // no callback and no user data: the output goes straight into buf
  const int written = STB_SPRINTF_DECORATE(vsprintfcb)(nullptr, nullptr, buf, fmt, args);
#endif
  va_end(args);
  return written;
}
// Bounded formatted write: at most `count` bytes (including the terminating
// NUL) are stored in buf.
// Delegates to std::vsnprintf when USE_STD_SNPRINTF is defined, otherwise to
// the bundled stb_sprintf implementation, and returns that call's result.
// With std::vsnprintf the result is the length the complete output would
// have, so it can be larger than count-1 when the output was truncated.
int snprintf_z(char *buf, int count, char const *fmt, ...) {
  int result;
  va_list va;
  va_start(va, fmt);
#ifdef USE_STD_SNPRINTF
  result = std::vsnprintf(buf, count, fmt, va);
  // On success vsnprintf zero-terminates the output itself whenever count > 0.
  // Only an encoding error (negative result) can leave buf unterminated.
  // Never index buf when count <= 0: there is no room for a terminator and
  // buf may be null (length-only query).
  if (result < 0 && buf != nullptr && count > 0)
    buf[0] = '\0';
#else
  result = STB_SPRINTF_DECORATE(vsnprintf)(buf, count, fmt, va);
#endif
  va_end(va);
  return result;
}
} // namespace gemmi
File diff suppressed because it is too large Load Diff
+306
View File
@@ -0,0 +1,306 @@
// Copyright 2023 Global Phasing Ltd.
#include <gemmi/xds_ascii.hpp>
#include <gemmi/atof.hpp> // for fast_from_chars
#include <gemmi/atox.hpp> // for skip_blank, read_word
#include <gemmi/util.hpp> // for trim_str
#include <gemmi/gz.hpp>
#include <gemmi/math.hpp>
namespace gemmi {
// Fills the per-ISET statistics from the reflections in `data`:
//  - reflection_count: number of reflections assigned to the iset,
//  - frame_number_min / frame_number_max: range of refl.frame(),
//  - frame_count: number of distinct frame numbers that occur.
// For an iset without reflections frame_number_min stays INT_MAX,
// frame_number_max stays 0 and frame_count is left untouched.
void XdsAscii::gather_iset_statistics() {
  for (Iset& iset : isets) {
    iset.frame_number_min = INT_MAX;
    iset.frame_number_max = 0;
    // Count from zero, so the result depends neither on the field's initial
    // value nor on an earlier call of this function.
    iset.reflection_count = 0;
    for (const XdsAscii::Refl& refl : data)
      if (refl.iset == iset.id) {
        ++iset.reflection_count;
        int frame = refl.frame();
        iset.frame_number_min = std::min(iset.frame_number_min, frame);
        iset.frame_number_max = std::max(iset.frame_number_max, frame);
      }
    // no usable frame range (in particular: no reflections in this iset)
    if (iset.frame_number_min > iset.frame_number_max)
      continue;
    // one flag per frame number in [min, max]; set if any reflection has it
    std::vector<uint8_t> frames(iset.frame_number_max - iset.frame_number_min + 1);
    for (const XdsAscii::Refl& refl : data)
      if (refl.iset == iset.id)
        frames[refl.frame() - iset.frame_number_min] = 1;
    iset.frame_count = 0;
    for (uint8_t f : frames)
      iset.frame_count += f;
  }
}
/// Replaces the polarization correction for an unpolarized beam (assumed to
/// be already applied to the data) with one for polarization fraction p.
/// Based on Phil Evans' notes and the literature, see:
/// https://github.com/project-gemmi/gemmi/discussions/248
/// \param p is defined as in XDS (p=0.5 for unpolarized beam).
/// \param normal polarization normal; it is normalized here and then made
///        exactly orthogonal to the incident beam direction.
/// Calls fail() if the unit cell axes are unknown or if the normal is within
/// 5 degrees of the beam axis. Scales iobs, sigma and rlp of each reflection.
void XdsAscii::apply_polarization_correction(double p, Vec3 normal) {
  if (!has_cell_axes())
    fail("unknown unit cell axes");
  Mat33 UB = cell_axes.inverse();
  Vec3 rot_axis = get_rotation_axis();
  Vec3 s0_dir = get_s0_direction();
  normal = normal.normalized();
  // The polarization normal is expected to be approx. orthogonal to the beam.
  // dot() is the same as cos_angle() for normalized vectors.
  // Only the axis of the normal matters (it enters the formula squared),
  // so the absolute value is tested: a normal within 5 degrees of the beam
  // axis, in either direction, cannot be projected reliably below.
  if (std::fabs(normal.dot(s0_dir)) > std::cos(rad(5.0)))
    fail("polarization normal is far from orthogonal to the incident beam");
  // make normal exactly orthogonal to the beam
  normal = s0_dir.cross(normal).cross(s0_dir).normalized();
  // wavevector
  Vec3 s0 = s0_dir / wavelength;
  double s0_m2 = 1. / s0.length_sq();  // s0^-2
  for (Refl& refl : data) {
    double phi = rad(rot_angle(refl));
    Vec3 h(refl.hkl[0], refl.hkl[1], refl.hkl[2]);
    Vec3 r0 = UB.multiply(h);
    Vec3 r = rotate_about_axis(r0, rot_axis, phi);
    Vec3 s = s0 + r;
#if 0
    double two_theta = s0.angle(s);
    // 2d sin(theta) = lambda
    double bragg_angle = std::asin(wavelength / (2 * unit_cell.calculate_d(refl.hkl)));
    printf("(%d %d %d) two-theta %g %g\n",
           refl.hkl[0], refl.hkl[1], refl.hkl[2], deg(two_theta), deg(2 * bragg_angle));
#endif
    // we should have |s| == |s0|, but just in case calculate it separately
    double s_m2 = 1. / s.length_sq();
    // 1 + cos^2(2theta) = 2 * correction for unpolarized beam
    double t = 1 + sq(s.dot(s0)) * s_m2 * s0_m2;
    double polariz_factor = (1 - 2*p) * (1 - sq(normal.dot(s)) * s_m2) + p * t;
    // We assume that the XDS file has polarization correction applied,
    // but for non-polarized beam. So we multiply intensities by P0=t/2
    // and divide by a hopefully more accurate polarization factor.
    // (For p=0.5 polariz_factor equals t/2, i.e. mult is 1.)
    double mult = 0.5 * t / polariz_factor;
    refl.iobs *= mult;
    refl.sigma *= mult;
    refl.rlp *= mult;
  }
}
namespace {
template<size_t N>
bool starts_with_ptr(const char* a, const char (&b)[N], const char** endptr) {
if (std::strncmp(a, b, N-1) != 0)
return false;
*endptr = a + N - 1;
return true;
}
// Same as starts_with_ptr(), but the comparison starts at the position
// returned by skip_blank(a) rather than at `a` itself.
template<size_t N>
bool starts_with_ptr_b(const char* a, const char (&b)[N], const char** endptr) {
  const char* after_blanks = skip_blank(a);
  return starts_with_ptr<N>(after_blanks, b, endptr);
}
// Parses one floating-point number from [start, end) into `val` using
// fast_from_chars(). Returns the position where parsing stopped.
// On a parse error calls fail() with a message that quotes `line`.
inline const char* parse_number_into(const char* start, const char* end,
                                     double& val, const char* line) {
  const auto parsed = fast_from_chars(start, end, val);
  const bool ok = (parsed.ec == std::errc());
  if (!ok)
    fail("failed to parse a number in:\n", line);
  return parsed.ptr;
}
// Parses N consecutive numbers from [start, end) into the C array `arr`,
// in order; each number is read with parse_number_into(), which also
// reports errors (quoting `line`).
template<size_t N>
void parse_numbers_into_array(const char* start, const char* end,
                              double (&arr)[N], const char* line) {
  const char* cursor = start;
  for (size_t i = 0; i != N; ++i)
    cursor = parse_number_into(cursor, end, arr[i], line);
}
// Overload for std::array: parses N consecutive numbers from [start, end)
// into `arr`, in order, with parse_number_into() (errors quote `line`).
template<size_t N>
void parse_numbers_into_array(const char* start, const char* end,
                              std::array<double,N>& arr, const char* line) {
  const char* cursor = start;
  for (size_t i = 0; i != N; ++i)
    cursor = parse_number_into(cursor, end, arr[i], line);
}
// Parses three consecutive numbers from [start, end) into vec.x, vec.y and
// vec.z (in this order) with parse_number_into() (errors quote `line`).
void parse_numbers_into_vec3(const char* start, const char* end,
                             Vec3& vec, const char* line) {
  const char* cursor = parse_number_into(start, end, vec.x, line);
  cursor = parse_number_into(cursor, end, vec.y, line);
  parse_number_into(cursor, end, vec.z, line);
}
} // anonymous namespace
// Reads an XDS_ASCII or INTEGRATE.HKL file line by line from line_reader.
// `source` is stored in source_path and used in error messages.
// Lines starting with '!' are header lines; a handful of recognized keywords
// are parsed into members, the others are ignored. All other lines are
// treated as reflection records and appended to `data`.
// Returns when "!END_OF_DATA" is found; calls fail() on an empty input,
// on an unrecognized first line, on unparsable values and when the input
// ends before "!END_OF_DATA".
void XdsAscii::read_stream(AnyStream& line_reader, const std::string& source) {
source_path = source;
// default number of columns; kept for INTEGRATE.HKL, overridden below
// for the other file flavours
read_columns = 12;
char line[256];
size_t len0 = line_reader.copy_line(line, 255);
if (len0 == 0)
fail("empty file");
// 1-based column number of ITEM_ISET, 0 if the file has no such column
int iset_col = 0;
const char xds_ascii_header[] = "!FORMAT=XDS_ASCII MERGE=";
// first character after "MERGE=" ('T' is tested below); stays '\0' when
// the first line is not an XDS_ASCII header
char xds_ascii_type = '\0';
if (starts_with(line, xds_ascii_header)) {
size_t n = sizeof(xds_ascii_header)-1;
xds_ascii_type = line[n];
// !FORMAT=XDS_ASCII MERGE=FALSE FRIEDEL'S_LAW=
// NOTE(review): the compared length (18) is larger than the literal as it
// appears here, and the flag is read from the fixed offset 50; both
// presumably rely on the exact column layout of the XDS header line --
// confirm the whitespace inside these literals against the upstream file.
if (strncmp(line + n + 5, " FRIEDEL'S_LAW=", 18) == 0)
friedels_law = line[50];
}
if (!xds_ascii_type && !starts_with(line, "!OUTPUT_FILE=INTEGRATE.HKL"))
fail("not an XDS_ASCII nor INTEGRATE.HKL file: " + source_path);
// rhs receives the position just after a matched keyword
const char* rhs;
while (size_t len = line_reader.copy_line(line, 255)) {
if (line[0] == '!') {
// --- header line: dispatch on the keyword that follows '!' ---
if (starts_with_ptr(line+1, "Generated by ", &rhs)) {
generated_by = read_word(rhs, &rhs);
version_str = trim_str(rhs);
} else if (starts_with_ptr(line+1, "SPACE_GROUP_NUMBER=", &rhs)) {
spacegroup_number = simple_atoi(rhs);
} else if (starts_with_ptr(line+1, "UNIT_CELL_", &rhs)) {
if (starts_with_ptr(rhs, "CONSTANTS=", &rhs)) { // UNIT_CELL_CONSTANTS=
parse_numbers_into_array(rhs, line+len, cell_constants, line);
} else if (starts_with_ptr(rhs, "A-AXIS=", &rhs)) { // UNIT_CELL_A-AXIS=
parse_numbers_into_array(rhs, line+len, cell_axes.a[0], line);
} else if (starts_with_ptr(rhs, "B-AXIS=", &rhs)) { // UNIT_CELL_B-AXIS=
parse_numbers_into_array(rhs, line+len, cell_axes.a[1], line);
} else if (starts_with_ptr(rhs, "C-AXIS=", &rhs)) { // UNIT_CELL_C-AXIS=
parse_numbers_into_array(rhs, line+len, cell_axes.a[2], line);
}
} else if (starts_with_ptr(line+1, "REFLECTING_RANGE_E.S.D.=", &rhs)) {
auto result = fast_from_chars(rhs, line+len, reflecting_range_esd);
if (result.ec != std::errc())
fail("failed to parse mosaicity:\n", line);
} else if (starts_with_ptr(line+1, "X-RAY_WAVELENGTH=", &rhs)) {
auto result = fast_from_chars(rhs, line+len, wavelength);
if (result.ec != std::errc())
fail("failed to parse wavelength:\n", line);
} else if (starts_with_ptr(line+1, "INCIDENT_BEAM_DIRECTION=", &rhs)) {
parse_numbers_into_vec3(rhs, line+len, incident_beam_dir, line);
} else if (starts_with_ptr(line+1, "OSCILLATION_RANGE=", &rhs)) {
auto result = fast_from_chars(rhs, line+len, oscillation_range);
if (result.ec != std::errc())
fail("failed to parse:\n", line);
} else if (starts_with_ptr(line+1, "ROTATION_AXIS=", &rhs)) {
parse_numbers_into_vec3(rhs, line+len, rotation_axis, line);
} else if (starts_with_ptr(line+1, "STARTING_ANGLE=", &rhs)) {
auto result = fast_from_chars(rhs, line+len, starting_angle);
if (result.ec != std::errc())
fail("failed to parse:\n", line);
} else if (starts_with_ptr(line+1, "STARTING_FRAME=", &rhs)) {
starting_frame = simple_atoi(rhs);
} else if (starts_with_ptr(line+1, " ISET= ", &rhs)) {
// per-ISET record: the iset id is followed by one keyword=value item
const char* endptr;
int id = simple_atoi(rhs, &endptr);
XdsAscii::Iset& iset = find_or_add_iset(id);
endptr = skip_blank(endptr);
if (starts_with_ptr(endptr, "INPUT_FILE=", &rhs)) {
iset.input_file = read_word(rhs);
} else if (starts_with_ptr(endptr, "X-RAY_WAVELENGTH=", &rhs)) {
double w;
auto result = fast_from_chars(rhs, line+len, w);
if (result.ec != std::errc())
fail("failed to parse iset wavelength:\n", line);
iset.wavelength = w;
} else if (starts_with_ptr(endptr, "UNIT_CELL_CONSTANTS=", &rhs)) {
parse_numbers_into_array(rhs, line+len, iset.cell_constants, line);
}
} else if (starts_with_ptr(line+1, "NX=", &rhs)) {
// NX= may be followed on the same line by NY=, QX= and QY=
const char* endptr;
nx = simple_atoi(rhs, &endptr);
if (starts_with_ptr_b(endptr, "NY=", &rhs))
ny = simple_atoi(rhs, &endptr);
if (starts_with_ptr_b(endptr, "QX=", &rhs))
endptr = parse_number_into(rhs, line+len, qx, line);
if (starts_with_ptr_b(endptr, "QY=", &rhs))
parse_number_into(rhs, line+len, qy, line);
} else if (starts_with_ptr(line+1, "ORGX=", &rhs)) {
// ORGX= may be followed on the same line by ORGY= and DETECTOR_DISTANCE=
const char* endptr = parse_number_into(rhs, line+len, orgx, line);
if (starts_with_ptr_b(endptr, "ORGY=", &rhs))
endptr = parse_number_into(rhs, line+len, orgy, line);
if (starts_with_ptr_b(endptr, "DETECTOR_DISTANCE=", &rhs))
parse_number_into(rhs, line+len, detector_distance, line);
} else if (starts_with_ptr(line+1, "NUMBER_OF_ITEMS_IN_EACH_DATA_RECORD=", &rhs)) {
int num = simple_atoi(rhs);
// INTEGRATE.HKL has read_columns=12, as set above
if (xds_ascii_type == 'T') // merged file
read_columns = 5;
else if (generated_by == "XSCALE")
read_columns = 8;
else if (generated_by == "CORRECT")
read_columns = 11;
// check if the columns are what they always are
if (num < read_columns)
fail("expected ", std::to_string(read_columns), "+ columns, got:\n", line);
if (generated_by == "INTEGRATE") {
// INTEGRATE.HKL lists all column names in the single line that follows
line_reader.copy_line(line, 52);
if (!starts_with(line, "!H,K,L,IOBS,SIGMA,XCAL,YCAL,ZCAL,RLP,PEAK,CORR,MAXC"))
fail("unexpected column order in INTEGRATE.HKL");
} else {
// otherwise the next read_columns lines must be "!ITEM_<name>=<number>"
// records in exactly this order
const char* expected_columns[12] = {
"H=1", "K=2", "L=3", "IOBS=4", "SIGMA(IOBS)=5",
"XD=6", "YD=7", "ZD=8", "RLP=9", "PEAK=10", "CORR=11", "MAXC=12"
};
for (int i = 0; i < read_columns; ++i) {
const char* col = expected_columns[i];
line_reader.copy_line(line, 42);
if (std::strncmp(line, "!ITEM_", 6) != 0 ||
std::strncmp(line+6, col, std::strlen(col)) != 0)
fail("column !ITEM_" + std::string(col), " not found.");
}
}
} else if (starts_with_ptr(line+1, "ITEM_ISET=", &rhs)) {
iset_col = simple_atoi(rhs);
} else if (starts_with(line+1, "END_OF_DATA")) {
// end of file: if no ISET records were seen, add a single iset with id 1
// that carries the global wavelength
if (isets.empty()) {
isets.emplace_back(1);
isets.back().wavelength = wavelength;
}
// every reflection must refer to an iset number in 1..isets.size()
for (XdsAscii::Refl& refl : data)
if (size_t(refl.iset - 1) >= isets.size())
fail("unexpected ITEM_ISET " + std::to_string(refl.iset));
return;
}
} else {
// --- data line: one reflection record ---
data.emplace_back();
XdsAscii::Refl& r = data.back();
const char* p = line;
for (int i = 0; i < 3; ++i)
r.hkl[i] = simple_atoi(p, &p);
// Columns are parsed one after another, each starting where the previous
// one stopped; only the status of the last conversion is checked below.
auto result = fast_from_chars(p, line+len, r.iobs); // 4
result = fast_from_chars(result.ptr, line+len, r.sigma); // 5
if (read_columns >= 8) {
result = fast_from_chars(result.ptr, line+len, r.xd); // 6
result = fast_from_chars(result.ptr, line+len, r.yd); // 7
result = fast_from_chars(result.ptr, line+len, r.zd); // 8
if (read_columns >= 11) {
result = fast_from_chars(result.ptr, line+len, r.rlp); // 9
result = fast_from_chars(result.ptr, line+len, r.peak); // 10
result = fast_from_chars(result.ptr, line+len, r.corr); // 11
if (read_columns >= 12) {
result = fast_from_chars(result.ptr, line+len, r.maxc); // 12
} else {
r.maxc = 0; // 12
}
} else {
r.rlp = r.peak = r.corr = r.maxc = 0; // 9-11
}
} else {
r.xd = r.yd = r.zd = 0; // 6-8
}
if (result.ec != std::errc())
fail("failed to parse data line:\n", line);
if (iset_col >= read_columns) {
// skip the columns between the last parsed one and the ISET column
const char* iset_ptr = result.ptr;
for (int j = read_columns+1; j < iset_col; ++j)
iset_ptr = skip_word(skip_blank(iset_ptr));
r.iset = simple_atoi(iset_ptr);
}
}
}
// the input ended without "!END_OF_DATA"
fail("incorrect or unfinished file: " + source_path);
}
// Convenience function: reads the file at `path` through MaybeGzipped
// and returns the resulting XdsAscii object.
XdsAscii read_xds_ascii(const std::string& path) {
  XdsAscii parsed;
  parsed.read_input(gemmi::MaybeGzipped(path));
  return parsed;
}
} // namespace gemmi
-2
View File
@@ -1,2 +0,0 @@
ADD_LIBRARY(gemmi STATIC symmetry.cpp gemmi/symmetry.hpp gemmi/fail.hpp)
TARGET_INCLUDE_DIRECTORIES(gemmi PUBLIC .)