Gemmi: Add more functionality from gemmi 0.7.5
This commit is contained in:
+1
-1
@@ -153,7 +153,7 @@ ADD_SUBDIRECTORY(reader)
|
||||
ADD_SUBDIRECTORY(detector_control)
|
||||
ADD_SUBDIRECTORY(image_puller)
|
||||
ADD_SUBDIRECTORY(preview)
|
||||
ADD_SUBDIRECTORY(symmetry)
|
||||
ADD_SUBDIRECTORY(gemmi_gph)
|
||||
ADD_SUBDIRECTORY(xds-plugin)
|
||||
|
||||
IF (JFJOCH_WRITER_ONLY)
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
ADD_LIBRARY(gemmi STATIC symmetry.cpp gz.cpp mtz.cpp sprintf.cpp xds_ascii.cpp
|
||||
gemmi/cellred.hpp
|
||||
gemmi/symmetry.hpp
|
||||
gemmi/fail.hpp
|
||||
gemmi/unitcell.hpp
|
||||
gemmi/math.hpp)
|
||||
TARGET_INCLUDE_DIRECTORIES(gemmi PUBLIC .)
|
||||
TARGET_LINK_LIBRARIES(gemmi )
|
||||
@@ -0,0 +1,41 @@
|
||||
// Copyright 2020 Global Phasing Ltd.
|
||||
//
|
||||
// Functions that convert strings to floating-point numbers ignoring locale.
|
||||
// Simple wrappers around fastfloat::from_chars().
|
||||
|
||||
#ifndef GEMMI_ATOF_HPP_
|
||||
#define GEMMI_ATOF_HPP_
|
||||
|
||||
#include <cstring>   // for strlen
#include "atox.hpp"  // for is_space
#include "third_party/fast_float.h"
|
||||
|
||||
namespace gemmi {
|
||||
|
||||
using fast_float::from_chars_result;
|
||||
|
||||
/// Parses a floating-point number from the character range [start, end).
/// Leading characters for which is_space() is true and then a single
/// leading '+' are skipped; the rest is handed to fast_float::from_chars()
/// with d as the output argument, and its result is returned unchanged.
inline from_chars_result fast_from_chars(const char* start, const char* end, double& d) {
  const char* pos = start;
  for (; pos < end && is_space(*pos); ++pos) {
  }
  if (pos < end && *pos == '+')
    ++pos;
  return fast_float::from_chars(pos, end, d);
}
|
||||
|
||||
/// Variant of fast_from_chars() for NUL-terminated strings.
/// Skips leading is_space() characters and one leading '+', then parses
/// up to the terminating NUL (located with std::strlen).
inline from_chars_result fast_from_chars(const char* start, double& d) {
  const char* pos = start;
  for (; is_space(*pos); ++pos) {
  }
  if (*pos == '+')
    ++pos;
  const char* const stop = pos + std::strlen(pos);
  return fast_float::from_chars(pos, stop, d);
}
|
||||
|
||||
inline double fast_atof(const char* p, const char** endptr=nullptr) {
|
||||
double d = 0;
|
||||
auto result = fast_from_chars(p, d);
|
||||
if (endptr)
|
||||
*endptr = result.ptr;
|
||||
return d;
|
||||
}
|
||||
|
||||
} // namespace gemmi
|
||||
#endif
|
||||
@@ -0,0 +1,135 @@
|
||||
// Copyright 2018 Global Phasing Ltd.
|
||||
//
|
||||
// Locale-independent functions that convert strings to integers,
|
||||
// equivalents of standard isspace and isdigit, and a few helper functions.
|
||||
//
|
||||
// This file is named similarly to the standard functions atoi() and atof().
|
||||
// But the functions here are not meant to be equivalent to the standard
|
||||
// library functions. They are locale-independent (a good thing when reading
|
||||
// numbers from files). They don't set errno, don't signal overflow and
|
||||
// underflow. Due to the limited scope these functions tend to be faster
|
||||
// than the standard-library ones.
|
||||
|
||||
#ifndef GEMMI_ATOX_HPP_
|
||||
#define GEMMI_ATOX_HPP_
|
||||
|
||||
#include <cstdint>
|
||||
#include <stdexcept> // for invalid_argument
|
||||
#include <string>
|
||||
|
||||
namespace gemmi {
|
||||
|
||||
// Equivalent of std::isspace for the C locale (no handling of EOF):
// true for character codes 9-13 ('\t', '\n', '\v', '\f', '\r') and 32 (' ').
inline bool is_space(char c) {
  const unsigned code = static_cast<unsigned char>(c);
  // For codes below 9 the unsigned subtraction wraps around to a large
  // value, so a single comparison covers the whole 9..13 range.
  return code == 32u || (code - 9u) <= 4u;
}
|
||||
|
||||
// Equivalent of std::isblank for the C locale (no handling of EOF):
// true only for a space and a horizontal tab.
inline bool is_blank(char c) {
  switch (c) {
    case ' ':
    case '\t':
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
// Equivalent of std::isdigit for the C locale (no handling of EOF):
// true for '0'..'9' only.
inline bool is_digit(char c) {
  // Characters below '0' give a negative difference, which becomes a large
  // unsigned value and fails the comparison.
  return static_cast<unsigned>(c - '0') <= 9u;
}
|
||||
|
||||
inline const char* skip_blank(const char* p) {
|
||||
if (p)
|
||||
while (is_blank(*p))
|
||||
++p;
|
||||
return p;
|
||||
}
|
||||
|
||||
inline const char* skip_word(const char* p) {
|
||||
if (p)
|
||||
while (*p != '\0' && !is_space(*p))
|
||||
++p;
|
||||
return p;
|
||||
}
|
||||
|
||||
inline std::string read_word(const char* line) {
|
||||
line = skip_blank(line);
|
||||
return std::string(line, skip_word(line));
|
||||
}
|
||||
|
||||
// Like read_word(line), but also stores the position just past the word
// in *endptr (endptr must not be null: it is written unconditionally).
inline std::string read_word(const char* line, const char** endptr) {
  const char* first = skip_blank(line);
  const char* last = skip_word(first);
  *endptr = last;
  return std::string(first, last);
}
|
||||
|
||||
// no checking for overflow
|
||||
inline int string_to_int(const char* p, bool checked, size_t length=0) {
|
||||
int mult = -1;
|
||||
int n = 0;
|
||||
size_t i = 0;
|
||||
while ((length == 0 || i < length) && is_space(p[i]))
|
||||
++i;
|
||||
if (p[i] == '-') {
|
||||
mult = 1;
|
||||
++i;
|
||||
} else if (p[i] == '+') {
|
||||
++i;
|
||||
}
|
||||
bool has_digits = false;
|
||||
// use negative numbers because INT_MIN < -INT_MAX
|
||||
for (; (length == 0 || i < length) && is_digit(p[i]); ++i) {
|
||||
n = n * 10 - (p[i] - '0');
|
||||
has_digits = true;
|
||||
}
|
||||
if (checked) {
|
||||
while ((length == 0 || i < length) && is_space(p[i]))
|
||||
++i;
|
||||
if (!has_digits || p[i] != '\0')
|
||||
throw std::invalid_argument("not an integer: " +
|
||||
std::string(p, length ? length : i+1));
|
||||
}
|
||||
return mult * n;
|
||||
}
|
||||
|
||||
// Overload for std::string; forwards the NUL-terminated contents to the
// const char* overload (with its default length).
inline int string_to_int(const std::string& str, bool checked) {
  const char* text = str.c_str();
  return string_to_int(text, checked);
}
|
||||
|
||||
inline int simple_atoi(const char* p, const char** endptr=nullptr) {
|
||||
int mult = -1;
|
||||
int n = 0;
|
||||
while (is_space(*p))
|
||||
++p;
|
||||
if (*p == '-') {
|
||||
mult = 1;
|
||||
++p;
|
||||
} else if (*p == '+') {
|
||||
++p;
|
||||
}
|
||||
for (; is_digit(*p); ++p)
|
||||
n = n * 10 - (*p - '0'); // use negative numbers because INT_MIN < -INT_MAX
|
||||
if (endptr)
|
||||
*endptr = p;
|
||||
return mult * n;
|
||||
}
|
||||
|
||||
inline int no_sign_atoi(const char* p, const char** endptr=nullptr) {
|
||||
int n = 0;
|
||||
while (is_space(*p))
|
||||
++p;
|
||||
for (; is_digit(*p); ++p)
|
||||
n = n * 10 + (*p - '0');
|
||||
if (endptr)
|
||||
*endptr = p;
|
||||
return n;
|
||||
}
|
||||
|
||||
} // namespace gemmi
|
||||
#endif
|
||||
@@ -0,0 +1,173 @@
|
||||
// Copyright 2018 Global Phasing Ltd.
|
||||
//
|
||||
// File-related utilities.
|
||||
|
||||
#ifndef GEMMI_FILEUTIL_HPP_
|
||||
#define GEMMI_FILEUTIL_HPP_
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdio> // for FILE, fopen, fclose
|
||||
#include <cstdint>
|
||||
#include <cstdlib> // for malloc, realloc
|
||||
#include <cstring> // for strlen
|
||||
#include <initializer_list>
|
||||
#include <memory> // for unique_ptr
|
||||
#include "fail.hpp" // for sys_fail
|
||||
|
||||
#if defined(_WIN32) && !defined(GEMMI_USE_FOPEN)
|
||||
#include "utf.hpp"
|
||||
#endif
|
||||
|
||||
namespace gemmi {
|
||||
|
||||
// Strips the directory part (everything up to the last '/' or '\\') and
// then the given suffixes from a file name. The suffixes are tried once
// each, in the order given, and a suffix is removed only if the remaining
// name would not become empty.
inline std::string path_basename(const std::string& path,
                                 std::initializer_list<const char*> exts) {
  const size_t sep = path.find_last_of("\\/");
  std::string name = (sep == std::string::npos) ? path : path.substr(sep + 1);
  for (const char* ext : exts) {
    const size_t ext_len = std::strlen(ext);
    if (name.size() <= ext_len)
      continue;
    const size_t stem_len = name.size() - ext_len;
    if (name.compare(stem_len, ext_len, ext, ext_len) == 0)
      name.erase(stem_len);
  }
  return name;
}
|
||||
|
||||
// file operations
|
||||
|
||||
/// Deleter for fileptr_t. The stream is closed with std::fclose() only
/// if use_fclose is true; otherwise the stream is left untouched.
struct needs_fclose {
  bool use_fclose;
  void operator()(std::FILE* f) const noexcept {
    if (!use_fclose)
      return;
    std::fclose(f);
  }
};

/// A std::FILE* that is closed on destruction when its deleter says so.
using fileptr_t = std::unique_ptr<std::FILE, needs_fclose>;
|
||||
|
||||
/// Opens a file and returns it as fileptr_t that closes it on destruction.
/// On Windows (unless GEMMI_USE_FOPEN is defined) path and mode are
/// converted with UTF8_to_wchar() and passed to _wfopen(); otherwise
/// std::fopen() is used. On failure sys_fail() is called with a message
/// that names the path (and says "for writing" if mode starts with 'w').
inline fileptr_t file_open(const char* path, const char* mode) {
#if defined(_WIN32) && !defined(GEMMI_USE_FOPEN)
  const std::wstring wpath = UTF8_to_wchar(path);
  const std::wstring wmode = UTF8_to_wchar(mode);
  std::FILE* file = ::_wfopen(wpath.c_str(), wmode.c_str());
#else
  std::FILE* file = std::fopen(path, mode);
#endif
  if (file == nullptr) {
    std::string message = "Failed to open ";
    message += path;
    if (*mode == 'w')
      message += " for writing";
    sys_fail(message);
  }
  return fileptr_t(file, needs_fclose{true});
}
|
||||
|
||||
// helper function for treating "-" as stdin or stdout
|
||||
inline fileptr_t file_open_or(const char* path, const char* mode,
|
||||
std::FILE* dash_stream) {
|
||||
if (path[0] == '-' && path[1] == '\0')
|
||||
return fileptr_t(dash_stream, needs_fclose{false});
|
||||
return file_open(path, mode);
|
||||
}
|
||||
|
||||
inline std::size_t file_size(std::FILE* f, const std::string& path) {
|
||||
if (std::fseek(f, 0, SEEK_END) != 0)
|
||||
sys_fail(path + ": fseek failed");
|
||||
long length = std::ftell(f);
|
||||
if (length < 0)
|
||||
sys_fail(path + ": ftell failed");
|
||||
if (std::fseek(f, 0, SEEK_SET) != 0)
|
||||
sys_fail(path + ": fseek failed");
|
||||
return length;
|
||||
}
|
||||
|
||||
// Helper for working with binary files: returns true if the byte at the
// lowest address of a 32-bit integer holding 1 is the one that equals 1.
inline bool is_little_endian() {
  const std::uint32_t probe = 1;
  char lowest_address_byte;
  std::memcpy(&lowest_address_byte, &probe, 1);
  return lowest_address_byte == 1;
}
|
||||
|
||||
// Reverses, in place, the order of the 2 bytes starting at start.
inline void swap_two_bytes(void* start) {
  char* b = static_cast<char*>(start);
  const char first = b[0];
  b[0] = b[1];
  b[1] = first;
}
|
||||
|
||||
// Reverses, in place, the order of the 4 bytes starting at start.
inline void swap_four_bytes(void* start) {
  char* b = static_cast<char*>(start);
  for (int lo = 0, hi = 3; lo < hi; ++lo, --hi) {
    const char tmp = b[lo];
    b[lo] = b[hi];
    b[hi] = tmp;
  }
}
|
||||
|
||||
// Reverses, in place, the order of the 8 bytes starting at start.
inline void swap_eight_bytes(void* start) {
  char* b = static_cast<char*>(start);
  for (int lo = 0, hi = 7; lo < hi; ++lo, --hi) {
    const char tmp = b[lo];
    b[lo] = b[hi];
    b[hi] = tmp;
  }
}
|
||||
|
||||
|
||||
class CharArray {
|
||||
std::unique_ptr<char, decltype(&std::free)> ptr_;
|
||||
size_t size_;
|
||||
public:
|
||||
CharArray() : ptr_(nullptr, &std::free), size_(0) {}
|
||||
explicit CharArray(size_t n) : ptr_((char*)std::malloc(n), &std::free), size_(n) {}
|
||||
explicit operator bool() const { return (bool)ptr_; }
|
||||
char* data() { return ptr_.get(); }
|
||||
const char* data() const { return ptr_.get(); }
|
||||
size_t size() const { return size_; }
|
||||
void set_size(size_t n) { size_ = n; }
|
||||
|
||||
void resize(size_t n) {
|
||||
char* new_ptr = (char*) std::realloc(ptr_.get(), n);
|
||||
if (!new_ptr && n != 0)
|
||||
fail("Out of memory.");
|
||||
(void) ptr_.release(); // NOLINT(bugprone-unused-return-value)
|
||||
ptr_.reset(new_ptr);
|
||||
size_ = n;
|
||||
}
|
||||
|
||||
// Remove first n bytes making space for more text at the returned position.
|
||||
char* roll(size_t n) {
|
||||
assert(n <= size());
|
||||
std::memmove(data(), data() + n, n);
|
||||
return data() + n;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/// reading file into a memory buffer (optimized: uses fseek to determine file size)
|
||||
inline CharArray read_file_into_buffer(const std::string& path) {
|
||||
fileptr_t f = file_open(path.c_str(), "rb");
|
||||
size_t size = file_size(f.get(), path);
|
||||
CharArray buffer(size);
|
||||
if (std::fread(buffer.data(), size, 1, f.get()) != 1)
|
||||
sys_fail(path + ": fread failed");
|
||||
return buffer;
|
||||
}
|
||||
|
||||
// Reads stdin until fread() returns fewer bytes than requested.
// The buffer starts at 16 KiB and is doubled each time it fills up;
// on return its size is set to the number of bytes actually read.
inline CharArray read_stdin_into_buffer() {
  CharArray buffer(16 * 1024);
  size_t filled = 0;
  while (true) {
    filled += std::fread(buffer.data() + filled, 1, buffer.size() - filled, stdin);
    if (filled == buffer.size()) {
      // buffer is full - there may be more data, so grow and read again
      buffer.resize(2 * filled);
      continue;
    }
    buffer.set_size(filled);
    return buffer;
  }
}
|
||||
|
||||
template<typename T>
|
||||
inline CharArray read_into_buffer(T&& input) {
|
||||
if (input.is_compressed())
|
||||
return input.uncompress_into_buffer();
|
||||
if (input.is_stdin())
|
||||
return read_stdin_into_buffer();
|
||||
return read_file_into_buffer(input.path());
|
||||
}
|
||||
|
||||
} // namespace gemmi
|
||||
#endif
|
||||
@@ -0,0 +1,52 @@
|
||||
// Copyright 2017 Global Phasing Ltd.
|
||||
//
|
||||
// Functions for transparent reading of gzipped files. Uses zlib.
|
||||
|
||||
#ifndef GEMMI_GZ_HPP_
|
||||
#define GEMMI_GZ_HPP_
|
||||
#include <string>
|
||||
#include "fail.hpp" // GEMMI_DLL
|
||||
#include "input.hpp" // BasicInput
|
||||
#include "util.hpp" // iends_with
|
||||
|
||||
namespace gemmi {
|
||||
|
||||
GEMMI_DLL extern const char* const zlib_description;
|
||||
|
||||
GEMMI_DLL size_t estimate_uncompressed_size(const std::string& path);
|
||||
|
||||
// the same interface as FileStream and MemoryStream
|
||||
struct GEMMI_DLL GzStream final : public AnyStream {
|
||||
GzStream(void* f_) : f(f_) {}
|
||||
char* gets(char* line, int size) override;
|
||||
int getc() override;
|
||||
bool read(void* buf, size_t len) override;
|
||||
bool skip(size_t n) override;
|
||||
long tell() override;
|
||||
std::string read_rest() override;
|
||||
|
||||
private:
|
||||
void* f; // implementation detail
|
||||
};
|
||||
|
||||
// Input that is treated as compressed when its path ends with ".gz"
// (checked with iends_with). Provides the same interface as BasicInput.
// NOTE(review): the class declares a destructor and holds the raw handle
// file_, but its copy operations are not deleted. If the destructor
// (defined outside this header) releases file_, copying an instance would
// release it twice - confirm against the implementation.
class GEMMI_DLL MaybeGzipped : public BasicInput {
public:
  explicit MaybeGzipped(const std::string& path);
  ~MaybeGzipped();

  bool is_compressed() const { return iends_with(path(), ".gz"); }

  // path() without the trailing 3-character ".gz" suffix, if compressed
  std::string basepath() const {
    if (!is_compressed())
      return path();
    return path().substr(0, path().size() - 3);
  }

  size_t gzread_checked(void* buf, size_t len);

  CharArray uncompress_into_buffer(size_t limit=0);

  std::unique_ptr<AnyStream> create_stream();

private:
  void* file_ = nullptr;
};
|
||||
|
||||
} // namespace gemmi
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,168 @@
|
||||
// Copyright 2018 Global Phasing Ltd.
|
||||
//
|
||||
// Input abstraction.
|
||||
// Used to decouple file reading and decompression.
|
||||
|
||||
#ifndef GEMMI_INPUT_HPP_
|
||||
#define GEMMI_INPUT_HPP_
|
||||
|
||||
#include <cstddef> // for ptrdiff_t
|
||||
#include <cstdio> // for FILE, fseek, fread
|
||||
#include <cstring> // for memchr
|
||||
#include <string>
|
||||
#include "fileutil.hpp" // for fileptr_t
|
||||
|
||||
namespace gemmi {
|
||||
|
||||
// Base class for FileStream, MemoryStream and GzStream.
struct AnyStream {
  virtual ~AnyStream() = default;

  // fgets-like: reads a line of at most size-1 characters (for pdb, copy_line())
  virtual char* gets(char* line, int size) = 0;
  // fgetc-like: returns the next character or EOF (for copy_line())
  virtual int getc() = 0;
  // reads exactly len bytes; returns false on failure (for ccp4, mtz)
  virtual bool read(void* buf, size_t len) = 0;

  // these are not used in GzStream because MemoryStream is used for mtz
  virtual long tell() = 0;  // temporary, for testing
  virtual bool skip(size_t n) = 0;  // for reading mtz without data
  virtual std::string read_rest() { return {}; }  // for mtz (appendix)

  // Reads one line with gets() and returns its length (0 if gets() failed).
  // If the line didn't fit into the buffer, the rest of it - up to and
  // including the newline - is read with getc() and discarded.
  // Used for pdb, xds_ascii.
  size_t copy_line(char* line, int size) {
    if (gets(line, size) == nullptr)
      return 0;
    const size_t len = std::strlen(line);
    const bool truncated = (len != 0 && line[len - 1] != '\n');
    if (truncated) {
      int c = getc();
      while (c > 0 /* not 0 nor EOF */ && c != '\n')
        c = getc();
    }
    return len;
  }
};
|
||||
|
||||
struct FileStream final : public AnyStream {
|
||||
FileStream(std::FILE* f_) : f(f_, needs_fclose{false}) {}
|
||||
FileStream(const char* path, const char* mode) : f(file_open_or(path, mode, stdin)) {}
|
||||
|
||||
char* gets(char* line, int size) override { return std::fgets(line, size, f.get()); }
|
||||
int getc() override { return std::fgetc(f.get()); }
|
||||
bool read(void* buf, size_t len) override { return std::fread(buf, len, 1, f.get()) == 1; }
|
||||
|
||||
std::string read_rest() override {
|
||||
std::string ret;
|
||||
int c = std::fgetc(f.get());
|
||||
if (c != EOF) {
|
||||
ret += (char)c;
|
||||
char buf[512];
|
||||
for (;;) {
|
||||
size_t n = std::fread(buf, 1, sizeof(buf), f.get());
|
||||
ret.append(buf, n);
|
||||
if (n != sizeof(buf))
|
||||
break;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
long tell() override {
|
||||
return std::ftell(f.get());
|
||||
}
|
||||
|
||||
bool skip(size_t n) override {
|
||||
#if defined(_MSC_VER)
|
||||
int result = _fseeki64(f.get(), (std::ptrdiff_t)n, SEEK_CUR);
|
||||
#elif defined(__MINGW32__)
|
||||
int result = fseeko(f.get(), (_off_t)n, SEEK_CUR);
|
||||
#else
|
||||
int result = std::fseek(f.get(), (long)n, SEEK_CUR);
|
||||
#endif
|
||||
if (result != 0) {
|
||||
char buf[512];
|
||||
while (n >= sizeof(buf)) {
|
||||
if (std::fread(buf, sizeof(buf), 1, f.get()) != 1)
|
||||
return false;
|
||||
n -= sizeof(buf);
|
||||
}
|
||||
if (n > 0 && std::fread(buf, n, 1, f.get()) != 1)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
fileptr_t f;
|
||||
};
|
||||
|
||||
struct MemoryStream final : public AnyStream {
|
||||
MemoryStream(const char* start_, size_t size)
|
||||
: start(start_), end(start_ + size), cur(start_) {}
|
||||
|
||||
char* gets(char* line, int size) override {
|
||||
--size; // fgets reads in at most one less than size characters
|
||||
if (cur >= end)
|
||||
return nullptr;
|
||||
if (size > end - cur)
|
||||
size = int(end - cur);
|
||||
const char* nl = (const char*) std::memchr(cur, '\n', size);
|
||||
size_t len = nl ? nl - cur + 1 : size;
|
||||
std::memcpy(line, cur, len);
|
||||
line[len] = '\0';
|
||||
cur += len;
|
||||
return line;
|
||||
}
|
||||
int getc() override { return cur < end ? *cur++ : EOF; }
|
||||
|
||||
bool read(void* buf, size_t len) override {
|
||||
if (cur + len > end)
|
||||
return false;
|
||||
std::memcpy(buf, cur, len);
|
||||
cur += len;
|
||||
return true;
|
||||
}
|
||||
|
||||
std::string read_rest() override {
|
||||
const char* last = cur;
|
||||
cur = end;
|
||||
return std::string(last, end);
|
||||
}
|
||||
|
||||
long tell() override {
|
||||
return cur - start;
|
||||
}
|
||||
bool skip(size_t n) override {
|
||||
cur += n;
|
||||
return cur < end;
|
||||
}
|
||||
|
||||
private:
|
||||
const char* const start;
|
||||
const char* const end;
|
||||
const char* cur;
|
||||
};
|
||||
|
||||
class BasicInput {
|
||||
public:
|
||||
explicit BasicInput(const std::string& path) : path_(path) {}
|
||||
|
||||
const std::string& path() const { return path_; }
|
||||
const std::string& basepath() const { return path_; }
|
||||
|
||||
// Does the path stands for stdin?
|
||||
// Each reading function needs to call it (some functions use stdin
|
||||
// and some std::cin, so we don't try to unify it here).
|
||||
bool is_stdin() const { return path() == "-"; }
|
||||
|
||||
// providing the same interface as MaybeGzipped
|
||||
bool is_compressed() const { return false; }
|
||||
// for reading (uncompressing into memory) the whole file at once
|
||||
CharArray uncompress_into_buffer(size_t=0) { return {}; }
|
||||
|
||||
std::unique_ptr<AnyStream> create_stream() {
|
||||
return std::unique_ptr<AnyStream>(new FileStream(path().c_str(), "rb"));
|
||||
}
|
||||
|
||||
private:
|
||||
std::string path_;
|
||||
};
|
||||
|
||||
} // namespace gemmi
|
||||
#endif
|
||||
@@ -0,0 +1,287 @@
|
||||
// Copyright 2018 Global Phasing Ltd.
|
||||
//
|
||||
// Bidirectional iterators (over elements of any container) that can filter,
|
||||
// uniquify, group, or iterate with a stride.
|
||||
|
||||
#ifndef GEMMI_ITERATOR_HPP_
|
||||
#define GEMMI_ITERATOR_HPP_
|
||||
#include <iterator> // for bidirectional_iterator_tag
|
||||
#include <type_traits> // for remove_cv
|
||||
#include <vector>
|
||||
|
||||
namespace gemmi {
|
||||
|
||||
// Disable warning "X<T>::operator X<T>() const will not be called for
|
||||
// implicit or explicit conversions", which is triggered when templates
|
||||
// StrideIter, IndirectIter and others are expanded with const Value.
|
||||
#if defined(__INTEL_COMPILER) || defined(__NVCOMPILER)
|
||||
#pragma diagnostic push
|
||||
#pragma diag_suppress = conversion_function_not_usable
|
||||
#elif defined(__NVCC__)
|
||||
#pragma nv_diagnostic push
|
||||
#pragma nv_diag_suppress = conversion_function_not_usable
|
||||
#endif
|
||||
|
||||
// Implements concept BidirectionalIterator on top of a Policy class.
// The Policy provides the types value_type, reference and const_policy
// and the functions increment(), decrement(), equal() and dereference().
template <typename Policy>
struct BidirIterator : Policy {
  using iterator_category = std::bidirectional_iterator_tag;
  using value_type = typename std::remove_cv<typename Policy::value_type>::type;
  using difference_type = std::ptrdiff_t;
  using pointer = typename Policy::value_type*;
  using reference = typename Policy::reference;
  // the same iterator, but over constant values
  using const_variant = BidirIterator<typename Policy::const_policy>;

  BidirIterator() = default;
  BidirIterator(Policy&& p) : Policy(p) {}

  BidirIterator& operator++() {
    Policy::increment();
    return *this;
  }
  BidirIterator operator++(int) {
    BidirIterator previous(*this);
    Policy::increment();
    return previous;
  }
  BidirIterator& operator--() {
    Policy::decrement();
    return *this;
  }
  BidirIterator operator--(int) {
    BidirIterator previous(*this);
    Policy::decrement();
    return previous;
  }

  bool operator==(const BidirIterator& o) const { return Policy::equal(o); }
  bool operator!=(const BidirIterator& o) const { return !(*this == o); }

  reference operator*() { return Policy::dereference(); }
  pointer operator->() { return &Policy::dereference(); }

  operator const_variant() const {
    const Policy& policy = *this;
    return const_variant(policy);
  }
};
|
||||
|
||||
// Policy for iterating with a fixed stride: the position moves by stride
// elements at a time and the element at position + offset is accessed.
template<typename Value>
class StrideIterPolicy {
public:
  using value_type = Value;
  using reference = Value&;
  using const_policy = StrideIterPolicy<Value const>;

  StrideIterPolicy() {}
  StrideIterPolicy(Value* ptr, std::size_t offset, size_t stride)
    : cur_(ptr), offset_(offset), stride_((unsigned)stride) {}

  void increment() { cur_ = cur_ + stride_; }
  void decrement() { cur_ = cur_ - stride_; }
  // only the positions are compared, not offsets or strides
  bool equal(const StrideIterPolicy& o) const { return o.cur_ == cur_; }
  Value& dereference() { return *(cur_ + offset_); }

  operator const_policy() const { return const_policy(cur_, offset_, stride_); }

private:
  Value* cur_ = nullptr;
  std::size_t offset_ = 0;
  unsigned stride_ = 0;
};
|
||||
template<typename Value>
|
||||
using StrideIter = BidirIterator<StrideIterPolicy<Value>>;
|
||||
|
||||
|
||||
// Policy for iterating over values accessed indirectly: it walks over
// a vector of ints and passes each of them to redir->value_at().
template<typename Redirect, typename Value>
class IndirectIterPolicy {
public:
  using value_type = Value;
  using reference = Value&;
  using const_policy = IndirectIterPolicy<Redirect const, Value const>;

  IndirectIterPolicy() {}
  IndirectIterPolicy(Redirect* redir, std::vector<int>::const_iterator cur)
    : redir_(redir), cur_(cur) {}

  void increment() { ++cur_; }
  void decrement() { --cur_; }
  // only the positions are compared, not the Redirect objects
  bool equal(const IndirectIterPolicy& o) const { return o.cur_ == cur_; }
  // TODO: what should be done with absent optional tags (*cur_ < 0)?
  Value& dereference() {
    const int index = *cur_;
    return redir_->value_at(index);
  }

  operator const_policy() const { return const_policy(redir_, cur_); }

private:
  Redirect* redir_ = nullptr;
  std::vector<int>::const_iterator cur_;  // points into positions
};
|
||||
template<typename Redirect, typename Value>
|
||||
using IndirectIter = BidirIterator<IndirectIterPolicy<Redirect, Value>>;
|
||||
|
||||
|
||||
// Policy for iterating over the first elements of groups of consecutive
// elements that have equal group_key().
template<typename Vector, typename Value>
class UniqIterPolicy {
public:
  using value_type = Value;
  using reference = Value&;
  using const_policy = UniqIterPolicy<Vector const, Value const>;

  UniqIterPolicy() : vec_(nullptr), pos_(0) {}
  UniqIterPolicy(Vector* vec, std::size_t pos) : vec_(vec), pos_(pos) {}

  // moves to the first element of the next group (or to the end)
  void increment() {
    Vector& v = *vec_;
    const auto& key = v[pos_].group_key();
    for (++pos_; pos_ != v.size() && v[pos_].group_key() == key; ++pos_) {
    }
  }

  // moves to the first element of the previous group
  void decrement() {
    Vector& v = *vec_;
    --pos_;  // now we are at the last element of the previous group
    const auto& key = v[pos_].group_key();
    for (; pos_ != 0 && v[pos_ - 1].group_key() == key; --pos_) {
    }
  }

  // only the positions are compared, not the vectors
  bool equal(const UniqIterPolicy& o) const { return o.pos_ == pos_; }
  Value& dereference() { return (*vec_)[pos_]; }

  operator const_policy() const { return const_policy(vec_, pos_); }

private:
  Vector* vec_;
  std::size_t pos_;
};
|
||||
template<typename Vector, typename Value>
|
||||
using UniqIter = BidirIterator<UniqIterPolicy<Vector, Value>>;
|
||||
|
||||
template<typename Value, typename Vector=std::vector<Value>>
|
||||
struct UniqProxy {
|
||||
Vector& vec;
|
||||
using iterator = UniqIter<Vector, Value>;
|
||||
iterator begin() { return {{&vec, 0}}; }
|
||||
iterator end() { return {{&vec, vec.size()}}; }
|
||||
};
|
||||
template<typename Value, typename Vector=std::vector<Value>>
|
||||
struct ConstUniqProxy {
|
||||
const Vector& vec;
|
||||
using iterator = UniqIter<const Vector, const Value>;
|
||||
iterator begin() const { return {{&vec, 0}}; }
|
||||
iterator end() const { return {{&vec, vec.size()}}; }
|
||||
};
|
||||
|
||||
|
||||
// Policy for iterating over groups of consecutive elements with equal
// group_key(). Value is a span-like type (with begin(), end(), size(),
// set_begin(), set_size(), is_beginning() and is_ending()); the policy
// holds one such span and repositions it over each group in turn.
template<typename Vector, typename Value>
class GroupingIterPolicy {
public:
  using value_type = Value;
  using reference = Value&;
  using const_policy = GroupingIterPolicy<Vector const, Value const>;

  GroupingIterPolicy() = default;
  GroupingIterPolicy(const Value& span) : span_(span) {}

  // The span is restarted just after the current group and extended
  // as long as the next element has the same key as the first one.
  void increment() {
    span_.set_begin(span_.end());
    for (span_.set_size(0);
         !span_.is_ending() &&
         span_.begin()->group_key() == span_.end()->group_key();
         span_.set_size(span_.size() + 1)) {
    }
  }

  // The span is restarted at the element before the current group and
  // extended backwards as long as the preceding element has the same key.
  void decrement() {
    span_.set_begin(span_.begin() - 1);
    span_.set_size(1);
    for (;;) {
      if (span_.is_beginning())
        break;
      if (!(span_.begin()->group_key() == (span_.begin() - 1)->group_key()))
        break;
      span_.set_begin(span_.begin() - 1);
      span_.set_size(span_.size() + 1);
    }
  }

  // two positions are equal if their spans begin at the same place
  bool equal(const GroupingIterPolicy& o) const {
    return o.span_.begin() == span_.begin();
  }
  Value& dereference() { return span_; }

  operator const_policy() const { return const_policy(span_); }

private:
  Value span_;
};
|
||||
template<typename Vector, typename Value>
|
||||
using GroupingIter = BidirIterator<GroupingIterPolicy<Vector, Value>>;
|
||||
|
||||
|
||||
// Policy for iterating over those elements of a vector for which
// filter->matches(element) is true.
template<typename Filter, typename Vector, typename Value>
class FilterIterPolicy {
public:
  using value_type = Value;
  using reference = Value&;
  FilterIterPolicy() : filter_(nullptr), vec_(nullptr), pos_(0) {}
  // Starts at the first matching element at position >= pos
  // (or at the end if there is none). A null vec is accepted, so that
  // a default-constructed policy can be converted to const_policy.
  FilterIterPolicy(const Filter* filter, Vector* vec, std::size_t pos)
    : filter_(filter), vec_(vec), pos_(pos) {
    if (vec_ != nullptr)
      while (pos_ != vec_->size() && !matches(pos_))
        ++pos_;
  }
  bool matches(std::size_t p) const { return filter_->matches((*vec_)[p]); }
  // moves to the next matching element or to the end
  void increment() { while (++pos_ < vec_->size() && !matches(pos_)) {} }
  // moves to the previous matching element; stops at position 0 regardless
  void decrement() { while (pos_ != 0 && !matches(--pos_)) {} }
  // only the positions are compared, not the filters or vectors
  bool equal(const FilterIterPolicy& o) const { return pos_ == o.pos_; }
  Value& dereference() { return (*vec_)[pos_]; }
  using const_policy = FilterIterPolicy<Filter, Vector const, Value const>;
  // the filter must be passed on: const_policy has no two-argument constructor
  operator const_policy() const { return const_policy(filter_, vec_, pos_); }
private:
  const Filter* filter_;
  Vector* vec_;
  std::size_t pos_;
};
|
||||
template<typename Filter, typename Vector, typename Value>
|
||||
using FilterIter = BidirIterator<FilterIterPolicy<Filter, Vector, Value>>;
|
||||
|
||||
template<typename Filter, typename Value>
|
||||
struct FilterProxy {
|
||||
const Filter& filter;
|
||||
std::vector<Value>& vec;
|
||||
using iterator = FilterIter<Filter, std::vector<Value>, Value>;
|
||||
iterator begin() { return {{&filter, &vec, 0}}; }
|
||||
iterator end() { return {{&filter, &vec, vec.size()}}; }
|
||||
};
|
||||
|
||||
template<typename Filter, typename Value>
|
||||
struct ConstFilterProxy {
|
||||
const Filter& filter;
|
||||
const std::vector<Value>& vec;
|
||||
using iterator = FilterIter<Filter, const std::vector<Value>, const Value>;
|
||||
iterator begin() const { return {{&filter, &vec, 0}}; }
|
||||
iterator end() const { return {{&filter, &vec, vec.size()}}; }
|
||||
};
|
||||
|
||||
|
||||
// A view of the items in the range [start, end) that have the same
// group_key() as the first item. The items of the group don't need to be
// consecutive: extent() is the length of the whole range and size() is
// the number of items in it that belong to the group.
template<typename Item>
struct ItemGroup {
  using element_type = Item;

  ItemGroup(Item* start, const Item* end)
    : size_(int(end - start)), extent_(int(end - start)), start_(start) {
    // In an empty range start + 1 would already be past the end
    // and the loop below would never meet its stop condition.
    if (start == end)
      return;
    for (const Item* i = start + 1; i != end; ++i)
      if (i->group_key() != start->group_key())
        --size_;
  }

  // forward iterator that skips the items with a different group_key()
  struct iterator {
    Item* ptr;
    const Item* end;
    bool operator==(const iterator& o) const { return ptr == o.ptr; }
    bool operator!=(const iterator& o) const { return ptr != o.ptr; }
    iterator& operator++() {
      const Item* prev = ptr++;
      while (ptr != end && ptr->group_key() != prev->group_key())
        ++ptr;
      return *this;
    }
    Item& operator*() { return *ptr; }
    Item* operator->() { return ptr; }
  };
  iterator begin() { return iterator{start_, start_+extent_}; }
  iterator end() { return iterator{start_+extent_, start_+extent_}; }

  size_t size() const { return (size_t) size_; }
  int extent() const { return extent_; }
  bool empty() const { return size_ == 0; }
  // front() and back() are the first and the last item of the range;
  // they must not be called on an empty group
  Item& front() { return *start_; }
  const Item& front() const { return *start_; }
  Item& back() { return start_[extent_ - 1]; }
  const Item& back() const { return start_[extent_ - 1]; }

  // constant time unless sparse (extent_ > size_);
  // i must be less than size() - it is not checked
  Item& operator[](std::size_t i) {
    if (size_ == extent_ || i == 0)
      return start_[i];
    for (Item* ptr = start_ + 1; ; ++ptr)
      if (ptr->group_key() == start_->group_key())
        if (--i == 0)
          return *ptr;
  }
  const Item& operator[](std::size_t i) const {
    return const_cast<ItemGroup*>(this)->operator[](i);
  }

private:
  int size_ = 0;
  int extent_ = 0;
  Item* start_ = nullptr;
};
|
||||
|
||||
#if defined(__INTEL_COMPILER) || defined(__NVCOMPILER)
|
||||
#pragma diagnostic pop
|
||||
#elif defined(__NVCC__)
|
||||
#pragma nv_diagnostic pop
|
||||
#endif
|
||||
|
||||
} // namespace gemmi
|
||||
#endif
|
||||
@@ -0,0 +1,71 @@
|
||||
// Copyright Global Phasing Ltd.
|
||||
//
|
||||
// Logger - a tiny utility for passing messages through a callback.
|
||||
|
||||
#ifndef GEMMI_LOGGER_HPP_
|
||||
#define GEMMI_LOGGER_HPP_
|
||||
|
||||
#include <cstdio> // for fprintf
|
||||
#include <functional> // for function
|
||||
#include "fail.hpp" // for GEMMI_COLD
|
||||
#include "util.hpp" // for cat
|
||||
|
||||
namespace gemmi {
|
||||
|
||||
/// Passes messages (including warnings/errors) to a callback function.
|
||||
/// Messages are passed as strings without a trailing newline.
|
||||
/// They have syslog-like severity levels: 8=debug, 6=info, 5=notice, 3=error,
|
||||
/// allowing the use of a threshold to filter them.
|
||||
/// Quirk: Errors double as both errors and warnings. Unrecoverable errors
|
||||
/// don't go through this class; Logger only handles errors that can
|
||||
/// be downgraded to warnings. If a callback is set, the error is passed
|
||||
/// as a warning message. Otherwise, it's thrown as std::runtime_error.
|
||||
struct Logger {
|
||||
/// A function that handles messages.
|
||||
std::function<void(const std::string&)> callback;
|
||||
/// Pass messages of this level and all lower (more severe) levels:
|
||||
/// 8=all, 6=all but debug, 5=notes and warnings, 3=warnings, 0=none
|
||||
int threshold = 6;
|
||||
|
||||
/// suspend() and resume() are used internally to avoid duplicate messages
|
||||
/// when the same function is called (internally) multiple times.
|
||||
void suspend() { threshold -= 100; }
|
||||
void resume() { threshold += 100; }
|
||||
|
||||
/// Send a message without any prefix on with a numeric threshold N.
|
||||
template<int N, class... Args> void level(Args const&... args) const {
|
||||
if (threshold >= N && callback)
|
||||
callback(cat(args...));
|
||||
}
|
||||
|
||||
/// Send a debug message.
|
||||
template<class... Args> void debug(Args const&... args) const { level<8>("Debug: ", args...); }
|
||||
/// Send a message without any prefix.
|
||||
template<class... Args> void mesg(Args const&... args) const { level<6>(args...); }
|
||||
/// Send a note (a notice, a significant message).
|
||||
template<class... Args> void note(Args const&... args) const { level<5>("Note: ", args...); }
|
||||
|
||||
/// Send a warning/error (see Quirk above).
|
||||
template<class... Args> GEMMI_COLD void err(Args const&... args) const {
|
||||
if (threshold >= 3) {
|
||||
std::string msg = cat(args...);
|
||||
if (callback == nullptr)
|
||||
fail(msg);
|
||||
callback("Warning: " + msg);
|
||||
}
|
||||
}
|
||||
|
||||
// predefined callbacks
|
||||
|
||||
/// to be used as: logger.callback = Logger::to_stderr;
|
||||
static void to_stderr(const std::string& s) {
|
||||
std::fprintf(stderr, "%s\n", s.c_str());
|
||||
}
|
||||
/// to be used as: logger.callback = Logger::to_stdout;
|
||||
static void to_stdout(const std::string& s) {
|
||||
std::fprintf(stdout, "%s\n", s.c_str());
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace gemmi
|
||||
#endif
|
||||
@@ -0,0 +1,600 @@
|
||||
// Copyright 2019 Global Phasing Ltd.
|
||||
//
|
||||
// MTZ reflection file format.
|
||||
|
||||
#ifndef GEMMI_MTZ_HPP_
|
||||
#define GEMMI_MTZ_HPP_
|
||||
|
||||
#include <cassert>
|
||||
#include <cmath> // for isnan
|
||||
#include <cstdint> // for int32_t
|
||||
#include <algorithm> // for copy
|
||||
#include <array>
|
||||
#include <initializer_list>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "fail.hpp" // for fail
|
||||
#include "input.hpp" // for AnyStream, FileStream, CharArray
|
||||
#include "iterator.hpp" // for StrideIter
|
||||
#include "logger.hpp" // for Logger
|
||||
#include "math.hpp" // for rad, Mat33
|
||||
#include "symmetry.hpp" // for find_spacegroup_by_name, SpaceGroup
|
||||
#include "unitcell.hpp" // for UnitCell
|
||||
#include "util.hpp" // for ialpha4_id, rtrim_str, ialpha3_id, ...
|
||||
|
||||
namespace gemmi {
|
||||
|
||||
// Unmerged MTZ files always store in-asu hkl indices and symmetry operation
|
||||
// encoded in the M/ISYM column. Here is a helper for writing such files.
|
||||
struct UnmergedHklMover {
|
||||
UnmergedHklMover(const SpaceGroup* spacegroup) : asu_(spacegroup) {
|
||||
if (spacegroup)
|
||||
group_ops_ = spacegroup->operations();
|
||||
}
|
||||
|
||||
// Modifies hkl and returns ISYM value for M/ISYM
|
||||
int move_to_asu(std::array<int, 3>& hkl) {
|
||||
std::pair<Miller, int> hkl_isym = asu_.to_asu(hkl, group_ops_);
|
||||
hkl = hkl_isym.first;
|
||||
return hkl_isym.second;
|
||||
}
|
||||
|
||||
private:
|
||||
ReciprocalAsu asu_;
|
||||
GroupOps group_ops_;
|
||||
};
|
||||
|
||||
struct MtzMetadata {
|
||||
std::string source_path; // input file path, if known
|
||||
bool same_byte_order = true;
|
||||
bool indices_switched_to_original = false;
|
||||
std::int64_t header_offset = 0;
|
||||
std::string version_stamp;
|
||||
std::string title;
|
||||
int nreflections = 0;
|
||||
std::array<int, 5> sort_order = {};
|
||||
double min_1_d2 = NAN;
|
||||
double max_1_d2 = NAN;
|
||||
float valm = NAN;
|
||||
int nsymop = 0;
|
||||
UnitCell cell;
|
||||
int spacegroup_number = 0;
|
||||
std::string spacegroup_name;
|
||||
std::vector<Op> symops;
|
||||
const SpaceGroup* spacegroup = nullptr;
|
||||
std::vector<std::string> history;
|
||||
std::string appended_text;
|
||||
// used to report non-critical problems when reading a file (also used in mtz2cif)
|
||||
Logger logger;
|
||||
};
|
||||
|
||||
struct GEMMI_DLL Mtz : public MtzMetadata {
|
||||
struct Dataset {
|
||||
int id;
|
||||
std::string project_name;
|
||||
std::string crystal_name;
|
||||
std::string dataset_name;
|
||||
UnitCell cell;
|
||||
double wavelength; // 0 means not set
|
||||
};
|
||||
|
||||
struct Column {
|
||||
int dataset_id;
|
||||
char type;
|
||||
std::string label;
|
||||
float min_value = NAN;
|
||||
float max_value = NAN;
|
||||
std::string source; // from COLSRC
|
||||
Mtz* parent;
|
||||
std::size_t idx;
|
||||
|
||||
Dataset& dataset() { return parent->dataset(dataset_id); }
|
||||
const Dataset& dataset() const { return parent->dataset(dataset_id); }
|
||||
bool has_data() const { return parent->has_data(); }
|
||||
int size() const { return has_data() ? parent->nreflections : 0; }
|
||||
size_t stride() const { return parent->columns.size(); }
|
||||
float& operator[](std::size_t n) { return parent->data[idx + n * stride()]; }
|
||||
float operator[](std::size_t n) const { return parent->data[idx + n * stride()]; }
|
||||
float& at(std::size_t n) { return parent->data.at(idx + n * stride()); }
|
||||
float at(std::size_t n) const { return parent->data.at(idx + n * stride()); }
|
||||
bool is_integer() const {
|
||||
return type == 'H' || type == 'B' || type == 'Y' || type == 'I';
|
||||
}
|
||||
|
||||
const Column* get_next_column_if_type(char next_type) const {
|
||||
if (idx + 1 < parent->columns.size()) {
|
||||
const Column& next_col = parent->columns[idx + 1];
|
||||
if (next_col.dataset_id == dataset_id && next_col.type == next_type)
|
||||
return &next_col;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
using iterator = StrideIter<float>;
|
||||
iterator begin() {
|
||||
assert(parent);
|
||||
assert(&parent->columns[idx] == this);
|
||||
return iterator({parent->data.data(), idx, stride()});
|
||||
}
|
||||
iterator end() {
|
||||
return iterator({parent->data.data() + parent->data.size(), idx,
|
||||
stride()});
|
||||
}
|
||||
using const_iterator = StrideIter<const float>;
|
||||
const_iterator begin() const { return const_cast<Column*>(this)->begin(); }
|
||||
const_iterator end() const { return const_cast<Column*>(this)->end(); }
|
||||
};
|
||||
|
||||
struct Batch {
|
||||
Batch() {
|
||||
ints.resize(29, 0);
|
||||
floats.resize(156, 0.);
|
||||
// write the same values that are written by CCP4 progs such as COMBAT
|
||||
ints[0] = 29 + 156;
|
||||
ints[1] = 29;
|
||||
ints[2] = 156;
|
||||
// COMBAT sets BSCALE=1, but Pointless sets it to 0.
|
||||
//floats[43] = 1.f; // batch scale
|
||||
}
|
||||
int number = 0;
|
||||
std::string title;
|
||||
std::vector<int> ints;
|
||||
std::vector<float> floats;
|
||||
std::vector<std::string> axes;
|
||||
|
||||
UnitCell get_cell() const {
|
||||
return UnitCell(floats[0], floats[1], floats[2],
|
||||
floats[3], floats[4], floats[5]);
|
||||
}
|
||||
void set_cell(const UnitCell& uc) {
|
||||
floats[0] = (float) uc.a;
|
||||
floats[1] = (float) uc.b;
|
||||
floats[2] = (float) uc.c;
|
||||
floats[3] = (float) uc.alpha;
|
||||
floats[4] = (float) uc.beta;
|
||||
floats[5] = (float) uc.gamma;
|
||||
}
|
||||
|
||||
int dataset_id() const { return ints[20]; }
|
||||
void set_dataset_id(int id) { ints[20] = id; }
|
||||
float wavelength() const { return floats[86]; }
|
||||
void set_wavelength(float lambda) { floats[86] = lambda; }
|
||||
float phi_start() const { return floats[36]; }
|
||||
float phi_end() const { return floats[37]; }
|
||||
Mat33 matrix_U() const {
|
||||
return Mat33(floats[6], floats[9], floats[12],
|
||||
floats[7], floats[10], floats[13],
|
||||
floats[8], floats[11], floats[14]);
|
||||
}
|
||||
};
|
||||
|
||||
std::vector<Dataset> datasets;
|
||||
std::vector<Column> columns;
|
||||
std::vector<Batch> batches;
|
||||
std::vector<float> data;
|
||||
|
||||
explicit Mtz(bool with_base=false) {
|
||||
if (with_base)
|
||||
add_base();
|
||||
}
|
||||
Mtz(Mtz&& o) noexcept { *this = std::move(o); }
|
||||
Mtz& operator=(Mtz&& o) noexcept {
|
||||
MtzMetadata::operator=(std::move(o));
|
||||
datasets = std::move(o.datasets);
|
||||
columns = std::move(o.columns);
|
||||
batches = std::move(o.batches);
|
||||
data = std::move(o.data);
|
||||
for (Mtz::Column& col : columns)
|
||||
col.parent = this;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// explicit to be aware where we make copies
|
||||
explicit Mtz(const Mtz& o) : MtzMetadata(o) {
|
||||
datasets = o.datasets;
|
||||
columns = o.columns;
|
||||
batches = o.batches;
|
||||
data = o.data;
|
||||
for (Mtz::Column& col : columns)
|
||||
col.parent = this;
|
||||
}
|
||||
|
||||
Mtz& operator=(Mtz const&) = delete;
|
||||
|
||||
void add_base() {
|
||||
datasets.push_back({0, "HKL_base", "HKL_base", "HKL_base", cell, 0.});
|
||||
for (int i = 0; i != 3; ++i)
|
||||
add_column(std::string(1, "HKL"[i]), 'H', 0, i, false);
|
||||
}
|
||||
|
||||
// Functions to use after the MTZ headers (and data) have been read.
|
||||
|
||||
double resolution_high() const { return std::sqrt(1.0 / max_1_d2); }
|
||||
double resolution_low() const { return std::sqrt(1.0 / min_1_d2); }
|
||||
|
||||
UnitCell& get_cell(int dataset=-1) {
|
||||
for (Dataset& ds : datasets)
|
||||
if (ds.id == dataset && ds.cell.is_crystal() && ds.cell.a > 0)
|
||||
return ds.cell;
|
||||
return cell;
|
||||
}
|
||||
|
||||
const UnitCell& get_cell(int dataset=-1) const {
|
||||
return const_cast<Mtz*>(this)->get_cell(dataset);
|
||||
}
|
||||
|
||||
void set_cell_for_all(const UnitCell& new_cell) {
|
||||
cell = new_cell;
|
||||
cell.set_cell_images_from_spacegroup(spacegroup); // probably not needed
|
||||
for (Dataset& ds : datasets)
|
||||
ds.cell = cell;
|
||||
}
|
||||
|
||||
UnitCellParameters get_average_cell_from_batch_headers(double* rmsd) const;
|
||||
|
||||
void set_spacegroup(const SpaceGroup* new_sg) {
|
||||
spacegroup = new_sg;
|
||||
spacegroup_number = new_sg ? spacegroup->ccp4 : 0;
|
||||
spacegroup_name = new_sg ? spacegroup->hm : "";
|
||||
}
|
||||
|
||||
Dataset& last_dataset() {
|
||||
if (datasets.empty())
|
||||
fail("MTZ dataset not found (missing DATASET header line?).");
|
||||
return datasets.back();
|
||||
}
|
||||
|
||||
Dataset& dataset(int id) {
|
||||
if ((size_t)id < datasets.size() && datasets[id].id == id)
|
||||
return datasets[id];
|
||||
for (Dataset& d : datasets)
|
||||
if (d.id == id)
|
||||
return d;
|
||||
fail("MTZ file has no dataset with ID " + std::to_string(id));
|
||||
}
|
||||
const Dataset& dataset(int id) const {
|
||||
return const_cast<Mtz*>(this)->dataset(id);
|
||||
}
|
||||
|
||||
Dataset* dataset_with_name(const std::string& name) {
|
||||
for (Dataset& d : datasets)
|
||||
if (d.dataset_name == name)
|
||||
return &d;
|
||||
return nullptr;
|
||||
}
|
||||
const Dataset* dataset_with_name(const std::string& label) const {
|
||||
return const_cast<Mtz*>(this)->dataset_with_name(label);
|
||||
}
|
||||
|
||||
int count(const std::string& label) const {
|
||||
int n = 0;
|
||||
for (const Column& col : columns)
|
||||
if (col.label == label)
|
||||
++n;
|
||||
return n;
|
||||
}
|
||||
|
||||
int count_type(char type) const {
|
||||
int n = 0;
|
||||
for (const Column& col : columns)
|
||||
if (col.type == type)
|
||||
++n;
|
||||
return n;
|
||||
}
|
||||
|
||||
Column* column_with_label(const std::string& label, const Dataset* ds=nullptr, char type='*') {
|
||||
for (Column& col : columns)
|
||||
if (col.label == label && (!ds || ds->id == col.dataset_id)
|
||||
&& (type == '*' || type == col.type))
|
||||
return &col;
|
||||
return nullptr;
|
||||
}
|
||||
const Column* column_with_label(const std::string& label, const Dataset* ds=nullptr,
|
||||
char type='*') const {
|
||||
return const_cast<Mtz*>(this)->column_with_label(label, ds, type);
|
||||
}
|
||||
|
||||
const Column& get_column_with_label(const std::string& label, const Dataset* ds=nullptr) const {
|
||||
if (const Column* col = column_with_label(label, ds))
|
||||
return *col;
|
||||
fail("Column label not found: " + label);
|
||||
}
|
||||
|
||||
std::vector<const Column*> columns_with_type(char type) const {
|
||||
std::vector<const Column*> cols;
|
||||
for (const Column& col : columns)
|
||||
if (col.type == type)
|
||||
cols.push_back(&col);
|
||||
return cols;
|
||||
}
|
||||
|
||||
std::vector<int> positions_of_columns_with_type(char col_type) const {
|
||||
std::vector<int> cols;
|
||||
for (int i = 0; i < (int) columns.size(); ++i)
|
||||
if (columns[i].type == col_type)
|
||||
cols.push_back(i);
|
||||
return cols;
|
||||
}
|
||||
|
||||
// F(+)/(-) pairs should have type G (and L for sigma),
|
||||
// I(+)/(-) -- K (M for sigma), but E(+)/(-) has no special column type,
|
||||
// so here we use column labels not types.
|
||||
std::vector<std::pair<int,int>> positions_of_plus_minus_columns() const {
|
||||
std::vector<std::pair<int,int>> r;
|
||||
for (int i = 0; i < (int) columns.size(); ++i) {
|
||||
const Column& col = columns[i];
|
||||
size_t sign_pos = col.label.find("(+)");
|
||||
if (sign_pos != std::string::npos) {
|
||||
std::string minus_label = columns[i].label;
|
||||
minus_label[sign_pos+1] = '-';
|
||||
for (int j = 0; j < (int) columns.size(); ++j)
|
||||
if (columns[j].label == minus_label &&
|
||||
columns[j].type == col.type &&
|
||||
columns[j].dataset_id == col.dataset_id) {
|
||||
r.emplace_back(i, j);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
/// the order of labels matters
|
||||
const Column* column_with_one_of_labels(std::initializer_list<const char*> labels,
|
||||
char type='*') const {
|
||||
for (const char* label : labels)
|
||||
if (const Column* col = column_with_label(label, nullptr, type))
|
||||
return col;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// the order of labels doesn't matter
|
||||
Column* column_with_type_and_any_of_labels(char type, std::initializer_list<const char*> labels) {
|
||||
for (Column& col : columns)
|
||||
if (col.type == type) {
|
||||
for (const char* label : labels)
|
||||
if (col.label == label)
|
||||
return &col;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Column* rfree_column() {
|
||||
// cf. MtzToCif::default_spec in mtz2cif.hpp
|
||||
return column_with_type_and_any_of_labels('I',
|
||||
{"FREE", "RFREE", "FREER", "FreeR_flag", "R-free-flags", "FreeRflag", "R_FREE_FLAGS"});
|
||||
}
|
||||
const Column* rfree_column() const {
|
||||
return const_cast<Mtz*>(this)->rfree_column();
|
||||
}
|
||||
|
||||
Column* imean_column() {
|
||||
return column_with_type_and_any_of_labels('J', {"IMEAN", "I", "IOBS", "I-obs"});
|
||||
}
|
||||
const Column* imean_column() const {
|
||||
return const_cast<Mtz*>(this)->imean_column();
|
||||
}
|
||||
|
||||
Column* iplus_column() {
|
||||
return column_with_type_and_any_of_labels('K', {"I(+)", "IOBS(+)", "I-obs(+)", "Iplus"});
|
||||
}
|
||||
const Column* iplus_column() const {
|
||||
return const_cast<Mtz*>(this)->iplus_column();
|
||||
}
|
||||
|
||||
Column* iminus_column() {
|
||||
return column_with_type_and_any_of_labels('K', {"I(-)", "IOBS(-)", "I-obs(-)", "Iminus"});
|
||||
}
|
||||
const Column* iminus_column() const {
|
||||
return const_cast<Mtz*>(this)->iminus_column();
|
||||
}
|
||||
|
||||
bool has_data() const {
|
||||
return data.size() == columns.size() * nreflections;
|
||||
}
|
||||
|
||||
bool is_merged() const { return batches.empty(); }
|
||||
|
||||
/// Calculates min/max for all combinations of reflections and unit cells,
|
||||
/// where unit cells are a global CELL and per-dataset DCELL.
|
||||
std::array<double,2> calculate_min_max_1_d2() const;
|
||||
|
||||
void update_reso() {
|
||||
std::array<double,2> reso = calculate_min_max_1_d2();
|
||||
min_1_d2 = reso[0];
|
||||
max_1_d2 = reso[1];
|
||||
}
|
||||
|
||||
// Functions for reading MTZ headers and data.
|
||||
|
||||
void toggle_endianness() {
|
||||
same_byte_order = !same_byte_order;
|
||||
swap_eight_bytes(&header_offset);
|
||||
}
|
||||
|
||||
void read_first_bytes(AnyStream& stream);
|
||||
|
||||
/// read headers until END
|
||||
void read_main_headers(AnyStream& stream, std::vector<std::string>* save_headers);
|
||||
|
||||
/// read the part between END and MTZENDOFHEADERS
|
||||
void read_history_and_batch_headers(AnyStream& stream);
|
||||
|
||||
void setup_spacegroup();
|
||||
|
||||
void read_raw_data(AnyStream& stream, bool do_read=true);
|
||||
|
||||
void read_all_headers(AnyStream& stream);
|
||||
|
||||
void read_stream(AnyStream& stream, bool with_data);
|
||||
|
||||
void read_file(const std::string& path) {
|
||||
try {
|
||||
source_path = path;
|
||||
FileStream stream(path.c_str(), "rb");
|
||||
read_stream(stream, true);
|
||||
} catch (std::system_error&) {
|
||||
throw; // system_error::what() includes path, don't add anything
|
||||
} catch (std::runtime_error& e) {
|
||||
fail(std::string(e.what()) + ": " + path);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Input>
|
||||
void read_input(Input&& input, bool with_data) {
|
||||
source_path = input.path();
|
||||
read_stream(*input.create_stream(), with_data);
|
||||
}
|
||||
|
||||
/// the same as read_input(MaybeGzipped(path), with_data)
|
||||
void read_file_gz(const std::string& path, bool with_data=true);
|
||||
|
||||
std::vector<int> sorted_row_indices(int use_first=3) const;
|
||||
bool sort(int use_first=3);
|
||||
|
||||
Miller get_hkl(size_t offset) const {
|
||||
return {{(int)data[offset], (int)data[offset+1], (int)data[offset+2]}};
|
||||
}
|
||||
void set_hkl(size_t offset, const Miller& hkl) {
|
||||
for (int i = 0; i != 3; ++i)
|
||||
data[offset + i] = static_cast<float>(hkl[i]);
|
||||
}
|
||||
|
||||
/// Returns offset of the first hkl or (size_t)-1. Can be slow.
|
||||
size_t find_offset_of_hkl(const Miller& hkl, size_t start=0) const;
|
||||
|
||||
/// (for merged MTZ only) change HKL to ASU equivalent, adjust phases, etc
|
||||
void ensure_asu(bool tnt_asu=false);
|
||||
|
||||
/// Reindex data, usually followed by ensure_asu(). Outputs messages through logger.
|
||||
void reindex(const Op& op);
|
||||
|
||||
/// Change symmetry to P1 and expand reflections. Does not sort.
|
||||
/// Similar to command EXPAND in SFTOOLS.
|
||||
void expand_to_p1();
|
||||
|
||||
/// (for unmerged MTZ only) change HKL according to M/ISYM
|
||||
bool switch_to_original_hkl();
|
||||
|
||||
/// (for unmerged MTZ only) change HKL to ASU equivalent and set ISYM
|
||||
bool switch_to_asu_hkl();
|
||||
|
||||
// Appends a dataset whose project, crystal and dataset names are all `name`,
// with a copy of the global cell and wavelength 0 (not set).
// The new ID is one more than the largest existing ID (0 if there is none).
Dataset& add_dataset(const std::string& name) {
  int next_id = 0;
  for (const Dataset& ds : datasets)
    next_id = std::max(next_id, ds.id + 1);
  datasets.push_back({next_id, name, name, name, cell, 0.0});
  return datasets.back();
}
|
||||
|
||||
Column& add_column(const std::string& label, char type,
|
||||
int dataset_id, int pos, bool expand_data);
|
||||
|
||||
// trailing_cols are columns right after src_col that are also copied.
|
||||
Column& replace_column(size_t dest_idx, const Column& src_col,
|
||||
const std::vector<std::string>& trailing_cols={});
|
||||
|
||||
// If dest_idx < 0, new column(s) are appended at the end;
|
||||
// otherwise, existing ones are overwritten.
|
||||
Column& copy_column(int dest_idx, const Column& src_col,
|
||||
const std::vector<std::string>& trailing_cols={});
|
||||
|
||||
void remove_column(size_t idx);
|
||||
|
||||
template <typename Func>
|
||||
void remove_rows_if(Func condition) {
|
||||
if (!has_data())
|
||||
fail("No data.");
|
||||
auto out = data.begin();
|
||||
size_t width = columns.size();
|
||||
for (auto r = data.begin(); r < data.end(); r += width)
|
||||
if (!condition(&*r)) {
|
||||
if (r != out)
|
||||
std::copy(r, r + width, out);
|
||||
out += width;
|
||||
}
|
||||
data.erase(out, data.end());
|
||||
nreflections = int(data.size() / width);
|
||||
}
|
||||
|
||||
void expand_data_rows(size_t added, int pos_=-1) {
|
||||
size_t old_row_size = columns.size() - added;
|
||||
if (data.size() != old_row_size * nreflections)
|
||||
fail("Internal error");
|
||||
size_t pos = pos_ == -1 ? old_row_size : (size_t) pos_;
|
||||
if (pos > old_row_size)
|
||||
fail("expand_data_rows(): pos out of range");
|
||||
vector_insert_columns(data, old_row_size, (size_t)nreflections, added, pos, NAN);
|
||||
}
|
||||
|
||||
void set_data(const float* new_data, size_t n) {
|
||||
size_t ncols = columns.size();
|
||||
if (n % ncols != 0)
|
||||
fail("Mtz.set_data(): expected " + std::to_string(ncols) + " columns.");
|
||||
nreflections = int(n / ncols);
|
||||
data.assign(new_data, new_data + n);
|
||||
}
|
||||
|
||||
// Function for writing MTZ file
|
||||
void write_to_cstream(std::FILE* stream) const;
|
||||
void write_to_string(std::string& str) const;
|
||||
void write_to_file(const std::string& path) const;
|
||||
size_t size_to_write() const;
|
||||
size_t write_to_buffer(char* buf, size_t maxlen) const;
|
||||
|
||||
private:
|
||||
template<typename Write> void write_to_stream(Write write) const;
|
||||
};
|
||||
|
||||
|
||||
inline Mtz read_mtz_file(const std::string& path) {
|
||||
Mtz mtz;
|
||||
mtz.read_file(path);
|
||||
return mtz;
|
||||
}
|
||||
|
||||
template<typename Input>
|
||||
Mtz read_mtz(Input&& input, bool with_data) {
|
||||
Mtz mtz;
|
||||
mtz.read_input(std::forward<Input>(input), with_data);
|
||||
return mtz;
|
||||
}
|
||||
|
||||
// Abstraction of data source, cf. ReflnDataProxy.
|
||||
struct MtzDataProxy {
|
||||
const Mtz& mtz_;
|
||||
size_t stride() const { return mtz_.columns.size(); }
|
||||
size_t size() const { return mtz_.data.size(); }
|
||||
using num_type = float;
|
||||
float get_num(size_t n) const { return mtz_.data[n]; }
|
||||
const UnitCell& unit_cell() const { return mtz_.cell; }
|
||||
const SpaceGroup* spacegroup() const { return mtz_.spacegroup; }
|
||||
Miller get_hkl(size_t offset) const { return mtz_.get_hkl(offset); }
|
||||
|
||||
size_t column_index(const std::string& label) const {
|
||||
if (const Mtz::Column* col = mtz_.column_with_label(label))
|
||||
return col->idx;
|
||||
fail("MTZ file has no column with label: " + label);
|
||||
}
|
||||
};
|
||||
|
||||
// Like above, but here the data is stored outside of the Mtz class
|
||||
struct MtzExternalDataProxy : MtzDataProxy {
|
||||
const float* data_;
|
||||
MtzExternalDataProxy(const Mtz& mtz, const float* data)
|
||||
: MtzDataProxy{mtz}, data_(data) {}
|
||||
size_t size() const { return mtz_.columns.size() * mtz_.nreflections; }
|
||||
float get_num(size_t n) const { return data_[n]; }
|
||||
Miller get_hkl(size_t offset) const {
|
||||
return {{(int)data_[offset + 0],
|
||||
(int)data_[offset + 1],
|
||||
(int)data_[offset + 2]}};
|
||||
}
|
||||
};
|
||||
|
||||
inline MtzDataProxy data_proxy(const Mtz& mtz) { return {mtz}; }
|
||||
|
||||
} // namespace gemmi
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,80 @@
|
||||
// Copyright 2017 Global Phasing Ltd.
|
||||
//
|
||||
// interface to stb_sprintf: snprintf_z, to_str(float|double)
|
||||
|
||||
#ifndef GEMMI_SPRINTF_HPP_
|
||||
#define GEMMI_SPRINTF_HPP_
|
||||
|
||||
#include <string>
|
||||
#ifdef __has_include
|
||||
# if __has_include(<charconv>) && !(defined(_MSVC_LANG) && _MSVC_LANG < 201703L)
|
||||
# include <charconv>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if __cpp_lib_to_chars < 201611L
|
||||
# include <algorithm> // for min
|
||||
#endif
|
||||
|
||||
#include "fail.hpp" // for GEMMI_DLL
|
||||
|
||||
namespace gemmi {
|
||||
|
||||
// On MinGW format(printf) doesn't support %zu.
|
||||
#if (defined(__GNUC__) && !defined(__MINGW32__)) || defined(__clang__)
|
||||
# define GEMMI_ATTRIBUTE_FORMAT(fmt,va) __attribute__((format(printf,fmt,va)))
|
||||
#else
|
||||
# define GEMMI_ATTRIBUTE_FORMAT(fmt,va)
|
||||
#endif
|
||||
/// stb_snprintf in gemmi namespace - like snprintf, but ignores locale
|
||||
/// and is always zero-terminated (hence _z).
|
||||
GEMMI_DLL int snprintf_z(char *buf, int count, char const *fmt, ...)
|
||||
GEMMI_ATTRIBUTE_FORMAT(3,4);
|
||||
/// stb_sprintf in gemmi namespace
|
||||
GEMMI_DLL int sprintf_z(char *buf, char const *fmt, ...) GEMMI_ATTRIBUTE_FORMAT(2,3);
|
||||
|
||||
inline std::string to_str(double d) {
|
||||
char buf[24];
|
||||
int len = sprintf_z(buf, "%.9g", d);
|
||||
return std::string(buf, len > 0 ? len : 0);
|
||||
}
|
||||
|
||||
inline std::string to_str(float d) {
|
||||
char buf[16];
|
||||
int len = sprintf_z(buf, "%.6g", d);
|
||||
return std::string(buf, len > 0 ? len : 0);
|
||||
}
|
||||
|
||||
template<int Prec>
|
||||
std::string to_str_prec(double d) {
|
||||
static_assert(Prec >= 0 && Prec < 7, "unsupported precision");
|
||||
char buf[16];
|
||||
int len = d > -1e8 && d < 1e8 ? sprintf_z(buf, "%.*f", Prec, d)
|
||||
: sprintf_z(buf, "%g", d);
|
||||
return std::string(buf, len > 0 ? len : 0);
|
||||
}
|
||||
|
||||
/// zero-terminated to_chars()
|
||||
inline char* to_chars_z(char* first, char* last, int value) {
|
||||
#if __cpp_lib_to_chars >= 201611L
|
||||
auto result = std::to_chars(first, last-1, value);
|
||||
*result.ptr = '\0';
|
||||
return result.ptr;
|
||||
#else
|
||||
int n = snprintf_z(first, int(last - first), "%d", value);
|
||||
return std::min(first + n, last - 1);
|
||||
#endif
|
||||
}
|
||||
inline char* to_chars_z(char* first, char* last, size_t value) {
|
||||
#if __cpp_lib_to_chars >= 201611L
|
||||
auto result = std::to_chars(first, last-1, value);
|
||||
*result.ptr = '\0';
|
||||
return result.ptr;
|
||||
#else
|
||||
int n = snprintf_z(first, int(last - first), "%zu", value);
|
||||
return std::min(first + n, last - 1);
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace gemmi
|
||||
#endif
|
||||
+4933
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,315 @@
|
||||
// Copyright 2017 Global Phasing Ltd.
|
||||
//
|
||||
// Utilities. Mostly for working with strings and vectors.
|
||||
|
||||
#ifndef GEMMI_UTIL_HPP_
|
||||
#define GEMMI_UTIL_HPP_
|
||||
|
||||
#include <cassert>
|
||||
#include <cctype> // for isspace
|
||||
#include <cstring> // for strncmp
|
||||
#include <algorithm> // for equal, find, remove_if
|
||||
#include <iterator> // for begin, end, make_move_iterator
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace gemmi {
|
||||
|
||||
// ##### string helpers #####
|
||||
|
||||
inline void append_to_str(std::string& out, int v) { out += std::to_string(v); }
|
||||
inline void append_to_str(std::string& out, size_t v) { out += std::to_string(v); }
|
||||
void append_to_str(std::string& out, double) = delete;
|
||||
template<typename T>
|
||||
void append_to_str(std::string& out, const T& v) { out += v; }
|
||||
|
||||
inline void cat_to(std::string&) {}
|
||||
template <typename T, typename... Args>
|
||||
void cat_to(std::string& out, const T& value, Args const&... args) {
|
||||
append_to_str(out, value);
|
||||
cat_to(out, args...);
|
||||
}
|
||||
template <class... Args>
|
||||
std::string cat(Args const&... args) {
|
||||
std::string out;
|
||||
cat_to(out, args...);
|
||||
return out;
|
||||
}
|
||||
|
||||
// Returns true if the first prefix.length() characters of str equal prefix
// (an empty prefix always matches).
inline bool starts_with(const std::string& str, const std::string& prefix) {
  if (prefix.length() > str.length())
    return false;
  return std::equal(prefix.begin(), prefix.end(), str.begin());
}
|
||||
|
||||
template<size_t N> bool starts_with(const char* a, const char (&b)[N]) {
|
||||
return std::strncmp(a, b, N-1) == 0;
|
||||
}
|
||||
|
||||
// Returns true if the last suffix.length() characters of str equal suffix
// (an empty suffix always matches).
inline bool ends_with(const std::string& str, const std::string& suffix) {
  if (suffix.length() > str.length())
    return false;
  // compare from the back, so no offset arithmetic is needed
  return std::equal(suffix.rbegin(), suffix.rend(), str.rbegin());
}
|
||||
|
||||
// can be faster than std::tolower() b/c it takes char not int
|
||||
inline char lower(char c) {
|
||||
if (c >= 'A' && c <= 'Z')
|
||||
return c | 0x20;
|
||||
return c;
|
||||
}
|
||||
|
||||
// works as expected only for a-zA-Z
|
||||
inline char alpha_up(char c) { return c & ~0x20; }
|
||||
|
||||
// Returns a copy of str with the ASCII letters A-Z replaced by a-z.
// All other characters are left untouched.
inline std::string to_lower(std::string str) {
  std::transform(str.begin(), str.end(), str.begin(), [](char c) {
    return (c >= 'A' && c <= 'Z') ? static_cast<char>(c | 0x20) : c;
  });
  return str;
}
|
||||
|
||||
// Returns a copy of str with the ASCII letters a-z replaced by A-Z.
// All other characters are left untouched.
inline std::string to_upper(std::string str) {
  std::transform(str.begin(), str.end(), str.begin(), [](char c) {
    return (c >= 'a' && c <= 'z') ? static_cast<char>(c & ~0x20) : c;
  });
  return str;
}
|
||||
|
||||
// case-insensitive character comparison
|
||||
inline bool isame(char a, char b) {
|
||||
return a == b || ((a^b) == 0x20 && (a|0x20) >= 'a' && (a|0x20) <= 'z');
|
||||
}
|
||||
|
||||
// Case-insensitive comparisons. The second arg must be lowercase.
|
||||
|
||||
inline bool iequal_from(const std::string& str, size_t offset, const std::string& low) {
|
||||
return str.length() == low.length() + offset &&
|
||||
std::equal(std::begin(low), std::end(low), str.begin() + offset,
|
||||
[](char c1, char c2) { return c1 == lower(c2); });
|
||||
}
|
||||
|
||||
inline bool iequal(const std::string& str, const std::string& low) {
|
||||
return iequal_from(str, 0, low);
|
||||
}
|
||||
|
||||
inline bool istarts_with(const std::string& str, const std::string& prefix) {
|
||||
return str.length() >= prefix.length() &&
|
||||
std::equal(std::begin(prefix), std::end(prefix), str.begin(),
|
||||
[](char c1, char c2) { return c1 == lower(c2); });
|
||||
}
|
||||
inline bool iends_with(const std::string& str, const std::string& suffix) {
|
||||
size_t sl = suffix.length();
|
||||
return str.length() >= sl &&
|
||||
std::equal(std::begin(suffix), std::end(suffix), str.end() - sl,
|
||||
[](char c1, char c2) { return c1 == lower(c2); });
|
||||
}
|
||||
|
||||
inline bool giends_with(const std::string& str, const std::string& suffix) {
|
||||
return iends_with(str, suffix) || iends_with(str, suffix + ".gz");
|
||||
}
|
||||
|
||||
// Returns a copy of str without leading and trailing spaces, CRs, LFs and
// tabs. A string made only of such characters gives an empty result.
inline std::string trim_str(const std::string& str) {
  const char* const blanks = " \r\n\t";
  const std::string::size_type head = str.find_first_not_of(blanks);
  if (head == std::string::npos)
    return std::string{};
  const std::string::size_type tail = str.find_last_not_of(blanks);
  return str.substr(head, tail + 1 - head);
}
|
||||
|
||||
inline std::string rtrim_str(const std::string& str) {
|
||||
std::string::size_type last = str.find_last_not_of(" \r\n\t");
|
||||
return str.substr(0, last == std::string::npos ? 0 : last + 1);
|
||||
}
|
||||
|
||||
// end is after the last character of the string (typically \0)
|
||||
// Returns a pointer just past the last non-whitespace character in
// [start, end). If end is null, the string is taken to be zero-terminated.
// Returns nullptr if start is null, and start if the range is all whitespace.
inline const char* rtrim_cstr(const char* start, const char* end=nullptr) {
  if (!start)
    return nullptr;
  if (!end)
    end = start + std::strlen(start);
  // std::isspace() has undefined behaviour for negative arguments other than
  // EOF, so a (possibly signed) char must be converted to unsigned char first.
  while (end > start && std::isspace(static_cast<unsigned char>(end[-1])))
    --end;
  return end;
}
|
||||
|
||||
namespace impl {
// length of a separator: 1 for a single character, length() for a string
inline size_t length(char) { return 1; }
inline size_t length(const std::string& s) { return s.length(); }
}

// Splits str on a single separator (usually char or string) and appends the
// fields to result; may produce empty fields.
// An empty string separator cannot advance the search position (it used to
// loop forever), so in that case str is appended as a single field.
template<typename S>
void split_str_into(const std::string& str, S sep,
                    std::vector<std::string>& result) {
  const std::size_t sep_len = impl::length(sep);
  if (sep_len == 0) {
    result.push_back(str);
    return;
  }
  std::size_t start = 0, end;
  while ((end = str.find(sep, start)) != std::string::npos) {
    result.emplace_back(str, start, end - start);
    start = end + sep_len;
  }
  // the remainder after the last separator (possibly empty)
  result.emplace_back(str, start);
}
|
||||
|
||||
template<typename S>
|
||||
std::vector<std::string> split_str(const std::string& str, S sep) {
|
||||
std::vector<std::string> result;
|
||||
split_str_into(str, sep, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
// _multi variants take multiple 1-char separators as a string;
// empty fields are discarded. Fields are appended to result.
inline void split_str_into_multi(const std::string& str, const char* seps,
                                 std::vector<std::string>& result) {
  for (std::size_t pos = str.find_first_not_of(seps);
       pos != std::string::npos; ) {
    const std::size_t stop = str.find_first_of(seps, pos);
    // when stop is npos the count is clamped to the rest of the string
    result.emplace_back(str, pos, stop - pos);
    pos = str.find_first_not_of(seps, stop);
  }
}
|
||||
|
||||
// Convenience form of split_str_into_multi() that returns the fields by
// value; the default separators are space and tab.
inline std::vector<std::string> split_str_multi(const std::string& str,
                                                const char* seps=" \t") {
  std::vector<std::string> fields;
  split_str_into_multi(str, seps, fields);
  return fields;
}
|
||||
|
||||
// Concatenates getter(*i) for every i in [begin, end), putting sep
// between consecutive items.
template<typename T, typename S, typename F>
std::string join_str(T begin, T end, const S& sep, const F& getter) {
  std::string joined;
  T it = begin;
  if (it != end) {
    joined += getter(*it);
    for (++it; it != end; ++it) {
      joined += sep;
      joined += getter(*it);
    }
  }
  return joined;
}

// Joins a range of strings with sep.
template<typename T, typename S>
std::string join_str(T begin, T end, const S& sep) {
  auto identity = [](const std::string& t) { return t; };
  return join_str(begin, end, sep, identity);
}

// Joins all items of a container, converting each item with getter.
template<typename T, typename S, typename F>
std::string join_str(const T& iterable, const S& sep, const F& getter) {
  return join_str(iterable.begin(), iterable.end(), sep, getter);
}

// Joins all strings of a container with sep.
template<typename T, typename S>
std::string join_str(const T& iterable, const S& sep) {
  return join_str(iterable.begin(), iterable.end(), sep);
}
|
||||
|
||||
// Appends item to str, preceded by sep unless str is still empty.
template<typename T, typename S>
void string_append_sep(std::string& str, S sep, const T& item) {
  if (str.empty()) {
    str += item;
    return;
  }
  str += sep;
  str += item;
}
|
||||
|
||||
// Replaces, in place, every non-overlapping occurrence of `old` in `s`
// with `new_`. Text inserted by a replacement is not scanned again.
// An empty `old` leaves `s` unchanged.
inline void replace_all(std::string &s,
                        const std::string &old, const std::string &new_) {
  // find("") matches at every position, so without this guard the loop
  // would never terminate (and would grow s when new_ is non-empty).
  if (old.empty())
    return;
  std::string::size_type pos = 0;
  while ((pos = s.find(old, pos)) != std::string::npos) {
    s.replace(pos, old.size(), new_);
    pos += new_.size();  // continue after the inserted text
  }
}
|
||||
|
||||
// Checks if name equals one of the items of list, where list is a string
// of items separated by sep (comma by default).
inline bool is_in_list(const std::string& name, const std::string& list,
                       char sep=',') {
  // name cannot be a proper item of a multi-item list that is not longer
  if (name.length() >= list.length())
    return name == list;
  size_t item_start = 0;
  for (;;) {
    const size_t item_end = list.find(sep, item_start);
    // for the last item item_end is npos and the count is clamped
    if (list.compare(item_start, item_end - item_start, name) == 0)
      return true;
    if (item_end == std::string::npos)
      return false;
    item_start = item_end + 1;
  }
}
|
||||
|
||||
// ##### vector helpers #####
|
||||
|
||||
// True if some element of v compares equal to x.
template <class T>
bool in_vector(const T& x, const std::vector<T>& v) {
  for (const T& item : v)
    if (item == x)
      return true;
  return false;
}
|
||||
|
||||
// True if predicate f returns true for at least one element of v.
template <typename F, typename T>
bool in_vector_f(F f, const std::vector<T>& v) {
  return std::any_of(v.begin(), v.end(), f);
}
|
||||
|
||||
// Pointer one past the last element of v (data() + size()).
template <class T>
T* vector_end_ptr(std::vector<T>& v) {
  T* const first = v.data();
  return first + v.size();
}
// Const overload of the above.
template <class T>
const T* vector_end_ptr(const std::vector<T>& v) {
  const T* const first = v.data();
  return first + v.size();
}
|
||||
|
||||
// Appends the elements of src to dst by moving them. If dst is empty,
// the whole vector is move-assigned instead.
template <class T>
void vector_move_extend(std::vector<T>& dst, std::vector<T>&& src) {
  if (!dst.empty()) {
    dst.insert(dst.end(),
               std::make_move_iterator(src.begin()),
               std::make_move_iterator(src.end()));
    return;
  }
  dst = std::move(src);
}
|
||||
|
||||
// Wrapper around the erase-remove idiom: removes from v all elements
// for which condition returns true.
template <class T, typename F>
void vector_remove_if(std::vector<T>& v, F&& condition) {
  auto kept_end = std::remove_if(v.begin(), v.end(), condition);
  v.erase(kept_end, v.end());
}
|
||||
|
||||
/// \par data - 2d array (old_width x length) in a vector
/// Insert \par n new columns at position pos, filled with new_value.
/// The array is expanded in place, working from the last row backwards
/// so that no element is overwritten before it has been moved.
template <class T>
void vector_insert_columns(std::vector<T>& data, size_t old_width,
                           size_t length, size_t n, size_t pos, const T& new_value) {
  assert(data.size() == old_width * length);
  assert(pos <= old_width);
  data.resize(data.size() + n * length);
  size_t out = data.size();  // index one past the next slot to be written
  for (size_t row = length; row-- != 0; ) {
    const size_t row_begin = row * old_width;
    // old columns located after the insertion point
    for (size_t col = old_width; col-- != pos; )
      data[--out] = data[row_begin + col];
    // the new columns
    for (size_t k = 0; k != n; ++k)
      data[--out] = new_value;
    // old columns located before the insertion point
    for (size_t col = pos; col-- != 0; )
      data[--out] = data[row_begin + col];
  }
  assert(out == 0);
}
|
||||
/// \par data - 2d array with new_width+1 columns, in a vector
/// Remove column at position pos.
/// Elements are compacted in place; each pass copies up to new_width
/// values and then skips the one element that belongs to the removed column.
template <class T>
void vector_remove_column(std::vector<T>& data, size_t new_width, size_t pos) {
  assert(pos <= new_width);
  size_t src = pos + 1;
  while (src < data.size()) {
    for (size_t k = 0; k < new_width && src < data.size(); ++k)
      data[pos++] = data[src++];
    ++src;  // skip the removed column in the next row
  }
  data.resize(pos);
}
|
||||
|
||||
|
||||
// ##### other helpers #####
|
||||
|
||||
// Numeric ID used for case-insensitive comparison of 4 letters.
// s must have 4 chars or 3 chars + NUL, ' ' and NUL are equivalent in s.
// The four bytes are packed into one int and bit 0x20 of each byte
// (the ASCII case bit) is cleared.
constexpr int ialpha4_id(const char* s) {
  return ~0x20202020 & (s[0] << 24 | s[1] << 16 | s[2] << 8 | s[3]);
}
|
||||
// Numeric ID used for case-insensitive comparison of 3 letters.
// The three bytes are packed into one int and bit 0x20 of each byte
// (the ASCII case bit) is cleared.
constexpr int ialpha3_id(const char* s) {
  return ~0x20202020 & (s[0] << 16 | s[1] << 8 | s[2]);
}
|
||||
|
||||
} // namespace gemmi
|
||||
#endif
|
||||
@@ -0,0 +1,183 @@
|
||||
// Copyright 2020 Global Phasing Ltd.
|
||||
//
|
||||
// Read XDS files: XDS_ASCII.HKL and INTEGRATE.HKL.
|
||||
|
||||
#ifndef GEMMI_XDS_ASCII_HPP_
|
||||
#define GEMMI_XDS_ASCII_HPP_
|
||||
|
||||
#include "input.hpp" // for AnyStream, FileStream
|
||||
#include "unitcell.hpp" // for UnitCell
|
||||
#include "util.hpp" // for starts_with
|
||||
|
||||
namespace gemmi {
|
||||
|
||||
// from Pointless docs: likely in-house source, in which case
// the unpolarised value is left unchanged (recognised wavelengths
// are CuKalpha 1.5418 +- 0.0019, Mo 0.7107 +- 0.0002, Cr 2.29 +- 0.01)
inline bool likely_in_house_source(double wavelength) {
  auto within = [wavelength](double reference, double tolerance) {
    return std::fabs(wavelength - reference) < tolerance;
  };
  return within(1.5418, 0.0019) ||
         within(0.7107, 0.0002) ||
         within(2.29, 0.01);
}
|
||||
|
||||
struct XdsAsciiMetadata {
|
||||
struct Iset {
|
||||
int id;
|
||||
std::string input_file;
|
||||
double wavelength = 0.;
|
||||
std::array<double,6> cell_constants = {0., 0., 0., 0., 0., 0.};
|
||||
//statistics set by gather_iset_statistics()
|
||||
int frame_number_min = -1;
|
||||
int frame_number_max = -1;
|
||||
int frame_count = -1;
|
||||
int reflection_count = -1;
|
||||
|
||||
Iset(int id_) : id(id_) {}
|
||||
};
|
||||
std::string source_path;
|
||||
int read_columns = 0; // doesn't include ITEM_ISET from XSCALE
|
||||
int spacegroup_number = 0;
|
||||
double wavelength = 0.;
|
||||
std::array<double,6> cell_constants = {0., 0., 0., 0., 0., 0.};
|
||||
Mat33 cell_axes{0.};
|
||||
Vec3 incident_beam_dir;
|
||||
double oscillation_range = 0.;
|
||||
Vec3 rotation_axis;
|
||||
double starting_angle = 0.;
|
||||
double reflecting_range_esd = 0.;
|
||||
char friedels_law = '\0';
|
||||
int starting_frame = 1;
|
||||
int nx = 0; // detector size - number of pixels
|
||||
int ny = 0;
|
||||
double qx = 0.; // pixel size in mm
|
||||
double qy = 0.;
|
||||
double orgx = 0.;
|
||||
double orgy = 0.;
|
||||
double detector_distance = 0.;
|
||||
std::string generated_by;
|
||||
std::string version_str;
|
||||
std::vector<Iset> isets;
|
||||
};
|
||||
|
||||
struct GEMMI_DLL XdsAscii : XdsAsciiMetadata {
|
||||
struct Refl {
|
||||
Miller hkl;
|
||||
int iset = 1;
|
||||
double iobs;
|
||||
double sigma;
|
||||
double xd;
|
||||
double yd;
|
||||
double zd;
|
||||
double rlp;
|
||||
double peak;
|
||||
double corr; // is it always integer?
|
||||
double maxc;
|
||||
|
||||
// ZD can be negative for a few reflections
|
||||
int frame() const { return (int) std::floor(zd + 1); }
|
||||
};
|
||||
std::vector<Refl> data;
|
||||
|
||||
XdsAscii() = default;
|
||||
XdsAscii(const XdsAsciiMetadata& m) : XdsAsciiMetadata(m) {}
|
||||
|
||||
Iset& find_or_add_iset(int id) {
|
||||
for (Iset& i : isets)
|
||||
if (i.id == id)
|
||||
return i;
|
||||
isets.emplace_back(id);
|
||||
return isets.back();
|
||||
}
|
||||
void read_stream(AnyStream& reader, const std::string& source);
|
||||
|
||||
template<typename T>
|
||||
void read_input(T&& input) {
|
||||
read_stream(*input.create_stream(), input.path());
|
||||
}
|
||||
|
||||
bool is_merged() const { return read_columns < 8; }
|
||||
|
||||
// set a few Iset properties in isets
|
||||
void gather_iset_statistics();
|
||||
|
||||
double rot_angle(const Refl& refl) const {
|
||||
double z = refl.zd - starting_frame + 1;
|
||||
return starting_angle + oscillation_range * z;
|
||||
}
|
||||
|
||||
// it's already normalized, but just in case normalize it again
|
||||
Vec3 get_rotation_axis() const {
|
||||
double length = rotation_axis.length();
|
||||
if (length == 0)
|
||||
fail("unknown rotation axis");
|
||||
return rotation_axis / length;
|
||||
}
|
||||
|
||||
// I'm not sure if always |incident_beam_dir| == 1/wavelength
|
||||
Vec3 get_s0_direction() const {
|
||||
double length = incident_beam_dir.length();
|
||||
if (length == 0)
|
||||
fail("unknown incident beam direction");
|
||||
return incident_beam_dir / length;
|
||||
}
|
||||
|
||||
bool has_cell_axes() const {
|
||||
for (int i = 0; i < 3; ++i)
|
||||
if (cell_axes[i][0] == 0 && cell_axes[i][1] == 0 && cell_axes[i][2] == 0)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Return transition matrix from "Cambridge" frame to XDS frame.
|
||||
/// x_xds = M x_cam
|
||||
Mat33 calculate_conversion_from_cambridge() const {
|
||||
// Cambridge z direction is along the principal rotation axis
|
||||
Vec3 z = get_rotation_axis();
|
||||
// Cambridge z direction is along beam
|
||||
Vec3 x = get_s0_direction();
|
||||
Vec3 y = z.cross(x).normalized();
|
||||
// beam and rotation axis may not be orthogonal
|
||||
x = y.cross(z).normalized();
|
||||
return Mat33::from_columns(x, y, z);
|
||||
}
|
||||
|
||||
Mat33 get_orientation() const {
|
||||
if (!has_cell_axes())
|
||||
fail("unknown unit cell axes");
|
||||
Vec3 a = cell_axes.row_copy(0);
|
||||
Vec3 b = cell_axes.row_copy(1);
|
||||
Vec3 c = cell_axes.row_copy(2);
|
||||
Vec3 ar = b.cross(c).normalized();
|
||||
Vec3 br = c.cross(a);
|
||||
Vec3 cr = ar.cross(br).normalized();
|
||||
br = cr.cross(ar);
|
||||
return Mat33::from_columns(ar, br, cr);
|
||||
}
|
||||
|
||||
/// \par p is degree of polarization from range (0,1), as used in XDS.
|
||||
void apply_polarization_correction(double p, Vec3 normal);
|
||||
|
||||
/// \par overload is maximally allowed pixel value in a peak (MAXC).
|
||||
void eliminate_overloads(double overload) {
|
||||
vector_remove_if(data, [&](Refl& r) { return r.maxc > overload; });
|
||||
}
|
||||
|
||||
/// \par batchmin lowest allowed batch number.
|
||||
void eliminate_batchmin(int batchmin) {
|
||||
double minz = batchmin - 1;
|
||||
vector_remove_if(data, [&](Refl& r) { return r.zd < minz; });
|
||||
}
|
||||
};
|
||||
|
||||
// Reads an XDS file from path through a FileStream opened in "rb" mode.
inline XdsAscii read_xds_ascii_file(const std::string& path) {
  FileStream stream(path.c_str(), "rb");
  XdsAscii xds;
  xds.read_stream(stream, path);
  return xds;
}
|
||||
|
||||
/// read possibly gzipped file
|
||||
GEMMI_DLL XdsAscii read_xds_ascii(const std::string& path);
|
||||
|
||||
} // namespace gemmi
|
||||
#endif
|
||||
@@ -0,0 +1,189 @@
|
||||
// Copyright Global Phasing Ltd.
|
||||
|
||||
#include <gemmi/gz.hpp>
|
||||
#include <cassert>
|
||||
#include <cstdio> // fseek, ftell, fread
|
||||
#include <climits> // INT_MAX
|
||||
#if USE_ZLIB_NG
|
||||
# define WITH_GZFILEOP 1
|
||||
# include <zlib-ng.h>
|
||||
# define GG(name) zng_ ## name
|
||||
#else
|
||||
# include <zlib.h>
|
||||
# define GG(name) name
|
||||
#endif
|
||||
#include <gemmi/fileutil.hpp> // file_open
|
||||
|
||||
namespace gemmi {
|
||||
|
||||
const char* const zlib_description =
|
||||
#if USE_ZLIB_NG
|
||||
"zlib-ng " ZLIBNG_VERSION;
|
||||
#else
|
||||
"zlib " ZLIB_VERSION;
|
||||
#endif
|
||||
|
||||
// Throws if the size is not found or if it is suspicious.
|
||||
// Anything outside of the arbitrary limits from 1 to 10x of the compressed
|
||||
// size looks suspicious to us.
|
||||
// **This function should not be relied upon.**
|
||||
// In particular, if the return values is >= 4GiB - it's only a guess.
|
||||
size_t estimate_uncompressed_size(const std::string& path) {
|
||||
fileptr_t f = file_open(path.c_str(), "rb");
|
||||
unsigned char buf[4];
|
||||
if (std::fread(buf, 1, 2, f.get()) != 2)
|
||||
sys_fail("Failed to read: " + path);
|
||||
if (buf[0] != 0x1f || buf[1] != 0x8b)
|
||||
fail("File not in the gzip format: " + path);
|
||||
if (std::fseek(f.get(), -4, SEEK_END) != 0)
|
||||
sys_fail("fseek() failed (empty file?): " + path);
|
||||
long pos = std::ftell(f.get());
|
||||
if (pos <= 0)
|
||||
sys_fail("ftell() failed on " + path);
|
||||
size_t gzipped_size = pos + 4;
|
||||
if (std::fread(buf, 1, 4, f.get()) != 4)
|
||||
sys_fail("Failed to read last 4 bytes of: " + path);
|
||||
unsigned orig_size = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0];
|
||||
if (orig_size + 100 < gzipped_size || orig_size > 100 * gzipped_size) {
|
||||
// The size is stored as 32-bit number. If the original size exceeds 4GiB,
|
||||
// the stored number is modulo 4 GiB. So we just guess...
|
||||
constexpr size_t max_uint = 4294967295U;
|
||||
if (gzipped_size > max_uint / 6)
|
||||
return max_uint + (sizeof(size_t) > 4 ? orig_size : 0);
|
||||
fail("Cannot determine uncompressed size of " + path +
|
||||
"\nWould it be " + std::to_string(gzipped_size) + " -> " +
|
||||
std::to_string(orig_size) + " bytes?");
|
||||
}
|
||||
return orig_size;
|
||||
}
|
||||
|
||||
// Reads up to len bytes from a gz file into buf and returns the number of
// bytes actually read. Unlike gzread(), len may exceed INT_MAX.
// A read error ends the transfer; the bytes read before it are returned.
static size_t big_gzread(gzFile file, void* buf, size_t len) {
#if USE_ZLIB_NG
  return GG(gzfread)(buf, 1, len, file);
#else
  // In zlib >= 1.2.9 we could use gzfread()
  size_t read_bytes = 0;
  while (len > INT_MAX) {
    int ret = gzread(file, buf, INT_MAX);
    // gzread() returns -1 on error; adding that to an unsigned counter
    // would wrap around to a huge bogus byte count.
    if (ret > 0)
      read_bytes += ret;
    if (ret != INT_MAX)
      return read_bytes;
    len -= INT_MAX;
    buf = (char*) buf + INT_MAX;
  }
  int ret = gzread(file, buf, (unsigned) len);
  if (ret > 0)
    read_bytes += ret;
  return read_bytes;
#endif
}
|
||||
|
||||
char* GzStream::gets(char* line, int size) {
|
||||
return GG(gzgets)((gzFile)f, line, size);
|
||||
}
|
||||
|
||||
int GzStream::getc() {
|
||||
return GG(gzgetc)((gzFile)f);
|
||||
}
|
||||
|
||||
bool GzStream::read(void* buf, size_t len) {
|
||||
return big_gzread((gzFile)f, buf, len) == len;
|
||||
}
|
||||
|
||||
bool GzStream::skip(size_t n) {
|
||||
return GG(gzseek)((gzFile)f, n, SEEK_CUR) != -1;
|
||||
}
|
||||
|
||||
long GzStream::tell() {
|
||||
return GG(gztell)((gzFile)f);
|
||||
}
|
||||
|
||||
std::string GzStream::read_rest() {
|
||||
std::string retval;
|
||||
int c = getc();
|
||||
if (c != EOF) {
|
||||
retval += (char)c;
|
||||
char buf[512];
|
||||
for (;;) {
|
||||
size_t n = big_gzread((gzFile)f, buf, sizeof(buf));
|
||||
retval.append(buf, n);
|
||||
if (n != sizeof(buf))
|
||||
break;
|
||||
}
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
// Only stores the path; no file is opened here.
MaybeGzipped::MaybeGzipped(const std::string& path)
  : BasicInput(path) {
}
|
||||
|
||||
// Closes the gz file if one was opened.
MaybeGzipped::~MaybeGzipped() {
  if (!file_)
    return;
#if USE_ZLIB_NG || (ZLIB_VERNUM >= 0x1235)
  GG(gzclose_r)((gzFile)file_);
#else
  gzclose((gzFile)file_);
#endif
}
|
||||
|
||||
size_t MaybeGzipped::gzread_checked(void* buf, size_t len) {
|
||||
gzFile file = (gzFile) file_;
|
||||
size_t read_bytes = big_gzread(file, buf, len);
|
||||
if (read_bytes != len && !GG(gzeof)(file)) {
|
||||
int errnum = 0;
|
||||
std::string err_str = GG(gzerror)(file, &errnum);
|
||||
if (errnum == Z_ERRNO)
|
||||
sys_fail("failed to read " + path());
|
||||
if (errnum)
|
||||
fail("Error reading " + path() + ": " + err_str);
|
||||
}
|
||||
if (read_bytes > len) // should never happen
|
||||
fail("Error reading " + path());
|
||||
return read_bytes;
|
||||
}
|
||||
|
||||
// Returns the uncompressed content of the file in a memory buffer.
// If limit is non-zero, at most limit bytes are read. Otherwise the buffer
// is sized from estimate_uncompressed_size() and grown if the stream turns
// out to be longer. Uncompressed files are handled by BasicInput.
CharArray MaybeGzipped::uncompress_into_buffer(size_t limit) {
  if (!is_compressed())
    return BasicInput::uncompress_into_buffer();
  size_t size = (limit == 0 ? estimate_uncompressed_size(path()) : limit);
  file_ = GG(gzopen)(path().c_str(), "rb");
  if (!file_)
    sys_fail("Failed to gzopen " + path());
  if (size > 3221225471)
    // if this exception is changed adjust prog/cif2mtz.cpp
    fail("For now gz files above 3 GiB uncompressed are not supported.\n"
         "To read " + path() + " first uncompress it.");
  CharArray mem(size);
  size_t read_bytes = gzread_checked(mem.data(), size);
  // if the file is shorter than the size from header, adjust size
  if (read_bytes < size) {
    mem.set_size(read_bytes);  // should we call resize() here
  } else if (limit == 0) {  // read_bytes == size
    // if the file is longer than the size from header, read in the rest
    int next_char;
    while (!GG(gzeof)((gzFile)file_) && (next_char = GG(gzgetc)((gzFile)file_)) != -1) {
      if (mem.size() > 3221225471)
        fail("For now gz files above 3 GiB uncompressed are not supported.\n"
             "To read " + path() + " first uncompress it.");
      GG(gzungetc)(next_char, (gzFile)file_);
      size_t old_size = mem.size();
      // Double the buffer, but always grow by a non-zero amount:
      // with old_size == 0 plain doubling would never make progress
      // and this loop would not terminate.
      size_t extra = old_size != 0 ? old_size : 4096;
      mem.resize(old_size + extra);
      size_t n = gzread_checked(mem.data() + old_size, extra);
      mem.set_size(old_size + n);
    }
  }
  return mem;
}
|
||||
|
||||
std::unique_ptr<AnyStream> MaybeGzipped::create_stream() {
|
||||
if (is_compressed()) {
|
||||
file_ = GG(gzopen)(path().c_str(), "rb");
|
||||
if (!file_)
|
||||
sys_fail("Failed to gzopen " + path());
|
||||
#if ZLIB_VERNUM >= 0x1235
|
||||
GG(gzbuffer)((gzFile)file_, 64*1024);
|
||||
#endif
|
||||
return std::unique_ptr<AnyStream>(new GzStream(file_));
|
||||
}
|
||||
return BasicInput::create_stream();
|
||||
}
|
||||
|
||||
} // namespace gemmi
|
||||
@@ -0,0 +1,991 @@
|
||||
// Copyright 2019-2023 Global Phasing Ltd.
|
||||
|
||||
#include <gemmi/mtz.hpp>
|
||||
#include <cstring> // for memcpy
|
||||
#include <algorithm> // for stable_sort
|
||||
#include <gemmi/atof.hpp> // for fast_atof
|
||||
#include <gemmi/atox.hpp> // for simple_atoi, read_word
|
||||
#include <gemmi/gz.hpp>
|
||||
#include <gemmi/sprintf.hpp>
|
||||
|
||||
namespace gemmi {
|
||||
|
||||
namespace {
|
||||
|
||||
// Maps an angle in degrees to its equivalent value in [0, 360).
double wrap_degrees(double phi) {
  const bool already_wrapped = (phi >= 0 && phi < 360.);
  return already_wrapped ? phi : phi - std::floor(phi / 360.) * 360.;
}
|
||||
|
||||
void shift_phase(float& phi, double shift, bool negate=false) {
|
||||
double phi_ = phi + deg(shift);
|
||||
phi = float(wrap_degrees(negate ? -phi_ : phi_));
|
||||
}
|
||||
|
||||
// apply phase shift to Hendrickson–Lattman coefficients HLA, HLB, HLC and HLD
|
||||
void shift_hl_coefficients(float& a, float& b, float& c, float& d,
|
||||
double shift, bool negate=false) {
|
||||
double sinx = std::sin(shift);
|
||||
double cosx = std::cos(shift);
|
||||
double sin2x = 2 * sinx * cosx;
|
||||
double cos2x = sq(cosx)- sq(sinx);
|
||||
// a sin(x+y) + b cos(x+y) = a sin(x) cos(y) - b sin(x) sin(y)
|
||||
// + a cos(x) sin(y) + b cos(x) cos(y)
|
||||
float a_ = float(a * cosx - b * sinx);
|
||||
float b_ = float(a * sinx + b * cosx);
|
||||
float c_ = float(c * cos2x - d * sin2x);
|
||||
float d_ = float(c * sin2x + d * cos2x);
|
||||
a = a_; // cos(phi)
|
||||
b = negate ? -b_ : b_; // sin(phi)
|
||||
c = c_; // cos(2 phi)
|
||||
d = negate ? -d_ : d_; // sin(2 phi)
|
||||
}
|
||||
|
||||
// Returns {min, max} of the values in [begin, end), skipping leading NaNs.
// NaNs that follow never satisfy the comparisons, so they are ignored too.
// If there is no non-NaN value, both results are NaN.
// this function is generic because it was used in other places in the past
template <typename T, typename FP=typename std::iterator_traits<T>::value_type>
std::array<FP,2> calculate_min_max_disregarding_nans(T begin, T end) {
  std::array<FP,2> minmax = {{NAN, NAN}};
  T it = begin;
  // find the first value that is not NaN
  for (; it != end; ++it)
    if (!std::isnan(*it))
      break;
  if (it != end) {
    minmax[0] = minmax[1] = *it;
    for (++it; it != end; ++it) {
      if (*it < minmax[0])
        minmax[0] = *it;
      else if (*it > minmax[1])
        minmax[1] = *it;
    }
  }
  return minmax;
}
|
||||
|
||||
// Skips the word at the beginning of a NUL-terminated string and the
// whitespace after it; returns a pointer to what follows.
const char* skip_word_and_space(const char* line) {
  // std::isspace() requires a value representable as unsigned char (or EOF);
  // passing a negative plain char (bytes >= 0x80) is undefined behaviour.
  while (*line != '\0' && !std::isspace(static_cast<unsigned char>(*line)))
    ++line;
  while (std::isspace(static_cast<unsigned char>(*line)))
    ++line;
  return line;
}
|
||||
|
||||
// Parses six consecutive numbers (a, b, c, alpha, beta, gamma) from line
// and builds a UnitCell from them.
UnitCell read_cell_parameters(const char* line) {
  double p[6];
  for (double& value : p)
    value = fast_atof(line, &line);
  return UnitCell(p[0], p[1], p[2], p[3], p[4], p[5]);
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
// Averages the first six floats of every batch header (used here as unit
// cell parameters). If rmsd is not null, it must point to 6 doubles that
// receive the root-mean-square deviation of each parameter from the average.
// Returns the global cell if any batch has a non-positive parameter or if
// the average is almost equal to the global cell.
UnitCellParameters Mtz::get_average_cell_from_batch_headers(double* rmsd) const {
  if (rmsd) {
    for (int i = 0; i < 6; ++i)
      rmsd[i] = 0.;
  }
  std::array<double, 6> mean = {0., 0., 0., 0., 0., 0.};
  for (const Batch& batch : batches) {
    for (int i = 0; i < 6; ++i) {
      // if batch headers are not set correctly, return global cell
      if (batch.floats[i] <= 0)
        return cell;
      mean[i] += batch.floats[i];
    }
  }
  // all sums are still zero when there are no batches
  if (mean[0] <= 0 || mean[1] <= 0 || mean[2] <= 0 ||
      mean[3] <= 0 || mean[4] <= 0 || mean[5] <= 0)
    return UnitCellParameters();
  const size_t n = batches.size();
  for (int i = 0; i < 6; ++i)
    mean[i] /= n;
  if (rmsd) {
    for (const Batch& batch : batches)
      for (int i = 0; i < 6; ++i)
        rmsd[i] += sq(mean[i] - batch.floats[i]);
    for (int i = 0; i < 6; ++i)
      rmsd[i] = std::sqrt(rmsd[i] / n);
  }
  // If average parameters are almost equal to the global cell, use the latter
  // to avoid 32-bit precision artifacts (58.28 -> 58.279998).
  if (UnitCellParameters(mean).approx(cell, 1e-4))
    return cell;
  return UnitCellParameters(mean);
}
|
||||
|
||||
std::array<double,2> Mtz::calculate_min_max_1_d2() const {
|
||||
auto extend_min_max_1_d2 = [&](const UnitCell& uc, double& min, double& max) {
|
||||
for (size_t i = 0; i < data.size(); i += columns.size()) {
|
||||
double res = uc.calculate_1_d2_double(data[i+0], data[i+1], data[i+2]);
|
||||
if (res < min)
|
||||
min = res;
|
||||
if (res > max)
|
||||
max = res;
|
||||
}
|
||||
};
|
||||
if (!has_data() || columns.size() < 3)
|
||||
fail("No data.");
|
||||
double min_value = INFINITY;
|
||||
double max_value = 0.;
|
||||
if (cell.is_crystal() && cell.a > 0)
|
||||
extend_min_max_1_d2(cell, min_value, max_value);
|
||||
const UnitCell* prev_cell = nullptr;
|
||||
for (const Dataset& ds : datasets)
|
||||
if (ds.cell.is_crystal() && ds.cell.a > 0 && ds.cell != cell &&
|
||||
(!prev_cell || ds.cell != *prev_cell)) {
|
||||
extend_min_max_1_d2(ds.cell, min_value, max_value);
|
||||
prev_cell = &ds.cell;
|
||||
}
|
||||
if (min_value == INFINITY)
|
||||
min_value = 0;
|
||||
return {{min_value, max_value}};
|
||||
}
|
||||
|
||||
// Reads the first 20 bytes of an MTZ stream: checks the "MTZ " signature,
// works out the byte order, stores the header offset, and then skips
// 60 more bytes.
void Mtz::read_first_bytes(AnyStream& stream) {
  char buf[20] = {0};

  if (!stream.read(buf, 20))
    fail("Could not read the MTZ file (is it empty?)");
  const bool signature_ok =
      buf[0] == 'M' && buf[1] == 'T' && buf[2] == 'Z' && buf[3] == ' ';
  if (!signature_ok)
    fail("Not an MTZ file - it does not start with 'MTZ '");

  // Bytes 9-12 have so-called machine stamp:
  // "The first 4 half-bytes represent the real, complex, integer and
  // character formats".
  // We don't try to handle all the combinations here, only the two most
  // common: big endian (for all types) and little endian (for all types).
  // BE is denoted by 1 and LE by 4.
  // If we get a value different than 1 and 4 we assume the native byte order.
  const int opposite_order_stamp = is_little_endian() ? 0x10 : 0x40;
  if ((buf[9] & 0xf0) == opposite_order_stamp)
    toggle_endianness();

  std::int32_t offset32;
  std::memcpy(&offset32, buf + 4, 4);
  if (!same_byte_order)
    swap_four_bytes(&offset32);

  if (offset32 != -1) {
    header_offset = (int64_t) offset32;
  } else {
    // -1 in the 32-bit field: the offset is stored in 8 bytes at byte 12
    std::memcpy(&header_offset, buf + 12, 8);
    if (!same_byte_order)
      swap_eight_bytes(&header_offset);
  }
  stream.skip(60);
}
|
||||
|
||||
// Reads the 80-character main header records up to the END record and
// stores their content in this object. If save_headers is not null, every
// record read (END included) is also appended to it.
// Fails if the number of COLUMN records or the presence of a BATCH record
// does not agree with the NCOL record.
void Mtz::read_main_headers(AnyStream& stream, std::vector<std::string>* save_headers) {
  char line[81] = {0};
  std::ptrdiff_t header_pos = 4 * std::ptrdiff_t(header_offset - 1);
  // temporary check
  long cur_pos = stream.tell();
  if (cur_pos != header_pos && cur_pos != -1)
    fail(cat("wrong pos ", int(header_pos), " ", int(stream.tell())));
  int declared_ncol = 0;
  bool batch_header_seen = false;
  while (stream.read(line, 80)) {
    if (save_headers)
      save_headers->emplace_back(line, line+80);
    if (ialpha3_id(line) == ialpha3_id("END"))
      break;
    // records are identified by the first 4 letters, case-insensitively
    const char* args = skip_word_and_space(line);
    switch (ialpha4_id(line)) {
      case ialpha4_id("VERS"): {
        version_stamp = rtrim_str(args);
        break;
      }
      case ialpha4_id("TITL"): {
        title = rtrim_str(args);
        break;
      }
      case ialpha4_id("NCOL"): {
        declared_ncol = simple_atoi(args, &args);
        nreflections = simple_atoi(args, &args);
        int nbatches = simple_atoi(args);
        if (nbatches < 0 || nbatches > 10000000)  // sanity check
          fail("Wrong NCOL header");
        batches.resize(nbatches);
        break;
      }
      case ialpha4_id("CELL"): {
        cell = read_cell_parameters(args);
        break;
      }
      case ialpha4_id("SORT"): {
        for (int& n : sort_order)
          n = simple_atoi(args, &args);
        break;
      }
      case ialpha4_id("SYMI"): {
        nsymop = simple_atoi(args, &args);
        symops.reserve(nsymop);
        simple_atoi(args, &args);  // ignore number of primitive operations
        args = skip_word_and_space(skip_blank(args));  // ignore lattice type
        spacegroup_number = simple_atoi(args, &args);
        args = skip_blank(args);
        // the name is either a single word or a text in single quotes
        if (*args != '\'')
          spacegroup_name = read_word(args);
        else if (const char* end = std::strchr(++args, '\''))
          spacegroup_name.assign(args, end);
        // ignore point group which is at the end of args
        break;
      }
      case ialpha4_id("SYMM"): {
        symops.push_back(parse_triplet(args));
        break;
      }
      case ialpha4_id("RESO"): {
        min_1_d2 = fast_atof(args, &args);
        max_1_d2 = fast_atof(args, &args);
        break;
      }
      case ialpha4_id("VALM"): {
        // a value starting with 'N' is left alone
        if (*args != 'N') {
          const char* endptr;
          float v = (float) fast_atof(args, &endptr);
          if (*endptr == '\0' || is_space(*endptr))
            valm = v;
          else
            logger.note("Unexpected VALM value: " + rtrim_str(args));
        }
        break;
      }
      case ialpha4_id("COLU"): {
        columns.emplace_back();
        Column& col = columns.back();
        col.label = read_word(args, &args);
        col.type = read_word(args, &args)[0];
        col.min_value = (float) fast_atof(args, &args);
        col.max_value = (float) fast_atof(args, &args);
        col.dataset_id = simple_atoi(args);
        col.parent = this;
        col.idx = columns.size() - 1;
        break;
      }
      case ialpha4_id("COLS"): {
        // COLSRC is undocumented. CMTZ (libccp4) adds it after COLUMN:
        // COLUMN IMEAN J -300.600006 4619 1
        // COLSRC IMEAN CREATED_07/08/2019_11:00:23 1
        if (!columns.empty() && columns.back().label == read_word(args, &args))
          columns.back().source = read_word(args);
        else
          logger.note("MTZ: COLSRC is not after matching COLUMN");
        break;
      }
      case ialpha4_id("COLG"): {
        // Column group - not used.
        break;
      }
      case ialpha4_id("NDIF"): {
        datasets.reserve(simple_atoi(args));
        break;
      }
      case ialpha4_id("PROJ"): {
        datasets.emplace_back();
        datasets.back().id = simple_atoi(args, &args);
        datasets.back().project_name = read_word(skip_word_and_space(args));
        datasets.back().wavelength = 0.0;
        break;
      }
      case ialpha4_id("CRYS"): {
        if (simple_atoi(args, &args) == last_dataset().id)
          datasets.back().crystal_name = read_word(args);
        else
          logger.note("MTZ CRYSTAL line: unusual numbering.");
        break;
      }
      case ialpha4_id("DATA"): {
        if (simple_atoi(args, &args) == last_dataset().id)
          datasets.back().dataset_name = read_word(args);
        else
          logger.note("MTZ DATASET line: unusual numbering.");
        break;
      }
      case ialpha4_id("DCEL"): {
        if (simple_atoi(args, &args) == last_dataset().id)
          datasets.back().cell = read_cell_parameters(args);
        else
          logger.note("MTZ DCELL line: unusual numbering.");
        break;
      }
      // case("DRES"): not in use yet
      case ialpha4_id("DWAV"): {
        if (simple_atoi(args, &args) == last_dataset().id)
          datasets.back().wavelength = fast_atof(args);
        else
          logger.note("MTZ DWAV line: unusual numbering.");
        break;
      }
      case ialpha4_id("BATCH"): {
        // We take number of batches from the NCOL record and serial numbers
        // from BH. This header could be used only to check consistency.
        batch_header_seen = true;
        break;
      }
      default:
        logger.note("Unknown header: " + rtrim_str(line));
    }
  }
  if (declared_ncol != (int) columns.size())
    fail("Number of COLU records inconsistent with NCOL record.");
  // a BATCH record must be present exactly when NCOL declared some batches
  if (batch_header_seen == batches.empty())
    fail("BATCH header inconsistent with NCOL record.");
  // adjust data size, if necessary
  if (!data.empty()) {
    size_t expected_size = columns.size() * nreflections;
    if (data.size() > expected_size)
      data.resize(expected_size);
    else if (data.size() < expected_size)
      fail("internal error, wrong data size");
  }
}
|
||||
|
||||
// Reads the 80-character records that follow the main headers, up to the
// MTZENDOFHEADERS record: history lines (after MTZHIST) and batch headers
// (after MTZBATS). Whatever remains in the stream afterwards is stored
// in appended_text.
void Mtz::read_history_and_batch_headers(AnyStream& stream) {
  char buf[81] = {0};
  int n_headers = 0;  // number of history lines still to be read
  while (stream.read(buf, 80) && ialpha4_id(buf) != ialpha4_id("MTZE")) {
    if (n_headers != 0) {
      const char* start = skip_blank(buf);
      // The record ends at buf+80 wherever `start` is. Using start+80 as
      // the end would read beyond buf when the line has leading blanks.
      const char* end = rtrim_cstr(start, buf + 80);
      history.emplace_back(start, end);
      --n_headers;
    } else if (ialpha4_id(buf) == ialpha4_id("MTZH")) {
      n_headers = simple_atoi(skip_word_and_space(buf+4));
      if (n_headers < 0 || n_headers > 30) {
        logger.note("Wrong MTZ: number of headers should be between 0 and 30");
        return;
      }
      history.reserve(n_headers);
    } else if (ialpha4_id(buf) == ialpha4_id("MTZB")) {
      for (Batch& batch : batches) {
        stream.read(buf, 80);
        if (ialpha3_id(buf) != ialpha3_id("BH "))
          fail("Missing BH header");
        const char* args = skip_blank(buf + 2);
        batch.number = simple_atoi(args, &args);
        int total_words = simple_atoi(args, &args);
        int int_words = simple_atoi(args, &args);
        int float_words = simple_atoi(args);
        // negative counts must not reach resize() below
        if (total_words != int_words + float_words || total_words > 1000 ||
            int_words < 0 || float_words < 0)
          fail("Wrong BH header");
        stream.read(buf, 80);  // TITLE
        const char* end = rtrim_cstr(buf + 6, buf+76);
        // NOTE(review): the stored title starts at buf, so it includes the
        // first 6 characters of the record - confirm this is intended.
        batch.title.assign(buf, end - buf);
        batch.ints.resize(int_words);
        stream.read(batch.ints.data(), int_words * 4);
        batch.floats.resize(float_words);
        stream.read(batch.floats.data(), float_words * 4);
        stream.read(buf, 80);
        if (ialpha4_id(buf) != ialpha4_id("BHCH"))
          fail("Missing BHCH header");
        split_str_into_multi(buf + 5, " \t", batch.axes);
      }
    }
  }
  appended_text = stream.read_rest();
}
|
||||
|
||||
void Mtz::setup_spacegroup() {
|
||||
spacegroup = find_spacegroup_by_name(spacegroup_name, cell.alpha, cell.gamma);
|
||||
if (!spacegroup) {
|
||||
logger.note("MTZ: unrecognized spacegroup name: " + spacegroup_name);
|
||||
return;
|
||||
}
|
||||
if (spacegroup->ccp4 != spacegroup_number)
|
||||
logger.note("MTZ: inconsistent spacegroup name and number");
|
||||
cell.set_cell_images_from_spacegroup(spacegroup);
|
||||
for (Dataset& d : datasets)
|
||||
d.cell.set_cell_images_from_spacegroup(spacegroup);
|
||||
}
|
||||
|
||||
// we should be at byte 80
|
||||
void Mtz::read_raw_data(AnyStream& stream, bool do_read) {
|
||||
size_t n = size_t(header_offset - 1 - 20);
|
||||
if (!do_read) {
|
||||
if (!stream.skip(4 * n))
|
||||
fail("ignoring mtz data segment failed");
|
||||
return;
|
||||
}
|
||||
data.resize(n);
|
||||
if (!stream.read(data.data(), 4 * n))
|
||||
fail("Error when reading MTZ data");
|
||||
if (!same_byte_order)
|
||||
for (float& f : data)
|
||||
swap_four_bytes(&f);
|
||||
}
|
||||
|
||||
/// Reads a complete MTZ file from `stream` in a single forward pass:
/// file prefix, reflection data (skipped if `with_data` is false),
/// main headers, then history and batch headers.
void Mtz::read_stream(AnyStream& stream, bool with_data) {
  read_first_bytes(stream);
  // Everything is read in file order (data first, headers after it), so
  // no seek() is needed. This allows incremental reading of non-seekable
  // streams (stdin, gzipped files, etc); an older implementation jumped
  // to the headers first and then back to the data at byte 80.
  read_raw_data(stream, with_data);
  read_main_headers(stream, nullptr);
  read_history_and_batch_headers(stream);
  setup_spacegroup();
  if (!datasets.empty())
    return;
  // a file without any dataset gets the conventional base dataset
  datasets.push_back({0, "HKL_base", "HKL_base", "HKL_base", cell, 0.});
}
|
||||
|
||||
// for probing/testing individual reflections, no need to optimize it
|
||||
size_t Mtz::find_offset_of_hkl(const Miller& hkl, size_t start) const {
|
||||
if (!has_data() || columns.size() < 3)
|
||||
fail("No data.");
|
||||
if (start != 0)
|
||||
start -= (start % columns.size());
|
||||
for (size_t n = start; n + 2 < data.size(); n += columns.size())
|
||||
if (get_hkl(n) == hkl)
|
||||
return n;
|
||||
return (size_t)-1;
|
||||
}
|
||||
|
||||
void Mtz::ensure_asu(bool tnt_asu) {
|
||||
if (!is_merged())
|
||||
fail("Mtz::ensure_asu() is for merged MTZ only");
|
||||
if (!spacegroup)
|
||||
return;
|
||||
GroupOps gops = spacegroup->operations();
|
||||
ReciprocalAsu asu(spacegroup, tnt_asu);
|
||||
std::vector<int> phase_columns = positions_of_columns_with_type('P');
|
||||
std::vector<int> abcd_columns = positions_of_columns_with_type('A');
|
||||
std::vector<int> dano_columns = positions_of_columns_with_type('D');
|
||||
std::vector<std::pair<int,int>> plus_minus_columns = positions_of_plus_minus_columns();
|
||||
bool no_special_columns = phase_columns.empty() && abcd_columns.empty() &&
|
||||
plus_minus_columns.empty() && dano_columns.empty();
|
||||
bool centric = no_special_columns || gops.is_centrosymmetric();
|
||||
for (size_t n = 0; n < data.size(); n += columns.size()) {
|
||||
Miller hkl = get_hkl(n);
|
||||
if (asu.is_in(hkl))
|
||||
continue;
|
||||
auto result = asu.to_asu(hkl, gops);
|
||||
// cf. impl::move_to_asu() in asudata.hpp
|
||||
set_hkl(n, result.first);
|
||||
if (no_special_columns)
|
||||
continue;
|
||||
int isym = result.second;
|
||||
if (!phase_columns.empty() || !abcd_columns.empty()) {
|
||||
const Op& op = gops.sym_ops[(isym - 1) / 2];
|
||||
double shift = op.phase_shift(hkl);
|
||||
bool negate = (isym % 2 == 0);
|
||||
for (int col : phase_columns)
|
||||
shift_phase(data[n + col], shift, negate);
|
||||
for (auto i = abcd_columns.begin(); i+3 < abcd_columns.end(); i += 4)
|
||||
// we expect coefficients HLA, HLB, HLC and HLD - in this order
|
||||
shift_hl_coefficients(data[n + *(i+0)], data[n + *(i+1)],
|
||||
data[n + *(i+2)], data[n + *(i+3)],
|
||||
shift, negate);
|
||||
}
|
||||
if (isym % 2 == 0 && !centric &&
|
||||
// usually, centric reflections have empty F(-), so avoid swapping it
|
||||
!gops.is_reflection_centric(hkl)) {
|
||||
for (std::pair<int,int> cols : plus_minus_columns)
|
||||
std::swap(data[n + cols.first], data[n + cols.second]);
|
||||
for (int col : dano_columns)
|
||||
data[n + col] = -data[n + col];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Mtz::reindex(const Op& op) {
|
||||
if (op.tran != Op::Tran{0, 0, 0})
|
||||
gemmi::fail("reindexing operator must not have a translation");
|
||||
if (op.det_rot() < 0)
|
||||
gemmi::fail("reindexing operator must preserve the hand of the axes");
|
||||
switch_to_original_hkl(); // changes hkl for unmerged data only
|
||||
Op xyz_op = op.as_xyz();
|
||||
logger.mesg("Real space transformation: ", op.as_xyz().triplet());
|
||||
bool row_removal = false;
|
||||
// change Miller indices
|
||||
for (size_t n = 0; n < data.size(); n += columns.size()) {
|
||||
Miller hkl_den = op.apply_to_hkl_without_division(get_hkl(n));
|
||||
Miller hkl = Op::divide_hkl_by_DEN(hkl_den);
|
||||
if (hkl[0] * Op::DEN == hkl_den[0] &&
|
||||
hkl[1] * Op::DEN == hkl_den[1] &&
|
||||
hkl[2] * Op::DEN == hkl_den[2]) {
|
||||
set_hkl(n, hkl);
|
||||
} else { // fractional hkl - remove
|
||||
row_removal = true;
|
||||
data[n] = NAN; // mark for removal
|
||||
}
|
||||
}
|
||||
|
||||
// remove reflections marked for removal
|
||||
if (row_removal) {
|
||||
int n_before = nreflections;
|
||||
remove_rows_if([](const float* h) { return std::isnan(*h); });
|
||||
logger.mesg("Reflections removed (because of fractional indices): ", n_before - nreflections);
|
||||
}
|
||||
|
||||
switch_to_asu_hkl(); // revert switch_to_original_hkl() for unmerged data
|
||||
|
||||
// change space group
|
||||
if (spacegroup) {
|
||||
GroupOps gops = spacegroup->operations();
|
||||
gops.change_basis_backward(xyz_op);
|
||||
const SpaceGroup* new_sg = find_spacegroup_by_ops(gops);
|
||||
if (!new_sg)
|
||||
fail("reindexing: failed to determine new space group name");
|
||||
if (new_sg != spacegroup) {
|
||||
logger.mesg("Space group changed from ", spacegroup->xhm(), " to ", new_sg->xhm(), '.');
|
||||
set_spacegroup(new_sg);
|
||||
} else {
|
||||
logger.mesg("Space group stays the same:", spacegroup->xhm(), '.');
|
||||
}
|
||||
}
|
||||
|
||||
// change unit cell parameters
|
||||
cell = cell.changed_basis_backward(xyz_op, false);
|
||||
for (Mtz::Dataset& ds : datasets)
|
||||
ds.cell = ds.cell.changed_basis_backward(xyz_op, false);
|
||||
for (Mtz::Batch& batch : batches)
|
||||
batch.set_cell(batch.get_cell().changed_basis_backward(xyz_op, false));
|
||||
}
|
||||
|
||||
void Mtz::expand_to_p1() {
|
||||
if (!spacegroup || !has_data())
|
||||
return;
|
||||
std::vector<int> phase_columns = positions_of_columns_with_type('P');
|
||||
std::vector<int> abcd_columns = positions_of_columns_with_type('A');
|
||||
bool has_phases = (!phase_columns.empty() || !abcd_columns.empty());
|
||||
GroupOps gops = spacegroup->operations();
|
||||
data.reserve(gops.sym_ops.size() * data.size());
|
||||
size_t orig_size = data.size();
|
||||
std::vector<Miller> hkl_copies;
|
||||
for (size_t n = 0; n < orig_size; n += columns.size()) {
|
||||
hkl_copies.clear();
|
||||
Miller hkl = get_hkl(n);
|
||||
// no reallocations because of reserve() above
|
||||
auto orig_iter = data.begin() + n;
|
||||
for (auto op = gops.sym_ops.begin() + 1; op < gops.sym_ops.end(); ++op) {
|
||||
Miller new_hkl = op->apply_to_hkl(hkl);
|
||||
Op::Miller negated{{-new_hkl[0], -new_hkl[1], -new_hkl[2]}};
|
||||
if (new_hkl != hkl && !in_vector(new_hkl, hkl_copies) &&
|
||||
negated != hkl && !in_vector(negated, hkl_copies)) {
|
||||
hkl_copies.push_back(new_hkl);
|
||||
size_t offset = data.size();
|
||||
data.insert(data.end(), orig_iter, orig_iter + columns.size());
|
||||
set_hkl(offset, new_hkl);
|
||||
if (has_phases) {
|
||||
double shift = op->phase_shift(hkl);
|
||||
if (shift != 0) {
|
||||
for (int col : phase_columns)
|
||||
shift_phase(data[offset + col], shift);
|
||||
for (auto i = abcd_columns.begin(); i+3 < abcd_columns.end(); i += 4)
|
||||
// we expect coefficients HLA, HLB, HLC and HLD - in this order
|
||||
shift_hl_coefficients(data[offset + *(i+0)], data[offset + *(i+1)],
|
||||
data[offset + *(i+2)], data[offset + *(i+3)], shift);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
nreflections = int(data.size() / columns.size());
|
||||
sort_order = {{0, 0, 0, 0, 0}};
|
||||
set_spacegroup(&get_spacegroup_p1());
|
||||
}
|
||||
|
||||
bool Mtz::switch_to_original_hkl() {
|
||||
if (indices_switched_to_original)
|
||||
return false;
|
||||
if (!has_data())
|
||||
fail("switch_to_original_hkl(): data not read yet");
|
||||
if (nreflections == 0) {
|
||||
// This function can be called before the data is populated
|
||||
// to set indices_switched_to_original, which is not exposed in Python.
|
||||
indices_switched_to_original = true;
|
||||
return true;
|
||||
}
|
||||
const Column* col = column_with_label("M/ISYM");
|
||||
if (col == nullptr || col->type != 'Y' || col->idx < 3)
|
||||
return false;
|
||||
std::vector<Op> inv_symops;
|
||||
inv_symops.reserve(symops.size());
|
||||
for (const Op& op : symops)
|
||||
inv_symops.push_back(op.inverse());
|
||||
for (size_t n = 0; n + col->idx < data.size(); n += columns.size()) {
|
||||
int isym = static_cast<int>(data[n + col->idx]) & 0xFF;
|
||||
const Op& op = inv_symops.at((isym - 1) / 2);
|
||||
Miller hkl = op.apply_to_hkl(get_hkl(n));
|
||||
int sign = (isym & 1) ? 1 : -1;
|
||||
for (int i = 0; i < 3; ++i)
|
||||
data[n+i] = static_cast<float>(sign * hkl[i]);
|
||||
}
|
||||
indices_switched_to_original = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Mtz::switch_to_asu_hkl() {
|
||||
if (!indices_switched_to_original)
|
||||
return false;
|
||||
if (!has_data())
|
||||
fail("switch_to_asu_hkl(): data not read yet");
|
||||
const Column* col = column_with_label("M/ISYM");
|
||||
if (col == nullptr || col->type != 'Y' || col->idx < 3 || !spacegroup)
|
||||
return false;
|
||||
size_t misym_idx = col->idx;
|
||||
UnmergedHklMover hkl_mover(spacegroup);
|
||||
for (size_t n = 0; n + col->idx < data.size(); n += columns.size()) {
|
||||
Miller hkl = get_hkl(n);
|
||||
int isym = hkl_mover.move_to_asu(hkl); // modifies hkl
|
||||
set_hkl(n, hkl);
|
||||
float& misym = data[n + misym_idx];
|
||||
misym = float(((int)misym & ~0xff) | isym);
|
||||
}
|
||||
indices_switched_to_original = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void Mtz::read_file_gz(const std::string& path, bool with_data) {
|
||||
try {
|
||||
read_input(MaybeGzipped(path), with_data);
|
||||
} catch (std::runtime_error& e) {
|
||||
// append path to the error like in read_file(), but shouldn't the path go first?
|
||||
fail(std::string(e.what()) + ": " + path);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<int> Mtz::sorted_row_indices(int use_first) const {
|
||||
if (!has_data())
|
||||
fail("No data.");
|
||||
if (use_first <= 0 || use_first >= (int) columns.size())
|
||||
fail("Wrong use_first arg in Mtz::sort.");
|
||||
std::vector<int> indices(nreflections);
|
||||
for (int i = 0; i != nreflections; ++i)
|
||||
indices[i] = i;
|
||||
std::stable_sort(indices.begin(), indices.end(), [&](int i, int j) {
|
||||
int a = i * (int) columns.size();
|
||||
int b = j * (int) columns.size();
|
||||
for (int n = 0; n < use_first; ++n)
|
||||
if (data[a+n] != data[b+n])
|
||||
return data[a+n] < data[b+n];
|
||||
return false;
|
||||
});
|
||||
return indices;
|
||||
}
|
||||
|
||||
bool Mtz::sort(int use_first) {
|
||||
std::vector<int> indices = sorted_row_indices(use_first);
|
||||
sort_order = {{0, 0, 0, 0, 0}};
|
||||
for (int i = 0; i < use_first; ++i)
|
||||
sort_order[i] = i + 1;
|
||||
if (std::is_sorted(indices.begin(), indices.end()))
|
||||
return false;
|
||||
std::vector<float> new_data(data.size());
|
||||
size_t w = columns.size();
|
||||
for (size_t i = 0; i != indices.size(); ++i)
|
||||
std::memcpy(&new_data[i * w], &data[indices[i] * w], w * sizeof(float));
|
||||
data.swap(new_data);
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Inserts a new column into `columns`.
/// \param label        column label
/// \param type         one-character column type
/// \param dataset_id   dataset of the column; negative means the last dataset
/// \param pos          0-based position; negative means at the end
/// \param expand_data  if true, the data rows are widened by one value at pos
/// \return reference to the new column.
/// Fails if there are no datasets or `pos` is beyond the end; an explicit
/// `dataset_id` is checked by looking the dataset up.
Mtz::Column& Mtz::add_column(const std::string& label, char type,
                             int dataset_id, int pos, bool expand_data) {
  if (datasets.empty())
    fail("No datasets.");
  if (dataset_id >= 0)
    dataset(dataset_id);  // check if such dataset exist
  else
    dataset_id = datasets.back().id;
  const int n_columns = (int) columns.size();
  if (pos > n_columns)
    fail("Requested column position after the end.");
  if (pos < 0)
    pos = n_columns;
  auto added = columns.emplace(columns.begin() + pos);
  added->dataset_id = dataset_id;
  added->type = type;
  added->label = label;
  added->parent = this;
  added->idx = pos;
  // columns after the inserted one moved by one position
  for (auto later = added + 1; later != columns.end(); ++later)
    later->idx++;
  if (expand_data)
    expand_data_rows(1, pos);
  return *added;
}
|
||||
|
||||
|
||||
namespace { // helper functions for copying, replacing and removing columns
|
||||
|
||||
void check_column(const Mtz& mtz, size_t idx, const char* msg) {
|
||||
if (!mtz.has_data())
|
||||
fail(msg, ": data not read yet");
|
||||
if (idx >= mtz.columns.size())
|
||||
fail(msg, ": no column with 0-based index ", std::to_string(idx));
|
||||
}
|
||||
|
||||
// Checks that src_col (which must belong to mtz) is followed by
// trailing_cols.size() columns and that each non-empty entry of
// trailing_cols equals the label of the corresponding following column.
// Fails otherwise, and also when the data of mtz has not been read.
void check_trailing_cols(const Mtz& mtz, const Mtz::Column& src_col,
                         const std::vector<std::string>& trailing_cols) {
  assert(src_col.parent == &mtz);
  if (!mtz.has_data())
    fail("data in source mtz not read yet");
  if (src_col.idx + trailing_cols.size() >= mtz.columns.size())
    fail("Not enough columns after " + src_col.label);
  for (size_t i = 0; i < trailing_cols.size(); ++i) {
    // an empty expected label means: accept any column here
    if (trailing_cols[i].empty())
      continue;
    const std::string& found = mtz.columns[src_col.idx + i + 1].label;
    if (trailing_cols[i] != found)
      // report the label that was actually found, not the source column
      fail("expected trailing column ", trailing_cols[i], ", found ", found);
  }
}
|
||||
|
||||
// Overwrites 1 + trailing_cols.size() consecutive columns of mtz, starting
// at dest_idx, with src_col and the columns that follow it in its parent Mtz.
// Column metadata is copied first, then the values: directly row by row when
// source and destination are the same Mtz, otherwise by matching reflections
// with equal Miller indices (rows without a match keep their values).
// No validation is done here; callers check the arguments beforehand.
void do_replace_column(Mtz& mtz, size_t dest_idx, const Mtz::Column& src_col,
                       const std::vector<std::string>& trailing_cols) {
  const Mtz* src_mtz = src_col.parent;
  const size_t n_copied = trailing_cols.size() + 1;
  const size_t src_idx = src_col.idx;

  // column metadata
  for (size_t k = 0; k < n_copied; ++k) {
    const Mtz::Column& from = src_mtz->columns[src_idx + k];
    Mtz::Column& to = mtz.columns[dest_idx + k];
    to.type = from.type;
    to.label = from.label;
    to.min_value = from.min_value;
    to.max_value = from.max_value;
    to.source = from.source;
    to.dataset_id = from.dataset_id;
  }

  if (src_mtz == &mtz) {
    // internal copying
    const size_t stride = mtz.columns.size();
    for (size_t row = 0; row < mtz.data.size(); row += stride)
      for (size_t k = 0; k < n_copied; ++k)
        mtz.data[row + dest_idx + k] = mtz.data[row + src_idx + k];
    return;
  }

  // external copying - need to match indices
  // cf. for_matching_reflections()
  const std::vector<int> dst_order = mtz.sorted_row_indices();
  const std::vector<int> src_order = src_mtz->sorted_row_indices();
  const size_t dst_stride = mtz.columns.size();
  const size_t src_stride = src_mtz->columns.size();
  auto d = dst_order.begin();
  auto s = src_order.begin();
  // walk both sorted sequences in parallel
  while (d != dst_order.end() && s != src_order.end()) {
    const size_t dst_row = *d * dst_stride;
    const size_t src_row = *s * src_stride;
    const Miller dst_hkl = mtz.get_hkl(dst_row);
    const Miller src_hkl = src_mtz->get_hkl(src_row);
    if (dst_hkl == src_hkl) {
      // copy values
      for (size_t k = 0; k < n_copied; ++k)
        mtz.data[dst_row + dest_idx + k] = src_mtz->data[src_row + src_idx + k];
      ++d;
      ++s;
    } else if (dst_hkl < src_hkl) {
      ++d;
    } else {
      ++s;
    }
  }
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
/// Replaces the column at `dest_idx` (and the trailing_cols.size() columns
/// after it) with `src_col` and the columns following it in its own Mtz.
/// \return reference to the column at `dest_idx`.
/// Fails if the source or destination columns don't exist, the trailing
/// labels don't match, or the data has not been read.
Mtz::Column& Mtz::replace_column(size_t dest_idx, const Mtz::Column& src_col,
                                 const std::vector<std::string>& trailing_cols) {
  const Mtz& src_mtz = *src_col.parent;
  check_trailing_cols(src_mtz, src_col, trailing_cols);
  // the last destination column must exist
  const size_t last_dest_idx = dest_idx + trailing_cols.size();
  check_column(*this, last_dest_idx, "replace_column()");
  do_replace_column(*this, dest_idx, src_col, trailing_cols);
  return columns[dest_idx];
}
|
||||
|
||||
/// Inserts a copy of `src_col` (and of the trailing_cols.size() columns
/// that follow it) at position `dest_idx`; negative `dest_idx` means
/// appending at the end. The source column may belong to this or another Mtz.
/// \return reference to the first of the new columns.
Mtz::Column& Mtz::copy_column(int dest_idx, const Mtz::Column& src_col,
                              const std::vector<std::string>& trailing_cols) {
  // check input consistency
  if (!has_data())
    fail("copy_column(): data not read yet");
  check_trailing_cols(*src_col.parent, src_col, trailing_cols);

  const int n_new = 1 + (int) trailing_cols.size();
  if (dest_idx < 0)
    dest_idx = (int) columns.size();

  // If src_col is from this Mtz it may get invalidated when adding columns,
  // so remember where it will be after the insertion.
  int src_idx_after = -1;
  if (src_col.parent == this) {
    src_idx_after = (int) src_col.idx;
    if (src_idx_after >= dest_idx)
      src_idx_after += n_new;
  }

  // add new columns (without data), then widen all data rows at once
  for (int k = 0; k < n_new; ++k)
    add_column("", ' ', -1, dest_idx + k, false);
  expand_data_rows(1 + trailing_cols.size(), dest_idx);

  // copy the data;
  // most of the work (hkl-based row matching and data copying) is done here:
  const Column& current_src = src_idx_after < 0 ? src_col : columns[src_idx_after];
  do_replace_column(*this, dest_idx, current_src, trailing_cols);
  return columns[dest_idx];
}
|
||||
|
||||
/// Removes the column with 0-based index `idx` together with its data.
/// Fails if the data has not been read or there is no such column.
void Mtz::remove_column(size_t idx) {
  check_column(*this, idx, "remove_column()");
  columns.erase(columns.begin() + idx);
  // columns that followed the removed one are now one position earlier
  for (auto col = columns.begin() + idx; col != columns.end(); ++col)
    --col->idx;
  // columns.size() is already the new row width here
  vector_remove_column(data, columns.size(), idx);
  assert(columns.size() * nreflections == data.size());
}
|
||||
|
||||
|
||||
// Formats one header record into `buf`, pads it with spaces to 80 characters
// and passes it to `write`; calls sys_fail() if the write fails.
// Expects `buf` (at least 81 chars) and `write` in the enclosing scope.
#define WRITE(...) do { \
    const int n_formatted_ = snprintf_z(buf, 81, __VA_ARGS__); \
    if (n_formatted_ < 80) \
      std::memset(buf + n_formatted_, ' ', 80 - n_formatted_); \
    if (write(buf, 80, 1) != 1) \
      sys_fail("Writing MTZ file failed"); \
  } while(0)
|
||||
|
||||
template<typename Write>
|
||||
void Mtz::write_to_stream(Write write) const {
|
||||
// uses: data, spacegroup, nreflections, batches, cell, sort_order,
|
||||
// valm, columns, datasets, history
|
||||
if (!has_data())
|
||||
fail("Cannot write Mtz which has no data");
|
||||
if (!spacegroup)
|
||||
fail("Cannot write Mtz which has no space group");
|
||||
char buf[81] = {'M', 'T', 'Z', ' ', '\0'};
|
||||
std::int64_t real_header_start = (int64_t) columns.size() * nreflections + 21;
|
||||
std::int32_t header_start = (int32_t) real_header_start;
|
||||
if (real_header_start > std::numeric_limits<int32_t>::max()) {
|
||||
header_start = -1;
|
||||
} else {
|
||||
real_header_start = 0;
|
||||
}
|
||||
std::memcpy(buf + 4, &header_start, 4);
|
||||
std::int32_t machst = is_little_endian() ? 0x00004144 : 0x11110000;
|
||||
std::memcpy(buf + 8, &machst, 4);
|
||||
std::memcpy(buf + 12, &real_header_start, 8);
|
||||
if (write(buf, 80, 1) != 1 ||
|
||||
write(data.data(), 4, data.size()) != data.size())
|
||||
fail("Writing MTZ file failed");
|
||||
WRITE("VERS MTZ:V1.1");
|
||||
WRITE("TITLE %s", title.c_str());
|
||||
WRITE("NCOL %8zu %12d %8zu", columns.size(), nreflections, batches.size());
|
||||
if (cell.is_crystal())
|
||||
WRITE("CELL %9.4f %9.4f %9.4f %9.4f %9.4f %9.4f",
|
||||
cell.a, cell.b, cell.c, cell.alpha, cell.beta, cell.gamma);
|
||||
WRITE("SORT %3d %3d %3d %3d %3d", sort_order[0], sort_order[1],
|
||||
sort_order[2], sort_order[3], sort_order[4]);
|
||||
GroupOps ops = spacegroup->operations();
|
||||
char lat_type = spacegroup->ccp4_lattice_type();
|
||||
WRITE("SYMINF %3d %2d %c %5d %*s'%c%s' PG%s",
|
||||
ops.order(), // number of symmetry operations
|
||||
(int) ops.sym_ops.size(), // number of primitive operations
|
||||
lat_type, // lattice type
|
||||
spacegroup->ccp4, // space group number
|
||||
20 - (int) std::strlen(spacegroup->hm), "",
|
||||
lat_type, // space group name (first letter)
|
||||
spacegroup->hm + 1, // space group name (the rest)
|
||||
spacegroup->point_group_hm()); // point group name
|
||||
// If we have symops that are the same as spacegroup->operations(),
|
||||
// write symops to preserve the order of SYMM records.
|
||||
if (!symops.empty() && ops.is_same_as(split_centering_vectors(symops)))
|
||||
for (Op op : symops)
|
||||
WRITE("SYMM %s", to_upper(op.triplet()).c_str());
|
||||
else
|
||||
for (Op op : ops)
|
||||
WRITE("SYMM %s", to_upper(op.triplet()).c_str());
|
||||
auto reso = calculate_min_max_1_d2();
|
||||
WRITE("RESO %-20.12f %-20.12f", reso[0], reso[1]);
|
||||
if (std::isnan(valm))
|
||||
WRITE("VALM NAN");
|
||||
else
|
||||
WRITE("VALM %f", valm);
|
||||
auto format17 = [](float f) {
|
||||
char buffer[18];
|
||||
int len = snprintf_z(buffer, 18, "%.9f", f);
|
||||
return std::string(buffer, len > 0 ? std::min(len, 17) : 0);
|
||||
};
|
||||
for (const Column& col : columns) {
|
||||
auto minmax = calculate_min_max_disregarding_nans(col.begin(), col.end());
|
||||
const char* label = !col.label.empty() ? col.label.c_str() : "_";
|
||||
WRITE("COLUMN %-30s %c %17s %17s %4d",
|
||||
label, col.type,
|
||||
format17(minmax[0]).c_str(), format17(minmax[1]).c_str(),
|
||||
col.dataset_id);
|
||||
if (!col.source.empty())
|
||||
WRITE("COLSRC %-30s %-36s %4d", label, col.source.c_str(), col.dataset_id);
|
||||
}
|
||||
WRITE("NDIF %8zu", datasets.size());
|
||||
for (const Dataset& ds : datasets) {
|
||||
WRITE("PROJECT %7d %s", ds.id, ds.project_name.c_str());
|
||||
WRITE("CRYSTAL %7d %s", ds.id, ds.crystal_name.c_str());
|
||||
WRITE("DATASET %7d %s", ds.id, ds.dataset_name.c_str());
|
||||
const UnitCell& uc = (ds.cell.is_crystal() && ds.cell.a > 0 ? ds.cell : cell);
|
||||
WRITE("DCELL %9d %10.4f%10.4f%10.4f%10.4f%10.4f%10.4f",
|
||||
ds.id, uc.a, uc.b, uc.c, uc.alpha, uc.beta, uc.gamma);
|
||||
WRITE("DWAVEL %8d %10.5f", ds.id, ds.wavelength);
|
||||
}
|
||||
int pos = 0;
|
||||
for (const Batch& batch : batches) {
|
||||
if (pos == 0)
|
||||
std::memcpy(buf, "BATCH ", 6); // NOLINT(bugprone-not-null-terminated-result)
|
||||
pos += 6;
|
||||
snprintf_z(buf + pos, 7, "%6d", batch.number);
|
||||
if (pos > 72 || &batch == &batches.back()) {
|
||||
std::memset(buf + pos, ' ', 80 - pos);
|
||||
if (write(buf, 80, 1) != 1)
|
||||
fail("Writing MTZ file failed");
|
||||
pos = 0;
|
||||
}
|
||||
}
|
||||
WRITE("END");
|
||||
if (!history.empty()) {
|
||||
// According to mtzformat.html the file can have only up to 30 history
|
||||
// lines, but we don't enforce it here.
|
||||
WRITE("MTZHIST %3zu", history.size());
|
||||
for (const std::string& line : history)
|
||||
WRITE("%s", line.c_str());
|
||||
}
|
||||
if (!batches.empty()) {
|
||||
WRITE("MTZBATS");
|
||||
for (const Batch& batch : batches) {
|
||||
// keep the numbers the same as in files written by libccp4
|
||||
WRITE("BH %8d %7zu %7zu %7zu",
|
||||
batch.number, batch.ints.size() + batch.floats.size(),
|
||||
batch.ints.size(), batch.floats.size());
|
||||
WRITE("TITLE %.70s", batch.title.c_str());
|
||||
if (batch.ints.size() != 29 || batch.floats.size() != 156)
|
||||
fail("wrong size of binaries batch headers");
|
||||
write(batch.ints.data(), 4, batch.ints.size());
|
||||
write(batch.floats.data(), 4, batch.floats.size());
|
||||
WRITE("BHCH %7.7s %7.7s %7.7s",
|
||||
batch.axes.size() > 0 ? batch.axes[0].c_str() : "",
|
||||
batch.axes.size() > 1 ? batch.axes[1].c_str() : "",
|
||||
batch.axes.size() > 2 ? batch.axes[2].c_str() : "");
|
||||
}
|
||||
}
|
||||
WRITE("MTZENDOFHEADERS");
|
||||
if (!appended_text.empty()) {
|
||||
if (write(appended_text.data(), appended_text.size(), 1) != 1)
|
||||
fail("Writing MTZ file failed");
|
||||
}
|
||||
}
|
||||
|
||||
#undef WRITE
|
||||
|
||||
void Mtz::write_to_cstream(std::FILE* stream) const {
|
||||
write_to_stream([&](const void *ptr, size_t size, size_t nmemb) {
|
||||
return std::fwrite(ptr, size, nmemb, stream);
|
||||
});
|
||||
}
|
||||
|
||||
void Mtz::write_to_string(std::string& str) const {
|
||||
// Calculate the size beforehand to avoid memory re-allocations
|
||||
// and minimize memory usage. It hasn't been benchmarked against
|
||||
// a single-pass writing.
|
||||
size_t nbytes = size_to_write();
|
||||
str.resize(nbytes);
|
||||
write_to_buffer(&str[0], nbytes);
|
||||
}
|
||||
|
||||
void Mtz::write_to_file(const std::string& path) const {
|
||||
fileptr_t f = file_open(path.c_str(), "wb");
|
||||
try {
|
||||
write_to_cstream(f.get());
|
||||
} catch (std::runtime_error& e) {
|
||||
fail(std::string(e.what()) + ": " + path);
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the number of bytes that writing this MTZ file would produce.
/// Runs the serialization with a callback that only counts the bytes.
size_t Mtz::size_to_write() const {
  size_t total = 0;
  auto count_only = [&](const void*, size_t size, size_t nmemb) {
    total += size * nmemb;
    return nmemb;  // pretend that everything was written
  };
  write_to_stream(count_only);
  return total;
}
|
||||
|
||||
size_t Mtz::write_to_buffer(char* buf, size_t maxlen) const {
|
||||
size_t len = 0;
|
||||
write_to_stream([&](const void *ptr, size_t size, size_t nmemb) {
|
||||
len += size * nmemb;
|
||||
if (len > maxlen)
|
||||
fail("Mtz::write_to_buffer: size too small");
|
||||
memcpy(buf, ptr, size * nmemb);
|
||||
buf += size * nmemb;
|
||||
return nmemb;
|
||||
});
|
||||
return len;
|
||||
}
|
||||
|
||||
} // namespace gemmi
|
||||
@@ -0,0 +1,68 @@
|
||||
// Copyright 2017 Global Phasing Ltd.
|
||||
|
||||
#include <gemmi/sprintf.hpp>
|
||||
#include <stdarg.h> // for va_list
|
||||
|
||||
#ifdef USE_STD_SNPRINTF // useful for benchmarking and testing only
|
||||
# include <cstdio>
|
||||
# include <algorithm> // for min
|
||||
#else
|
||||
# define STB_SPRINTF_IMPLEMENTATION
|
||||
# define STB_SPRINTF_STATIC
|
||||
# define STB_SPRINTF_NOUNALIGNED 1
|
||||
// Making functions from stb_sprintf static may trigger warnings.
|
||||
# if defined(__GNUC__)
|
||||
# pragma GCC diagnostic ignored "-Wunused-function"
|
||||
# endif
|
||||
# if defined(__clang__)
|
||||
# pragma clang diagnostic ignored "-Wunused-function"
|
||||
# endif
|
||||
|
||||
// To use system stb_sprintf.h (not recommended, but some Linux distros
|
||||
// don't like bundled libraries) define GEMMI_USE_SYSTEM_STB or remove
|
||||
// third_party/stb_sprintf.h.
|
||||
# if defined(__has_include)
|
||||
# if !__has_include("../third_party/stb_sprintf.h")
|
||||
# define GEMMI_USE_SYSTEM_STB 1
|
||||
# endif
|
||||
# endif
|
||||
# ifdef GEMMI_USE_SYSTEM_STB
|
||||
# pragma message("Using system stb_sprintf.h, not the bundled one. It may not work.")
|
||||
# include "stb/stb_sprintf.h"
|
||||
# else
|
||||
# include "../third_party/stb_sprintf.h"
|
||||
# endif
|
||||
#endif // USE_STD_SNPRINTF
|
||||
|
||||
namespace gemmi {
|
||||
|
||||
// We copy functions from sprintf.h only to have them declared with GEMMI_DLL.
|
||||
// Locale-independent sprintf(): formats into buf (which must be large
// enough) and returns the value reported by the underlying formatter.
// Uses stb_sprintf unless USE_STD_SNPRINTF is defined.
int sprintf_z(char *buf, char const *fmt, ...) {
  va_list args;
  va_start(args, fmt);
#ifdef USE_STD_SNPRINTF
  const int n_written = std::vsprintf(buf, fmt, args);
#else
  const int n_written = STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, args);
#endif
  va_end(args);
  return n_written;
}
|
||||
|
||||
// Locale-independent snprintf(): formats at most count-1 characters into buf
// and returns the value reported by the underlying formatter.
// Uses stb_sprintf unless USE_STD_SNPRINTF is defined.
int snprintf_z(char *buf, int count, char const *fmt, ...) {
  int result;
  va_list va;
  va_start(va, fmt);
#ifdef USE_STD_SNPRINTF
  result = std::vsnprintf(buf, count, fmt, va);
  // stbsp_snprintf always returns a zero-terminated string.
  // Do the same here, but never touch the buffer if it has no room
  // (count <= 0), and never index it with a negative error result.
  if (count > 0)
    buf[std::max(0, std::min(result, count-1))] = '\0';
#else
  result = STB_SPRINTF_DECORATE(vsnprintf)(buf, count, fmt, va);
#endif
  va_end(va);
  return result;
}
|
||||
|
||||
} // namespace gemmi
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,306 @@
|
||||
// Copyright 2023 Global Phasing Ltd.
|
||||
|
||||
#include <gemmi/xds_ascii.hpp>
|
||||
#include <gemmi/atof.hpp> // for fast_from_chars
|
||||
#include <gemmi/atox.hpp> // for skip_blank, read_word
|
||||
#include <gemmi/util.hpp> // for trim_str
|
||||
#include <gemmi/gz.hpp>
|
||||
#include <gemmi/math.hpp>
|
||||
|
||||
namespace gemmi {
|
||||
|
||||
void XdsAscii::gather_iset_statistics() {
|
||||
for (Iset& iset : isets) {
|
||||
iset.frame_number_min = INT_MAX;
|
||||
iset.frame_number_max = 0;
|
||||
for (const XdsAscii::Refl& refl : data)
|
||||
if (refl.iset == iset.id) {
|
||||
++iset.reflection_count;
|
||||
int frame = refl.frame();
|
||||
iset.frame_number_min = std::min(iset.frame_number_min, frame);
|
||||
iset.frame_number_max = std::max(iset.frame_number_max, frame);
|
||||
}
|
||||
if (iset.frame_number_min > iset.frame_number_max)
|
||||
continue;
|
||||
std::vector<uint8_t> frames(iset.frame_number_max - iset.frame_number_min + 1);
|
||||
for (const XdsAscii::Refl& refl : data)
|
||||
if (refl.iset == iset.id)
|
||||
frames[refl.frame() - iset.frame_number_min] = 1;
|
||||
iset.frame_count = 0;
|
||||
for (uint8_t f : frames)
|
||||
iset.frame_count += f;
|
||||
}
|
||||
}
|
||||
|
||||
/// Based on Phil Evans' notes and the literature, see:
|
||||
/// https://github.com/project-gemmi/gemmi/discussions/248
|
||||
/// \par p is defined as in XDS (p=0.5 for unpolarized beam).
|
||||
void XdsAscii::apply_polarization_correction(double p, Vec3 normal) {
|
||||
if (!has_cell_axes())
|
||||
fail("unknown unit cell axes");
|
||||
Mat33 UB = cell_axes.inverse();
|
||||
Vec3 rot_axis = get_rotation_axis();
|
||||
Vec3 s0_dir = get_s0_direction();
|
||||
normal = normal.normalized();
|
||||
// The polarization normal is expected to be approx. orthogonal to the beam.
|
||||
// dot() is the same as cos_angle() for normalized vectors.
|
||||
if (normal.dot(s0_dir) > std::cos(rad(5.0)))
|
||||
fail("polarization normal is far from orthogonal to the incident beam");
|
||||
// make normal exactly orthogonal to the beam
|
||||
normal = s0_dir.cross(normal).cross(s0_dir).normalized();
|
||||
// wavevector
|
||||
Vec3 s0 = s0_dir / wavelength;
|
||||
double s0_m2 = 1. / s0.length_sq(); // s0^-2
|
||||
|
||||
for (Refl& refl : data) {
|
||||
double phi = rad(rot_angle(refl));
|
||||
Vec3 h(refl.hkl[0], refl.hkl[1], refl.hkl[2]);
|
||||
Vec3 r0 = UB.multiply(h);
|
||||
Vec3 r = rotate_about_axis(r0, rot_axis, phi);
|
||||
Vec3 s = s0 + r;
|
||||
#if 0
|
||||
double two_theta = s0.angle(s);
|
||||
// 2d sin(theta) = lambda
|
||||
double bragg_angle = std::asin(wavelength / (2 * unit_cell.calculate_d(refl.hkl)));
|
||||
printf("(%d %d %d) two-theta %g %g\n",
|
||||
refl.hkl[0], refl.hkl[1], refl.hkl[2], deg(two_theta), deg(2 * bragg_angle));
|
||||
#endif
|
||||
// we should have |s| == |s0|, but just in case calculate it separately
|
||||
double s_m2 = 1. / s.length_sq();
|
||||
// 1 + cos^2(2theta) = 2 * correction for unpolarized beam
|
||||
double t = 1 + sq(s.dot(s0)) * s_m2 * s0_m2;
|
||||
double polariz_factor = (1 - 2*p) * (1 - sq(normal.dot(s)) * s_m2) + p * t;
|
||||
// We assume that the XDS files has polarization correction applied,
|
||||
// but for non-polarized beam. So we multiply intensities by P0=t/2
|
||||
// and divide by a hopefully more accurate polarization factor.
|
||||
double mult = 0.5 * t / polariz_factor;
|
||||
refl.iobs *= mult;
|
||||
refl.sigma *= mult;
|
||||
refl.rlp *= mult;
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
// Checks if C-string a starts with the string literal b.
// On success stores the position right after the matched prefix in *endptr;
// otherwise *endptr is left untouched.
template<size_t N>
bool starts_with_ptr(const char* a, const char (&b)[N], const char** endptr) {
  const size_t prefix_len = N - 1;  // N includes the terminating '\0'
  const bool matches = std::strncmp(a, b, prefix_len) == 0;
  if (matches)
    *endptr = a + prefix_len;
  return matches;
}
|
||||
|
||||
template<size_t N>
|
||||
bool starts_with_ptr_b(const char* a, const char (&b)[N], const char** endptr) {
|
||||
return starts_with_ptr<N>(skip_blank(a), b, endptr);
|
||||
}
|
||||
|
||||
inline const char* parse_number_into(const char* start, const char* end,
|
||||
double& val, const char* line) {
|
||||
auto result = fast_from_chars(start, end, val);
|
||||
if (result.ec != std::errc())
|
||||
fail("failed to parse a number in:\n", line);
|
||||
return result.ptr;
|
||||
}
|
||||
|
||||
// Fills all N elements of the built-in array `arr` with consecutive numbers
// parsed from [start, end). Each element is read by parse_number_into(),
// which reports failures with `line` as context.
template<size_t N>
void parse_numbers_into_array(const char* start, const char* end,
                              double (&arr)[N], const char* line) {
  const char* cursor = start;
  for (size_t i = 0; i != N; ++i)
    cursor = parse_number_into(cursor, end, arr[i], line);
}
|
||||
|
||||
// Overload for std::array: fills all N elements of `arr` with consecutive
// numbers parsed from [start, end). Each element is read by
// parse_number_into(), which reports failures with `line` as context.
template<size_t N>
void parse_numbers_into_array(const char* start, const char* end,
                              std::array<double,N>& arr, const char* line) {
  const char* cursor = start;
  for (size_t i = 0; i != N; ++i)
    cursor = parse_number_into(cursor, end, arr[i], line);
}
|
||||
|
||||
void parse_numbers_into_vec3(const char* start, const char* end,
|
||||
Vec3& vec, const char* line) {
|
||||
for (double* val : {&vec.x, &vec.y, &vec.z})
|
||||
start = parse_number_into(start, end, *val, line);
|
||||
}
|
||||
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
// Reads an XDS_ASCII or INTEGRATE.HKL reflection file from `line_reader`
// into this object.
//
// line_reader - source of text lines (read with copy_line()).
// source      - name stored in source_path and used in error messages.
//
// The first line must identify the format. Subsequent lines starting with
// '!' are header/footer records; recognized keywords fill the corresponding
// members, unrecognized ones are ignored. All other lines are reflection
// records appended to `data`. Reading finishes successfully at !END_OF_DATA.
// Any problem (empty input, unknown format, unparsable numbers, unexpected
// columns, missing !END_OF_DATA) is reported by calling fail().
void XdsAscii::read_stream(AnyStream& line_reader, const std::string& source) {
  source_path = source;
  read_columns = 12;
  char line[256];
  size_t len0 = line_reader.copy_line(line, 255);
  if (len0 == 0)
    fail("empty file");
  int iset_col = 0;
  // The first line of XDS_ASCII has fixed-width layout:
  // !FORMAT=XDS_ASCII    MERGE=FALSE    FRIEDEL'S_LAW=TRUE
  // The items are separated by four spaces, which puts the FRIEDEL'S_LAW
  // value at offset 50 (27 + 5 + 18). The lengths below are derived from
  // the literals so they cannot get out of sync with them.
  const char xds_ascii_header[] = "!FORMAT=XDS_ASCII    MERGE=";
  const char friedel_key[] = "    FRIEDEL'S_LAW=";
  char xds_ascii_type = '\0';
  if (starts_with(line, xds_ascii_header)) {
    const size_t n = sizeof(xds_ascii_header)-1;
    xds_ascii_type = line[n];  // 'T' (MERGE=TRUE) or 'F' (MERGE=FALSE)
    // The key is looked for right after the 5 characters of "FALSE",
    // so Friedel's law is picked up only from unmerged files.
    const size_t key_pos = n + 5;
    const size_t key_len = sizeof(friedel_key)-1;
    // The length check prevents comparing bytes past the end of a short line.
    if (len0 >= key_pos + key_len &&
        std::strncmp(line + key_pos, friedel_key, key_len) == 0)
      friedels_law = line[key_pos + key_len];
  }
  if (!xds_ascii_type && !starts_with(line, "!OUTPUT_FILE=INTEGRATE.HKL"))
    fail("not an XDS_ASCII nor INTEGRATE.HKL file: " + source_path);
  const char* rhs;
  while (size_t len = line_reader.copy_line(line, 255)) {
    if (line[0] == '!') {
      // header or footer record
      if (starts_with_ptr(line+1, "Generated by ", &rhs)) {
        generated_by = read_word(rhs, &rhs);
        version_str = trim_str(rhs);
      } else if (starts_with_ptr(line+1, "SPACE_GROUP_NUMBER=", &rhs)) {
        spacegroup_number = simple_atoi(rhs);
      } else if (starts_with_ptr(line+1, "UNIT_CELL_", &rhs)) {
        if (starts_with_ptr(rhs, "CONSTANTS=", &rhs)) {  // UNIT_CELL_CONSTANTS=
          parse_numbers_into_array(rhs, line+len, cell_constants, line);
        } else if (starts_with_ptr(rhs, "A-AXIS=", &rhs)) {  // UNIT_CELL_A-AXIS=
          parse_numbers_into_array(rhs, line+len, cell_axes.a[0], line);
        } else if (starts_with_ptr(rhs, "B-AXIS=", &rhs)) {  // UNIT_CELL_B-AXIS=
          parse_numbers_into_array(rhs, line+len, cell_axes.a[1], line);
        } else if (starts_with_ptr(rhs, "C-AXIS=", &rhs)) {  // UNIT_CELL_C-AXIS=
          parse_numbers_into_array(rhs, line+len, cell_axes.a[2], line);
        }
      } else if (starts_with_ptr(line+1, "REFLECTING_RANGE_E.S.D.=", &rhs)) {
        auto result = fast_from_chars(rhs, line+len, reflecting_range_esd);
        if (result.ec != std::errc())
          fail("failed to parse mosaicity:\n", line);
      } else if (starts_with_ptr(line+1, "X-RAY_WAVELENGTH=", &rhs)) {
        auto result = fast_from_chars(rhs, line+len, wavelength);
        if (result.ec != std::errc())
          fail("failed to parse wavelength:\n", line);
      } else if (starts_with_ptr(line+1, "INCIDENT_BEAM_DIRECTION=", &rhs)) {
        parse_numbers_into_vec3(rhs, line+len, incident_beam_dir, line);
      } else if (starts_with_ptr(line+1, "OSCILLATION_RANGE=", &rhs)) {
        auto result = fast_from_chars(rhs, line+len, oscillation_range);
        if (result.ec != std::errc())
          fail("failed to parse:\n", line);
      } else if (starts_with_ptr(line+1, "ROTATION_AXIS=", &rhs)) {
        parse_numbers_into_vec3(rhs, line+len, rotation_axis, line);
      } else if (starts_with_ptr(line+1, "STARTING_ANGLE=", &rhs)) {
        auto result = fast_from_chars(rhs, line+len, starting_angle);
        if (result.ec != std::errc())
          fail("failed to parse:\n", line);
      } else if (starts_with_ptr(line+1, "STARTING_FRAME=", &rhs)) {
        starting_frame = simple_atoi(rhs);
      } else if (starts_with_ptr(line+1, " ISET= ", &rhs)) {
        // per-dataset record: "! ISET= <id> KEYWORD=value"
        const char* endptr;
        int id = simple_atoi(rhs, &endptr);
        XdsAscii::Iset& iset = find_or_add_iset(id);
        endptr = skip_blank(endptr);
        if (starts_with_ptr(endptr, "INPUT_FILE=", &rhs)) {
          iset.input_file = read_word(rhs);
        } else if (starts_with_ptr(endptr, "X-RAY_WAVELENGTH=", &rhs)) {
          double w;
          auto result = fast_from_chars(rhs, line+len, w);
          if (result.ec != std::errc())
            fail("failed to parse iset wavelength:\n", line);
          iset.wavelength = w;
        } else if (starts_with_ptr(endptr, "UNIT_CELL_CONSTANTS=", &rhs)) {
          parse_numbers_into_array(rhs, line+len, iset.cell_constants, line);
        }
      } else if (starts_with_ptr(line+1, "NX=", &rhs)) {
        // "!NX= ... NY= ... QX= ... QY= ..." on a single line
        const char* endptr;
        nx = simple_atoi(rhs, &endptr);
        if (starts_with_ptr_b(endptr, "NY=", &rhs))
          ny = simple_atoi(rhs, &endptr);
        if (starts_with_ptr_b(endptr, "QX=", &rhs))
          endptr = parse_number_into(rhs, line+len, qx, line);
        if (starts_with_ptr_b(endptr, "QY=", &rhs))
          parse_number_into(rhs, line+len, qy, line);
      } else if (starts_with_ptr(line+1, "ORGX=", &rhs)) {
        // "!ORGX= ... ORGY= ... DETECTOR_DISTANCE= ..." on a single line
        const char* endptr = parse_number_into(rhs, line+len, orgx, line);
        if (starts_with_ptr_b(endptr, "ORGY=", &rhs))
          endptr = parse_number_into(rhs, line+len, orgy, line);
        if (starts_with_ptr_b(endptr, "DETECTOR_DISTANCE=", &rhs))
          parse_number_into(rhs, line+len, detector_distance, line);
      } else if (starts_with_ptr(line+1, "NUMBER_OF_ITEMS_IN_EACH_DATA_RECORD=", &rhs)) {
        int num = simple_atoi(rhs);
        // INTEGRATE.HKL has read_columns=12, as set above
        if (xds_ascii_type == 'T')  // merged file
          read_columns = 5;
        else if (generated_by == "XSCALE")
          read_columns = 8;
        else if (generated_by == "CORRECT")
          read_columns = 11;
        // check if the columns are what they always are
        if (num < read_columns)
          fail("expected ", std::to_string(read_columns), "+ columns, got:\n", line);
        if (generated_by == "INTEGRATE") {
          // INTEGRATE.HKL lists all column names in the next line
          line_reader.copy_line(line, 52);
          if (!starts_with(line, "!H,K,L,IOBS,SIGMA,XCAL,YCAL,ZCAL,RLP,PEAK,CORR,MAXC"))
            fail("unexpected column order in INTEGRATE.HKL");
        } else {
          // XDS_ASCII has one "!ITEM_<name>=<column>" line per column
          const char* expected_columns[12] = {
            "H=1", "K=2", "L=3", "IOBS=4", "SIGMA(IOBS)=5",
            "XD=6", "YD=7", "ZD=8", "RLP=9", "PEAK=10", "CORR=11", "MAXC=12"
          };
          for (int i = 0; i < read_columns; ++i) {
            const char* col = expected_columns[i];
            line_reader.copy_line(line, 42);
            if (std::strncmp(line, "!ITEM_", 6) != 0 ||
                std::strncmp(line+6, col, std::strlen(col)) != 0)
              fail("column !ITEM_" + std::string(col), " not found.");
          }
        }
      } else if (starts_with_ptr(line+1, "ITEM_ISET=", &rhs)) {
        iset_col = simple_atoi(rhs);
      } else if (starts_with(line+1, "END_OF_DATA")) {
        // If the file had no ISET records, create a single dataset.
        if (isets.empty()) {
          isets.emplace_back(1);
          isets.back().wavelength = wavelength;
        }
        // Every reflection must refer to one of the known datasets (1-based).
        for (XdsAscii::Refl& refl : data)
          if (size_t(refl.iset - 1) >= isets.size())
            fail("unexpected ITEM_ISET " + std::to_string(refl.iset));
        return;
      }
    } else {
      // reflection record
      data.emplace_back();
      XdsAscii::Refl& r = data.back();
      const char* p = line;
      for (int i = 0; i < 3; ++i)
        r.hkl[i] = simple_atoi(p, &p);
      auto result = fast_from_chars(p, line+len, r.iobs);  // 4
      result = fast_from_chars(result.ptr, line+len, r.sigma);  // 5
      if (read_columns >= 8) {
        result = fast_from_chars(result.ptr, line+len, r.xd);  // 6
        result = fast_from_chars(result.ptr, line+len, r.yd);  // 7
        result = fast_from_chars(result.ptr, line+len, r.zd);  // 8
        if (read_columns >= 11) {
          result = fast_from_chars(result.ptr, line+len, r.rlp);  // 9
          result = fast_from_chars(result.ptr, line+len, r.peak);  // 10
          result = fast_from_chars(result.ptr, line+len, r.corr);  // 11
          if (read_columns >= 12) {
            result = fast_from_chars(result.ptr, line+len, r.maxc);  // 12
          } else {
            r.maxc = 0;  // 12
          }
        } else {
          r.rlp = r.peak = r.corr = r.maxc = 0;  // 9-11
        }
      } else {
        r.xd = r.yd = r.zd = 0;  // 6-8
      }
      // only the status of the last parsed column is checked here
      if (result.ec != std::errc())
        fail("failed to parse data line:\n", line);
      if (iset_col >= read_columns) {
        // skip the columns between the last parsed one and ITEM_ISET
        const char* iset_ptr = result.ptr;
        for (int j = read_columns+1; j < iset_col; ++j)
          iset_ptr = skip_word(skip_blank(iset_ptr));
        r.iset = simple_atoi(iset_ptr);
      }
    }
  }
  // the loop ended without reaching !END_OF_DATA
  fail("incorrect or unfinished file: " + source_path);
}
|
||||
|
||||
// Convenience wrapper: reads the file at `path` (wrapped in MaybeGzipped)
// into a new XdsAscii object via XdsAscii::read_input() and returns it.
XdsAscii read_xds_ascii(const std::string& path) {
  XdsAscii parsed;
  parsed.read_input(gemmi::MaybeGzipped(path));
  return parsed;
}
|
||||
|
||||
} // namespace gemmi
|
||||
@@ -1,2 +0,0 @@
|
||||
ADD_LIBRARY(gemmi STATIC symmetry.cpp gemmi/symmetry.hpp gemmi/fail.hpp)
|
||||
TARGET_INCLUDE_DIRECTORIES(gemmi PUBLIC .)
|
||||
Reference in New Issue
Block a user