From 08bf1867663126667c018acecf40bbb0c6ddeffa Mon Sep 17 00:00:00 2001
From: Filip Leonarski <filip.leonarski@psi.ch>
Date: Wed, 13 May 2026 13:44:02 +0200
Subject: [PATCH] Gemmi: Add more functionality from gemmi 0.7.5

---
 CMakeLists.txt                             |    2 +-
 gemmi_gph/CMakeLists.txt                   |    8 +
 {symmetry => gemmi_gph}/LICENSE.txt        |    0
 gemmi_gph/gemmi/atof.hpp                   |   41 +
 gemmi_gph/gemmi/atox.hpp                   |  135 +
 {symmetry => gemmi_gph}/gemmi/cellred.hpp  |    0
 {symmetry => gemmi_gph}/gemmi/fail.hpp     |    0
 gemmi_gph/gemmi/fileutil.hpp               |  173 +
 gemmi_gph/gemmi/gz.hpp                     |   52 +
 gemmi_gph/gemmi/input.hpp                  |  168 +
 gemmi_gph/gemmi/iterator.hpp               |  287 ++
 gemmi_gph/gemmi/logger.hpp                 |   71 +
 {symmetry => gemmi_gph}/gemmi/math.hpp     |    0
 gemmi_gph/gemmi/mtz.hpp                    |  600 +++
 gemmi_gph/gemmi/sprintf.hpp                |   80 +
 {symmetry => gemmi_gph}/gemmi/symmetry.hpp |    0
 gemmi_gph/gemmi/third_party/fast_float.h   | 4933 ++++++++++++++++++++
 {symmetry => gemmi_gph}/gemmi/unitcell.hpp |    0
 gemmi_gph/gemmi/util.hpp                   |  315 ++
 gemmi_gph/gemmi/xds_ascii.hpp              |  183 +
 gemmi_gph/gz.cpp                           |  189 +
 gemmi_gph/mtz.cpp                          |  991 ++++
 gemmi_gph/sprintf.cpp                      |   68 +
 gemmi_gph/stb/stb_sprintf.h                | 1906 ++++++++
 {symmetry => gemmi_gph}/symmetry.cpp       |    0
 gemmi_gph/xds_ascii.cpp                    |  306 ++
 symmetry/CMakeLists.txt                    |    2 -
 27 files changed, 10507 insertions(+), 3 deletions(-)
 create mode 100644 gemmi_gph/CMakeLists.txt
 rename {symmetry => gemmi_gph}/LICENSE.txt (100%)
 create mode 100644 gemmi_gph/gemmi/atof.hpp
 create mode 100644 gemmi_gph/gemmi/atox.hpp
 rename {symmetry => gemmi_gph}/gemmi/cellred.hpp (100%)
 rename {symmetry => gemmi_gph}/gemmi/fail.hpp (100%)
 create mode 100644 gemmi_gph/gemmi/fileutil.hpp
 create mode 100644 gemmi_gph/gemmi/gz.hpp
 create mode 100644 gemmi_gph/gemmi/input.hpp
 create mode 100644 gemmi_gph/gemmi/iterator.hpp
 create mode 100644 gemmi_gph/gemmi/logger.hpp
 rename {symmetry => gemmi_gph}/gemmi/math.hpp (100%)
 create mode 100644 gemmi_gph/gemmi/mtz.hpp
 create mode 100644 gemmi_gph/gemmi/sprintf.hpp
 rename {symmetry => gemmi_gph}/gemmi/symmetry.hpp (100%)
 create mode 100644 gemmi_gph/gemmi/third_party/fast_float.h
 rename {symmetry => gemmi_gph}/gemmi/unitcell.hpp (100%)
 create mode 100644 gemmi_gph/gemmi/util.hpp
 create mode 100644 gemmi_gph/gemmi/xds_ascii.hpp
 create mode 100644 gemmi_gph/gz.cpp
 create mode 100644 gemmi_gph/mtz.cpp
 create mode 100644 gemmi_gph/sprintf.cpp
 create mode 100644 gemmi_gph/stb/stb_sprintf.h
 rename {symmetry => gemmi_gph}/symmetry.cpp (100%)
 create mode 100644 gemmi_gph/xds_ascii.cpp
 delete mode 100644 symmetry/CMakeLists.txt

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c49e0644..92e97835 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -153,7 +153,7 @@ ADD_SUBDIRECTORY(reader)
 ADD_SUBDIRECTORY(detector_control)
 ADD_SUBDIRECTORY(image_puller)
 ADD_SUBDIRECTORY(preview)
-ADD_SUBDIRECTORY(symmetry)
+ADD_SUBDIRECTORY(gemmi_gph)
 ADD_SUBDIRECTORY(xds-plugin)
 
 IF (JFJOCH_WRITER_ONLY)
diff --git a/gemmi_gph/CMakeLists.txt b/gemmi_gph/CMakeLists.txt
new file mode 100644
index 00000000..66d88819
--- /dev/null
+++ b/gemmi_gph/CMakeLists.txt
@@ -0,0 +1,8 @@
+ADD_LIBRARY(gemmi STATIC symmetry.cpp gz.cpp mtz.cpp sprintf.cpp xds_ascii.cpp
+        gemmi/cellred.hpp
+        gemmi/symmetry.hpp
+        gemmi/fail.hpp
+        gemmi/unitcell.hpp
+        gemmi/math.hpp)
+TARGET_INCLUDE_DIRECTORIES(gemmi PUBLIC .)
+TARGET_LINK_LIBRARIES(gemmi )
\ No newline at end of file
diff --git a/symmetry/LICENSE.txt b/gemmi_gph/LICENSE.txt
similarity index 100%
rename from symmetry/LICENSE.txt
rename to gemmi_gph/LICENSE.txt
diff --git a/gemmi_gph/gemmi/atof.hpp b/gemmi_gph/gemmi/atof.hpp
new file mode 100644
index 00000000..7f275f7b
--- /dev/null
+++ b/gemmi_gph/gemmi/atof.hpp
@@ -0,0 +1,41 @@
+// Copyright 2020 Global Phasing Ltd.
+//
+// Functions that convert strings to floating-point numbers ignoring locale.
+// Simple wrappers around fastfloat::from_chars().
+
+#ifndef GEMMI_ATOF_HPP_
+#define GEMMI_ATOF_HPP_
+
+#include "atox.hpp"   // for is_space
+#include "third_party/fast_float.h"
+
+namespace gemmi {
+
+using fast_float::from_chars_result;
+
+inline from_chars_result fast_from_chars(const char* start, const char* end, double& d) {
+  // Skip leading whitespace and at most one '+' before handing over to fast_float.
+  const char* cursor = start;
+  for (; cursor < end && is_space(*cursor); ++cursor) {}
+  const bool plus_sign = cursor < end && *cursor == '+';
+  return fast_float::from_chars(plus_sign ? cursor + 1 : cursor, end, d);
+}
+
+inline from_chars_result fast_from_chars(const char* start, double& d) {
+  // NUL-terminated variant: the end of the input is located with strlen().
+  const char* cursor = start;
+  for (; is_space(*cursor); ++cursor) {}
+  const char* first = (*cursor == '+') ? cursor + 1 : cursor;
+  return fast_float::from_chars(first, first + std::strlen(first), d);
+}
+
+inline double fast_atof(const char* p, const char** endptr=nullptr) {
+  double value = 0;  // starting value handed to the parser
+  const from_chars_result parsed = fast_from_chars(p, value);
+  if (endptr != nullptr)
+    *endptr = parsed.ptr;  // pass the parser's `ptr` back to the caller
+  return value;
+}
+
+} // namespace gemmi
+#endif
diff --git a/gemmi_gph/gemmi/atox.hpp b/gemmi_gph/gemmi/atox.hpp
new file mode 100644
index 00000000..14b4a3b3
--- /dev/null
+++ b/gemmi_gph/gemmi/atox.hpp
@@ -0,0 +1,135 @@
+// Copyright 2018 Global Phasing Ltd.
+//
+// Locale-independent functions that convert strings to integers,
+// equivalents of standard isspace and isdigit, and a few helper functions.
+//
+// This file is named similarly to the standard functions atoi() and atof().
+// But the functions here are not meant to be equivalent to the standard
+// library functions. They are locale-independent (a good thing when reading
+// numbers from files). They don't set errno, don't signal overflow and
+// underflow. Due to the limited scope these functions tend to be faster
+// than the standard-library ones.
+
+#ifndef GEMMI_ATOX_HPP_
+#define GEMMI_ATOX_HPP_
+
+#include <cstdint>
+#include <stdexcept>  // for invalid_argument
+#include <string>
+
+namespace gemmi {
+
+// equivalent of std::isspace for C locale (no handling of EOF); uses a 256-entry lookup table
+inline bool is_space(char c) {
+  static const std::uint8_t table[256] = { // 1 for 9-13 (\t \n \v \f \r) and 32 (space)
+    0,0,0,0,0,0,0,0, 0,1,1,1,1,1,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
+    1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0
+  };
+  return table[(std::uint8_t)c] != 0;  // the cast keeps the index in 0..255 for negative chars
+}
+
+// C-locale counterpart of std::isblank (EOF is not handled): space or tab only
+inline bool is_blank(char c) {
+  return c == '\t' || c == ' ';
+}
+
+// C-locale counterpart of std::isdigit (EOF is not handled): characters '0'..'9'
+inline bool is_digit(char c) {
+  return !(c < '0' || c > '9');
+}
+
+inline const char* skip_blank(const char* p) {
+  if (p == nullptr)  // a null pointer is passed through unchanged
+    return p;
+  for (; is_blank(*p); ++p) {}  // advance over spaces and tabs
+  return p;
+}
+
+inline const char* skip_word(const char* p) {
+  if (p == nullptr)  // a null pointer is passed through unchanged
+    return p;
+  for (; *p != '\0' && !is_space(*p); ++p) {}  // stop at whitespace or NUL
+  return p;
+}
+
+inline std::string read_word(const char* line) {
+  const char* word_begin = skip_blank(line);  // first character that is not a space or tab
+  return std::string(word_begin, skip_word(word_begin));
+}
+
+inline std::string read_word(const char* line, const char** endptr) {
+  const char* word_begin = skip_blank(line);
+  const char* word_end = *endptr = skip_word(word_begin);  // endptr is written unconditionally
+  return std::string(word_begin, word_end);
+}
+
+// no checking for overflow; length=0 means NUL-terminated, else only p[0..length) is read
+inline int string_to_int(const char* p, bool checked, size_t length=0) {
+  int mult = -1;
+  int n = 0;
+  size_t i = 0;
+  while ((length == 0 || i < length) && is_space(p[i]))
+    ++i;
+  if ((length == 0 || i < length) && p[i] == '-') {  // never look at p[length]
+    mult = 1;
+    ++i;
+  } else if ((length == 0 || i < length) && p[i] == '+') {
+    ++i;
+  }
+  bool has_digits = false;
+  // use negative numbers because INT_MIN < -INT_MAX
+  for (; (length == 0 || i < length) && is_digit(p[i]); ++i) {
+    n = n * 10 - (p[i] - '0');
+    has_digits = true;
+  }
+  if (checked) {  // the input must be one integer (plus whitespace), else invalid_argument
+    while ((length == 0 || i < length) && is_space(p[i]))
+      ++i;
+    if (!has_digits || ((length == 0 || i < length) && p[i] != '\0'))  // field end == input end
+      throw std::invalid_argument("not an integer: " +
+                                  std::string(p, length ? length : i+1));
+  }
+  return mult * n;
+}
+
+inline int string_to_int(const std::string& str, bool checked) {  // std::string convenience overload
+  return string_to_int(str.c_str(), checked);  // length stays at its default of 0
+}
+
+inline int simple_atoi(const char* p, const char** endptr=nullptr) {
+  // Leading whitespace and one optional sign are accepted; no overflow checks.
+  for (; is_space(*p); ++p) {}
+  const bool negative = (*p == '-');
+  if (negative || *p == '+')
+    ++p;
+  // Accumulate as a negative number because INT_MIN < -INT_MAX.
+  int neg_value = 0;
+  while (is_digit(*p)) {
+    neg_value = neg_value * 10 - (*p - '0');
+    ++p;
+  }
+  if (endptr != nullptr)
+    *endptr = p;
+  const int sign = negative ? 1 : -1;  // flips the negative accumulator back when needed
+  return sign * neg_value;
+}
+
+inline int no_sign_atoi(const char* p, const char** endptr=nullptr) {
+  // Skips leading whitespace, then reads decimal digits only (no sign accepted).
+  for (; is_space(*p); ++p) {}
+  int value = 0;
+  while (is_digit(*p))
+    value = value * 10 + (*p++ - '0');
+  if (endptr != nullptr)
+    *endptr = p;
+  return value;
+}
+
+} // namespace gemmi
+#endif
diff --git a/symmetry/gemmi/cellred.hpp b/gemmi_gph/gemmi/cellred.hpp
similarity index 100%
rename from symmetry/gemmi/cellred.hpp
rename to gemmi_gph/gemmi/cellred.hpp
diff --git a/symmetry/gemmi/fail.hpp b/gemmi_gph/gemmi/fail.hpp
similarity index 100%
rename from symmetry/gemmi/fail.hpp
rename to gemmi_gph/gemmi/fail.hpp
diff --git a/gemmi_gph/gemmi/fileutil.hpp b/gemmi_gph/gemmi/fileutil.hpp
new file mode 100644
index 00000000..bd01fed4
--- /dev/null
+++ b/gemmi_gph/gemmi/fileutil.hpp
@@ -0,0 +1,173 @@
+// Copyright 2018 Global Phasing Ltd.
+//
+// File-related utilities.
+
+#ifndef GEMMI_FILEUTIL_HPP_
+#define GEMMI_FILEUTIL_HPP_
+
+#include <cassert>
+#include <cstdio>    // for FILE, fopen, fclose
+#include <cstdint>
+#include <cstdlib>   // for malloc, realloc
+#include <cstring>   // for strlen
+#include <initializer_list>
+#include <memory>    // for unique_ptr
+#include "fail.hpp"  // for sys_fail
+
+#if defined(_WIN32) && !defined(GEMMI_USE_FOPEN)
+#include "utf.hpp"
+#endif
+
+namespace gemmi {
+
+// strip directory and suffixes (each ext is tried once, in the given order) from filename
+inline std::string path_basename(const std::string& path,
+                                 std::initializer_list<const char*> exts) {
+  const size_t sep = path.find_last_of("\\/");
+  std::string name = (sep != std::string::npos) ? path.substr(sep + 1) : path;
+  for (const char* ext : exts) {
+    const size_t ext_len = std::strlen(ext);
+    const size_t stem_len = name.size() - ext_len;  // only used when name is longer than ext
+    if (name.size() > ext_len && name.compare(stem_len, ext_len, ext, ext_len) == 0)
+      name.resize(stem_len);  // a name equal to ext is kept whole
+  }
+  return name;
+}
+
+// file operations
+
+/// deleter for fileptr_t
+struct needs_fclose {
+  bool use_fclose;  // when false the stream is left open (set for streams not opened here)
+  void operator()(std::FILE* f) const noexcept {
+    if (use_fclose)
+      std::fclose(f);  // the result of fclose is ignored
+  }
+};
+
+typedef std::unique_ptr<std::FILE, needs_fclose> fileptr_t;
+
+inline fileptr_t file_open(const char* path, const char* mode) {  // calls sys_fail() on failure
+  std::FILE* file;
+#if defined(_WIN32) && !defined(GEMMI_USE_FOPEN)
+  std::wstring wpath = UTF8_to_wchar(path);  // path and mode are converted from UTF-8
+  std::wstring wmode = UTF8_to_wchar(mode);
+  if ((file = ::_wfopen(wpath.c_str(), wmode.c_str())) == nullptr)
+#else
+  if ((file = std::fopen(path, mode)) == nullptr)
+#endif
+    sys_fail(std::string("Failed to open ") + path +
+             (*mode == 'w' ? " for writing" : ""));
+  return fileptr_t(file, needs_fclose{true});  // the returned pointer fclose()s the file
+}
+
+// Opens path with file_open(), except that "-" yields dash_stream, which is never fclose'd.
+inline fileptr_t file_open_or(const char* path, const char* mode,
+                              std::FILE* dash_stream) {
+  const bool is_dash = std::strcmp(path, "-") == 0;
+  return is_dash ? fileptr_t(dash_stream, needs_fclose{false})
+                 : file_open(path, mode);
+}
+
+inline std::size_t file_size(std::FILE* f, const std::string& path) {  // path: for messages only
+  if (std::fseek(f, 0, SEEK_END) != 0)
+    sys_fail(path + ": fseek failed");
+  long length = std::ftell(f);  // position at the end of the file, i.e. its size
+  if (length < 0)
+    sys_fail(path + ": ftell failed");
+  if (std::fseek(f, 0, SEEK_SET) != 0)  // leaves the stream positioned at the start
+    sys_fail(path + ": fseek failed");
+  return length;
+}
+
+// helper for binary files: true when the lowest-addressed byte of a 32-bit 1 is 1
+inline bool is_little_endian() {
+  const std::uint32_t probe = 1;
+  return reinterpret_cast<const char*>(&probe)[0] == 1;
+}
+
+inline void swap_two_bytes(void* start) {
+  char* const b = static_cast<char*>(start);  // exchange the two bytes in place
+  std::swap(b[1], b[0]);
+}
+
+inline void swap_four_bytes(void* start) {
+  char* const b = static_cast<char*>(start);  // reverse the byte order in place
+  std::swap(b[1], b[2]);
+  std::swap(b[0], b[3]);
+}
+
+inline void swap_eight_bytes(void* start) {
+  // Reverse the byte order in place by exchanging mirrored positions.
+  char* const b = static_cast<char*>(start);
+  for (int lo = 0; lo < 4; ++lo) {
+    std::swap(b[lo], b[7 - lo]);
+  }
+}
+
+
+class CharArray {  // byte buffer allocated with malloc/realloc and released with std::free
+  std::unique_ptr<char, decltype(&std::free)> ptr_;
+  size_t size_;
+public:
+  CharArray() : ptr_(nullptr, &std::free), size_(0) {}
+  explicit CharArray(size_t n) : ptr_((char*)std::malloc(n), &std::free), size_(n) {}
+  explicit operator bool() const { return (bool)ptr_; }  // false when no block is held (malloc is unchecked)
+  char* data() { return ptr_.get(); }
+  const char* data() const { return ptr_.get(); }
+  size_t size() const { return size_; }
+  void set_size(size_t n) { size_ = n; }  // changes the recorded size only, no reallocation
+
+  void resize(size_t n) {
+    char* new_ptr = (char*) std::realloc(ptr_.get(), n);
+    if (!new_ptr && n != 0)
+      fail("Out of memory.");
+    (void) ptr_.release();  // NOLINT(bugprone-unused-return-value)
+    ptr_.reset(new_ptr);  // realloc already took care of the old block, hence release() above
+    size_ = n;
+  }
+
+  // Remove first n bytes making space for more text at the returned position.
+  char* roll(size_t n) {
+    assert(n <= size());
+    std::memmove(data(), data() + n, n);  // NOTE(review): copies n bytes from offset n, i.e. assumes size() >= 2*n - confirm
+    return data() + n;
+  }
+};
+
+
+/// Reads a whole file into a memory buffer (uses fseek to determine the file size).
+inline CharArray read_file_into_buffer(const std::string& path) {
+  fileptr_t f = file_open(path.c_str(), "rb");
+  size_t size = file_size(f.get(), path);
+  CharArray buffer(size);  // for an empty file: size() == 0 and data() may be null
+  if (size != 0 && std::fread(buffer.data(), size, 1, f.get()) != 1)  // 0-size fread returns 0
+    sys_fail(path + ": fread failed");
+  return buffer;
+}
+
+inline CharArray read_stdin_into_buffer() {  // reads stdin until a short read
+  size_t n = 0;  // number of bytes stored so far
+  CharArray buffer(16 * 1024);
+  for (;;) {
+    n += std::fread(buffer.data() + n, 1, buffer.size() - n, stdin);
+    if (n != buffer.size()) {  // short read: stop and record the actual length
+      buffer.set_size(n);
+      break;
+    }
+    buffer.resize(2*n);  // buffer is full: double it and keep reading
+  }
+  return buffer;
+}
+
+template<typename T>
+inline CharArray read_into_buffer(T&& input) {  // T provides is_compressed(), is_stdin(), path(), ...
+  if (input.is_compressed())  // checked first: compressed input is handled by the input object
+    return input.uncompress_into_buffer();
+  if (input.is_stdin())
+    return read_stdin_into_buffer();
+  return read_file_into_buffer(input.path());  // plain file on disk
+}
+
+} // namespace gemmi
+#endif
diff --git a/gemmi_gph/gemmi/gz.hpp b/gemmi_gph/gemmi/gz.hpp
new file mode 100644
index 00000000..b4edc016
--- /dev/null
+++ b/gemmi_gph/gemmi/gz.hpp
@@ -0,0 +1,52 @@
+// Copyright 2017 Global Phasing Ltd.
+//
+// Functions for transparent reading of gzipped files. Uses zlib.
+
+#ifndef GEMMI_GZ_HPP_
+#define GEMMI_GZ_HPP_
+#include <string>
+#include "fail.hpp"     // GEMMI_DLL
+#include "input.hpp"    // BasicInput
+#include "util.hpp"     // iends_with
+
+namespace gemmi {
+
+GEMMI_DLL extern const char* const zlib_description;
+
+GEMMI_DLL size_t estimate_uncompressed_size(const std::string& path);
+
+// the same interface as FileStream and MemoryStream (member functions are defined out of line)
+struct GEMMI_DLL GzStream final : public AnyStream {
+  GzStream(void* f_) : f(f_) {}  // stores the handle as-is
+  char* gets(char* line, int size) override;
+  int getc() override;
+  bool read(void* buf, size_t len) override;
+  bool skip(size_t n) override;
+  long tell() override;
+  std::string read_rest() override;
+
+private:
+  void* f;  // implementation detail; NOTE(review): presumably a zlib file handle - confirm in gz.cpp
+};
+
+class GEMMI_DLL MaybeGzipped : public BasicInput {  // hides the same-named non-virtual BasicInput members
+public:
+  explicit MaybeGzipped(const std::string& path);
+  ~MaybeGzipped();
+  size_t gzread_checked(void* buf, size_t len);
+  bool is_compressed() const { return iends_with(path(), ".gz"); }  // decided from the file name only
+  std::string basepath() const {  // path() minus its last 3 characters (".gz") when compressed
+    return is_compressed() ? path().substr(0, path().size() - 3) : path();
+  }
+
+  CharArray uncompress_into_buffer(size_t limit=0);
+
+  std::unique_ptr<AnyStream> create_stream();
+
+private:
+  void* file_ = nullptr;  // opaque handle; NOTE(review): presumably opened and closed in gz.cpp - confirm
+};
+
+} // namespace gemmi
+
+#endif
diff --git a/gemmi_gph/gemmi/input.hpp b/gemmi_gph/gemmi/input.hpp
new file mode 100644
index 00000000..2bf505f7
--- /dev/null
+++ b/gemmi_gph/gemmi/input.hpp
@@ -0,0 +1,168 @@
+// Copyright 2018 Global Phasing Ltd.
+//
+// Input abstraction.
+// Used to decouple file reading and decompression.
+
+#ifndef GEMMI_INPUT_HPP_
+#define GEMMI_INPUT_HPP_
+
+#include <cstddef> // for ptrdiff_t
+#include <cstdio>  // for FILE, fseek, fread
+#include <cstring> // for memchr
+#include <string>
+#include "fileutil.hpp"  // for fileptr_t
+
+namespace gemmi {
+
+// base class for FileStream, MemoryStream and GzStream
+struct AnyStream {
+  virtual ~AnyStream() = default;
+
+  virtual char* gets(char* line, int size) = 0;   // for pdb, copy_line()
+  virtual int getc() = 0;                         // for copy_line()
+  virtual bool read(void* buf, size_t len) = 0;   // for ccp4, mtz
+
+  // these are not used in GzStream because MemoryStream is used for mtz
+  virtual long tell() = 0; // temporary, for testing
+  virtual bool skip(size_t n) = 0;  // for reading mtz without data
+  virtual std::string read_rest() { return {}; }  // for mtz (appendix)
+
+  size_t copy_line(char* line, int size) {        // for pdb, xds_ascii
+    if (!gets(line, size))
+      return 0;  // nothing could be read
+    size_t len = std::strlen(line);
+    // If a line is longer than size we discard the rest of it.
+    if (len > 0 && line[len-1] != '\n')
+      for (int c = getc(); c > 0 /* not 0 nor EOF */ && c != '\n'; c = getc())
+        continue;
+    return len;  // length of what gets() stored in line, not of the discarded part
+  };
+};
+
+struct FileStream final : public AnyStream {  // AnyStream backed by a C FILE*
+  FileStream(std::FILE* f_) : f(f_, needs_fclose{false}) {}  // borrows f_: it is never closed here
+  FileStream(const char* path, const char* mode) : f(file_open_or(path, mode, stdin)) {}  // "-": stdin
+
+  char* gets(char* line, int size) override { return std::fgets(line, size, f.get()); }
+  int getc() override { return std::fgetc(f.get()); }
+  bool read(void* buf, size_t len) override { return std::fread(buf, len, 1, f.get()) == 1; }
+
+  std::string read_rest() override {
+    std::string ret;
+    int c = std::fgetc(f.get());
+    if (c != EOF) {
+      ret += (char)c;
+      char buf[512];
+      for (;;) {
+        size_t n = std::fread(buf, 1, sizeof(buf), f.get());
+        ret.append(buf, n);
+        if (n != sizeof(buf))  // short read: stop
+          break;
+      }
+    }
+    return ret;
+  }
+
+  long tell() override {
+    return std::ftell(f.get());
+  }
+
+  bool skip(size_t n) override {
+#if defined(_MSC_VER)
+    int result = _fseeki64(f.get(), (std::ptrdiff_t)n, SEEK_CUR);
+#elif defined(__MINGW32__)
+    int result = fseeko(f.get(), (_off_t)n, SEEK_CUR);
+#else
+    int result = std::fseek(f.get(), (long)n, SEEK_CUR);
+#endif
+    if (result != 0) {  // seeking failed: fall back to reading and discarding n bytes
+      char buf[512];
+      while (n >= sizeof(buf)) {
+        if (std::fread(buf, sizeof(buf), 1, f.get()) != 1)
+          return false;
+        n -= sizeof(buf);
+      }
+      if (n > 0 && std::fread(buf, n, 1, f.get()) != 1)
+        return false;
+    }
+    return true;
+  }
+
+private:
+  fileptr_t f;
+};
+
+struct MemoryStream final : public AnyStream {  // reads from a memory range; the bytes are not copied
+  MemoryStream(const char* start_, size_t size)
+    : start(start_), end(start_ + size), cur(start_) {}
+
+  char* gets(char* line, int size) override {
+    --size; // fgets reads in at most one less than size characters
+    if (cur >= end)
+      return nullptr;
+    if (size > end - cur)
+      size = int(end - cur);
+    const char* nl = (const char*) std::memchr(cur, '\n', size);
+    size_t len = nl ? nl - cur + 1 : size;  // keep the '\n', as fgets does
+    std::memcpy(line, cur, len);
+    line[len] = '\0';
+    cur += len;
+    return line;
+  }
+  int getc() override { return cur < end ? (unsigned char) *cur++ : EOF; }  // 0..255, as fgetc
+
+  bool read(void* buf, size_t len) override {
+    if (len > size_t(end - cur))  // compare sizes: cur + len could point past the buffer
+      return false;
+    std::memcpy(buf, cur, len);
+    cur += len;
+    return true;
+  }
+
+  std::string read_rest() override {
+    const char* last = cur;
+    cur = end;
+    return std::string(last, end);
+  }
+
+  long tell() override {
+    return cur - start;  // offset from the beginning of the buffer
+  }
+  bool skip(size_t n) override {
+    cur = n < size_t(end - cur) ? cur + n : end;  // clamp, so that cur never moves past end
+    return cur < end;  // false when nothing is left to read after the skip
+  }
+
+private:
+  const char* const start;
+  const char* const end;
+  const char* cur;  // invariant: start <= cur <= end
+};
+
+class BasicInput {  // uncompressed input identified by a path ("-" for stdin)
+public:
+  explicit BasicInput(const std::string& path) : path_(path) {}
+
+  const std::string& path() const { return path_; }
+  const std::string& basepath() const { return path_; }  // same as path() in this class
+
+  // Does the path stand for stdin?
+  // Each reading function needs to call it (some functions use stdin
+  // and some std::cin, so we don't try to unify it here).
+  bool is_stdin() const { return path() == "-"; }
+
+  // providing the same interface as MaybeGzipped
+  bool is_compressed() const { return false; }
+  // for reading (uncompressing into memory) the whole file at once
+  CharArray uncompress_into_buffer(size_t=0) { return {}; }  // always an empty buffer here
+
+  std::unique_ptr<AnyStream> create_stream() {
+    return std::unique_ptr<AnyStream>(new FileStream(path().c_str(), "rb"));
+  }
+
+private:
+  std::string path_;
+};
+
+} // namespace gemmi
+#endif
diff --git a/gemmi_gph/gemmi/iterator.hpp b/gemmi_gph/gemmi/iterator.hpp
new file mode 100644
index 00000000..824472f9
--- /dev/null
+++ b/gemmi_gph/gemmi/iterator.hpp
@@ -0,0 +1,287 @@
+// Copyright 2018 Global Phasing Ltd.
+//
+// Bidirectional iterators (over elements of any container) that can filter,
+// uniquify, group, or iterate with a stride.
+
+#ifndef GEMMI_ITERATOR_HPP_
+#define GEMMI_ITERATOR_HPP_
+#include <iterator>     // for bidirectional_iterator_tag
+#include <type_traits>  // for remove_cv
+#include <vector>
+
+namespace gemmi {
+
+// Disable warning "X<T>::operator X<T>() const will not be called for
+// implicit or explicit conversions", which is triggered when templates
+// StrideIter, IndirectIter and others are expanded with const Value.
+#if defined(__INTEL_COMPILER) || defined(__NVCOMPILER)
+  #pragma diagnostic push
+  #pragma diag_suppress = conversion_function_not_usable
+#elif defined(__NVCC__)
+  #pragma nv_diagnostic push
+  #pragma nv_diag_suppress = conversion_function_not_usable
+#endif
+
+// implements concept BidirectionalIterator; Policy supplies increment/decrement/equal/dereference
+template <typename Policy>
+struct BidirIterator : Policy {
+  using value_type = typename std::remove_cv<typename Policy::value_type>::type;
+  using difference_type = std::ptrdiff_t;
+  using pointer = typename Policy::value_type*;
+  using reference = typename Policy::reference;
+  using iterator_category = std::bidirectional_iterator_tag;
+
+  BidirIterator() = default;
+  BidirIterator(Policy&& p) : Policy(p) {}  // p is an lvalue here, so Policy is copy-constructed
+
+  BidirIterator& operator++() { Policy::increment(); return *this; }
+  BidirIterator operator++(int) { BidirIterator x = *this; ++*this; return x; }
+  BidirIterator& operator--() { Policy::decrement(); return *this; }
+  BidirIterator operator--(int) { BidirIterator x = *this; --*this; return x; }
+  bool operator==(const BidirIterator &o) const { return Policy::equal(o); }
+  bool operator!=(const BidirIterator &o) const { return !Policy::equal(o); }
+  reference operator*() { return Policy::dereference(); }
+  pointer operator->() { return &Policy::dereference(); }
+  using const_variant = BidirIterator<typename Policy::const_policy>;  // iterator over const values
+  operator const_variant() const {
+    return const_variant(static_cast<const Policy&>(*this));  // goes through Policy's const_policy conversion
+  }
+};
+
+template<typename Value>
+class StrideIterPolicy {  // refers to cur_[offset_]; each step moves cur_ by stride_ elements
+public:
+  using value_type = Value;
+  using reference = Value&;
+  StrideIterPolicy() : cur_(nullptr), offset_(0), stride_(0) {}
+  StrideIterPolicy(Value* ptr, std::size_t offset, size_t stride)
+    : cur_(ptr), offset_(offset), stride_((unsigned)stride) {}  // stride is narrowed to unsigned
+  void increment() { cur_ += stride_; }
+  void decrement() { cur_ -= stride_; }
+  bool equal(const StrideIterPolicy& o) const { return cur_ == o.cur_; }  // offset_ is not compared
+  Value& dereference() { return cur_[offset_]; }
+  using const_policy = StrideIterPolicy<Value const>;
+  operator const_policy() const { return const_policy(cur_, offset_, stride_); }
+private:
+  Value* cur_;
+  std::size_t offset_;
+  unsigned stride_;
+};
+template<typename Value>
+using StrideIter = BidirIterator<StrideIterPolicy<Value>>;
+
+
+template<typename Redirect, typename Value>
+class IndirectIterPolicy {  // walks a sequence of int positions; yields redir_->value_at(position)
+public:
+  using value_type = Value;
+  using reference = Value&;
+  IndirectIterPolicy() : redir_(nullptr) {}
+  IndirectIterPolicy(Redirect* redir, std::vector<int>::const_iterator cur)
+    : redir_(redir), cur_(cur) {}
+  void increment() { ++cur_; }
+  void decrement() { --cur_; }
+  bool equal(const IndirectIterPolicy& o) const { return cur_ == o.cur_; }  // redir_ is not compared
+  Value& dereference() { return redir_->value_at(*cur_); }
+  using const_policy = IndirectIterPolicy<Redirect const, Value const>;
+  operator const_policy() const { return const_policy(redir_, cur_); }
+  // TODO: what should be done with absent optional tags (*cur_ < 0)?
+private:
+  Redirect* redir_;
+  std::vector<int>::const_iterator cur_; // points into positions
+};
+template<typename Redirect, typename Value>
+using IndirectIter = BidirIterator<IndirectIterPolicy<Redirect, Value>>;
+
+
+template<typename Vector, typename Value>
+class UniqIterPolicy {  // stops at the first element of each run of equal group_key() values
+public:
+  using value_type = Value;
+  using reference = Value&;
+  UniqIterPolicy() : vec_(nullptr), pos_(0) {}
+  UniqIterPolicy(Vector* vec, std::size_t pos) : vec_(vec), pos_(pos) {}
+  void increment() {
+    // move to the first element of the next group
+    const auto& key = (*vec_)[pos_].group_key();
+    ++pos_;
+    while (pos_ != vec_->size() && (*vec_)[pos_].group_key() == key)
+      ++pos_;
+  }
+  void decrement() {
+    --pos_; // now we are at the last element of the previous group
+    const auto& key = (*vec_)[pos_].group_key();
+    while (pos_ != 0 && (*vec_)[pos_-1].group_key() == key)
+      --pos_; // move to the group beginning
+  }
+  bool equal(const UniqIterPolicy& o) const { return pos_ == o.pos_; }  // vec_ is not compared
+  Value& dereference() { return (*vec_)[pos_]; }
+  using const_policy = UniqIterPolicy<Vector const, Value const>;
+  operator const_policy() const { return const_policy(vec_, pos_); }
+private:
+  Vector* vec_;
+  std::size_t pos_;  // index into *vec_; vec_->size() marks the end
+};
+template<typename Vector, typename Value>
+using UniqIter = BidirIterator<UniqIterPolicy<Vector, Value>>;
+
+template<typename Value, typename Vector=std::vector<Value>>
+struct UniqProxy {  // range adaptor: begin()/end() return UniqIter over vec
+  Vector& vec;  // referenced, not copied
+  using iterator = UniqIter<Vector, Value>;
+  iterator begin() { return {{&vec, 0}}; }
+  iterator end() { return {{&vec, vec.size()}}; }
+};
+template<typename Value, typename Vector=std::vector<Value>>
+struct ConstUniqProxy {  // read-only counterpart of UniqProxy
+  const Vector& vec;  // referenced, not copied
+  using iterator = UniqIter<const Vector, const Value>;
+  iterator begin() const { return {{&vec, 0}}; }
+  iterator end() const { return {{&vec, vec.size()}}; }
+};
+
+
+template<typename Vector, typename Value>
+class GroupingIterPolicy {  // Value is a span-like type; each position is one run of equal group_key()
+public:
+  using value_type = Value;
+  using reference = Value&;
+  GroupingIterPolicy() = default;
+  GroupingIterPolicy(const Value& span) : span_(span) {}
+  void increment() {
+    span_.set_begin(span_.end());  // start right after the current span
+    span_.set_size(0);
+    while (!span_.is_ending() &&
+           span_.begin()->group_key() == span_.end()->group_key())
+      span_.set_size(span_.size() + 1);  // grow while the next item has the same key
+  }
+  void decrement() {
+    span_.set_begin(span_.begin() - 1);  // last item of the previous group
+    span_.set_size(1);
+    while (!span_.is_beginning() &&
+           span_.begin()->group_key() == (span_.begin() - 1)->group_key()) {
+      span_.set_begin(span_.begin() - 1);
+      span_.set_size(span_.size() + 1);
+    }
+  }
+  bool equal(const GroupingIterPolicy& o) const {
+    return span_.begin() == o.span_.begin();  // only the span starts are compared
+  }
+  Value& dereference() { return span_; }
+  using const_policy = GroupingIterPolicy<Vector const, Value const>;
+  operator const_policy() const { return const_policy(span_); }
+private:
+  Value span_;
+};
+template<typename Vector, typename Value>
+using GroupingIter = BidirIterator<GroupingIterPolicy<Vector, Value>>;
+
+
+template<typename Filter, typename Vector, typename Value>
+class FilterIterPolicy {  // visits only the elements of *vec_ accepted by filter_->matches()
+public:
+  using value_type = Value;
+  using reference = Value&;
+  FilterIterPolicy() : filter_(nullptr), vec_(nullptr), pos_(0) {}
+  FilterIterPolicy(const Filter* filter, Vector* vec, std::size_t pos)
+      : filter_(filter), vec_(vec), pos_(pos) {
+    while (pos_ != vec_->size() && !matches(pos_))  // start at the first match at or after pos
+      ++pos_;
+  }
+  bool matches(std::size_t p) const { return filter_->matches((*vec_)[p]); }
+  void increment() { while (++pos_ < vec_->size() && !matches(pos_)) {} }
+  void decrement() { while (pos_ != 0 && !matches(--pos_)) {} }
+  bool equal(const FilterIterPolicy& o) const { return pos_ == o.pos_; }  // positions only
+  Value& dereference() { return (*vec_)[pos_]; }
+  using const_policy = FilterIterPolicy<Filter, Vector const, Value const>;
+  operator const_policy() const { return const_policy(filter_, vec_, pos_); }  // keep the filter
+private:
+  const Filter* filter_;
+  Vector* vec_;
+  std::size_t pos_;  // index into *vec_; vec_->size() marks the end
+};
+template<typename Filter, typename Vector, typename Value>
+using FilterIter = BidirIterator<FilterIterPolicy<Filter, Vector, Value>>;
+
+template<typename Filter, typename Value>
+struct FilterProxy {  // range adaptor: begin()/end() return FilterIter over vec
+  const Filter& filter;  // referenced, not copied
+  std::vector<Value>& vec;
+  using iterator = FilterIter<Filter, std::vector<Value>, Value>;
+  iterator begin() { return {{&filter, &vec, 0}}; }
+  iterator end() { return {{&filter, &vec, vec.size()}}; }
+};
+
+template<typename Filter, typename Value>
+struct ConstFilterProxy {  // read-only counterpart of FilterProxy
+  const Filter& filter;  // referenced, not copied
+  const std::vector<Value>& vec;
+  using iterator = FilterIter<Filter, const std::vector<Value>, const Value>;
+  iterator begin() const { return {{&filter, &vec, 0}}; }
+  iterator end() const { return {{&filter, &vec, vec.size()}}; }
+};
+
+
+template<typename Item>
+struct ItemGroup {  // view of the items in [start, end) that share the group_key() of *start
+  using element_type = Item;
+
+  ItemGroup(Item* start, const Item* end)
+      : size_(int(end - start)), extent_(int(end - start)), start_(start) {
+    for (const Item* i = start + 1; i != end; ++i)  // starts at start + 1: the range must not be empty
+      if (i->group_key() != start->group_key())
+        --size_;  // items with a different key are not counted
+  }
+
+  struct iterator {
+    Item* ptr;
+    const Item* end;
+    bool operator==(const iterator& o) const { return ptr == o.ptr; }
+    bool operator!=(const iterator& o) const { return ptr != o.ptr; }
+    iterator& operator++() {
+      const Item* prev = ptr++;
+      while (ptr != end && ptr->group_key() != prev->group_key())  // skip items with another key
+        ++ptr;
+      return *this;
+    }
+    Item& operator*() { return *ptr; }
+    Item* operator->() { return ptr; }
+  };
+  iterator begin() { return iterator{start_, start_+extent_}; }
+  iterator end() { return iterator{start_+extent_, start_+extent_}; }
+
+  size_t size() const { return (size_t) size_; }  // number of items with the group's key
+  int extent() const { return extent_; }  // length of the underlying range
+  bool empty() const { return size_ == 0; }
+  Item& front() { return *start_; }
+  const Item& front() const { return *start_; }
+  Item& back() { return start_[extent_ - 1]; }
+  const Item& back() const { return start_[extent_ - 1]; }
+
+  // constant time unless sparse (extent_ > size_)
+  Item& operator[](std::size_t i) {
+    if (size_ == extent_ || i == 0)
+      return start_[i];
+    for (Item* ptr = start_ + 1; ; ++ptr)  // no bounds check: i must be smaller than size()
+      if (ptr->group_key() == start_->group_key())
+        if (--i == 0)
+          return *ptr;
+  }
+  const Item& operator[](std::size_t i) const {
+    return const_cast<ItemGroup*>(this)->operator[](i);
+  }
+
+private:
+  int size_ = 0;
+  int extent_ = 0;
+  Item* start_ = nullptr;
+};
+
+#if defined(__INTEL_COMPILER) || defined(__NVCOMPILER)
+  #pragma diagnostic pop
+#elif defined(__NVCC__)
+  #pragma nv_diagnostic pop
+#endif
+
+} // namespace gemmi
+#endif
diff --git a/gemmi_gph/gemmi/logger.hpp b/gemmi_gph/gemmi/logger.hpp
new file mode 100644
index 00000000..35fafd17
--- /dev/null
+++ b/gemmi_gph/gemmi/logger.hpp
@@ -0,0 +1,71 @@
+// Copyright Global Phasing Ltd.
+//
+// Logger - a tiny utility for passing messages through a callback.
+
+#ifndef GEMMI_LOGGER_HPP_
+#define GEMMI_LOGGER_HPP_
+
+#include <cstdio>      // for fprintf
+#include <functional>  // for function
+#include "fail.hpp"    // for GEMMI_COLD
+#include "util.hpp"    // for cat
+
+namespace gemmi {
+
+/// Passes messages (including warnings/errors) to a callback function.
+/// Messages are passed as strings without a trailing newline.
+/// They have syslog-like severity levels: 8=debug, 6=info, 5=notice, 3=error,
+/// allowing the use of a threshold to filter them.
+/// Quirk: Errors double as both errors and warnings. Unrecoverable errors
+///        don't go through this class; Logger only handles errors that can
+///        be downgraded to warnings. If a callback is set, the error is passed
+///        as a warning message. Otherwise, it's thrown as std::runtime_error.
+struct Logger {
+  /// The function that receives the messages; may be empty (no callback set).
+  std::function<void(const std::string&)> callback;
+  /// Messages of this level and of all lower (more severe) levels are passed:
+  /// 8=all, 6=all but debug, 5=notes and warnings, 3=warnings, 0=none
+  int threshold = 6;
+
+  /// suspend() lowers the threshold by 100 and resume() restores it. They are
+  /// used internally to avoid duplicate messages from repeated internal calls.
+  void suspend() { threshold -= 100; }
+  void resume()  { threshold += 100; }
+
+  /// Send a message without any prefix, at the numeric level N.
+  template<int N, class... Args> void level(Args const&... args) const {
+    if (callback && threshold >= N)
+      callback(cat(args...));
+  }
+
+  /// Send a debug message (level 8).
+  template<class... Args> void debug(Args const&... args) const { level<8>("Debug: ", args...); }
+  /// Send a plain message without any prefix (level 6).
+  template<class... Args> void mesg(Args const&... args) const { level<6>(args...); }
+  /// Send a note, i.e. a notice or a significant message (level 5).
+  template<class... Args> void note(Args const&... args) const { level<5>("Note: ", args...); }
+
+  /// Send a warning/error (level 3): passed to fail() when no callback is set.
+  template<class... Args> GEMMI_COLD void err(Args const&... args) const {
+    if (threshold < 3)
+      return;
+    std::string text = cat(args...);
+    if (!callback)
+      fail(text);
+    callback("Warning: " + text);
+  }
+
+  // ready-made callbacks
+
+  /// usage: logger.callback = Logger::to_stderr;
+  static void to_stderr(const std::string& s) {
+    std::fprintf(stderr, "%s\n", s.c_str());
+  }
+  /// usage: logger.callback = Logger::to_stdout;
+  static void to_stdout(const std::string& s) {
+    std::fprintf(stdout, "%s\n", s.c_str());
+  }
+};
+
+} // namespace gemmi
+#endif
diff --git a/symmetry/gemmi/math.hpp b/gemmi_gph/gemmi/math.hpp
similarity index 100%
rename from symmetry/gemmi/math.hpp
rename to gemmi_gph/gemmi/math.hpp
diff --git a/gemmi_gph/gemmi/mtz.hpp b/gemmi_gph/gemmi/mtz.hpp
new file mode 100644
index 00000000..c7cf4431
--- /dev/null
+++ b/gemmi_gph/gemmi/mtz.hpp
@@ -0,0 +1,600 @@
+// Copyright 2019 Global Phasing Ltd.
+//
+// MTZ reflection file format.
+
+#ifndef GEMMI_MTZ_HPP_
+#define GEMMI_MTZ_HPP_
+
+#include <cassert>
+#include <cmath>         // for isnan
+#include <cstdint>       // for int32_t
+#include <algorithm>     // for copy
+#include <array>
+#include <initializer_list>
+#include <string>
+#include <vector>
+#include "fail.hpp"      // for fail
+#include "input.hpp"     // for AnyStream, FileStream, CharArray
+#include "iterator.hpp"  // for StrideIter
+#include "logger.hpp"    // for Logger
+#include "math.hpp"      // for rad, Mat33
+#include "symmetry.hpp"  // for find_spacegroup_by_name, SpaceGroup
+#include "unitcell.hpp"  // for UnitCell
+#include "util.hpp"      // for ialpha4_id, rtrim_str, ialpha3_id, ...
+
+namespace gemmi {
+
+// Unmerged MTZ files always store in-asu hkl indices and symmetry operation
+// encoded in the M/ISYM column. Here is a helper for writing such files.
+struct UnmergedHklMover {
+  UnmergedHklMover(const SpaceGroup* spacegroup) : asu_(spacegroup) {
+    if (spacegroup != nullptr)  // otherwise group_ops_ stays default-constructed
+      group_ops_ = spacegroup->operations();
+  }
+
+  // Overwrites hkl with its in-asu equivalent; returns the ISYM value for M/ISYM
+  int move_to_asu(std::array<int, 3>& hkl) {
+    const std::pair<Miller, int> moved = asu_.to_asu(hkl, group_ops_);
+    hkl = moved.first;
+    return moved.second;
+  }
+
+private:
+  ReciprocalAsu asu_;
+  GroupOps group_ops_;
+};
+
+struct MtzMetadata {  // header-level fields of an MTZ file; base class of Mtz
+  std::string source_path;  // input file path, if known
+  bool same_byte_order = true;  // flipped by Mtz::toggle_endianness()
+  bool indices_switched_to_original = false;
+  std::int64_t header_offset = 0;  // byte-swapped by Mtz::toggle_endianness()
+  std::string version_stamp;
+  std::string title;
+  int nreflections = 0;  // number of data rows; see Mtz::has_data()
+  std::array<int, 5> sort_order = {};
+  double min_1_d2 = NAN;  // Mtz::resolution_low() returns sqrt(1 / min_1_d2)
+  double max_1_d2 = NAN;  // Mtz::resolution_high() returns sqrt(1 / max_1_d2)
+  float valm = NAN;
+  int nsymop = 0;
+  UnitCell cell;
+  int spacegroup_number = 0;  // Mtz::set_spacegroup() stores SpaceGroup::ccp4 here
+  std::string spacegroup_name;  // Mtz::set_spacegroup() stores SpaceGroup::hm here
+  std::vector<Op> symops;
+  const SpaceGroup* spacegroup = nullptr;
+  std::vector<std::string> history;
+  std::string appended_text;
+  // used to report non-critical problems when reading a file (also used in mtz2cif)
+  Logger logger;
+};
+
+struct GEMMI_DLL Mtz : public MtzMetadata {  // MTZ content: datasets, columns, batches, data
+  struct Dataset {
+    int id;
+    std::string project_name;
+    std::string crystal_name;
+    std::string dataset_name;
+    UnitCell cell;
+    double wavelength;  // 0 means not set
+  };
+
+  struct Column {
+    int dataset_id;
+    char type;
+    std::string label;
+    float min_value = NAN;
+    float max_value = NAN;
+    std::string source;  // from COLSRC
+    Mtz* parent;      // back-pointer, re-pointed by Mtz copy/move
+    std::size_t idx;  // position in parent->columns (asserted in begin())
+
+    Dataset& dataset() { return parent->dataset(dataset_id); }
+    const Dataset& dataset() const { return parent->dataset(dataset_id); }
+    bool has_data() const { return parent->has_data(); }
+    int size() const { return has_data() ? parent->nreflections : 0; }
+    size_t stride() const { return parent->columns.size(); }  // values per row
+    float& operator[](std::size_t n) { return parent->data[idx + n * stride()]; }
+    float operator[](std::size_t n) const { return parent->data[idx + n * stride()]; }
+    float& at(std::size_t n) { return parent->data.at(idx + n * stride()); }
+    float at(std::size_t n) const { return parent->data.at(idx + n * stride()); }
+    bool is_integer() const {
+      return type == 'H' || type == 'B' || type == 'Y' || type == 'I';
+    }
+
+    const Column* get_next_column_if_type(char next_type) const {
+      if (idx + 1 < parent->columns.size()) {
+        const Column& next_col = parent->columns[idx + 1];
+        if (next_col.dataset_id == dataset_id && next_col.type == next_type)
+          return &next_col;
+      }
+      return nullptr;  // no next column, or different dataset/type
+    }
+
+    using iterator = StrideIter<float>;
+    iterator begin() {
+      assert(parent);
+      assert(&parent->columns[idx] == this);
+      return iterator({parent->data.data(), idx, stride()});
+    }
+    iterator end() {
+      return iterator({parent->data.data() + parent->data.size(), idx,
+                       stride()});
+    }
+    using const_iterator = StrideIter<const float>;
+    const_iterator begin() const { return const_cast<Column*>(this)->begin(); }
+    const_iterator end() const { return const_cast<Column*>(this)->end(); }
+  };
+
+  struct Batch {
+    Batch() {
+      ints.resize(29, 0);
+      floats.resize(156, 0.);
+      // write the same values that are written by CCP4 progs such as COMBAT
+      ints[0] = 29 + 156;
+      ints[1] = 29;
+      ints[2] = 156;
+      // COMBAT sets BSCALE=1, but Pointless sets it to 0.
+      //floats[43] = 1.f; // batch scale
+    }
+    int number = 0;
+    std::string title;
+    std::vector<int> ints;
+    std::vector<float> floats;
+    std::vector<std::string> axes;
+
+    UnitCell get_cell() const {
+      return UnitCell(floats[0], floats[1], floats[2],
+                      floats[3], floats[4], floats[5]);
+    }
+    void set_cell(const UnitCell& uc) {
+      floats[0] = (float) uc.a;
+      floats[1] = (float) uc.b;
+      floats[2] = (float) uc.c;
+      floats[3] = (float) uc.alpha;
+      floats[4] = (float) uc.beta;
+      floats[5] = (float) uc.gamma;
+    }
+
+    int dataset_id() const { return ints[20]; }
+    void set_dataset_id(int id) { ints[20] = id; }
+    float wavelength() const { return floats[86]; }
+    void set_wavelength(float lambda) { floats[86] = lambda; }
+    float phi_start() const { return floats[36]; }
+    float phi_end() const { return floats[37]; }
+    Mat33 matrix_U() const {
+      return Mat33(floats[6], floats[9],  floats[12],
+                   floats[7], floats[10], floats[13],
+                   floats[8], floats[11], floats[14]);
+    }
+  };
+
+  std::vector<Dataset> datasets;
+  std::vector<Column> columns;
+  std::vector<Batch> batches;
+  std::vector<float> data;  // row-major: columns.size() values per reflection
+
+  explicit Mtz(bool with_base=false) {
+    if (with_base)
+      add_base();
+  }
+  Mtz(Mtz&& o) noexcept { *this = std::move(o); }
+  Mtz& operator=(Mtz&& o) noexcept {
+    MtzMetadata::operator=(std::move(o));
+    datasets = std::move(o.datasets);
+    columns = std::move(o.columns);
+    batches = std::move(o.batches);
+    data = std::move(o.data);
+    for (Mtz::Column& col : columns)  // columns keep a back-pointer to their Mtz
+      col.parent = this;
+    return *this;
+  }
+
+  // explicit to be aware where we make copies
+  explicit Mtz(const Mtz& o) : MtzMetadata(o) {
+    datasets = o.datasets;
+    columns = o.columns;
+    batches = o.batches;
+    data = o.data;
+    for (Mtz::Column& col : columns)  // copied columns still point at o
+      col.parent = this;
+  }
+
+  Mtz& operator=(Mtz const&) = delete;
+
+  void add_base() {
+    datasets.push_back({0, "HKL_base", "HKL_base", "HKL_base", cell, 0.});
+    for (int i = 0; i != 3; ++i)
+      add_column(std::string(1, "HKL"[i]), 'H', 0, i, false);
+  }
+
+  // Functions to use after MTZ headers (and data) are read.
+
+  double resolution_high() const { return std::sqrt(1.0 / max_1_d2); }
+  double resolution_low() const  { return std::sqrt(1.0 / min_1_d2); }
+
+  UnitCell& get_cell(int dataset=-1) {
+    for (Dataset& ds : datasets)
+      if (ds.id == dataset && ds.cell.is_crystal() && ds.cell.a > 0)
+        return ds.cell;
+    return cell;  // fall back to the global cell
+  }
+
+  const UnitCell& get_cell(int dataset=-1) const {
+    return const_cast<Mtz*>(this)->get_cell(dataset);
+  }
+
+  void set_cell_for_all(const UnitCell& new_cell) {
+    cell = new_cell;
+    cell.set_cell_images_from_spacegroup(spacegroup);  // probably not needed
+    for (Dataset& ds : datasets)
+      ds.cell = cell;
+  }
+
+  UnitCellParameters get_average_cell_from_batch_headers(double* rmsd) const;
+
+  void set_spacegroup(const SpaceGroup* new_sg) {
+    spacegroup = new_sg;
+    spacegroup_number = new_sg ? spacegroup->ccp4 : 0;
+    spacegroup_name = new_sg ? spacegroup->hm : "";
+  }
+
+  Dataset& last_dataset() {
+    if (datasets.empty())
+      fail("MTZ dataset not found (missing DATASET header line?).");
+    return datasets.back();
+  }
+
+  Dataset& dataset(int id) {
+    if ((size_t)id < datasets.size() && datasets[id].id == id)  // fast path: id == index
+      return datasets[id];
+    for (Dataset& d : datasets)
+      if (d.id == id)
+        return d;
+    fail("MTZ file has no dataset with ID " + std::to_string(id));
+  }
+  const Dataset& dataset(int id) const {
+    return const_cast<Mtz*>(this)->dataset(id);
+  }
+
+  Dataset* dataset_with_name(const std::string& name) {
+    for (Dataset& d : datasets)
+      if (d.dataset_name == name)
+        return &d;
+    return nullptr;
+  }
+  const Dataset* dataset_with_name(const std::string& label) const {
+    return const_cast<Mtz*>(this)->dataset_with_name(label);
+  }
+
+  int count(const std::string& label) const {
+    int n = 0;
+    for (const Column& col : columns)
+      if (col.label == label)
+        ++n;
+    return n;
+  }
+
+  int count_type(char type) const {
+    int n = 0;
+    for (const Column& col : columns)
+      if (col.type == type)
+        ++n;
+    return n;
+  }
+
+  Column* column_with_label(const std::string& label, const Dataset* ds=nullptr, char type='*') {
+    for (Column& col : columns)
+      if (col.label == label && (!ds || ds->id == col.dataset_id)
+                             && (type == '*' || type == col.type))
+        return &col;
+    return nullptr;
+  }
+  const Column* column_with_label(const std::string& label, const Dataset* ds=nullptr,
+                                  char type='*') const {
+    return const_cast<Mtz*>(this)->column_with_label(label, ds, type);
+  }
+
+  const Column& get_column_with_label(const std::string& label, const Dataset* ds=nullptr) const {
+    if (const Column* col = column_with_label(label, ds))
+      return *col;
+    fail("Column label not found: " + label);
+  }
+
+  std::vector<const Column*> columns_with_type(char type) const {
+    std::vector<const Column*> cols;
+    for (const Column& col : columns)
+      if (col.type == type)
+        cols.push_back(&col);
+    return cols;
+  }
+
+  std::vector<int> positions_of_columns_with_type(char col_type) const {
+    std::vector<int> cols;
+    for (int i = 0; i < (int) columns.size(); ++i)
+      if (columns[i].type == col_type)
+        cols.push_back(i);
+    return cols;
+  }
+
+  // F(+)/(-) pairs should have type G (and L for sigma),
+  // I(+)/(-) -- K (M for sigma), but E(+)/(-) has no special column type,
+  // so here we use column labels not types.
+  std::vector<std::pair<int,int>> positions_of_plus_minus_columns() const {
+    std::vector<std::pair<int,int>> r;
+    for (int i = 0; i < (int) columns.size(); ++i) {
+      const Column& col = columns[i];
+      size_t sign_pos = col.label.find("(+)");
+      if (sign_pos != std::string::npos) {
+        std::string minus_label = columns[i].label;
+        minus_label[sign_pos+1] = '-';  // "(+)" -> "(-)"
+        for (int j = 0; j < (int) columns.size(); ++j)
+          if (columns[j].label == minus_label &&
+              columns[j].type == col.type &&
+              columns[j].dataset_id == col.dataset_id) {
+            r.emplace_back(i, j);
+            break;
+          }
+      }
+    }
+    return r;
+  }
+
+  /// the order of labels matters
+  const Column* column_with_one_of_labels(std::initializer_list<const char*> labels,
+                                          char type='*') const {
+    for (const char* label : labels)
+      if (const Column* col = column_with_label(label, nullptr, type))
+        return col;
+    return nullptr;
+  }
+
+  /// the order of labels doesn't matter
+  Column* column_with_type_and_any_of_labels(char type, std::initializer_list<const char*> labels) {
+    for (Column& col : columns)
+      if (col.type == type) {
+        for (const char* label : labels)
+          if (col.label == label)
+            return &col;
+      }
+    return nullptr;
+  }
+
+  Column* rfree_column() {
+    // cf. MtzToCif::default_spec in mtz2cif.hpp
+    return column_with_type_and_any_of_labels('I',
+        {"FREE", "RFREE", "FREER", "FreeR_flag", "R-free-flags", "FreeRflag", "R_FREE_FLAGS"});
+  }
+  const Column* rfree_column() const {
+    return const_cast<Mtz*>(this)->rfree_column();
+  }
+
+  Column* imean_column() {
+    return column_with_type_and_any_of_labels('J', {"IMEAN", "I", "IOBS", "I-obs"});
+  }
+  const Column* imean_column() const {
+    return const_cast<Mtz*>(this)->imean_column();
+  }
+
+  Column* iplus_column() {
+    return column_with_type_and_any_of_labels('K', {"I(+)", "IOBS(+)", "I-obs(+)", "Iplus"});
+  }
+  const Column* iplus_column() const {
+    return const_cast<Mtz*>(this)->iplus_column();
+  }
+
+  Column* iminus_column() {
+    return column_with_type_and_any_of_labels('K', {"I(-)", "IOBS(-)", "I-obs(-)", "Iminus"});
+  }
+  const Column* iminus_column() const {
+    return const_cast<Mtz*>(this)->iminus_column();
+  }
+
+  bool has_data() const {  // note: also true when both data and columns are empty
+    return data.size() == columns.size() * nreflections;
+  }
+
+  bool is_merged() const { return batches.empty(); }
+
+  /// Calculates min/max for all combinations of reflections and unit cells,
+  /// where unit cells are a global CELL and per-dataset DCELL.
+  std::array<double,2> calculate_min_max_1_d2() const;
+
+  void update_reso() {
+    std::array<double,2> reso = calculate_min_max_1_d2();
+    min_1_d2 = reso[0];
+    max_1_d2 = reso[1];
+  }
+
+  // Functions for reading MTZ headers and data.
+
+  void toggle_endianness() {
+    same_byte_order = !same_byte_order;
+    swap_eight_bytes(&header_offset);
+  }
+
+  void read_first_bytes(AnyStream& stream);
+
+  /// read headers until END
+  void read_main_headers(AnyStream& stream, std::vector<std::string>* save_headers);
+
+  /// read the part between END and MTZENDOFHEADERS
+  void read_history_and_batch_headers(AnyStream& stream);
+
+  void setup_spacegroup();
+
+  void read_raw_data(AnyStream& stream, bool do_read=true);
+
+  void read_all_headers(AnyStream& stream);
+
+  void read_stream(AnyStream& stream, bool with_data);
+
+  void read_file(const std::string& path) {
+    try {
+      source_path = path;
+      FileStream stream(path.c_str(), "rb");
+      read_stream(stream, true);
+    } catch (std::system_error&) {
+      throw;  // system_error::what() includes path, don't add anything
+    } catch (std::runtime_error& e) {
+      fail(std::string(e.what()) + ": " + path);
+    }
+  }
+
+  template<typename Input>
+  void read_input(Input&& input, bool with_data) {
+    source_path = input.path();
+    read_stream(*input.create_stream(), with_data);
+  }
+
+  /// the same as read_input(MaybeGzipped(path), with_data)
+  void read_file_gz(const std::string& path, bool with_data=true);
+
+  std::vector<int> sorted_row_indices(int use_first=3) const;
+  bool sort(int use_first=3);
+
+  Miller get_hkl(size_t offset) const {
+    return {{(int)data[offset], (int)data[offset+1], (int)data[offset+2]}};
+  }
+  void set_hkl(size_t offset, const Miller& hkl) {
+    for (int i = 0; i != 3; ++i)
+      data[offset + i] = static_cast<float>(hkl[i]);
+  }
+
+  /// Returns offset of the first hkl or (size_t)-1. Can be slow.
+  size_t find_offset_of_hkl(const Miller& hkl, size_t start=0) const;
+
+  /// (for merged MTZ only) change HKL to ASU equivalent, adjust phases, etc
+  void ensure_asu(bool tnt_asu=false);
+
+  /// Reindex data, usually followed by ensure_asu(). Outputs messages through logger.
+  void reindex(const Op& op);
+
+  /// Change symmetry to P1 and expand reflections. Does not sort.
+  /// Similar to command EXPAND in SFTOOLS.
+  void expand_to_p1();
+
+  /// (for unmerged MTZ only) change HKL according to M/ISYM
+  bool switch_to_original_hkl();
+
+  /// (for unmerged MTZ only) change HKL to ASU equivalent and set ISYM
+  bool switch_to_asu_hkl();
+
+  Dataset& add_dataset(const std::string& name) {
+    int id = 0;
+    for (const Dataset& d : datasets)  // new id = largest existing id + 1
+      if (d.id >= id)
+        id = d.id + 1;
+    datasets.push_back({id, name, name, name, cell, 0.0});
+    return datasets.back();
+  }
+
+  Column& add_column(const std::string& label, char type,
+                     int dataset_id, int pos, bool expand_data);
+
+  // trailing_cols are columns right after src_col that are also copied.
+  Column& replace_column(size_t dest_idx, const Column& src_col,
+                         const std::vector<std::string>& trailing_cols={});
+
+  // If dest_idx < 0, the new column(s) are appended at the end;
+  // otherwise the existing columns starting at dest_idx are overwritten.
+  Column& copy_column(int dest_idx, const Column& src_col,
+                      const std::vector<std::string>& trailing_cols={});
+
+  void remove_column(size_t idx);
+
+  template <typename Func>  // condition(float* row) returns true for rows to drop
+  void remove_rows_if(Func condition) {
+    if (columns.empty() || !has_data())  // no columns: width 0 would divide by zero
+      fail("No data.");
+    auto out = data.begin();
+    size_t width = columns.size();
+    for (auto r = data.begin(); r < data.end(); r += width)
+      if (!condition(&*r)) {
+        if (r != out)  // compact kept rows towards the front
+          std::copy(r, r + width, out);
+        out += width;
+      }
+    data.erase(out, data.end());
+    nreflections = int(data.size() / width);
+  }
+
+  void expand_data_rows(size_t added, int pos_=-1) {
+    size_t old_row_size = columns.size() - added;
+    if (data.size() != old_row_size * nreflections)
+      fail("Internal error");
+    size_t pos = pos_ == -1 ? old_row_size : (size_t) pos_;
+    if (pos > old_row_size)
+      fail("expand_data_rows(): pos out of range");
+    vector_insert_columns(data, old_row_size, (size_t)nreflections, added, pos, NAN);
+  }
+
+  void set_data(const float* new_data, size_t n) {
+    size_t ncols = columns.size();
+    if (ncols == 0 || n % ncols != 0)  // ncols == 0 would divide by zero
+      fail("Mtz.set_data(): expected " + std::to_string(ncols) + " columns.");
+    nreflections = int(n / ncols);
+    data.assign(new_data, new_data + n);
+  }
+
+  // Functions for writing an MTZ file
+  void write_to_cstream(std::FILE* stream) const;
+  void write_to_string(std::string& str) const;
+  void write_to_file(const std::string& path) const;
+  size_t size_to_write() const;
+  size_t write_to_buffer(char* buf, size_t maxlen) const;
+
+private:
+  template<typename Write> void write_to_stream(Write write) const;
+};
+
+
+inline Mtz read_mtz_file(const std::string& path) {
+  Mtz result;  // default with_base=false: add_base() is not called
+  result.read_file(path);
+  return result;
+}
+
+template<typename Input>
+Mtz read_mtz(Input&& input, bool with_data) {
+  Mtz result;  // filled by Mtz::read_input()
+  result.read_input(std::forward<Input>(input), with_data);
+  return result;
+}
+
+// Abstraction of data source, cf. ReflnDataProxy.
+struct MtzDataProxy {
+  const Mtz& mtz_;  // held by reference: the Mtz must outlive the proxy
+  using num_type = float;
+  size_t stride() const { return mtz_.columns.size(); }  // number of columns
+  size_t size() const { return mtz_.data.size(); }       // total number of values
+  float get_num(size_t n) const { return mtz_.data[n]; }
+  Miller get_hkl(size_t offset) const { return mtz_.get_hkl(offset); }
+  const UnitCell& unit_cell() const { return mtz_.cell; }
+  const SpaceGroup* spacegroup() const { return mtz_.spacegroup; }
+
+  size_t column_index(const std::string& label) const {
+    const Mtz::Column* found = mtz_.column_with_label(label);
+    if (!found) fail("MTZ file has no column with label: " + label);
+    return found->idx;
+  }
+};
+
+// Like above, but here the data is stored outside of the Mtz class
+struct MtzExternalDataProxy : MtzDataProxy {
+  const float* data_;  // data stored outside of the Mtz object; held as a raw pointer
+  MtzExternalDataProxy(const Mtz& mtz, const float* data)
+    : MtzDataProxy{mtz}, data_(data) {}
+  size_t size() const { return stride() * mtz_.nreflections; }
+  float get_num(size_t n) const { return data_[n]; }
+  Miller get_hkl(size_t offset) const {
+    const float* row = data_ + offset;  // h, k, l are read from three consecutive values
+    return {{static_cast<int>(row[0]), static_cast<int>(row[1]),
+             static_cast<int>(row[2])}};
+  }
+};
+
+inline MtzDataProxy data_proxy(const Mtz& mtz) { return MtzDataProxy{mtz}; }
+
+} // namespace gemmi
+
+#endif
diff --git a/gemmi_gph/gemmi/sprintf.hpp b/gemmi_gph/gemmi/sprintf.hpp
new file mode 100644
index 00000000..ead2ce0f
--- /dev/null
+++ b/gemmi_gph/gemmi/sprintf.hpp
@@ -0,0 +1,80 @@
+// Copyright 2017 Global Phasing Ltd.
+//
+// interface to stb_sprintf: snprintf_z, to_str(float|double)
+
+#ifndef GEMMI_SPRINTF_HPP_
+#define GEMMI_SPRINTF_HPP_
+
+#include <string>
+#ifdef __has_include
+# if __has_include(<charconv>) && !(defined(_MSVC_LANG) && _MSVC_LANG < 201703L)
+#  include <charconv>
+# endif
+#endif
+
+#if __cpp_lib_to_chars < 201611L
+# include <algorithm> // for min
+#endif
+
+#include "fail.hpp"  // for GEMMI_DLL
+
+namespace gemmi {
+
+// On MinGW format(printf) doesn't support %zu.
+#if (defined(__GNUC__) && !defined(__MINGW32__)) || defined(__clang__)
+# define GEMMI_ATTRIBUTE_FORMAT(fmt,va) __attribute__((format(printf,fmt,va)))
+#else
+# define GEMMI_ATTRIBUTE_FORMAT(fmt,va)
+#endif
+/// stb_snprintf in gemmi namespace - like snprintf, but ignores locale
+/// and is always zero-terminated (hence _z).
+GEMMI_DLL int snprintf_z(char *buf, int count, char const *fmt, ...)
+                                                         GEMMI_ATTRIBUTE_FORMAT(3,4);
+/// stb_sprintf in gemmi namespace
+GEMMI_DLL int sprintf_z(char *buf, char const *fmt, ...) GEMMI_ATTRIBUTE_FORMAT(2,3);
+
+inline std::string to_str(double d) {
+  char buf[24];  // receives the "%.9g" text and its terminating NUL
+  const int written = sprintf_z(buf, "%.9g", d);
+  return written > 0 ? std::string(buf, written) : std::string();
+}
+
+inline std::string to_str(float d) {
+  char buf[16];  // receives the "%.6g" text and its terminating NUL
+  const int written = sprintf_z(buf, "%.6g", d);
+  return written > 0 ? std::string(buf, written) : std::string();
+}
+
+template<int Prec>  // "%.*f" with Prec decimals when -1e8 < d < 1e8, otherwise "%g"
+std::string to_str_prec(double d) {
+  static_assert(Prec >= 0 && Prec < 7, "unsupported precision");
+  char buf[24];  // "-100000000.000000" (Prec=6, rounded up) takes 18 bytes with NUL
+  int len = d > -1e8 && d < 1e8 ? sprintf_z(buf, "%.*f", Prec, d)
+                                : sprintf_z(buf, "%g", d);
+  return std::string(buf, len > 0 ? len : 0);  // non-positive len gives an empty string
+}
+
+/// Zero-terminated to_chars(): writes value as decimal text into [first, last).
+inline char* to_chars_z(char* first, char* last, int value) {
+#if __cpp_lib_to_chars >= 201611L
+  auto result = std::to_chars(first, last-1, value);  // last-1: keep a byte for NUL
+  *result.ptr = '\0';
+  return result.ptr;  // pointer to the terminating NUL
+#else
+  int n = snprintf_z(first, int(last - first), "%d", value);
+  return std::min(first + n, last - 1);  // clamped so it never points past last - 1
+#endif
+}
+inline char* to_chars_z(char* first, char* last, size_t value) {  // size_t overload
+#if __cpp_lib_to_chars >= 201611L
+  auto result = std::to_chars(first, last-1, value);  // last-1: keep a byte for NUL
+  *result.ptr = '\0';
+  return result.ptr;  // pointer to the terminating NUL
+#else
+  int n = snprintf_z(first, int(last - first), "%zu", value);
+  return std::min(first + n, last - 1);  // clamped so it never points past last - 1
+#endif
+}
+
+} // namespace gemmi
+#endif
diff --git a/symmetry/gemmi/symmetry.hpp b/gemmi_gph/gemmi/symmetry.hpp
similarity index 100%
rename from symmetry/gemmi/symmetry.hpp
rename to gemmi_gph/gemmi/symmetry.hpp
diff --git a/gemmi_gph/gemmi/third_party/fast_float.h b/gemmi_gph/gemmi/third_party/fast_float.h
new file mode 100644
index 00000000..10afb792
--- /dev/null
+++ b/gemmi_gph/gemmi/third_party/fast_float.h
@@ -0,0 +1,4933 @@
+// fast_float by Daniel Lemire
+// fast_float by João Paulo Magalhaes
+//
+//
+// with contributions from Eugene Golushkov
+// with contributions from Maksim Kita
+// with contributions from Marcin Wojdyr
+// with contributions from Neal Richardson
+// with contributions from Tim Paine
+// with contributions from Fabio Pellacini
+// with contributions from Lénárd Szolnoki
+// with contributions from Jan Pharago
+// with contributions from Maya Warrier
+// with contributions from Taha Khokhar
+// with contributions from Anders Dalvander
+//
+//
+// Licensed under the Apache License, Version 2.0, or the
+// MIT License or the Boost License. This file may not be copied,
+// modified, or distributed except according to those terms.
+//
+// MIT License Notice
+//
+//    MIT License
+//
+//    Copyright (c) 2021 The fast_float authors
+//
+//    Permission is hereby granted, free of charge, to any
+//    person obtaining a copy of this software and associated
+//    documentation files (the "Software"), to deal in the
+//    Software without restriction, including without
+//    limitation the rights to use, copy, modify, merge,
+//    publish, distribute, sublicense, and/or sell copies of
+//    the Software, and to permit persons to whom the Software
+//    is furnished to do so, subject to the following
+//    conditions:
+//
+//    The above copyright notice and this permission notice
+//    shall be included in all copies or substantial portions
+//    of the Software.
+//
+//    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+//    ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+//    TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+//    PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+//    SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+//    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+//    OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+//    IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+//    DEALINGS IN THE SOFTWARE.
+//
+// Apache License (Version 2.0) Notice
+//
+//    Copyright 2021 The fast_float authors
+//    Licensed under the Apache License, Version 2.0 (the "License");
+//    you may not use this file except in compliance with the License.
+//    You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+//    Unless required by applicable law or agreed to in writing, software
+//    distributed under the License is distributed on an "AS IS" BASIS,
+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//    See the License for the specific language governing permissions and limitations under the License.
+//
+// BOOST License Notice
+//
+//    Boost Software License - Version 1.0 - August 17th, 2003
+//
+//    Permission is hereby granted, free of charge, to any person or organization
+//    obtaining a copy of the software and accompanying documentation covered by
+//    this license (the "Software") to use, reproduce, display, distribute,
+//    execute, and transmit the Software, and to prepare derivative works of the
+//    Software, and to permit third-parties to whom the Software is furnished to
+//    do so, all subject to the following:
+//
+//    The copyright notices in the Software and this entire statement, including
+//    the above license grant, this restriction and the following disclaimer,
+//    must be included in all copies of the Software, in whole or in part, and
+//    all derivative works of the Software, unless such copies or derivative
+//    works are solely in the form of machine-executable object code generated by
+//    a source language processor.
+//
+//    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+//    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+//    FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+//    SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+//    FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+//    ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+//    DEALINGS IN THE SOFTWARE.
+//
+
+#ifndef FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H
+#define FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H
+
+#ifdef __has_include
+#if __has_include(<version>)
+#include <version>
+#endif
+#endif
+
+// Testing for https://wg21.link/N3652, adopted in C++14
+#if defined(__cpp_constexpr) && __cpp_constexpr >= 201304
+#define FASTFLOAT_CONSTEXPR14 constexpr
+#else
+#define FASTFLOAT_CONSTEXPR14
+#endif
+
+#if defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L
+#define FASTFLOAT_HAS_BIT_CAST 1
+#else
+#define FASTFLOAT_HAS_BIT_CAST 0
+#endif
+
+#if defined(__cpp_lib_is_constant_evaluated) &&                                \
+    __cpp_lib_is_constant_evaluated >= 201811L
+#define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 1
+#else
+#define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 0
+#endif
+
+#if defined(__cpp_if_constexpr) && __cpp_if_constexpr >= 201606L
+#define FASTFLOAT_IF_CONSTEXPR17(x) if constexpr (x)
+#else
+#define FASTFLOAT_IF_CONSTEXPR17(x) if (x)
+#endif
+
+// Testing for relevant C++20 constexpr library features
+#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED && FASTFLOAT_HAS_BIT_CAST &&           \
+    defined(__cpp_lib_constexpr_algorithms) &&                                 \
+    __cpp_lib_constexpr_algorithms >= 201806L /*For std::copy and std::fill*/
+#define FASTFLOAT_CONSTEXPR20 constexpr
+#define FASTFLOAT_IS_CONSTEXPR 1
+#else
+#define FASTFLOAT_CONSTEXPR20
+#define FASTFLOAT_IS_CONSTEXPR 0
+#endif
+
+#if __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
+#define FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE 0
+#else
+#define FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE 1
+#endif
+
+#endif // FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H
+
+#ifndef FASTFLOAT_FLOAT_COMMON_H
+#define FASTFLOAT_FLOAT_COMMON_H
+
+#include <cfloat>
+#include <cstddef>
+#include <cstdint>
+#include <cassert>
+#include <cstring>
+#include <limits>
+#include <type_traits>
+#include <system_error>
+#ifdef __has_include
+#if __has_include(<stdfloat>) && (__cplusplus > 202002L || (defined(_MSVC_LANG) && (_MSVC_LANG > 202002L)))
+#include <stdfloat>
+#endif
+#endif
+
+#define FASTFLOAT_VERSION_MAJOR 8
+#define FASTFLOAT_VERSION_MINOR 2
+#define FASTFLOAT_VERSION_PATCH 3
+
+#define FASTFLOAT_STRINGIZE_IMPL(x) #x
+#define FASTFLOAT_STRINGIZE(x) FASTFLOAT_STRINGIZE_IMPL(x)
+
+#define FASTFLOAT_VERSION_STR                                                  \
+  FASTFLOAT_STRINGIZE(FASTFLOAT_VERSION_MAJOR)                                 \
+  "." FASTFLOAT_STRINGIZE(FASTFLOAT_VERSION_MINOR) "." FASTFLOAT_STRINGIZE(    \
+      FASTFLOAT_VERSION_PATCH)
+
+#define FASTFLOAT_VERSION                                                      \
+  (FASTFLOAT_VERSION_MAJOR * 10000 + FASTFLOAT_VERSION_MINOR * 100 +           \
+   FASTFLOAT_VERSION_PATCH)
+
+namespace fast_float {
+
+enum class chars_format : uint64_t;
+
+namespace detail {
+constexpr chars_format basic_json_fmt = chars_format(1 << 5);
+constexpr chars_format basic_fortran_fmt = chars_format(1 << 6);
+} // namespace detail
+
+enum class chars_format : uint64_t {  // bit flags; composite values below OR them together
+  scientific = 1 << 0,
+  fixed = 1 << 2,  // bit 1 is not assigned to any enumerator in this enum
+  hex = 1 << 3,
+  no_infnan = 1 << 4,
+  // RFC 8259: https://datatracker.ietf.org/doc/html/rfc8259#section-6
+  json = uint64_t(detail::basic_json_fmt) | fixed | scientific | no_infnan,
+  // Extension of RFC 8259 where, e.g., "inf" and "nan" are allowed.
+  json_or_infnan = uint64_t(detail::basic_json_fmt) | fixed | scientific,
+  fortran = uint64_t(detail::basic_fortran_fmt) | fixed | scientific,
+  general = fixed | scientific,  // default format of parse_options_t
+  allow_leading_plus = 1 << 7,
+  skip_white_space = 1 << 8,
+};
+
+template <typename UC> struct from_chars_result_t {  // pointer + error code pair
+  UC const *ptr;  // pointer into a buffer of UC characters
+  std::errc ec;   // std::errc() (value-initialized) means "no error"
+
+  // https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2023/p2497r0.html
+  constexpr explicit operator bool() const noexcept {
+    return ec == std::errc();  // true exactly when no error code is set
+  }
+};
+
+using from_chars_result = from_chars_result_t<char>;
+
+template <typename UC> struct parse_options_t {  // options bundle for character type UC
+  constexpr explicit parse_options_t(chars_format fmt = chars_format::general,
+                                     UC dot = UC('.'), int b = 10)
+      : format(fmt), decimal_point(dot), base(b) {}  // defaults: general, '.', base 10
+
+  /** Which number formats are accepted (a chars_format value) */
+  chars_format format;
+  /** The character used as decimal point */
+  UC decimal_point;
+  /** The base used for integers */
+  int base;
+};
+
+using parse_options = parse_options_t<char>;
+
+} // namespace fast_float
+
+#if FASTFLOAT_HAS_BIT_CAST
+#include <bit>
+#endif
+
+#if (defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) ||            \
+     defined(__amd64) || defined(__aarch64__) || defined(_M_ARM64) ||          \
+     defined(__MINGW64__) || defined(__s390x__) ||                             \
+     (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) ||      \
+      defined(__PPC64LE__)) ||                                                 \
+     defined(__loongarch64) || (defined(__riscv) && __riscv_xlen == 64))
+#define FASTFLOAT_64BIT 1
+#elif (defined(__i386) || defined(__i386__) || defined(_M_IX86) ||             \
+       defined(__arm__) || defined(_M_ARM) || defined(__ppc__) ||              \
+       defined(__MINGW32__) || defined(__EMSCRIPTEN__) ||                      \
+       (defined(__riscv) && __riscv_xlen == 32))
+#define FASTFLOAT_32BIT 1
+#else
+  // Need to check incrementally, since SIZE_MAX is a size_t, avoid overflow.
+// We can never tell the register width, but the SIZE_MAX is a good
+// approximation. UINTPTR_MAX and INTPTR_MAX are optional, so avoid them for max
+// portability.
+#if SIZE_MAX == 0xffff
+#error Unknown platform (16-bit, unsupported)
+#elif SIZE_MAX == 0xffffffff
+#define FASTFLOAT_32BIT 1
+#elif SIZE_MAX == 0xffffffffffffffff
+#define FASTFLOAT_64BIT 1
+#else
+#error Unknown platform (not 32-bit, not 64-bit?)
+#endif
+#endif
+
+#if ((defined(_WIN32) || defined(_WIN64)) && !defined(__clang__)) ||           \
+    (defined(_M_ARM64) && !defined(__MINGW32__))
+#include <intrin.h>
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define FASTFLOAT_VISUAL_STUDIO 1
+#endif
+
+#if defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__
+#define FASTFLOAT_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#elif defined _WIN32
+#define FASTFLOAT_IS_BIG_ENDIAN 0
+#else
+#if defined(__APPLE__) || defined(__FreeBSD__)
+#include <machine/endian.h>
+#elif defined(sun) || defined(__sun)
+#include <sys/byteorder.h>
+#elif defined(__MVS__)
+#include <sys/endian.h>
+#else
+#ifdef __has_include
+#if __has_include(<endian.h>)
+#include <endian.h>
+#endif //__has_include(<endian.h>)
+#endif //__has_include
+#endif
+#
+#ifndef __BYTE_ORDER__
+// safe choice
+#define FASTFLOAT_IS_BIG_ENDIAN 0
+#endif
+#
+#ifndef __ORDER_LITTLE_ENDIAN__
+// safe choice
+#define FASTFLOAT_IS_BIG_ENDIAN 0
+#endif
+#
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define FASTFLOAT_IS_BIG_ENDIAN 0
+#else
+#define FASTFLOAT_IS_BIG_ENDIAN 1
+#endif
+#endif
+
+#if defined(__SSE2__) || (defined(FASTFLOAT_VISUAL_STUDIO) &&                  \
+                          (defined(_M_AMD64) || defined(_M_X64) ||             \
+                           (defined(_M_IX86_FP) && _M_IX86_FP == 2)))
+#define FASTFLOAT_SSE2 1
+#endif
+
+#if defined(__aarch64__) || defined(_M_ARM64)
+#define FASTFLOAT_NEON 1
+#endif
+
+#if defined(FASTFLOAT_SSE2) || defined(FASTFLOAT_NEON)
+#define FASTFLOAT_HAS_SIMD 1
+#endif
+
+#if defined(__GNUC__)
+// disable -Wcast-align=strict (GCC only)
+#define FASTFLOAT_SIMD_DISABLE_WARNINGS                                        \
+  _Pragma("GCC diagnostic push")                                               \
+      _Pragma("GCC diagnostic ignored \"-Wcast-align\"")
+#else
+#define FASTFLOAT_SIMD_DISABLE_WARNINGS
+#endif
+
+#if defined(__GNUC__)
+#define FASTFLOAT_SIMD_RESTORE_WARNINGS _Pragma("GCC diagnostic pop")
+#else
+#define FASTFLOAT_SIMD_RESTORE_WARNINGS
+#endif
+
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#define fastfloat_really_inline __forceinline
+#else
+#define fastfloat_really_inline inline __attribute__((always_inline))
+#endif
+
+#ifndef FASTFLOAT_ASSERT
+#define FASTFLOAT_ASSERT(x)                                                    \
+  { ((void)(x)); }
+#endif
+
+#ifndef FASTFLOAT_DEBUG_ASSERT
+#define FASTFLOAT_DEBUG_ASSERT(x)                                              \
+  { ((void)(x)); }
+#endif
+
+// rust style `try!()` macro, or `?` operator
+#define FASTFLOAT_TRY(x)                                                       \
+  {                                                                            \
+    if (!(x))                                                                  \
+      return false;                                                            \
+  }
+
+#define FASTFLOAT_ENABLE_IF(...)                                               \
+  typename std::enable_if<(__VA_ARGS__), int>::type
+
+namespace fast_float {
+
+fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() {  // constant evaluation probe
+#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED
+  return std::is_constant_evaluated();  // library feature available: ask the compiler
+#else
+  return false;  // feature macro not set: always report "not in constant evaluation"
+#endif
+}
+
+template <typename T>  // trait: true for double, float and any std::floatN_t enabled below
+struct is_supported_float_type
+    : std::integral_constant<
+          bool, std::is_same<T, double>::value || std::is_same<T, float>::value
+#ifdef __STDCPP_FLOAT64_T__
+                    || std::is_same<T, std::float64_t>::value
+#endif
+#ifdef __STDCPP_FLOAT32_T__
+                    || std::is_same<T, std::float32_t>::value
+#endif
+#ifdef __STDCPP_FLOAT16_T__
+                    || std::is_same<T, std::float16_t>::value
+#endif
+#ifdef __STDCPP_BFLOAT16_T__
+                    || std::is_same<T, std::bfloat16_t>::value
+#endif
+          > {
+};
+
+template <typename T>  // unsigned integer type chosen by sizeof(T): 1, 2 or 4 bytes
+using equiv_uint_t = typename std::conditional<
+    sizeof(T) == 1, uint8_t,
+    typename std::conditional<
+        sizeof(T) == 2, uint16_t,
+        typename std::conditional<sizeof(T) == 4, uint32_t,
+                                  uint64_t>::type>::type>::type;  // any other size: uint64_t
+
+template <typename T> struct is_supported_integer_type : std::is_integral<T> {};  // alias of std::is_integral
+
+template <typename UC>  // trait: true for char, wchar_t, char16_t, char32_t (and char8_t)
+struct is_supported_char_type
+    : std::integral_constant<bool, std::is_same<UC, char>::value ||
+                                       std::is_same<UC, wchar_t>::value ||
+                                       std::is_same<UC, char16_t>::value ||
+                                       std::is_same<UC, char32_t>::value
+#ifdef __cpp_char8_t
+                                       || std::is_same<UC, char8_t>::value
+#endif
+                             > {
+};
+
+template <typename UC>  // compares exactly 3 code units, ignoring bit 0x20 (ASCII case)
+inline FASTFLOAT_CONSTEXPR14 bool
+fastfloat_strncasecmp3(UC const *actual_mixedcase,
+                       UC const *expected_lowercase) {
+  uint64_t mask{0};  // 0x20 replicated into every code unit of a 64-bit word
+  FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 1) { mask = 0x2020202020202020; }
+  else FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 2) {
+    mask = 0x0020002000200020;
+  }
+  else FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 4) {
+    mask = 0x0000002000000020;
+  }
+  else {
+    return false;  // code unit sizes other than 1, 2 or 4 bytes never match
+  }
+
+  uint64_t val1{0}, val2{0};
+  if (cpp20_and_in_constexpr()) {
+    for (size_t i = 0; i < 3; i++) {  // element-wise path, no memcpy
+      if ((actual_mixedcase[i] | 32) != expected_lowercase[i]) {
+        return false;
+      }
+    }
+    return true;
+  } else {
+    FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 1 || sizeof(UC) == 2) {
+      ::memcpy(&val1, actual_mixedcase, 3 * sizeof(UC));  // all 3 units fit in 64 bits
+      ::memcpy(&val2, expected_lowercase, 3 * sizeof(UC));
+      val1 |= mask;
+      val2 |= mask;
+      return val1 == val2;
+    }
+    else FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 4) {
+      ::memcpy(&val1, actual_mixedcase, 2 * sizeof(UC));  // only 2 units fit in 64 bits
+      ::memcpy(&val2, expected_lowercase, 2 * sizeof(UC));
+      val1 |= mask;  // only the input is folded; val2 is compared as given
+      if (val1 != val2) {
+        return false;
+      }
+      return (actual_mixedcase[2] | 32) == (expected_lowercase[2]);  // third unit
+    }
+    else {
+      return false;
+    }
+  }
+
+  return true;  // not reached: every branch above returns
+}
+
+template <typename UC>  // compares exactly 5 code units, ignoring bit 0x20 (ASCII case)
+inline FASTFLOAT_CONSTEXPR14 bool
+fastfloat_strncasecmp5(UC const *actual_mixedcase,
+                       UC const *expected_lowercase) {
+  uint64_t mask{0};  // set per code unit size below: 0x20 in every code unit
+  uint64_t val1{0}, val2{0};
+  if (cpp20_and_in_constexpr()) {
+    for (size_t i = 0; i < 5; i++) {  // element-wise path, no memcpy
+      if ((actual_mixedcase[i] | 32) != expected_lowercase[i]) {
+        return false;
+      }
+    }
+    return true;
+  } else {
+    FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 1) {
+      mask = 0x2020202020202020;
+      ::memcpy(&val1, actual_mixedcase, 5 * sizeof(UC));  // all 5 units fit in 64 bits
+      ::memcpy(&val2, expected_lowercase, 5 * sizeof(UC));
+      val1 |= mask;
+      val2 |= mask;
+      return val1 == val2;
+    }
+    else FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 2) {
+      mask = 0x0020002000200020;
+      ::memcpy(&val1, actual_mixedcase, 4 * sizeof(UC));  // first 4 units as one word
+      ::memcpy(&val2, expected_lowercase, 4 * sizeof(UC));
+      val1 |= mask;  // only the input is folded; val2 is compared as given
+      if (val1 != val2) {
+        return false;
+      }
+      return (actual_mixedcase[4] | 32) == (expected_lowercase[4]);  // fifth unit
+    }
+    else FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 4) {
+      mask = 0x0000002000000020;
+      ::memcpy(&val1, actual_mixedcase, 2 * sizeof(UC));  // units 0 and 1
+      ::memcpy(&val2, expected_lowercase, 2 * sizeof(UC));
+      val1 |= mask;
+      if (val1 != val2) {
+        return false;
+      }
+      ::memcpy(&val1, actual_mixedcase + 2, 2 * sizeof(UC));  // units 2 and 3
+      ::memcpy(&val2, expected_lowercase + 2, 2 * sizeof(UC));
+      val1 |= mask;
+      if (val1 != val2) {
+        return false;
+      }
+      return (actual_mixedcase[4] | 32) == (expected_lowercase[4]);  // fifth unit
+    }
+    else {
+      return false;  // code unit sizes other than 1, 2 or 4 bytes never match
+    }
+  }
+
+  return true;  // not reached: every branch above returns
+}
+
+// Compares `length` code units of two ASCII strings, ignoring bit 0x20 (letter case).
+template <typename UC>
+inline FASTFLOAT_CONSTEXPR14 bool
+fastfloat_strncasecmp(UC const *actual_mixedcase, UC const *expected_lowercase,
+                      size_t length) {
+  uint64_t mask{0};  // 0x20 replicated into every code unit of a 64-bit word
+  FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 1) { mask = 0x2020202020202020; }
+  else FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 2) {
+    mask = 0x0020002000200020;
+  }
+  else FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 4) {
+    mask = 0x0000002000000020;
+  }
+  else {
+    return false;  // code unit sizes other than 1, 2 or 4 bytes never match
+  }
+
+  if (cpp20_and_in_constexpr()) {
+    for (size_t i = 0; i < length; i++) {  // element-wise path, no memcpy
+      if ((actual_mixedcase[i] | 32) != expected_lowercase[i]) {
+        return false;
+      }
+    }
+    return true;
+  } else {
+    uint64_t val1{0}, val2{0};
+    size_t sz{8 / (sizeof(UC))};  // code units per 64-bit word
+    for (size_t i = 0; i < length; i += sz) {
+      val1 = val2 = 0;  // clear bytes a short final chunk would not overwrite
+      sz = std::min(sz, length - i);  // shrink the last chunk to what remains
+      ::memcpy(&val1, actual_mixedcase + i, sz * sizeof(UC));
+      ::memcpy(&val2, expected_lowercase + i, sz * sizeof(UC));
+      val1 |= mask;  // both sides are folded, so units differing only in 0x20 match
+      val2 |= mask;
+      if (val1 != val2) {
+        return false;
+      }
+    }
+    return true;  // also reached for length == 0
+  }
+}
+
+#ifndef FLT_EVAL_METHOD
+#error "FLT_EVAL_METHOD should be defined, please include cfloat."
+#endif
+
+// a pointer and a length to a contiguous block of memory (stores no copy of the data)
+template <typename T> struct span {
+  T const *ptr;
+  size_t length;  // number of T elements, not bytes (used as the index bound below)
+
+  constexpr span(T const *_ptr, size_t _length) : ptr(_ptr), length(_length) {}
+
+  constexpr span() : ptr(nullptr), length(0) {}  // empty view
+
+  constexpr size_t len() const noexcept { return length; }
+
+  FASTFLOAT_CONSTEXPR14 const T &operator[](size_t index) const noexcept {
+    FASTFLOAT_DEBUG_ASSERT(index < length);  // expands to whatever the macro is defined as
+    return ptr[index];
+  }
+};
+
+struct value128 {  // pair of 64-bit words; full_multiplication() returns a product in it
+  uint64_t low;   // low word
+  uint64_t high;  // high word
+
+  constexpr value128(uint64_t _low, uint64_t _high) : low(_low), high(_high) {}
+
+  constexpr value128() : low(0), high(0) {}  // zero value
+};
+
+/* Helper C++14 constexpr generic implementation of leading_zeroes */
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int
+leading_zeroes_generic(uint64_t input_num, int last_bit = 0) {
+  if (input_num & uint64_t(0xffffffff00000000)) {  // any bit set in the upper 32?
+    input_num >>= 32;
+    last_bit |= 32;
+  }
+  if (input_num & uint64_t(0xffff0000)) {  // halve the search window each step
+    input_num >>= 16;
+    last_bit |= 16;
+  }
+  if (input_num & uint64_t(0xff00)) {
+    input_num >>= 8;
+    last_bit |= 8;
+  }
+  if (input_num & uint64_t(0xf0)) {
+    input_num >>= 4;
+    last_bit |= 4;
+  }
+  if (input_num & uint64_t(0xc)) {
+    input_num >>= 2;
+    last_bit |= 2;
+  }
+  if (input_num & uint64_t(0x2)) { /* input_num >>=  1; */
+    last_bit |= 1;
+  }
+  return 63 - last_bit;  // last_bit = index of the highest set bit; input 0 gives 63
+}
+
+/* count of leading zero bits; result might be undefined when input_num is zero */
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 int
+leading_zeroes(uint64_t input_num) {
+  assert(input_num > 0);
+  if (cpp20_and_in_constexpr()) {
+    return leading_zeroes_generic(input_num);  // portable path for constant evaluation
+  }
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#if defined(_M_X64) || defined(_M_ARM64)
+  unsigned long leading_zero = 0;
+  // Search the mask data from most significant bit (MSB)
+  // to least significant bit (LSB) for a set bit (1).
+  _BitScanReverse64(&leading_zero, input_num);
+  return (int)(63 - leading_zero);  // convert bit index to a leading-zero count
+#else
+  return leading_zeroes_generic(input_num);  // other MSVC targets: generic code
+#endif
+#else
+  return __builtin_clzll(input_num);  // non-MSVC compilers: builtin
+#endif
+}
+
+/* Helper C++14 constexpr generic implementation of countr_zero for 32-bit */
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int
+countr_zero_generic_32(uint32_t input_num) {
+  if (input_num == 0) {
+    return 32;  // no set bit: all 32 bits count as trailing zeros
+  }
+  int last_bit = 0;  // accumulates the number of low zero bits shifted out
+  if (!(input_num & 0x0000FFFF)) {  // low 16 bits all zero?
+    input_num >>= 16;
+    last_bit |= 16;
+  }
+  if (!(input_num & 0x00FF)) {
+    input_num >>= 8;
+    last_bit |= 8;
+  }
+  if (!(input_num & 0x0F)) {
+    input_num >>= 4;
+    last_bit |= 4;
+  }
+  if (!(input_num & 0x3)) {
+    input_num >>= 2;
+    last_bit |= 2;
+  }
+  if (!(input_num & 0x1)) {  // final step needs no shift
+    last_bit |= 1;
+  }
+  return last_bit;
+}
+
+/* count trailing zeroes for 32-bit integers; returns 32 when input_num is zero */
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 int
+countr_zero_32(uint32_t input_num) {
+  if (cpp20_and_in_constexpr()) {
+    return countr_zero_generic_32(input_num);  // portable path for constant evaluation
+  }
+#ifdef FASTFLOAT_VISUAL_STUDIO
+  unsigned long trailing_zero = 0;
+  if (_BitScanForward(&trailing_zero, input_num)) {  // nonzero result: a set bit was found
+    return (int)trailing_zero;
+  }
+  return 32;
+#else
+  return input_num == 0 ? 32 : __builtin_ctz(input_num);  // zero is handled before the builtin
+#endif
+}
+
+// slow emulation routine for 32-bit: widening 32x32 -> 64-bit multiplication
+fastfloat_really_inline constexpr uint64_t emulu(uint32_t x, uint32_t y) {
+  return x * (uint64_t)y;  // the cast makes the multiplication happen in 64 bits
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t  // returns low word, *hi = high word
+umul128_generic(uint64_t ab, uint64_t cd, uint64_t *hi) {  // ab = a:b, cd = c:d (32-bit halves)
+  uint64_t ad = emulu((uint32_t)(ab >> 32), (uint32_t)cd);  // a*d
+  uint64_t bd = emulu((uint32_t)ab, (uint32_t)cd);  // b*d
+  uint64_t adbc = ad + emulu((uint32_t)ab, (uint32_t)(cd >> 32));  // a*d + b*c (may wrap)
+  uint64_t adbc_carry = (uint64_t)(adbc < ad);  // 1 if the sum above wrapped
+  uint64_t lo = bd + (adbc << 32);
+  *hi = emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) +
+        (adbc_carry << 32) + (uint64_t)(lo < bd);  // a*c + upper cross terms + carries
+  return lo;
+}
+
+#ifdef FASTFLOAT_32BIT
+
+// slow emulation routine for 32-bit
+#if !defined(__MINGW64__)
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t _umul128(uint64_t ab,
+                                                                uint64_t cd,
+                                                                uint64_t *hi) {
+  return umul128_generic(ab, cd, hi);  // plain forwarder: low word returned, high via *hi
+}
+#endif // !__MINGW64__
+
+#endif // FASTFLOAT_32BIT
+
+// compute the full product of two 64-bit values, returned as low/high 64-bit words
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 value128
+full_multiplication(uint64_t a, uint64_t b) {
+  if (cpp20_and_in_constexpr()) {
+    value128 answer;
+    answer.low = umul128_generic(a, b, &answer.high);  // portable path, no intrinsics
+    return answer;
+  }
+  value128 answer;
+#if defined(_M_ARM64) && !defined(__MINGW32__)
+  // ARM64 has native support for 64-bit multiplications, no need to emulate
+  // But MinGW on ARM64 doesn't have native support for 64-bit multiplications
+  answer.high = __umulh(a, b);
+  answer.low = a * b;
+#elif defined(FASTFLOAT_32BIT) || (defined(_WIN64) && !defined(__clang__) &&   \
+                                   !defined(_M_ARM64) && !defined(__GNUC__))
+  answer.low = _umul128(a, b, &answer.high); // _umul128 not available on ARM64
+#elif defined(FASTFLOAT_64BIT) && defined(__SIZEOF_INT128__)
+  __uint128_t r = ((__uint128_t)a) * b;  // compiler-provided 128-bit integer
+  answer.low = uint64_t(r);
+  answer.high = uint64_t(r >> 64);
+#else
+  answer.low = umul128_generic(a, b, &answer.high);  // fallback: 32-bit limb emulation
+#endif
+  return answer;
+}
+
+struct adjusted_mantissa {  // mantissa / binary exponent pair, both zero by default
+  uint64_t mantissa{0};
+  int32_t power2{0}; // a negative value indicates an invalid result
+  adjusted_mantissa() = default;
+
+  constexpr bool operator==(adjusted_mantissa const &o) const {  // member-wise equality
+    return mantissa == o.mantissa && power2 == o.power2;
+  }
+
+  constexpr bool operator!=(adjusted_mantissa const &o) const {  // negation of operator==
+    return mantissa != o.mantissa || power2 != o.power2;
+  }
+};
+
+// Bias so we can get the real exponent with an invalid adjusted_mantissa.
+constexpr static int32_t invalid_am_bias = -0x8000;
+
+// used for binary_format_lookup_tables<T>::max_mantissa; the value is 5^5 = 3125
+constexpr uint64_t constant_55555 = 5 * 5 * 5 * 5 * 5;
+
+template <typename T, typename U = void> struct binary_format_lookup_tables;
+
+template <typename T> struct binary_format : binary_format_lookup_tables<T> {  // per-type constants
+  using equiv_uint = equiv_uint_t<T>;  // unsigned integer with the same size as T
+
+  static constexpr int mantissa_explicit_bits();  // members are only declared here;
+  static constexpr int minimum_exponent();        // values come from explicit
+  static constexpr int infinite_power();          // specializations per type
+  static constexpr int sign_index();
+  static constexpr int
+  min_exponent_fast_path(); // used when fegetround() == FE_TONEAREST
+  static constexpr int max_exponent_fast_path();
+  static constexpr int max_exponent_round_to_even();
+  static constexpr int min_exponent_round_to_even();
+  static constexpr uint64_t max_mantissa_fast_path(int64_t power);
+  static constexpr uint64_t
+  max_mantissa_fast_path(); // used when fegetround() == FE_TONEAREST
+  static constexpr int largest_power_of_ten();
+  static constexpr int smallest_power_of_ten();
+  static constexpr T exact_power_of_ten(int64_t power);
+  static constexpr size_t max_digits();
+  static constexpr equiv_uint exponent_mask();
+  static constexpr equiv_uint mantissa_mask();
+  static constexpr equiv_uint hidden_bit_mask();
+};
+
+template <typename U> struct binary_format_lookup_tables<double, U> {
+  static constexpr double powers_of_ten[] = {
+      1e0,  1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,  1e10, 1e11,
+      1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22};
+
+  // Largest integer value v so that (5**index * v) <= 1<<53.
+  // 0x20000000000000 == 1 << 53
+  // Entry i is (1 << 53) / 5**i for i = 0..23 (the 5**14 entry used to be missing).
+  static constexpr uint64_t max_mantissa[] = {
+      0x20000000000000,
+      0x20000000000000 / 5,
+      0x20000000000000 / (5 * 5),
+      0x20000000000000 / (5 * 5 * 5),
+      0x20000000000000 / (5 * 5 * 5 * 5),
+      0x20000000000000 / (constant_55555),
+      0x20000000000000 / (constant_55555 * 5),
+      0x20000000000000 / (constant_55555 * 5 * 5),
+      0x20000000000000 / (constant_55555 * 5 * 5 * 5),
+      0x20000000000000 / (constant_55555 * 5 * 5 * 5 * 5),
+      0x20000000000000 / (constant_55555 * constant_55555),
+      0x20000000000000 / (constant_55555 * constant_55555 * 5),
+      0x20000000000000 / (constant_55555 * constant_55555 * 5 * 5),
+      0x20000000000000 / (constant_55555 * constant_55555 * 5 * 5 * 5),
+      0x20000000000000 / (constant_55555 * constant_55555 * 5 * 5 * 5 * 5),
+      0x20000000000000 / (constant_55555 * constant_55555 * constant_55555),
+      0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5),
+      0x20000000000000 /
+          (constant_55555 * constant_55555 * constant_55555 * 5 * 5),
+      0x20000000000000 /
+          (constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5),
+      0x20000000000000 /
+          (constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5 * 5),
+      0x20000000000000 /
+          (constant_55555 * constant_55555 * constant_55555 * constant_55555),
+      0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 *
+                          constant_55555 * 5),
+      0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 *
+                          constant_55555 * 5 * 5),
+      0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 *
+                          constant_55555 * 5 * 5 * 5)};
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+
+template <typename U>
+constexpr double binary_format_lookup_tables<double, U>::powers_of_ten[];
+
+template <typename U>
+constexpr uint64_t binary_format_lookup_tables<double, U>::max_mantissa[];
+
+#endif
+
+template <typename U> struct binary_format_lookup_tables<float, U> {
+  static constexpr float powers_of_ten[] = {1e0f, 1e1f, 1e2f, 1e3f, 1e4f, 1e5f,
+                                            1e6f, 1e7f, 1e8f, 1e9f, 1e10f};
+
+  // Largest integer value v so that (5**index * v) <= 1<<24, for index 0..11.
+  // 0x1000000 == 1<<24
+  static constexpr uint64_t max_mantissa[] = {
+      0x1000000,
+      0x1000000 / 5,
+      0x1000000 / (5 * 5),
+      0x1000000 / (5 * 5 * 5),
+      0x1000000 / (5 * 5 * 5 * 5),
+      0x1000000 / (constant_55555),
+      0x1000000 / (constant_55555 * 5),
+      0x1000000 / (constant_55555 * 5 * 5),
+      0x1000000 / (constant_55555 * 5 * 5 * 5),
+      0x1000000 / (constant_55555 * 5 * 5 * 5 * 5),
+      0x1000000 / (constant_55555 * constant_55555),
+      0x1000000 / (constant_55555 * constant_55555 * 5)};
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+
+template <typename U>
+constexpr float binary_format_lookup_tables<float, U>::powers_of_ten[];
+
+template <typename U>
+constexpr uint64_t binary_format_lookup_tables<float, U>::max_mantissa[];
+
+#endif
+
+template <>
+inline constexpr int binary_format<double>::min_exponent_fast_path() {
+#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
+  return 0;  // FLT_EVAL_METHOD is neither 0 nor 1: lower bound is 0
+#else
+  return -22;  // negated index of the last powers_of_ten entry (1e22)
+#endif
+}
+
+template <>
+inline constexpr int binary_format<float>::min_exponent_fast_path() {
+#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
+  return 0;  // FLT_EVAL_METHOD is neither 0 nor 1: lower bound is 0
+#else
+  return -10;  // negated index of the last powers_of_ten entry (1e10f)
+#endif
+}
+
+template <>
+inline constexpr int binary_format<double>::mantissa_explicit_bits() {
+  return 52;
+}
+
+template <>
+inline constexpr int binary_format<float>::mantissa_explicit_bits() {
+  return 23;
+}
+
+template <>
+inline constexpr int binary_format<double>::max_exponent_round_to_even() {
+  return 23;
+}
+
+template <>
+inline constexpr int binary_format<float>::max_exponent_round_to_even() {
+  return 10;
+}
+
+template <>
+inline constexpr int binary_format<double>::min_exponent_round_to_even() {
+  return -4;
+}
+
+template <>
+inline constexpr int binary_format<float>::min_exponent_round_to_even() {
+  return -17;
+}
+
+template <> inline constexpr int binary_format<double>::minimum_exponent() {
+  return -1023;
+}
+
+template <> inline constexpr int binary_format<float>::minimum_exponent() {
+  return -127;
+}
+
+template <> inline constexpr int binary_format<double>::infinite_power() {
+  return 0x7FF;  // 2047: eleven one-bits
+}
+
+template <> inline constexpr int binary_format<float>::infinite_power() {
+  return 0xFF;  // 255: eight one-bits
+}
+
+template <> inline constexpr int binary_format<double>::sign_index() {
+  return 63;  // top bit of a 64-bit word
+}
+
+template <> inline constexpr int binary_format<float>::sign_index() {
+  return 31;  // top bit of a 32-bit word
+}
+
+template <>
+inline constexpr int binary_format<double>::max_exponent_fast_path() {
+  return 22;  // index of the last powers_of_ten entry (1e22)
+}
+
+template <>
+inline constexpr int binary_format<float>::max_exponent_fast_path() {
+  return 10;  // index of the last powers_of_ten entry (1e10f)
+}
+
+template <>
+inline constexpr uint64_t binary_format<double>::max_mantissa_fast_path() {
+  return uint64_t(2) << mantissa_explicit_bits();  // 2 << 52 == 1 << 53
+}
+
+template <>
+inline constexpr uint64_t binary_format<float>::max_mantissa_fast_path() {
+  return uint64_t(2) << mantissa_explicit_bits();  // 2 << 23 == 1 << 24
+}
+
+// credit: Jakub Jelínek
+#ifdef __STDCPP_FLOAT16_T__
+template <typename U> struct binary_format_lookup_tables<std::float16_t, U> {
+  static constexpr std::float16_t powers_of_ten[] = {1e0f16, 1e1f16, 1e2f16,
+                                                     1e3f16, 1e4f16}; // 10^0 .. 10^4
+
+  // max_mantissa[index]: largest integer value v so that
+  // (5**index * v) <= 1<<11, where 0x800 == 1<<11.
+  static constexpr uint64_t max_mantissa[] = {0x800,
+                                              0x800 / 5,
+                                              0x800 / (5 * 5),
+                                              0x800 / (5 * 5 * 5),
+                                              0x800 / (5 * 5 * 5 * 5),
+                                              0x800 / (constant_55555)}; // constant defined outside this chunk; presumably 5**5
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+// Namespace-scope definitions of the two arrays, compiled only when the macro is non-zero.
+template <typename U>
+constexpr std::float16_t
+    binary_format_lookup_tables<std::float16_t, U>::powers_of_ten[];
+
+template <typename U>
+constexpr uint64_t
+    binary_format_lookup_tables<std::float16_t, U>::max_mantissa[];
+
+#endif
+
+template <> // returns powers_of_ten[power]; power is not range-checked here
+inline constexpr std::float16_t
+binary_format<std::float16_t>::exact_power_of_ten(int64_t power) {
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)powers_of_ten[0], powers_of_ten[power];
+}
+
+template <>
+inline constexpr binary_format<std::float16_t>::equiv_uint
+binary_format<std::float16_t>::exponent_mask() {
+  return 0x7C00; // bits 10..14
+}
+
+template <>
+inline constexpr binary_format<std::float16_t>::equiv_uint
+binary_format<std::float16_t>::mantissa_mask() {
+  return 0x03FF; // bits 0..9
+}
+
+template <>
+inline constexpr binary_format<std::float16_t>::equiv_uint
+binary_format<std::float16_t>::hidden_bit_mask() {
+  return 0x0400; // bit 10, i.e. 1 << mantissa_explicit_bits()
+}
+
+template <> // float16: upper decimal-exponent bound of the fast path
+inline constexpr int binary_format<std::float16_t>::max_exponent_fast_path() {
+  return 4; // last index of the float16 powers_of_ten table
+}
+
+template <> // float16: width of the stored fraction field
+inline constexpr int binary_format<std::float16_t>::mantissa_explicit_bits() {
+  return 10; // number of bits set in mantissa_mask() == 0x03FF
+}
+
+template <>
+inline constexpr uint64_t
+binary_format<std::float16_t>::max_mantissa_fast_path() {
+  return uint64_t(2) << mantissa_explicit_bits(); // 2 << 10 == 0x800
+}
+
+template <> // table lookup; power is not range-checked here
+inline constexpr uint64_t
+binary_format<std::float16_t>::max_mantissa_fast_path(int64_t power) {
+  // caller is responsible to ensure that
+  // power >= 0 && power <= 4
+  //
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)max_mantissa[0], max_mantissa[power];
+}
+
+template <> // float16: lower decimal-exponent bound of the fast path
+inline constexpr int binary_format<std::float16_t>::min_exponent_fast_path() {
+  return 0;
+}
+
+template <> // NOTE(review): per its name, upper bound of the round-to-even window
+inline constexpr int
+binary_format<std::float16_t>::max_exponent_round_to_even() {
+  return 5;
+}
+
+template <> // NOTE(review): per its name, lower bound of the round-to-even window
+inline constexpr int
+binary_format<std::float16_t>::min_exponent_round_to_even() {
+  return -22;
+}
+
+template <>
+inline constexpr int binary_format<std::float16_t>::minimum_exponent() {
+  return -15; // NOTE(review): presumably the negated exponent bias -- confirm
+}
+
+template <>
+inline constexpr int binary_format<std::float16_t>::infinite_power() {
+  return 0x1F; // all-ones exponent field: exponent_mask() >> 10
+}
+
+template <> inline constexpr int binary_format<std::float16_t>::sign_index() {
+  return 15; // shift applied to the sign flag in to_float()
+}
+
+template <>
+inline constexpr int binary_format<std::float16_t>::largest_power_of_ten() {
+  return 4;
+}
+
+template <>
+inline constexpr int binary_format<std::float16_t>::smallest_power_of_ten() {
+  return -27;
+}
+
+template <>
+inline constexpr size_t binary_format<std::float16_t>::max_digits() {
+  return 22;
+}
+#endif // __STDCPP_FLOAT16_T__
+
+// credit: Jakub Jelínek
+#ifdef __STDCPP_BFLOAT16_T__
+template <typename U> struct binary_format_lookup_tables<std::bfloat16_t, U> {
+  static constexpr std::bfloat16_t powers_of_ten[] = {1e0bf16, 1e1bf16, 1e2bf16,
+                                                      1e3bf16}; // 10^0 .. 10^3
+
+  // max_mantissa[index]: largest integer value v so that
+  // (5**index * v) <= 1<<8, where 0x100 == 1<<8.
+  static constexpr uint64_t max_mantissa[] = {0x100, 0x100 / 5, 0x100 / (5 * 5),
+                                              0x100 / (5 * 5 * 5),
+                                              0x100 / (5 * 5 * 5 * 5)};
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+// Namespace-scope definitions of the two arrays, compiled only when the macro is non-zero.
+template <typename U>
+constexpr std::bfloat16_t
+    binary_format_lookup_tables<std::bfloat16_t, U>::powers_of_ten[];
+
+template <typename U>
+constexpr uint64_t
+    binary_format_lookup_tables<std::bfloat16_t, U>::max_mantissa[];
+
+#endif
+
+template <> // returns powers_of_ten[power]; power is not range-checked here
+inline constexpr std::bfloat16_t
+binary_format<std::bfloat16_t>::exact_power_of_ten(int64_t power) {
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)powers_of_ten[0], powers_of_ten[power];
+}
+
+template <> // bfloat16: upper decimal-exponent bound of the fast path
+inline constexpr int binary_format<std::bfloat16_t>::max_exponent_fast_path() {
+  return 3; // last index of the bfloat16 powers_of_ten table
+}
+
+template <>
+inline constexpr binary_format<std::bfloat16_t>::equiv_uint
+binary_format<std::bfloat16_t>::exponent_mask() {
+  return 0x7F80; // bits 7..14
+}
+
+template <>
+inline constexpr binary_format<std::bfloat16_t>::equiv_uint
+binary_format<std::bfloat16_t>::mantissa_mask() {
+  return 0x007F; // bits 0..6
+}
+
+template <>
+inline constexpr binary_format<std::bfloat16_t>::equiv_uint
+binary_format<std::bfloat16_t>::hidden_bit_mask() {
+  return 0x0080; // bit 7, i.e. 1 << mantissa_explicit_bits()
+}
+
+template <> // bfloat16: width of the stored fraction field
+inline constexpr int binary_format<std::bfloat16_t>::mantissa_explicit_bits() {
+  return 7; // number of bits set in mantissa_mask() == 0x007F
+}
+
+template <>
+inline constexpr uint64_t
+binary_format<std::bfloat16_t>::max_mantissa_fast_path() {
+  return uint64_t(2) << mantissa_explicit_bits(); // 2 << 7 == 0x100
+}
+
+template <> // table lookup; power is not range-checked here
+inline constexpr uint64_t
+binary_format<std::bfloat16_t>::max_mantissa_fast_path(int64_t power) {
+  // caller is responsible to ensure that
+  // power >= 0 && power <= 3
+  //
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)max_mantissa[0], max_mantissa[power];
+}
+
+template <> // bfloat16: lower decimal-exponent bound of the fast path
+inline constexpr int binary_format<std::bfloat16_t>::min_exponent_fast_path() {
+  return 0;
+}
+
+template <> // NOTE(review): per its name, upper bound of the round-to-even window
+inline constexpr int
+binary_format<std::bfloat16_t>::max_exponent_round_to_even() {
+  return 3;
+}
+
+template <> // NOTE(review): per its name, lower bound of the round-to-even window
+inline constexpr int
+binary_format<std::bfloat16_t>::min_exponent_round_to_even() {
+  return -24;
+}
+
+template <>
+inline constexpr int binary_format<std::bfloat16_t>::minimum_exponent() {
+  return -127; // same value as binary_format<float>::minimum_exponent()
+}
+
+template <>
+inline constexpr int binary_format<std::bfloat16_t>::infinite_power() {
+  return 0xFF; // all-ones exponent field: exponent_mask() >> 7
+}
+
+template <> inline constexpr int binary_format<std::bfloat16_t>::sign_index() {
+  return 15; // shift applied to the sign flag in to_float()
+}
+
+template <>
+inline constexpr int binary_format<std::bfloat16_t>::largest_power_of_ten() {
+  return 38; // same value as binary_format<float>::largest_power_of_ten()
+}
+
+template <>
+inline constexpr int binary_format<std::bfloat16_t>::smallest_power_of_ten() {
+  return -60;
+}
+
+template <>
+inline constexpr size_t binary_format<std::bfloat16_t>::max_digits() {
+  return 98;
+}
+#endif // __STDCPP_BFLOAT16_T__
+
+template <> // double: table lookup; power is not range-checked here
+inline constexpr uint64_t
+binary_format<double>::max_mantissa_fast_path(int64_t power) {
+  // caller is responsible to ensure that
+  // power >= 0 && power <= 22
+  //
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)max_mantissa[0], max_mantissa[power];
+}
+
+template <> // float: table lookup; power is not range-checked here
+inline constexpr uint64_t
+binary_format<float>::max_mantissa_fast_path(int64_t power) {
+  // caller is responsible to ensure that
+  // power >= 0 && power <= 10
+  //
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)max_mantissa[0], max_mantissa[power];
+}
+
+template <> // double: returns powers_of_ten[power]; power is not range-checked
+inline constexpr double
+binary_format<double>::exact_power_of_ten(int64_t power) {
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)powers_of_ten[0], powers_of_ten[power];
+}
+
+template <> // float: returns powers_of_ten[power]; power is not range-checked
+inline constexpr float binary_format<float>::exact_power_of_ten(int64_t power) {
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)powers_of_ten[0], powers_of_ten[power];
+}
+
+template <> inline constexpr int binary_format<double>::largest_power_of_ten() {
+  return 308; // NOTE(review): per its name, largest decimal exponent handled for double
+}
+
+template <> inline constexpr int binary_format<float>::largest_power_of_ten() {
+  return 38; // NOTE(review): per its name, largest decimal exponent handled for float
+}
+
+template <> // NOTE(review): per its name, smallest decimal exponent handled for double
+inline constexpr int binary_format<double>::smallest_power_of_ten() {
+  return -342;
+}
+
+template <> inline constexpr int binary_format<float>::smallest_power_of_ten() {
+  return -64; // NOTE(review): per its name, smallest decimal exponent handled for float
+}
+
+template <> inline constexpr size_t binary_format<double>::max_digits() {
+  return 769; // NOTE(review): per its name, a digit-count limit for double -- confirm with callers
+}
+
+template <> inline constexpr size_t binary_format<float>::max_digits() {
+  return 114; // NOTE(review): per its name, a digit-count limit for float -- confirm with callers
+}
+
+template <>
+inline constexpr binary_format<float>::equiv_uint
+binary_format<float>::exponent_mask() {
+  return 0x7F800000; // bits 23..30
+}
+
+template <>
+inline constexpr binary_format<double>::equiv_uint
+binary_format<double>::exponent_mask() {
+  return 0x7FF0000000000000; // bits 52..62
+}
+
+template <>
+inline constexpr binary_format<float>::equiv_uint
+binary_format<float>::mantissa_mask() {
+  return 0x007FFFFF; // bits 0..22
+}
+
+template <>
+inline constexpr binary_format<double>::equiv_uint
+binary_format<double>::mantissa_mask() {
+  return 0x000FFFFFFFFFFFFF; // bits 0..51
+}
+
+template <>
+inline constexpr binary_format<float>::equiv_uint
+binary_format<float>::hidden_bit_mask() {
+  return 0x00800000; // bit 23, i.e. 1 << mantissa_explicit_bits()
+}
+
+template <>
+inline constexpr binary_format<double>::equiv_uint
+binary_format<double>::hidden_bit_mask() {
+  return 0x0010000000000000; // bit 52, i.e. 1 << mantissa_explicit_bits()
+}
+
+template <typename T>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
+to_float(bool negative, adjusted_mantissa am, T &value) {
+  // Pack am.mantissa, am.power2 and the sign flag into one raw bit pattern.
+  using raw_t = equiv_uint_t<T>;
+  raw_t const expo = raw_t(raw_t(am.power2) << binary_format<T>::mantissa_explicit_bits());
+  raw_t const sign = raw_t(raw_t(negative) << binary_format<T>::sign_index());
+  raw_t const bits = raw_t(raw_t(am.mantissa) | expo | sign);
+  // Copy that bit pattern into value.
+#if FASTFLOAT_HAS_BIT_CAST
+  value = std::bit_cast<T>(bits);
+#else
+  ::memcpy(&value, &bits, sizeof(T));
+#endif
+}
+
+template <typename = void> struct space_lut { // 256 flags, indexed by byte value
+  static constexpr bool value[] = {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9..13 set
+      0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32 set
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+// Namespace-scope definition of the array, compiled only when the macro is non-zero.
+template <typename T> constexpr bool space_lut<T>::value[];
+
+#endif
+
+template <typename UC> constexpr bool is_space(UC c) { // space_lut lookup
+  return (c < 256) ? space_lut<>::value[static_cast<uint8_t>(c)] : false;
+}
+
+template <typename UC> static constexpr uint64_t int_cmp_zeros() {
+  static_assert((sizeof(UC) == 1) || (sizeof(UC) == 2) || (sizeof(UC) == 4),
+                "Unsupported character size");
+  // The character '0' replicated into every UC-sized lane of a 64-bit word.
+  return (sizeof(UC) == 4) ? (uint64_t(UC('0')) << 32 | UC('0'))
+         : (sizeof(UC) == 2)
+             ? (uint64_t(UC('0')) << 48 | uint64_t(UC('0')) << 32 |
+                uint64_t(UC('0')) << 16 | UC('0'))
+             : 0x3030303030303030;
+}
+
+template <typename UC> static constexpr int int_cmp_len() { // UC units per u64
+  return static_cast<int>(sizeof(uint64_t) / sizeof(UC));
+}
+
+template <typename UC> constexpr UC const *str_const_nan(); // declared only; one specialization per character type below
+
+template <> constexpr char const *str_const_nan<char>() { return "nan"; }
+
+template <> constexpr wchar_t const *str_const_nan<wchar_t>() { return L"nan"; }
+
+template <> constexpr char16_t const *str_const_nan<char16_t>() {
+  return u"nan";
+}
+
+template <> constexpr char32_t const *str_const_nan<char32_t>() {
+  return U"nan";
+}
+
+#ifdef __cpp_char8_t
+template <> constexpr char8_t const *str_const_nan<char8_t>() {
+  return u8"nan"; // only compiled when the compiler defines __cpp_char8_t
+}
+#endif
+
+template <typename UC> constexpr UC const *str_const_inf(); // declared only; one specialization per character type below
+
+template <> constexpr char const *str_const_inf<char>() { return "infinity"; }
+
+template <> constexpr wchar_t const *str_const_inf<wchar_t>() {
+  return L"infinity";
+}
+
+template <> constexpr char16_t const *str_const_inf<char16_t>() {
+  return u"infinity";
+}
+
+template <> constexpr char32_t const *str_const_inf<char32_t>() {
+  return U"infinity";
+}
+
+#ifdef __cpp_char8_t
+template <> constexpr char8_t const *str_const_inf<char8_t>() {
+  return u8"infinity"; // only compiled when the compiler defines __cpp_char8_t
+}
+#endif
+
+template <typename = void> struct int_luts { // lookup tables for integer parsing
+  static constexpr uint8_t chdigit[] = { // byte -> digit value ('0'-'9', 'A'-'Z', 'a'-'z'); 255 otherwise
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   255, 255,
+      255, 255, 255, 255, 255, 10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
+      20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,
+      35,  255, 255, 255, 255, 255, 255, 10,  11,  12,  13,  14,  15,  16,  17,
+      18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,
+      33,  34,  35,  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255};
+
+  static constexpr size_t maxdigits_u64[] = { // entry [base - 2], 35 entries (see max_digits_u64())
+      64, 41, 32, 28, 25, 23, 22, 21, 20, 19, 18, 18, 17, 17, 16, 16, 16, 16,
+      15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13};
+
+  static constexpr uint64_t min_safe_u64[] = { // entry [base - 2], 35 entries (see min_safe_u64())
+      9223372036854775808ull,  12157665459056928801ull, 4611686018427387904,
+      7450580596923828125,     4738381338321616896,     3909821048582988049,
+      9223372036854775808ull,  12157665459056928801ull, 10000000000000000000ull,
+      5559917313492231481,     2218611106740436992,     8650415919381337933,
+      2177953337809371136,     6568408355712890625,     1152921504606846976,
+      2862423051509815793,     6746640616477458432,     15181127029874798299ull,
+      1638400000000000000,     3243919932521508681,     6221821273427820544,
+      11592836324538749809ull, 876488338465357824,      1490116119384765625,
+      2481152873203736576,     4052555153018976267,     6502111422497947648,
+      10260628712958602189ull, 15943230000000000000ull, 787662783788549761,
+      1152921504606846976,     1667889514952984961,     2386420683693101056,
+      3379220508056640625,     4738381338321616896};
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+// Namespace-scope definitions of the three arrays, compiled only when the macro is non-zero.
+template <typename T> constexpr uint8_t int_luts<T>::chdigit[];
+
+template <typename T> constexpr size_t int_luts<T>::maxdigits_u64[];
+
+template <typename T> constexpr uint64_t int_luts<T>::min_safe_u64[];
+
+#endif
+
+template <typename UC> // digit value of c from int_luts<>::chdigit, 255 if none
+fastfloat_really_inline constexpr uint8_t ch_to_digit(UC c) {
+  // wchar_t and char can be signed, so we need to be careful.
+  using UnsignedUC = typename std::make_unsigned<UC>::type;
+  return int_luts<>::chdigit[static_cast<unsigned char>(
+      static_cast<UnsignedUC>(c) &
+      static_cast<UnsignedUC>( // all-ones mask if c fits in one byte, else 0
+          -((static_cast<UnsignedUC>(c) & ~0xFFull) == 0)))]; // a 0 mask selects chdigit[0] == 255
+}
+
+fastfloat_really_inline constexpr size_t max_digits_u64(int base) {
+  return int_luts<>::maxdigits_u64[base - 2]; // table starts at base 2; base is not range-checked
+}
+
+// If a u64 is exactly max_digits_u64() in length, this is
+// the value below which it has definitely overflowed.
+fastfloat_really_inline constexpr uint64_t min_safe_u64(int base) {
+  return int_luts<>::min_safe_u64[base - 2]; // table starts at base 2; base is not range-checked
+}
+
+static_assert(std::is_same<equiv_uint_t<double>, uint64_t>::value,
+              "equiv_uint should be uint64_t for double");
+static_assert(std::numeric_limits<double>::is_iec559,
+              "double must fulfill the requirements of IEC 559 (IEEE 754)");
+
+static_assert(std::is_same<equiv_uint_t<float>, uint32_t>::value,
+              "equiv_uint should be uint32_t for float");
+static_assert(std::numeric_limits<float>::is_iec559,
+              "float must fulfill the requirements of IEC 559 (IEEE 754)");
+
+#ifdef __STDCPP_FLOAT64_T__
+static_assert(std::is_same<equiv_uint_t<std::float64_t>, uint64_t>::value,
+              "equiv_uint should be uint64_t for std::float64_t");
+static_assert(
+    std::numeric_limits<std::float64_t>::is_iec559,
+    "std::float64_t must fulfill the requirements of IEC 559 (IEEE 754)");
+
+template <> // std::float64_t inherits every constant defined for double
+struct binary_format<std::float64_t> : public binary_format<double> {};
+#endif // __STDCPP_FLOAT64_T__
+
+#ifdef __STDCPP_FLOAT32_T__
+static_assert(std::is_same<equiv_uint_t<std::float32_t>, uint32_t>::value,
+              "equiv_uint should be uint32_t for std::float32_t");
+static_assert(
+    std::numeric_limits<std::float32_t>::is_iec559,
+    "std::float32_t must fulfill the requirements of IEC 559 (IEEE 754)");
+
+template <> // std::float32_t inherits every constant defined for float
+struct binary_format<std::float32_t> : public binary_format<float> {};
+#endif // __STDCPP_FLOAT32_T__
+
+#ifdef __STDCPP_FLOAT16_T__
+static_assert(
+    std::is_same<binary_format<std::float16_t>::equiv_uint, uint16_t>::value,
+    "equiv_uint should be uint16_t for std::float16_t");
+static_assert(
+    std::numeric_limits<std::float16_t>::is_iec559,
+    "std::float16_t must fulfill the requirements of IEC 559 (IEEE 754)");
+#endif // __STDCPP_FLOAT16_T__
+
+#ifdef __STDCPP_BFLOAT16_T__
+static_assert(
+    std::is_same<binary_format<std::bfloat16_t>::equiv_uint, uint16_t>::value,
+    "equiv_uint should be uint16_t for std::bfloat16_t");
+static_assert(
+    std::numeric_limits<std::bfloat16_t>::is_iec559,
+    "std::bfloat16_t must fulfill the requirements of IEC 559 (IEEE 754)");
+#endif // __STDCPP_BFLOAT16_T__
+
+constexpr chars_format operator~(chars_format rhs) noexcept {
+  // Complement computed on the underlying integer representation.
+  return chars_format(~std::underlying_type<chars_format>::type(rhs));
+}
+
+constexpr chars_format operator&(chars_format lhs, chars_format rhs) noexcept {
+  // Bitwise AND computed on the underlying integer representation.
+  using raw_t = std::underlying_type<chars_format>::type;
+  return chars_format(raw_t(lhs) & raw_t(rhs));
+}
+
+constexpr chars_format operator|(chars_format lhs, chars_format rhs) noexcept {
+  // Bitwise OR computed on the underlying integer representation.
+  using raw_t = std::underlying_type<chars_format>::type;
+  return chars_format(raw_t(lhs) | raw_t(rhs));
+}
+
+constexpr chars_format operator^(chars_format lhs, chars_format rhs) noexcept {
+  // Bitwise XOR computed on the underlying integer representation.
+  using raw_t = std::underlying_type<chars_format>::type;
+  return chars_format(raw_t(lhs) ^ raw_t(rhs));
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 chars_format &
+operator&=(chars_format &lhs, chars_format rhs) noexcept {
+  return lhs = operator&(lhs, rhs); // store the AND of both operands in lhs
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 chars_format &
+operator|=(chars_format &lhs, chars_format rhs) noexcept {
+  return lhs = operator|(lhs, rhs); // store the OR of both operands in lhs
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 chars_format &
+operator^=(chars_format &lhs, chars_format rhs) noexcept {
+  return lhs = operator^(lhs, rhs); // store the XOR of both operands in lhs
+}
+
+namespace detail {
+// Returns fmt with the flags implied by the deprecated feature macros OR-ed in.
+constexpr chars_format adjust_for_feature_macros(chars_format fmt) {
+  return fmt // returned unchanged when neither macro below is defined
+#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS
+         | chars_format::allow_leading_plus
+#endif
+#ifdef FASTFLOAT_SKIP_WHITE_SPACE
+         | chars_format::skip_white_space
+#endif
+      ;
+}
+} // namespace detail
+} // namespace fast_float
+
+#endif
+
+
+#ifndef FASTFLOAT_FAST_FLOAT_H
+#define FASTFLOAT_FAST_FLOAT_H
+
+
+namespace fast_float {
+/**
+ * This function parses the character sequence [first,last) for a number. It
+ * parses floating-point numbers expecting a locale-independent format
+ * equivalent to what is used by std::strtod in the default ("C") locale. The
+ * resulting value is the closest floating-point value (using either float or
+ * double), using the "round to even" convention for values that would
+ * otherwise fall right in-between two values. That is, we provide exact parsing
+ * according to the IEEE standard.
+ *
+ * Given a successful parse, the pointer (`ptr`) in the returned value is set to
+ * point right after the parsed number, and the `value` referenced is set to the
+ * parsed value. In case of error, the returned `ec` contains a representative
+ * error, otherwise the default (`std::errc()`) value is stored.
+ *
+ * The implementation does not throw and does not allocate memory (e.g., with
+ * `new` or `malloc`).
+ *
+ * Like the C++17 standard, the `fast_float::from_chars` functions take an
+ * optional last argument of the type `fast_float::chars_format`. It is a bitset
+ * value: we check whether `fmt & fast_float::chars_format::fixed` and `fmt &
+ * fast_float::chars_format::scientific` are set to determine whether we allow
+ * the fixed point and scientific notation respectively. The default is
+ * `fast_float::chars_format::general` which allows both `fixed` and
+ * `scientific`.
+ */
+template <typename T, typename UC = char,
+          typename = FASTFLOAT_ENABLE_IF(is_supported_float_type<T>::value)>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars(UC const *first, UC const *last, T &value,
+           chars_format fmt = chars_format::general) noexcept;
+
+/**
+ * Like from_chars, but accepts an `options` argument to govern number parsing.
+ * It works for both floating-point types and integer types.
+ */
+template <typename T, typename UC = char>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars_advanced(UC const *first, UC const *last, T &value,
+                    parse_options_t<UC> options) noexcept;
+
+/**
+ * This function multiplies an integer number by a power of 10 and returns
+ * the result as a double precision floating-point value that is correctly
+ * rounded. The resulting floating-point value is the closest floating-point
+ * value, using the "round to nearest, tie to even" convention for values that
+ * would otherwise fall right in-between two values. That is, we provide exact
+ * conversion according to the IEEE standard.
+ *
+ * On overflow infinity is returned, on underflow 0 is returned.
+ *
+ * The implementation does not throw and does not allocate memory (e.g., with
+ * `new` or `malloc`).
+ */
+FASTFLOAT_CONSTEXPR20 inline double
+integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept;
+FASTFLOAT_CONSTEXPR20 inline double
+integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept;
+
+/**
+ * This function is a template overload of `integer_times_pow10()`
+ * that returns a floating-point value of type `T` that is one of
+ * supported floating-point types (e.g. `double`, `float`).
+ */
+template <typename T>
+FASTFLOAT_CONSTEXPR20
+    typename std::enable_if<is_supported_float_type<T>::value, T>::type
+    integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept;
+template <typename T>
+FASTFLOAT_CONSTEXPR20
+    typename std::enable_if<is_supported_float_type<T>::value, T>::type
+    integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept;
+
+/**
+ * from_chars for integer types.
+ */
+template <typename T, typename UC = char,
+          typename = FASTFLOAT_ENABLE_IF(is_supported_integer_type<T>::value)>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars(UC const *first, UC const *last, T &value, int base = 10) noexcept;
+
+} // namespace fast_float
+
+#endif // FASTFLOAT_FAST_FLOAT_H
+
+#ifndef FASTFLOAT_ASCII_NUMBER_H
+#define FASTFLOAT_ASCII_NUMBER_H
+
+#include <cctype>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <limits>
+#include <type_traits>
+
+
+#ifdef FASTFLOAT_SSE2
+#include <emmintrin.h>
+#endif
+
+#ifdef FASTFLOAT_NEON
+#include <arm_neon.h>
+#endif
+
+namespace fast_float {
+
+template <typename UC> fastfloat_really_inline constexpr bool has_simd_opt() {
+#ifdef FASTFLOAT_HAS_SIMD
+  return std::is_same<UC, char16_t>::value; // true only for char16_t
+#else
+  return false; // built without FASTFLOAT_HAS_SIMD
+#endif
+}
+
+// True iff c is an ASCII decimal digit. Plain comparisons are used on purpose:
+// subtracting first can overflow a signed UC; compilers optimize this well.
+template <typename UC>
+fastfloat_really_inline constexpr bool is_integer(UC c) noexcept {
+  return !(c > UC('9') || c < UC('0'));
+}
+
+fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { // reverse byte order
+  return (val >> 56) | ((val >> 40) & 0x000000000000FF00) |
+         ((val >> 24) & 0x0000000000FF0000) | ((val >> 8) & 0x00000000FF000000) |
+         ((val << 8) & 0x000000FF00000000) | ((val << 24) & 0x0000FF0000000000) |
+         ((val << 40) & 0x00FF000000000000) | (val << 56);
+}
+
+fastfloat_really_inline constexpr uint32_t byteswap_32(uint32_t val) {
+  return ((val & 0xFF000000u) >> 24) | ((val & 0x00FF0000u) >> 8) |
+         ((val & 0x0000FF00u) << 8) | ((val & 0x000000FFu) << 24); // reversed
+}
+
+// Loads eight consecutive UC units into one u64, first unit in the lowest
+// byte. Units wider than a byte are truncated to their low 8 bits.
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
+read8_to_u64(UC const *chars) {
+  if (!cpp20_and_in_constexpr() && std::is_same<UC, char>::value) {
+    // UC is char and cpp20_and_in_constexpr() is false: copy all bytes at once.
+    uint64_t packed;
+    ::memcpy(&packed, chars, sizeof(uint64_t));
+#if FASTFLOAT_IS_BIG_ENDIAN == 1
+    packed = byteswap(packed); // read as-if stored in little-endian order
+#endif
+    return packed;
+  }
+  // Otherwise assemble the word one unit at a time.
+  uint64_t packed = 0;
+  for (int lane = 0; lane < 64; lane += 8)
+    packed |= uint64_t(uint8_t(*chars++)) << lane;
+  return packed;
+}
+
+// Loads four consecutive UC units into one u32, first unit in the lowest
+// byte. Units wider than a byte are truncated to their low 8 bits.
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint32_t
+read4_to_u32(UC const *chars) {
+  if (!cpp20_and_in_constexpr() && std::is_same<UC, char>::value) {
+    uint32_t packed; // filled by the memcpy below
+    ::memcpy(&packed, chars, sizeof(uint32_t));
+#if FASTFLOAT_IS_BIG_ENDIAN == 1
+    packed = byteswap_32(packed); // read as-if stored in little-endian order
+#endif
+    return packed;
+  }
+  // Otherwise assemble the word one unit at a time.
+  uint32_t packed = 0;
+  for (int lane = 0; lane < 32; lane += 8)
+    packed |= uint32_t(uint8_t(*chars++)) << lane;
+  return packed;
+}
+#ifdef FASTFLOAT_SSE2
+
+fastfloat_really_inline uint64_t simd_read8_to_u64(__m128i const data) {
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
+  __m128i const packed = _mm_packus_epi16(data, data); // narrow the eight 16-bit lanes to bytes
+#ifdef FASTFLOAT_64BIT
+  return uint64_t(_mm_cvtsi128_si64(packed)); // low 64 bits of the packed vector
+#else
+  uint64_t value;
+  // Visual Studio + older versions of GCC don't support _mm_storeu_si64
+  _mm_storel_epi64(reinterpret_cast<__m128i *>(&value), packed);
+  return value;
+#endif
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+
+fastfloat_really_inline uint64_t simd_read8_to_u64(char16_t const *chars) {
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
+  return simd_read8_to_u64( // unaligned 16-byte load, then the overload above
+      _mm_loadu_si128(reinterpret_cast<__m128i const *>(chars)));
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+
+#elif defined(FASTFLOAT_NEON)
+
+fastfloat_really_inline uint64_t simd_read8_to_u64(uint16x8_t const data) {
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
+  uint8x8_t utf8_packed = vmovn_u16(data); // narrow the eight 16-bit lanes to bytes
+  return vget_lane_u64(vreinterpret_u64_u8(utf8_packed), 0); // the 8 bytes viewed as one u64
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+
+fastfloat_really_inline uint64_t simd_read8_to_u64(char16_t const *chars) {
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
+  return simd_read8_to_u64( // load eight 16-bit units, then the overload above
+      vld1q_u16(reinterpret_cast<uint16_t const *>(chars)));
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+
+#endif // FASTFLOAT_SSE2
+
+// MSVC SFINAE is broken pre-VS2017
+#if defined(_MSC_VER) && _MSC_VER <= 1900
+template <typename UC>
+#else
+template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>()) = 0>
+#endif
+// Placeholder overload for UC without a SIMD path; exists only so generic code compiles.
+uint64_t simd_read8_to_u64(UC const *) {
+  return 0; // the argument is ignored
+}
+
+// credit  @aqrit -- value of eight packed ASCII digits (lowest byte = leading digit)
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint32_t
+parse_eight_digits_unrolled(uint64_t val) {
+  uint64_t const mask = 0x000000FF000000FF; // keeps bytes 0 and 4
+  uint64_t const mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
+  uint64_t const mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
+  val -= 0x3030303030303030; // subtract '0' from every byte
+  val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
+  val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; // weight the four two-digit pairs
+  return uint32_t(val);
+}
+
+// Precondition: the eight units starting at chars are all decimal digits.
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint32_t
+parse_eight_digits_unrolled(UC const *chars) noexcept {
+  if (!cpp20_and_in_constexpr() && has_simd_opt<UC>()) {
+    return parse_eight_digits_unrolled(simd_read8_to_u64(chars));
+  }
+  return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay
+}
+
+// credit @aqrit -- every byte lane must keep bit 7 clear in both terms
+fastfloat_really_inline constexpr bool
+is_made_of_eight_digits_fast(uint64_t val) noexcept {
+  return (((val - 0x3030303030303030) | (val + 0x4646464646464646)) &
+          0x8080808080808080) == 0;
+}
+
+fastfloat_really_inline constexpr bool
+is_made_of_four_digits_fast(uint32_t val) noexcept {
+  return (((val - 0x30303030) | (val + 0x46464646)) & 0x80808080) == 0;
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint32_t
+parse_four_digits_unrolled(uint32_t val) noexcept { // four packed ASCII digits, lowest byte = leading digit
+  val -= 0x30303030; // subtract '0' from every byte
+  val = (val * 10) + (val >> 8); // bytes 0 and 2 now hold the two two-digit pairs
+  return (((val & 0x00FF00FF) * 0x00640001) >> 16) & 0xFFFF; // 100 * first pair + second pair
+}
+
+#ifdef FASTFLOAT_HAS_SIMD
+
+// Call this if chars might not be 8 digits.
+// Using this style (instead of is_made_of_eight_digits_fast() then
+// parse_eight_digits_unrolled()) ensures we don't load SIMD registers twice.
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
+simd_parse_if_eight_digits_unrolled(char16_t const *chars,
+                                    uint64_t &i) noexcept {
+  if (cpp20_and_in_constexpr()) {
+    return false; // i is left untouched on this path
+  }
+#ifdef FASTFLOAT_SSE2
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
+  __m128i const data =
+      _mm_loadu_si128(reinterpret_cast<__m128i const *>(chars));
+
+  // (x - '0') <= 9
+  // http://0x80.pl/articles/simd-parsing-int-sequences.html
+  __m128i const t0 = _mm_add_epi16(data, _mm_set1_epi16(32720));
+  __m128i const t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-32759));
+
+  if (_mm_movemask_epi8(t1) == 0) { // no lane failed the digit test
+    i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
+    return true;
+  } else
+    return false;
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
+#elif defined(FASTFLOAT_NEON)
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
+  uint16x8_t const data = vld1q_u16(reinterpret_cast<uint16_t const *>(chars));
+
+  // (x - '0') <= 9
+  // http://0x80.pl/articles/simd-parsing-int-sequences.html
+  uint16x8_t const t0 = vsubq_u16(data, vmovq_n_u16('0'));
+  uint16x8_t const mask = vcltq_u16(t0, vmovq_n_u16('9' - '0' + 1));
+
+  if (vminvq_u16(mask) == 0xFFFF) { // every lane passed the digit test
+    i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
+    return true;
+  } else
+    return false;
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
+#else
+  (void)chars;
+  (void)i;
+  return false; // neither FASTFLOAT_SSE2 nor FASTFLOAT_NEON is defined
+#endif // FASTFLOAT_SSE2
+}
+
+#endif // FASTFLOAT_HAS_SIMD
+
+// MSVC SFINAE is broken pre-VS2017
+#if defined(_MSC_VER) && _MSC_VER <= 1900
+template <typename UC>
+#else
+template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>()) = 0>
+#endif
+// Placeholder overload for UC without a SIMD path; exists only so generic code compiles.
+bool simd_parse_if_eight_digits_unrolled(UC const *, uint64_t &) {
+  return 0; // converts to false; both arguments are ignored
+}
+
+template <typename UC, FASTFLOAT_ENABLE_IF(!std::is_same<UC, char>::value) = 0>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
+loop_parse_if_eight_digits(UC const *&p, UC const *const pend, uint64_t &i) {
+  // Advance p over chunks of eight digits while at least eight units remain;
+  // does nothing when has_simd_opt<UC>() is false.
+  if (has_simd_opt<UC>()) {
+    // in rare cases, the accumulation into i will overflow, but that's ok
+    for (; (pend - p >= 8) && simd_parse_if_eight_digits_unrolled(p, i);
+         p += 8) {
+    }
+  }
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
+loop_parse_if_eight_digits(char const *&p, char const *const pend,
+                           uint64_t &i) {
+  // char overload: while the next eight bytes are all digits, fold them into i and advance p by eight. This optimizes better than parse_if_eight_digits_unrolled() for UC = char.
+  while ((std::distance(p, pend) >= 8) &&
+         is_made_of_eight_digits_fast(read8_to_u64(p))) {
+    i = i * 100000000 +
+        parse_eight_digits_unrolled(read8_to_u64(
+            p)); // in rare cases, this will overflow, but that's ok
+    p += 8;
+  }
+}
+
+enum class parse_error { // failure reason stored in parsed_number_string_t::error by report_parse_error()
+  no_error, // default value of parsed_number_string_t::error
+  // [JSON-only] The minus sign must be followed by an integer.
+  missing_integer_after_sign,
+  // A sign must be followed by an integer or dot.
+  missing_integer_or_dot_after_sign,
+  // [JSON-only] The integer part must not have leading zeros.
+  leading_zeros_in_integer_part,
+  // [JSON-only] The integer part must have at least one digit.
+  no_digits_in_integer_part,
+  // [JSON-only] If there is a decimal point, there must be digits in the
+  // fractional part.
+  no_digits_in_fractional_part,
+  // The mantissa must have at least one digit.
+  no_digits_in_mantissa,
+  // Scientific notation requires an exponential part.
+  missing_exponential_part,
+};
+
+template <typename UC> struct parsed_number_string_t { // result of parse_number_string(): the tokenized pieces of a decimal number
+  int64_t exponent{0}; // power of ten to apply to mantissa
+  uint64_t mantissa{0}; // decimal digits accumulated as an integer (a truncation when too_many_digits is set)
+  UC const *lastmatch{nullptr}; // end of the parsed number on success; the position reported by the parser on failure
+  bool negative{false}; // the input started with '-'
+  bool valid{false}; // true only when a number was parsed successfully
+  bool too_many_digits{false}; // more than 19 significant digits were present, so mantissa is not exact
+  // The digit runs found before and after the decimal point (views into the input)
+  span<UC const> integer{};  // non-nullable
+  span<UC const> fraction{}; // nullable
+  parse_error error{parse_error::no_error}; // failure reason; stays no_error unless set by report_parse_error()
+};
+
+using byte_span = span<char const>; // span over const char elements
+using parsed_number_string = parsed_number_string_t<char>; // parse result for plain char input
+
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
+report_parse_error(UC const *p, parse_error error) { // builds the failure result: valid == false, lastmatch == p, error == error
+  parsed_number_string_t<UC> answer; // every other field keeps its in-class default
+  answer.valid = false;
+  answer.lastmatch = p;
+  answer.error = error;
+  return answer;
+}
+
+// Tokenizes the decimal number in [p, pend) into a sign, a 64-bit mantissa and a decimal exponent; basic_json_fmt selects the stricter JSON rules.
+// Up to 19 significant digits are kept exactly; longer inputs get a truncated mantissa with too_many_digits set. On failure, valid is false and error holds the reason.
+template <bool basic_json_fmt, typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
+parse_number_string(UC const *p, UC const *pend,
+                    parse_options_t<UC> options) noexcept {
+  chars_format const fmt = detail::adjust_for_feature_macros(options.format);
+  UC const decimal_point = options.decimal_point;
+
+  parsed_number_string_t<UC> answer;
+  answer.valid = false;
+  answer.too_many_digits = false;
+  // Precondition: p < pend, so the first character is dereferenced without a check.
+  answer.negative = (*p == UC('-'));
+  // C++17 20.19.3.(7.1) explicitly forbids '+' sign here, so it is accepted only with allow_leading_plus and never in JSON mode
+  if ((*p == UC('-')) || (uint64_t(fmt & chars_format::allow_leading_plus) &&
+                          !basic_json_fmt && *p == UC('+'))) {
+    ++p;
+    if (p == pend) {
+      return report_parse_error<UC>(
+          p, parse_error::missing_integer_or_dot_after_sign);
+    }
+    FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
+      if (!is_integer(*p)) { // a sign must be followed by an integer
+        return report_parse_error<UC>(p,
+                                      parse_error::missing_integer_after_sign);
+      }
+    }
+    else {
+      if (!is_integer(*p) &&
+          (*p !=
+           decimal_point)) { // a sign must be followed by an integer or the dot
+        return report_parse_error<UC>(
+            p, parse_error::missing_integer_or_dot_after_sign);
+      }
+    }
+  }
+  UC const *const start_digits = p;
+
+  uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
+
+  while ((p != pend) && is_integer(*p)) {
+    // a multiplication by 10 is cheaper than an arbitrary integer
+    // multiplication
+    i = 10 * i +
+        uint64_t(*p -
+                 UC('0')); // might overflow, we will handle the overflow later
+    ++p;
+  }
+  UC const *const end_of_integer_part = p;
+  int64_t digit_count = int64_t(end_of_integer_part - start_digits);
+  answer.integer = span<UC const>(start_digits, size_t(digit_count));
+  FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
+    // at least 1 digit in integer part, without leading zeros
+    if (digit_count == 0) {
+      return report_parse_error<UC>(p, parse_error::no_digits_in_integer_part);
+    }
+    if ((start_digits[0] == UC('0') && digit_count > 1)) {
+      return report_parse_error<UC>(start_digits,
+                                    parse_error::leading_zeros_in_integer_part);
+    }
+  }
+
+  int64_t exponent = 0; // decimal exponent: minus the number of fraction digits, plus any explicit exponent
+  bool const has_decimal_point = (p != pend) && (*p == decimal_point);
+  if (has_decimal_point) {
+    ++p;
+    UC const *before = p;
+    // can occur at most twice without overflowing, but let it occur more, since
+    // for integers with many digits, digit parsing is the primary bottleneck.
+    loop_parse_if_eight_digits(p, pend, i);
+
+    while ((p != pend) && is_integer(*p)) {
+      uint8_t digit = uint8_t(*p - UC('0'));
+      ++p;
+      i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
+    }
+    exponent = before - p; // negative count of fraction digits
+    answer.fraction = span<UC const>(before, size_t(p - before));
+    digit_count -= exponent; // i.e. add the number of fraction digits
+  }
+  FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
+    // at least 1 digit in fractional part
+    if (has_decimal_point && exponent == 0) {
+      return report_parse_error<UC>(p,
+                                    parse_error::no_digits_in_fractional_part);
+    }
+  }
+  else if (digit_count == 0) { // at least one digit is required, in the integer or the fractional part
+    return report_parse_error<UC>(p, parse_error::no_digits_in_mantissa);
+  }
+  int64_t exp_number = 0; // explicit exponential part
+  if ((uint64_t(fmt & chars_format::scientific) && (p != pend) &&
+       ((UC('e') == *p) || (UC('E') == *p))) ||
+      (uint64_t(fmt & detail::basic_fortran_fmt) && (p != pend) &&
+       ((UC('+') == *p) || (UC('-') == *p) || (UC('d') == *p) ||
+        (UC('D') == *p)))) { // 'e'/'E' in scientific mode; in Fortran mode also 'd'/'D' or a bare sign
+    UC const *location_of_e = p; // rewind target if no exponent digits follow
+    if ((UC('e') == *p) || (UC('E') == *p) || (UC('d') == *p) ||
+        (UC('D') == *p)) {
+      ++p;
+    }
+    bool neg_exp = false;
+    if ((p != pend) && (UC('-') == *p)) {
+      neg_exp = true;
+      ++p;
+    } else if ((p != pend) &&
+               (UC('+') ==
+                *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
+      ++p;
+    }
+    if ((p == pend) || !is_integer(*p)) {
+      if (!uint64_t(fmt & chars_format::fixed)) {
+        // The exponential part is invalid for scientific notation, so it must
+        // be a trailing token for fixed notation. However, fixed notation is
+        // disabled, so report a scientific notation error.
+        return report_parse_error<UC>(p, parse_error::missing_exponential_part);
+      }
+      // Otherwise, we will be ignoring the 'e': it is left unparsed as trailing text.
+      p = location_of_e;
+    } else {
+      while ((p != pend) && is_integer(*p)) {
+        uint8_t digit = uint8_t(*p - UC('0'));
+        if (exp_number < 0x10000000) { // stop accumulating once the value is huge, so exp_number cannot overflow
+          exp_number = 10 * exp_number + digit;
+        }
+        ++p;
+      }
+      if (neg_exp) {
+        exp_number = -exp_number;
+      }
+      exponent += exp_number;
+    }
+  } else {
+    // If the format is scientific and not fixed, an exponent is mandatory, so we have to bail out.
+    if (uint64_t(fmt & chars_format::scientific) &&
+        !uint64_t(fmt & chars_format::fixed)) {
+      return report_parse_error<UC>(p, parse_error::missing_exponential_part);
+    }
+  }
+  answer.lastmatch = p;
+  answer.valid = true;
+
+  // If we frequently had to deal with long strings of digits,
+  // we could extend our code by using a 128-bit integer instead
+  // of a 64-bit integer. However, this is uncommon.
+  //
+  // We can deal with up to 19 digits.
+  if (digit_count > 19) { // this is uncommon
+    // It is possible that the integer had an overflow.
+    // We have to handle the case where we have 0.0000somenumber.
+    // We need to be mindful of the case where we only have zeroes...
+    // E.g., 0.000000000...000.
+    UC const *start = start_digits;
+    while ((start != pend) && (*start == UC('0') || *start == decimal_point)) {
+      if (*start == UC('0')) {
+        digit_count--;
+      }
+      start++;
+    }
+
+    if (digit_count > 19) {
+      answer.too_many_digits = true;
+      // Let us start again, this time, avoiding overflows.
+      // We don't need to call is_integer, since we use the
+      // pre-tokenized spans from above.
+      i = 0;
+      p = answer.integer.ptr;
+      UC const *int_end = p + answer.integer.len();
+      uint64_t const minimal_nineteen_digit_integer{1000000000000000000};
+      while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
+        i = i * 10 + uint64_t(*p - UC('0'));
+        ++p;
+      }
+      if (i >= minimal_nineteen_digit_integer) { // We have a big integer
+        exponent = end_of_integer_part - p + exp_number; // integer digits dropped by the truncation scale the value up
+      } else { // We have a value with a fractional component.
+        p = answer.fraction.ptr;
+        UC const *frac_end = p + answer.fraction.len();
+        while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
+          i = i * 10 + uint64_t(*p - UC('0'));
+          ++p;
+        }
+        exponent = answer.fraction.ptr - p + exp_number; // fraction digits actually consumed scale the value down
+      }
+      // We have now corrected both exponent and i, to a truncated value
+    }
+  }
+  answer.exponent = exponent;
+  answer.mantissa = i;
+  return answer;
+}
+
+template <typename T, typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+parse_int_string(UC const *p, UC const *pend, T &value,
+                 parse_options_t<UC> options) { // parses an integer in options.base from [p, pend) into value; returns the end position plus an errc (invalid_argument when there are no digits, result_out_of_range on overflow)
+  chars_format const fmt = detail::adjust_for_feature_macros(options.format);
+  int const base = options.base;
+
+  from_chars_result_t<UC> answer;
+
+  UC const *const first = p; // reported back as ptr when nothing could be parsed
+
+  bool const negative = (*p == UC('-')); // NOTE(review): p is dereferenced unchecked — presumably callers guarantee p != pend; confirm
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#pragma warning(push)
+#pragma warning(disable : 4127)
+#endif
+  if (!std::is_signed<T>::value && negative) { // unsigned targets reject a leading '-'
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#pragma warning(pop)
+#endif
+    answer.ec = std::errc::invalid_argument;
+    answer.ptr = first;
+    return answer;
+  }
+  if ((*p == UC('-')) ||
+      (uint64_t(fmt & chars_format::allow_leading_plus) && (*p == UC('+')))) {
+    ++p;
+  }
+
+  UC const *const start_num = p;
+
+  while (p != pend && *p == UC('0')) { // skip leading zeros; they do not count towards digit_count
+    ++p;
+  }
+
+  bool const has_leading_zeros = p > start_num; // lets an input made only of zeros parse as 0
+
+  UC const *const start_digits = p;
+
+  FASTFLOAT_IF_CONSTEXPR17((std::is_same<T, std::uint8_t>::value)) { // uint8_t fast path (base 10 only): classify up to four characters at once
+    if (base == 10) {
+      const size_t len = (size_t)(pend - p);
+      if (len == 0) {
+        if (has_leading_zeros) {
+          value = 0;
+          answer.ec = std::errc();
+          answer.ptr = p;
+        } else {
+          answer.ec = std::errc::invalid_argument;
+          answer.ptr = first;
+        }
+        return answer;
+      }
+
+      uint32_t digits; // the next (up to) four input bytes, first character in the low byte
+
+#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED && FASTFLOAT_HAS_BIT_CAST
+      if (std::is_constant_evaluated()) {
+        uint8_t str[4]{};
+        for (size_t j = 0; j < 4 && j < len; ++j) {
+          str[j] = static_cast<uint8_t>(p[j]);
+        }
+        digits = std::bit_cast<uint32_t>(str);
+#if FASTFLOAT_IS_BIG_ENDIAN
+        digits = byteswap_32(digits);
+#endif
+      }
+#else
+      if (false) { // placeholder so the 'else if' chain below is well-formed without the constant-evaluation branch
+      }
+#endif
+      else if (len >= 4) {
+        ::memcpy(&digits, p, 4); // NOTE(review): copies 4 bytes, which is four characters only when sizeof(UC) == 1 — confirm wider UC never reaches this path
+#if FASTFLOAT_IS_BIG_ENDIAN
+        digits = byteswap_32(digits);
+#endif
+      } else {
+        uint32_t b0 = static_cast<uint8_t>(p[0]);
+        uint32_t b1 = (len > 1) ? static_cast<uint8_t>(p[1]) : 0xFFu; // positions past the end are padded with 0xFF, which is not a digit
+        uint32_t b2 = (len > 2) ? static_cast<uint8_t>(p[2]) : 0xFFu;
+        uint32_t b3 = 0xFFu;
+        digits = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
+      }
+
+      uint32_t magic =
+          ((digits + 0x46464646u) | (digits - 0x30303030u)) & 0x80808080u; // the lowest set bit marks the first byte that is not an ASCII digit
+      uint32_t tz = (uint32_t)countr_zero_32(magic); // 7, 15, 23, 31, or 32
+      uint32_t nd = (tz == 32) ? 4 : (tz >> 3); // number of leading digit bytes, 0..4
+      nd = (uint32_t)std::min((size_t)nd, len);
+      if (nd == 0) {
+        if (has_leading_zeros) {
+          value = 0;
+          answer.ec = std::errc();
+          answer.ptr = p;
+          return answer;
+        }
+        answer.ec = std::errc::invalid_argument;
+        answer.ptr = first;
+        return answer;
+      }
+      if (nd > 3) { // four digits after the skipped leading zeros can never fit in uint8_t
+        const UC *q = p + nd;
+        size_t rem = len - nd;
+        while (rem) { // consume the rest of the digit run so ptr points past the whole number
+          if (*q < UC('0') || *q > UC('9'))
+            break;
+          ++q;
+          --rem;
+        }
+        answer.ec = std::errc::result_out_of_range;
+        answer.ptr = q;
+        return answer;
+      }
+
+      digits ^= 0x30303030u; // ASCII codes -> digit values, byte by byte
+      digits <<= ((4 - nd) * 8); // left-align: the last digit now sits in the top byte
+
+      uint32_t check = ((digits >> 24) & 0xff) | ((digits >> 8) & 0xff00) |
+                       ((digits << 8) & 0xff0000);
+      if (check > 0x00020505) { // check packs hundreds/tens/ones as 0x00HHTTOO, so this means the value exceeds 255
+        answer.ec = std::errc::result_out_of_range;
+        answer.ptr = p + nd;
+        return answer;
+      }
+      value = (uint8_t)((0x640a01 * digits) >> 24); // top byte of the product = ones + 10 * tens + 100 * hundreds
+      answer.ec = std::errc();
+      answer.ptr = p + nd;
+      return answer;
+    }
+  }
+
+  FASTFLOAT_IF_CONSTEXPR17((std::is_same<T, std::uint16_t>::value)) { // uint16_t fast path (base 10 only): used when the input starts with at least four digits, otherwise falls through to the generic loop
+    if (base == 10) {
+      const size_t len = size_t(pend - p);
+      if (len == 0) {
+        if (has_leading_zeros) {
+          value = 0;
+          answer.ec = std::errc();
+          answer.ptr = p;
+        } else {
+          answer.ec = std::errc::invalid_argument;
+          answer.ptr = first;
+        }
+        return answer;
+      }
+
+      if (len >= 4) {
+        uint32_t digits = read4_to_u32(p);
+        if (is_made_of_four_digits_fast(digits)) {
+          uint32_t v = parse_four_digits_unrolled(digits);
+          if (len >= 5 && is_integer(p[4])) { // a fifth digit is present
+            v = v * 10 + uint32_t(p[4] - '0');
+            if (len >= 6 && is_integer(p[5])) { // six or more digits after the skipped zeros always overflow uint16_t
+              answer.ec = std::errc::result_out_of_range;
+              const UC *q = p + 5;
+              while (q != pend && is_integer(*q)) {
+                q++;
+              }
+              answer.ptr = q;
+              return answer;
+            }
+            if (v > 65535) { // five digits, but above the uint16_t maximum
+              answer.ec = std::errc::result_out_of_range;
+              answer.ptr = p + 5;
+              return answer;
+            }
+            value = uint16_t(v);
+            answer.ec = std::errc();
+            answer.ptr = p + 5;
+            return answer;
+          }
+          // 4 digits
+          value = uint16_t(v);
+          answer.ec = std::errc();
+          answer.ptr = p + 4;
+          return answer;
+        }
+      }
+    }
+  }
+
+  uint64_t i = 0; // generic path: accumulate in 64 bits, range-check afterwards
+  if (base == 10) {
+    loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible
+  }
+  while (p != pend) {
+    uint8_t digit = ch_to_digit(*p);
+    if (digit >= base) { // stop at the first character whose digit value is not below base
+      break;
+    }
+    i = uint64_t(base) * i + digit; // might overflow, check this later
+    p++;
+  }
+
+  size_t digit_count = size_t(p - start_digits);
+
+  if (digit_count == 0) {
+    if (has_leading_zeros) {
+      value = 0;
+      answer.ec = std::errc();
+      answer.ptr = p;
+    } else {
+      answer.ec = std::errc::invalid_argument;
+      answer.ptr = first;
+    }
+    return answer;
+  }
+
+  answer.ptr = p; // from here on, ptr is the end of the digit run even when an error is reported
+
+  // check u64 overflow
+  size_t max_digits = max_digits_u64(base);
+  if (digit_count > max_digits) {
+    answer.ec = std::errc::result_out_of_range;
+    return answer;
+  }
+  // this check can be eliminated for all other types, but they will all require
+  // a max_digits(base) equivalent
+  if (digit_count == max_digits && i < min_safe_u64(base)) {
+    answer.ec = std::errc::result_out_of_range;
+    return answer;
+  }
+
+  // check other types overflow
+  if (!std::is_same<T, uint64_t>::value) {
+    if (i > uint64_t(std::numeric_limits<T>::max()) + uint64_t(negative)) { // a negative value may have magnitude max() + 1
+      answer.ec = std::errc::result_out_of_range;
+      return answer;
+    }
+  }
+
+  if (negative) {
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#pragma warning(push)
+#pragma warning(disable : 4146)
+#endif
+    // this weird workaround is required because:
+    // - converting unsigned to signed when its value is greater than signed max
+    // is UB pre-C++23.
+    // - reinterpret_casting (~i + 1) would work, but it is not constexpr
+    // this is always optimized into a neg instruction (note: T is an integer
+    // type)
+    value = T(-std::numeric_limits<T>::max() -
+              T(i - uint64_t(std::numeric_limits<T>::max())));
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#pragma warning(pop)
+#endif
+  } else {
+    value = T(i);
+  }
+
+  answer.ec = std::errc();
+  return answer;
+}
+
+} // namespace fast_float
+
+#endif
+
+#ifndef FASTFLOAT_FAST_TABLE_H
+#define FASTFLOAT_FAST_TABLE_H
+
+#include <cstdint>
+
+namespace fast_float {
+
+/**
+ * When mapping numbers from decimal to binary,
+ * we go from w * 10^q to m * 2^p but we have
+ * 10^q = 5^q * 2^q, so effectively
+ * we are trying to match
+ * w * 2^q * 5^q to m * 2^p. Thus the powers of two
+ * are not a concern since they can be represented
+ * exactly using the binary notation, only the powers of five
+ * affect the binary significand.
+ */
+
+/**
+ * The smallest non-zero float (binary64) is 2^-1074.
+ * We take as input numbers of the form w x 10^q where w < 2^64.
+ * We have that w * 10^-343  <  2^(64-344) 5^-343 < 2^-1076.
+ * However, we have that
+ * (2^64-1) * 10^-342 =  (2^64-1) * 2^-342 * 5^-342 > 2^-1074.
+ * Thus it is possible for a number of the form w * 10^-342 where
+ * w is a 64-bit value to be a non-zero floating-point number.
+ *********
+ * Any number of form w * 10^309 where w>= 1 is going to be
+ * infinite in binary64 so we never need to worry about powers
+ * of 5 greater than 308.
+ */
+template <class unused = void> struct powers_template {
+
+  constexpr static int smallest_power_of_five =
+      binary_format<double>::smallest_power_of_ten();
+  constexpr static int largest_power_of_five =
+      binary_format<double>::largest_power_of_ten();
+  constexpr static int number_of_entries =
+      2 * (largest_power_of_five - smallest_power_of_five + 1);
+  // Powers of five from 5^-342 all the way to 5^308 rounded toward one.
+  constexpr static uint64_t power_of_five_128[number_of_entries] = {
+      0xeef453d6923bd65a, 0x113faa2906a13b3f,
+      0x9558b4661b6565f8, 0x4ac7ca59a424c507,
+      0xbaaee17fa23ebf76, 0x5d79bcf00d2df649,
+      0xe95a99df8ace6f53, 0xf4d82c2c107973dc,
+      0x91d8a02bb6c10594, 0x79071b9b8a4be869,
+      0xb64ec836a47146f9, 0x9748e2826cdee284,
+      0xe3e27a444d8d98b7, 0xfd1b1b2308169b25,
+      0x8e6d8c6ab0787f72, 0xfe30f0f5e50e20f7,
+      0xb208ef855c969f4f, 0xbdbd2d335e51a935,
+      0xde8b2b66b3bc4723, 0xad2c788035e61382,
+      0x8b16fb203055ac76, 0x4c3bcb5021afcc31,
+      0xaddcb9e83c6b1793, 0xdf4abe242a1bbf3d,
+      0xd953e8624b85dd78, 0xd71d6dad34a2af0d,
+      0x87d4713d6f33aa6b, 0x8672648c40e5ad68,
+      0xa9c98d8ccb009506, 0x680efdaf511f18c2,
+      0xd43bf0effdc0ba48, 0x212bd1b2566def2,
+      0x84a57695fe98746d, 0x14bb630f7604b57,
+      0xa5ced43b7e3e9188, 0x419ea3bd35385e2d,
+      0xcf42894a5dce35ea, 0x52064cac828675b9,
+      0x818995ce7aa0e1b2, 0x7343efebd1940993,
+      0xa1ebfb4219491a1f, 0x1014ebe6c5f90bf8,
+      0xca66fa129f9b60a6, 0xd41a26e077774ef6,
+      0xfd00b897478238d0, 0x8920b098955522b4,
+      0x9e20735e8cb16382, 0x55b46e5f5d5535b0,
+      0xc5a890362fddbc62, 0xeb2189f734aa831d,
+      0xf712b443bbd52b7b, 0xa5e9ec7501d523e4,
+      0x9a6bb0aa55653b2d, 0x47b233c92125366e,
+      0xc1069cd4eabe89f8, 0x999ec0bb696e840a,
+      0xf148440a256e2c76, 0xc00670ea43ca250d,
+      0x96cd2a865764dbca, 0x380406926a5e5728,
+      0xbc807527ed3e12bc, 0xc605083704f5ecf2,
+      0xeba09271e88d976b, 0xf7864a44c633682e,
+      0x93445b8731587ea3, 0x7ab3ee6afbe0211d,
+      0xb8157268fdae9e4c, 0x5960ea05bad82964,
+      0xe61acf033d1a45df, 0x6fb92487298e33bd,
+      0x8fd0c16206306bab, 0xa5d3b6d479f8e056,
+      0xb3c4f1ba87bc8696, 0x8f48a4899877186c,
+      0xe0b62e2929aba83c, 0x331acdabfe94de87,
+      0x8c71dcd9ba0b4925, 0x9ff0c08b7f1d0b14,
+      0xaf8e5410288e1b6f, 0x7ecf0ae5ee44dd9,
+      0xdb71e91432b1a24a, 0xc9e82cd9f69d6150,
+      0x892731ac9faf056e, 0xbe311c083a225cd2,
+      0xab70fe17c79ac6ca, 0x6dbd630a48aaf406,
+      0xd64d3d9db981787d, 0x92cbbccdad5b108,
+      0x85f0468293f0eb4e, 0x25bbf56008c58ea5,
+      0xa76c582338ed2621, 0xaf2af2b80af6f24e,
+      0xd1476e2c07286faa, 0x1af5af660db4aee1,
+      0x82cca4db847945ca, 0x50d98d9fc890ed4d,
+      0xa37fce126597973c, 0xe50ff107bab528a0,
+      0xcc5fc196fefd7d0c, 0x1e53ed49a96272c8,
+      0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7a,
+      0x9faacf3df73609b1, 0x77b191618c54e9ac,
+      0xc795830d75038c1d, 0xd59df5b9ef6a2417,
+      0xf97ae3d0d2446f25, 0x4b0573286b44ad1d,
+      0x9becce62836ac577, 0x4ee367f9430aec32,
+      0xc2e801fb244576d5, 0x229c41f793cda73f,
+      0xf3a20279ed56d48a, 0x6b43527578c1110f,
+      0x9845418c345644d6, 0x830a13896b78aaa9,
+      0xbe5691ef416bd60c, 0x23cc986bc656d553,
+      0xedec366b11c6cb8f, 0x2cbfbe86b7ec8aa8,
+      0x94b3a202eb1c3f39, 0x7bf7d71432f3d6a9,
+      0xb9e08a83a5e34f07, 0xdaf5ccd93fb0cc53,
+      0xe858ad248f5c22c9, 0xd1b3400f8f9cff68,
+      0x91376c36d99995be, 0x23100809b9c21fa1,
+      0xb58547448ffffb2d, 0xabd40a0c2832a78a,
+      0xe2e69915b3fff9f9, 0x16c90c8f323f516c,
+      0x8dd01fad907ffc3b, 0xae3da7d97f6792e3,
+      0xb1442798f49ffb4a, 0x99cd11cfdf41779c,
+      0xdd95317f31c7fa1d, 0x40405643d711d583,
+      0x8a7d3eef7f1cfc52, 0x482835ea666b2572,
+      0xad1c8eab5ee43b66, 0xda3243650005eecf,
+      0xd863b256369d4a40, 0x90bed43e40076a82,
+      0x873e4f75e2224e68, 0x5a7744a6e804a291,
+      0xa90de3535aaae202, 0x711515d0a205cb36,
+      0xd3515c2831559a83, 0xd5a5b44ca873e03,
+      0x8412d9991ed58091, 0xe858790afe9486c2,
+      0xa5178fff668ae0b6, 0x626e974dbe39a872,
+      0xce5d73ff402d98e3, 0xfb0a3d212dc8128f,
+      0x80fa687f881c7f8e, 0x7ce66634bc9d0b99,
+      0xa139029f6a239f72, 0x1c1fffc1ebc44e80,
+      0xc987434744ac874e, 0xa327ffb266b56220,
+      0xfbe9141915d7a922, 0x4bf1ff9f0062baa8,
+      0x9d71ac8fada6c9b5, 0x6f773fc3603db4a9,
+      0xc4ce17b399107c22, 0xcb550fb4384d21d3,
+      0xf6019da07f549b2b, 0x7e2a53a146606a48,
+      0x99c102844f94e0fb, 0x2eda7444cbfc426d,
+      0xc0314325637a1939, 0xfa911155fefb5308,
+      0xf03d93eebc589f88, 0x793555ab7eba27ca,
+      0x96267c7535b763b5, 0x4bc1558b2f3458de,
+      0xbbb01b9283253ca2, 0x9eb1aaedfb016f16,
+      0xea9c227723ee8bcb, 0x465e15a979c1cadc,
+      0x92a1958a7675175f, 0xbfacd89ec191ec9,
+      0xb749faed14125d36, 0xcef980ec671f667b,
+      0xe51c79a85916f484, 0x82b7e12780e7401a,
+      0x8f31cc0937ae58d2, 0xd1b2ecb8b0908810,
+      0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa15,
+      0xdfbdcece67006ac9, 0x67a791e093e1d49a,
+      0x8bd6a141006042bd, 0xe0c8bb2c5c6d24e0,
+      0xaecc49914078536d, 0x58fae9f773886e18,
+      0xda7f5bf590966848, 0xaf39a475506a899e,
+      0x888f99797a5e012d, 0x6d8406c952429603,
+      0xaab37fd7d8f58178, 0xc8e5087ba6d33b83,
+      0xd5605fcdcf32e1d6, 0xfb1e4a9a90880a64,
+      0x855c3be0a17fcd26, 0x5cf2eea09a55067f,
+      0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481e,
+      0xd0601d8efc57b08b, 0xf13b94daf124da26,
+      0x823c12795db6ce57, 0x76c53d08d6b70858,
+      0xa2cb1717b52481ed, 0x54768c4b0c64ca6e,
+      0xcb7ddcdda26da268, 0xa9942f5dcf7dfd09,
+      0xfe5d54150b090b02, 0xd3f93b35435d7c4c,
+      0x9efa548d26e5a6e1, 0xc47bc5014a1a6daf,
+      0xc6b8e9b0709f109a, 0x359ab6419ca1091b,
+      0xf867241c8cc6d4c0, 0xc30163d203c94b62,
+      0x9b407691d7fc44f8, 0x79e0de63425dcf1d,
+      0xc21094364dfb5636, 0x985915fc12f542e4,
+      0xf294b943e17a2bc4, 0x3e6f5b7b17b2939d,
+      0x979cf3ca6cec5b5a, 0xa705992ceecf9c42,
+      0xbd8430bd08277231, 0x50c6ff782a838353,
+      0xece53cec4a314ebd, 0xa4f8bf5635246428,
+      0x940f4613ae5ed136, 0x871b7795e136be99,
+      0xb913179899f68584, 0x28e2557b59846e3f,
+      0xe757dd7ec07426e5, 0x331aeada2fe589cf,
+      0x9096ea6f3848984f, 0x3ff0d2c85def7621,
+      0xb4bca50b065abe63, 0xfed077a756b53a9,
+      0xe1ebce4dc7f16dfb, 0xd3e8495912c62894,
+      0x8d3360f09cf6e4bd, 0x64712dd7abbbd95c,
+      0xb080392cc4349dec, 0xbd8d794d96aacfb3,
+      0xdca04777f541c567, 0xecf0d7a0fc5583a0,
+      0x89e42caaf9491b60, 0xf41686c49db57244,
+      0xac5d37d5b79b6239, 0x311c2875c522ced5,
+      0xd77485cb25823ac7, 0x7d633293366b828b,
+      0x86a8d39ef77164bc, 0xae5dff9c02033197,
+      0xa8530886b54dbdeb, 0xd9f57f830283fdfc,
+      0xd267caa862a12d66, 0xd072df63c324fd7b,
+      0x8380dea93da4bc60, 0x4247cb9e59f71e6d,
+      0xa46116538d0deb78, 0x52d9be85f074e608,
+      0xcd795be870516656, 0x67902e276c921f8b,
+      0x806bd9714632dff6, 0xba1cd8a3db53b6,
+      0xa086cfcd97bf97f3, 0x80e8a40eccd228a4,
+      0xc8a883c0fdaf7df0, 0x6122cd128006b2cd,
+      0xfad2a4b13d1b5d6c, 0x796b805720085f81,
+      0x9cc3a6eec6311a63, 0xcbe3303674053bb0,
+      0xc3f490aa77bd60fc, 0xbedbfc4411068a9c,
+      0xf4f1b4d515acb93b, 0xee92fb5515482d44,
+      0x991711052d8bf3c5, 0x751bdd152d4d1c4a,
+      0xbf5cd54678eef0b6, 0xd262d45a78a0635d,
+      0xef340a98172aace4, 0x86fb897116c87c34,
+      0x9580869f0e7aac0e, 0xd45d35e6ae3d4da0,
+      0xbae0a846d2195712, 0x8974836059cca109,
+      0xe998d258869facd7, 0x2bd1a438703fc94b,
+      0x91ff83775423cc06, 0x7b6306a34627ddcf,
+      0xb67f6455292cbf08, 0x1a3bc84c17b1d542,
+      0xe41f3d6a7377eeca, 0x20caba5f1d9e4a93,
+      0x8e938662882af53e, 0x547eb47b7282ee9c,
+      0xb23867fb2a35b28d, 0xe99e619a4f23aa43,
+      0xdec681f9f4c31f31, 0x6405fa00e2ec94d4,
+      0x8b3c113c38f9f37e, 0xde83bc408dd3dd04,
+      0xae0b158b4738705e, 0x9624ab50b148d445,
+      0xd98ddaee19068c76, 0x3badd624dd9b0957,
+      0x87f8a8d4cfa417c9, 0xe54ca5d70a80e5d6,
+      0xa9f6d30a038d1dbc, 0x5e9fcf4ccd211f4c,
+      0xd47487cc8470652b, 0x7647c3200069671f,
+      0x84c8d4dfd2c63f3b, 0x29ecd9f40041e073,
+      0xa5fb0a17c777cf09, 0xf468107100525890,
+      0xcf79cc9db955c2cc, 0x7182148d4066eeb4,
+      0x81ac1fe293d599bf, 0xc6f14cd848405530,
+      0xa21727db38cb002f, 0xb8ada00e5a506a7c,
+      0xca9cf1d206fdc03b, 0xa6d90811f0e4851c,
+      0xfd442e4688bd304a, 0x908f4a166d1da663,
+      0x9e4a9cec15763e2e, 0x9a598e4e043287fe,
+      0xc5dd44271ad3cdba, 0x40eff1e1853f29fd,
+      0xf7549530e188c128, 0xd12bee59e68ef47c,
+      0x9a94dd3e8cf578b9, 0x82bb74f8301958ce,
+      0xc13a148e3032d6e7, 0xe36a52363c1faf01,
+      0xf18899b1bc3f8ca1, 0xdc44e6c3cb279ac1,
+      0x96f5600f15a7b7e5, 0x29ab103a5ef8c0b9,
+      0xbcb2b812db11a5de, 0x7415d448f6b6f0e7,
+      0xebdf661791d60f56, 0x111b495b3464ad21,
+      0x936b9fcebb25c995, 0xcab10dd900beec34,
+      0xb84687c269ef3bfb, 0x3d5d514f40eea742,
+      0xe65829b3046b0afa, 0xcb4a5a3112a5112,
+      0x8ff71a0fe2c2e6dc, 0x47f0e785eaba72ab,
+      0xb3f4e093db73a093, 0x59ed216765690f56,
+      0xe0f218b8d25088b8, 0x306869c13ec3532c,
+      0x8c974f7383725573, 0x1e414218c73a13fb,
+      0xafbd2350644eeacf, 0xe5d1929ef90898fa,
+      0xdbac6c247d62a583, 0xdf45f746b74abf39,
+      0x894bc396ce5da772, 0x6b8bba8c328eb783,
+      0xab9eb47c81f5114f, 0x66ea92f3f326564,
+      0xd686619ba27255a2, 0xc80a537b0efefebd,
+      0x8613fd0145877585, 0xbd06742ce95f5f36,
+      0xa798fc4196e952e7, 0x2c48113823b73704,
+      0xd17f3b51fca3a7a0, 0xf75a15862ca504c5,
+      0x82ef85133de648c4, 0x9a984d73dbe722fb,
+      0xa3ab66580d5fdaf5, 0xc13e60d0d2e0ebba,
+      0xcc963fee10b7d1b3, 0x318df905079926a8,
+      0xffbbcfe994e5c61f, 0xfdf17746497f7052,
+      0x9fd561f1fd0f9bd3, 0xfeb6ea8bedefa633,
+      0xc7caba6e7c5382c8, 0xfe64a52ee96b8fc0,
+      0xf9bd690a1b68637b, 0x3dfdce7aa3c673b0,
+      0x9c1661a651213e2d, 0x6bea10ca65c084e,
+      0xc31bfa0fe5698db8, 0x486e494fcff30a62,
+      0xf3e2f893dec3f126, 0x5a89dba3c3efccfa,
+      0x986ddb5c6b3a76b7, 0xf89629465a75e01c,
+      0xbe89523386091465, 0xf6bbb397f1135823,
+      0xee2ba6c0678b597f, 0x746aa07ded582e2c,
+      0x94db483840b717ef, 0xa8c2a44eb4571cdc,
+      0xba121a4650e4ddeb, 0x92f34d62616ce413,
+      0xe896a0d7e51e1566, 0x77b020baf9c81d17,
+      0x915e2486ef32cd60, 0xace1474dc1d122e,
+      0xb5b5ada8aaff80b8, 0xd819992132456ba,
+      0xe3231912d5bf60e6, 0x10e1fff697ed6c69,
+      0x8df5efabc5979c8f, 0xca8d3ffa1ef463c1,
+      0xb1736b96b6fd83b3, 0xbd308ff8a6b17cb2,
+      0xddd0467c64bce4a0, 0xac7cb3f6d05ddbde,
+      0x8aa22c0dbef60ee4, 0x6bcdf07a423aa96b,
+      0xad4ab7112eb3929d, 0x86c16c98d2c953c6,
+      0xd89d64d57a607744, 0xe871c7bf077ba8b7,
+      0x87625f056c7c4a8b, 0x11471cd764ad4972,
+      0xa93af6c6c79b5d2d, 0xd598e40d3dd89bcf,
+      0xd389b47879823479, 0x4aff1d108d4ec2c3,
+      0x843610cb4bf160cb, 0xcedf722a585139ba,
+      0xa54394fe1eedb8fe, 0xc2974eb4ee658828,
+      0xce947a3da6a9273e, 0x733d226229feea32,
+      0x811ccc668829b887, 0x806357d5a3f525f,
+      0xa163ff802a3426a8, 0xca07c2dcb0cf26f7,
+      0xc9bcff6034c13052, 0xfc89b393dd02f0b5,
+      0xfc2c3f3841f17c67, 0xbbac2078d443ace2,
+      0x9d9ba7832936edc0, 0xd54b944b84aa4c0d,
+      0xc5029163f384a931, 0xa9e795e65d4df11,
+      0xf64335bcf065d37d, 0x4d4617b5ff4a16d5,
+      0x99ea0196163fa42e, 0x504bced1bf8e4e45,
+      0xc06481fb9bcf8d39, 0xe45ec2862f71e1d6,
+      0xf07da27a82c37088, 0x5d767327bb4e5a4c,
+      0x964e858c91ba2655, 0x3a6a07f8d510f86f,
+      0xbbe226efb628afea, 0x890489f70a55368b,
+      0xeadab0aba3b2dbe5, 0x2b45ac74ccea842e,
+      0x92c8ae6b464fc96f, 0x3b0b8bc90012929d,
+      0xb77ada0617e3bbcb, 0x9ce6ebb40173744,
+      0xe55990879ddcaabd, 0xcc420a6a101d0515,
+      0x8f57fa54c2a9eab6, 0x9fa946824a12232d,
+      0xb32df8e9f3546564, 0x47939822dc96abf9,
+      0xdff9772470297ebd, 0x59787e2b93bc56f7,
+      0x8bfbea76c619ef36, 0x57eb4edb3c55b65a,
+      0xaefae51477a06b03, 0xede622920b6b23f1,
+      0xdab99e59958885c4, 0xe95fab368e45eced,
+      0x88b402f7fd75539b, 0x11dbcb0218ebb414,
+      0xaae103b5fcd2a881, 0xd652bdc29f26a119,
+      0xd59944a37c0752a2, 0x4be76d3346f0495f,
+      0x857fcae62d8493a5, 0x6f70a4400c562ddb,
+      0xa6dfbd9fb8e5b88e, 0xcb4ccd500f6bb952,
+      0xd097ad07a71f26b2, 0x7e2000a41346a7a7,
+      0x825ecc24c873782f, 0x8ed400668c0c28c8,
+      0xa2f67f2dfa90563b, 0x728900802f0f32fa,
+      0xcbb41ef979346bca, 0x4f2b40a03ad2ffb9,
+      0xfea126b7d78186bc, 0xe2f610c84987bfa8,
+      0x9f24b832e6b0f436, 0xdd9ca7d2df4d7c9,
+      0xc6ede63fa05d3143, 0x91503d1c79720dbb,
+      0xf8a95fcf88747d94, 0x75a44c6397ce912a,
+      0x9b69dbe1b548ce7c, 0xc986afbe3ee11aba,
+      0xc24452da229b021b, 0xfbe85badce996168,
+      0xf2d56790ab41c2a2, 0xfae27299423fb9c3,
+      0x97c560ba6b0919a5, 0xdccd879fc967d41a,
+      0xbdb6b8e905cb600f, 0x5400e987bbc1c920,
+      0xed246723473e3813, 0x290123e9aab23b68,
+      0x9436c0760c86e30b, 0xf9a0b6720aaf6521,
+      0xb94470938fa89bce, 0xf808e40e8d5b3e69,
+      0xe7958cb87392c2c2, 0xb60b1d1230b20e04,
+      0x90bd77f3483bb9b9, 0xb1c6f22b5e6f48c2,
+      0xb4ecd5f01a4aa828, 0x1e38aeb6360b1af3,
+      0xe2280b6c20dd5232, 0x25c6da63c38de1b0,
+      0x8d590723948a535f, 0x579c487e5a38ad0e,
+      0xb0af48ec79ace837, 0x2d835a9df0c6d851,
+      0xdcdb1b2798182244, 0xf8e431456cf88e65,
+      0x8a08f0f8bf0f156b, 0x1b8e9ecb641b58ff,
+      0xac8b2d36eed2dac5, 0xe272467e3d222f3f,
+      0xd7adf884aa879177, 0x5b0ed81dcc6abb0f,
+      0x86ccbb52ea94baea, 0x98e947129fc2b4e9,
+      0xa87fea27a539e9a5, 0x3f2398d747b36224,
+      0xd29fe4b18e88640e, 0x8eec7f0d19a03aad,
+      0x83a3eeeef9153e89, 0x1953cf68300424ac,
+      0xa48ceaaab75a8e2b, 0x5fa8c3423c052dd7,
+      0xcdb02555653131b6, 0x3792f412cb06794d,
+      0x808e17555f3ebf11, 0xe2bbd88bbee40bd0,
+      0xa0b19d2ab70e6ed6, 0x5b6aceaeae9d0ec4,
+      0xc8de047564d20a8b, 0xf245825a5a445275,
+      0xfb158592be068d2e, 0xeed6e2f0f0d56712,
+      0x9ced737bb6c4183d, 0x55464dd69685606b,
+      0xc428d05aa4751e4c, 0xaa97e14c3c26b886,
+      0xf53304714d9265df, 0xd53dd99f4b3066a8,
+      0x993fe2c6d07b7fab, 0xe546a8038efe4029,
+      0xbf8fdb78849a5f96, 0xde98520472bdd033,
+      0xef73d256a5c0f77c, 0x963e66858f6d4440,
+      0x95a8637627989aad, 0xdde7001379a44aa8,
+      0xbb127c53b17ec159, 0x5560c018580d5d52,
+      0xe9d71b689dde71af, 0xaab8f01e6e10b4a6,
+      0x9226712162ab070d, 0xcab3961304ca70e8,
+      0xb6b00d69bb55c8d1, 0x3d607b97c5fd0d22,
+      0xe45c10c42a2b3b05, 0x8cb89a7db77c506a,
+      0x8eb98a7a9a5b04e3, 0x77f3608e92adb242,
+      0xb267ed1940f1c61c, 0x55f038b237591ed3,
+      0xdf01e85f912e37a3, 0x6b6c46dec52f6688,
+      0x8b61313bbabce2c6, 0x2323ac4b3b3da015,
+      0xae397d8aa96c1b77, 0xabec975e0a0d081a,
+      0xd9c7dced53c72255, 0x96e7bd358c904a21,
+      0x881cea14545c7575, 0x7e50d64177da2e54,
+      0xaa242499697392d2, 0xdde50bd1d5d0b9e9,
+      0xd4ad2dbfc3d07787, 0x955e4ec64b44e864,
+      0x84ec3c97da624ab4, 0xbd5af13bef0b113e,
+      0xa6274bbdd0fadd61, 0xecb1ad8aeacdd58e,
+      0xcfb11ead453994ba, 0x67de18eda5814af2,
+      0x81ceb32c4b43fcf4, 0x80eacf948770ced7,
+      0xa2425ff75e14fc31, 0xa1258379a94d028d,
+      0xcad2f7f5359a3b3e, 0x96ee45813a04330,
+      0xfd87b5f28300ca0d, 0x8bca9d6e188853fc,
+      0x9e74d1b791e07e48, 0x775ea264cf55347e,
+      0xc612062576589dda, 0x95364afe032a819e,
+      0xf79687aed3eec551, 0x3a83ddbd83f52205,
+      0x9abe14cd44753b52, 0xc4926a9672793543,
+      0xc16d9a0095928a27, 0x75b7053c0f178294,
+      0xf1c90080baf72cb1, 0x5324c68b12dd6339,
+      0x971da05074da7bee, 0xd3f6fc16ebca5e04,
+      0xbce5086492111aea, 0x88f4bb1ca6bcf585,
+      0xec1e4a7db69561a5, 0x2b31e9e3d06c32e6,
+      0x9392ee8e921d5d07, 0x3aff322e62439fd0,
+      0xb877aa3236a4b449, 0x9befeb9fad487c3,
+      0xe69594bec44de15b, 0x4c2ebe687989a9b4,
+      0x901d7cf73ab0acd9, 0xf9d37014bf60a11,
+      0xb424dc35095cd80f, 0x538484c19ef38c95,
+      0xe12e13424bb40e13, 0x2865a5f206b06fba,
+      0x8cbccc096f5088cb, 0xf93f87b7442e45d4,
+      0xafebff0bcb24aafe, 0xf78f69a51539d749,
+      0xdbe6fecebdedd5be, 0xb573440e5a884d1c,
+      0x89705f4136b4a597, 0x31680a88f8953031,
+      0xabcc77118461cefc, 0xfdc20d2b36ba7c3e,
+      0xd6bf94d5e57a42bc, 0x3d32907604691b4d,
+      0x8637bd05af6c69b5, 0xa63f9a49c2c1b110,
+      0xa7c5ac471b478423, 0xfcf80dc33721d54,
+      0xd1b71758e219652b, 0xd3c36113404ea4a9,
+      0x83126e978d4fdf3b, 0x645a1cac083126ea,
+      0xa3d70a3d70a3d70a, 0x3d70a3d70a3d70a4,
+      0xcccccccccccccccc, 0xcccccccccccccccd,
+      0x8000000000000000, 0x0,
+      0xa000000000000000, 0x0,
+      0xc800000000000000, 0x0,
+      0xfa00000000000000, 0x0,
+      0x9c40000000000000, 0x0,
+      0xc350000000000000, 0x0,
+      0xf424000000000000, 0x0,
+      0x9896800000000000, 0x0,
+      0xbebc200000000000, 0x0,
+      0xee6b280000000000, 0x0,
+      0x9502f90000000000, 0x0,
+      0xba43b74000000000, 0x0,
+      0xe8d4a51000000000, 0x0,
+      0x9184e72a00000000, 0x0,
+      0xb5e620f480000000, 0x0,
+      0xe35fa931a0000000, 0x0,
+      0x8e1bc9bf04000000, 0x0,
+      0xb1a2bc2ec5000000, 0x0,
+      0xde0b6b3a76400000, 0x0,
+      0x8ac7230489e80000, 0x0,
+      0xad78ebc5ac620000, 0x0,
+      0xd8d726b7177a8000, 0x0,
+      0x878678326eac9000, 0x0,
+      0xa968163f0a57b400, 0x0,
+      0xd3c21bcecceda100, 0x0,
+      0x84595161401484a0, 0x0,
+      0xa56fa5b99019a5c8, 0x0,
+      0xcecb8f27f4200f3a, 0x0,
+      0x813f3978f8940984, 0x4000000000000000,
+      0xa18f07d736b90be5, 0x5000000000000000,
+      0xc9f2c9cd04674ede, 0xa400000000000000,
+      0xfc6f7c4045812296, 0x4d00000000000000,
+      0x9dc5ada82b70b59d, 0xf020000000000000,
+      0xc5371912364ce305, 0x6c28000000000000,
+      0xf684df56c3e01bc6, 0xc732000000000000,
+      0x9a130b963a6c115c, 0x3c7f400000000000,
+      0xc097ce7bc90715b3, 0x4b9f100000000000,
+      0xf0bdc21abb48db20, 0x1e86d40000000000,
+      0x96769950b50d88f4, 0x1314448000000000,
+      0xbc143fa4e250eb31, 0x17d955a000000000,
+      0xeb194f8e1ae525fd, 0x5dcfab0800000000,
+      0x92efd1b8d0cf37be, 0x5aa1cae500000000,
+      0xb7abc627050305ad, 0xf14a3d9e40000000,
+      0xe596b7b0c643c719, 0x6d9ccd05d0000000,
+      0x8f7e32ce7bea5c6f, 0xe4820023a2000000,
+      0xb35dbf821ae4f38b, 0xdda2802c8a800000,
+      0xe0352f62a19e306e, 0xd50b2037ad200000,
+      0x8c213d9da502de45, 0x4526f422cc340000,
+      0xaf298d050e4395d6, 0x9670b12b7f410000,
+      0xdaf3f04651d47b4c, 0x3c0cdd765f114000,
+      0x88d8762bf324cd0f, 0xa5880a69fb6ac800,
+      0xab0e93b6efee0053, 0x8eea0d047a457a00,
+      0xd5d238a4abe98068, 0x72a4904598d6d880,
+      0x85a36366eb71f041, 0x47a6da2b7f864750,
+      0xa70c3c40a64e6c51, 0x999090b65f67d924,
+      0xd0cf4b50cfe20765, 0xfff4b4e3f741cf6d,
+      0x82818f1281ed449f, 0xbff8f10e7a8921a4,
+      0xa321f2d7226895c7, 0xaff72d52192b6a0d,
+      0xcbea6f8ceb02bb39, 0x9bf4f8a69f764490,
+      0xfee50b7025c36a08, 0x2f236d04753d5b4,
+      0x9f4f2726179a2245, 0x1d762422c946590,
+      0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef5,
+      0xf8ebad2b84e0d58b, 0xd2e0898765a7deb2,
+      0x9b934c3b330c8577, 0x63cc55f49f88eb2f,
+      0xc2781f49ffcfa6d5, 0x3cbf6b71c76b25fb,
+      0xf316271c7fc3908a, 0x8bef464e3945ef7a,
+      0x97edd871cfda3a56, 0x97758bf0e3cbb5ac,
+      0xbde94e8e43d0c8ec, 0x3d52eeed1cbea317,
+      0xed63a231d4c4fb27, 0x4ca7aaa863ee4bdd,
+      0x945e455f24fb1cf8, 0x8fe8caa93e74ef6a,
+      0xb975d6b6ee39e436, 0xb3e2fd538e122b44,
+      0xe7d34c64a9c85d44, 0x60dbbca87196b616,
+      0x90e40fbeea1d3a4a, 0xbc8955e946fe31cd,
+      0xb51d13aea4a488dd, 0x6babab6398bdbe41,
+      0xe264589a4dcdab14, 0xc696963c7eed2dd1,
+      0x8d7eb76070a08aec, 0xfc1e1de5cf543ca2,
+      0xb0de65388cc8ada8, 0x3b25a55f43294bcb,
+      0xdd15fe86affad912, 0x49ef0eb713f39ebe,
+      0x8a2dbf142dfcc7ab, 0x6e3569326c784337,
+      0xacb92ed9397bf996, 0x49c2c37f07965404,
+      0xd7e77a8f87daf7fb, 0xdc33745ec97be906,
+      0x86f0ac99b4e8dafd, 0x69a028bb3ded71a3,
+      0xa8acd7c0222311bc, 0xc40832ea0d68ce0c,
+      0xd2d80db02aabd62b, 0xf50a3fa490c30190,
+      0x83c7088e1aab65db, 0x792667c6da79e0fa,
+      0xa4b8cab1a1563f52, 0x577001b891185938,
+      0xcde6fd5e09abcf26, 0xed4c0226b55e6f86,
+      0x80b05e5ac60b6178, 0x544f8158315b05b4,
+      0xa0dc75f1778e39d6, 0x696361ae3db1c721,
+      0xc913936dd571c84c, 0x3bc3a19cd1e38e9,
+      0xfb5878494ace3a5f, 0x4ab48a04065c723,
+      0x9d174b2dcec0e47b, 0x62eb0d64283f9c76,
+      0xc45d1df942711d9a, 0x3ba5d0bd324f8394,
+      0xf5746577930d6500, 0xca8f44ec7ee36479,
+      0x9968bf6abbe85f20, 0x7e998b13cf4e1ecb,
+      0xbfc2ef456ae276e8, 0x9e3fedd8c321a67e,
+      0xefb3ab16c59b14a2, 0xc5cfe94ef3ea101e,
+      0x95d04aee3b80ece5, 0xbba1f1d158724a12,
+      0xbb445da9ca61281f, 0x2a8a6e45ae8edc97,
+      0xea1575143cf97226, 0xf52d09d71a3293bd,
+      0x924d692ca61be758, 0x593c2626705f9c56,
+      0xb6e0c377cfa2e12e, 0x6f8b2fb00c77836c,
+      0xe498f455c38b997a, 0xb6dfb9c0f956447,
+      0x8edf98b59a373fec, 0x4724bd4189bd5eac,
+      0xb2977ee300c50fe7, 0x58edec91ec2cb657,
+      0xdf3d5e9bc0f653e1, 0x2f2967b66737e3ed,
+      0x8b865b215899f46c, 0xbd79e0d20082ee74,
+      0xae67f1e9aec07187, 0xecd8590680a3aa11,
+      0xda01ee641a708de9, 0xe80e6f4820cc9495,
+      0x884134fe908658b2, 0x3109058d147fdcdd,
+      0xaa51823e34a7eede, 0xbd4b46f0599fd415,
+      0xd4e5e2cdc1d1ea96, 0x6c9e18ac7007c91a,
+      0x850fadc09923329e, 0x3e2cf6bc604ddb0,
+      0xa6539930bf6bff45, 0x84db8346b786151c,
+      0xcfe87f7cef46ff16, 0xe612641865679a63,
+      0x81f14fae158c5f6e, 0x4fcb7e8f3f60c07e,
+      0xa26da3999aef7749, 0xe3be5e330f38f09d,
+      0xcb090c8001ab551c, 0x5cadf5bfd3072cc5,
+      0xfdcb4fa002162a63, 0x73d9732fc7c8f7f6,
+      0x9e9f11c4014dda7e, 0x2867e7fddcdd9afa,
+      0xc646d63501a1511d, 0xb281e1fd541501b8,
+      0xf7d88bc24209a565, 0x1f225a7ca91a4226,
+      0x9ae757596946075f, 0x3375788de9b06958,
+      0xc1a12d2fc3978937, 0x52d6b1641c83ae,
+      0xf209787bb47d6b84, 0xc0678c5dbd23a49a,
+      0x9745eb4d50ce6332, 0xf840b7ba963646e0,
+      0xbd176620a501fbff, 0xb650e5a93bc3d898,
+      0xec5d3fa8ce427aff, 0xa3e51f138ab4cebe,
+      0x93ba47c980e98cdf, 0xc66f336c36b10137,
+      0xb8a8d9bbe123f017, 0xb80b0047445d4184,
+      0xe6d3102ad96cec1d, 0xa60dc059157491e5,
+      0x9043ea1ac7e41392, 0x87c89837ad68db2f,
+      0xb454e4a179dd1877, 0x29babe4598c311fb,
+      0xe16a1dc9d8545e94, 0xf4296dd6fef3d67a,
+      0x8ce2529e2734bb1d, 0x1899e4a65f58660c,
+      0xb01ae745b101e9e4, 0x5ec05dcff72e7f8f,
+      0xdc21a1171d42645d, 0x76707543f4fa1f73,
+      0x899504ae72497eba, 0x6a06494a791c53a8,
+      0xabfa45da0edbde69, 0x487db9d17636892,
+      0xd6f8d7509292d603, 0x45a9d2845d3c42b6,
+      0x865b86925b9bc5c2, 0xb8a2392ba45a9b2,
+      0xa7f26836f282b732, 0x8e6cac7768d7141e,
+      0xd1ef0244af2364ff, 0x3207d795430cd926,
+      0x8335616aed761f1f, 0x7f44e6bd49e807b8,
+      0xa402b9c5a8d3a6e7, 0x5f16206c9c6209a6,
+      0xcd036837130890a1, 0x36dba887c37a8c0f,
+      0x802221226be55a64, 0xc2494954da2c9789,
+      0xa02aa96b06deb0fd, 0xf2db9baa10b7bd6c,
+      0xc83553c5c8965d3d, 0x6f92829494e5acc7,
+      0xfa42a8b73abbf48c, 0xcb772339ba1f17f9,
+      0x9c69a97284b578d7, 0xff2a760414536efb,
+      0xc38413cf25e2d70d, 0xfef5138519684aba,
+      0xf46518c2ef5b8cd1, 0x7eb258665fc25d69,
+      0x98bf2f79d5993802, 0xef2f773ffbd97a61,
+      0xbeeefb584aff8603, 0xaafb550ffacfd8fa,
+      0xeeaaba2e5dbf6784, 0x95ba2a53f983cf38,
+      0x952ab45cfa97a0b2, 0xdd945a747bf26183,
+      0xba756174393d88df, 0x94f971119aeef9e4,
+      0xe912b9d1478ceb17, 0x7a37cd5601aab85d,
+      0x91abb422ccb812ee, 0xac62e055c10ab33a,
+      0xb616a12b7fe617aa, 0x577b986b314d6009,
+      0xe39c49765fdf9d94, 0xed5a7e85fda0b80b,
+      0x8e41ade9fbebc27d, 0x14588f13be847307,
+      0xb1d219647ae6b31c, 0x596eb2d8ae258fc8,
+      0xde469fbd99a05fe3, 0x6fca5f8ed9aef3bb,
+      0x8aec23d680043bee, 0x25de7bb9480d5854,
+      0xada72ccc20054ae9, 0xaf561aa79a10ae6a,
+      0xd910f7ff28069da4, 0x1b2ba1518094da04,
+      0x87aa9aff79042286, 0x90fb44d2f05d0842,
+      0xa99541bf57452b28, 0x353a1607ac744a53,
+      0xd3fa922f2d1675f2, 0x42889b8997915ce8,
+      0x847c9b5d7c2e09b7, 0x69956135febada11,
+      0xa59bc234db398c25, 0x43fab9837e699095,
+      0xcf02b2c21207ef2e, 0x94f967e45e03f4bb,
+      0x8161afb94b44f57d, 0x1d1be0eebac278f5,
+      0xa1ba1ba79e1632dc, 0x6462d92a69731732,
+      0xca28a291859bbf93, 0x7d7b8f7503cfdcfe,
+      0xfcb2cb35e702af78, 0x5cda735244c3d43e,
+      0x9defbf01b061adab, 0x3a0888136afa64a7,
+      0xc56baec21c7a1916, 0x88aaa1845b8fdd0,
+      0xf6c69a72a3989f5b, 0x8aad549e57273d45,
+      0x9a3c2087a63f6399, 0x36ac54e2f678864b,
+      0xc0cb28a98fcf3c7f, 0x84576a1bb416a7dd,
+      0xf0fdf2d3f3c30b9f, 0x656d44a2a11c51d5,
+      0x969eb7c47859e743, 0x9f644ae5a4b1b325,
+      0xbc4665b596706114, 0x873d5d9f0dde1fee,
+      0xeb57ff22fc0c7959, 0xa90cb506d155a7ea,
+      0x9316ff75dd87cbd8, 0x9a7f12442d588f2,
+      0xb7dcbf5354e9bece, 0xc11ed6d538aeb2f,
+      0xe5d3ef282a242e81, 0x8f1668c8a86da5fa,
+      0x8fa475791a569d10, 0xf96e017d694487bc,
+      0xb38d92d760ec4455, 0x37c981dcc395a9ac,
+      0xe070f78d3927556a, 0x85bbe253f47b1417,
+      0x8c469ab843b89562, 0x93956d7478ccec8e,
+      0xaf58416654a6babb, 0x387ac8d1970027b2,
+      0xdb2e51bfe9d0696a, 0x6997b05fcc0319e,
+      0x88fcf317f22241e2, 0x441fece3bdf81f03,
+      0xab3c2fddeeaad25a, 0xd527e81cad7626c3,
+      0xd60b3bd56a5586f1, 0x8a71e223d8d3b074,
+      0x85c7056562757456, 0xf6872d5667844e49,
+      0xa738c6bebb12d16c, 0xb428f8ac016561db,
+      0xd106f86e69d785c7, 0xe13336d701beba52,
+      0x82a45b450226b39c, 0xecc0024661173473,
+      0xa34d721642b06084, 0x27f002d7f95d0190,
+      0xcc20ce9bd35c78a5, 0x31ec038df7b441f4,
+      0xff290242c83396ce, 0x7e67047175a15271,
+      0x9f79a169bd203e41, 0xf0062c6e984d386,
+      0xc75809c42c684dd1, 0x52c07b78a3e60868,
+      0xf92e0c3537826145, 0xa7709a56ccdf8a82,
+      0x9bbcc7a142b17ccb, 0x88a66076400bb691,
+      0xc2abf989935ddbfe, 0x6acff893d00ea435,
+      0xf356f7ebf83552fe, 0x583f6b8c4124d43,
+      0x98165af37b2153de, 0xc3727a337a8b704a,
+      0xbe1bf1b059e9a8d6, 0x744f18c0592e4c5c,
+      0xeda2ee1c7064130c, 0x1162def06f79df73,
+      0x9485d4d1c63e8be7, 0x8addcb5645ac2ba8,
+      0xb9a74a0637ce2ee1, 0x6d953e2bd7173692,
+      0xe8111c87c5c1ba99, 0xc8fa8db6ccdd0437,
+      0x910ab1d4db9914a0, 0x1d9c9892400a22a2,
+      0xb54d5e4a127f59c8, 0x2503beb6d00cab4b,
+      0xe2a0b5dc971f303a, 0x2e44ae64840fd61d,
+      0x8da471a9de737e24, 0x5ceaecfed289e5d2,
+      0xb10d8e1456105dad, 0x7425a83e872c5f47,
+      0xdd50f1996b947518, 0xd12f124e28f77719,
+      0x8a5296ffe33cc92f, 0x82bd6b70d99aaa6f,
+      0xace73cbfdc0bfb7b, 0x636cc64d1001550b,
+      0xd8210befd30efa5a, 0x3c47f7e05401aa4e,
+      0x8714a775e3e95c78, 0x65acfaec34810a71,
+      0xa8d9d1535ce3b396, 0x7f1839a741a14d0d,
+      0xd31045a8341ca07c, 0x1ede48111209a050,
+      0x83ea2b892091e44d, 0x934aed0aab460432,
+      0xa4e4b66b68b65d60, 0xf81da84d5617853f,
+      0xce1de40642e3f4b9, 0x36251260ab9d668e,
+      0x80d2ae83e9ce78f3, 0xc1d72b7c6b426019,
+      0xa1075a24e4421730, 0xb24cf65b8612f81f,
+      0xc94930ae1d529cfc, 0xdee033f26797b627,
+      0xfb9b7cd9a4a7443c, 0x169840ef017da3b1,
+      0x9d412e0806e88aa5, 0x8e1f289560ee864e,
+      0xc491798a08a2ad4e, 0xf1a6f2bab92a27e2,
+      0xf5b5d7ec8acb58a2, 0xae10af696774b1db,
+      0x9991a6f3d6bf1765, 0xacca6da1e0a8ef29,
+      0xbff610b0cc6edd3f, 0x17fd090a58d32af3,
+      0xeff394dcff8a948e, 0xddfc4b4cef07f5b0,
+      0x95f83d0a1fb69cd9, 0x4abdaf101564f98e,
+      0xbb764c4ca7a4440f, 0x9d6d1ad41abe37f1,
+      0xea53df5fd18d5513, 0x84c86189216dc5ed,
+      0x92746b9be2f8552c, 0x32fd3cf5b4e49bb4,
+      0xb7118682dbb66a77, 0x3fbc8c33221dc2a1,
+      0xe4d5e82392a40515, 0xfabaf3feaa5334a,
+      0x8f05b1163ba6832d, 0x29cb4d87f2a7400e,
+      0xb2c71d5bca9023f8, 0x743e20e9ef511012,
+      0xdf78e4b2bd342cf6, 0x914da9246b255416,
+      0x8bab8eefb6409c1a, 0x1ad089b6c2f7548e,
+      0xae9672aba3d0c320, 0xa184ac2473b529b1,
+      0xda3c0f568cc4f3e8, 0xc9e5d72d90a2741e,
+      0x8865899617fb1871, 0x7e2fa67c7a658892,
+      0xaa7eebfb9df9de8d, 0xddbb901b98feeab7,
+      0xd51ea6fa85785631, 0x552a74227f3ea565,
+      0x8533285c936b35de, 0xd53a88958f87275f,
+      0xa67ff273b8460356, 0x8a892abaf368f137,
+      0xd01fef10a657842c, 0x2d2b7569b0432d85,
+      0x8213f56a67f6b29b, 0x9c3b29620e29fc73,
+      0xa298f2c501f45f42, 0x8349f3ba91b47b8f,
+      0xcb3f2f7642717713, 0x241c70a936219a73,
+      0xfe0efb53d30dd4d7, 0xed238cd383aa0110,
+      0x9ec95d1463e8a506, 0xf4363804324a40aa,
+      0xc67bb4597ce2ce48, 0xb143c6053edcd0d5,
+      0xf81aa16fdc1b81da, 0xdd94b7868e94050a,
+      0x9b10a4e5e9913128, 0xca7cf2b4191c8326,
+      0xc1d4ce1f63f57d72, 0xfd1c2f611f63a3f0,
+      0xf24a01a73cf2dccf, 0xbc633b39673c8cec,
+      0x976e41088617ca01, 0xd5be0503e085d813,
+      0xbd49d14aa79dbc82, 0x4b2d8644d8a74e18,
+      0xec9c459d51852ba2, 0xddf8e7d60ed1219e,
+      0x93e1ab8252f33b45, 0xcabb90e5c942b503,
+      0xb8da1662e7b00a17, 0x3d6a751f3b936243,
+      0xe7109bfba19c0c9d, 0xcc512670a783ad4,
+      0x906a617d450187e2, 0x27fb2b80668b24c5,
+      0xb484f9dc9641e9da, 0xb1f9f660802dedf6,
+      0xe1a63853bbd26451, 0x5e7873f8a0396973,
+      0x8d07e33455637eb2, 0xdb0b487b6423e1e8,
+      0xb049dc016abc5e5f, 0x91ce1a9a3d2cda62,
+      0xdc5c5301c56b75f7, 0x7641a140cc7810fb,
+      0x89b9b3e11b6329ba, 0xa9e904c87fcb0a9d,
+      0xac2820d9623bf429, 0x546345fa9fbdcd44,
+      0xd732290fbacaf133, 0xa97c177947ad4095,
+      0x867f59a9d4bed6c0, 0x49ed8eabcccc485d,
+      0xa81f301449ee8c70, 0x5c68f256bfff5a74,
+      0xd226fc195c6a2f8c, 0x73832eec6fff3111,
+      0x83585d8fd9c25db7, 0xc831fd53c5ff7eab,
+      0xa42e74f3d032f525, 0xba3e7ca8b77f5e55,
+      0xcd3a1230c43fb26f, 0x28ce1bd2e55f35eb,
+      0x80444b5e7aa7cf85, 0x7980d163cf5b81b3,
+      0xa0555e361951c366, 0xd7e105bcc332621f,
+      0xc86ab5c39fa63440, 0x8dd9472bf3fefaa7,
+      0xfa856334878fc150, 0xb14f98f6f0feb951,
+      0x9c935e00d4b9d8d2, 0x6ed1bf9a569f33d3,
+      0xc3b8358109e84f07, 0xa862f80ec4700c8,
+      0xf4a642e14c6262c8, 0xcd27bb612758c0fa,
+      0x98e7e9cccfbd7dbd, 0x8038d51cb897789c,
+      0xbf21e44003acdd2c, 0xe0470a63e6bd56c3,
+      0xeeea5d5004981478, 0x1858ccfce06cac74,
+      0x95527a5202df0ccb, 0xf37801e0c43ebc8,
+      0xbaa718e68396cffd, 0xd30560258f54e6ba,
+      0xe950df20247c83fd, 0x47c6b82ef32a2069,
+      0x91d28b7416cdd27e, 0x4cdc331d57fa5441,
+      0xb6472e511c81471d, 0xe0133fe4adf8e952,
+      0xe3d8f9e563a198e5, 0x58180fddd97723a6,
+      0x8e679c2f5e44ff8f, 0x570f09eaa7ea7648,
+  };
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE // out-of-class definition
+
+template <class unused>
+constexpr uint64_t
+    powers_template<unused>::power_of_five_128[number_of_entries];
+
+#endif // FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+
+using powers = powers_template<>; // the table with its default template argument
+
+} // namespace fast_float
+
+#endif
+
+#ifndef FASTFLOAT_DECIMAL_TO_BINARY_H
+#define FASTFLOAT_DECIMAL_TO_BINARY_H
+
+#include <cfloat>
+#include <cinttypes>
+#include <cmath>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+
+namespace fast_float {
+
+// Approximates w * 5**q from the power_of_five_128 table and returns a pair of
+// 64-bit words: `high` holds the most significant bits of the product and
+// `low` the least significant ones. The second table word for q is only
+// multiplied in when the bits of `high` below the top `bit_precision` bits are
+// all ones, i.e. when a carry out of the low part could change those top bits.
+template <int bit_precision>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 value128
+compute_product_approximation(int64_t q, uint64_t w) {
+  int const index = 2 * int(q - powers::smallest_power_of_five); // 2 words per q
+  // For small values of q, e.g., q in [0,27], the answer is always exact
+  // because the single multiplication below,
+  // full_multiplication(w, power_of_five_128[index]), is already exact.
+  value128 firstproduct =
+      full_multiplication(w, powers::power_of_five_128[index]);
+  static_assert((bit_precision >= 0) && (bit_precision <= 64),
+                " precision should  be in (0,64]");
+  constexpr uint64_t precision_mask =
+      (bit_precision < 64) ? (uint64_t(0xFFFFFFFFFFFFFFFF) >> bit_precision)
+                           : uint64_t(0xFFFFFFFFFFFFFFFF);
+  if ((firstproduct.high & precision_mask) ==
+      precision_mask) { // could further guard with  (lower + w < lower)
+    // Regarding the second product, we only need secondproduct.high, but our
+    // expectation is that the compiler will optimize this extra work away if
+    // needed.
+    value128 secondproduct =
+        full_multiplication(w, powers::power_of_five_128[index + 1]);
+    firstproduct.low += secondproduct.high;
+    if (secondproduct.high > firstproduct.low) { // the addition wrapped around
+      firstproduct.high++;                       // propagate the carry
+    }
+  }
+  return firstproduct;
+}
+
+namespace detail {
+/** power(q) returns f + 63, where f is the integer estimate described below.
+ * For q in (0,350), we have that
+ *  f = (((152170 + 65536) * q ) >> 16);
+ * is equal to
+ *   floor(p) + q
+ * where
+ *   p = log(5**q)/log(2) = q * log(5)/log(2)
+ *
+ * For negative values of q in (-400,0), we have that
+ *  f = (((152170 + 65536) * q ) >> 16);
+ * is equal to
+ *   -ceil(p) + q
+ * where
+ *   p = log(5**-q)/log(2) = -q * log(5)/log(2)
+ */
+constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept {
+  return (((152170 + 65536) * q) >> 16) + 63; // f + 63, f as defined above
+}
+} // namespace detail
+
+// Creates an adjusted mantissa from w, the significant digits already
+// multiplied by 10 ** q; its power2 is offset by invalid_am_bias.
+template <typename binary>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 adjusted_mantissa
+compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept {
+  int hilz = int(w >> 63) ^ 1; // 1 when the top bit of w is clear, otherwise 0
+  adjusted_mantissa answer;
+  answer.mantissa = w << hilz; // shift left by one only when the top bit is clear
+  int bias = binary::mantissa_explicit_bits() - binary::minimum_exponent();
+  answer.power2 = int32_t(detail::power(int32_t(q)) + bias - hilz - lz - 62 +
+                          invalid_am_bias);
+  return answer;
+}
+
+// Computes w * 10 ** q, without rounding the representation up.
+// The power2 in the exponent will be adjusted by invalid_am_bias.
+template <typename binary>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
+compute_error(int64_t q, uint64_t w) noexcept {
+  int lz = leading_zeroes(w);
+  w <<= lz; // shift w left by the count just computed
+  value128 product =
+      compute_product_approximation<binary::mantissa_explicit_bits() + 3>(q, w);
+  return compute_error_scaled<binary>(q, product.high, lz); // only `high` is used
+}
+
+// Computes w * 10 ** q.
+// The returned value should be a valid number that simply needs to be
+// packed. In very rare cases the computation is said to fail, returning an
+// adjusted_mantissa with a negative power of 2 so that the caller recomputes.
+// NOTE(review): no branch below assigns a negative power2 - confirm if stale.
+template <typename binary>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
+compute_float(int64_t q, uint64_t w) noexcept {
+  adjusted_mantissa answer;
+  if ((w == 0) || (q < binary::smallest_power_of_ten())) {
+    answer.power2 = 0;
+    answer.mantissa = 0;
+    // result should be zero
+    return answer;
+  }
+  if (q > binary::largest_power_of_ten()) {
+    // we want to get infinity:
+    answer.power2 = binary::infinite_power();
+    answer.mantissa = 0;
+    return answer;
+  }
+  // At this point in time q is in [powers::smallest_power_of_five,
+  // powers::largest_power_of_five].
+
+  // We want the most significant bit of w to be 1. Shift if needed.
+  int lz = leading_zeroes(w);
+  w <<= lz;
+
+  // The required precision is binary::mantissa_explicit_bits() + 3 because
+  // 1. We need the implicit bit
+  // 2. We need an extra bit for rounding purposes
+  // 3. We might lose a bit due to the "upperbit" routine (result too small,
+  // requiring a shift)
+
+  value128 product =
+      compute_product_approximation<binary::mantissa_explicit_bits() + 3>(q, w);
+  // The computed 'product' is always sufficient.
+  // Mathematical proof:
+  // Noble Mushtak and Daniel Lemire, Fast Number Parsing Without Fallback (to
+  // appear). See script/mushtak_lemire.py
+
+  // The "compute_product_approximation" function can be slightly slower than a
+  // branchless approach: value128 product = compute_product(q, w); but in
+  // practice, we can win big with the compute_product_approximation if its
+  // additional branch is easily predicted. Which is best is data specific.
+  int upperbit = int(product.high >> 63);
+  int shift = upperbit + 64 - binary::mantissa_explicit_bits() - 3;
+
+  answer.mantissa = product.high >> shift;
+
+  answer.power2 = int32_t(detail::power(int32_t(q)) + upperbit - lz -
+                          binary::minimum_exponent());
+  if (answer.power2 <= 0) { // we have a subnormal?
+    // Here we have that answer.power2 <= 0 so -answer.power2 >= 0
+    if (-answer.power2 + 1 >=
+        64) { // if we have more than 64 bits below the minimum exponent, you
+              // have a zero for sure.
+      answer.power2 = 0;
+      answer.mantissa = 0;
+      // result should be zero
+      return answer;
+    }
+    // next line is safe because -answer.power2 + 1 < 64
+    answer.mantissa >>= -answer.power2 + 1;
+    // Thankfully, we can't have both "round-to-even" and subnormals because
+    // "round-to-even" only occurs for powers close to 0 in the 32-bit and
+    // 64-bit cases (with no more than 19 digits).
+    answer.mantissa += (answer.mantissa & 1); // round up
+    answer.mantissa >>= 1;
+    // There is a weird scenario where we are only just not a subnormal.
+    // Suppose we start with 2.2250738585072013e-308, we end up
+    // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal
+    // whereas 0x40000000000000 x 2^-1023-53  is normal. Now, we need to round
+    // up 0x3fffffffffffff x 2^-1023-53  and once we do, we are no longer
+    // subnormal, but we can only know this after rounding.
+    // So we only declare a subnormal if we are smaller than the threshold.
+    answer.power2 =
+        (answer.mantissa < (uint64_t(1) << binary::mantissa_explicit_bits()))
+            ? 0
+            : 1;
+    return answer;
+  }
+
+  // Usually, we round *up*, but if we fall right in between and we have an
+  // even basis, we need to round down.
+  // We are only concerned with the cases where 5**q fits in a single 64-bit word.
+  if ((product.low <= 1) && (q >= binary::min_exponent_round_to_even()) &&
+      (q <= binary::max_exponent_round_to_even()) &&
+      ((answer.mantissa & 3) == 1)) { // we may fall between two floats!
+    // To be in-between two floats we need that in doing
+    //   answer.mantissa = product.high >> (upperbit + 64 -
+    //   binary::mantissa_explicit_bits() - 3);
+    // ... we dropped out only zeroes. But if this happened, then we can go
+    // back!!!
+    if ((answer.mantissa << shift) == product.high) {
+      answer.mantissa &= ~uint64_t(1); // flip it so that we do not round up
+    }
+  }
+
+  answer.mantissa += (answer.mantissa & 1); // round up
+  answer.mantissa >>= 1;
+  if (answer.mantissa >= (uint64_t(2) << binary::mantissa_explicit_bits())) {
+    answer.mantissa = (uint64_t(1) << binary::mantissa_explicit_bits());
+    answer.power2++; // undo previous addition
+  }
+
+  answer.mantissa &= ~(uint64_t(1) << binary::mantissa_explicit_bits());
+  if (answer.power2 >= binary::infinite_power()) { // infinity
+    answer.power2 = binary::infinite_power();
+    answer.mantissa = 0;
+  }
+  return answer;
+}
+
+} // namespace fast_float
+
+#endif
+
+#ifndef FASTFLOAT_BIGINT_H
+#define FASTFLOAT_BIGINT_H
+
+#include <algorithm>
+#include <cstdint>
+#include <climits>
+#include <cstring>
+
+
+namespace fast_float {
+
+// The limb width: we want efficient multiplication of double the bits in
+// limb, or for 64-bit limbs, at least 64-bit multiplication where we can
+// extract the high and low parts efficiently. This is every 64-bit
+// architecture except for sparc, which emulates 128-bit multiplication.
+// We might have platforms where `CHAR_BIT` is not 8, so let's avoid
+// doing `8 * sizeof(limb)` and spell out limb_bits explicitly instead.
+#if defined(FASTFLOAT_64BIT) && !defined(__sparc)
+#define FASTFLOAT_64BIT_LIMB 1
+typedef uint64_t limb;
+constexpr size_t limb_bits = 64;
+#else // 32-bit limbs
+#define FASTFLOAT_32BIT_LIMB
+typedef uint32_t limb;
+constexpr size_t limb_bits = 32;
+#endif // defined(FASTFLOAT_64BIT) && !defined(__sparc)
+
+typedef span<limb> limb_span; // span over a run of limbs
+
+// Number of bits in a bigint. This needs to be at least the number
+// of bits required to store the largest bigint, which is
+// `log2(10**(digits + max_exp))`, or `log2(10**(767 + 342))`, or
+// ~3600 bits, so we round to 4000.
+constexpr size_t bigint_bits = 4000;
+constexpr size_t bigint_limbs = bigint_bits / limb_bits; // 62 or 125 limbs
+
+// Fixed-capacity, vector-like container of limbs stored inline in `data`.
+// The whole buffer is pre-allocated, and only the length changes.
+template <uint16_t size> struct stackvec {
+  limb data[size];
+  // we never need more than 150 limbs, so a 16-bit length is sufficient
+  uint16_t length{0};
+
+  stackvec() = default;
+  stackvec(stackvec const &) = delete; // copying and moving are both disabled
+  stackvec &operator=(stackvec const &) = delete;
+  stackvec(stackvec &&) = delete;
+  stackvec &operator=(stackvec &&other) = delete;
+
+  // Create a stack vector holding a copy of the limbs in an existing span.
+  FASTFLOAT_CONSTEXPR20 stackvec(limb_span s) {
+    FASTFLOAT_ASSERT(try_extend(s)); // NOTE(review): copy happens in the macro arg
+  }
+
+  FASTFLOAT_CONSTEXPR14 limb &operator[](size_t index) noexcept {
+    FASTFLOAT_DEBUG_ASSERT(index < length);
+    return data[index];
+  }
+
+  FASTFLOAT_CONSTEXPR14 const limb &operator[](size_t index) const noexcept {
+    FASTFLOAT_DEBUG_ASSERT(index < length);
+    return data[index];
+  }
+
+  // Index from the end of the container: rindex(0) is the last stored limb.
+  FASTFLOAT_CONSTEXPR14 const limb &rindex(size_t index) const noexcept {
+    FASTFLOAT_DEBUG_ASSERT(index < length);
+    size_t rindex = length - index - 1;
+    return data[rindex];
+  }
+
+  // Set the length, without bounds checking; len is narrowed to uint16_t.
+  FASTFLOAT_CONSTEXPR14 void set_len(size_t len) noexcept {
+    length = uint16_t(len);
+  }
+
+  constexpr size_t len() const noexcept { return length; }
+
+  constexpr bool is_empty() const noexcept { return length == 0; }
+
+  constexpr size_t capacity() const noexcept { return size; }
+
+  // Append an item to the vector, without bounds checking.
+  FASTFLOAT_CONSTEXPR14 void push_unchecked(limb value) noexcept {
+    data[length] = value;
+    length++;
+  }
+
+  // Append an item to the vector; returns false (adding nothing) when full.
+  FASTFLOAT_CONSTEXPR14 bool try_push(limb value) noexcept {
+    if (len() < capacity()) {
+      push_unchecked(value);
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  // Append all items of a span to the vector, without bounds checking.
+  FASTFLOAT_CONSTEXPR20 void extend_unchecked(limb_span s) noexcept {
+    limb *ptr = data + length; // first unused slot
+    std::copy_n(s.ptr, s.len(), ptr);
+    set_len(len() + s.len());
+  }
+
+  // Append all items of a span; returns false (adding nothing) if they don't fit.
+  FASTFLOAT_CONSTEXPR20 bool try_extend(limb_span s) noexcept {
+    if (len() + s.len() <= capacity()) {
+      extend_unchecked(s);
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  // Resize the vector, without bounds checking.
+  // If the new size is longer than the vector, assign value to each
+  // appended item; if it is not longer, only the length is updated.
+  FASTFLOAT_CONSTEXPR20
+  void resize_unchecked(size_t new_len, limb value) noexcept {
+    if (new_len > len()) {
+      size_t count = new_len - len();
+      limb *first = data + len();
+      limb *last = first + count;
+      ::std::fill(first, last, value);
+      set_len(new_len);
+    } else {
+      set_len(new_len);
+    }
+  }
+
+  // Try to resize the vector; returns false when new_len exceeds the capacity.
+  FASTFLOAT_CONSTEXPR20 bool try_resize(size_t new_len, limb value) noexcept {
+    if (new_len > capacity()) {
+      return false;
+    } else {
+      resize_unchecked(new_len, value);
+      return true;
+    }
+  }
+
+  // Check if any limbs are non-zero at or after the given index.
+  // This needs to be done in reverse order, since the index
+  // is relative to the most significant limbs (it is passed to rindex).
+  FASTFLOAT_CONSTEXPR14 bool nonzero(size_t index) const noexcept {
+    while (index < len()) {
+      if (rindex(index) != 0) {
+        return true;
+      }
+      index++;
+    }
+    return false;
+  }
+
+  // Normalize the big integer, so most-significant zero limbs are removed.
+  FASTFLOAT_CONSTEXPR14 void normalize() noexcept {
+    while (len() > 0 && rindex(0) == 0) {
+      length--;
+    }
+  }
+};
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t
+empty_hi64(bool &truncated) noexcept { // no-input case of the *_hi64 helpers
+  truncated = false; // nothing was dropped
+  return 0;
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
+uint64_hi64(uint64_t r0, bool &truncated) noexcept { // single-word input
+  truncated = false; // a left shift by the leading-zero count drops no set bits
+  int shl = leading_zeroes(r0);
+  return r0 << shl;
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
+uint64_hi64(uint64_t r0, uint64_t r1, bool &truncated) noexcept { // r0 is high
+  int shl = leading_zeroes(r0);
+  if (shl == 0) {
+    truncated = r1 != 0; // r0 is returned as is, so every set bit of r1 is lost
+    return r0;
+  } else {
+    int shr = 64 - shl;
+    truncated = (r1 << shl) != 0; // bits of r1 that do not make it into the result
+    return (r0 << shl) | (r1 >> shr); // top 64 bits of the shifted r0:r1 pair
+  }
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
+uint32_hi64(uint32_t r0, bool &truncated) noexcept { // one 32-bit word
+  return uint64_hi64(r0, truncated); // r0 is widened to 64 bits by the call
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
+uint32_hi64(uint32_t r0, uint32_t r1, bool &truncated) noexcept { // r0 is high
+  uint64_t x0 = r0;
+  uint64_t x1 = r1;
+  return uint64_hi64((x0 << 32) | x1, truncated); // pack both words into one
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
+uint32_hi64(uint32_t r0, uint32_t r1, uint32_t r2, bool &truncated) noexcept {
+  uint64_t x0 = r0; // most significant word
+  uint64_t x1 = r1;
+  uint64_t x2 = r2; // least significant word
+  return uint64_hi64(x0, (x1 << 32) | x2, truncated); // r1:r2 form the low word
+}
+
+// Add two limbs and return the (wrapped) sum; `overflow` is set to whether
+// the addition wrapped around. We want an efficient operation. For msvc,
+// where we don't have built-in intrinsics, this is still
+// pretty fast.
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 limb
+scalar_add(limb x, limb y, bool &overflow) noexcept {
+  limb z;
+// gcc and clang
+#if defined(__has_builtin)
+#if __has_builtin(__builtin_add_overflow)
+  if (!cpp20_and_in_constexpr()) { // builtin path is skipped when this is true
+    overflow = __builtin_add_overflow(x, y, &z);
+    return z;
+  }
+#endif
+#endif
+
+  // generic, this still optimizes correctly on MSVC.
+  z = x + y;
+  overflow = z < x; // an unsigned sum smaller than an operand means it wrapped
+  return z;
+}
+
+// compute x * y + carry; returns the low limb, stores the high limb in carry.
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 limb
+scalar_mul(limb x, limb y, limb &carry) noexcept {
+#ifdef FASTFLOAT_64BIT_LIMB
+#if defined(__SIZEOF_INT128__)
+  // GCC and clang both define it as an extension.
+  __uint128_t z = __uint128_t(x) * __uint128_t(y) + __uint128_t(carry);
+  carry = limb(z >> limb_bits);
+  return limb(z);
+#else
+  // fallback, no native 128-bit integer multiplication with carry.
+  // on msvc, this optimizes identically, somehow.
+  value128 z = full_multiplication(x, y);
+  bool overflow;
+  z.low = scalar_add(z.low, carry, overflow);
+  z.high += uint64_t(overflow); // cannot overflow: x * y + carry < 2^128
+  carry = z.high;
+  return z.low;
+#endif
+#else
+  uint64_t z = uint64_t(x) * uint64_t(y) + uint64_t(carry);
+  carry = limb(z >> limb_bits);
+  return limb(z);
+#endif
+}
+
+// add scalar `y` into limb `start` of the bigint, rippling the carry upward;
+// a leftover carry is pushed as a new limb. used in grade school multiplication
+template <uint16_t size>
+inline FASTFLOAT_CONSTEXPR20 bool small_add_from(stackvec<size> &vec, limb y,
+                                                 size_t start) noexcept {
+  size_t index = start;
+  limb carry = y;
+  bool overflow;
+  while (carry != 0 && index < vec.len()) {
+    vec[index] = scalar_add(vec[index], carry, overflow);
+    carry = limb(overflow); // after the first limb the carry is only 0 or 1
+    index += 1;
+  }
+  if (carry != 0) {
+    FASTFLOAT_TRY(vec.try_push(carry));
+  }
+  return true;
+}
+
+// add scalar value to bigint.
+template <uint16_t size>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
+small_add(stackvec<size> &vec, limb y) noexcept {
+  return small_add_from(vec, y, 0);
+}
+
+// multiply bigint by scalar value.
+template <uint16_t size>
+inline FASTFLOAT_CONSTEXPR20 bool small_mul(stackvec<size> &vec,
+                                            limb y) noexcept {
+  limb carry = 0;
+  for (size_t index = 0; index < vec.len(); index++) {
+    vec[index] = scalar_mul(vec[index], y, carry);
+  }
+  if (carry != 0) {
+    FASTFLOAT_TRY(vec.try_push(carry));
+  }
+  return true;
+}
+
+// add bigint to bigint starting from index.
+// used in grade school multiplication
+template <uint16_t size>
+FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec<size> &x, limb_span y,
+                                          size_t start) noexcept {
+  // the effective x buffer is `start..x.len()`; when y does not fit there,
+  // grow x to `y.len() + start` (`x.len() < start` guards the subtraction).
+  if (x.len() < start || y.len() > x.len() - start) {
+    FASTFLOAT_TRY(x.try_resize(y.len() + start, 0));
+  }
+
+  bool carry = false;
+  for (size_t index = 0; index < y.len(); index++) {
+    limb xi = x[index + start];
+    limb yi = y[index];
+    bool c1 = false;
+    bool c2 = false;
+    xi = scalar_add(xi, yi, c1);
+    if (carry) {
+      xi = scalar_add(xi, 1, c2);
+    }
+    x[index + start] = xi;
+    carry = c1 | c2;
+  }
+
+  // propagate the final carry into the limbs above y
+  if (carry) {
+    FASTFLOAT_TRY(small_add_from(x, 1, y.len() + start));
+  }
+  return true;
+}
+
+// add bigint to bigint.
+template <uint16_t size>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
+large_add_from(stackvec<size> &x, limb_span y) noexcept {
+  return large_add_from(x, y, 0);
+}
+
+// grade-school multiplication: x *= y. an empty y only normalizes x.
+template <uint16_t size>
+FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec<size> &x, limb_span y) noexcept {
+  limb_span xs = limb_span(x.data, x.len());
+  stackvec<size> z(xs); // z is built from x's limbs before x is modified
+  limb_span zs = limb_span(z.data, z.len());
+
+  if (y.len() != 0) {
+    limb y0 = y[0];
+    FASTFLOAT_TRY(small_mul(x, y0));
+    for (size_t index = 1; index < y.len(); index++) {
+      limb yi = y[index];
+      stackvec<size> zi;
+      if (yi != 0) {
+        // zi = z * yi, then added into x at limb offset `index`
+        zi.set_len(0);
+        FASTFLOAT_TRY(zi.try_extend(zs));
+        FASTFLOAT_TRY(small_mul(zi, yi));
+        limb_span zis = limb_span(zi.data, zi.len());
+        FASTFLOAT_TRY(large_add_from(x, zis, index));
+      }
+    }
+  }
+
+  x.normalize();
+  return true;
+}
+
+// multiply bigint by bigint: single-limb y uses small_mul, else long_mul.
+template <uint16_t size>
+FASTFLOAT_CONSTEXPR20 bool large_mul(stackvec<size> &x, limb_span y) noexcept {
+  if (y.len() == 1) {
+    FASTFLOAT_TRY(small_mul(x, y[0]));
+  } else {
+    FASTFLOAT_TRY(long_mul(x, y));
+  }
+  return true;
+}
+
+template <typename = void> struct pow5_tables {
+  static constexpr uint32_t large_step = 135;
+  static constexpr uint64_t small_power_of_5[] = {
+      1UL,
+      5UL,
+      25UL,
+      125UL,
+      625UL,
+      3125UL,
+      15625UL,
+      78125UL,
+      390625UL,
+      1953125UL,
+      9765625UL,
+      48828125UL,
+      244140625UL,
+      1220703125UL,
+      6103515625UL,
+      30517578125UL,
+      152587890625UL,
+      762939453125UL,
+      3814697265625UL,
+      19073486328125UL,
+      95367431640625UL,
+      476837158203125UL,
+      2384185791015625UL,
+      11920928955078125UL,
+      59604644775390625UL,
+      298023223876953125UL,
+      1490116119384765625UL,
+      7450580596923828125UL,
+  };
+#ifdef FASTFLOAT_64BIT_LIMB
+  constexpr static limb large_power_of_5[] = {
+      1414648277510068013UL, 9180637584431281687UL, 4539964771860779200UL,
+      10482974169319127550UL, 198276706040285095UL};
+#else
+  constexpr static limb large_power_of_5[] = {
+      4279965485U, 329373468U,  4020270615U, 2137533757U, 4287402176U,
+      1057042919U, 1071430142U, 2440757623U, 381945767U,  46164893U};
+#endif
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+
+template <typename T> constexpr uint32_t pow5_tables<T>::large_step;
+
+template <typename T> constexpr uint64_t pow5_tables<T>::small_power_of_5[];
+
+template <typename T> constexpr limb pow5_tables<T>::large_power_of_5[];
+
+#endif
+
+// big integer type. implements a small subset of big integer
+// arithmetic, using simple algorithms since asymptotically
+// faster algorithms are slower for a small number of limbs.
+// all operations assume the big-integer is normalized.
+struct bigint : pow5_tables<> {
+  // storage of the limbs, in little-endian order.
+  stackvec<bigint_limbs> vec;
+
+  FASTFLOAT_CONSTEXPR20 bigint() : vec() {}
+
+  bigint(bigint const &) = delete;
+  bigint &operator=(bigint const &) = delete;
+  bigint(bigint &&) = delete;
+  bigint &operator=(bigint &&other) = delete;
+
+  FASTFLOAT_CONSTEXPR20 bigint(uint64_t value) : vec() {
+#ifdef FASTFLOAT_64BIT_LIMB
+    vec.push_unchecked(value);
+#else
+    vec.push_unchecked(uint32_t(value));
+    vec.push_unchecked(uint32_t(value >> 32));
+#endif
+    vec.normalize();
+  }
+
+  // get the high 64 bits from the vector, and if bits were truncated.
+  // this is to get the significant digits for the float.
+  FASTFLOAT_CONSTEXPR20 uint64_t hi64(bool &truncated) const noexcept {
+#ifdef FASTFLOAT_64BIT_LIMB
+    if (vec.len() == 0) {
+      return empty_hi64(truncated);
+    } else if (vec.len() == 1) {
+      return uint64_hi64(vec.rindex(0), truncated);
+    } else {
+      uint64_t result = uint64_hi64(vec.rindex(0), vec.rindex(1), truncated);
+      truncated |= vec.nonzero(2);
+      return result;
+    }
+#else
+    if (vec.len() == 0) {
+      return empty_hi64(truncated);
+    } else if (vec.len() == 1) {
+      return uint32_hi64(vec.rindex(0), truncated);
+    } else if (vec.len() == 2) {
+      return uint32_hi64(vec.rindex(0), vec.rindex(1), truncated);
+    } else {
+      uint64_t result =
+          uint32_hi64(vec.rindex(0), vec.rindex(1), vec.rindex(2), truncated);
+      truncated |= vec.nonzero(3);
+      return result;
+    }
+#endif
+  }
+
+  // compare two big integers, returning their ordering.
+  // assumes both are normalized. if the return value is
+  // negative, other is larger, if the return value is
+  // positive, this is larger, otherwise they are equal.
+  // the limbs are stored in little-endian order, so we
+  // must compare the limbs in reverse order.
+  FASTFLOAT_CONSTEXPR20 int compare(bigint const &other) const noexcept {
+    if (vec.len() > other.vec.len()) {
+      return 1;
+    } else if (vec.len() < other.vec.len()) {
+      return -1;
+    } else {
+      for (size_t index = vec.len(); index > 0; index--) {
+        limb xi = vec[index - 1];
+        limb yi = other.vec[index - 1];
+        if (xi > yi) {
+          return 1;
+        } else if (xi < yi) {
+          return -1;
+        }
+      }
+      return 0;
+    }
+  }
+
+  // shift left each limb n bits, carrying over to the new limb
+  // returns true if we were able to shift all the digits.
+  FASTFLOAT_CONSTEXPR20 bool shl_bits(size_t n) noexcept {
+    // Internally, for each item, we shift left by n, and add the previous
+    // right shifted limb-bits.
+    // For example, we transform (for u8) shifted left 2, to:
+    //      b10100100 b01000010
+    //      b10 b10010001 b00001000
+    FASTFLOAT_DEBUG_ASSERT(n != 0);
+    FASTFLOAT_DEBUG_ASSERT(n < sizeof(limb) * 8);
+
+    size_t shl = n;
+    size_t shr = limb_bits - shl;
+    limb prev = 0;
+    for (size_t index = 0; index < vec.len(); index++) {
+      limb xi = vec[index];
+      vec[index] = (xi << shl) | (prev >> shr);
+      prev = xi;
+    }
+
+    limb carry = prev >> shr;
+    if (carry != 0) {
+      return vec.try_push(carry);
+    }
+    return true;
+  }
+
+  // move the limbs left by `n` limbs.
+  FASTFLOAT_CONSTEXPR20 bool shl_limbs(size_t n) noexcept {
+    FASTFLOAT_DEBUG_ASSERT(n != 0);
+    if (n + vec.len() > vec.capacity()) {
+      return false;
+    } else if (!vec.is_empty()) {
+      // move limbs
+      limb *dst = vec.data + n;
+      limb const *src = vec.data;
+      std::copy_backward(src, src + vec.len(), dst + vec.len());
+      // fill in empty limbs
+      limb *first = vec.data;
+      limb *last = first + n;
+      ::std::fill(first, last, 0);
+      vec.set_len(n + vec.len());
+      return true;
+    } else {
+      return true;
+    }
+  }
+
+  // move the limbs left by `n` bits.
+  FASTFLOAT_CONSTEXPR20 bool shl(size_t n) noexcept {
+    size_t rem = n % limb_bits;
+    size_t div = n / limb_bits;
+    if (rem != 0) {
+      FASTFLOAT_TRY(shl_bits(rem));
+    }
+    if (div != 0) {
+      FASTFLOAT_TRY(shl_limbs(div));
+    }
+    return true;
+  }
+
+  // get the number of leading zeros in the bigint.
+  FASTFLOAT_CONSTEXPR20 int ctlz() const noexcept {
+    if (vec.is_empty()) {
+      return 0;
+    } else {
+#ifdef FASTFLOAT_64BIT_LIMB
+      return leading_zeroes(vec.rindex(0));
+#else
+      // no use defining a specialized leading_zeroes for a 32-bit type.
+      uint64_t r0 = vec.rindex(0);
+      return leading_zeroes(r0 << 32);
+#endif
+    }
+  }
+
+  // get the number of bits in the bigint.
+  FASTFLOAT_CONSTEXPR20 int bit_length() const noexcept {
+    int lz = ctlz();
+    return int(limb_bits * vec.len()) - lz;
+  }
+
+  FASTFLOAT_CONSTEXPR20 bool mul(limb y) noexcept { return small_mul(vec, y); }
+
+  FASTFLOAT_CONSTEXPR20 bool add(limb y) noexcept { return small_add(vec, y); }
+
+  // multiply as if by 2 raised to a power.
+  FASTFLOAT_CONSTEXPR20 bool pow2(uint32_t exp) noexcept { return shl(exp); }
+
+  // multiply as if by 5 raised to a power.
+  FASTFLOAT_CONSTEXPR20 bool pow5(uint32_t exp) noexcept {
+    // multiply by a power of 5
+    size_t large_length = sizeof(large_power_of_5) / sizeof(limb);
+    limb_span large = limb_span(large_power_of_5, large_length);
+    while (exp >= large_step) {
+      FASTFLOAT_TRY(large_mul(vec, large));
+      exp -= large_step;
+    }
+#ifdef FASTFLOAT_64BIT_LIMB
+    uint32_t small_step = 27;
+    limb max_native = 7450580596923828125UL;
+#else
+    uint32_t small_step = 13;
+    limb max_native = 1220703125U;
+#endif
+    while (exp >= small_step) {
+      FASTFLOAT_TRY(small_mul(vec, max_native));
+      exp -= small_step;
+    }
+    if (exp != 0) {
+      // Work around clang bug https://godbolt.org/z/zedh7rrhc
+      // This is similar to https://github.com/llvm/llvm-project/issues/47746,
+      // except the workaround described there doesn't work here
+      FASTFLOAT_TRY(small_mul(
+          vec, limb(((void)small_power_of_5[0], small_power_of_5[exp]))));
+    }
+
+    return true;
+  }
+
+  // multiply as if by 10 raised to a power.
+  FASTFLOAT_CONSTEXPR20 bool pow10(uint32_t exp) noexcept {
+    FASTFLOAT_TRY(pow5(exp));
+    return pow2(exp);
+  }
+};
+
+} // namespace fast_float
+
+#endif
+
+#ifndef FASTFLOAT_DIGIT_COMPARISON_H
+#define FASTFLOAT_DIGIT_COMPARISON_H
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+
+
+namespace fast_float {
+
+// 1e0 to 1e19
+constexpr static uint64_t powers_of_ten_uint64[] = {1UL,
+                                                    10UL,
+                                                    100UL,
+                                                    1000UL,
+                                                    10000UL,
+                                                    100000UL,
+                                                    1000000UL,
+                                                    10000000UL,
+                                                    100000000UL,
+                                                    1000000000UL,
+                                                    10000000000UL,
+                                                    100000000000UL,
+                                                    1000000000000UL,
+                                                    10000000000000UL,
+                                                    100000000000000UL,
+                                                    1000000000000000UL,
+                                                    10000000000000000UL,
+                                                    100000000000000000UL,
+                                                    1000000000000000000UL,
+                                                    10000000000000000000UL};
+
+// calculate the exponent, in scientific notation, of the number.
+// this algorithm is not even close to optimized, but it has no practical
+// effect on performance: making it faster would require slowing down the
+// faster algorithms, and this is still fast.
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int32_t
+scientific_exponent(uint64_t mantissa, int32_t exponent) noexcept {
+  while (mantissa >= 10000) {
+    mantissa /= 10000;
+    exponent += 4;
+  }
+  while (mantissa >= 100) {
+    mantissa /= 100;
+    exponent += 2;
+  }
+  while (mantissa >= 10) {
+    mantissa /= 10;
+    exponent += 1;
+  }
+  return exponent;
+}
+
+// this converts a native floating-point number to an extended-precision float.
+template <typename T>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
+to_extended(T value) noexcept {
+  using equiv_uint = equiv_uint_t<T>;
+  constexpr equiv_uint exponent_mask = binary_format<T>::exponent_mask();
+  constexpr equiv_uint mantissa_mask = binary_format<T>::mantissa_mask();
+  constexpr equiv_uint hidden_bit_mask = binary_format<T>::hidden_bit_mask();
+
+  adjusted_mantissa am;
+  int32_t bias = binary_format<T>::mantissa_explicit_bits() -
+                 binary_format<T>::minimum_exponent();
+  equiv_uint bits;
+#if FASTFLOAT_HAS_BIT_CAST
+  bits = std::bit_cast<equiv_uint>(value);
+#else
+  ::memcpy(&bits, &value, sizeof(T));
+#endif
+  if ((bits & exponent_mask) == 0) {
+    // denormal
+    am.power2 = 1 - bias;
+    am.mantissa = bits & mantissa_mask;
+  } else {
+    // normal
+    am.power2 = int32_t((bits & exponent_mask) >>
+                        binary_format<T>::mantissa_explicit_bits());
+    am.power2 -= bias;
+    am.mantissa = (bits & mantissa_mask) | hidden_bit_mask;
+  }
+
+  return am;
+}
+
+// get the extended precision value of the halfway point between b and b+u.
+// we are given a native float that represents b, so we need to adjust it
+// halfway between b and b+u.
+template <typename T>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
+to_extended_halfway(T value) noexcept {
+  adjusted_mantissa am = to_extended(value);
+  am.mantissa <<= 1;
+  am.mantissa += 1;
+  am.power2 -= 1; // (2m + 1) * 2^(p - 1) == (m + 1/2) * 2^p
+  return am;
+}
+
+// round an extended-precision float to the nearest machine float.
+template <typename T, typename callback>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am,
+                                                         callback cb) noexcept {
+  int32_t mantissa_shift = 64 - binary_format<T>::mantissa_explicit_bits() - 1;
+  if (-am.power2 >= mantissa_shift) {
+    // have a denormal float
+    int32_t shift = -am.power2 + 1;
+    cb(am, std::min<int32_t>(shift, 64));
+    // check for round-up: if rounding-nearest carried us to the hidden bit.
+    am.power2 = (am.mantissa <
+                 (uint64_t(1) << binary_format<T>::mantissa_explicit_bits()))
+                    ? 0
+                    : 1;
+    return;
+  }
+
+  // have a normal float, use the default shift.
+  cb(am, mantissa_shift);
+
+  // check for carry
+  if (am.mantissa >=
+      (uint64_t(2) << binary_format<T>::mantissa_explicit_bits())) {
+    am.mantissa = (uint64_t(1) << binary_format<T>::mantissa_explicit_bits());
+    am.power2++;
+  }
+
+  // check for infinite: we could have carried to an infinite power
+  am.mantissa &= ~(uint64_t(1) << binary_format<T>::mantissa_explicit_bits());
+  if (am.power2 >= binary_format<T>::infinite_power()) {
+    am.power2 = binary_format<T>::infinite_power();
+    am.mantissa = 0;
+  }
+}
+
+template <typename callback>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void
+round_nearest_tie_even(adjusted_mantissa &am, int32_t shift,
+                       callback cb) noexcept {
+  uint64_t const mask = (shift == 64) ? UINT64_MAX : (uint64_t(1) << shift) - 1;
+  uint64_t const halfway = (shift == 0) ? 0 : uint64_t(1) << (shift - 1);
+  uint64_t truncated_bits = am.mantissa & mask; // bits shifted out below
+  bool is_above = truncated_bits > halfway;
+  bool is_halfway = truncated_bits == halfway;
+
+  // shift digits into position (a 64-bit shift by 64 would be undefined)
+  if (shift == 64) {
+    am.mantissa = 0;
+  } else {
+    am.mantissa >>= shift;
+  }
+  am.power2 += shift;
+
+  bool is_odd = (am.mantissa & 1) == 1;
+  am.mantissa += uint64_t(cb(is_odd, is_halfway, is_above)); // true: round up
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void
+round_down(adjusted_mantissa &am, int32_t shift) noexcept {
+  if (shift == 64) { // a 64-bit shift by 64 would be undefined, so zero it
+    am.mantissa = 0;
+  } else {
+    am.mantissa >>= shift; // truncate: the shifted-out bits are discarded
+  }
+  am.power2 += shift;
+}
+
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
+skip_zeros(UC const *&first, UC const *last) noexcept {
+  uint64_t val; // scratch for comparing 8 bytes of input at once
+  while (!cpp20_and_in_constexpr() &&
+         std::distance(first, last) >= int_cmp_len<UC>()) {
+    ::memcpy(&val, first, sizeof(uint64_t));
+    if (val != int_cmp_zeros<UC>()) {
+      break; // chunk differs from int_cmp_zeros: finish one char at a time
+    }
+    first += int_cmp_len<UC>();
+  }
+  while (first != last) { // advance first past any remaining '0' characters
+    if (*first != UC('0')) {
+      break;
+    }
+    first++;
+  }
+}
+
+// determine if any non-zero digits were truncated.
+// all characters must be valid digits.
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
+is_truncated(UC const *first, UC const *last) noexcept {
+  // fast path: compare 8 bytes at a time against int_cmp_zeros<UC>().
+  uint64_t val;
+  while (!cpp20_and_in_constexpr() &&
+         std::distance(first, last) >= int_cmp_len<UC>()) {
+    ::memcpy(&val, first, sizeof(uint64_t));
+    if (val != int_cmp_zeros<UC>()) {
+      return true;
+    }
+    first += int_cmp_len<UC>();
+  }
+  while (first != last) { // remaining characters, one at a time
+    if (*first != UC('0')) {
+      return true;
+    }
+    ++first;
+  }
+  return false;
+}
+
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
+is_truncated(span<UC const> s) noexcept {
+  return is_truncated(s.ptr, s.ptr + s.len());
+}
+
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
+parse_eight_digits(UC const *&p, limb &value, size_t &counter,
+                   size_t &count) noexcept {
+  value = value * 100000000 + parse_eight_digits_unrolled(p);
+  p += 8;
+  counter += 8;
+  count += 8;
+}
+
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void
+parse_one_digit(UC const *&p, limb &value, size_t &counter,
+                size_t &count) noexcept {
+  value = value * 10 + limb(*p - UC('0'));
+  p++;
+  counter++;
+  count++;
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
+add_native(bigint &big, limb power, limb value) noexcept {
+  big.mul(power); // big *= power (bool result discarded)
+  big.add(value); // big += value (bool result discarded)
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
+round_up_bigint(bigint &big, size_t &count) noexcept {
+  // need to round-up the digits, but need to avoid rounding
+  // ....9999 to ...10000, which could cause a false halfway point.
+  add_native(big, 10, 1);
+  count++;
+}
+
+// parse the significant digits into a big integer
+template <typename UC>
+inline FASTFLOAT_CONSTEXPR20 void
+parse_mantissa(bigint &result, parsed_number_string_t<UC> &num,
+               size_t max_digits, size_t &digits) noexcept {
+  // try to minimize the number of big integer and scalar multiplication.
+  // therefore, try to parse 8 digits at a time, and multiply by the largest
+  // scalar value (9 or 19 digits) for each step.
+  size_t counter = 0;
+  digits = 0;
+  limb value = 0;
+#ifdef FASTFLOAT_64BIT_LIMB
+  size_t step = 19;
+#else
+  size_t step = 9;
+#endif
+
+  // process all integer digits.
+  UC const *p = num.integer.ptr;
+  UC const *pend = p + num.integer.len();
+  skip_zeros(p, pend);
+  // process all digits, in increments of step per loop
+  while (p != pend) {
+    while ((std::distance(p, pend) >= 8) && (step - counter >= 8) &&
+           (max_digits - digits >= 8)) {
+      parse_eight_digits(p, value, counter, digits);
+    }
+    while (counter < step && p != pend && digits < max_digits) {
+      parse_one_digit(p, value, counter, digits);
+    }
+    if (digits == max_digits) {
+      // add the temporary value, then check if we've truncated any digits
+      add_native(result, limb(powers_of_ten_uint64[counter]), value);
+      bool truncated = is_truncated(p, pend);
+      if (num.fraction.ptr != nullptr) {
+        truncated |= is_truncated(num.fraction);
+      }
+      if (truncated) {
+        round_up_bigint(result, digits);
+      }
+      return;
+    } else {
+      add_native(result, limb(powers_of_ten_uint64[counter]), value);
+      counter = 0;
+      value = 0;
+    }
+  }
+
+  // add our fraction digits, if they're available.
+  if (num.fraction.ptr != nullptr) {
+    p = num.fraction.ptr;
+    pend = p + num.fraction.len();
+    if (digits == 0) {
+      skip_zeros(p, pend);
+    }
+    // process all digits, in increments of step per loop
+    while (p != pend) {
+      while ((std::distance(p, pend) >= 8) && (step - counter >= 8) &&
+             (max_digits - digits >= 8)) {
+        parse_eight_digits(p, value, counter, digits);
+      }
+      while (counter < step && p != pend && digits < max_digits) {
+        parse_one_digit(p, value, counter, digits);
+      }
+      if (digits == max_digits) {
+        // add the temporary value, then check if we've truncated any digits
+        add_native(result, limb(powers_of_ten_uint64[counter]), value);
+        bool truncated = is_truncated(p, pend);
+        if (truncated) {
+          round_up_bigint(result, digits);
+        }
+        return;
+      } else {
+        add_native(result, limb(powers_of_ten_uint64[counter]), value);
+        counter = 0;
+        value = 0;
+      }
+    }
+  }
+
+  if (counter != 0) {
+    add_native(result, limb(powers_of_ten_uint64[counter]), value);
+  }
+}
+
+template <typename T>
+inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
+positive_digit_comp(bigint &bigmant, int32_t exponent) noexcept {
+  FASTFLOAT_ASSERT(bigmant.pow10(uint32_t(exponent)));
+  adjusted_mantissa answer;
+  bool truncated;
+  answer.mantissa = bigmant.hi64(truncated);
+  int bias = binary_format<T>::mantissa_explicit_bits() -
+             binary_format<T>::minimum_exponent();
+  answer.power2 = bigmant.bit_length() - 64 + bias;
+
+  round<T>(answer, [truncated](adjusted_mantissa &a, int32_t shift) {
+    round_nearest_tie_even(
+        a, shift,
+        [truncated](bool is_odd, bool is_halfway, bool is_above) -> bool {
+          return is_above || (is_halfway && truncated) ||
+                 (is_odd && is_halfway);
+        });
+  });
+
+  return answer;
+}
+
+// the scaling here is quite simple: we have, for the real digits `m * 10^e`,
+// and for the theoretical digits `n * 2^f`. Since `e` is always negative,
+// to scale them identically, we do `n * 2^f * 5^-e`, so we now have `m * 2^e`.
+// we then need to scale by `2^(f - e)`, and then the two significant digits
+// are of the same magnitude.
+template <typename T>
+inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp(
+    bigint &bigmant, adjusted_mantissa am, int32_t exponent) noexcept {
+  bigint &real_digits = bigmant;
+  int32_t real_exp = exponent;
+
+  // get the value of `b`, rounded down, and get a bigint representation of b+h
+  adjusted_mantissa am_b = am;
+  // gcc7 bug: wrap round_down in a lambda to drop its noexcept qualifier and
+  // avoid -Wnoexcept-type.
+  round<T>(am_b,
+           [](adjusted_mantissa &a, int32_t shift) { round_down(a, shift); });
+  T b;
+  to_float(false, am_b, b);
+  adjusted_mantissa theor = to_extended_halfway(b);
+  bigint theor_digits(theor.mantissa);
+  int32_t theor_exp = theor.power2;
+
+  // scale real digits and theor digits to be same power.
+  int32_t pow2_exp = theor_exp - real_exp;
+  uint32_t pow5_exp = uint32_t(-real_exp);
+  if (pow5_exp != 0) {
+    FASTFLOAT_ASSERT(theor_digits.pow5(pow5_exp));
+  }
+  if (pow2_exp > 0) {
+    FASTFLOAT_ASSERT(theor_digits.pow2(uint32_t(pow2_exp)));
+  } else if (pow2_exp < 0) {
+    FASTFLOAT_ASSERT(real_digits.pow2(uint32_t(-pow2_exp)));
+  }
+
+  // compare digits, and use it to direct rounding
+  int ord = real_digits.compare(theor_digits);
+  adjusted_mantissa answer = am;
+  round<T>(answer, [ord](adjusted_mantissa &a, int32_t shift) {
+    round_nearest_tie_even(
+        a, shift, [ord](bool is_odd, bool _, bool __) -> bool {
+          (void)_;  // not needed, since we've done our comparison
+          (void)__; // not needed, since we've done our comparison
+          if (ord > 0) { // real digits are above the halfway point: round up
+            return true;
+          } else if (ord < 0) { // below the halfway point: round down
+            return false;
+          } else { // exactly halfway: round to even
+            return is_odd;
+          }
+        });
+  });
+
+  return answer;
+}
+
+// parse the significant digits as a big integer to unambiguously round
+// the significant digits. here, we are trying to determine how to round
+// an extended float representation close to `b+h`, halfway between `b`
+// (the float rounded-down) and `b+u`, the next positive float. this
+// algorithm is always correct, and uses one of two approaches. when
+// the exponent is positive relative to the significant digits (such as
+// 1234), we create a big-integer representation, get the high 64-bits,
+// determine if any lower bits are truncated, and use that to direct
+// rounding. in case of a negative exponent relative to the significant
+// digits (such as 1.2345), we create a theoretical representation of
+// `b` as a big-integer type, scaled to the same binary exponent as
+// the actual digits. we then compare the big integer representations
+// of both, and use that to direct rounding.
+template <typename T, typename UC>
+inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
+digit_comp(parsed_number_string_t<UC> &num, adjusted_mantissa am) noexcept {
+  // remove the invalid exponent bias
+  am.power2 -= invalid_am_bias;
+
+  int32_t sci_exp =
+      scientific_exponent(num.mantissa, static_cast<int32_t>(num.exponent));
+  size_t max_digits = binary_format<T>::max_digits();
+  size_t digits = 0;
+  bigint bigmant;
+  parse_mantissa(bigmant, num, max_digits, digits);
+  // can't underflow, since digits is at most max_digits.
+  int32_t exponent = sci_exp + 1 - int32_t(digits);
+  if (exponent >= 0) {
+    return positive_digit_comp<T>(bigmant, exponent);
+  } else {
+    return negative_digit_comp<T>(bigmant, am, exponent);
+  }
+}
+
+} // namespace fast_float
+
+#endif
+
+#ifndef FASTFLOAT_PARSE_NUMBER_H
+#define FASTFLOAT_PARSE_NUMBER_H
+
+
+#include <cmath>
+#include <cstring>
+#include <limits>
+#include <system_error>
+
+namespace fast_float {
+
+namespace detail {
+/**
+ * Special case +inf, -inf, nan, infinity, -infinity.
+ * The case comparisons could be made much faster given that we know that the
+ * strings are null-free and fixed.
+ **/
+template <typename T, typename UC>
+from_chars_result_t<UC>
+    FASTFLOAT_CONSTEXPR14 parse_infnan(UC const *first, UC const *last,
+                                       T &value, chars_format fmt) noexcept {
+  from_chars_result_t<UC> answer{};
+  answer.ptr = first;
+  answer.ec = std::errc(); // be optimistic
+  // assume first < last, so dereference without checks;
+  bool const minusSign = (*first == UC('-'));
+  // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
+  if ((*first == UC('-')) ||
+      (uint64_t(fmt & chars_format::allow_leading_plus) &&
+       (*first == UC('+')))) {
+    ++first;
+  }
+  if (last - first >= 3) {
+    if (fastfloat_strncasecmp3(first, str_const_nan<UC>())) {
+      answer.ptr = (first += 3);
+      value = minusSign ? -std::numeric_limits<T>::quiet_NaN()
+                        : std::numeric_limits<T>::quiet_NaN();
+      // Check for possible nan(n-char-seq-opt), C++17 20.19.3.7,
+      // C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan).
+      if (first != last && *first == UC('(')) {
+        for (UC const *ptr = first + 1; ptr != last; ++ptr) {
+          if (*ptr == UC(')')) {
+            answer.ptr = ptr + 1; // valid nan(n-char-seq-opt)
+            break;
+          } else if (!((UC('a') <= *ptr && *ptr <= UC('z')) ||
+                       (UC('A') <= *ptr && *ptr <= UC('Z')) ||
+                       (UC('0') <= *ptr && *ptr <= UC('9')) || *ptr == UC('_')))
+            break; // forbidden char, not nan(n-char-seq-opt)
+        }
+      }
+      return answer;
+    }
+    if (fastfloat_strncasecmp3(first, str_const_inf<UC>())) {
+      if ((last - first >= 8) &&
+          fastfloat_strncasecmp5(first + 3, str_const_inf<UC>() + 3)) {
+        answer.ptr = first + 8;
+      } else {
+        answer.ptr = first + 3;
+      }
+      value = minusSign ? -std::numeric_limits<T>::infinity()
+                        : std::numeric_limits<T>::infinity();
+      return answer;
+    }
+  }
+  answer.ec = std::errc::invalid_argument;
+  return answer;
+}
+
+/**
+ * Returns true if the floating-point rounding mode is to 'nearest'.
+ * It is the default on most systems. This function is meant to be inexpensive.
+ * Credit : @mwalcott3
+ */
+fastfloat_really_inline bool rounds_to_nearest() noexcept {
+  // https://lemire.me/blog/2020/06/26/gcc-not-nearest/
+#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
+  return false;
+#endif
+  // See
+  // A fast function to check your floating-point rounding mode
+  // https://lemire.me/blog/2022/11/16/a-fast-function-to-check-your-floating-point-rounding-mode/
+  //
+  // This function is meant to be equivalent to :
+  // prior: #include <cfenv>
+  //  return fegetround() == FE_TONEAREST;
+  // However, it is expected to be much faster than the fegetround()
+  // function call.
+  //
+  // The volatile keyword prevents the compiler from computing the function
+  // at compile-time.
+  // There might be other ways to prevent compile-time optimizations (e.g.,
+  // asm). The value does not need to be std::numeric_limits<float>::min(), any
+  // small value so that 1 + x should round to 1 would do (after accounting for
+  // excess precision, as in 387 instructions).
+  static float volatile fmin = std::numeric_limits<float>::min();
+  float fmini = fmin; // we copy it so that it gets loaded at most once.
+//
+// Explanation:
+// Only when fegetround() == FE_TONEAREST do we have that
+// fmin + 1.0f == 1.0f - fmin.
+//
+// FE_UPWARD:
+//  fmin + 1.0f > 1
+//  1.0f - fmin == 1
+//
+// FE_DOWNWARD or  FE_TOWARDZERO:
+//  fmin + 1.0f == 1
+//  1.0f - fmin < 1
+//
+// Note: This may fail to be accurate if fast-math has been
+// enabled, as rounding conventions may not apply.
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#pragma warning(push)
+//  todo: is there a VS warning?
+//  see
+//  https://stackoverflow.com/questions/46079446/is-there-a-warning-for-floating-point-equality-checking-in-visual-studio-2013
+#elif defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wfloat-equal"
+#elif defined(__GNUC__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#endif
+  return (fmini + 1.0f == 1.0f - fmini);
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#pragma warning(pop)
+#elif defined(__clang__)
+#pragma clang diagnostic pop
+#elif defined(__GNUC__)
+#pragma GCC diagnostic pop
+#endif
+}
+
+} // namespace detail
+
+/**
+ * Default dispatcher behind from_chars(): hands the character range, the
+ * output reference and the parse options to from_chars_advanced() unchanged
+ * and returns whatever it reports.
+ */
+template <typename T> struct from_chars_caller {
+  template <typename UC>
+  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
+  call(UC const *first, UC const *last, T &value,
+       parse_options_t<UC> options) noexcept {
+    from_chars_result_t<UC> const outcome =
+        from_chars_advanced(first, last, value, options);
+    return outcome;
+  }
+};
+
+#ifdef __STDCPP_FLOAT32_T__
+/**
+ * Dispatcher for std::float32_t (only compiled when the implementation
+ * defines __STDCPP_FLOAT32_T__). The text is parsed into a plain float and
+ * the result is then copied into the caller's std::float32_t.
+ */
+template <> struct from_chars_caller<std::float32_t> {
+  template <typename UC>
+  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
+  call(UC const *first, UC const *last, std::float32_t &value,
+       parse_options_t<UC> options) noexcept {
+    // if std::float32_t is defined, and we are in C++23 mode; macro set for
+    // float32; set value to float due to equivalence between float and
+    // float32_t
+    float val = 0.0f;
+    auto ret = from_chars_advanced(first, last, val, options);
+    // invalid_argument is the one outcome in which the parser never writes
+    // its output. Copying `val` back in that case would overwrite the
+    // caller's value with something that was not parsed, so leave it alone,
+    // exactly as the generic (non-specialized) path does.
+    if (ret.ec != std::errc::invalid_argument) {
+      value = val;
+    }
+    return ret;
+  }
+};
+#endif
+
+#ifdef __STDCPP_FLOAT64_T__
+/**
+ * Dispatcher for std::float64_t (only compiled when the implementation
+ * defines __STDCPP_FLOAT64_T__). The text is parsed into a plain double and
+ * the result is then copied into the caller's std::float64_t.
+ */
+template <> struct from_chars_caller<std::float64_t> {
+  template <typename UC>
+  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
+  call(UC const *first, UC const *last, std::float64_t &value,
+       parse_options_t<UC> options) noexcept {
+    // if std::float64_t is defined, and we are in C++23 mode; macro set for
+    // float64; set value as double due to equivalence between double and
+    // float64_t
+    double val = 0.0;
+    auto ret = from_chars_advanced(first, last, val, options);
+    // invalid_argument is the one outcome in which the parser never writes
+    // its output. Copying `val` back in that case would overwrite the
+    // caller's value with something that was not parsed, so leave it alone,
+    // exactly as the generic (non-specialized) path does.
+    if (ret.ec != std::errc::invalid_argument) {
+      value = val;
+    }
+    return ret;
+  }
+};
+#endif
+
+/**
+ * Floating-point entry point: wraps `fmt` in a parse_options_t and lets
+ * from_chars_caller<T> pick the implementation for the target type.
+ */
+template <typename T, typename UC, typename>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars(UC const *first, UC const *last, T &value,
+           chars_format fmt /*= chars_format::general*/) noexcept {
+  parse_options_t<UC> const opts(fmt);
+  return from_chars_caller<T>::call(first, last, value, opts);
+}
+
+/**
+ * Attempts to build `value` from a decimal significand and a power of ten
+ * with at most one floating-point multiplication or division.
+ *
+ * @param mantissa     decimal significand
+ * @param exponent     power of ten applied to the significand
+ * @param is_negative  when true the result is negated
+ * @param value        receives the result; written only when true is returned
+ * @return true if the fast path applied, false if the caller has to fall
+ *         back to another algorithm.
+ */
+template <typename T>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
+clinger_fast_path_impl(uint64_t mantissa, int64_t exponent, bool is_negative,
+                       T &value) noexcept {
+  // The implementation of the Clinger's fast path is convoluted because
+  // we want round-to-nearest in all cases, irrespective of the rounding mode
+  // selected on the thread.
+  // We proceed optimistically, assuming that detail::rounds_to_nearest()
+  // returns true.
+  if (binary_format<T>::min_exponent_fast_path() <= exponent &&
+      exponent <= binary_format<T>::max_exponent_fast_path()) {
+    // Unfortunately, the conventional Clinger's fast path is only possible
+    // when the system rounds to the nearest float.
+    //
+    // We expect the next branch to almost always be selected.
+    // We could check it first (before the previous branch), but
+    // there might be performance advantages at having the check
+    // be last.
+    if (!cpp20_and_in_constexpr() && detail::rounds_to_nearest()) {
+      // We have that fegetround() == FE_TONEAREST.
+      // Next is Clinger's fast path.
+      if (mantissa <= binary_format<T>::max_mantissa_fast_path()) {
+        value = T(mantissa);
+        if (exponent < 0) {
+          value = value / binary_format<T>::exact_power_of_ten(-exponent);
+        } else {
+          value = value * binary_format<T>::exact_power_of_ten(exponent);
+        }
+        if (is_negative) {
+          value = -value;
+        }
+        return true;
+      }
+    } else {
+      // We do not have that fegetround() == FE_TONEAREST.
+      // Next is a modified Clinger's fast path, inspired by Jakub Jelínek's
+      // proposal
+      // Only non-negative exponents are handled here, with a mantissa bound
+      // that depends on the exponent.
+      if (exponent >= 0 &&
+          mantissa <= binary_format<T>::max_mantissa_fast_path(exponent)) {
+#if defined(__clang__) || defined(FASTFLOAT_32BIT)
+        // Clang may map 0 to -0.0 when fegetround() == FE_DOWNWARD
+        if (mantissa == 0) {
+          value = is_negative ? T(-0.) : T(0.);
+          return true;
+        }
+#endif
+        value = T(mantissa) * binary_format<T>::exact_power_of_ten(exponent);
+        if (is_negative) {
+          value = -value;
+        }
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+/**
+ * This function overload takes parsed_number_string_t structure that is created
+ * and populated either by from_chars_advanced function taking chars range and
+ * parsing options or other parsing custom function implemented by user.
+ *
+ * @param pns    already-parsed number (mantissa, exponent, sign, flags)
+ * @param value  receives the converted floating-point value
+ * @return result whose ptr is pns.lastmatch and whose ec is either
+ *         std::errc() or std::errc::result_out_of_range.
+ */
+template <typename T, typename UC>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars_advanced(parsed_number_string_t<UC> &pns, T &value) noexcept {
+  static_assert(is_supported_float_type<T>::value,
+                "only some floating-point types are supported");
+  static_assert(is_supported_char_type<UC>::value,
+                "only char, wchar_t, char16_t and char32_t are supported");
+
+  from_chars_result_t<UC> answer;
+
+  answer.ec = std::errc(); // be optimistic
+  answer.ptr = pns.lastmatch;
+
+  // The fast path is tried only when the mantissa was not truncated.
+  if (!pns.too_many_digits &&
+      clinger_fast_path_impl(pns.mantissa, pns.exponent, pns.negative, value))
+    return answer;
+
+  adjusted_mantissa am =
+      compute_float<binary_format<T>>(pns.exponent, pns.mantissa);
+  // With a truncated mantissa the result is trusted only if mantissa and
+  // mantissa + 1 convert to the same value; otherwise compute_error() is
+  // used instead.
+  if (pns.too_many_digits && am.power2 >= 0) {
+    if (am != compute_float<binary_format<T>>(pns.exponent, pns.mantissa + 1)) {
+      am = compute_error<binary_format<T>>(pns.exponent, pns.mantissa);
+    }
+  }
+  // If we called compute_float<binary_format<T>>(pns.exponent, pns.mantissa)
+  // and we have an invalid power (am.power2 < 0), then we need to go the long
+  // way around again. This is very uncommon.
+  if (am.power2 < 0) {
+    am = digit_comp<T>(pns, am);
+  }
+  to_float(pns.negative, am, value);
+  // Test for over/underflow.
+  if ((pns.mantissa != 0 && am.mantissa == 0 && am.power2 == 0) ||
+      am.power2 == binary_format<T>::infinite_power()) {
+    answer.ec = std::errc::result_out_of_range;
+  }
+  return answer;
+}
+
+/**
+ * Parses a floating-point number from [first, last) according to `options`.
+ *
+ * Leading white space is skipped when the format requests it. An empty range,
+ * or text that is not a number while inf/nan parsing is disabled, yields
+ * std::errc::invalid_argument with ptr at the first unskipped character.
+ */
+template <typename T, typename UC>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars_float_advanced(UC const *first, UC const *last, T &value,
+                          parse_options_t<UC> options) noexcept {
+  static_assert(is_supported_float_type<T>::value,
+                "only some floating-point types are supported");
+  static_assert(is_supported_char_type<UC>::value,
+                "only char, wchar_t, char16_t and char32_t are supported");
+
+  chars_format const fmt = detail::adjust_for_feature_macros(options.format);
+
+  if (uint64_t(fmt & chars_format::skip_white_space)) {
+    for (; first != last && fast_float::is_space(*first); ++first) {
+    }
+  }
+
+  // Result returned by every rejection below.
+  from_chars_result_t<UC> rejected;
+  rejected.ec = std::errc::invalid_argument;
+  rejected.ptr = first;
+  if (first == last) {
+    return rejected;
+  }
+
+  bool const json_mode = uint64_t(fmt & detail::basic_json_fmt) != 0;
+  parsed_number_string_t<UC> pns =
+      json_mode ? parse_number_string<true, UC>(first, last, options)
+                : parse_number_string<false, UC>(first, last, options);
+  if (pns.valid) {
+    // call overload that takes parsed_number_string_t directly.
+    return from_chars_advanced(pns, value);
+  }
+  if (uint64_t(fmt & chars_format::no_infnan)) {
+    return rejected;
+  }
+  return detail::parse_infnan(first, last, value, fmt);
+}
+
+/**
+ * Integer entry point: parses [first, last) in the given `base` by building
+ * default parse options, overriding their base, and forwarding to
+ * from_chars_advanced().
+ */
+template <typename T, typename UC, typename>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars(UC const *first, UC const *last, T &value, int base) noexcept {
+  static_assert(is_supported_integer_type<T>::value,
+                "only integer types are supported");
+  static_assert(is_supported_char_type<UC>::value,
+                "only char, wchar_t, char16_t and char32_t are supported");
+
+  parse_options_t<UC> int_options;
+  int_options.base = base;
+  return from_chars_advanced(first, last, value, int_options);
+}
+
+/**
+ * Computes mantissa * 10^decimal_exponent as a T for an unsigned mantissa:
+ * the Clinger fast path is tried first, compute_float()/to_float() otherwise.
+ */
+template <typename T>
+FASTFLOAT_CONSTEXPR20
+    typename std::enable_if<is_supported_float_type<T>::value, T>::type
+    integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept {
+  T result;
+  bool const fast_path_hit =
+      clinger_fast_path_impl(mantissa, decimal_exponent, false, result);
+  if (!fast_path_hit) {
+    adjusted_mantissa am =
+        compute_float<binary_format<T>>(decimal_exponent, mantissa);
+    to_float(false, am, result);
+  }
+  return result;
+}
+
+/**
+ * Computes mantissa * 10^decimal_exponent as a T for a signed mantissa.
+ *
+ * The sign is split off and the magnitude is processed as an unsigned
+ * 64-bit value: the Clinger fast path is tried first,
+ * compute_float()/to_float() otherwise.
+ */
+template <typename T>
+FASTFLOAT_CONSTEXPR20
+    typename std::enable_if<is_supported_float_type<T>::value, T>::type
+    integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept {
+  const bool is_negative = mantissa < 0;
+  // Take the magnitude in unsigned arithmetic. `-mantissa` on the signed
+  // value overflows (undefined behaviour) for INT64_MIN, whereas
+  // 0 - uint64_t(mantissa) wraps to the correct magnitude 2^63.
+  const uint64_t bits = static_cast<uint64_t>(mantissa);
+  const uint64_t m = is_negative ? uint64_t(0) - bits : bits;
+
+  T value;
+  if (clinger_fast_path_impl(m, decimal_exponent, is_negative, value))
+    return value;
+
+  adjusted_mantissa am = compute_float<binary_format<T>>(decimal_exponent, m);
+  to_float(is_negative, am, value);
+  return value;
+}
+
+/// Non-template convenience overload: unsigned mantissa, result as double.
+FASTFLOAT_CONSTEXPR20 inline double
+integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept {
+  double const product =
+      integer_times_pow10<double>(mantissa, decimal_exponent);
+  return product;
+}
+
+/// Non-template convenience overload: signed mantissa, result as double.
+FASTFLOAT_CONSTEXPR20 inline double
+integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept {
+  double const product =
+      integer_times_pow10<double>(mantissa, decimal_exponent);
+  return product;
+}
+
+// the following overloads are here to avoid surprising ambiguity for int,
+// unsigned, etc.
+// Each one is enabled by SFINAE for one signedness class of integral types
+// and widens the mantissa to the matching 64-bit type before forwarding.
+
+// Explicit result type T, unsigned integral mantissa -> uint64_t overload.
+template <typename T, typename Int>
+FASTFLOAT_CONSTEXPR20
+    typename std::enable_if<is_supported_float_type<T>::value &&
+                                std::is_integral<Int>::value &&
+                                !std::is_signed<Int>::value,
+                            T>::type
+    integer_times_pow10(Int mantissa, int decimal_exponent) noexcept {
+  return integer_times_pow10<T>(static_cast<uint64_t>(mantissa),
+                                decimal_exponent);
+}
+
+// Explicit result type T, signed integral mantissa -> int64_t overload.
+template <typename T, typename Int>
+FASTFLOAT_CONSTEXPR20
+    typename std::enable_if<is_supported_float_type<T>::value &&
+                                std::is_integral<Int>::value &&
+                                std::is_signed<Int>::value,
+                            T>::type
+    integer_times_pow10(Int mantissa, int decimal_exponent) noexcept {
+  return integer_times_pow10<T>(static_cast<int64_t>(mantissa),
+                                decimal_exponent);
+}
+
+// Result type double, unsigned integral mantissa -> uint64_t overload.
+template <typename Int>
+FASTFLOAT_CONSTEXPR20 typename std::enable_if<
+    std::is_integral<Int>::value && !std::is_signed<Int>::value, double>::type
+integer_times_pow10(Int mantissa, int decimal_exponent) noexcept {
+  return integer_times_pow10(static_cast<uint64_t>(mantissa), decimal_exponent);
+}
+
+// Result type double, signed integral mantissa -> int64_t overload.
+template <typename Int>
+FASTFLOAT_CONSTEXPR20 typename std::enable_if<
+    std::is_integral<Int>::value && std::is_signed<Int>::value, double>::type
+integer_times_pow10(Int mantissa, int decimal_exponent) noexcept {
+  return integer_times_pow10(static_cast<int64_t>(mantissa), decimal_exponent);
+}
+
+/**
+ * Parses an integer from [first, last) according to `options`.
+ *
+ * Leading white space is skipped when the format requests it. An empty range
+ * or a base outside 2..36 yields std::errc::invalid_argument with ptr at the
+ * first unskipped character; everything else is left to parse_int_string().
+ */
+template <typename T, typename UC>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars_int_advanced(UC const *first, UC const *last, T &value,
+                        parse_options_t<UC> options) noexcept {
+  static_assert(is_supported_integer_type<T>::value,
+                "only integer types are supported");
+  static_assert(is_supported_char_type<UC>::value,
+                "only char, wchar_t, char16_t and char32_t are supported");
+
+  chars_format const fmt = detail::adjust_for_feature_macros(options.format);
+  int const radix = options.base;
+
+  if (uint64_t(fmt & chars_format::skip_white_space)) {
+    for (; first != last && fast_float::is_space(*first); ++first) {
+    }
+  }
+
+  bool const radix_ok = 2 <= radix && radix <= 36;
+  if (first != last && radix_ok) {
+    return parse_int_string(first, last, value, options);
+  }
+
+  from_chars_result_t<UC> rejected;
+  rejected.ec = std::errc::invalid_argument;
+  rejected.ptr = first;
+  return rejected;
+}
+
+// Compile-time dispatch table for from_chars_advanced(), selected by a type
+// index: 1 routes to the floating-point parser, 2 to the integer parser.
+// The primary template has no call() and its static_assert rejects index 0.
+template <size_t TypeIx> struct from_chars_advanced_caller {
+  static_assert(TypeIx > 0, "unsupported type");
+};
+
+// Index 1: forwards to from_chars_float_advanced().
+template <> struct from_chars_advanced_caller<1> {
+  template <typename T, typename UC>
+  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
+  call(UC const *first, UC const *last, T &value,
+       parse_options_t<UC> options) noexcept {
+    return from_chars_float_advanced(first, last, value, options);
+  }
+};
+
+// Index 2: forwards to from_chars_int_advanced().
+template <> struct from_chars_advanced_caller<2> {
+  template <typename T, typename UC>
+  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
+  call(UC const *first, UC const *last, T &value,
+       parse_options_t<UC> options) noexcept {
+    return from_chars_int_advanced(first, last, value, options);
+  }
+};
+
+/**
+ * Generic entry point taking a character range and parse options. The type
+ * index (1 when T is a supported float type, plus 2 when it is a supported
+ * integer type) selects the from_chars_advanced_caller that does the work.
+ */
+template <typename T, typename UC>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars_advanced(UC const *first, UC const *last, T &value,
+                    parse_options_t<UC> options) noexcept {
+  using dispatcher = from_chars_advanced_caller<
+      size_t(is_supported_float_type<T>::value) +
+      2 * size_t(is_supported_integer_type<T>::value)>;
+  return dispatcher::call(first, last, value, options);
+}
+
+} // namespace fast_float
+
+#endif
+
diff --git a/symmetry/gemmi/unitcell.hpp b/gemmi_gph/gemmi/unitcell.hpp
similarity index 100%
rename from symmetry/gemmi/unitcell.hpp
rename to gemmi_gph/gemmi/unitcell.hpp
diff --git a/gemmi_gph/gemmi/util.hpp b/gemmi_gph/gemmi/util.hpp
new file mode 100644
index 00000000..9d4e0a12
--- /dev/null
+++ b/gemmi_gph/gemmi/util.hpp
@@ -0,0 +1,315 @@
+// Copyright 2017 Global Phasing Ltd.
+//
+// Utilities. Mostly for working with strings and vectors.
+
+#ifndef GEMMI_UTIL_HPP_
+#define GEMMI_UTIL_HPP_
+
+#include <cassert>
+#include <cctype>     // for isspace
+#include <cstring>    // for strncmp
+#include <algorithm>  // for equal, find, remove_if
+#include <iterator>   // for begin, end, make_move_iterator
+#include <string>
+#include <vector>
+
+namespace gemmi {
+
+//   #####   string helpers   #####
+
+// Appends one value to `out`: int and size_t are formatted with
+// std::to_string, double is rejected at compile time, and anything else is
+// appended with std::string::operator+=.
+inline void append_to_str(std::string& out, int v) {
+  out.append(std::to_string(v));
+}
+inline void append_to_str(std::string& out, size_t v) {
+  out.append(std::to_string(v));
+}
+void append_to_str(std::string& out, double) = delete;
+template<typename T>
+void append_to_str(std::string& out, const T& v) {
+  out += v;
+}
+
+// Appends every argument, left to right, to `out`.
+inline void cat_to(std::string&) {}
+template <typename T, typename... Args>
+void cat_to(std::string& out, const T& value, Args const&... args) {
+  // A braced initializer list evaluates its elements in order, so the
+  // arguments are appended in the order they were passed.
+  const int expand[] = {(append_to_str(out, value), 0),
+                        (append_to_str(out, args), 0)...};
+  (void) expand;
+}
+
+// Returns the concatenation of all arguments as a new string.
+template <class... Args>
+std::string cat(Args const&... args) {
+  std::string joined;
+  cat_to(joined, args...);
+  return joined;
+}
+
+// True if `str` begins with `prefix` (an empty prefix always matches).
+inline bool starts_with(const std::string& str, const std::string& prefix) {
+  if (prefix.length() > str.length())
+    return false;
+  return std::equal(prefix.begin(), prefix.end(), str.begin());
+}
+
+// True if the C string `a` begins with the character array `b`; the last
+// element of `b` (the terminator of a string literal) is not compared.
+template<size_t N> bool starts_with(const char* a, const char (&b)[N]) {
+  const size_t prefix_len = N - 1;
+  return 0 == std::strncmp(a, b, prefix_len);
+}
+
+// True if `str` ends with `suffix` (an empty suffix always matches).
+inline bool ends_with(const std::string& str, const std::string& suffix) {
+  if (suffix.length() > str.length())
+    return false;
+  return std::equal(suffix.rbegin(), suffix.rend(), str.rbegin());
+}
+
+// ASCII-only lowercase conversion; every character outside 'A'..'Z' is
+// returned unchanged.
+// can be faster than std::tolower() b/c it takes char not int
+inline char lower(char c) {
+  const bool is_upper = 'A' <= c && c <= 'Z';
+  return is_upper ? static_cast<char>(c | 0x20) : c;
+}
+
+// Clears bit 0x20 of `c`, which maps a-z to A-Z and leaves A-Z untouched.
+// works as expected only for a-zA-Z
+inline char alpha_up(char c) {
+  return static_cast<char>(c & ~0x20);
+}
+
+// Returns a copy of `str` with ASCII 'A'..'Z' replaced by 'a'..'z'.
+inline std::string to_lower(std::string str) {
+  std::transform(str.begin(), str.end(), str.begin(), [](char c) {
+    return ('A' <= c && c <= 'Z') ? static_cast<char>(c | 0x20) : c;
+  });
+  return str;
+}
+
+// Returns a copy of `str` with ASCII 'a'..'z' replaced by 'A'..'Z'.
+inline std::string to_upper(std::string str) {
+  std::transform(str.begin(), str.end(), str.begin(), [](char c) {
+    return ('a' <= c && c <= 'z') ? static_cast<char>(c & ~0x20) : c;
+  });
+  return str;
+}
+
+// case-insensitive character comparison (ASCII letters only)
+inline bool isame(char a, char b) {
+  if (a == b)
+    return true;
+  // Two different characters can only match if they differ in exactly the
+  // case bit and are letters.
+  if ((a ^ b) != 0x20)
+    return false;
+  const int folded = a | 0x20;
+  return folded >= 'a' && folded <= 'z';
+}
+
+// Case-insensitive comparisons. The second arg must be lowercase.
+
+// True if the part of `str` starting at `offset` equals `low` once every
+// character of `str` has been passed through lower().
+inline bool iequal_from(const std::string& str, size_t offset, const std::string& low) {
+  if (str.length() != low.length() + offset)
+    return false;
+  for (size_t i = 0; i != low.length(); ++i)
+    if (low[i] != lower(str[offset + i]))
+      return false;
+  return true;
+}
+
+// Whole-string case-insensitive equality; `low` must already be lowercase.
+inline bool iequal(const std::string& str, const std::string& low) {
+  const size_t from_start = 0;
+  return iequal_from(str, from_start, low);
+}
+
+// Case-insensitive starts_with(); `prefix` must already be lowercase.
+inline bool istarts_with(const std::string& str, const std::string& prefix) {
+  if (prefix.length() > str.length())
+    return false;
+  for (size_t i = 0; i != prefix.length(); ++i)
+    if (prefix[i] != lower(str[i]))
+      return false;
+  return true;
+}
+// Case-insensitive ends_with(); `suffix` must already be lowercase.
+inline bool iends_with(const std::string& str, const std::string& suffix) {
+  if (suffix.length() > str.length())
+    return false;
+  const size_t tail = str.length() - suffix.length();
+  for (size_t i = 0; i != suffix.length(); ++i)
+    if (suffix[i] != lower(str[tail + i]))
+      return false;
+  return true;
+}
+
+// Like iends_with(), but also accepts the suffix followed by ".gz".
+inline bool giends_with(const std::string& str, const std::string& suffix) {
+  if (iends_with(str, suffix))
+    return true;
+  return iends_with(str, suffix + ".gz");
+}
+
+// Returns `str` without leading and trailing blanks (space, CR, LF, tab).
+inline std::string trim_str(const std::string& str) {
+  const char* const blanks = " \r\n\t";
+  const std::string::size_type begin = str.find_first_not_of(blanks);
+  if (begin == std::string::npos)
+    return std::string();
+  // A non-blank character exists, so find_last_not_of() cannot fail here.
+  const std::string::size_type end = str.find_last_not_of(blanks) + 1;
+  return std::string(str, begin, end - begin);
+}
+
+// Returns `str` without trailing blanks (space, CR, LF, tab).
+inline std::string rtrim_str(const std::string& str) {
+  const std::string::size_type last = str.find_last_not_of(" \r\n\t");
+  if (last == std::string::npos)
+    return std::string();
+  return std::string(str, 0, last + 1);
+}
+
+// Returns a pointer just past the last non-whitespace character of the
+// range [start, end), i.e. the end of the right-trimmed string.
+// end is after the last character of the string (typically \0); when it is
+// null, the end is located by scanning for the terminating NUL.
+// Returns nullptr if start is null.
+inline const char* rtrim_cstr(const char* start, const char* end=nullptr) {
+  if (!start)
+    return nullptr;
+  if (!end)
+    end = start + std::strlen(start);
+  // std::isspace() requires a value representable as unsigned char (or EOF);
+  // passing a negative plain char is undefined behaviour, hence the cast.
+  while (end > start && std::isspace(static_cast<unsigned char>(end[-1])))
+    --end;
+  return end;
+}
+
+namespace impl {
+// Length of a separator, for both separator kinds accepted by split_str().
+inline size_t length(char) { return 1; }
+inline size_t length(const std::string& s) { return s.length(); }
+}
+
+// Splits `str` at every occurrence of `sep` and appends the pieces to
+// `result`.
+// takes a single separator (usually char or string);
+// may return empty fields
+// An empty string separator matches nowhere: the whole of `str` is appended
+// as a single field.
+template<typename S>
+void split_str_into(const std::string& str, S sep,
+                    std::vector<std::string>& result) {
+  const std::size_t sep_len = impl::length(sep);
+  std::size_t start = 0;
+  // Searching for an empty separator would succeed at `start` forever
+  // without advancing it, so the search is skipped in that case.
+  if (sep_len != 0) {
+    std::size_t end;
+    while ((end = str.find(sep, start)) != std::string::npos) {
+      result.emplace_back(str, start, end - start);
+      start = end + sep_len;
+    }
+  }
+  result.emplace_back(str, start);
+}
+
+// Same as split_str_into(), but returns the fields in a new vector.
+template<typename S>
+std::vector<std::string> split_str(const std::string& str, S sep) {
+  std::vector<std::string> result;
+  split_str_into(str, sep, result);
+  return result;
+}
+
+// _multi variants take multiple 1-char separators as a string;
+// runs of separators count as one and empty fields are discarded
+inline void split_str_into_multi(const std::string& str, const char* seps,
+                                 std::vector<std::string>& result) {
+  for (std::size_t pos = str.find_first_not_of(seps);
+       pos != std::string::npos; ) {
+    const std::size_t stop = str.find_first_of(seps, pos);
+    // stop may be npos for the last field; the substring constructor then
+    // takes everything up to the end of str.
+    result.emplace_back(str, pos, stop - pos);
+    pos = str.find_first_not_of(seps, stop);
+  }
+}
+
+// Same as split_str_into_multi(), returning a new vector; by default the
+// separators are space and tab.
+inline std::vector<std::string> split_str_multi(const std::string& str,
+                                                const char* seps=" \t") {
+  std::vector<std::string> fields;
+  split_str_into_multi(str, seps, fields);
+  return fields;
+}
+
+// Joins getter(*i) for every i in [begin, end), placing `sep` between
+// consecutive items.
+template<typename T, typename S, typename F>
+std::string join_str(T begin, T end, const S& sep, const F& getter) {
+  std::string joined;
+  T it = begin;
+  if (it != end) {
+    joined += getter(*it);
+    for (++it; it != end; ++it) {
+      joined += sep;
+      joined += getter(*it);
+    }
+  }
+  return joined;
+}
+
+// Joins a range of strings with `sep`.
+template<typename T, typename S>
+std::string join_str(T begin, T end, const S& sep) {
+  const auto as_is = [](const std::string& item) { return item; };
+  return join_str(begin, end, sep, as_is);
+}
+
+// Container overload: joins getter(element) for each element of `iterable`.
+template<typename T, typename S, typename F>
+std::string join_str(const T& iterable, const S& sep, const F& getter) {
+  return join_str(iterable.begin(), iterable.end(), sep, getter);
+}
+
+// Container overload: joins a container of strings with `sep`.
+template<typename T, typename S>
+std::string join_str(const T& iterable, const S& sep) {
+  return join_str(iterable.begin(), iterable.end(), sep);
+}
+
+// Appends `item` to `str`, preceded by `sep` unless `str` is still empty.
+template<typename T, typename S>
+void string_append_sep(std::string& str, S sep, const T& item) {
+  const bool needs_separator = !str.empty();
+  if (needs_separator)
+    str += sep;
+  str += item;
+}
+
+// Replaces every non-overlapping occurrence of `old` in `s` with `new_`,
+// scanning left to right. Text inserted by a replacement is never searched
+// again. An empty `old` leaves `s` unchanged.
+inline void replace_all(std::string &s,
+                        const std::string &old, const std::string &new_) {
+  // find() matches an empty pattern at every position, so without this
+  // guard the loop below would never terminate.
+  if (old.empty())
+    return;
+  std::string::size_type pos = 0;
+  while ((pos = s.find(old, pos)) != std::string::npos) {
+    s.replace(pos, old.size(), new_);
+    pos += new_.size();  // continue after the inserted text
+  }
+}
+
+// True if `name` equals one of the `sep`-separated fields of `list`.
+// list is a comma separated string
+inline bool is_in_list(const std::string& name, const std::string& list,
+                       char sep=',') {
+  // A name at least as long as the list can only match the list as a whole.
+  if (name.length() >= list.length())
+    return name == list;
+  size_t field_start = 0;
+  for (;;) {
+    const size_t field_end = list.find(sep, field_start);
+    // For the last field field_end is npos; compare() clamps the length.
+    if (list.compare(field_start, field_end - field_start, name) == 0)
+      return true;
+    if (field_end == std::string::npos)
+      return false;
+    field_start = field_end + 1;
+  }
+}
+
+//   #####   vector helpers   #####
+
+// True if some element of `v` compares equal to `x`.
+template <class T>
+bool in_vector(const T& x, const std::vector<T>& v) {
+  for (const T& item : v)
+    if (item == x)
+      return true;
+  return false;
+}
+
+// True if the predicate `f` holds for some element of `v`.
+template <typename F, typename T>
+bool in_vector_f(F f, const std::vector<T>& v) {
+  return std::any_of(v.begin(), v.end(), f);
+}
+
+// Pointer one past the last element of `v`, computed from data() and size().
+template <class T>
+T* vector_end_ptr(std::vector<T>& v) {
+  T* const first = v.data();
+  return first + v.size();
+}
+// Const overload of the above.
+template <class T>
+const T* vector_end_ptr(const std::vector<T>& v) {
+  const T* const first = v.data();
+  return first + v.size();
+}
+
+// Appends the elements of `src` to `dst` by moving them. When `dst` is
+// empty the whole vector is move-assigned instead.
+template <class T>
+void vector_move_extend(std::vector<T>& dst, std::vector<T>&& src) {
+  if (!dst.empty()) {
+    auto from = std::make_move_iterator(src.begin());
+    auto to = std::make_move_iterator(src.end());
+    dst.insert(dst.end(), from, to);
+    return;
+  }
+  dst = std::move(src);
+}
+
+// wrapper around the erase-remove idiom: drops every element of `v` for
+// which `condition` returns true, keeping the order of the others
+template <class T, typename F>
+void vector_remove_if(std::vector<T>& v, F&& condition) {
+  auto new_end = std::remove_if(v.begin(), v.end(), condition);
+  v.erase(new_end, v.end());
+}
+
+/// Inserts \a n new columns, filled with \a new_value, before column \a pos
+/// of a 2d array stored row by row in a vector.
+/// \param data 2d array of \a length rows with \a old_width elements each;
+///             on return every row has old_width + n elements.
+/// \param pos index of the first inserted column; pos == old_width appends.
+/// The asserts check data.size() == old_width * length and pos <= old_width.
+template <class T>
+void vector_insert_columns(std::vector<T>& data, size_t old_width,
+                           size_t length, size_t n, size_t pos, const T& new_value) {
+  assert(data.size() == old_width * length);
+  assert(pos <= old_width);
+  data.resize(data.size() + n * length);
+  // The vector is rewritten in place from the last element backwards, taking
+  // the rows in reverse order, so that elements are read from their old
+  // positions before those positions are overwritten.
+  typename std::vector<T>::iterator dst = data.end();
+  for (size_t i = length; i-- != 0; ) {
+    // columns pos..old_width-1 of row i (the part after the insertion)
+    for (size_t j = old_width; j-- != pos; )
+      *--dst = data[i * old_width + j];
+    // the n inserted columns
+    for (size_t j = n; j-- != 0; )
+      *--dst = new_value;
+    // columns 0..pos-1 of row i (the part before the insertion)
+    for (size_t j = pos; j-- != 0; )
+      *--dst = data[i * old_width + j];
+  }
+  assert(dst == data.begin());
+}
+/// Removes the column at position \a pos from a 2d array stored row by row.
+/// \param data 2d array with new_width+1 columns, in a vector; on return it
+///             is shrunk to the remaining elements.
+/// \param new_width number of columns left after the removal.
+/// \param pos index of the column to remove; the assert checks
+///            pos <= new_width.
+template <class T>
+void vector_remove_column(std::vector<T>& data, size_t new_width, size_t pos) {
+  assert(pos <= new_width);
+  // `pos` is reused as the write cursor and `source` is the read cursor.
+  // The inner loop copies new_width elements; the outer ++source then skips
+  // the next element, which belongs to the removed column.
+  for (size_t source = pos + 1; source < data.size(); ++source)
+    for (size_t i = 0; i < new_width && source < data.size(); ++i)
+      data[pos++] = data[source++];
+  // after the loop `pos` is the number of elements that were kept
+  data.resize(pos);
+}
+
+
+//   #####   other helpers   #####
+
+// Numeric ID used for case-insensitive comparison of 4 letters.
+// The four bytes are packed with s[0] as the most significant byte, then bit
+// 0x20 is cleared in each byte.
+// s must have 4 chars or 3 chars + NUL, ' ' and NUL are equivalent in s.
+constexpr int ialpha4_id(const char* s) {
+  return ~0x20202020 & ((s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3]);
+}
+// Numeric ID used for case-insensitive comparison of 3 letters.
+// Same packing as above for the first three bytes of s.
+constexpr int ialpha3_id(const char* s) {
+  return ~0x20202020 & ((s[0] << 16) | (s[1] << 8) | s[2]);
+}
+
+} // namespace gemmi
+#endif
diff --git a/gemmi_gph/gemmi/xds_ascii.hpp b/gemmi_gph/gemmi/xds_ascii.hpp
new file mode 100644
index 00000000..38b7a93f
--- /dev/null
+++ b/gemmi_gph/gemmi/xds_ascii.hpp
@@ -0,0 +1,183 @@
+// Copyright 2020 Global Phasing Ltd.
+//
+// Read XDS files: XDS_ASCII.HKL and INTEGRATE.HKL.
+
+#ifndef GEMMI_XDS_ASCII_HPP_
+#define GEMMI_XDS_ASCII_HPP_
+
+#include "input.hpp"     // for AnyStream, FileStream
+#include "unitcell.hpp"  // for UnitCell
+#include "util.hpp"      // for starts_with
+
+namespace gemmi {
+
+// True if `wavelength` lies within the tolerance of one of the recognised
+// in-house wavelengths.
+// from Pointless docs: likely in-house source, in which case
+// the unpolarised value is left unchanged (recognised wavelengths
+// are CuKalpha 1.5418 +- 0.0019, Mo 0.7107 +- 0.0002, Cr 2.29 +- 0.01)
+inline bool likely_in_house_source(double wavelength) {
+  if (std::fabs(wavelength - 1.5418) < 0.0019)  // CuKalpha
+    return true;
+  if (std::fabs(wavelength - 0.7107) < 0.0002)  // Mo
+    return true;
+  return std::fabs(wavelength - 2.29) < 0.01;   // Cr
+}
+
+// Metadata of an XDS reflection file, kept separately from the reflection
+// list. XdsAscii derives from this struct and adds the reflection data.
+struct XdsAsciiMetadata {
+  // One entry of `isets`, identified by its integer id.
+  struct Iset {
+    int id;  // no default initializer; always set by the constructor
+    std::string input_file;
+    double wavelength = 0.;
+    std::array<double,6> cell_constants = {0., 0., 0., 0., 0., 0.};
+    //statistics set by gather_iset_statistics()
+    // (initialized to -1 until then)
+    int frame_number_min = -1;
+    int frame_number_max = -1;
+    int frame_count = -1;
+    int reflection_count = -1;
+
+    // not declared explicit, so an int converts implicitly to an Iset
+    Iset(int id_) : id(id_) {}
+  };
+  std::string source_path;
+  int read_columns = 0;  // doesn't include ITEM_ISET from XSCALE
+  int spacegroup_number = 0;
+  double wavelength = 0.;
+  std::array<double,6> cell_constants = {0., 0., 0., 0., 0., 0.};
+  Mat33 cell_axes{0.};  // the rows are used as the a, b, c axis vectors
+  Vec3 incident_beam_dir;
+  double oscillation_range = 0.;
+  Vec3 rotation_axis;
+  double starting_angle = 0.;
+  double reflecting_range_esd = 0.;
+  char friedels_law = '\0';
+  int starting_frame = 1;
+  int nx = 0;  // detector size - number of pixels
+  int ny = 0;
+  double qx = 0.;  // pixel size in mm
+  double qy = 0.;
+  double orgx = 0.;
+  double orgy = 0.;
+  double detector_distance = 0.;
+  std::string generated_by;
+  std::string version_str;
+  std::vector<Iset> isets;
+};
+
+// Content of an XDS reflection file: the metadata inherited from
+// XdsAsciiMetadata plus the list of reflections in `data`.
+struct GEMMI_DLL XdsAscii : XdsAsciiMetadata {
+  // One reflection record. Only `iset` has a default member initializer.
+  struct Refl {
+    Miller hkl;
+    int iset = 1;
+    double iobs;
+    double sigma;
+    double xd;
+    double yd;
+    double zd;
+    double rlp;
+    double peak;
+    double corr;  // is it always integer?
+    double maxc;
+
+    // Frame number derived from zd: floor(zd + 1).
+    // ZD can be negative for a few reflections
+    int frame() const { return (int) std::floor(zd + 1); }
+  };
+  std::vector<Refl> data;
+
+  XdsAscii() = default;
+  // Copies the metadata only; `data` starts empty.
+  XdsAscii(const XdsAsciiMetadata& m) : XdsAsciiMetadata(m) {}
+
+  // Returns the Iset with the given id, appending a new one to `isets` if
+  // none exists yet.
+  Iset& find_or_add_iset(int id) {
+    for (Iset& i : isets)
+      if (i.id == id)
+        return i;
+    isets.emplace_back(id);
+    return isets.back();
+  }
+  void read_stream(AnyStream& reader, const std::string& source);
+
+  // Reads from the stream created by input.create_stream(), passing
+  // input.path() as the source name.
+  template<typename T>
+  void read_input(T&& input) {
+    read_stream(*input.create_stream(), input.path());
+  }
+
+  // True when fewer than 8 columns were read.
+  bool is_merged() const { return read_columns < 8; }
+
+  // set a few Iset properties in isets
+  void gather_iset_statistics();
+
+  // Rotation angle of a reflection:
+  // starting_angle + oscillation_range * (zd - starting_frame + 1).
+  double rot_angle(const Refl& refl) const {
+    double z = refl.zd - starting_frame + 1;
+    return starting_angle + oscillation_range * z;
+  }
+
+  // Unit vector along rotation_axis; fails if the axis has zero length.
+  // it's already normalized, but just in case normalize it again
+  Vec3 get_rotation_axis() const {
+    double length = rotation_axis.length();
+    if (length == 0)
+      fail("unknown rotation axis");
+    return rotation_axis / length;
+  }
+
+  // Unit vector along incident_beam_dir; fails if it has zero length.
+  // I'm not sure if always |incident_beam_dir| == 1/wavelength
+  Vec3 get_s0_direction() const {
+    double length = incident_beam_dir.length();
+    if (length == 0)
+      fail("unknown incident beam direction");
+    return incident_beam_dir / length;
+  }
+
+  // False if any of the three rows of cell_axes is all zeros.
+  bool has_cell_axes() const {
+    for (int i = 0; i < 3; ++i)
+      if (cell_axes[i][0] == 0 && cell_axes[i][1] == 0 && cell_axes[i][2] == 0)
+        return false;
+    return true;
+  }
+
+  /// Return transition matrix from "Cambridge" frame to XDS frame.
+  /// x_xds = M x_cam
+  Mat33 calculate_conversion_from_cambridge() const {
+    // Cambridge z direction is along the principal rotation axis
+    Vec3 z = get_rotation_axis();
+    // Cambridge x direction is along beam
+    Vec3 x = get_s0_direction();
+    Vec3 y = z.cross(x).normalized();
+    // beam and rotation axis may not be orthogonal
+    x = y.cross(z).normalized();
+    return Mat33::from_columns(x, y, z);
+  }
+
+  // Builds a matrix from the rows of cell_axes: the columns are the
+  // normalized b x c, a vector completing the triad, and a normalized
+  // vector perpendicular to b x c and c x a. Fails if cell_axes is not set.
+  Mat33 get_orientation() const {
+    if (!has_cell_axes())
+      fail("unknown unit cell axes");
+    Vec3 a = cell_axes.row_copy(0);
+    Vec3 b = cell_axes.row_copy(1);
+    Vec3 c = cell_axes.row_copy(2);
+    Vec3 ar = b.cross(c).normalized();
+    Vec3 br = c.cross(a);
+    Vec3 cr = ar.cross(br).normalized();
+    // recompute br from the two unit vectors
+    br = cr.cross(ar);
+    return Mat33::from_columns(ar, br, cr);
+  }
+
+  /// \param p is degree of polarization from range (0,1), as used in XDS.
+  void apply_polarization_correction(double p, Vec3 normal);
+
+  /// Removes reflections whose maxc exceeds \a overload.
+  /// \param overload is maximally allowed pixel value in a peak (MAXC).
+  void eliminate_overloads(double overload) {
+    vector_remove_if(data, [&](Refl& r) { return r.maxc > overload; });
+  }
+
+  /// Removes reflections whose zd is below batchmin - 1.
+  /// \param batchmin lowest allowed batch number.
+  void eliminate_batchmin(int batchmin) {
+    double minz = batchmin - 1;
+    vector_remove_if(data, [&](Refl& r) { return r.zd < minz; });
+  }
+};
+
+// Reads the file at `path` through a FileStream opened in "rb" mode and
+// returns the result; `path` is also passed on as the source name.
+inline XdsAscii read_xds_ascii_file(const std::string& path) {
+  XdsAscii xds;
+  const char* const file_name = path.c_str();
+  FileStream file(file_name, "rb");
+  xds.read_stream(file, path);
+  return xds;
+}
+
+/// read possibly gzipped file
+GEMMI_DLL XdsAscii read_xds_ascii(const std::string& path);
+
+} // namespace gemmi
+#endif
diff --git a/gemmi_gph/gz.cpp b/gemmi_gph/gz.cpp
new file mode 100644
index 00000000..d5e4123e
--- /dev/null
+++ b/gemmi_gph/gz.cpp
@@ -0,0 +1,189 @@
+// Copyright Global Phasing Ltd.
+
+#include <gemmi/gz.hpp>
+#include <cassert>
+#include <cstdio>       // fseek, ftell, fread
+#include <climits>      // INT_MAX
+#if USE_ZLIB_NG
+# define WITH_GZFILEOP 1
+# include <zlib-ng.h>
+# define GG(name) zng_ ## name
+#else
+# include <zlib.h>
+# define GG(name) name
+#endif
+#include <gemmi/fileutil.hpp> // file_open
+
+namespace gemmi {
+
+const char* const zlib_description =  // library name followed by its version macro
+#if USE_ZLIB_NG
+  "zlib-ng " ZLIBNG_VERSION;
+#else
+  "zlib " ZLIB_VERSION;
+#endif
+
+// Throws if the size is not found or if it is suspicious.
+// Anything outside of the arbitrary limits from 1 to 100x of the compressed
+// size (with 100 bytes of slack at the lower end) looks suspicious to us.
+// **This function should not be relied upon.**
+// In particular, if the return value is >= 4GiB - it's only a guess.
+size_t estimate_uncompressed_size(const std::string& path) {
+  fileptr_t f = file_open(path.c_str(), "rb");
+  unsigned char buf[4];
+  if (std::fread(buf, 1, 2, f.get()) != 2)
+    sys_fail("Failed to read: " + path);
+  if (buf[0] != 0x1f || buf[1] != 0x8b)  // the two gzip magic bytes
+    fail("File not in the gzip format: " + path);
+  if (std::fseek(f.get(), -4, SEEK_END) != 0)
+    sys_fail("fseek() failed (empty file?): " + path);
+  long pos = std::ftell(f.get());
+  if (pos <= 0)
+    sys_fail("ftell() failed on " + path);
+  size_t gzipped_size = pos + 4;
+  if (std::fread(buf, 1, 4, f.get()) != 4)
+    sys_fail("Failed to read last 4 bytes of: " + path);
+  unsigned orig_size = ((unsigned)buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0];
+  if (orig_size + 100 < gzipped_size || orig_size > 100 * gzipped_size) {
+    // The size is stored as 32-bit number. If the original size exceeds 4GiB,
+    // the stored number is modulo 4 GiB. So we just guess...
+    constexpr size_t max_uint = 4294967295U;
+    if (gzipped_size > max_uint / 6)
+      return max_uint + (sizeof(size_t) > 4 ? orig_size : 0);
+    fail("Cannot determine uncompressed size of " + path +
+         "\nWould it be " + std::to_string(gzipped_size) + " -> " +
+         std::to_string(orig_size) + " bytes?");
+  }
+  return orig_size;
+}
+
+static size_t big_gzread(gzFile file, void* buf, size_t len) {
+#if USE_ZLIB_NG
+  return GG(gzfread)(buf, 1, len, file);
+#else
+  // In zlib >= 1.2.9 we could use gzfread(). gzread() returns int: -1 on error.
+  size_t read_bytes = 0;
+  while (len > INT_MAX) {
+    int ret = gzread(file, buf, INT_MAX);
+    if (ret != INT_MAX)  // error or short read: never add a negative value to size_t
+      return read_bytes + (ret > 0 ? ret : 0);
+    read_bytes += INT_MAX;
+    len -= INT_MAX;
+    buf = (char*) buf + INT_MAX;
+  }
+  int ret = gzread(file, buf, (unsigned) len);
+  return read_bytes + (ret > 0 ? ret : 0);
+#endif
+}
+
+char* GzStream::gets(char* line, int size) {  // forwards to gzgets() (zng_gzgets() with zlib-ng)
+  return GG(gzgets)((gzFile)f, line, size);
+}
+
+int GzStream::getc() {  // forwards to gzgetc() (zng_gzgetc() with zlib-ng)
+  return GG(gzgetc)((gzFile)f);
+}
+
+bool GzStream::read(void* buf, size_t len) {  // true only if all len bytes were read
+  return big_gzread((gzFile)f, buf, len) == len;
+}
+
+bool GzStream::skip(size_t n) {  // relative seek; false when gzseek() returns -1
+  return GG(gzseek)((gzFile)f, n, SEEK_CUR) != -1;
+}
+
+long GzStream::tell() {  // forwards to gztell(); its result is converted to long
+  return GG(gztell)((gzFile)f);
+}
+
+std::string GzStream::read_rest() {
+  // Probe with a single character first, so nothing more is read at EOF.
+  std::string rest;
+  int first = getc();
+  if (first == EOF)
+    return rest;
+  rest += (char)first;
+  char chunk[512];
+  size_t got;
+  do {  // a read shorter than the chunk ends the loop
+    got = big_gzread((gzFile)f, chunk,  sizeof(chunk));
+    rest.append(chunk, got);
+  } while (got == sizeof(chunk));
+  return rest;
+}
+
+
+MaybeGzipped::MaybeGzipped(const std::string& path) : BasicInput(path) {}  // forwards path; empty body
+
+MaybeGzipped::~MaybeGzipped() {
+  if (file_)  // the statement guarded by this `if` is picked by the preprocessor below
+#if USE_ZLIB_NG || (ZLIB_VERNUM >= 0x1235)
+    GG(gzclose_r)((gzFile)file_);
+#else
+    gzclose((gzFile)file_);
+#endif
+}
+
+size_t MaybeGzipped::gzread_checked(void* buf, size_t len) {  // returns the number of bytes read
+  gzFile file = (gzFile) file_;
+  size_t read_bytes = big_gzread(file, buf, len);
+  if (read_bytes != len && !GG(gzeof)(file)) {  // incomplete read that is not explained by EOF
+    int errnum = 0;
+    std::string err_str = GG(gzerror)(file, &errnum);
+    if (errnum == Z_ERRNO)  // reported through sys_fail() rather than fail()
+      sys_fail("failed to read " + path());
+    if (errnum)
+      fail("Error reading " + path() + ": " + err_str);
+  }
+  if (read_bytes > len)  // should never happen
+    fail("Error reading " + path());
+  return read_bytes;
+}
+
+CharArray MaybeGzipped::uncompress_into_buffer(size_t limit) {  // limit == 0: read the whole file
+  if (!is_compressed())
+    return BasicInput::uncompress_into_buffer();
+  size_t size = (limit == 0 ? estimate_uncompressed_size(path()) : limit);
+  file_ = GG(gzopen)(path().c_str(), "rb");
+  if (!file_)
+    sys_fail("Failed to gzopen " + path());
+  if (size > 3221225471)
+    // if this exception is changed adjust prog/cif2mtz.cpp
+    fail("For now gz files above 3 GiB uncompressed are not supported.\n"
+         "To read " + path() + " first uncompress it.");
+  CharArray mem(size);
+  size_t read_bytes = gzread_checked(mem.data(), size);
+  // if the file is shorter than the size from header, adjust size
+  if (read_bytes < size) {
+    mem.set_size(read_bytes);  // should we call resize() here
+  } else if (limit == 0) { // read_bytes == size
+    int next_char;  // the file may be longer than the size from header: read in the rest
+    while (!GG(gzeof)((gzFile)file_) && (next_char = GG(gzgetc)((gzFile)file_)) != -1) {
+      if (mem.size() > 3221225471)
+        fail("For now gz files above 3 GiB uncompressed are not supported.\n"
+             "To read " + path() + " first uncompress it.");
+      GG(gzungetc)(next_char, (gzFile)file_);
+      size_t old_size = mem.size();
+      size_t grow = old_size != 0 ? old_size : 4096;  // doubling 0 bytes would loop forever
+      mem.resize(old_size + grow);
+      size_t n = gzread_checked(mem.data() + old_size, grow);
+      mem.set_size(old_size + n);
+    }
+  }
+  return mem;
+}
+
+std::unique_ptr<AnyStream> MaybeGzipped::create_stream() {
+  if (is_compressed()) {
+    file_ = GG(gzopen)(path().c_str(), "rb");  // kept in file_, which the destructor closes
+    if (!file_)
+      sys_fail("Failed to gzopen " + path());
+#if ZLIB_VERNUM >= 0x1235  // NOTE(review): no USE_ZLIB_NG here, unlike in the destructor - confirm
+    GG(gzbuffer)((gzFile)file_, 64*1024);
+#endif
+    return std::unique_ptr<AnyStream>(new GzStream(file_));
+  }
+  return BasicInput::create_stream();  // not compressed: defer to the base class
+}
+
+} // namespace gemmi
diff --git a/gemmi_gph/mtz.cpp b/gemmi_gph/mtz.cpp
new file mode 100644
index 00000000..04fd5bc8
--- /dev/null
+++ b/gemmi_gph/mtz.cpp
@@ -0,0 +1,991 @@
+// Copyright 2019-2023 Global Phasing Ltd.
+
+#include <gemmi/mtz.hpp>
+#include <cstring>            // for memcpy
+#include <algorithm>          // for stable_sort
+#include <gemmi/atof.hpp>     // for fast_atof
+#include <gemmi/atox.hpp>     // for simple_atoi, read_word
+#include <gemmi/gz.hpp>
+#include <gemmi/sprintf.hpp>
+
+namespace gemmi {
+
+namespace {
+
+double wrap_degrees(double phi) {
+  // angles that are already in [0, 360) are returned untouched
+  const bool in_range = (phi >= 0 && phi < 360.);
+  const double full_turns = std::floor(phi / 360.);
+  return in_range ? phi : phi - full_turns * 360.;
+}
+
+void shift_phase(float& phi, double shift, bool negate=false) {  // shift goes through deg() first
+  const double moved = phi + deg(shift);
+  phi = static_cast<float>(wrap_degrees(negate ? -moved : moved));
+}
+
+// apply phase shift to Hendrickson–Lattman coefficients HLA, HLB, HLC and HLD
+void shift_hl_coefficients(float& a, float& b, float& c, float& d,
+                           double shift, bool negate=false) {
+  double sinx = std::sin(shift);
+  double cosx = std::cos(shift);
+  double sin2x = 2 * sinx * cosx;       // double-angle identities: sin(2x)
+  double cos2x = sq(cosx)- sq(sinx);    // and cos(2x)
+  // a sin(x+y) + b cos(x+y) = a sin(x) cos(y) - b sin(x) sin(y)
+  //                         + a cos(x) sin(y) + b cos(x) cos(y)
+  float a_ = float(a * cosx - b * sinx);  // (a,b) is rotated by the shift,
+  float b_ = float(a * sinx + b * cosx);
+  float c_ = float(c * cos2x - d * sin2x);  // (c,d) by twice the shift
+  float d_ = float(c * sin2x + d * cos2x);
+  a = a_;                 // cos(phi)
+  b = negate ? -b_ : b_;  // sin(phi)
+  c = c_;                 // cos(2 phi)
+  d = negate ? -d_ : d_;  // sin(2 phi)
+}
+
+// generic helper (it was used in other places in the past): min and max, NaNs ignored
+template <typename T, typename FP=typename std::iterator_traits<T>::value_type>
+std::array<FP,2> calculate_min_max_disregarding_nans(T begin, T end) {
+  std::array<FP,2> result = {{NAN, NAN}};
+  T first = begin;
+  while (first != end && std::isnan(*first))  // skip the leading NaNs
+    ++first;
+  if (first == end)  // empty range or nothing but NaNs
+    return result;
+  result[0] = result[1] = *first;
+  for (T it = first; ++it != end; ) {
+    if (*it < result[0])
+      result[0] = *it;
+    else if (*it > result[1])
+      result[1] = *it;
+  }
+  return result;
+}
+
+const char* skip_word_and_space(const char* line) {  // returns the start of the next word
+  while (*line != '\0' && !std::isspace((unsigned char) *line))
+    ++line;
+  while (std::isspace((unsigned char) *line))  // the cast keeps bytes >= 0x80 non-negative
+    ++line;
+  return line;
+}
+
+UnitCell read_cell_parameters(const char* line) {
+  // Six consecutive numbers are parsed: a, b, c, alpha, beta, gamma.
+  // Passing &line lets each call continue where the previous one stopped.
+  double par[6];
+  for (double& value : par)
+    value = fast_atof(line, &line);
+  return UnitCell(par[0], par[1], par[2],
+                  par[3], par[4], par[5]);
+}
+
+} // anonymous namespace
+
+UnitCellParameters Mtz::get_average_cell_from_batch_headers(double* rmsd) const {
+  if (rmsd)  // optional output: six values, zeroed first
+    for (int i = 0; i < 6; ++i)
+      rmsd[i] = 0.;
+  std::array<double, 6> avg = {0., 0., 0., 0., 0., 0.};
+  for (const Batch& batch : batches)
+    for (int i = 0; i < 6; ++i) {  // the first six floats of each batch are averaged
+      // if batch headers are not set correctly, return global cell
+      if (batch.floats[i] <= 0)
+        return cell;
+      avg[i] += batch.floats[i];
+    }
+  if (avg[0] <= 0 || avg[1] <= 0 || avg[2] <= 0 ||
+      avg[3] <= 0 || avg[4] <= 0 || avg[5] <= 0)  // also true when there are no batches
+    return UnitCellParameters();
+  size_t n = batches.size();
+  for (int i = 0; i < 6; ++i)
+    avg[i] /= n;
+  if (rmsd) {
+    for (const Batch& batch : batches)
+      for (int i = 0; i < 6; ++i)
+        rmsd[i] += sq(avg[i] - batch.floats[i]);  // sum of squared deviations
+    for (int i = 0; i < 6; ++i)
+      rmsd[i] = std::sqrt(rmsd[i] / n);
+  }
+  // If average parameters are almost equal to the global cell, use the latter
+  // to avoid 32-bit precision artifacts (58.28 -> 58.279998).
+  if (UnitCellParameters(avg).approx(cell, 1e-4))
+    return cell;
+  return UnitCellParameters(avg);
+}
+
+std::array<double,2> Mtz::calculate_min_max_1_d2() const {
+  auto extend_min_max_1_d2 = [&](const UnitCell& uc, double& min, double& max) {
+    for (size_t i = 0; i < data.size(); i += columns.size()) {  // one row per step
+      double res = uc.calculate_1_d2_double(data[i+0], data[i+1], data[i+2]);
+      if (res < min)
+        min = res;
+      if (res > max)
+        max = res;
+    }
+  };
+  if (!has_data() || columns.size() < 3)  // the first three columns are needed
+    fail("No data.");
+  double min_value = INFINITY;
+  double max_value = 0.;
+  if (cell.is_crystal() && cell.a > 0)
+    extend_min_max_1_d2(cell, min_value, max_value);
+  const UnitCell* prev_cell = nullptr;
+  for (const Dataset& ds : datasets)  // cells equal to the global or the previously used one are skipped
+    if (ds.cell.is_crystal() && ds.cell.a > 0 && ds.cell != cell &&
+        (!prev_cell || ds.cell != *prev_cell)) {
+      extend_min_max_1_d2(ds.cell, min_value, max_value);
+      prev_cell = &ds.cell;
+    }
+  if (min_value == INFINITY)  // no usable cell (or no rows): report 0
+    min_value = 0;
+  return {{min_value, max_value}};
+}
+
+void Mtz::read_first_bytes(AnyStream& stream) {
+  char buf[20] = {0};
+
+  if (!stream.read(buf, 20))
+    fail("Could not read the MTZ file (is it empty?)");
+  if (buf[0] != 'M' || buf[1] != 'T' || buf[2] != 'Z' || buf[3] != ' ')
+    fail("Not an MTZ file - it does not start with 'MTZ '");
+
+  // Bytes 9-12 have so-called machine stamp:
+  // "The first 4 half-bytes represent the real, complex, integer and
+  // character formats".
+  // We don't try to handle all the combinations here, only the two most
+  // common: big endian (for all types) and little endian (for all types).
+  // BE is denoted by 1 and LE by 4.
+  // If we get a value different than 1 and 4 we assume the native byte order.
+  if ((buf[9] & 0xf0) == (is_little_endian() ? 0x10 : 0x40))
+    toggle_endianness();
+
+  std::int32_t tmp_header_offset;  // 32-bit offset stored in bytes 4..7 of buf
+  std::memcpy(&tmp_header_offset, buf + 4, 4);
+  if (!same_byte_order)
+    swap_four_bytes(&tmp_header_offset);
+
+  if (tmp_header_offset == -1) {  // -1: the offset is taken from the 8 bytes at buf+12 instead
+    std::memcpy(&header_offset, buf + 12, 8);
+    if (!same_byte_order) {
+      swap_eight_bytes(&header_offset);
+    }
+  } else {
+    header_offset = (int64_t) tmp_header_offset;
+  }
+  stream.skip(60);  // 20 bytes read + 60 skipped = byte 80, where read_raw_data() starts
+}
+
+void Mtz::read_main_headers(AnyStream& stream, std::vector<std::string>* save_headers) {
+  char line[81] = {0};  // 80-character records; the last byte stays 0
+  std::ptrdiff_t header_pos = 4 * std::ptrdiff_t(header_offset - 1);
+  // temporary check
+  long cur_pos = stream.tell();
+  if (cur_pos != header_pos && cur_pos != -1)
+    fail(cat("wrong pos ", int(header_pos), "  ", int(stream.tell())));
+  int ncol = 0;
+  bool has_batch = false;
+  while (stream.read(line, 80)) {
+    if (save_headers)  // optionally keep a raw copy of every record, including END
+      save_headers->emplace_back(line, line+80);
+    if (ialpha3_id(line) == ialpha3_id("END"))
+      break;
+    const char* args = skip_word_and_space(line);  // what follows the keyword
+    switch (ialpha4_id(line)) {  // dispatch on the first four characters of the record
+      case ialpha4_id("VERS"):
+        version_stamp = rtrim_str(args);
+        break;
+      case ialpha4_id("TITL"):
+        title = rtrim_str(args);
+        break;
+      case ialpha4_id("NCOL"): {
+        ncol = simple_atoi(args, &args);
+        nreflections = simple_atoi(args, &args);
+        int nbatches = simple_atoi(args);
+        if (nbatches < 0 || nbatches > 10000000)  // sanity check
+          fail("Wrong NCOL header");
+        batches.resize(nbatches);
+        break;
+      }
+      case ialpha4_id("CELL"):
+        cell = read_cell_parameters(args);
+        break;
+      case ialpha4_id("SORT"):
+        for (int& n : sort_order)
+          n = simple_atoi(args, &args);
+        break;
+      case ialpha4_id("SYMI"): {
+        nsymop = simple_atoi(args, &args);
+        symops.reserve(nsymop);
+        simple_atoi(args, &args); // ignore number of primitive operations
+        args = skip_word_and_space(skip_blank(args)); // ignore lattice type
+        spacegroup_number = simple_atoi(args, &args);
+        args = skip_blank(args);
+        if (*args != '\'')  // the name is either a bare word or enclosed in single quotes
+          spacegroup_name = read_word(args);
+        else if (const char* end = std::strchr(++args, '\''))
+          spacegroup_name.assign(args, end);
+        // ignore point group which is at the end of args
+        break;
+      }
+      case ialpha4_id("SYMM"):
+        symops.push_back(parse_triplet(args));
+        break;
+      case ialpha4_id("RESO"):
+        min_1_d2 = fast_atof(args, &args);
+        max_1_d2 = fast_atof(args, &args);
+        break;
+      case ialpha4_id("VALM"):
+        if (*args != 'N') {  // a value starting with 'N' leaves valm unchanged
+          const char* endptr;
+          float v = (float) fast_atof(args, &endptr);
+          if (*endptr == '\0' || is_space(*endptr))
+            valm = v;
+          else
+            logger.note("Unexpected VALM value: " + rtrim_str(args));
+        }
+        break;
+      case ialpha4_id("COLU"): {
+        columns.emplace_back();
+        Column& col = columns.back();
+        col.label = read_word(args, &args);
+        col.type = read_word(args, &args)[0];
+        col.min_value = (float) fast_atof(args, &args);
+        col.max_value = (float) fast_atof(args, &args);
+        col.dataset_id = simple_atoi(args);
+        col.parent = this;
+        col.idx = columns.size() - 1;
+        break;
+      }
+      case ialpha4_id("COLS"):
+        // COLSRC is undocumented. CMTZ (libccp4) adds it after COLUMN:
+        // COLUMN IMEAN                          J       -300.600006              4619    1
+        // COLSRC IMEAN                          CREATED_07/08/2019_11:00:23              1
+        if (!columns.empty() && columns.back().label == read_word(args, &args))
+          columns.back().source = read_word(args);
+        else
+          logger.note("MTZ: COLSRC is not after matching COLUMN");
+        break;
+      case ialpha4_id("COLG"):
+        // Column group - not used.
+        break;
+      case ialpha4_id("NDIF"):
+        datasets.reserve(simple_atoi(args));
+        break;
+      case ialpha4_id("PROJ"):  // this record starts a new dataset entry
+        datasets.emplace_back();
+        datasets.back().id = simple_atoi(args, &args);
+        datasets.back().project_name = read_word(skip_word_and_space(args));
+        datasets.back().wavelength = 0.0;
+        break;
+      case ialpha4_id("CRYS"):  // this and the next records must carry the id of the last dataset
+        if (simple_atoi(args, &args) == last_dataset().id)
+          datasets.back().crystal_name = read_word(args);
+        else
+          logger.note("MTZ CRYSTAL line: unusual numbering.");
+        break;
+      case ialpha4_id("DATA"):
+        if (simple_atoi(args, &args) == last_dataset().id)
+          datasets.back().dataset_name = read_word(args);
+        else
+          logger.note("MTZ DATASET line: unusual numbering.");
+        break;
+      case ialpha4_id("DCEL"):
+        if (simple_atoi(args, &args) == last_dataset().id)
+          datasets.back().cell = read_cell_parameters(args);
+        else
+          logger.note("MTZ DCELL line: unusual numbering.");
+        break;
+      // case("DRES"): not in use yet
+      case ialpha4_id("DWAV"):
+        if (simple_atoi(args, &args) == last_dataset().id)
+          datasets.back().wavelength = fast_atof(args);
+        else
+          logger.note("MTZ DWAV line: unusual numbering.");
+        break;
+      case ialpha4_id("BATCH"):
+        // We take number of batches from the NCOL record and serial numbers
+        // from BH. This header could be used only to check consistency.
+        has_batch = true;
+        break;
+      default:
+        logger.note("Unknown header: " + rtrim_str(line));
+    }
+  }
+  if (ncol != (int) columns.size())
+    fail("Number of COLU records inconsistent with NCOL record.");
+  if (has_batch != !batches.empty())
+    fail("BATCH header inconsistent with NCOL record.");
+  // adjust data size, if necessary
+  if (!data.empty()) {  // data was read before the headers: trim it to columns x reflections
+    size_t expected_size = columns.size() * nreflections;
+    if (data.size() > expected_size)
+      data.resize(expected_size);
+    else if (data.size() < expected_size)
+      fail("internal error, wrong data size");
+  }
+}
+
+void Mtz::read_history_and_batch_headers(AnyStream& stream) {
+  char buf[81] = {0};  // 80-character records; the last byte stays 0
+  int n_headers = 0;   // number of history lines still expected
+  while (stream.read(buf, 80) && ialpha4_id(buf) != ialpha4_id("MTZE")) {
+    if (n_headers != 0) {
+      const char* start = skip_blank(buf);
+      const char* end = rtrim_cstr(start, buf+80);  // the record ends at buf+80, not start+80
+      history.emplace_back(start, end);
+      --n_headers;
+    } else if (ialpha4_id(buf) == ialpha4_id("MTZH")) {
+      n_headers = simple_atoi(skip_word_and_space(buf+4));
+      if (n_headers < 0 || n_headers > 30) {
+        logger.note("Wrong MTZ: number of headers should be between 0 and 30");
+        return;
+      }
+      history.reserve(n_headers);
+    } else if (ialpha4_id(buf) == ialpha4_id("MTZB")) {
+      for (Batch& batch : batches) {
+        if (!stream.read(buf, 80) || ialpha3_id(buf) != ialpha3_id("BH "))
+          fail("Missing BH header");
+        const char* args = skip_blank(buf + 2);
+        batch.number = simple_atoi(args, &args);
+        int total_words = simple_atoi(args, &args);
+        int int_words = simple_atoi(args, &args);
+        int float_words = simple_atoi(args);
+        // negative counts would be turned into huge sizes by resize() below
+        if (total_words != int_words + float_words || total_words > 1000 ||
+            int_words < 0 || float_words < 0)
+          fail("Wrong BH header");
+        stream.read(buf, 80); // TITLE
+        const char* end = rtrim_cstr(buf + 6, buf+76);
+        batch.title.assign(buf, end - buf);
+        batch.ints.resize(int_words);
+        stream.read(batch.ints.data(), int_words * 4);
+        batch.floats.resize(float_words);
+        stream.read(batch.floats.data(), float_words * 4);
+        stream.read(buf, 80);
+        if (ialpha4_id(buf) != ialpha4_id("BHCH"))
+          fail("Missing BHCH header");
+        split_str_into_multi(buf + 5, " \t", batch.axes);
+      }
+    }
+  }
+  appended_text = stream.read_rest();
+}
+
+void Mtz::setup_spacegroup() {  // resolves spacegroup_name read from the headers
+  spacegroup = find_spacegroup_by_name(spacegroup_name, cell.alpha, cell.gamma);
+  if (!spacegroup) {
+    logger.note("MTZ: unrecognized spacegroup name: " + spacegroup_name);
+    return;
+  }
+  if (spacegroup->ccp4 != spacegroup_number)  // only reported, the name takes precedence
+    logger.note("MTZ: inconsistent spacegroup name and number");
+  cell.set_cell_images_from_spacegroup(spacegroup);
+  for (Dataset& d : datasets)
+    d.cell.set_cell_images_from_spacegroup(spacegroup);
+}
+
+// we should be at byte 80 (read_first_bytes() reads 20 bytes and skips 60)
+void Mtz::read_raw_data(AnyStream& stream, bool do_read) {
+  size_t n = size_t(header_offset - 1 - 20);  // number of 4-byte words before the headers
+  if (!do_read) {
+    if (!stream.skip(4 * n))
+      fail("ignoring mtz data segment failed");
+    return;
+  }
+  data.resize(n);
+  if (!stream.read(data.data(), 4 * n))
+    fail("Error when reading MTZ data");
+  if (!same_byte_order)  // convert every value to the native byte order
+    for (float& f : data)
+      swap_four_bytes(&f);
+}
+
+void Mtz::read_stream(AnyStream& stream, bool with_data) {
+  read_first_bytes(stream);
+  // The older implementation of MTZ reading first read the headers,
+  // then the data. This required jumping to the headers at the end,
+  // then back to the beginning of the data (byte 80).
+  // The current implementation avoids calling seek(), allowing
+  // incremental reading of streams (stdin, gzipped files, etc).
+  read_raw_data(stream, with_data);  // with_data == false: the data segment is skipped
+  read_main_headers(stream, nullptr);
+  read_history_and_batch_headers(stream);
+  setup_spacegroup();
+  if (datasets.empty())  // no dataset in the headers: add one that uses the global cell
+    datasets.push_back({0, "HKL_base", "HKL_base", "HKL_base", cell, 0.});
+}
+
+// for probing/testing individual reflections, no need to optimize it
+size_t Mtz::find_offset_of_hkl(const Miller& hkl, size_t start) const {
+  if (!has_data() || columns.size() < 3)
+    fail("No data.");
+  if (start != 0)  // round down to the beginning of a row
+    start -= (start % columns.size());
+  for (size_t n = start; n + 2 < data.size(); n += columns.size())
+    if (get_hkl(n) == hkl)
+      return n;
+  return (size_t)-1;  // not found
+}
+
+void Mtz::ensure_asu(bool tnt_asu) {
+  if (!is_merged())
+    fail("Mtz::ensure_asu() is for merged MTZ only");
+  if (!spacegroup)  // nothing can be done without a space group
+    return;
+  GroupOps gops = spacegroup->operations();
+  ReciprocalAsu asu(spacegroup, tnt_asu);
+  std::vector<int> phase_columns = positions_of_columns_with_type('P');
+  std::vector<int> abcd_columns = positions_of_columns_with_type('A');
+  std::vector<int> dano_columns = positions_of_columns_with_type('D');
+  std::vector<std::pair<int,int>> plus_minus_columns = positions_of_plus_minus_columns();
+  bool no_special_columns = phase_columns.empty() && abcd_columns.empty() &&
+                            plus_minus_columns.empty() && dano_columns.empty();
+  bool centric = no_special_columns || gops.is_centrosymmetric();
+  for (size_t n = 0; n < data.size(); n += columns.size()) {  // n is the offset of a row
+    Miller hkl = get_hkl(n);
+    if (asu.is_in(hkl))
+      continue;
+    auto result = asu.to_asu(hkl, gops);  // pair: new hkl and isym
+    // cf. impl::move_to_asu() in asudata.hpp
+    set_hkl(n, result.first);
+    if (no_special_columns)
+      continue;
+    int isym = result.second;
+    if (!phase_columns.empty() || !abcd_columns.empty()) {
+      const Op& op = gops.sym_ops[(isym - 1) / 2];  // isym 2k-1 and 2k both select operation k-1
+      double shift = op.phase_shift(hkl);
+      bool negate = (isym % 2 == 0);  // even isym: the phase is also negated
+      for (int col : phase_columns)
+        shift_phase(data[n + col], shift, negate);
+      for (auto i = abcd_columns.begin(); i+3 < abcd_columns.end(); i += 4)
+        // we expect coefficients HLA, HLB, HLC and HLD - in this order
+        shift_hl_coefficients(data[n + *(i+0)], data[n + *(i+1)],
+                              data[n + *(i+2)], data[n + *(i+3)],
+                              shift, negate);
+    }
+    if (isym % 2 == 0 && !centric &&
+        // usually, centric reflections have empty F(-), so avoid swapping it
+        !gops.is_reflection_centric(hkl)) {
+      for (std::pair<int,int> cols : plus_minus_columns)
+        std::swap(data[n + cols.first], data[n + cols.second]);
+      for (int col : dano_columns)  // type 'D' columns change sign
+        data[n + col] = -data[n + col];
+    }
+  }
+}
+
+void Mtz::reindex(const Op& op) {
+  if (op.tran != Op::Tran{0, 0, 0})
+    gemmi::fail("reindexing operator must not have a translation");
+  if (op.det_rot() < 0)
+    gemmi::fail("reindexing operator must preserve the hand of the axes");
+  switch_to_original_hkl();  // changes hkl for unmerged data only
+  Op xyz_op = op.as_xyz();
+  logger.mesg("Real space transformation: ", xyz_op.triplet());
+  bool row_removal = false;
+  // change Miller indices
+  for (size_t n = 0; n < data.size(); n += columns.size()) {
+    Miller hkl_den = op.apply_to_hkl_without_division(get_hkl(n));
+    Miller hkl = Op::divide_hkl_by_DEN(hkl_den);
+    if (hkl[0] * Op::DEN == hkl_den[0] &&
+        hkl[1] * Op::DEN == hkl_den[1] &&
+        hkl[2] * Op::DEN == hkl_den[2]) {
+      set_hkl(n, hkl);
+    } else {  // fractional hkl - remove
+      row_removal = true;
+      data[n] = NAN;  // mark for removal
+    }
+  }
+
+  // remove reflections marked for removal
+  if (row_removal) {
+    int n_before = nreflections;
+    remove_rows_if([](const float* h) { return std::isnan(*h); });
+    logger.mesg("Reflections removed (because of fractional indices): ", n_before - nreflections);
+  }
+
+  switch_to_asu_hkl();  // revert switch_to_original_hkl() for unmerged data
+
+  // change space group
+  if (spacegroup) {
+    GroupOps gops = spacegroup->operations();
+    gops.change_basis_backward(xyz_op);
+    const SpaceGroup* new_sg = find_spacegroup_by_ops(gops);
+    if (!new_sg)
+      fail("reindexing: failed to determine new space group name");
+    if (new_sg != spacegroup) {
+      logger.mesg("Space group changed from ", spacegroup->xhm(), " to ", new_sg->xhm(), '.');
+      set_spacegroup(new_sg);
+    } else {
+      logger.mesg("Space group stays the same: ", spacegroup->xhm(), '.');
+    }
+  }
+
+  // change unit cell parameters
+  cell = cell.changed_basis_backward(xyz_op, false);
+  for (Mtz::Dataset& ds : datasets)
+    ds.cell = ds.cell.changed_basis_backward(xyz_op, false);
+  for (Mtz::Batch& batch : batches)
+    batch.set_cell(batch.get_cell().changed_basis_backward(xyz_op, false));
+}
+
+void Mtz::expand_to_p1() {
+  if (!spacegroup || !has_data())
+    return;
+  std::vector<int> phase_columns = positions_of_columns_with_type('P');
+  std::vector<int> abcd_columns = positions_of_columns_with_type('A');
+  bool has_phases = (!phase_columns.empty() || !abcd_columns.empty());
+  GroupOps gops = spacegroup->operations();
+  data.reserve(gops.sym_ops.size() * data.size());  // upper bound: one copy per operation
+  size_t orig_size = data.size();
+  std::vector<Miller> hkl_copies;  // equivalents already added for the current row
+  for (size_t n = 0; n < orig_size; n += columns.size()) {
+    hkl_copies.clear();
+    Miller hkl = get_hkl(n);
+    // no reallocations because of reserve() above
+    auto orig_iter = data.begin() + n;
+    for (auto op = gops.sym_ops.begin() + 1; op < gops.sym_ops.end(); ++op) {  // the first op is skipped
+      Miller new_hkl = op->apply_to_hkl(hkl);
+      Op::Miller negated{{-new_hkl[0], -new_hkl[1], -new_hkl[2]}};
+      if (new_hkl != hkl && !in_vector(new_hkl, hkl_copies) &&
+          negated != hkl && !in_vector(negated, hkl_copies)) {  // neither it nor its negation is present
+        hkl_copies.push_back(new_hkl);
+        size_t offset = data.size();
+        data.insert(data.end(), orig_iter, orig_iter + columns.size());  // append a copy of the row
+        set_hkl(offset, new_hkl);
+        if (has_phases) {
+          double shift = op->phase_shift(hkl);
+          if (shift != 0) {
+            for (int col : phase_columns)
+              shift_phase(data[offset + col], shift);
+            for (auto i = abcd_columns.begin(); i+3 < abcd_columns.end(); i += 4)
+              // we expect coefficients HLA, HLB, HLC and HLD - in this order
+              shift_hl_coefficients(data[offset + *(i+0)], data[offset + *(i+1)],
+                                    data[offset + *(i+2)], data[offset + *(i+3)], shift);
+          }
+        }
+      }
+    }
+  }
+  nreflections = int(data.size() / columns.size());
+  sort_order = {{0, 0, 0, 0, 0}};  // the appended rows invalidate any recorded order
+  set_spacegroup(&get_spacegroup_p1());
+}
+
+bool Mtz::switch_to_original_hkl() {  // returns true if the flag was switched
+  if (indices_switched_to_original)
+    return false;
+  if (!has_data())
+    fail("switch_to_original_hkl(): data not read yet");
+  if (nreflections == 0) {
+    // This function can be called before the data is populated
+    // to set indices_switched_to_original, which is not exposed in Python.
+    indices_switched_to_original = true;
+    return true;
+  }
+  const Column* col = column_with_label("M/ISYM");
+  if (col == nullptr || col->type != 'Y' || col->idx < 3)
+    return false;
+  std::vector<Op> inv_symops;
+  inv_symops.reserve(symops.size());
+  for (const Op& op : symops)
+    inv_symops.push_back(op.inverse());
+  for (size_t n = 0; n + col->idx < data.size(); n += columns.size()) {
+    int isym = static_cast<int>(data[n + col->idx]) & 0xFF;  // only the low 8 bits are used
+    const Op& op = inv_symops.at((isym - 1) / 2);
+    Miller hkl = op.apply_to_hkl(get_hkl(n));
+    int sign = (isym & 1) ? 1 : -1;  // even isym: the indices are negated
+    for (int i = 0; i < 3; ++i)
+      data[n+i] = static_cast<float>(sign * hkl[i]);
+  }
+  indices_switched_to_original = true;
+  return true;
+}
+
+bool Mtz::switch_to_asu_hkl() {  // returns true if the indices were switched back
+  if (!indices_switched_to_original)
+    return false;
+  if (!has_data())
+    fail("switch_to_asu_hkl(): data not read yet");
+  const Column* col = column_with_label("M/ISYM");
+  if (col == nullptr || col->type != 'Y' || col->idx < 3 || !spacegroup)
+    return false;
+  size_t misym_idx = col->idx;
+  UnmergedHklMover hkl_mover(spacegroup);
+  for (size_t n = 0; n + col->idx < data.size(); n += columns.size()) {
+    Miller hkl = get_hkl(n);
+    int isym = hkl_mover.move_to_asu(hkl);  // modifies hkl
+    set_hkl(n, hkl);
+    float& misym = data[n + misym_idx];
+    misym = float(((int)misym & ~0xff) | isym);  // bits above the low 8 are kept, isym is OR-ed in
+  }
+  indices_switched_to_original = false;
+  return true;
+}
+
+void Mtz::read_file_gz(const std::string& path, bool with_data) {
+  try {
+    read_input(MaybeGzipped(path), with_data);
+  } catch (std::runtime_error& e) {  // other exception types propagate unchanged
+    // append path to the error like in read_file(), but shouldn't the path go first?
+    fail(std::string(e.what()) + ": " + path);
+  }
+}
+
+std::vector<int> Mtz::sorted_row_indices(int use_first) const {  // compares the first use_first columns
+  if (!has_data())
+    fail("No data.");
+  if (use_first <= 0 || use_first >= (int) columns.size())
+    fail("Wrong use_first arg in Mtz::sort.");
+  std::vector<int> indices(nreflections);
+  for (int i = 0; i != nreflections; ++i)
+    indices[i] = i;
+  std::stable_sort(indices.begin(), indices.end(), [&](int i, int j) {
+    size_t a = i * columns.size();  // size_t offsets: row * width may not fit in int
+    size_t b = j * columns.size();
+    for (int n = 0; n < use_first; ++n)
+      if (data[a+n] != data[b+n])
+        return data[a+n] < data[b+n];
+    return false;
+  });
+  return indices;
+}
+
+bool Mtz::sort(int use_first) {  // returns false if the rows were already in order
+  std::vector<int> indices = sorted_row_indices(use_first);
+  sort_order = {{0, 0, 0, 0, 0}};
+  for (int i = 0; i < use_first; ++i)  // record 1-based numbers of the columns used as keys
+    sort_order[i] = i + 1;
+  if (std::is_sorted(indices.begin(), indices.end()))
+    return false;
+  std::vector<float> new_data(data.size());
+  size_t w = columns.size();  // row width
+  for (size_t i = 0; i != indices.size(); ++i)
+    std::memcpy(&new_data[i * w], &data[indices[i] * w], w * sizeof(float));
+  data.swap(new_data);
+  return true;
+}
+
+Mtz::Column& Mtz::add_column(const std::string& label, char type,
+                             int dataset_id, int pos, bool expand_data) {
+  if (datasets.empty())
+    fail("No datasets.");
+  if (dataset_id < 0)  // negative id: use the last dataset
+    dataset_id = datasets.back().id;
+  else
+    dataset(dataset_id); // check if such dataset exist
+  if (pos > (int) columns.size())
+    fail("Requested column position after the end.");
+  if (pos < 0)  // negative position: append
+    pos = (int) columns.size();
+  auto col = columns.emplace(columns.begin() + pos);
+  for (auto i = col + 1; i != columns.end(); ++i)  // columns after the new one move by one
+    i->idx++;
+  col->dataset_id = dataset_id;
+  col->type = type;
+  col->label = label;
+  col->parent = this;
+  col->idx = pos;
+  if (expand_data)
+    expand_data_rows(1, pos);
+  return *col;
+}
+
+
+namespace {  // helper functions for copying, replacing and removing columns
+
+void check_column(const Mtz& mtz, size_t idx, const char* msg) {  // msg prefixes the error text
+  if (!mtz.has_data())
+    fail(msg, ": data not read yet");
+  if (idx >= mtz.columns.size())
+    fail(msg, ": no column with 0-based index ", std::to_string(idx));
+}
+// Fails unless src_col is followed by columns labelled as in trailing_cols.
+void check_trailing_cols(const Mtz& mtz, const Mtz::Column& src_col,
+                         const std::vector<std::string>& trailing_cols) {
+  assert(src_col.parent == &mtz);
+  if (!mtz.has_data())
+    fail("data in source mtz not read yet");
+  if (src_col.idx + trailing_cols.size() >= mtz.columns.size())
+    fail("Not enough columns after " + src_col.label);
+  const Mtz::Column* after = mtz.columns.data() + src_col.idx + 1;
+  for (size_t i = 0; i < trailing_cols.size(); ++i)  // empty label matches any
+    if (!trailing_cols[i].empty() && trailing_cols[i] != after[i].label)
+      fail("expected trailing column ", trailing_cols[i], ", found ", after[i].label);
+}
+// Overwrites columns of mtz from dest_idx on with src_col and its trailing columns.
+void do_replace_column(Mtz& mtz, size_t dest_idx, const Mtz::Column& src_col,
+                       const std::vector<std::string>& trailing_cols) {
+  const Mtz& from = *src_col.parent;
+  const size_t ncopied = trailing_cols.size() + 1;  // src_col + trailing ones
+  for (size_t k = 0; k != ncopied; ++k) {  // column metadata first
+    const Mtz::Column& in = from.columns[src_col.idx + k];
+    Mtz::Column& out = mtz.columns[dest_idx + k];
+    out.type = in.type;
+    out.label = in.label;
+    out.min_value = in.min_value;
+    out.max_value = in.max_value;
+    out.source = in.source;
+    out.dataset_id = in.dataset_id;
+  }
+  const size_t dst_stride = mtz.columns.size();
+  if (&from == &mtz) {
+    // internal copying: source and destination share the rows
+    for (size_t row = 0; row < mtz.data.size(); row += dst_stride)
+      for (size_t k = 0; k != ncopied; ++k)
+        mtz.data[row + dest_idx + k] = mtz.data[row + src_col.idx + k];
+    return;
+  }
+  // external copying: pair the rows by hkl, cf. for_matching_reflections()
+  const std::vector<int> dst_rows = mtz.sorted_row_indices();
+  const std::vector<int> src_rows = from.sorted_row_indices();
+  const size_t src_stride = from.columns.size();
+  auto d = dst_rows.begin();
+  auto s = src_rows.begin();
+  while (d != dst_rows.end() && s != src_rows.end()) {
+    const size_t dst_off = *d * dst_stride;
+    const size_t src_off = *s * src_stride;
+    const Miller dst_hkl = mtz.get_hkl(dst_off);
+    const Miller src_hkl = from.get_hkl(src_off);
+    if (dst_hkl == src_hkl) {
+      for (size_t k = 0; k != ncopied; ++k)
+        mtz.data[dst_off + dest_idx + k] = from.data[src_off + src_col.idx + k];
+      ++d;
+      ++s;
+    } else if (dst_hkl < src_hkl) {
+      ++d;  // no such reflection in the source: values are left as they are
+    } else {
+      ++s;
+    }
+  }
+}
+
+} // anonymous namespace
+
+Mtz::Column& Mtz::replace_column(size_t dest_idx, const Mtz::Column& src_col,
+                                 const std::vector<std::string>& trailing_cols) {
+  check_trailing_cols(*src_col.parent, src_col, trailing_cols);  // source side
+  check_column(*this, trailing_cols.size() + dest_idx, "replace_column()");
+  do_replace_column(*this, dest_idx, src_col, trailing_cols);
+  return columns[dest_idx];  // the first of the overwritten columns
+}
+
+Mtz::Column& Mtz::copy_column(int dest_idx, const Mtz::Column& src_col,
+                              const std::vector<std::string>& trailing_cols) {
+  // validate the input before anything gets modified
+  if (!has_data())
+    fail("copy_column(): data not read yet");
+  check_trailing_cols(*src_col.parent, src_col, trailing_cols);
+  const int n_new = 1 + (int) trailing_cols.size();  // src_col + trailing ones
+  if (dest_idx < 0)
+    dest_idx = (int) columns.size();  // negative index: append at the end
+  // Adding columns may invalidate src_col if it belongs to this Mtz, so its
+  // position after the insertion is worked out up front (-1: external source).
+  int src_pos = -1;
+  if (src_col.parent == this) {
+    src_pos = (int) src_col.idx;
+    if (src_pos >= dest_idx)
+      src_pos += n_new;
+  }
+  for (int i = 0; i < n_new; ++i)
+    add_column("", ' ', -1, dest_idx + i, false);  // data is expanded below
+  expand_data_rows(1 + trailing_cols.size(), dest_idx);
+  const Column& source = src_pos < 0 ? src_col : columns[src_pos];
+  // hkl-based row matching and copying of the values is done by the helper
+  do_replace_column(*this, dest_idx, source, trailing_cols);
+  return columns[dest_idx];
+}
+
+void Mtz::remove_column(size_t idx) {
+  check_column(*this, idx, "remove_column()");
+  auto shifted = columns.erase(columns.begin() + idx);
+  for (; shifted != columns.end(); ++shifted)
+    shifted->idx -= 1;  // columns after the removed one move left by one
+  vector_remove_column(data, columns.size(), idx);  // gets the reduced count
+  assert(columns.size() * nreflections == data.size());
+}
+
+// WRITE: printf-style output of one space-padded 80-byte record through write().
+#define WRITE(...) do { \
+    int len = snprintf_z(buf, 81, __VA_ARGS__); \
+    if (len < 80) \
+      std::memset(buf + len, ' ', 80 - len); \
+    if (write(buf, 80, 1) != 1) \
+      sys_fail("Writing MTZ file failed"); \
+  } while(0)
+
+template<typename Write>
+void Mtz::write_to_stream(Write write) const {
+  // write(ptr, size, nmemb) is called like fwrite and must return nmemb on success.
+  // uses: data, spacegroup, nreflections, batches, cell, sort_order,
+  //       valm, columns, datasets, history
+  if (!has_data())
+    fail("Cannot write Mtz which has no data");
+  if (!spacegroup)
+    fail("Cannot write Mtz which has no space group");
+  char buf[81] = {'M', 'T', 'Z', ' ', '\0'};
+  std::int64_t real_header_start = (int64_t) columns.size() * nreflections + 21;
+  std::int32_t header_start = (int32_t) real_header_start;
+  if (real_header_start > std::numeric_limits<int32_t>::max())
+    header_start = -1;  // does not fit in 32 bits: the 64-bit field below is used
+  else
+    real_header_start = 0;
+  std::memcpy(buf + 4, &header_start, 4);
+  std::int32_t machst = is_little_endian() ? 0x00004144 : 0x11110000;
+  std::memcpy(buf + 8, &machst, 4);
+  std::memcpy(buf + 12, &real_header_start, 8);
+  if (write(buf, 80, 1) != 1 ||
+      write(data.data(), 4, data.size()) != data.size())
+    fail("Writing MTZ file failed");
+  WRITE("VERS MTZ:V1.1");
+  WRITE("TITLE %s", title.c_str());
+  WRITE("NCOL %8zu %12d %8zu", columns.size(), nreflections, batches.size());
+  if (cell.is_crystal())
+    WRITE("CELL  %9.4f %9.4f %9.4f %9.4f %9.4f %9.4f",
+          cell.a, cell.b, cell.c, cell.alpha, cell.beta, cell.gamma);
+  WRITE("SORT  %3d %3d %3d %3d %3d", sort_order[0], sort_order[1],
+        sort_order[2], sort_order[3], sort_order[4]);
+  GroupOps ops = spacegroup->operations();
+  char lat_type = spacegroup->ccp4_lattice_type();
+  WRITE("SYMINF %3d %2d %c %5d %*s'%c%s' PG%s",
+        ops.order(),               // number of symmetry operations
+        (int) ops.sym_ops.size(),  // number of primitive operations
+        lat_type,                  // lattice type
+        spacegroup->ccp4,          // space group number
+        20 - (int) std::strlen(spacegroup->hm), "",
+        lat_type,                  // space group name (first letter)
+        spacegroup->hm + 1,        // space group name (the rest)
+        spacegroup->point_group_hm()); // point group name
+  // If we have symops that are the same as spacegroup->operations(),
+  // write symops to preserve the order of SYMM records.
+  if (!symops.empty() && ops.is_same_as(split_centering_vectors(symops)))
+    for (Op op : symops)
+      WRITE("SYMM %s", to_upper(op.triplet()).c_str());
+  else
+    for (Op op : ops)
+      WRITE("SYMM %s", to_upper(op.triplet()).c_str());
+  auto reso = calculate_min_max_1_d2();
+  WRITE("RESO %-20.12f %-20.12f", reso[0], reso[1]);
+  if (std::isnan(valm))
+    WRITE("VALM NAN");
+  else
+    WRITE("VALM %f", valm);
+  auto format17 = [](float f) {
+    char buffer[18];
+    int len = snprintf_z(buffer, 18, "%.9f", f);
+    return std::string(buffer, len > 0 ? std::min(len, 17) : 0);
+  };
+  for (const Column& col : columns) {
+    auto minmax = calculate_min_max_disregarding_nans(col.begin(), col.end());
+    const char* label = !col.label.empty() ? col.label.c_str() : "_";
+    WRITE("COLUMN %-30s %c %17s %17s %4d",
+          label, col.type,
+          format17(minmax[0]).c_str(), format17(minmax[1]).c_str(),
+          col.dataset_id);
+    if (!col.source.empty())
+      WRITE("COLSRC %-30s %-36s  %4d", label, col.source.c_str(), col.dataset_id);
+  }
+  WRITE("NDIF %8zu", datasets.size());
+  for (const Dataset& ds : datasets) {
+    WRITE("PROJECT %7d %s", ds.id, ds.project_name.c_str());
+    WRITE("CRYSTAL %7d %s", ds.id, ds.crystal_name.c_str());
+    WRITE("DATASET %7d %s", ds.id, ds.dataset_name.c_str());
+    const UnitCell& uc = (ds.cell.is_crystal() && ds.cell.a > 0 ? ds.cell : cell);
+    WRITE("DCELL %9d %10.4f%10.4f%10.4f%10.4f%10.4f%10.4f",
+          ds.id, uc.a, uc.b, uc.c, uc.alpha, uc.beta, uc.gamma);
+    WRITE("DWAVEL %8d %10.5f", ds.id, ds.wavelength);
+  }
+  int pos = 6;  // offset of the next number: "BATCH " + up to 12 numbers per line
+  for (const Batch& batch : batches) {
+    if (pos == 6)
+      std::memcpy(buf, "BATCH ", 6);  // NOLINT(bugprone-not-null-terminated-result)
+    snprintf_z(buf + pos, 7, "%6d", batch.number);
+    pos += 6;  // step over the number, so that padding starts after it
+    if (pos > 72 || &batch == &batches.back()) {
+      std::memset(buf + pos, ' ', 80 - pos);
+      if (write(buf, 80, 1) != 1)
+        fail("Writing MTZ file failed");
+      pos = 6;
+    }
+  }
+  WRITE("END");
+  if (!history.empty()) {
+    // According to mtzformat.html the file can have only up to 30 history
+    // lines, but we don't enforce it here.
+    WRITE("MTZHIST %3zu", history.size());
+    for (const std::string& line : history)
+      WRITE("%s", line.c_str());
+  }
+  if (!batches.empty()) {
+    WRITE("MTZBATS");
+    for (const Batch& batch : batches) {
+      // keep the numbers the same as in files written by libccp4
+      WRITE("BH %8d %7zu %7zu %7zu",
+            batch.number, batch.ints.size() + batch.floats.size(),
+            batch.ints.size(), batch.floats.size());
+      WRITE("TITLE %.70s", batch.title.c_str());
+      if (batch.ints.size() != 29 || batch.floats.size() != 156)
+        fail("wrong size of binaries batch headers");
+      if (write(batch.ints.data(), 4, batch.ints.size()) != batch.ints.size() ||
+          write(batch.floats.data(), 4, batch.floats.size()) != batch.floats.size())
+        fail("Writing MTZ file failed");
+      WRITE("BHCH  %7.7s %7.7s %7.7s",
+            batch.axes.size() > 0 ? batch.axes[0].c_str() : "",
+            batch.axes.size() > 1 ? batch.axes[1].c_str() : "",
+            batch.axes.size() > 2 ? batch.axes[2].c_str() : "");
+    }
+  }
+  WRITE("MTZENDOFHEADERS");
+  if (!appended_text.empty() &&
+      write(appended_text.data(), appended_text.size(), 1) != 1)
+    fail("Writing MTZ file failed");
+}
+
+#undef WRITE
+
+void Mtz::write_to_cstream(std::FILE* stream) const {
+  write_to_stream([stream](const void *src, size_t item_size, size_t count) {
+      return std::fwrite(src, item_size, count, stream);  // items written
+  });
+}
+
+void Mtz::write_to_string(std::string& str) const {
+  // Two passes: first only count the bytes, so that str is sized exactly once
+  // and no re-allocation happens while writing. This hasn't been benchmarked
+  // against single-pass writing.
+  const size_t total = size_to_write();
+  str.resize(total);
+  write_to_buffer(&str[0], total);
+}
+
+void Mtz::write_to_file(const std::string& path) const {
+  fileptr_t out = file_open(path.c_str(), "wb");
+  try {
+    write_to_cstream(out.get());
+  } catch (const std::runtime_error& err) {
+    fail(std::string(err.what()) + ": " + path);  // same message + file path
+  }
+}
+
+size_t Mtz::size_to_write() const {
+  size_t total = 0;  // dry run: the callback only adds up the sizes
+  write_to_stream([&total](const void *, size_t item_size, size_t count) {
+      total += item_size * count;
+      return count;  // report every item as written
+  });
+  return total;
+}
+
+size_t Mtz::write_to_buffer(char* buf, size_t maxlen) const {
+  char* out = buf;  // next free byte of the caller's buffer
+  write_to_stream([&](const void *src, size_t item_size, size_t count) {
+      const size_t chunk = item_size * count;
+      if ((size_t)(out - buf) + chunk > maxlen)
+        fail("Mtz::write_to_buffer: size too small");
+      std::memcpy(out, src, chunk);
+      out += chunk;
+      return count;
+  });
+  return (size_t)(out - buf);
+}
+
+} // namespace gemmi
diff --git a/gemmi_gph/sprintf.cpp b/gemmi_gph/sprintf.cpp
new file mode 100644
index 00000000..68b32b67
--- /dev/null
+++ b/gemmi_gph/sprintf.cpp
@@ -0,0 +1,68 @@
+// Copyright 2017 Global Phasing Ltd.
+
+#include <gemmi/sprintf.hpp>
+#include <stdarg.h>  // for va_list
+
+#ifdef USE_STD_SNPRINTF  // useful for benchmarking and testing only
+# include <cstdio>
+# include <algorithm> // for min
+#else
+# define STB_SPRINTF_IMPLEMENTATION
+# define STB_SPRINTF_STATIC
+# define STB_SPRINTF_NOUNALIGNED 1
+// Making functions from stb_sprintf static may trigger warnings.
+# if defined(__GNUC__)
+#  pragma GCC diagnostic ignored "-Wunused-function"
+# endif
+# if defined(__clang__)
+#  pragma clang diagnostic ignored "-Wunused-function"
+# endif
+
+// The bundled copy of stb_sprintf.h is in stb/ next to this file. To use
+// system stb_sprintf.h instead (not recommended, but some Linux distros
+// don't like bundled libraries) define GEMMI_USE_SYSTEM_STB or remove stb/.
+# if defined(__has_include)
+#  if !__has_include("stb/stb_sprintf.h")
+#   define GEMMI_USE_SYSTEM_STB 1
+#  endif
+# endif
+# ifdef GEMMI_USE_SYSTEM_STB
+#  pragma message("Using system stb_sprintf.h, not the bundled one. It may not work.")
+#  include <stb/stb_sprintf.h>
+# else
+#  include "stb/stb_sprintf.h"
+# endif
+#endif  // USE_STD_SNPRINTF
+
+namespace gemmi {
+
+// We copy functions from sprintf.h only to have them declared with GEMMI_DLL.
+int sprintf_z(char *buf, char const *fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+#ifdef USE_STD_SNPRINTF
+  const int written = std::vsprintf(buf, fmt, args);
+#else
+  // no callback and no user pointer: everything is written directly to buf
+  const int written = STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, args);
+#endif
+  va_end(args);
+  return written;
+}
+
+int snprintf_z(char *buf, int count, char const *fmt, ...) {
+  int result;
+  va_list va;
+  va_start(va, fmt);
+#ifdef USE_STD_SNPRINTF
+  result = std::vsnprintf(buf, count > 0 ? count : 0, fmt, va);
+  if (count > 0)  // terminate like stbsp_snprintf; result < 0 signals an error
+    buf[std::max(0, std::min(result, count-1))] = '\0';
+#else
+  result = STB_SPRINTF_DECORATE(vsnprintf)(buf, count, fmt, va);
+#endif
+  va_end(va);
+  return result;
+}
+
+}  // namespace gemmi
diff --git a/gemmi_gph/stb/stb_sprintf.h b/gemmi_gph/stb/stb_sprintf.h
new file mode 100644
index 00000000..28e9d64b
--- /dev/null
+++ b/gemmi_gph/stb/stb_sprintf.h
@@ -0,0 +1,1906 @@
+// stb_sprintf - v1.10 - public domain snprintf() implementation
+// originally by Jeff Roberts / RAD Game Tools, 2015/10/20
+// http://github.com/nothings/stb
+//
+// allowed types:  sc uidBboXx p AaGgEef n
+// lengths      :  hh h ll j z t I64 I32 I
+//
+// Contributors:
+//    Fabian "ryg" Giesen (reformatting)
+//    github:aganm (attribute format)
+//
+// Contributors (bugfixes):
+//    github:d26435
+//    github:trex78
+//    github:account-login
+//    Jari Komppa (SI suffixes)
+//    Rohit Nirmal
+//    Marcin Wojdyr
+//    Leonard Ritter
+//    Stefano Zanotti
+//    Adam Allison
+//    Arvid Gerstmann
+//    Markus Kolb
+//
+// LICENSE:
+//
+//   See end of file for license information.
+
+#ifndef STB_SPRINTF_H_INCLUDE
+#define STB_SPRINTF_H_INCLUDE
+
+/*
+Single file sprintf replacement.
+
+Originally written by Jeff Roberts at RAD Game Tools - 2015/10/20.
+Hereby placed in public domain.
+
+This is a full sprintf replacement that supports everything that
+the C runtime sprintfs support, including float/double, 64-bit integers,
+hex floats, field parameters (%*.*d stuff), length read-backs (%n), etc.
+
+Why would you need this if sprintf already exists?  Well, first off,
+it's *much* faster (see below). It's also much smaller than the CRT
+versions code-space-wise. We've also added some simple improvements
+that are super handy (commas in thousands, callbacks at buffer full,
+for example). Finally, the format strings for MSVC and GCC differ
+for 64-bit integers (among other small things), so this lets you use
+the same format strings in cross platform code.
+
+It uses the standard single file trick of being both the header file
+and the source itself. If you just include it normally, you just get
+the header file function definitions. To get the code, you include
+it from a C or C++ file and define STB_SPRINTF_IMPLEMENTATION first.
+
+It only uses va_args macros from the C runtime to do its work. It
+does cast doubles to S64s and shifts and divides U64s, which does
+drag in CRT code on most platforms.
+
+It compiles to roughly 8K with float support, and 4K without.
+As a comparison, when using MSVC static libs, calling sprintf drags
+in 16K.
+
+API:
+====
+int stbsp_sprintf( char * buf, char const * fmt, ... )
+int stbsp_snprintf( char * buf, int count, char const * fmt, ... )
+  Convert an arg list into a buffer.  stbsp_snprintf always returns
+  a zero-terminated string (unlike regular snprintf).
+
+int stbsp_vsprintf( char * buf, char const * fmt, va_list va )
+int stbsp_vsnprintf( char * buf, int count, char const * fmt, va_list va )
+  Convert a va_list arg list into a buffer.  stbsp_vsnprintf always returns
+  a zero-terminated string (unlike regular snprintf).
+
+int stbsp_vsprintfcb( STBSP_SPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va )
+    typedef char * STBSP_SPRINTFCB( char const * buf, void * user, int len );
+  Convert into a buffer, calling back every STB_SPRINTF_MIN chars.
+  Your callback can then copy the chars out, print them or whatever.
+  This function is actually the workhorse for everything else.
+  The buffer you pass in must hold at least STB_SPRINTF_MIN characters.
+    // you return the next buffer to use or 0 to stop converting
+
+void stbsp_set_separators( char comma, char period )
+  Set the comma and period characters to use.
+
+FLOATS/DOUBLES:
+===============
+This code uses an internal float->ascii conversion method that uses
+doubles with error correction (double-doubles, for ~105 bits of
+precision).  This conversion is round-trip perfect - that is, an atof
+of the values output here will give you the bit-exact double back.
+
+One difference is that our insignificant digits will be different than
+with MSVC or GCC (but they don't match each other either).  We also
+don't attempt to find the minimum length matching float (pre-MSVC15
+doesn't either).
+
+If you don't need float or doubles at all, define STB_SPRINTF_NOFLOAT
+and you'll save 4K of code space.
+
+64-BIT INTS:
+============
+This library also supports 64-bit integers and you can use MSVC style or
+GCC style indicators (%I64d or %lld).  It supports the C99 specifiers
+for intmax_t, size_t and ptrdiff_t (%jd %zd %td) as well.
+
+EXTRAS:
+=======
+Like some GCCs, for integers and floats, you can use a ' (single quote)
+specifier and commas will be inserted on the thousands: "%'d" on 12345
+would print 12,345.
+
+For integers and floats, you can use a "$" specifier and the number
+will be converted to float and then divided to get kilo, mega, giga or
+tera and then printed, so "%$d" 1000 is "1.0 k", "%$.2d" 2536000 is
+"2.53 M", etc. For byte values, use two $:s, like "%$$d" to turn
+2536000 to "2.42 Mi". If you prefer JEDEC suffixes to SI ones, use three
+$:s: "%$$$d" -> "2.42 M". To remove the space between the number and the
+suffix, add "_" specifier: "%_$d" -> "2.53M".
+
+In addition to octal and hexadecimal conversions, you can print
+integers in binary: "%b" for 4 would print 100.
+
+PERFORMANCE vs MSVC 2008 32-/64-bit (GCC is even slower than MSVC):
+===================================================================
+"%d" across all 32-bit ints (4.8x/4.0x faster than 32-/64-bit MSVC)
+"%24d" across all 32-bit ints (4.5x/4.2x faster)
+"%x" across all 32-bit ints (4.5x/3.8x faster)
+"%08x" across all 32-bit ints (4.3x/3.8x faster)
+"%f" across e-10 to e+10 floats (7.3x/6.0x faster)
+"%e" across e-10 to e+10 floats (8.1x/6.0x faster)
+"%g" across e-10 to e+10 floats (10.0x/7.1x faster)
+"%f" for values near e-300 (7.9x/6.5x faster)
+"%f" for values near e+300 (10.0x/9.1x faster)
+"%e" for values near e-300 (10.1x/7.0x faster)
+"%e" for values near e+300 (9.2x/6.0x faster)
+"%.320f" for values near e-300 (12.6x/11.2x faster)
+"%a" for random values (8.6x/4.3x faster)
+"%I64d" for 64-bits with 32-bit values (4.8x/3.4x faster)
+"%I64d" for 64-bits > 32-bit values (4.9x/5.5x faster)
+"%s%s%s" for 64 char strings (7.1x/7.3x faster)
+"...512 char string..." ( 35.0x/32.5x faster!)
+*/
+// STBSP__ASAN marks functions that AddressSanitizer must not instrument.
+#if defined(__clang__)
+ #if defined(__has_feature) && defined(__has_attribute)
+  #if __has_feature(address_sanitizer)
+   #if __has_attribute(__no_sanitize__)
+    #define STBSP__ASAN __attribute__((__no_sanitize__("address")))
+   #elif __has_attribute(__no_sanitize_address__)
+    #define STBSP__ASAN __attribute__((__no_sanitize_address__))
+   #elif __has_attribute(__no_address_safety_analysis__)
+    #define STBSP__ASAN __attribute__((__no_address_safety_analysis__))
+   #endif
+  #endif
+ #endif
+#elif defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+ #if defined(__SANITIZE_ADDRESS__) && __SANITIZE_ADDRESS__
+  #define STBSP__ASAN __attribute__((__no_sanitize_address__))
+ #endif
+#endif
+
+#ifndef STBSP__ASAN // no ASan build detected, or no known attribute: expands to nothing
+#define STBSP__ASAN
+#endif
+// Linkage of the API: static with STB_SPRINTF_STATIC, otherwise external.
+#ifdef STB_SPRINTF_STATIC
+#define STBSP__PUBLICDEC static
+#define STBSP__PUBLICDEF static STBSP__ASAN
+#else
+#ifdef __cplusplus
+#define STBSP__PUBLICDEC extern "C"
+#define STBSP__PUBLICDEF extern "C" STBSP__ASAN
+#else
+#define STBSP__PUBLICDEC extern
+#define STBSP__PUBLICDEF STBSP__ASAN
+#endif
+#endif
+
+#if defined(__has_attribute) && !defined(__MINGW32__) // printf-style format attribute, if available
+ #if __has_attribute(format)
+   #define STBSP__ATTRIBUTE_FORMAT(fmt,va) __attribute__((format(printf,fmt,va)))
+ #endif
+#endif
+
+#ifndef STBSP__ATTRIBUTE_FORMAT // fallback: the attribute expands to nothing
+#define STBSP__ATTRIBUTE_FORMAT(fmt,va)
+#endif
+
+#ifdef _MSC_VER
+#define STBSP__NOTUSED(v)  (void)(v)
+#else
+#define STBSP__NOTUSED(v)  (void)sizeof(v)
+#endif
+
+#include <stdarg.h> // for va_arg(), va_list()
+#include <stddef.h> // size_t, ptrdiff_t
+// Public API; names go through STB_SPRINTF_DECORATE (prefix stbsp_ unless redefined).
+#ifndef STB_SPRINTF_MIN
+#define STB_SPRINTF_MIN 512 // how many characters per callback
+#endif
+typedef char *STBSP_SPRINTFCB(const char *buf, void *user, int len); // returns the next buffer to use, or 0 to stop
+
+#ifndef STB_SPRINTF_DECORATE
+#define STB_SPRINTF_DECORATE(name) stbsp_##name // define this before including if you want to change the names
+#endif
+
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va);
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsnprintf)(char *buf, int count, char const *fmt, va_list va);
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...) STBSP__ATTRIBUTE_FORMAT(2,3);
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...) STBSP__ATTRIBUTE_FORMAT(3,4);
+
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va);
+STBSP__PUBLICDEC void STB_SPRINTF_DECORATE(set_separators)(char comma, char period);
+
+#endif // STB_SPRINTF_H_INCLUDE
+
+#ifdef STB_SPRINTF_IMPLEMENTATION
+// Integer type aliases and build options used by the implementation below.
+#define stbsp__uint32 unsigned int
+#define stbsp__int32 signed int
+
+#ifdef _MSC_VER
+#define stbsp__uint64 unsigned __int64
+#define stbsp__int64 signed __int64
+#else
+#define stbsp__uint64 unsigned long long
+#define stbsp__int64 signed long long
+#endif
+#define stbsp__uint16 unsigned short
+
+#ifndef stbsp__uintptr // may be predefined; otherwise 64-bit on the targets listed below, else 32-bit
+#if defined(__ppc64__) || defined(__powerpc64__) || defined(__aarch64__) || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64) || defined(__s390x__)
+#define stbsp__uintptr stbsp__uint64
+#else
+#define stbsp__uintptr stbsp__uint32
+#endif
+#endif
+
+#ifndef STB_SPRINTF_MSVC_MODE // used for MSVC2013 and earlier (MSVC2015 matches GCC)
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+#define STB_SPRINTF_MSVC_MODE
+#endif
+#endif
+
+#ifdef STB_SPRINTF_NOUNALIGNED // define this before inclusion to force stbsp_sprintf to always use aligned accesses
+#define STBSP__UNALIGNED(code)
+#else
+#define STBSP__UNALIGNED(code) code
+#endif
+
+#ifndef STB_SPRINTF_NOFLOAT
+// internal float utility functions
+static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits);
+static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value);
+#define STBSP__SPECIAL 0x7000
+#endif
+
+static char stbsp__period = '.'; // default; replaced by set_separators()
+static char stbsp__comma = ',';  // default; replaced by set_separators()
+static struct
+{
+   short temp; // force next field to be 2-byte aligned
+   char pair[201]; // "00" .. "99": 100 two-digit pairs plus the terminating NUL
+} stbsp__digitpair =
+{
+  0,
+   "00010203040506070809101112131415161718192021222324"
+   "25262728293031323334353637383940414243444546474849"
+   "50515253545556575859606162636465666768697071727374"
+   "75767778798081828384858687888990919293949596979899"
+};
+
+STBSP__PUBLICDEF void STB_SPRINTF_DECORATE(set_separators)(char pcomma, char pperiod)
+{
+   // both settings are file-level statics, shared by all later calls
+   stbsp__comma = pcomma;
+   stbsp__period = pperiod;
+}
+// Bit flags collected in `fl` while a conversion specification is parsed.
+#define STBSP__LEFTJUST 1 // '-'
+#define STBSP__LEADINGPLUS 2 // '+'
+#define STBSP__LEADINGSPACE 4 // ' '
+#define STBSP__LEADING_0X 8 // '#'
+#define STBSP__LEADINGZERO 16 // '0'
+#define STBSP__INTMAX 32 // 64-bit argument (ll, I64; l, j, z, t, I depending on type sizes)
+#define STBSP__TRIPLET_COMMA 64 // '\''
+#define STBSP__NEGATIVE 128 // makes stbsp__lead_sign() emit '-'
+#define STBSP__METRIC_SUFFIX 256 // '$'
+#define STBSP__HALFWIDTH 512 // h (also set for hh)
+#define STBSP__METRIC_NOSPACE 1024 // '_'
+#define STBSP__METRIC_1024 2048 // second '$'
+#define STBSP__METRIC_JEDEC 4096 // third '$'
+
+static void stbsp__lead_sign(stbsp__uint32 fl, char *sign)
+{
+   // sign[0]: number of sign chars (0 or 1); sign[1]: the char; '-' beats ' ' beats '+'
+   char ch = 0;
+   if (fl & STBSP__NEGATIVE)
+      ch = '-';
+   else if (fl & STBSP__LEADINGSPACE)
+      ch = ' ';
+   else if (fl & STBSP__LEADINGPLUS)
+      ch = '+';
+   sign[0] = ch ? 1 : 0;
+   if (ch)
+      sign[1] = ch;
+}
+// Returns the length of s, examining at most limit characters.
+static STBSP__ASAN stbsp__uint32 stbsp__strlen_limited(char const *s, stbsp__uint32 limit)
+{
+   char const * sn = s;
+
+   // get up to 4-byte alignment
+   for (;;) {
+      if (((stbsp__uintptr)sn & 3) == 0)
+         break;
+
+      if (!limit || *sn == 0)
+         return (stbsp__uint32)(sn - s);
+
+      ++sn;
+      --limit;
+   }
+
+   // scan over 4 bytes at a time to find terminating 0
+   // this will intentionally scan up to 3 bytes past the end of buffers,
+   // but because it works 4B aligned, it will never cross page boundaries
+   // (hence the STBSP__ASAN markup; the over-read here is intentional
+   // and harmless)
+   while (limit >= 4) {
+      stbsp__uint32 v = *(stbsp__uint32 *)sn;
+      // bit hack to find if there's a 0 byte in there
+      if ((v - 0x01010101) & (~v) & 0x80808080UL)
+         break; // the byte-wise loop below finds the exact position
+
+      sn += 4;
+      limit -= 4;
+   }
+
+   // handle the last few characters to find actual size
+   while (limit && *sn) {
+      ++sn;
+      --limit;
+   }
+
+   return (stbsp__uint32)(sn - s);
+}
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va)
+{
+   static char hex[] = "0123456789abcdefxp";
+   static char hexu[] = "0123456789ABCDEFXP";
+   char *bf;
+   char const *f;
+   int tlen = 0;
+
+   bf = buf;
+   f = fmt;
+   for (;;) {
+      stbsp__int32 fw, pr, tz;
+      stbsp__uint32 fl;
+
+      // macros for the callback buffer stuff
+      #define stbsp__chk_cb_bufL(bytes)                        \
+         {                                                     \
+            int len = (int)(bf - buf);                         \
+            if ((len + (bytes)) >= STB_SPRINTF_MIN) {          \
+               tlen += len;                                    \
+               if (0 == (bf = buf = callback(buf, user, len))) \
+                  goto done;                                   \
+            }                                                  \
+         }
+      #define stbsp__chk_cb_buf(bytes)    \
+         {                                \
+            if (callback) {               \
+               stbsp__chk_cb_bufL(bytes); \
+            }                             \
+         }
+      #define stbsp__flush_cb()                      \
+         {                                           \
+            stbsp__chk_cb_bufL(STB_SPRINTF_MIN - 1); \
+         } // flush if there is even one byte in the buffer
+      #define stbsp__cb_buf_clamp(cl, v)                \
+         cl = v;                                        \
+         if (callback) {                                \
+            int lg = STB_SPRINTF_MIN - (int)(bf - buf); \
+            if (cl > lg)                                \
+               cl = lg;                                 \
+         }
+
+      // fast copy everything up to the next % (or end of string)
+      for (;;) {
+         while (((stbsp__uintptr)f) & 3) {
+         schk1:
+            if (f[0] == '%')
+               goto scandd;
+         schk2:
+            if (f[0] == 0)
+               goto endfmt;
+            stbsp__chk_cb_buf(1);
+            *bf++ = f[0];
+            ++f;
+         }
+         for (;;) {
+            // Check if the next 4 bytes contain %(0x25) or end of string.
+            // Using the 'hasless' trick:
+            // https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord
+            stbsp__uint32 v, c;
+            v = *(stbsp__uint32 *)f;
+            c = (~v) & 0x80808080;
+            if (((v ^ 0x25252525) - 0x01010101) & c)
+               goto schk1;
+            if ((v - 0x01010101) & c)
+               goto schk2;
+            if (callback)
+               if ((STB_SPRINTF_MIN - (int)(bf - buf)) < 4)
+                  goto schk1;
+            #ifdef STB_SPRINTF_NOUNALIGNED
+                if(((stbsp__uintptr)bf) & 3) {
+                    bf[0] = f[0];
+                    bf[1] = f[1];
+                    bf[2] = f[2];
+                    bf[3] = f[3];
+                } else
+            #endif
+            {
+                *(stbsp__uint32 *)bf = v;
+            }
+            bf += 4;
+            f += 4;
+         }
+      }
+   scandd:
+
+      ++f;
+
+      // ok, we have a percent, read the modifiers first
+      fw = 0;
+      pr = -1;
+      fl = 0;
+      tz = 0;
+
+      // flags
+      for (;;) {
+         switch (f[0]) {
+         // if we have left justify
+         case '-':
+            fl |= STBSP__LEFTJUST;
+            ++f;
+            continue;
+         // if we have leading plus
+         case '+':
+            fl |= STBSP__LEADINGPLUS;
+            ++f;
+            continue;
+         // if we have leading space
+         case ' ':
+            fl |= STBSP__LEADINGSPACE;
+            ++f;
+            continue;
+         // if we have leading 0x
+         case '#':
+            fl |= STBSP__LEADING_0X;
+            ++f;
+            continue;
+         // if we have thousand commas
+         case '\'':
+            fl |= STBSP__TRIPLET_COMMA;
+            ++f;
+            continue;
+         // if we have kilo marker (none->kilo->kibi->jedec)
+         case '$':
+            if (fl & STBSP__METRIC_SUFFIX) {
+               if (fl & STBSP__METRIC_1024) {
+                  fl |= STBSP__METRIC_JEDEC;
+               } else {
+                  fl |= STBSP__METRIC_1024;
+               }
+            } else {
+               fl |= STBSP__METRIC_SUFFIX;
+            }
+            ++f;
+            continue;
+         // if we don't want space between metric suffix and number
+         case '_':
+            fl |= STBSP__METRIC_NOSPACE;
+            ++f;
+            continue;
+         // if we have leading zero
+         case '0':
+            fl |= STBSP__LEADINGZERO;
+            ++f;
+            goto flags_done;
+         default: goto flags_done;
+         }
+      }
+   flags_done:
+
+      // get the field width
+      if (f[0] == '*') {
+         fw = va_arg(va, stbsp__uint32);
+         ++f;
+      } else {
+         while ((f[0] >= '0') && (f[0] <= '9')) {
+            fw = fw * 10 + f[0] - '0';
+            f++;
+         }
+      }
+      // get the precision
+      if (f[0] == '.') {
+         ++f;
+         if (f[0] == '*') {
+            pr = va_arg(va, stbsp__uint32);
+            ++f;
+         } else {
+            pr = 0;
+            while ((f[0] >= '0') && (f[0] <= '9')) {
+               pr = pr * 10 + f[0] - '0';
+               f++;
+            }
+         }
+      }
+
+      // handle integer size overrides
+      switch (f[0]) {
+      // are we halfwidth?
+      case 'h':
+         fl |= STBSP__HALFWIDTH;
+         ++f;
+         if (f[0] == 'h')
+            ++f;  // QUARTERWIDTH
+         break;
+      // are we 64-bit (unix style)
+      case 'l':
+         fl |= ((sizeof(long) == 8) ? STBSP__INTMAX : 0);
+         ++f;
+         if (f[0] == 'l') {
+            fl |= STBSP__INTMAX;
+            ++f;
+         }
+         break;
+      // are we 64-bit on intmax? (c99)
+      case 'j':
+         fl |= (sizeof(size_t) == 8) ? STBSP__INTMAX : 0;
+         ++f;
+         break;
+      // are we 64-bit on size_t or ptrdiff_t? (c99)
+      case 'z':
+         fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0;
+         ++f;
+         break;
+      case 't':
+         fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0;
+         ++f;
+         break;
+      // are we 64-bit (msft style)
+      case 'I':
+         if ((f[1] == '6') && (f[2] == '4')) {
+            fl |= STBSP__INTMAX;
+            f += 3;
+         } else if ((f[1] == '3') && (f[2] == '2')) {
+            f += 3;
+         } else {
+            fl |= ((sizeof(void *) == 8) ? STBSP__INTMAX : 0);
+            ++f;
+         }
+         break;
+      default: break;
+      }
+
+      // handle each replacement
+      switch (f[0]) {
+         #define STBSP__NUMSZ 512 // big enough for e308 (with commas) or e-307
+         char num[STBSP__NUMSZ];
+         char lead[8];
+         char tail[8];
+         char *s;
+         char const *h;
+         stbsp__uint32 l, n, cs;
+         stbsp__uint64 n64;
+#ifndef STB_SPRINTF_NOFLOAT
+         double fv;
+#endif
+         stbsp__int32 dp;
+         char const *sn;
+
+      case 's':
+         // get the string
+         s = va_arg(va, char *);
+         if (s == 0)
+            s = (char *)"null";
+         // get the length, limited to desired precision
+         // always limit to ~0u chars since our counts are 32b
+         l = stbsp__strlen_limited(s, (pr >= 0) ? pr : ~0u);
+         lead[0] = 0;
+         tail[0] = 0;
+         pr = 0;
+         dp = 0;
+         cs = 0;
+         // copy the string in
+         goto scopy;
+
+      case 'c': // char
+         // get the character
+         s = num + STBSP__NUMSZ - 1;
+         *s = (char)va_arg(va, int);
+         l = 1;
+         lead[0] = 0;
+         tail[0] = 0;
+         pr = 0;
+         dp = 0;
+         cs = 0;
+         goto scopy;
+
+      case 'n': // weird write-bytes specifier
+      {
+         int *d = va_arg(va, int *);
+         *d = tlen + (int)(bf - buf);
+      } break;
+
+#ifdef STB_SPRINTF_NOFLOAT
+      case 'A':              // float
+      case 'a':              // hex float
+      case 'G':              // float
+      case 'g':              // float
+      case 'E':              // float
+      case 'e':              // float
+      case 'f':              // float
+         va_arg(va, double); // eat it
+         s = (char *)"No float";
+         l = 8;
+         lead[0] = 0;
+         tail[0] = 0;
+         pr = 0;
+         cs = 0;
+         STBSP__NOTUSED(dp);
+         goto scopy;
+#else
+      case 'A': // hex float
+      case 'a': // hex float
+         h = (f[0] == 'A') ? hexu : hex;
+         fv = va_arg(va, double);
+         if (pr == -1)
+            pr = 6; // default is 6
+         // read the double into a string
+         if (stbsp__real_to_parts((stbsp__int64 *)&n64, &dp, fv))
+            fl |= STBSP__NEGATIVE;
+
+         s = num + 64;
+
+         stbsp__lead_sign(fl, lead);
+
+         if (dp == -1023)
+            dp = (n64) ? -1022 : 0;
+         else
+            n64 |= (((stbsp__uint64)1) << 52);
+         n64 <<= (64 - 56);
+         if (pr < 15)
+            n64 += ((((stbsp__uint64)8) << 56) >> (pr * 4));
+// add leading chars
+
+#ifdef STB_SPRINTF_MSVC_MODE
+         *s++ = '0';
+         *s++ = 'x';
+#else
+         lead[1 + lead[0]] = '0';
+         lead[2 + lead[0]] = 'x';
+         lead[0] += 2;
+#endif
+         *s++ = h[(n64 >> 60) & 15];
+         n64 <<= 4;
+         if (pr)
+            *s++ = stbsp__period;
+         sn = s;
+
+         // print the bits
+         n = pr;
+         if (n > 13)
+            n = 13;
+         if (pr > (stbsp__int32)n)
+            tz = pr - n;
+         pr = 0;
+         while (n--) {
+            *s++ = h[(n64 >> 60) & 15];
+            n64 <<= 4;
+         }
+
+         // print the expo
+         tail[1] = h[17];
+         if (dp < 0) {
+            tail[2] = '-';
+            dp = -dp;
+         } else
+            tail[2] = '+';
+         n = (dp >= 1000) ? 6 : ((dp >= 100) ? 5 : ((dp >= 10) ? 4 : 3));
+         tail[0] = (char)n;
+         for (;;) {
+            tail[n] = '0' + dp % 10;
+            if (n <= 3)
+               break;
+            --n;
+            dp /= 10;
+         }
+
+         dp = (int)(s - sn);
+         l = (int)(s - (num + 64));
+         s = num + 64;
+         cs = 1 + (3 << 24);
+         goto scopy;
+
+      case 'G': // float
+      case 'g': // float
+         h = (f[0] == 'G') ? hexu : hex;
+         fv = va_arg(va, double);
+         if (pr == -1)
+            pr = 6;
+         else if (pr == 0)
+            pr = 1; // default is 6
+         // read the double into a string
+         if (stbsp__real_to_str(&sn, &l, num, &dp, fv, (pr - 1) | 0x80000000))
+            fl |= STBSP__NEGATIVE;
+
+         // clamp the precision and delete extra zeros after clamp
+         n = pr;
+         if (l > (stbsp__uint32)pr)
+            l = pr;
+         while ((l > 1) && (pr) && (sn[l - 1] == '0')) {
+            --pr;
+            --l;
+         }
+
+         // should we use %e
+         if ((dp <= -4) || (dp > (stbsp__int32)n)) {
+            if (pr > (stbsp__int32)l)
+               pr = l - 1;
+            else if (pr)
+               --pr; // when using %e, there is one digit before the decimal
+            goto doexpfromg;
+         }
+         // this is the insane action to get the pr to match %g semantics for %f
+         if (dp > 0) {
+            pr = (dp < (stbsp__int32)l) ? l - dp : 0;
+         } else {
+            pr = -dp + ((pr > (stbsp__int32)l) ? (stbsp__int32) l : pr);
+         }
+         goto dofloatfromg;
+
+      case 'E': // float
+      case 'e': // float
+         h = (f[0] == 'E') ? hexu : hex;
+         fv = va_arg(va, double);
+         if (pr == -1)
+            pr = 6; // default is 6
+         // read the double into a string
+         if (stbsp__real_to_str(&sn, &l, num, &dp, fv, pr | 0x80000000))
+            fl |= STBSP__NEGATIVE;
+      doexpfromg:
+         tail[0] = 0;
+         stbsp__lead_sign(fl, lead);
+         if (dp == STBSP__SPECIAL) {
+            s = (char *)sn;
+            cs = 0;
+            pr = 0;
+            goto scopy;
+         }
+         s = num + 64;
+         // handle leading chars
+         *s++ = sn[0];
+
+         if (pr)
+            *s++ = stbsp__period;
+
+         // handle after decimal
+         if ((l - 1) > (stbsp__uint32)pr)
+            l = pr + 1;
+         for (n = 1; n < l; n++)
+            *s++ = sn[n];
+         // trailing zeros
+         tz = pr - (l - 1);
+         pr = 0;
+         // dump expo
+         tail[1] = h[0xe];
+         dp -= 1;
+         if (dp < 0) {
+            tail[2] = '-';
+            dp = -dp;
+         } else
+            tail[2] = '+';
+#ifdef STB_SPRINTF_MSVC_MODE
+         n = 5;
+#else
+         n = (dp >= 100) ? 5 : 4;
+#endif
+         tail[0] = (char)n;
+         for (;;) {
+            tail[n] = '0' + dp % 10;
+            if (n <= 3)
+               break;
+            --n;
+            dp /= 10;
+         }
+         cs = 1 + (3 << 24); // how many tens
+         goto flt_lead;
+
+      case 'f': // float
+         fv = va_arg(va, double);
+      doafloat:
+         // do kilos
+         if (fl & STBSP__METRIC_SUFFIX) {
+            double divisor;
+            divisor = 1000.0f;
+            if (fl & STBSP__METRIC_1024)
+               divisor = 1024.0;
+            while (fl < 0x4000000) {
+               if ((fv < divisor) && (fv > -divisor))
+                  break;
+               fv /= divisor;
+               fl += 0x1000000;
+            }
+         }
+         if (pr == -1)
+            pr = 6; // default is 6
+         // read the double into a string
+         if (stbsp__real_to_str(&sn, &l, num, &dp, fv, pr))
+            fl |= STBSP__NEGATIVE;
+      dofloatfromg:
+         tail[0] = 0;
+         stbsp__lead_sign(fl, lead);
+         if (dp == STBSP__SPECIAL) {
+            s = (char *)sn;
+            cs = 0;
+            pr = 0;
+            goto scopy;
+         }
+         s = num + 64;
+
+         // handle the three decimal varieties
+         if (dp <= 0) {
+            stbsp__int32 i;
+            // handle 0.000*000xxxx
+            *s++ = '0';
+            if (pr)
+               *s++ = stbsp__period;
+            n = -dp;
+            if ((stbsp__int32)n > pr)
+               n = pr;
+            i = n;
+            while (i) {
+               if ((((stbsp__uintptr)s) & 3) == 0)
+                  break;
+               *s++ = '0';
+               --i;
+            }
+            while (i >= 4) {
+               *(stbsp__uint32 *)s = 0x30303030;
+               s += 4;
+               i -= 4;
+            }
+            while (i) {
+               *s++ = '0';
+               --i;
+            }
+            if ((stbsp__int32)(l + n) > pr)
+               l = pr - n;
+            i = l;
+            while (i) {
+               *s++ = *sn++;
+               --i;
+            }
+            tz = pr - (n + l);
+            cs = 1 + (3 << 24); // how many tens did we write (for commas below)
+         } else {
+            cs = (fl & STBSP__TRIPLET_COMMA) ? ((600 - (stbsp__uint32)dp) % 3) : 0;
+            if ((stbsp__uint32)dp >= l) {
+               // handle xxxx000*000.0
+               n = 0;
+               for (;;) {
+                  if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) {
+                     cs = 0;
+                     *s++ = stbsp__comma;
+                  } else {
+                     *s++ = sn[n];
+                     ++n;
+                     if (n >= l)
+                        break;
+                  }
+               }
+               if (n < (stbsp__uint32)dp) {
+                  n = dp - n;
+                  if ((fl & STBSP__TRIPLET_COMMA) == 0) {
+                     while (n) {
+                        if ((((stbsp__uintptr)s) & 3) == 0)
+                           break;
+                        *s++ = '0';
+                        --n;
+                     }
+                     while (n >= 4) {
+                        *(stbsp__uint32 *)s = 0x30303030;
+                        s += 4;
+                        n -= 4;
+                     }
+                  }
+                  while (n) {
+                     if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) {
+                        cs = 0;
+                        *s++ = stbsp__comma;
+                     } else {
+                        *s++ = '0';
+                        --n;
+                     }
+                  }
+               }
+               cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens
+               if (pr) {
+                  *s++ = stbsp__period;
+                  tz = pr;
+               }
+            } else {
+               // handle xxxxx.xxxx000*000
+               n = 0;
+               for (;;) {
+                  if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) {
+                     cs = 0;
+                     *s++ = stbsp__comma;
+                  } else {
+                     *s++ = sn[n];
+                     ++n;
+                     if (n >= (stbsp__uint32)dp)
+                        break;
+                  }
+               }
+               cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens
+               if (pr)
+                  *s++ = stbsp__period;
+               if ((l - dp) > (stbsp__uint32)pr)
+                  l = pr + dp;
+               while (n < l) {
+                  *s++ = sn[n];
+                  ++n;
+               }
+               tz = pr - (l - dp);
+            }
+         }
+         pr = 0;
+
+         // handle k,m,g,t
+         if (fl & STBSP__METRIC_SUFFIX) {
+            char idx;
+            idx = 1;
+            if (fl & STBSP__METRIC_NOSPACE)
+               idx = 0;
+            tail[0] = idx;
+            tail[1] = ' ';
+            {
+               if (fl >> 24) { // SI kilo is 'k', JEDEC and SI kibits are 'K'.
+                  if (fl & STBSP__METRIC_1024)
+                     tail[idx + 1] = "_KMGT"[fl >> 24];
+                  else
+                     tail[idx + 1] = "_kMGT"[fl >> 24];
+                  idx++;
+                  // If printing kibits and not in jedec, add the 'i'.
+                  if (fl & STBSP__METRIC_1024 && !(fl & STBSP__METRIC_JEDEC)) {
+                     tail[idx + 1] = 'i';
+                     idx++;
+                  }
+                  tail[0] = idx;
+               }
+            }
+         };
+
+      flt_lead:
+         // get the length that we copied
+         l = (stbsp__uint32)(s - (num + 64));
+         s = num + 64;
+         goto scopy;
+#endif
+
+      case 'B': // upper binary
+      case 'b': // lower binary
+         h = (f[0] == 'B') ? hexu : hex;
+         lead[0] = 0;
+         if (fl & STBSP__LEADING_0X) {
+            lead[0] = 2;
+            lead[1] = '0';
+            lead[2] = h[0xb];
+         }
+         l = (8 << 4) | (1 << 8);
+         goto radixnum;
+
+      case 'o': // octal
+         h = hexu;
+         lead[0] = 0;
+         if (fl & STBSP__LEADING_0X) {
+            lead[0] = 1;
+            lead[1] = '0';
+         }
+         l = (3 << 4) | (3 << 8);
+         goto radixnum;
+
+      case 'p': // pointer
+         fl |= (sizeof(void *) == 8) ? STBSP__INTMAX : 0;
+         pr = sizeof(void *) * 2;
+         fl &= ~STBSP__LEADINGZERO; // 'p' only prints the pointer with zeros
+                                    // fall through - to X
+
+      case 'X': // upper hex
+      case 'x': // lower hex
+         h = (f[0] == 'X') ? hexu : hex;
+         l = (4 << 4) | (4 << 8);
+         lead[0] = 0;
+         if (fl & STBSP__LEADING_0X) {
+            lead[0] = 2;
+            lead[1] = '0';
+            lead[2] = h[16];
+         }
+      radixnum:
+         // get the number
+         if (fl & STBSP__INTMAX)
+            n64 = va_arg(va, stbsp__uint64);
+         else
+            n64 = va_arg(va, stbsp__uint32);
+
+         s = num + STBSP__NUMSZ;
+         dp = 0;
+         // clear tail, and clear leading if value is zero
+         tail[0] = 0;
+         if (n64 == 0) {
+            lead[0] = 0;
+            if (pr == 0) {
+               l = 0;
+               cs = 0;
+               goto scopy;
+            }
+         }
+         // convert to string
+         for (;;) {
+            *--s = h[n64 & ((1 << (l >> 8)) - 1)];
+            n64 >>= (l >> 8);
+            if (!((n64) || ((stbsp__int32)((num + STBSP__NUMSZ) - s) < pr)))
+               break;
+            if (fl & STBSP__TRIPLET_COMMA) {
+               ++l;
+               if ((l & 15) == ((l >> 4) & 15)) {
+                  l &= ~15;
+                  *--s = stbsp__comma;
+               }
+            }
+         };
+         // get the tens and the comma pos
+         cs = (stbsp__uint32)((num + STBSP__NUMSZ) - s) + ((((l >> 4) & 15)) << 24);
+         // get the length that we copied
+         l = (stbsp__uint32)((num + STBSP__NUMSZ) - s);
+         // copy it
+         goto scopy;
+
+      case 'u': // unsigned
+      case 'i':
+      case 'd': // integer
+         // get the integer and abs it
+         if (fl & STBSP__INTMAX) {
+            stbsp__int64 i64 = va_arg(va, stbsp__int64);
+            n64 = (stbsp__uint64)i64;
+            if ((f[0] != 'u') && (i64 < 0)) {
+               n64 = (stbsp__uint64)-i64;
+               fl |= STBSP__NEGATIVE;
+            }
+         } else {
+            stbsp__int32 i = va_arg(va, stbsp__int32);
+            n64 = (stbsp__uint32)i;
+            if ((f[0] != 'u') && (i < 0)) {
+               n64 = (stbsp__uint32)-i;
+               fl |= STBSP__NEGATIVE;
+            }
+         }
+
+#ifndef STB_SPRINTF_NOFLOAT
+         if (fl & STBSP__METRIC_SUFFIX) {
+            if (n64 < 1024)
+               pr = 0;
+            else if (pr == -1)
+               pr = 1;
+            fv = (double)(stbsp__int64)n64;
+            goto doafloat;
+         }
+#endif
+
+         // convert to string
+         s = num + STBSP__NUMSZ;
+         l = 0;
+
+         for (;;) {
+            // do in 32-bit chunks (avoid lots of 64-bit divides even with constant denominators)
+            char *o = s - 8;
+            if (n64 >= 100000000) {
+               n = (stbsp__uint32)(n64 % 100000000);
+               n64 /= 100000000;
+            } else {
+               n = (stbsp__uint32)n64;
+               n64 = 0;
+            }
+            if ((fl & STBSP__TRIPLET_COMMA) == 0) {
+               do {
+                  s -= 2;
+                  *(stbsp__uint16 *)s = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2];
+                  n /= 100;
+               } while (n);
+            }
+            while (n) {
+               if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) {
+                  l = 0;
+                  *--s = stbsp__comma;
+                  --o;
+               } else {
+                  *--s = (char)(n % 10) + '0';
+                  n /= 10;
+               }
+            }
+            if (n64 == 0) {
+               if ((s[0] == '0') && (s != (num + STBSP__NUMSZ)))
+                  ++s;
+               break;
+            }
+            while (s != o)
+               if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) {
+                  l = 0;
+                  *--s = stbsp__comma;
+                  --o;
+               } else {
+                  *--s = '0';
+               }
+         }
+
+         tail[0] = 0;
+         stbsp__lead_sign(fl, lead);
+
+         // get the length that we copied
+         l = (stbsp__uint32)((num + STBSP__NUMSZ) - s);
+         if (l == 0) {
+            *--s = '0';
+            l = 1;
+         }
+         cs = l + (3 << 24);
+         if (pr < 0)
+            pr = 0;
+
+      scopy:
+         // get fw=leading/trailing space, pr=leading zeros
+         if (pr < (stbsp__int32)l)
+            pr = l;
+         n = pr + lead[0] + tail[0] + tz;
+         if (fw < (stbsp__int32)n)
+            fw = n;
+         fw -= n;
+         pr -= l;
+
+         // handle right justify and leading zeros
+         if ((fl & STBSP__LEFTJUST) == 0) {
+            if (fl & STBSP__LEADINGZERO) // if leading zeros, everything is in pr
+            {
+               pr = (fw > pr) ? fw : pr;
+               fw = 0;
+            } else {
+               fl &= ~STBSP__TRIPLET_COMMA; // if no leading zeros, then no commas
+            }
+         }
+
+         // copy the spaces and/or zeros
+         if (fw + pr) {
+            stbsp__int32 i;
+            stbsp__uint32 c;
+
+            // copy leading spaces (or when doing %8.4d stuff)
+            if ((fl & STBSP__LEFTJUST) == 0)
+               while (fw > 0) {
+                  stbsp__cb_buf_clamp(i, fw);
+                  fw -= i;
+                  while (i) {
+                     if ((((stbsp__uintptr)bf) & 3) == 0)
+                        break;
+                     *bf++ = ' ';
+                     --i;
+                  }
+                  while (i >= 4) {
+                     *(stbsp__uint32 *)bf = 0x20202020;
+                     bf += 4;
+                     i -= 4;
+                  }
+                  while (i) {
+                     *bf++ = ' ';
+                     --i;
+                  }
+                  stbsp__chk_cb_buf(1);
+               }
+
+            // copy leader
+            sn = lead + 1;
+            while (lead[0]) {
+               stbsp__cb_buf_clamp(i, lead[0]);
+               lead[0] -= (char)i;
+               while (i) {
+                  *bf++ = *sn++;
+                  --i;
+               }
+               stbsp__chk_cb_buf(1);
+            }
+
+            // copy leading zeros
+            c = cs >> 24;
+            cs &= 0xffffff;
+            cs = (fl & STBSP__TRIPLET_COMMA) ? ((stbsp__uint32)(c - ((pr + cs) % (c + 1)))) : 0;
+            while (pr > 0) {
+               stbsp__cb_buf_clamp(i, pr);
+               pr -= i;
+               if ((fl & STBSP__TRIPLET_COMMA) == 0) {
+                  while (i) {
+                     if ((((stbsp__uintptr)bf) & 3) == 0)
+                        break;
+                     *bf++ = '0';
+                     --i;
+                  }
+                  while (i >= 4) {
+                     *(stbsp__uint32 *)bf = 0x30303030;
+                     bf += 4;
+                     i -= 4;
+                  }
+               }
+               while (i) {
+                  if ((fl & STBSP__TRIPLET_COMMA) && (cs++ == c)) {
+                     cs = 0;
+                     *bf++ = stbsp__comma;
+                  } else
+                     *bf++ = '0';
+                  --i;
+               }
+               stbsp__chk_cb_buf(1);
+            }
+         }
+
+         // copy leader if there is still one
+         sn = lead + 1;
+         while (lead[0]) {
+            stbsp__int32 i;
+            stbsp__cb_buf_clamp(i, lead[0]);
+            lead[0] -= (char)i;
+            while (i) {
+               *bf++ = *sn++;
+               --i;
+            }
+            stbsp__chk_cb_buf(1);
+         }
+
+         // copy the string
+         n = l;
+         while (n) {
+            stbsp__int32 i;
+            stbsp__cb_buf_clamp(i, n);
+            n -= i;
+            STBSP__UNALIGNED(while (i >= 4) {
+               *(stbsp__uint32 volatile *)bf = *(stbsp__uint32 volatile *)s;
+               bf += 4;
+               s += 4;
+               i -= 4;
+            })
+            while (i) {
+               *bf++ = *s++;
+               --i;
+            }
+            stbsp__chk_cb_buf(1);
+         }
+
+         // copy trailing zeros
+         while (tz) {
+            stbsp__int32 i;
+            stbsp__cb_buf_clamp(i, tz);
+            tz -= i;
+            while (i) {
+               if ((((stbsp__uintptr)bf) & 3) == 0)
+                  break;
+               *bf++ = '0';
+               --i;
+            }
+            while (i >= 4) {
+               *(stbsp__uint32 *)bf = 0x30303030;
+               bf += 4;
+               i -= 4;
+            }
+            while (i) {
+               *bf++ = '0';
+               --i;
+            }
+            stbsp__chk_cb_buf(1);
+         }
+
+         // copy tail if there is one
+         sn = tail + 1;
+         while (tail[0]) {
+            stbsp__int32 i;
+            stbsp__cb_buf_clamp(i, tail[0]);
+            tail[0] -= (char)i;
+            while (i) {
+               *bf++ = *sn++;
+               --i;
+            }
+            stbsp__chk_cb_buf(1);
+         }
+
+         // handle the left justify
+         if (fl & STBSP__LEFTJUST)
+            if (fw > 0) {
+               while (fw) {
+                  stbsp__int32 i;
+                  stbsp__cb_buf_clamp(i, fw);
+                  fw -= i;
+                  while (i) {
+                     if ((((stbsp__uintptr)bf) & 3) == 0)
+                        break;
+                     *bf++ = ' ';
+                     --i;
+                  }
+                  while (i >= 4) {
+                     *(stbsp__uint32 *)bf = 0x20202020;
+                     bf += 4;
+                     i -= 4;
+                  }
+                  while (i--)
+                     *bf++ = ' ';
+                  stbsp__chk_cb_buf(1);
+               }
+            }
+         break;
+
+      default: // unknown, just copy code
+         s = num + STBSP__NUMSZ - 1;
+         *s = f[0];
+         l = 1;
+         fw = fl = 0;
+         lead[0] = 0;
+         tail[0] = 0;
+         pr = 0;
+         dp = 0;
+         cs = 0;
+         goto scopy;
+      }
+      ++f;
+   }
+endfmt:
+
+   if (!callback)
+      *bf = 0;
+   else
+      stbsp__flush_cb();
+
+done:
+   return tlen + (int)(bf - buf);
+}
+
+// cleanup: drop the format-flag bits and callback helper macros that vsprintfcb used above
+#undef STBSP__LEFTJUST
+#undef STBSP__LEADINGPLUS
+#undef STBSP__LEADINGSPACE
+#undef STBSP__LEADING_0X
+#undef STBSP__LEADINGZERO
+#undef STBSP__INTMAX
+#undef STBSP__TRIPLET_COMMA
+#undef STBSP__NEGATIVE
+#undef STBSP__METRIC_SUFFIX // NOTE(review): STBSP__METRIC_NOSPACE/_1024/_JEDEC and STBSP__HALFWIDTH are not in this list
+#undef STBSP__NUMSZ
+#undef stbsp__chk_cb_bufL
+#undef stbsp__chk_cb_buf
+#undef stbsp__flush_cb
+#undef stbsp__cb_buf_clamp
+
+// ============================================================================
+//   wrapper functions
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...)
+{  // variadic, unbounded form: forwards to vsprintfcb with no callback and no user pointer
+   va_list args;
+   int written;
+   va_start(args, fmt);
+   written = STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, args);
+   va_end(args);
+   return written;
+}
+
+typedef struct stbsp__context { // state shared between vsnprintf and its two callbacks
+   char *buf;    // next write position in the caller's buffer (advanced by stbsp__clamp_callback)
+   int count;    // bytes of the caller's buffer that are still unused
+   int length;   // running total of the lengths passed to the callback, before any clamping
+   char tmp[STB_SPRINTF_MIN]; // scratch area the callbacks return when output is not formatted in place
+} stbsp__context;
+
+// Callback vsnprintf hands to vsprintfcb: adds len to the running total, keeps at most
+// ctx->count bytes of the chunk, and returns where the next chunk should be formatted.
+static char *stbsp__clamp_callback(const char *buf, void *user, int len)
+{
+   stbsp__context *ctx = (stbsp__context *)user;
+   int keep = (len > ctx->count) ? ctx->count : len; // part of the chunk that still fits
+   ctx->length += len;                               // the total counts the unclamped length
+
+   if (keep) {
+      // a chunk formatted in place (buf == ctx->buf) needs no copy
+      if (buf != ctx->buf) {
+         char *dst = ctx->buf;
+         const char *src = buf;
+         const char *const src_end = buf + keep;
+         do {
+            *dst++ = *src++;
+         } while (src < src_end);
+      }
+      ctx->buf += keep;
+      ctx->count -= keep;
+   }
+
+   // format in place only while a whole STB_SPRINTF_MIN block still fits; otherwise use the scratch area
+   if ((ctx->count > 0) && (ctx->count >= STB_SPRINTF_MIN))
+      return ctx->buf;
+   return ctx->tmp;
+}
+
+// Measure-only callback used by vsnprintf: adds len to the running total and keeps no bytes.
+static char * stbsp__count_clamp_callback( const char * buf, void * user, int len )
+{
+   stbsp__context * ctx = (stbsp__context*)user;
+   (void) sizeof(buf); // buf is deliberately unused
+   ctx->length += len;
+   return ctx->tmp; // always hand back the scratch area; its contents are never copied out
+}
+
+// Formats into buf (capacity count, terminator included); returns the total length before clamping.
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE( vsnprintf )( char * buf, int count, char const * fmt, va_list va )
+{
+   stbsp__context c;
+   // Measure only whenever there is nowhere to write. This also covers a non-null buf with
+   // count <= 0, where terminating at buf[count - 1] would write before the buffer.
+   if ( (count <= 0) || !buf )
+   {
+      c.length = 0;
+      STB_SPRINTF_DECORATE( vsprintfcb )( stbsp__count_clamp_callback, &c, c.tmp, fmt, va );
+   }
+   else
+   {
+      int l;
+      c.buf = buf;
+      c.count = count;
+      c.length = 0;
+      // the priming callback call (len 0) selects the first target: buf itself or c.tmp
+      STB_SPRINTF_DECORATE( vsprintfcb )( stbsp__clamp_callback, &c, stbsp__clamp_callback(0,&c,0), fmt, va );
+
+      // zero-terminate
+      l = (int)( c.buf - buf );
+      if ( l >= count ) // should never be greater, only equal (or less) than count
+         l = count - 1;
+      buf[l] = 0;
+   }
+
+   return c.length;
+}
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...)
+{
+   // variadic front end: collects the arguments and defers to the va_list version
+   va_list args;
+   int total;
+
+   va_start(args, fmt);
+   total = STB_SPRINTF_DECORATE(vsnprintf)(buf, count, fmt, args);
+   va_end(args);
+   return total;
+}
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va)
+{  // va_list form of sprintf: no callback, no user pointer, and no count, so the caller must size buf
+   return STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, va);
+}
+
+// =======================================================================
+//   low level float utility functions
+
+#ifndef STB_SPRINTF_NOFLOAT
+
+// copies 8 bytes from src to dest through char pointers, avoiding strict-aliasing problems (upstream: compiles to nothing on /Ox)
+#define STBSP__COPYFP(dest, src)                   \
+   {                                               \
+      int cn;                                      \
+      for (cn = 0; cn < 8; cn++)                   \
+         ((char *)&dest)[cn] = ((char *)&src)[cn]; \
+   }
+
+// Split a double's bit pattern (binary64 layout assumed): *bits gets the low 52 bits,
+// *expo the 11-bit exponent field minus 1023; the return value is the top (sign) bit, 0 or 1.
+static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value)
+{
+   stbsp__int64 raw = 0;
+   stbsp__int64 fraction;
+   stbsp__int32 biased;
+
+   STBSP__COPYFP(raw, value); // take the object representation byte by byte
+
+   fraction = raw & ((((stbsp__uint64)1) << 52) - 1);
+   biased = (stbsp__int32)((raw >> 52) & 2047);
+   *bits = fraction;
+   *expo = biased - 1023;
+   return (stbsp__int32)((stbsp__uint64)raw >> 63);
+}
+
+static double const stbsp__bot[23] = { // 10^0 .. 10^22
+   1e+000, 1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010, 1e+011,
+   1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019, 1e+020, 1e+021, 1e+022
+};
+static double const stbsp__negbot[22] = { // 10^-1 .. 10^-22
+   1e-001, 1e-002, 1e-003, 1e-004, 1e-005, 1e-006, 1e-007, 1e-008, 1e-009, 1e-010, 1e-011,
+   1e-012, 1e-013, 1e-014, 1e-015, 1e-016, 1e-017, 1e-018, 1e-019, 1e-020, 1e-021, 1e-022
+};
+static double const stbsp__negboterr[22] = { // low-order terms paired index-for-index with stbsp__negbot[] in stbsp__raise_to_power10
+   -5.551115123125783e-018,  -2.0816681711721684e-019, -2.0816681711721686e-020, -4.7921736023859299e-021, -8.1803053914031305e-022, 4.5251888174113741e-023,
+   4.5251888174113739e-024,  -2.0922560830128471e-025, -6.2281591457779853e-026, -3.6432197315497743e-027, 6.0503030718060191e-028,  2.0113352370744385e-029,
+   -3.0373745563400371e-030, 1.1806906454401013e-032,  -7.7705399876661076e-032, 2.0902213275965398e-033,  -7.1542424054621921e-034, -7.1542424054621926e-035,
+   2.4754073164739869e-036,  5.4846728545790429e-037,  9.2462547772103625e-038,  -4.8596774326570872e-039
+};
+static double const stbsp__top[13] = { // 10^(23*k) for k = 1 .. 13
+   1e+023, 1e+046, 1e+069, 1e+092, 1e+115, 1e+138, 1e+161, 1e+184, 1e+207, 1e+230, 1e+253, 1e+276, 1e+299
+};
+static double const stbsp__negtop[13] = { // 10^(-23*k) for k = 1 .. 13
+   1e-023, 1e-046, 1e-069, 1e-092, 1e-115, 1e-138, 1e-161, 1e-184, 1e-207, 1e-230, 1e-253, 1e-276, 1e-299
+};
+static double const stbsp__toperr[13] = { // NOTE(review): presumably the low-order terms for stbsp__top[], mirroring stbsp__negtoperr - confirm
+   8388608,
+   6.8601809640529717e+028,
+   -7.253143638152921e+052,
+   -4.3377296974619174e+075,
+   -1.5559416129466825e+098,
+   -3.2841562489204913e+121,
+   -3.7745893248228135e+144,
+   -1.7356668416969134e+167,
+   -3.8893577551088374e+190,
+   -9.9566444326005119e+213,
+   6.3641293062232429e+236,
+   -5.2069140800249813e+259,
+   -5.2504760255204387e+282
+};
+static double const stbsp__negtoperr[13] = { // low-order terms paired index-for-index with stbsp__negtop[] in stbsp__raise_to_power10
+   3.9565301985100693e-040,  -2.299904345391321e-063,  3.6506201437945798e-086,  1.1875228833981544e-109,
+   -5.0644902316928607e-132, -6.7156837247865426e-155, -2.812077463003139e-178,  -5.7778912386589953e-201,
+   7.4997100559334532e-224,  -4.6439668915134491e-247, -6.3691100762962136e-270, -9.436808465446358e-293,
+   8.0970921678014997e-317
+};
+
+#if defined(_MSC_VER) && (_MSC_VER <= 1200) // old-MSVC branch: same values as below, written without the ULL suffix
+static stbsp__uint64 const stbsp__powten[20] = { // 10^0 .. 10^19
+   1,
+   10,
+   100,
+   1000,
+   10000,
+   100000,
+   1000000,
+   10000000,
+   100000000,
+   1000000000,
+   10000000000,
+   100000000000,
+   1000000000000,
+   10000000000000,
+   100000000000000,
+   1000000000000000,
+   10000000000000000,
+   100000000000000000,
+   1000000000000000000,
+   10000000000000000000U
+};
+#define stbsp__tento19th ((stbsp__uint64)1000000000000000000) // the value is 10^18, i.e. the smallest 19-digit number
+#else
+static stbsp__uint64 const stbsp__powten[20] = { // 10^0 .. 10^19
+   1,
+   10,
+   100,
+   1000,
+   10000,
+   100000,
+   1000000,
+   10000000,
+   100000000,
+   1000000000,
+   10000000000ULL,
+   100000000000ULL,
+   1000000000000ULL,
+   10000000000000ULL,
+   100000000000000ULL,
+   1000000000000000ULL,
+   10000000000000000ULL,
+   100000000000000000ULL,
+   1000000000000000000ULL,
+   10000000000000000000ULL
+};
+#define stbsp__tento19th (1000000000000000000ULL) // the value is 10^18, i.e. the smallest 19-digit number
+#endif
+
+#define stbsp__ddmulthi(oh, ol, xh, yh)                            \
+   { /* oh = xh*yh; ol = sum of split partial products - oh */     \
+      double ahi = 0, alo, bhi = 0, blo;                           \
+      stbsp__int64 bt;                                             \
+      oh = xh * yh;                                                \
+      STBSP__COPYFP(bt, xh);                                       \
+      bt &= ((~(stbsp__uint64)0) << 27); /* clear low 27 bits */   \
+      STBSP__COPYFP(ahi, bt);                                      \
+      alo = xh - ahi;                                              \
+      STBSP__COPYFP(bt, yh);                                       \
+      bt &= ((~(stbsp__uint64)0) << 27); /* clear low 27 bits */   \
+      STBSP__COPYFP(bhi, bt);                                      \
+      blo = yh - bhi;                                              \
+      ol = ((ahi * bhi - oh) + ahi * blo + alo * bhi) + alo * blo; \
+   }
+
+#define stbsp__ddtoS64(ob, xh, xl)          \
+   { /* ob = integer value of xh + xl */    \
+      double ahi = 0, alo, vh, t;           \
+      ob = (stbsp__int64)xh;                \
+      vh = (double)ob;                      \
+      ahi = (xh - vh);                      \
+      t = (ahi - xh);                       \
+      alo = (xh - (ahi - t)) - (vh + t);    \
+      ob += (stbsp__int64)(ahi + alo + xl); \
+   }
+
+#define stbsp__ddrenorm(oh, ol) \
+   { /* oh = oh + ol; ol = what that sum lost */ \
+      double s;                 \
+      s = oh + ol;              \
+      ol = ol - (s - oh);       \
+      oh = s;                   \
+   }
+
+#define stbsp__ddmultlo(oh, ol, xh, xl, yh, yl) ol = ol + (xh * yl + xl * yh); /* adds the cross terms xh*yl + xl*yh to ol; oh is not used */
+
+#define stbsp__ddmultlos(oh, ol, xh, yl) ol = ol + (xh * yl); /* adds the single cross term xh*yl to ol; oh is not used */
+
+static void stbsp__raise_to_power10(double *ohi, double *olo, double d, stbsp__int32 power) // writes d scaled by the requested power of ten to the pair (*ohi, *olo); power can be -323 to +350
+{
+   double ph, pl;
+   if ((power >= 0) && (power <= 22)) { // small non-negative power: one multiplication by a stbsp__bot entry
+      stbsp__ddmulthi(ph, pl, d, stbsp__bot[power]);
+   } else {
+      stbsp__int32 e, et, eb;
+      double p2h, p2l;
+
+      e = power;
+      if (power < 0)
+         e = -e;
+      et = (e * 0x2c9) >> 14; /* approximately e / 23 (0x2c9 / 2^14 is about 1/23), not a modulo */
+      if (et > 13)
+         et = 13; // clamp; whatever et does not cover ends up in eb
+      eb = e - (et * 23); // part of e not covered by et steps of 23
+
+      ph = d;
+      pl = 0.0;
+      if (power < 0) { // negative power: uses the stbsp__neg* tables
+         if (eb) {
+            --eb;
+            stbsp__ddmulthi(ph, pl, d, stbsp__negbot[eb]);
+            stbsp__ddmultlos(ph, pl, d, stbsp__negboterr[eb]);
+         }
+         if (et) {
+            stbsp__ddrenorm(ph, pl);
+            --et;
+            stbsp__ddmulthi(p2h, p2l, ph, stbsp__negtop[et]);
+            stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__negtop[et], stbsp__negtoperr[et]);
+            ph = p2h;
+            pl = p2l;
+         }
+      } else {
+         if (eb) {
+            e = eb;
+            if (eb > 22)
+               eb = 22; // stbsp__bot is indexed up to 22 here; the excess is applied in a second step
+            e -= eb;
+            stbsp__ddmulthi(ph, pl, d, stbsp__bot[eb]);
+            if (e) {
+               stbsp__ddrenorm(ph, pl);
+               stbsp__ddmulthi(p2h, p2l, ph, stbsp__bot[e]);
+               stbsp__ddmultlos(p2h, p2l, stbsp__bot[e], pl);
+               ph = p2h;
+               pl = p2l;
+            }
+         }
+         if (et) {
+            stbsp__ddrenorm(ph, pl);
+            --et;
+            stbsp__ddmulthi(p2h, p2l, ph, stbsp__top[et]);
+            stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__top[et], stbsp__toperr[et]);
+            ph = p2h;
+            pl = p2l;
+         }
+      }
+   }
+   stbsp__ddrenorm(ph, pl);
+   *ohi = ph;
+   *olo = pl;
+}
+
+// given a float value, returns the significant digits as text in *start / *len, and the position of the
+//   decimal point in decimal_pos.  +/-INF and NAN are specified by special values
+//   returned in the decimal_pos parameter.  The return value is the sign bit (non-zero for negative input).
+// frac_digits normally counts digits after the decimal point; OR in 0x80000000 to count from the first significant digit instead (for %g and %e)
+static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits)
+{
+   double d;
+   stbsp__int64 bits = 0;
+   stbsp__int32 expo, e, ng, tens;
+
+   d = value;
+   STBSP__COPYFP(bits, d);
+   expo = (stbsp__int32)((bits >> 52) & 2047); // 11-bit exponent field of the bit pattern
+   ng = (stbsp__int32)((stbsp__uint64) bits >> 63); // top (sign) bit
+   if (ng)
+      d = -d;
+
+   if (expo == 2047) // is nan or inf?
+   {
+      *start = (bits & ((((stbsp__uint64)1) << 52) - 1)) ? "NaN" : "Inf";
+      *decimal_pos = STBSP__SPECIAL;
+      *len = 3;
+      return ng;
+   }
+
+   if (expo == 0) // is zero or denormal
+   {
+      if (((stbsp__uint64) bits << 1) == 0) // do zero
+      {
+         *decimal_pos = 1;
+         *start = out;
+         out[0] = '0';
+         *len = 1;
+         return ng;
+      }
+      // find the right expo for denormals
+      {
+         stbsp__int64 v = ((stbsp__uint64)1) << 51;
+         while ((bits & v) == 0) {
+            --expo;
+            v >>= 1;
+         }
+      }
+   }
+
+   // find the decimal exponent as well as the decimal bits of the value
+   {
+      double ph, pl;
+
+      // log10 estimate - very specifically tweaked to hit or undershoot by no more than 1 of log10 of all expos 1..2046
+      tens = expo - 1023;
+      tens = (tens < 0) ? ((tens * 617) / 2048) : (((tens * 1233) / 4096) + 1);
+
+      // move the significant bits into position and stick them into an int
+      stbsp__raise_to_power10(&ph, &pl, d, 18 - tens);
+
+      // get as much precision from the double-double as possible
+      stbsp__ddtoS64(bits, ph, pl);
+
+      // check if we undershot
+      if (((stbsp__uint64)bits) >= stbsp__tento19th)
+         ++tens;
+   }
+
+   // now do the rounding in integer land
+   frac_digits = (frac_digits & 0x80000000) ? ((frac_digits & 0x7ffffff) + 1) : (tens + frac_digits); // NOTE(review): mask is 0x7ffffff (27 bits), not 0x7fffffff - confirm this is intended
+   if ((frac_digits < 24)) {
+      stbsp__uint32 dg = 1;
+      if ((stbsp__uint64)bits >= stbsp__powten[9])
+         dg = 10;
+      while ((stbsp__uint64)bits >= stbsp__powten[dg]) { // dg ends up as the number of decimal digits in bits
+         ++dg;
+         if (dg == 20)
+            goto noround;
+      }
+      if (frac_digits < dg) {
+         stbsp__uint64 r;
+         // add 0.5 at the right position and round
+         e = dg - frac_digits;
+         if ((stbsp__uint32)e >= 24)
+            goto noround;
+         r = stbsp__powten[e];
+         bits = bits + (r / 2);
+         if ((stbsp__uint64)bits >= stbsp__powten[dg])
+            ++tens;
+         bits /= r;
+      }
+   noround:;
+   }
+
+   // kill long trailing runs of zeros
+   if (bits) {
+      stbsp__uint32 n;
+      for (;;) {
+         if (bits <= 0xffffffff)
+            break;
+         if (bits % 1000)
+            goto donez;
+         bits /= 1000;
+      }
+      n = (stbsp__uint32)bits;
+      while ((n % 1000) == 0)
+         n /= 1000;
+      bits = n;
+   donez:;
+   }
+
+   // convert to string
+   out += 64; // digits are written backwards, ending at out + 64
+   e = 0;
+   for (;;) {
+      stbsp__uint32 n;
+      char *o = out - 8;
+      // do the conversion in chunks of U32s (avoid most 64-bit divides, worth it, constant denominators be damned)
+      if (bits >= 100000000) {
+         n = (stbsp__uint32)(bits % 100000000);
+         bits /= 100000000;
+      } else {
+         n = (stbsp__uint32)bits;
+         bits = 0;
+      }
+      while (n) {
+         out -= 2;
+         *(stbsp__uint16 *)out = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2];
+         n /= 100;
+         e += 2;
+      }
+      if (bits == 0) {
+         if ((e) && (out[0] == '0')) { // drop the leading '0' of a two-digit pair
+            ++out;
+            --e;
+         }
+         break;
+      }
+      while (out != o) { // pad this 8-digit chunk with zeros before handling the next one
+         *--out = '0';
+         ++e;
+      }
+   }
+
+   *decimal_pos = tens;
+   *start = out;
+   *len = e;
+   return ng;
+}
+
+#undef stbsp__ddmulthi
+#undef stbsp__ddrenorm
+#undef stbsp__ddmultlo
+#undef stbsp__ddmultlos
+#undef STBSP__SPECIAL
+#undef STBSP__COPYFP
+
+#endif // STB_SPRINTF_NOFLOAT
+
+// clean up
+#undef stbsp__uint16
+#undef stbsp__uint32
+#undef stbsp__int32
+#undef stbsp__uint64
+#undef stbsp__int64
+#undef STBSP__UNALIGNED
+
+#endif // STB_SPRINTF_IMPLEMENTATION
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
diff --git a/symmetry/symmetry.cpp b/gemmi_gph/symmetry.cpp
similarity index 100%
rename from symmetry/symmetry.cpp
rename to gemmi_gph/symmetry.cpp
diff --git a/gemmi_gph/xds_ascii.cpp b/gemmi_gph/xds_ascii.cpp
new file mode 100644
index 00000000..373a1480
--- /dev/null
+++ b/gemmi_gph/xds_ascii.cpp
@@ -0,0 +1,306 @@
+// Copyright 2023 Global Phasing Ltd.
+
+#include <gemmi/xds_ascii.hpp>
+#include <gemmi/atof.hpp>      // for fast_from_chars
+#include <gemmi/atox.hpp>      // for skip_blank, read_word
+#include <gemmi/util.hpp>      // for trim_str
+#include <gemmi/gz.hpp>
+#include <gemmi/math.hpp>
+
+namespace gemmi {
+
+void XdsAscii::gather_iset_statistics() {  // recomputes frame range, frame count and reflection count of each iset
+  for (Iset& iset : isets) {
+    iset.frame_number_min = INT_MAX;
+    iset.frame_number_max = 0;
+    iset.reflection_count = 0;  // reset first: the count below must not build on a previous value
+    for (const XdsAscii::Refl& refl : data)
+      if (refl.iset == iset.id) {
+        ++iset.reflection_count;
+        int frame = refl.frame();
+        iset.frame_number_min = std::min(iset.frame_number_min, frame);
+        iset.frame_number_max = std::max(iset.frame_number_max, frame);
+      }
+    if (iset.frame_number_min > iset.frame_number_max) continue;  // no reflection belongs to this iset
+    std::vector<uint8_t> frames(iset.frame_number_max - iset.frame_number_min + 1);  // one flag per frame in [min, max]
+    for (const XdsAscii::Refl& refl : data)
+      if (refl.iset == iset.id)
+        frames[refl.frame() - iset.frame_number_min] = 1;
+    iset.frame_count = 0;  // number of distinct frames that hold at least one reflection
+    for (uint8_t f : frames)
+      iset.frame_count += f;
+  }
+}
+
+/// Rescales iobs, sigma and rlp of every reflection to account for beam polarization.
+/// Based on Phil Evans' notes and the literature, see https://github.com/project-gemmi/gemmi/discussions/248
+/// \param p is defined as in XDS (p=0.5 for unpolarized beam).
+void XdsAscii::apply_polarization_correction(double p, Vec3 normal) {
+  if (!has_cell_axes())
+    fail("unknown unit cell axes");
+  Mat33 UB = cell_axes.inverse();
+  Vec3 rot_axis = get_rotation_axis();
+  Vec3 s0_dir = get_s0_direction();
+  normal = normal.normalized();
+  // The polarization normal is expected to be approx. orthogonal to the beam.
+  // dot() is the same as cos_angle() for normalized vectors; fabs() also rejects an anti-parallel normal.
+  if (std::fabs(normal.dot(s0_dir)) > std::cos(rad(5.0)))
+    fail("polarization normal is far from orthogonal to the incident beam");
+  // make normal exactly orthogonal to the beam
+  normal = s0_dir.cross(normal).cross(s0_dir).normalized();
+  // wavevector
+  Vec3 s0 = s0_dir / wavelength;
+  double s0_m2 = 1. / s0.length_sq();  // s0^-2
+
+  for (Refl& refl : data) {
+    double phi = rad(rot_angle(refl));
+    Vec3 h(refl.hkl[0], refl.hkl[1], refl.hkl[2]);
+    Vec3 r0 = UB.multiply(h);
+    Vec3 r = rotate_about_axis(r0, rot_axis, phi);
+    Vec3 s = s0 + r;
+#if 0
+    double two_theta = s0.angle(s);
+    // 2d sin(theta) = lambda
+    double bragg_angle = std::asin(wavelength / (2 * unit_cell.calculate_d(refl.hkl)));
+    printf("(%d %d %d) two-theta %g %g\n",
+           refl.hkl[0], refl.hkl[1], refl.hkl[2], deg(two_theta), deg(2 * bragg_angle));
+#endif
+    // we should have |s| == |s0|, but just in case calculate it separately
+    double s_m2 = 1. / s.length_sq();
+    // 1 + cos^2(2theta) = 2 * correction for unpolarized beam
+    double t = 1 + sq(s.dot(s0)) * s_m2 * s0_m2;
+    double polariz_factor = (1 - 2*p) * (1 - sq(normal.dot(s)) * s_m2) + p * t;
+    // We assume that the XDS file has polarization correction applied,
+    // but for non-polarized beam. So we multiply intensities by P0=t/2
+    // and divide by a hopefully more accurate polarization factor.
+    double mult = 0.5 * t / polariz_factor;
+    refl.iobs *= mult;
+    refl.sigma *= mult;
+    refl.rlp *= mult;
+  }
+}
+
+namespace {
+
+// If `a` begins with the literal `b`, sets *endptr just past the match and returns true.
+template<size_t N>
+bool starts_with_ptr(const char* a, const char (&b)[N], const char** endptr) {
+  const bool matched = std::strncmp(a, b, N-1) == 0;
+  if (matched)
+    *endptr = a + (N - 1);
+  return matched;
+}
+
+// Same test as starts_with_ptr(), applied to skip_blank(a).
+template<size_t N>
+bool starts_with_ptr_b(const char* a, const char (&b)[N], const char** endptr) {
+  const char* trimmed = skip_blank(a);
+  return starts_with_ptr<N>(trimmed, b, endptr);
+}
+
+// Converts one number from [start, end) into `val` and returns the position
+// reported by fast_from_chars(); calls fail(), quoting `line`, on error.
+inline const char* parse_number_into(const char* start, const char* end,
+                                     double& val, const char* line) {
+  const auto parsed = fast_from_chars(start, end, val);
+  if (parsed.ec != std::errc())
+    fail("failed to parse a number in:\n", line);
+  return parsed.ptr;
+}
+
+// Fills every element of `arr` (a built-in array or std::array of double)
+// with consecutive numbers taken from [start, end).
+template<typename Array>
+void parse_numbers_into_array(const char* start, const char* end,
+                              Array& arr, const char* line) {
+  for (double& slot : arr)
+    start = parse_number_into(start, end, slot, line);
+}
+
+// Reads three consecutive numbers into vec.x, vec.y and vec.z, in that order.
+void parse_numbers_into_vec3(const char* start, const char* end,
+                             Vec3& vec, const char* line) {
+  const char* pos = parse_number_into(start, end, vec.x, line);
+  pos = parse_number_into(pos, end, vec.y, line);
+  parse_number_into(pos, end, vec.z, line);
+}
+
+} // anonymous namespace
+
+void XdsAscii::read_stream(AnyStream& line_reader, const std::string& source) {  // parses XDS_ASCII / INTEGRATE.HKL text from line_reader into this object
+  source_path = source;
+  read_columns = 12;
+  char line[256];
+  size_t len0 = line_reader.copy_line(line, 255);
+  if (len0 == 0)
+    fail("empty file");
+  int iset_col = 0;
+  const char xds_ascii_header[] = "!FORMAT=XDS_ASCII    MERGE=";
+  char xds_ascii_type = '\0';
+  if (starts_with(line, xds_ascii_header)) {
+    size_t n = sizeof(xds_ascii_header)-1;
+    xds_ascii_type = line[n];  // first letter after MERGE=; 'T' is treated as a merged file below
+    // !FORMAT=XDS_ASCII    MERGE=FALSE    FRIEDEL'S_LAW=   (len0 check: do not read past a short line)
+    if (len0 > 50 && strncmp(line + n + 5, "    FRIEDEL'S_LAW=", 18) == 0)
+      friedels_law = line[50];
+  }
+  if (!xds_ascii_type && !starts_with(line, "!OUTPUT_FILE=INTEGRATE.HKL"))
+    fail("not an XDS_ASCII nor INTEGRATE.HKL file: " + source_path);
+  const char* rhs;
+  while (size_t len = line_reader.copy_line(line, 255)) {
+    if (line[0] == '!') {  // header/metadata line; anything else is a reflection record
+      if (starts_with_ptr(line+1, "Generated by ", &rhs)) {
+        generated_by = read_word(rhs, &rhs);
+        version_str = trim_str(rhs);
+      } else if (starts_with_ptr(line+1, "SPACE_GROUP_NUMBER=", &rhs)) {
+        spacegroup_number = simple_atoi(rhs);
+      } else if (starts_with_ptr(line+1, "UNIT_CELL_", &rhs)) {
+        if (starts_with_ptr(rhs, "CONSTANTS=", &rhs)) {  // UNIT_CELL_CONSTANTS=
+          parse_numbers_into_array(rhs, line+len, cell_constants, line);
+        } else if (starts_with_ptr(rhs, "A-AXIS=", &rhs)) { // UNIT_CELL_A-AXIS=
+          parse_numbers_into_array(rhs, line+len, cell_axes.a[0], line);
+        } else if (starts_with_ptr(rhs, "B-AXIS=", &rhs)) { // UNIT_CELL_B-AXIS=
+          parse_numbers_into_array(rhs, line+len, cell_axes.a[1], line);
+        } else if (starts_with_ptr(rhs, "C-AXIS=", &rhs)) { // UNIT_CELL_C-AXIS=
+          parse_numbers_into_array(rhs, line+len, cell_axes.a[2], line);
+        }
+      } else if (starts_with_ptr(line+1, "REFLECTING_RANGE_E.S.D.=", &rhs)) {
+        auto result = fast_from_chars(rhs, line+len, reflecting_range_esd);
+        if (result.ec != std::errc())
+          fail("failed to parse mosaicity:\n", line);
+      } else if (starts_with_ptr(line+1, "X-RAY_WAVELENGTH=", &rhs)) {
+        auto result = fast_from_chars(rhs, line+len, wavelength);
+        if (result.ec != std::errc())
+          fail("failed to parse wavelength:\n", line);
+      } else if (starts_with_ptr(line+1, "INCIDENT_BEAM_DIRECTION=", &rhs)) {
+        parse_numbers_into_vec3(rhs, line+len, incident_beam_dir, line);
+      } else if (starts_with_ptr(line+1, "OSCILLATION_RANGE=", &rhs)) {
+        auto result = fast_from_chars(rhs, line+len, oscillation_range);
+        if (result.ec != std::errc())
+          fail("failed to parse:\n", line);
+      } else if (starts_with_ptr(line+1, "ROTATION_AXIS=", &rhs)) {
+        parse_numbers_into_vec3(rhs, line+len, rotation_axis, line);
+      } else if (starts_with_ptr(line+1, "STARTING_ANGLE=", &rhs)) {
+        auto result = fast_from_chars(rhs, line+len, starting_angle);
+        if (result.ec != std::errc())
+          fail("failed to parse:\n", line);
+      } else if (starts_with_ptr(line+1, "STARTING_FRAME=", &rhs)) {
+        starting_frame = simple_atoi(rhs);
+      } else if (starts_with_ptr(line+1, " ISET= ", &rhs)) {  // per-iset metadata: "! ISET= <id> KEY=value"
+        const char* endptr;
+        int id = simple_atoi(rhs, &endptr);
+        XdsAscii::Iset& iset = find_or_add_iset(id);
+        endptr = skip_blank(endptr);
+        if (starts_with_ptr(endptr, "INPUT_FILE=", &rhs)) {
+          iset.input_file = read_word(rhs);
+        } else if (starts_with_ptr(endptr, "X-RAY_WAVELENGTH=", &rhs)) {
+          double w;
+          auto result = fast_from_chars(rhs, line+len, w);
+          if (result.ec != std::errc())
+            fail("failed to parse iset wavelength:\n", line);
+          iset.wavelength = w;
+        } else if (starts_with_ptr(endptr, "UNIT_CELL_CONSTANTS=", &rhs)) {
+          parse_numbers_into_array(rhs, line+len, iset.cell_constants, line);
+        }
+      } else if (starts_with_ptr(line+1, "NX=", &rhs)) {  // NX= NY= QX= QY= are read from the same line
+        const char* endptr;
+        nx = simple_atoi(rhs, &endptr);
+        if (starts_with_ptr_b(endptr, "NY=", &rhs))
+          ny = simple_atoi(rhs, &endptr);
+        if (starts_with_ptr_b(endptr, "QX=", &rhs))
+          endptr = parse_number_into(rhs, line+len, qx, line);
+        if (starts_with_ptr_b(endptr, "QY=", &rhs))
+          parse_number_into(rhs, line+len, qy, line);
+      } else if (starts_with_ptr(line+1, "ORGX=", &rhs)) {  // ORGX= ORGY= DETECTOR_DISTANCE= on one line
+        const char* endptr = parse_number_into(rhs, line+len, orgx, line);
+        if (starts_with_ptr_b(endptr, "ORGY=", &rhs))
+          endptr = parse_number_into(rhs, line+len, orgy, line);
+        if (starts_with_ptr_b(endptr, "DETECTOR_DISTANCE=", &rhs))
+          parse_number_into(rhs, line+len, detector_distance, line);
+      } else if (starts_with_ptr(line+1, "NUMBER_OF_ITEMS_IN_EACH_DATA_RECORD=", &rhs)) {
+        int num = simple_atoi(rhs);
+        // INTEGRATE.HKL has read_columns=12, as set above
+        if (xds_ascii_type == 'T')  // merged file
+          read_columns = 5;
+        else if (generated_by == "XSCALE")
+          read_columns = 8;
+        else if (generated_by == "CORRECT")
+          read_columns = 11;
+        // check if the columns are what they always are
+        if (num < read_columns)
+          fail("expected ", std::to_string(read_columns), "+ columns, got:\n", line);
+        if (generated_by == "INTEGRATE") {
+          line_reader.copy_line(line, 52);  // the next line must list the columns in the expected order
+          if (!starts_with(line, "!H,K,L,IOBS,SIGMA,XCAL,YCAL,ZCAL,RLP,PEAK,CORR,MAXC"))
+            fail("unexpected column order in INTEGRATE.HKL");
+        } else {
+          const char* expected_columns[12] = {
+            "H=1", "K=2", "L=3", "IOBS=4", "SIGMA(IOBS)=5",
+            "XD=6", "YD=7", "ZD=8", "RLP=9", "PEAK=10", "CORR=11", "MAXC=12"
+          };
+          for (int i = 0; i < read_columns; ++i) {  // one "!ITEM_<name>=<index>" line per column read
+            const char* col = expected_columns[i];
+            line_reader.copy_line(line, 42);
+            if (std::strncmp(line, "!ITEM_", 6) != 0 ||
+                std::strncmp(line+6, col, std::strlen(col)) != 0)
+              fail("column !ITEM_" + std::string(col), " not found.");
+          }
+        }
+      } else if (starts_with_ptr(line+1, "ITEM_ISET=", &rhs)) {
+        iset_col = simple_atoi(rhs);
+      } else if (starts_with(line+1, "END_OF_DATA")) {  // the only successful exit from this function
+        if (isets.empty()) {
+          isets.emplace_back(1);
+          isets.back().wavelength = wavelength;
+        }
+        for (XdsAscii::Refl& refl : data)
+          if (size_t(refl.iset - 1) >= isets.size())
+            fail("unexpected ITEM_ISET " + std::to_string(refl.iset));
+        return;
+      }
+    } else {
+      data.emplace_back();
+      XdsAscii::Refl& r = data.back();
+      const char* p = line;
+      for (int i = 0; i < 3; ++i)
+        r.hkl[i] = simple_atoi(p, &p);
+      auto result = fast_from_chars(p, line+len, r.iobs); // 4
+      result = fast_from_chars(result.ptr, line+len, r.sigma); // 5
+      if (read_columns >= 8) {
+        result = fast_from_chars(result.ptr, line+len, r.xd); // 6
+        result = fast_from_chars(result.ptr, line+len, r.yd); // 7
+        result = fast_from_chars(result.ptr, line+len, r.zd); // 8
+        if (read_columns >= 11) {
+          result = fast_from_chars(result.ptr, line+len, r.rlp); // 9
+          result = fast_from_chars(result.ptr, line+len, r.peak); // 10
+          result = fast_from_chars(result.ptr, line+len, r.corr); // 11
+          if (read_columns >= 12) {
+            result = fast_from_chars(result.ptr, line+len, r.maxc); // 12
+          } else {
+            r.maxc = 0;  // 12
+          }
+        } else {
+          r.rlp = r.peak = r.corr = r.maxc = 0;  // 9-12
+        }
+      } else {
+        r.xd = r.yd = r.zd = 0;  // 6-8
+      }
+      if (result.ec != std::errc())  // only the status of the last conversion is checked here
+        fail("failed to parse data line:\n", line);
+      if (iset_col >= read_columns) {
+        const char* iset_ptr = result.ptr;
+        for (int j = read_columns+1; j < iset_col; ++j)  // skip the columns between the last one read and ISET
+          iset_ptr = skip_word(skip_blank(iset_ptr));
+        r.iset = simple_atoi(iset_ptr);
+      }
+    }
+  }
+  fail("incorrect or unfinished file: " + source_path);  // input ended without !END_OF_DATA
+}
+
+XdsAscii read_xds_ascii(const std::string& path) {  // reads the file at `path`, wrapped in MaybeGzipped
+  XdsAscii loaded;
+  loaded.read_input(gemmi::MaybeGzipped(path));
+  return loaded;
+}
+
+}  // namespace gemmi
diff --git a/symmetry/CMakeLists.txt b/symmetry/CMakeLists.txt
deleted file mode 100644
index fed3f792..00000000
--- a/symmetry/CMakeLists.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-ADD_LIBRARY(gemmi STATIC symmetry.cpp gemmi/symmetry.hpp gemmi/fail.hpp)
-TARGET_INCLUDE_DIRECTORIES(gemmi PUBLIC .)
\ No newline at end of file