Update README.md

2019-01-29 14:25:11 +00:00
26 changed files with 2598 additions and 5737 deletions
@@ -1,35 +0,0 @@
 # CI workflow: builds the plugin on every pushed branch and tag, and uploads
 # release assets to Gitea when the push is a tag.
 name: Build Packages
 on:
  push:
    branches:
      - '**'
    tags:
      - '**'
 jobs:
  build-library:
    name: Build
    runs-on: durin
    steps:
      - uses: actions/checkout@v4
      - name: Build library
        shell: bash
        run: |
          mkdir -p build
          cd build
          cmake -DCMAKE_BUILD_TYPE=Release ..
          # Bound the job count: a bare "make -j" spawns an unlimited number of jobs.
          make -j"$(nproc)"
      - name: Upload release assets to Gitea
        if: github.ref_type == 'tag'
        shell: bash
        env:
          GITEA_TOKEN: ${{ secrets.PIP_REPOSITORY_API_TOKEN }}
          # Context values are handed over as environment variables rather than
          # being expanded into the script text, so a tag name containing shell
          # metacharacters cannot inject commands.
          RELEASE_SERVER_URL: ${{ github.server_url }}
          RELEASE_REPOSITORY: ${{ github.repository }}
          RELEASE_REF_NAME: ${{ github.ref_name }}
        run: |
          set -euo pipefail
          python tools/gitea_release_upload.py \
            "$RELEASE_SERVER_URL" \
            "$RELEASE_REPOSITORY" \
            "$RELEASE_REF_NAME"
@@ -1,56 +0,0 @@
 # Build script for the durin plugin: fetches and builds a static HDF5, then
 # links it together with the bundled bitshuffle/LZ4 sources into one shared library.
 cmake_minimum_required(VERSION 3.19)
 project(durin VERSION 1.0.0 LANGUAGES C)

 include(FetchContent)

 # Release flags are replaced wholesale with -O3.
 set(CMAKE_C_FLAGS_RELEASE "-O3")
 set(CMAKE_C_STANDARD 99)
 set(CMAKE_C_STANDARD_REQUIRED ON)
 set(CMAKE_C_EXTENSIONS OFF)
 # Position-independent code is enabled globally because the static HDF5
 # library is linked into the shared plugin below.
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 set(BUILD_SHARED_LIBS OFF)

 # HDF5 configuration; these must be set before FetchContent_MakeAvailable(hdf5).
 set(HDF5_USE_STATIC_LIBRARIES TRUE)
 set(HDF5_BUILD_HL_LIB OFF)
 set(HDF5_ENABLE_THREADSAFE ON)
 set(HDF5_ENABLE_SZIP_SUPPORT OFF)
 set(HDF5_ENABLE_SZIP_ENCODING OFF)
 set(HDF5_BUILD_EXAMPLES OFF)
 set(HDF5_BUILD_CPP_LIB OFF)
 set(HDF5_ENABLE_Z_LIB_SUPPORT OFF)
 set(HDF5_EXTERNALLY_CONFIGURED 1)

 # NOTE(review): DOWNLOAD_EXTRACT_TIMESTAMP (CMake 3.24) and the FetchContent
 # EXCLUDE_FROM_ALL option (CMake 3.28) are newer than the declared 3.19
 # minimum - confirm the behaviour on older CMake releases.
 FetchContent_Declare(hdf5
        URL https://github.com/HDFGroup/hdf5/releases/download/hdf5_1.14.6/hdf5-1.14.6.tar.gz
        DOWNLOAD_EXTRACT_TIMESTAMP FALSE
        EXCLUDE_FROM_ALL)
 FetchContent_MakeAvailable(hdf5)

 add_library(durin-plugin SHARED
        src/plugin.c src/plugin.h
        src/err.c src/err.h
        src/filters.c src/filters.h
        src/file.c src/file.h bslz4/src/bitshuffle.c bslz4/src/bitshuffle.h
        bslz4/src/bitshuffle_core.c bslz4/src/bitshuffle_core.h
        bslz4/src/bitshuffle_internals.h
        bslz4/src/bshuf_h5filter.c bslz4/src/bshuf_h5filter.h
        bslz4/src/iochain.c bslz4/src/iochain.h
        bslz4/src/lz4.c bslz4/src/lz4.h
        )

 # The bitshuffle headers are scoped to the plugin target only, so they are
 # not added to the include path of the fetched HDF5 build.
 target_include_directories(durin-plugin PRIVATE
        "${CMAKE_CURRENT_SOURCE_DIR}/bslz4/src")

 # The shared-library version follows the project() version declared above.
 set_target_properties(durin-plugin PROPERTIES VERSION ${PROJECT_VERSION})

 target_compile_definitions(durin-plugin PRIVATE
        H5_USE_110_API
        USE_BITSHUFFLE)
 target_link_libraries(durin-plugin PRIVATE hdf5-static)
@@ -1,43 +0,0 @@
 # Build image based on Red Hat UBI 7: installs GCC, git, make, Node.js and a
 # pinned CMake binary release.
 FROM registry.access.redhat.com/ubi7/ubi
 # NOTE(review): HDF5_TAG is not referenced anywhere else in this Dockerfile as shown.
 ARG HDF5_TAG="hdf5_1.14.6"
 LABEL authors="Filip Leonarski"
 ARG CMAKE_VERSION=3.31.6
 ARG NODE_MAJOR=16
 RUN yum -y update && \
    yum -y install \
        gcc \
        gcc-c++ \
        git \
        make \
        tar \
        gzip \
        curl && \
    yum clean all
 # Install a recent Node.js (NodeSource). Change NODE_MAJOR if you want another major version.
 RUN curl -fsSL https://rpm.nodesource.com/setup_${NODE_MAJOR}.x | bash - && \
    yum -y install nodejs && \
    yum clean all && \
    node --version && npm --version && (corepack enable || true)
 # Download the CMake release matching the build architecture, unpack it under
 # /opt and expose cmake/ctest/cpack through /usr/local/bin. "curl -f" makes an
 # HTTP error fail the download step instead of saving the error page as the tarball.
 RUN set -eux; \
    arch="$(uname -m)"; \
    case "$arch" in \
      x86_64) cmake_arch="x86_64" ;; \
      aarch64) cmake_arch="aarch64" ;; \
      *) echo "Unsupported architecture: $arch"; exit 1 ;; \
    esac; \
    curl -fsSL "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-${cmake_arch}.tar.gz" \
      -o /tmp/cmake.tar.gz; \
    tar -xzf /tmp/cmake.tar.gz -C /opt; \
    ln -s "/opt/cmake-${CMAKE_VERSION}-linux-${cmake_arch}/bin/cmake" /usr/local/bin/cmake; \
    ln -s "/opt/cmake-${CMAKE_VERSION}-linux-${cmake_arch}/bin/ctest" /usr/local/bin/ctest; \
    ln -s "/opt/cmake-${CMAKE_VERSION}-linux-${cmake_arch}/bin/cpack" /usr/local/bin/cpack; \
    rm -f /tmp/cmake.tar.gz
 # Default command starts an interactive login shell.
 CMD ["/bin/bash", "-l"]
@@ -0,0 +1,55 @@
 # Makefile for the durin plugin. Compiles with the HDF5 "h5cc" wrapper, which
 # must be on PATH. The default target is "plugin".

 # Output directory for objects and final artifacts (overridable by the caller).
 BUILD_DIR ?= ./build
 SRC_DIR = ./src
 TEST_DIR = ./test
 INC_DIR = $(SRC_DIR)
 BSLZ4_SRC_DIR = ./bslz4/src
 BSLZ4_BUILD_DIR = ./bslz4/build
 BSLZ4_INC_DIR = $(BSLZ4_SRC_DIR)
 CC=h5cc
 CFLAGS=-Wall -g -O2 -fpic -I$(INC_DIR) -I$(BSLZ4_INC_DIR) -std=c89
 .PHONY: plugin
 plugin: $(BUILD_DIR)/durin-plugin.so
 .PHONY: all
 all: plugin example test_plugin
 .PHONY: example
 example: $(BUILD_DIR)/example
 .PHONY: test_plugin
 test_plugin: $(BUILD_DIR)/test_plugin
 # Fortran test host. Libraries are listed after the sources so the linker
 # resolves their symbols regardless of its --as-needed default.
 $(BUILD_DIR)/test_plugin: $(TEST_DIR)/generic_data_plugin.f90 $(TEST_DIR)/test_generic_host.f90
 	mkdir -p $(BUILD_DIR)
 	gfortran -O -g -fopenmp $(TEST_DIR)/generic_data_plugin.f90 $(TEST_DIR)/test_generic_host.f90 -o $@ -J$(BUILD_DIR) -ldl
 # Plugin sources -> objects.
 $(BUILD_DIR)/%.o: $(SRC_DIR)/%.c
 	mkdir -p $(BUILD_DIR)
 	$(CC) $(CFLAGS) -c $< -o $@
 # Bitshuffle/LZ4 sources -> objects.
 $(BSLZ4_BUILD_DIR)/%.o: $(BSLZ4_SRC_DIR)/%.c
 	mkdir -p $(BSLZ4_BUILD_DIR)
 	$(CC) $(CFLAGS) -c $< -o $@
 # Static archive of the bitshuffle/LZ4 objects.
 $(BUILD_DIR)/bslz4.a: $(BSLZ4_BUILD_DIR)/lz4.o $(BSLZ4_BUILD_DIR)/bitshuffle.o \
 $(BSLZ4_BUILD_DIR)/bitshuffle_core.o $(BSLZ4_BUILD_DIR)/iochain.o
 	mkdir -p $(BUILD_DIR)
 	ar rcs $@ $^
 $(BUILD_DIR)/durin-plugin.so: $(BUILD_DIR)/plugin.o $(BUILD_DIR)/file.o $(BUILD_DIR)/err.o $(BUILD_DIR)/filters.o \
 $(BUILD_DIR)/bslz4.a
 	mkdir -p $(BUILD_DIR)
 	$(CC) $(CFLAGS) -shared $^ -o $(BUILD_DIR)/durin-plugin.so
 $(BUILD_DIR)/example: $(BUILD_DIR)/test.o $(BUILD_DIR)/file.o $(BUILD_DIR)/err.o $(BUILD_DIR)/filters.o \
 $(BUILD_DIR)/bslz4.a
 	mkdir -p $(BUILD_DIR)
 	$(CC) $(CFLAGS) $^ -o $(BUILD_DIR)/example
 # -f keeps "make clean" from failing when a build directory does not exist yet.
 .PHONY: clean
 clean:
 	rm -rf $(BUILD_DIR)
 	rm -rf $(BSLZ4_BUILD_DIR)
@@ -8,24 +8,11 @@ See:
 * https://www.dectris.com/features/features-eiger-x/hdf5-and-nexus
 * https://strucbio.biologie.uni-konstanz.de/xdswiki
 ## Paul Scherrer Institute fork
 This fork is maintained by Paul Scherrer Institute.
 The plugin is based on the code developed by the Diamond Light Source and modified by Global Phasing.
 Modifications from PSI side:
 * Using CMake for building the plugin
 * HDF5 is built as part of the CMake process
 * Bitshuffle/LZ4 is updated to the latest version
 * HDF5 filter is automatically registered for virtual dataset HDF5 files
 * Docker image to build the plugin on x86/RH7 is provided.
 * A versioned shared library is generated, so the version in use can be tracked.
 ## Get Durin
 A Linux x86 build is produced automatically on RHEL 7 and is available from the Gitea release page.
 ## Usage
 In your XDS.INP add:
 ```
-LIB=[path to libdurin-plugin.so]
+LIB=[path to durin-plugin.so]
 NAME_TEMPLATE_OF_DATA_FRAMES=[data_path]/data_images_??????.h5
 ```
 XDS will instruct the plugin to load `[data_path]/data_images_master.h5` and this must be the
@@ -37,25 +24,55 @@ the master file contains an `NXdata` or `NXdetector` group with either a dataset
 series of datasets named `data_000001`, `data_000002`, etc.
 ## Requirements
 * HDF5 Library (https://www.hdfgroup.org/downloads)
 ## Building
-Requires CMake version 3.19 or later + GCC compiler. There is no need to build HDF5 separately.
+
-To build:
+### Building HDF5 library
 The HDF5 library used when building durin must have been compiled with specific switches enabled
 to allow the durin plugin to be built and used.
 Download the HDF5 source code (https://www.hdfgroup.org/downloads/hdf5/source-code) and extract
 to any directory (referred to as `/hdf5_dir`), and run the following commands.
 ```
 cd /hdf5_dir
 mkdir build
 cd build
-cmake ..
+export CFLAGS=-fPIC
-make -j
+../configure --enable-threadsafe --enable-deprecated-symbols --enable-hl --enable-unsupported
 make
 make check
 make install
 ```
 The hdf5 tools and libraries should now be located in `/hdf5_dir/build/hdf5`
 For reference, the plugin requires the thread-safe switch and the optimised chunk read function.
 The chunk read function may be defined in the high level library instead of the regular library,
 depending on the exact HDF5 version downloaded (hence the --enable-deprecated-symbols _and_ --enable-hl).
 The unsupported flag enables building with both threadsafe and high-level enabled.
 ### Building durin plugin
 The plugin makefile will use the "h5cc" compiler wrapper, provided by the HDF5 library, which
 must be on your PATH.
 Download or clone the plugin source code (https://github.com/DiamondLightSource/durin)
 into any directory (referred to as `/durin_dir`) and run the following commands.
 ```
 cd /durin_dir
 PATH=/hdf5_dir/build/hdf5/bin:$PATH
 make
 ```
 The plugin is located at `/durin_dir/build/durin-plugin.so` and should be added to the
 XDS.INP file as `LIB=/durin_dir/build/durin-plugin.so`
 The plugin is located at `build/libdurin-plugin.so` and should be added to the
 XDS.INP file as `LIB=<CURRENT_DIRECTORY>/build/libdurin-plugin.so`.
 Alternatively, a versioned copy is also provided (e.g. `libdurin-plugin.so.1.0.0`), which makes it
 possible to track the version of the plugin in use.
 ## Example XDS.INP
 ```
 DETECTOR=PILATUS MINIMUM_VALID_PIXEL_VALUE=0 OVERLOAD=4096
-LIB=/opt/durin/build/libdurin-plugin.so
+LIB=/opt/durin/build/durin-plugin.so
 SENSOR_THICKNESS= 0.450
 !SENSOR_MATERIAL / THICKNESS Si 0.450
 !SILICON= 3.953379
@@ -75,3 +92,6 @@ TRUSTED_REGION= 0.0 1.41
 DATA_RANGE= 1 600
 JOB=XYCORR INIT COLSPOT IDXREF DEFPIX INTEGRATE CORRECT
 ```
 N.B. the master file is needed, not the .nxs one which follows the
 standard.
@@ -14,22 +14,23 @@
 #include "bitshuffle_internals.h"
 #include "lz4.h"
 #ifdef ZSTD_SUPPORT
 #include "zstd.h"
 #endif
 #include <stdio.h>
 #include <string.h>
-// Macros.
+
 #define BSHUF_LZ4_DECOMPRESS_FAST
 #define CHECK_ERR_FREE_LZ(count, buf) if (count < 0) {                      \
    free(buf); return count - 1000; }
 /* Bitshuffle and compress a single block. */
 int64_t bshuf_compress_lz4_block(ioc_chain *C_ptr, \
-        const size_t size, const size_t elem_size, const int option) {
+        const size_t size, const size_t elem_size) {
    int64_t nbytes, count;
    void *tmp_buf_bshuf;
@@ -41,8 +42,7 @@ int64_t bshuf_compress_lz4_block(ioc_chain *C_ptr, \
    tmp_buf_bshuf = malloc(size * elem_size);
    if (tmp_buf_bshuf == NULL) return -1;
-    int dst_capacity = LZ4_compressBound(size * elem_size);
+    tmp_buf_lz4 = malloc(LZ4_compressBound(size * elem_size));
    tmp_buf_lz4 = malloc(dst_capacity);
    if (tmp_buf_lz4 == NULL){
        free(tmp_buf_bshuf);
        return -1;
@@ -58,7 +58,7 @@ int64_t bshuf_compress_lz4_block(ioc_chain *C_ptr, \
        free(tmp_buf_bshuf);
        return count;
    }
-    nbytes = LZ4_compress_default((const char*) tmp_buf_bshuf, (char*) tmp_buf_lz4, size * elem_size, dst_capacity);
+    nbytes = LZ4_compress((const char*) tmp_buf_bshuf, (char*) tmp_buf_lz4, size * elem_size);
    free(tmp_buf_bshuf);
    CHECK_ERR_FREE_LZ(nbytes, tmp_buf_lz4);
@@ -76,7 +76,7 @@ int64_t bshuf_compress_lz4_block(ioc_chain *C_ptr, \
 /* Decompress and bitunshuffle a single block. */
 int64_t bshuf_decompress_lz4_block(ioc_chain *C_ptr,
-        const size_t size, const size_t elem_size, const int option) {
+        const size_t size, const size_t elem_size) {
    int64_t nbytes, count;
    void *out, *tmp_buf;
@@ -96,6 +96,14 @@ int64_t bshuf_decompress_lz4_block(ioc_chain *C_ptr,
    tmp_buf = malloc(size * elem_size);
    if (tmp_buf == NULL) return -1;
 #ifdef BSHUF_LZ4_DECOMPRESS_FAST
    nbytes = LZ4_decompress_fast((const char*) in + 4, (char*) tmp_buf, size * elem_size);
    CHECK_ERR_FREE_LZ(nbytes, tmp_buf);
    if (nbytes != nbytes_from_header) {
        free(tmp_buf);
        return -91;
    }
 #else
    nbytes = LZ4_decompress_safe((const char*) in + 4, (char *) tmp_buf, nbytes_from_header,
                                 size * elem_size);
    CHECK_ERR_FREE_LZ(nbytes, tmp_buf);
@@ -104,7 +112,7 @@ int64_t bshuf_decompress_lz4_block(ioc_chain *C_ptr,
        return -91;
    }
    nbytes = nbytes_from_header;
-
+#endif
    count = bshuf_untrans_bit_elem(tmp_buf, out, size, elem_size);
    CHECK_ERR_FREE(count, tmp_buf);
    nbytes += 4;
@@ -113,92 +121,6 @@ int64_t bshuf_decompress_lz4_block(ioc_chain *C_ptr,
    return nbytes;
 }
 #ifdef ZSTD_SUPPORT
 /* Bitshuffle and compress a single block.
  *
  * Reads size * elem_size bytes from the chain's current input position,
  * bit-transposes them with bshuf_trans_bit_elem, compresses the result with
  * ZSTD_compress at level comp_lvl, and writes a 4-byte header (written with
  * bshuf_write_uint32_BE) holding the compressed size, followed by the
  * compressed bytes, to the chain's output.
  *
  * Returns the number of bytes written (compressed size + 4). Returns -1 when
  * a scratch allocation fails, the negative code from bshuf_trans_bit_elem
  * when the transpose fails, or the result of CHECK_ERR_FREE_LZ when nbytes
  * is negative after compression.
  */
 int64_t bshuf_compress_zstd_block(ioc_chain *C_ptr, \
        const size_t size, const size_t elem_size, const int comp_lvl) {
    int64_t nbytes, count;
    void *tmp_buf_bshuf;
    void *tmp_buf_zstd;
    size_t this_iter;
    const void *in;
    void *out;
    /* Scratch buffer for the bitshuffled (not yet compressed) block. */
    tmp_buf_bshuf = malloc(size * elem_size);
    if (tmp_buf_bshuf == NULL) return -1;
    /* Scratch buffer sized by ZSTD_compressBound for the same input length. */
    size_t tmp_buf_zstd_size = ZSTD_compressBound(size * elem_size);
    tmp_buf_zstd = malloc(tmp_buf_zstd_size);
    if (tmp_buf_zstd == NULL){
        free(tmp_buf_bshuf);
        return -1;
    }
    /* The input size is known up front, so the next input position is
     * published before any work is done on this block. */
    in = ioc_get_in(C_ptr, &this_iter);
    ioc_set_next_in(C_ptr, &this_iter, (void*) ((char*) in + size * elem_size));
    count = bshuf_trans_bit_elem(in, tmp_buf_bshuf, size, elem_size);
    if (count < 0) {
        free(tmp_buf_zstd);
        free(tmp_buf_bshuf);
        return count;
    }
    nbytes = ZSTD_compress(tmp_buf_zstd, tmp_buf_zstd_size, (const void*)tmp_buf_bshuf,  size * elem_size, comp_lvl);
    free(tmp_buf_bshuf);
    CHECK_ERR_FREE_LZ(nbytes, tmp_buf_zstd);
    /* The output position is only requested once the compressed size is
     * known, because the next output position depends on it. */
    out = ioc_get_out(C_ptr, &this_iter);
    ioc_set_next_out(C_ptr, &this_iter, (void *) ((char *) out + nbytes + 4));
    bshuf_write_uint32_BE(out, nbytes);
    memcpy((char *) out + 4, tmp_buf_zstd, nbytes);
    free(tmp_buf_zstd);
    return nbytes + 4;
 }
 /* Decompress and bitunshuffle a single block.
  *
  * Reads the 4-byte header at the chain's current input position to obtain
  * the compressed size, decompresses the following bytes with
  * ZSTD_decompress into a scratch buffer of size * elem_size bytes, and
  * un-transposes the result into the chain's output with
  * bshuf_untrans_bit_elem.
  *
  * The `option` parameter is not used in this function body.
  *
  * Returns the number of input bytes consumed (compressed size + 4). Returns
  * -1 when the scratch allocation fails and -91 when the decompressed length
  * differs from size * elem_size; other negative values come from the
  * CHECK_ERR_FREE_LZ / CHECK_ERR_FREE macros.
  */
 int64_t bshuf_decompress_zstd_block(ioc_chain *C_ptr,
        const size_t size, const size_t elem_size, const int option) {
    int64_t nbytes, count;
    void *out, *tmp_buf;
    const void *in;
    size_t this_iter;
    int32_t nbytes_from_header;
    in = ioc_get_in(C_ptr, &this_iter);
    nbytes_from_header = bshuf_read_uint32_BE(in);
    /* Both next positions are known as soon as the header is read, so they
     * are published before the block is decompressed. */
    ioc_set_next_in(C_ptr, &this_iter,
            (void*) ((char*) in + nbytes_from_header + 4));
    out = ioc_get_out(C_ptr, &this_iter);
    ioc_set_next_out(C_ptr, &this_iter,
            (void *) ((char *) out + size * elem_size));
    tmp_buf = malloc(size * elem_size);
    if (tmp_buf == NULL) return -1;
    nbytes = ZSTD_decompress(tmp_buf, size * elem_size, (void *)((char *) in + 4), nbytes_from_header);
    CHECK_ERR_FREE_LZ(nbytes, tmp_buf);
    /* A full block must decompress to exactly size * elem_size bytes. */
    if (nbytes != size * elem_size) {
        free(tmp_buf);
        return -91;
    }
    nbytes = nbytes_from_header;
    count = bshuf_untrans_bit_elem(tmp_buf, out, size, elem_size);
    CHECK_ERR_FREE(count, tmp_buf);
    /* Account for the 4-byte size header in the consumed byte count. */
    nbytes += 4;
    free(tmp_buf);
    return nbytes;
 }
 #endif // ZSTD_SUPPORT
 /* ---- Public functions ----
 *
@@ -216,13 +138,13 @@ size_t bshuf_compress_lz4_bound(const size_t size,
    }
    if (block_size % BSHUF_BLOCKED_MULT) return -81;
-    // Note that each block gets a 4 byte header.
+
-    // Size of full blocks.
+
    bound = (LZ4_compressBound(block_size * elem_size) + 4) * (size / block_size);
-    // Size of partial blocks, if any.
+
    leftover = ((size % block_size) / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT;
    if (leftover) bound += LZ4_compressBound(leftover * elem_size) + 4;
-    // Size of uncompressed data not fitting into any blocks.
+
    bound += (size % BSHUF_BLOCKED_MULT) * elem_size;
    return bound;
 }
@@ -231,49 +153,13 @@ size_t bshuf_compress_lz4_bound(const size_t size,
 int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size,
        const size_t elem_size, size_t block_size) {
    return bshuf_blocked_wrap_fun(&bshuf_compress_lz4_block, in, out, size,
-            elem_size, block_size, 0/*option*/);
+            elem_size, block_size);
 }
 int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size,
        const size_t elem_size, size_t block_size) {
    return bshuf_blocked_wrap_fun(&bshuf_decompress_lz4_block, in, out, size,
-            elem_size, block_size, 0/*option*/);
+            elem_size, block_size);
 }
 #ifdef ZSTD_SUPPORT
 /* Upper bound on the output size of bshuf_compress_zstd for `size` elements
  * of `elem_size` bytes processed in blocks of `block_size` elements.
  *
  * A block_size of 0 selects bshuf_default_block_size(elem_size).
  *
  * NOTE(review): the return type is size_t, so the `return -81` taken when
  * block_size is not a multiple of BSHUF_BLOCKED_MULT reaches the caller as
  * -81 converted to size_t, not as a negative number.
  */
 size_t bshuf_compress_zstd_bound(const size_t size,
        const size_t elem_size, size_t block_size) {
    size_t bound, leftover;
    if (block_size == 0) {
        block_size = bshuf_default_block_size(elem_size);
    }
    if (block_size % BSHUF_BLOCKED_MULT) return -81;
    // Note that each block gets a 4 byte header.
    // Size of full blocks.
    bound = (ZSTD_compressBound(block_size * elem_size) + 4) * (size / block_size);
    // Size of partial blocks, if any.
    leftover = ((size % block_size) / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT;
    if (leftover) bound += ZSTD_compressBound(leftover * elem_size) + 4;
    // Size of uncompressed data not fitting into any blocks.
    bound += (size % BSHUF_BLOCKED_MULT) * elem_size;
    return bound;
 }
 /* Bitshuffle and ZSTD-compress `size` elements of `elem_size` bytes.
  *
  * Forwards to bshuf_blocked_wrap_fun with bshuf_compress_zstd_block as the
  * per-block worker; comp_lvl is passed through as the worker's last argument.
  * Returns whatever bshuf_blocked_wrap_fun returns.
  */
 int64_t bshuf_compress_zstd(const void* in, void* out, const size_t size,
        const size_t elem_size, size_t block_size, const int comp_lvl) {
    return bshuf_blocked_wrap_fun(&bshuf_compress_zstd_block, in, out, size,
            elem_size, block_size, comp_lvl);
 }
 /* ZSTD-decompress and un-bitshuffle `size` elements of `elem_size` bytes.
  *
  * Forwards to bshuf_blocked_wrap_fun with bshuf_decompress_zstd_block as the
  * per-block worker; the worker's option argument is always 0.
  * Returns whatever bshuf_blocked_wrap_fun returns.
  */
 int64_t bshuf_decompress_zstd(const void* in, void* out, const size_t size,
        const size_t elem_size, size_t block_size) {
    return bshuf_blocked_wrap_fun(&bshuf_decompress_zstd_block, in, out, size,
            elem_size, block_size, 0/*option*/);
 }
 #endif // ZSTD_SUPPORT
@@ -35,10 +35,6 @@
 extern "C" {
 #endif
 /*
 * ---- LZ4 Interface ----
 */
 /* ---- bshuf_compress_lz4_bound ----
 *
 * Bound on size of data compressed with *bshuf_compress_lz4*.
@@ -98,6 +94,11 @@ int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size, const s
 * To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size*
 * must match the parameters used to compress the data.
 *
 * NOT TO BE USED WITH UNTRUSTED DATA: This routine uses the function
 * LZ4_decompress_fast from LZ4, which does not protect against maliciously
 * formed datasets. By modifying the compressed data, this function could be
 * coerced into leaving the boundaries of the input buffer.
 *
 * Parameters
 * ----------
 *  in : input buffer
@@ -115,91 +116,8 @@ int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size, const s
 int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size,
        const size_t elem_size, size_t block_size);
 /*
 * ---- ZSTD Interface ----
 */
 #ifdef ZSTD_SUPPORT
 /* ---- bshuf_compress_zstd_bound ----
 *
 * Bound on size of data compressed with *bshuf_compress_zstd*.
 *
 * Parameters
 * ----------
 *  size : number of elements in input
 *  elem_size : element size of typed data
 *  block_size : Process in blocks of this many elements. Pass 0 to
 *  select automatically (recommended).
 *
 * Returns
 * -------
 *  Bound on compressed data size.
 *
 */
 size_t bshuf_compress_zstd_bound(const size_t size,
        const size_t elem_size, size_t block_size);
 /* ---- bshuf_compress_zstd ----
 *
 * Bitshuffled and compress the data using zstd.
 *
 * Transpose within elements, in blocks of data of *block_size* elements then
 * compress the blocks using ZSTD.  In the output buffer, each block is prefixed
 * by a 4 byte integer giving the compressed size of that block.
 *
 * Output buffer must be large enough to hold the compressed data.  This could
 * be in principle substantially larger than the input buffer.  Use the routine
 * *bshuf_compress_zstd_bound* to get an upper limit.
 *
 * Parameters
 * ----------
 *  in : input buffer, must be of size * elem_size bytes
 *  out : output buffer, must be large enough to hold data.
 *  size : number of elements in input
 *  elem_size : element size of typed data
 *  block_size : Process in blocks of this many elements. Pass 0 to
 *  select automatically (recommended).
 *  comp_lvl : compression level applied
 *
 * Returns
 * -------
 *  number of bytes used in output buffer, negative error-code if failed.
 *
 */
 int64_t bshuf_compress_zstd(const void* in, void* out, const size_t size, const size_t
        elem_size, size_t block_size, const int comp_lvl);
 /* ---- bshuf_decompress_zstd ----
 *
 * Undo compression and bitshuffling.
 *
 * Decompress data then un-bitshuffle it in blocks of *block_size* elements.
 *
 * To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size*
 * must match the parameters used to compress the data.
 *
 * Parameters
 * ----------
 *  in : input buffer
 *  out : output buffer, must be of size * elem_size bytes
 *  size : number of elements in input
 *  elem_size : element size of typed data
 *  block_size : Process in blocks of this many elements. Pass 0 to
 *  select automatically (recommended).
 *
 * Returns
 * -------
 *  number of bytes consumed in *input* buffer, negative error-code if failed.
 *
 */
 int64_t bshuf_decompress_zstd(const void* in, void* out, const size_t size,
        const size_t elem_size, size_t block_size);
 #endif // ZSTD_SUPPORT
 #ifdef __cplusplus
-} // extern "C"
+}
 #endif
-#endif  // BITSHUFFLE_H
+#endif
@@ -16,55 +16,30 @@
 #include <string.h>
 #if defined(__AVX512F__) && defined (__AVX512BW__) && defined(__AVX2__) && defined(__SSE2__)
 #define USEAVX512
 #endif
 #if defined(__AVX2__) && defined (__SSE2__)
 #define USEAVX2
 #endif
-#if defined(__SSE2__) || defined(NO_WARN_X86_INTRINSICS)
+#if defined(__SSE2__)
 #define USESSE2
 #endif
 #if defined(__ARM_NEON__) || (__ARM_NEON)
 #ifdef __aarch64__
 #define USEARMNEON
 #endif
 #endif
-// Conditional includes for SSE2 and AVX2.
+
 #ifdef USEAVX2
 #include <immintrin.h>
 #elif defined USESSE2
 #include <emmintrin.h>
 #elif defined USEARMNEON
 #include <arm_neon.h>
 #endif
 #if defined(_OPENMP) && defined(_MSC_VER)
 typedef int64_t omp_size_t;
 #else
 typedef size_t omp_size_t;
 #endif
-// Macros.
+
 #define CHECK_MULT_EIGHT(n) if (n % 8) return -80;
 #define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
 /* ---- Functions indicating compile time instruction set. ---- */
 /* Report whether this build was compiled with USEARMNEON defined:
  * 1 if it was, 0 otherwise. */
 int bshuf_using_NEON(void) {
    int neon_enabled = 0;
 #ifdef USEARMNEON
    neon_enabled = 1;
 #endif
    return neon_enabled;
 }
 int bshuf_using_SSE2(void) {
 #ifdef USESSE2
    return 1;
@@ -83,14 +58,6 @@ int bshuf_using_AVX2(void) {
 }
 /* Report whether this build was compiled with USEAVX512 defined:
  * 1 if it was, 0 otherwise. */
 int bshuf_using_AVX512(void) {
    int avx512_enabled = 0;
 #ifdef USEAVX512
    avx512_enabled = 1;
 #endif
    return avx512_enabled;
 }
 /* ---- Worker code not requiring special instruction sets. ----
 *
 * The following code does not use any x86 specific vectorized instructions
@@ -164,8 +131,8 @@ int64_t bshuf_trans_byte_elem_remainder(const void* in, void* out, const size_t
    CHECK_MULT_EIGHT(start);
    if (size > start) {
-        // ii loop separated into 2 loops so the compiler can unroll
+
-        // the inner one.
+
        for (ii = start; ii + 7 < size; ii += 8) {
            for (jj = 0; jj < elem_size; jj++) {
                for (kk = 0; kk < 8; kk++) {
@@ -381,512 +348,6 @@ int64_t bshuf_untrans_bit_elem_scal(const void* in, void* out, const size_t size
 }
 /* ---- Worker code that uses Arm NEON ----
 *
 * The following code makes use of the Arm NEON instruction set.
 * NEON technology is the implementation of the ARM Advanced Single
 * Instruction Multiple Data (SIMD) extension.
 * The NEON unit is the component of the processor that executes SIMD instructions.
 * It is also called the NEON Media Processing Engine (MPE).
 *
 */
 #ifdef USEARMNEON
 /* Transpose bytes within elements for 16 bit elements.
  *
  * Processes 16 elements (32 input bytes) per iteration: two 16-byte vectors
  * are loaded, passed through four rounds of vzip1q_s8/vzip2q_s8, and stored
  * as one 16-byte vector into each of the two output rows (row stride is
  * `size` bytes). Elements left over when `size` is not a multiple of 16 are
  * delegated to bshuf_trans_byte_elem_remainder, whose return value is
  * returned.
  */
 int64_t bshuf_trans_byte_elem_NEON_16(const void* in, void* out, const size_t size) {
    size_t ii;
    const char *in_b = (const char*) in;
    char *out_b = (char*) out;
    int8x16_t a0, b0, a1, b1;
    for (ii=0; ii + 15 < size; ii += 16) {
        a0 = vld1q_s8(in_b + 2*ii + 0*16);
        b0 = vld1q_s8(in_b + 2*ii + 1*16);
        /* Four interleave rounds. */
        a1 = vzip1q_s8(a0, b0);
        b1 = vzip2q_s8(a0, b0);
        a0 = vzip1q_s8(a1, b1);
        b0 = vzip2q_s8(a1, b1);
        a1 = vzip1q_s8(a0, b0);
        b1 = vzip2q_s8(a0, b0);
        a0 = vzip1q_s8(a1, b1);
        b0 = vzip2q_s8(a1, b1);
        vst1q_s8(out_b + 0*size + ii, a0);
        vst1q_s8(out_b + 1*size + ii, b0);
    }
    /* Handle the trailing size % 16 elements. */
    return bshuf_trans_byte_elem_remainder(in, out, size, 2,
            size - size % 16);
 }
 /* Transpose bytes within elements for 32 bit elements.
  *
  * Processes 16 elements (64 input bytes) per iteration: four 16-byte vectors
  * are loaded, passed through three rounds of 8-bit zips followed by one
  * round of 64-bit zips, and stored as one 16-byte vector into each of the
  * four output rows (row stride is `size` bytes). Elements left over when
  * `size` is not a multiple of 16 are delegated to
  * bshuf_trans_byte_elem_remainder, whose return value is returned.
  */
 int64_t bshuf_trans_byte_elem_NEON_32(const void* in, void* out, const size_t size) {
    size_t ii;
    const char *in_b;
    char *out_b;
    in_b = (const char*) in;
    out_b = (char*) out;
    int8x16_t a0, b0, c0, d0, a1, b1, c1, d1;
    int64x2_t a2, b2, c2, d2;
    for (ii=0; ii + 15 < size; ii += 16) {
        a0 = vld1q_s8(in_b + 4*ii + 0*16);
        b0 = vld1q_s8(in_b + 4*ii + 1*16);
        c0 = vld1q_s8(in_b + 4*ii + 2*16);
        d0 = vld1q_s8(in_b + 4*ii + 3*16);
        /* Three rounds of byte-wise interleaving. */
        a1 = vzip1q_s8(a0, b0);
        b1 = vzip2q_s8(a0, b0);
        c1 = vzip1q_s8(c0, d0);
        d1 = vzip2q_s8(c0, d0);
        a0 = vzip1q_s8(a1, b1);
        b0 = vzip2q_s8(a1, b1);
        c0 = vzip1q_s8(c1, d1);
        d0 = vzip2q_s8(c1, d1);
        a1 = vzip1q_s8(a0, b0);
        b1 = vzip2q_s8(a0, b0);
        c1 = vzip1q_s8(c0, d0);
        d1 = vzip2q_s8(c0, d0);
        /* Final round combines 64-bit halves across the vector pairs. */
        a2 = vzip1q_s64(vreinterpretq_s64_s8(a1), vreinterpretq_s64_s8(c1));
        b2 = vzip2q_s64(vreinterpretq_s64_s8(a1), vreinterpretq_s64_s8(c1));
        c2 = vzip1q_s64(vreinterpretq_s64_s8(b1), vreinterpretq_s64_s8(d1));
        d2 = vzip2q_s64(vreinterpretq_s64_s8(b1), vreinterpretq_s64_s8(d1));
        vst1q_s64((int64_t *) (out_b + 0*size + ii), a2);
        vst1q_s64((int64_t *) (out_b + 1*size + ii), b2);
        vst1q_s64((int64_t *) (out_b + 2*size + ii), c2);
        vst1q_s64((int64_t *) (out_b + 3*size + ii), d2);
    }
    /* Handle the trailing size % 16 elements. */
    return bshuf_trans_byte_elem_remainder(in, out, size, 4,
            size - size % 16);
 }
 /* Transpose bytes within elements for 64 bit elements.
  *
  * Processes 16 elements (128 input bytes) per iteration: eight 16-byte
  * vectors are loaded, passed through two rounds of 8-bit zips, one round of
  * 32-bit zips and one round of 64-bit zips, and stored as one 16-byte vector
  * into each of the eight output rows (row stride is `size` bytes). Elements
  * left over when `size` is not a multiple of 16 are delegated to
  * bshuf_trans_byte_elem_remainder, whose return value is returned.
  */
 int64_t bshuf_trans_byte_elem_NEON_64(const void* in, void* out, const size_t size) {
    size_t ii;
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;
    int8x16_t a0, b0, c0, d0, e0, f0, g0, h0;
    int8x16_t a1, b1, c1, d1, e1, f1, g1, h1;
    for (ii=0; ii + 15 < size; ii += 16) {
        a0 = vld1q_s8(in_b + 8*ii + 0*16);
        b0 = vld1q_s8(in_b + 8*ii + 1*16);
        c0 = vld1q_s8(in_b + 8*ii + 2*16);
        d0 = vld1q_s8(in_b + 8*ii + 3*16);
        e0 = vld1q_s8(in_b + 8*ii + 4*16);
        f0 = vld1q_s8(in_b + 8*ii + 5*16);
        g0 = vld1q_s8(in_b + 8*ii + 6*16);
        h0 = vld1q_s8(in_b + 8*ii + 7*16);
        /* Two rounds of byte-wise interleaving. */
        a1 = vzip1q_s8 (a0, b0);
        b1 = vzip2q_s8 (a0, b0);
        c1 = vzip1q_s8 (c0, d0);
        d1 = vzip2q_s8 (c0, d0);
        e1 = vzip1q_s8 (e0, f0);
        f1 = vzip2q_s8 (e0, f0);
        g1 = vzip1q_s8 (g0, h0);
        h1 = vzip2q_s8 (g0, h0);
        a0 = vzip1q_s8 (a1, b1);
        b0 = vzip2q_s8 (a1, b1);
        c0 = vzip1q_s8 (c1, d1);
        d0 = vzip2q_s8 (c1, d1);
        e0 = vzip1q_s8 (e1, f1);
        f0 = vzip2q_s8 (e1, f1);
        g0 = vzip1q_s8 (g1, h1);
        h0 = vzip2q_s8 (g1, h1);
        /* One round interleaving 32-bit lanes. */
        a1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (c0));
        b1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (c0));
        c1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (b0), vreinterpretq_s32_s8 (d0));
        d1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (b0), vreinterpretq_s32_s8 (d0));
        e1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (g0));
        f1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (g0));
        g1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (f0), vreinterpretq_s32_s8 (h0));
        h1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (f0), vreinterpretq_s32_s8 (h0));
        /* One round interleaving 64-bit lanes. */
        a0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (a1), vreinterpretq_s64_s8 (e1));
        b0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (a1), vreinterpretq_s64_s8 (e1));
        c0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (b1), vreinterpretq_s64_s8 (f1));
        d0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (b1), vreinterpretq_s64_s8 (f1));
        e0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (c1), vreinterpretq_s64_s8 (g1));
        f0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (c1), vreinterpretq_s64_s8 (g1));
        g0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (d1), vreinterpretq_s64_s8 (h1));
        h0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (d1), vreinterpretq_s64_s8 (h1));
        vst1q_s8(out_b + 0*size + ii, a0);
        vst1q_s8(out_b + 1*size + ii, b0);
        vst1q_s8(out_b + 2*size + ii, c0);
        vst1q_s8(out_b + 3*size + ii, d0);
        vst1q_s8(out_b + 4*size + ii, e0);
        vst1q_s8(out_b + 5*size + ii, f0);
        vst1q_s8(out_b + 6*size + ii, g0);
        vst1q_s8(out_b + 7*size + ii, h0);
    }
    /* Handle the trailing size % 16 elements. */
    return bshuf_trans_byte_elem_remainder(in, out, size, 8,
            size - size % 16);
 }
 /* Transpose bytes within elements using best NEON algorithm available.
  *
  * Dispatches on elem_size:
  *   - 1, 2, 4, 8: bshuf_copy or the dedicated 16/32/64-bit NEON routine;
  *   - not a multiple of 4: the scalar bshuf_trans_byte_elem_scal;
  *   - any other multiple of 4: a hierarchical transpose that treats each
  *     element as nchunk_elem chunks of 8 or 4 bytes and uses a temporary
  *     buffer of size * elem_size bytes.
  *
  * Returns the count produced by the selected routine, or -1 if the
  * temporary buffer cannot be allocated.
  */
 int64_t bshuf_trans_byte_elem_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    int64_t count;
    // Trivial cases: power of 2 bytes.
    switch (elem_size) {
        case 1:
            count = bshuf_copy(in, out, size, elem_size);
            return count;
        case 2:
            count = bshuf_trans_byte_elem_NEON_16(in, out, size);
            return count;
        case 4:
            count = bshuf_trans_byte_elem_NEON_32(in, out, size);
            return count;
        case 8:
            count = bshuf_trans_byte_elem_NEON_64(in, out, size);
            return count;
    }
    // Worst case: odd number of bytes. Turns out that this is faster for
    // (odd * 2) byte elements as well (hence % 4).
    if (elem_size % 4) {
        count = bshuf_trans_byte_elem_scal(in, out, size, elem_size);
        return count;
    }
    // Multiple of power of 2: transpose hierarchically.
    {
        size_t nchunk_elem;
        void* tmp_buf = malloc(size * elem_size);
        if (tmp_buf == NULL) return -1;
        if ((elem_size % 8) == 0) {
            nchunk_elem = elem_size / 8;
            TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int64_t);
            count = bshuf_trans_byte_elem_NEON_64(out, tmp_buf,
                    size * nchunk_elem);
            bshuf_trans_elem(tmp_buf, out, 8, nchunk_elem, size);
        } else if ((elem_size % 4) == 0) {
            nchunk_elem = elem_size / 4;
            TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int32_t);
            count = bshuf_trans_byte_elem_NEON_32(out, tmp_buf,
                    size * nchunk_elem);
            bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size);
        } else {
            // Not used since scalar algorithm is faster.
            // (Every elem_size with elem_size % 4 != 0 has already returned above.)
            nchunk_elem = elem_size / 2;
            TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t);
            count = bshuf_trans_byte_elem_NEON_16(out, tmp_buf,
                    size * nchunk_elem);
            bshuf_trans_elem(tmp_buf, out, 2, nchunk_elem, size);
        }
        free(tmp_buf);
        return count;
    }
 }
 /* Creates a mask made up of the most significant
 * bit of each byte of 'input'
 *
 * Bit i (0..15) of the returned value is the top bit (0x80) of input[i];
 * all higher bits of the result are zero.
 */
 int32_t move_byte_mask_neon(uint8x16_t input) {
    return (  ((input[0] & 0x80) >> 7)          | (((input[1] & 0x80) >> 7) << 1)   | (((input[2] & 0x80) >> 7) << 2)   | (((input[3] & 0x80) >> 7) << 3)
            | (((input[4] & 0x80) >> 7) << 4)   | (((input[5] & 0x80) >> 7) << 5)   | (((input[6] & 0x80) >> 7) << 6)   | (((input[7] & 0x80) >> 7) << 7)
            | (((input[8] & 0x80) >> 7) << 8)   | (((input[9] & 0x80) >> 7) << 9)   | (((input[10] & 0x80) >> 7) << 10) | (((input[11] & 0x80) >> 7) << 11)
            | (((input[12] & 0x80) >> 7) << 12) | (((input[13] & 0x80) >> 7) << 13) | (((input[14] & 0x80) >> 7) << 14) | (((input[15] & 0x80) >> 7) << 15)
           );
 }
 /* Transpose bits within bytes.
  *
  * Treats the input as nbyte = elem_size * size bytes (must be a multiple of
  * 8, otherwise CHECK_MULT_EIGHT returns -80). For every 16-byte chunk, eight
  * 16-bit masks are extracted, one per bit position, and each is written into
  * a separate region of the output nbyte / 8 bytes apart. Bytes left over
  * when nbyte is not a multiple of 16 are delegated to
  * bshuf_trans_bit_byte_remainder, whose return value is returned.
  */
 int64_t bshuf_trans_bit_byte_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    size_t ii, kk;
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;
    uint16_t* out_ui16;
    int64_t count;
    size_t nbyte = elem_size * size;
    CHECK_MULT_EIGHT(nbyte);
    int16x8_t xmm;
    int32_t bt;
    for (ii = 0; ii + 15 < nbyte; ii += 16) {
        xmm = vld1q_s16((int16_t *) (in_b + ii));
        for (kk = 0; kk < 8; kk++) {
            /* Collect the current top bit of each of the 16 bytes ... */
            bt = move_byte_mask_neon((uint8x16_t) xmm);
            /* ... then shift left so the next lower bit becomes the top bit. */
            xmm = vshlq_n_s16(xmm, 1);
            /* Iteration kk holds original bit (7 - kk) and goes to output region (7 - kk). */
            out_ui16 = (uint16_t*) &out_b[((7 - kk) * nbyte + ii) / 8];
            *out_ui16 = bt;
        }
    }
    count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size,
            nbyte - nbyte % 16);
    return count;
 }
 /* Transpose bits within elements.
 *
 * Runs three passes, ping-ponging between 'out' and a scratch buffer of
 * size * elem_size bytes: byte transpose, bit-within-byte transpose, then the
 * bit-row fix-up. Returns -1 if the scratch buffer cannot be allocated, the
 * first negative status reported by a pass, or otherwise the result of the
 * final pass. CHECK_MULT_EIGHT presumably rejects a 'size' that is not a
 * multiple of eight (macro defined elsewhere).
 */
 int64_t bshuf_trans_bit_elem_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    int64_t status;
    void* scratch;

    CHECK_MULT_EIGHT(size);

    scratch = malloc(size * elem_size);
    if (scratch == NULL) {
        return -1;
    }

    status = bshuf_trans_byte_elem_NEON(in, out, size, elem_size);
    if (status >= 0) {
        status = bshuf_trans_bit_byte_NEON(out, scratch, size, elem_size);
    }
    if (status >= 0) {
        status = bshuf_trans_bitrow_eight(scratch, out, size, elem_size);
    }

    /* Single exit: the scratch buffer is released on every path. */
    free(scratch);
    return status;
 }
 /* For data organized into a row for each bit (8 * elem_size rows), transpose
 * the bytes.
 *
 * Input is read as nrows = 8 * elem_size rows of nbyte_row = size / 8 bytes;
 * output is written as nbyte_row rows of nrows bytes, i.e. input byte
 * (row r, column c) lands at out[c * nrows + r].
 *
 * Work is done on tiles of 8 input rows by 16 columns using three rounds of
 * zip (interleave) operations; columns left over after the last full group
 * of 16 are copied byte by byte. Always returns size * elem_size.
 */
 int64_t bshuf_trans_byte_bitrow_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    size_t ii, jj;
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;
    CHECK_MULT_EIGHT(size);
    size_t nrows = 8 * elem_size;
    size_t nbyte_row = size / 8;
    int8x16_t a0, b0, c0, d0, e0, f0, g0, h0;
    int8x16_t a1, b1, c1, d1, e1, f1, g1, h1;
    int64x1_t *as, *bs, *cs, *ds, *es, *fs, *gs, *hs;
    for (ii = 0; ii + 7 < nrows; ii += 8) {
        for (jj = 0; jj + 15 < nbyte_row; jj += 16) {
            /* Load 16 consecutive bytes from each of 8 consecutive rows. */
            a0 = vld1q_s8(in_b + (ii + 0)*nbyte_row + jj);
            b0 = vld1q_s8(in_b + (ii + 1)*nbyte_row + jj);
            c0 = vld1q_s8(in_b + (ii + 2)*nbyte_row + jj);
            d0 = vld1q_s8(in_b + (ii + 3)*nbyte_row + jj);
            e0 = vld1q_s8(in_b + (ii + 4)*nbyte_row + jj);
            f0 = vld1q_s8(in_b + (ii + 5)*nbyte_row + jj);
            g0 = vld1q_s8(in_b + (ii + 6)*nbyte_row + jj);
            h0 = vld1q_s8(in_b + (ii + 7)*nbyte_row + jj);
            /* Round 1: interleave bytes of row pairs. a1..d1 cover columns
             * 0-7, e1..h1 cover columns 8-15. */
            a1 = vzip1q_s8(a0, b0);
            b1 = vzip1q_s8(c0, d0);
            c1 = vzip1q_s8(e0, f0);
            d1 = vzip1q_s8(g0, h0);
            e1 = vzip2q_s8(a0, b0);
            f1 = vzip2q_s8(c0, d0);
            g1 = vzip2q_s8(e0, f0);
            h1 = vzip2q_s8(g0, h0);
            /* Round 2: interleave 16-bit pairs, giving 4 rows per column. */
            a0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (a1), vreinterpretq_s16_s8 (b1));
            b0=  (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (c1), vreinterpretq_s16_s8 (d1));
            c0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (a1), vreinterpretq_s16_s8 (b1));
            d0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (c1), vreinterpretq_s16_s8 (d1));
            e0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (e1), vreinterpretq_s16_s8 (f1));
            f0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (g1), vreinterpretq_s16_s8 (h1));
            g0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (e1), vreinterpretq_s16_s8 (f1));
            h0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (g1), vreinterpretq_s16_s8 (h1));
            /* Round 3: interleave 32-bit groups. Each 64-bit half of a1..h1
             * now holds the 8 row bytes of a single input column. */
            a1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (b0));
            b1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (b0));
            c1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (c0), vreinterpretq_s32_s8 (d0));
            d1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (c0), vreinterpretq_s32_s8 (d0));
            e1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (f0));
            f1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (f0));
            g1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (g0), vreinterpretq_s32_s8 (h0));
            h1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (g0), vreinterpretq_s32_s8 (h0));
            /* View each 128-bit register as two 64-bit halves so the low and
             * high halves can be stored to different output rows. */
            as = (int64x1_t *) &a1;
            bs = (int64x1_t *) &b1;
            cs = (int64x1_t *) &c1;
            ds = (int64x1_t *) &d1;
            es = (int64x1_t *) &e1;
            fs = (int64x1_t *) &f1;
            gs = (int64x1_t *) &g1;
            hs = (int64x1_t *) &h1;
            /* Input column jj + k becomes 8 bytes of output row jj + k. */
            vst1_s64((int64_t *)(out_b + (jj + 0) * nrows + ii), *as);
            vst1_s64((int64_t *)(out_b + (jj + 1) * nrows + ii), *(as + 1));
            vst1_s64((int64_t *)(out_b + (jj + 2) * nrows + ii), *bs);
            vst1_s64((int64_t *)(out_b + (jj + 3) * nrows + ii), *(bs + 1));
            vst1_s64((int64_t *)(out_b + (jj + 4) * nrows + ii), *cs);
            vst1_s64((int64_t *)(out_b + (jj + 5) * nrows + ii), *(cs + 1));
            vst1_s64((int64_t *)(out_b + (jj + 6) * nrows + ii), *ds);
            vst1_s64((int64_t *)(out_b + (jj + 7) * nrows + ii), *(ds + 1));
            vst1_s64((int64_t *)(out_b + (jj + 8) * nrows + ii), *es);
            vst1_s64((int64_t *)(out_b + (jj + 9) * nrows + ii), *(es + 1));
            vst1_s64((int64_t *)(out_b + (jj + 10) * nrows + ii), *fs);
            vst1_s64((int64_t *)(out_b + (jj + 11) * nrows + ii), *(fs + 1));
            vst1_s64((int64_t *)(out_b + (jj + 12) * nrows + ii), *gs);
            vst1_s64((int64_t *)(out_b + (jj + 13) * nrows + ii), *(gs + 1));
            vst1_s64((int64_t *)(out_b + (jj + 14) * nrows + ii), *hs);
            vst1_s64((int64_t *)(out_b + (jj + 15) * nrows + ii), *(hs + 1));
        }
        /* Scalar tail: columns past the last full group of 16. */
        for (jj = nbyte_row - nbyte_row % 16; jj < nbyte_row; jj ++) {
            out_b[jj * nrows + ii + 0] = in_b[(ii + 0)*nbyte_row + jj];
            out_b[jj * nrows + ii + 1] = in_b[(ii + 1)*nbyte_row + jj];
            out_b[jj * nrows + ii + 2] = in_b[(ii + 2)*nbyte_row + jj];
            out_b[jj * nrows + ii + 3] = in_b[(ii + 3)*nbyte_row + jj];
            out_b[jj * nrows + ii + 4] = in_b[(ii + 4)*nbyte_row + jj];
            out_b[jj * nrows + ii + 5] = in_b[(ii + 5)*nbyte_row + jj];
            out_b[jj * nrows + ii + 6] = in_b[(ii + 6)*nbyte_row + jj];
            out_b[jj * nrows + ii + 7] = in_b[(ii + 7)*nbyte_row + jj];
        }
    }
    return size * elem_size;
 }
 /* Shuffle bits within the bytes of eight element blocks.
 *
 * Odd element sizes are handed to the scalar routine (its return value is
 * not inspected). For even element sizes the buffer is walked in blocks of
 * 8 * elem_size bytes, 16 bytes at a time; each 16-byte group is reduced to
 * eight 16-bit masks, one per bit position, written elem_size bytes apart.
 * Always returns size * elem_size.
 */
 int64_t bshuf_shuffle_bit_eightelem_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    CHECK_MULT_EIGHT(size);
    // With a bit of care, this could be written such that it is safe for
    // in_buf == out_buf.
    const char* in_b = (const char*) in;
    uint16_t* out_ui16 = (uint16_t*) out;
    size_t ii, jj, kk;
    size_t nbyte = elem_size * size;
    int16x8_t xmm;
    int32_t bt;
    if (elem_size % 2) {
        bshuf_shuffle_bit_eightelem_scal(in, out, size, elem_size);
    } else {
        // ii walks blocks of eight elements, jj walks 16-byte groups inside
        // one block.
        for (ii = 0; ii + 8 * elem_size - 1 < nbyte;
                ii += 8 * elem_size) {
            for (jj = 0; jj + 15 < 8 * elem_size; jj += 16) {
                xmm = vld1q_s16((int16_t *) &in_b[ii + jj]);
                for (kk = 0; kk < 8; kk++) {
                    // Mask of the current top bit of every byte, then shift
                    // the next lower bit into the top position.
                    bt = move_byte_mask_neon((uint8x16_t) xmm);
                    xmm = vshlq_n_s16(xmm, 1);
                    // ind is a byte offset; it is halved because the output
                    // is addressed as uint16_t.
                    size_t ind = (ii + jj / 8 + (7 - kk) * elem_size);
                    out_ui16[ind / 2] = bt;
                }
            }
        }
    }
    return size * elem_size;
 }
 /* Untranspose bits within elements.
 *
 * Two passes through a scratch buffer of size * elem_size bytes: the bit-row
 * byte transpose into scratch, then the eight-element bit shuffle from
 * scratch into 'out'. Returns -1 when the scratch buffer cannot be allocated,
 * a negative status from the first pass if it fails, and otherwise the result
 * of the second pass.
 */
 int64_t bshuf_untrans_bit_elem_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    int64_t status;
    void* scratch;

    CHECK_MULT_EIGHT(size);

    scratch = malloc(size * elem_size);
    if (scratch == NULL) {
        return -1;
    }

    status = bshuf_trans_byte_bitrow_NEON(in, scratch, size, elem_size);
    if (status >= 0) {
        status = bshuf_shuffle_bit_eightelem_NEON(scratch, out, size, elem_size);
    }

    /* Single exit: the scratch buffer is released on every path. */
    free(scratch);
    return status;
 }
 #else // #ifdef USEARMNEON
 /* Placeholder used when USEARMNEON is not defined: performs no work and
 * returns error code -13 whatever the arguments are. */
 int64_t bshuf_untrans_bit_elem_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    (void) in;
    (void) out;
    (void) size;
    (void) elem_size;
    return -13;
 }
 /* Placeholder used when USEARMNEON is not defined: performs no work and
 * returns error code -13 whatever the arguments are. */
 int64_t bshuf_trans_bit_elem_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    (void) in;
    (void) out;
    (void) size;
    (void) elem_size;
    return -13;
 }
 /* Placeholder used when USEARMNEON is not defined: performs no work and
 * returns error code -13 whatever the arguments are. */
 int64_t bshuf_trans_byte_bitrow_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    (void) in;
    (void) out;
    (void) size;
    (void) elem_size;
    return -13;
 }
 /* Placeholder used when USEARMNEON is not defined: performs no work and
 * returns error code -13 whatever the arguments are. */
 int64_t bshuf_trans_bit_byte_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    (void) in;
    (void) out;
    (void) size;
    (void) elem_size;
    return -13;
 }
 /* Placeholder used when USEARMNEON is not defined: performs no work and
 * returns error code -13 whatever the arguments are. */
 int64_t bshuf_trans_byte_elem_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    (void) in;
    (void) out;
    (void) size;
    (void) elem_size;
    return -13;
 }
 /* Placeholder used when USEARMNEON is not defined: performs no work and
 * returns error code -13 whatever the arguments are. */
 int64_t bshuf_trans_byte_elem_NEON_64(const void* in, void* out, const size_t size) {
    (void) in;
    (void) out;
    (void) size;
    return -13;
 }
 /* Placeholder used when USEARMNEON is not defined: performs no work and
 * returns error code -13 whatever the arguments are. */
 int64_t bshuf_trans_byte_elem_NEON_32(const void* in, void* out, const size_t size) {
    (void) in;
    (void) out;
    (void) size;
    return -13;
 }
 /* Placeholder used when USEARMNEON is not defined: performs no work and
 * returns error code -13 whatever the arguments are. */
 int64_t bshuf_trans_byte_elem_NEON_16(const void* in, void* out, const size_t size) {
    (void) in;
    (void) out;
    (void) size;
    return -13;
 }
 /* Placeholder used when USEARMNEON is not defined: performs no work and
 * returns error code -13 whatever the arguments are. */
 int64_t bshuf_shuffle_bit_eightelem_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    (void) in;
    (void) out;
    (void) size;
    (void) elem_size;
    return -13;
 }
 #endif
 /* ---- Worker code that uses SSE2 ----
 *
 * The following code makes use of the SSE2 instruction set and specialized
@@ -1051,7 +512,7 @@ int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size,
    int64_t count;
-    // Trivial cases: power of 2 bytes.
+
    switch (elem_size) {
        case 1:
            count = bshuf_copy(in, out, size, elem_size);
@@ -1067,14 +528,14 @@ int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size,
            return count;
    }
-    // Worst case: odd number of bytes. Turns out that this is faster for
+
-    // (odd * 2) byte elements as well (hence % 4).
+
    if (elem_size % 4) {
        count = bshuf_trans_byte_elem_scal(in, out, size, elem_size);
        return count;
    }
-    // Multiple of power of 2: transpose hierarchically.
+
    {
        size_t nchunk_elem;
        void* tmp_buf = malloc(size * elem_size);
@@ -1093,7 +554,7 @@ int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size,
                    size * nchunk_elem);
            bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size);
        } else {
-            // Not used since scalar algorithm is faster.
+
            nchunk_elem = elem_size / 2;
            TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t);
            count = bshuf_trans_byte_elem_SSE_16(out, tmp_buf,
@@ -1226,8 +687,8 @@ int64_t bshuf_trans_byte_bitrow_SSE(const void* in, void* out, const size_t size
            g1 = _mm_unpacklo_epi32(g0, h0);
            h1 = _mm_unpackhi_epi32(g0, h0);
-            // We don't have a storeh instruction for integers, so interpret
+
-            // as a float. Have a storel (_mm_storel_epi64).
+
            as = (__m128 *) &a1;
            bs = (__m128 *) &b1;
            cs = (__m128 *) &c1;
@@ -1276,8 +737,8 @@ int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t
    CHECK_MULT_EIGHT(size);
-    // With a bit of care, this could be written such that such that it is
+
-    // in_buf = out_buf safe.
+
    const char* in_b = (const char*) in;
    uint16_t* out_ui16 = (uint16_t*) out;
@@ -1327,7 +788,7 @@ int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size,
    return count;
 }
-#else // #ifdef USESSE2
+#else
 int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size,
@@ -1381,7 +842,7 @@ int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t
 }
-#endif // #ifdef USESSE2
+#endif
 /* ---- Code that requires AVX2. Intel Haswell (2013) and later. ---- */
@@ -1396,6 +857,7 @@ int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t
 */
 #ifdef USEAVX2
 /* Transpose bits within bytes. */
 int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size,
         const size_t elem_size) {
@@ -1552,8 +1014,8 @@ int64_t bshuf_shuffle_bit_eightelem_AVX(const void* in, void* out, const size_t
    CHECK_MULT_EIGHT(size);
-    // With a bit of care, this could be written such that such that it is
+
-    // in_buf = out_buf safe.
+
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;
@@ -1603,7 +1065,7 @@ int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size,
 }
-#else // #ifdef USEAVX2
+#else
 int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size,
         const size_t elem_size) {
@@ -1634,179 +1096,19 @@ int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size,
    return -12;
 }
 #endif // #ifdef USEAVX2
 #ifdef USEAVX512
 /* Transpose bits within bytes.
 *
 * The input is treated as nbyte = size * elem_size bytes. The output is laid
 * out as eight rows of nbyte / 8 bytes; row b receives bit b of every input
 * byte (pass kk extracts the current top bit and writes row 7 - kk).
 *
 * Three tiers: 64-byte groups with 512-bit registers, then at most one
 * 32-byte group with a 256-bit register, then
 * bshuf_trans_bit_byte_remainder for whatever is left. The return value is
 * that of the remainder routine.
 */
 int64_t bshuf_trans_bit_byte_AVX512(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    size_t ii, kk;
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;
    size_t nbyte = elem_size * size;
    int64_t count;
    int64_t* out_i64;
    __m512i zmm;
    __mmask64 bt;
    if (nbyte >= 64) {
        const __m512i mask = _mm512_set1_epi8(0);
       for (ii = 0; ii + 63 < nbyte; ii += 64) {
            zmm = _mm512_loadu_si512((__m512i *) &in_b[ii]);
            for (kk = 0; kk < 8; kk++) {
                /* Compare predicate 1 against an all-zero vector.
                 * NOTE(review): presumably signed less-than, i.e. one mask
                 * bit per byte whose top bit is set - confirm against the
                 * intrinsics guide. */
                bt = _mm512_cmp_epi8_mask(zmm, mask, 1);
                /* Bring the next lower bit of each byte to the top. */
                zmm = _mm512_slli_epi16(zmm, 1);
                /* 64 input bytes yield 64 mask bits = 8 output bytes. */
                out_i64 = (int64_t*) &out_b[((7 - kk) * nbyte + ii) / 8];
                *out_i64 = (int64_t)bt;
            }
        }
    }
    __m256i ymm;
    int32_t bt32;
    int32_t* out_i32;
    /* First byte not covered by the 64-byte loop above. */
    size_t start = nbyte - nbyte % 64;
    for (ii = start; ii + 31 < nbyte; ii += 32) {
        ymm = _mm256_loadu_si256((__m256i *) &in_b[ii]);
        for (kk = 0; kk < 8; kk++) {
            bt32 = _mm256_movemask_epi8(ymm);
            ymm = _mm256_slli_epi16(ymm, 1);
            out_i32 = (int32_t*) &out_b[((7 - kk) * nbyte + ii) / 8];
            *out_i32 = bt32;
        }
    }
    /* nbyte % 64 % 32 equals nbyte % 32, so the remainder routine starts at
     * the first byte past the last full 32-byte group. */
    count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size,
            nbyte - nbyte % 64 % 32);
    return count;
 }
 /* Transpose bits within elements.
 *
 * Runs three passes, ping-ponging between 'out' and a scratch buffer of
 * size * elem_size bytes: the SSE byte transpose, the AVX512
 * bit-within-byte transpose, then the bit-row fix-up. Returns -1 if the
 * scratch buffer cannot be allocated, the first negative status reported by
 * a pass, or otherwise the result of the final pass.
 */
 int64_t bshuf_trans_bit_elem_AVX512(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    int64_t status;
    void* scratch;

    CHECK_MULT_EIGHT(size);

    scratch = malloc(size * elem_size);
    if (scratch == NULL) {
        return -1;
    }

    status = bshuf_trans_byte_elem_SSE(in, out, size, elem_size);
    if (status >= 0) {
        status = bshuf_trans_bit_byte_AVX512(out, scratch, size, elem_size);
    }
    if (status >= 0) {
        status = bshuf_trans_bitrow_eight(scratch, out, size, elem_size);
    }

    /* Single exit: the scratch buffer is released on every path. */
    free(scratch);
    return status;
 }
 /* Shuffle bits within the bytes of eight element blocks.
 *
 * Element sizes that are not a multiple of 8 are forwarded to the AVX
 * implementation and its result is returned. Otherwise each block of
 * 8 * elem_size bytes is processed 64 bytes at a time; every 64-byte group
 * is reduced to eight 64-bit masks, one per bit position, stored elem_size
 * bytes apart. That path always returns size * elem_size.
 */
 int64_t bshuf_shuffle_bit_eightelem_AVX512(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    CHECK_MULT_EIGHT(size);
    // With a bit of care, this could be written such that it is safe for
    // in_buf == out_buf.
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;
    size_t ii, jj, kk;
    size_t nbyte = elem_size * size;
    __m512i zmm;
    __mmask64 bt;
    if (elem_size % 8) {
        return bshuf_shuffle_bit_eightelem_AVX(in, out, size, elem_size);
    } else {
        const __m512i mask = _mm512_set1_epi8(0);
        // jj walks 64-byte groups inside a block, ii walks the blocks of
        // eight elements.
        for (jj = 0; jj + 63 < 8 * elem_size; jj += 64) {
            for (ii = 0; ii + 8 * elem_size - 1 < nbyte;
                    ii += 8 * elem_size) {
                zmm = _mm512_loadu_si512((__m512i *) &in_b[ii + jj]);
                for (kk = 0; kk < 8; kk++) {
                    // NOTE(review): predicate 1 is presumably signed
                    // less-than, so bt flags bytes whose top bit is set.
                    bt = _mm512_cmp_epi8_mask(zmm, mask, 1);
                    zmm = _mm512_slli_epi16(zmm, 1);
                    size_t ind = (ii + jj / 8 + (7 - kk) * elem_size);
                    * (int64_t *) &out_b[ind] = bt;
                }
            }
        }
    }
    return size * elem_size;
 }
 /* Untranspose bits within elements.
 *
 * Two passes through a scratch buffer of size * elem_size bytes: the AVX
 * bit-row byte transpose into scratch, then the AVX512 eight-element bit
 * shuffle from scratch into 'out'. Returns -1 when the scratch buffer cannot
 * be allocated, a negative status from the first pass if it fails, and
 * otherwise the result of the second pass.
 */
 int64_t bshuf_untrans_bit_elem_AVX512(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    int64_t status;
    void* scratch;

    CHECK_MULT_EIGHT(size);

    scratch = malloc(size * elem_size);
    if (scratch == NULL) {
        return -1;
    }

    status = bshuf_trans_byte_bitrow_AVX(in, scratch, size, elem_size);
    if (status >= 0) {
        status = bshuf_shuffle_bit_eightelem_AVX512(scratch, out, size, elem_size);
    }

    /* Single exit: the scratch buffer is released on every path. */
    free(scratch);
    return status;
 }
 #else // #ifdef USEAVX512
 /* Placeholder used when USEAVX512 is not defined: performs no work and
 * returns error code -14 whatever the arguments are. */
 int64_t bshuf_trans_bit_byte_AVX512(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    (void) in;
    (void) out;
    (void) size;
    (void) elem_size;
    return -14;
 }
 /* Placeholder used when USEAVX512 is not defined: performs no work and
 * returns error code -14 whatever the arguments are. */
 int64_t bshuf_trans_bit_elem_AVX512(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    (void) in;
    (void) out;
    (void) size;
    (void) elem_size;
    return -14;
 }
 /* Placeholder used when USEAVX512 is not defined: performs no work and
 * returns error code -14 whatever the arguments are. */
 int64_t bshuf_shuffle_bit_eightelem_AVX512(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    (void) in;
    (void) out;
    (void) size;
    (void) elem_size;
    return -14;
 }
 /* Placeholder used when USEAVX512 is not defined: performs no work and
 * returns error code -14 whatever the arguments are. */
 int64_t bshuf_untrans_bit_elem_AVX512(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    (void) in;
    (void) out;
    (void) size;
    (void) elem_size;
    return -14;
 }
 #endif
 /* ---- Drivers selecting best instruction set at compile time. ---- */
 int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size,
        const size_t elem_size) {
    int64_t count;
-#ifdef USEAVX512
+#ifdef USEAVX2
    count = bshuf_trans_bit_elem_AVX512(in, out, size, elem_size);
 #elif defined USEAVX2
    count = bshuf_trans_bit_elem_AVX(in, out, size, elem_size);
 #elif defined(USESSE2)
    count = bshuf_trans_bit_elem_SSE(in, out, size, elem_size);
 #elif defined(USEARMNEON)
    count = bshuf_trans_bit_elem_NEON(in, out, size, elem_size);
 #else
    count = bshuf_trans_bit_elem_scal(in, out, size, elem_size);
 #endif
@@ -1818,14 +1120,10 @@ int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size,
        const size_t elem_size) {
    int64_t count;
-#ifdef USEAVX512
+#ifdef USEAVX2
    count = bshuf_untrans_bit_elem_AVX512(in, out, size, elem_size);
 #elif defined USEAVX2
    count = bshuf_untrans_bit_elem_AVX(in, out, size, elem_size);
 #elif defined(USESSE2)
    count = bshuf_untrans_bit_elem_SSE(in, out, size, elem_size);
 #elif defined(USEARMNEON)
    count = bshuf_untrans_bit_elem_NEON(in, out, size, elem_size);
 #else
    count = bshuf_untrans_bit_elem_scal(in, out, size, elem_size);
 #endif
@@ -1838,9 +1136,9 @@ int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size,
 /* Wrap a function for processing a single block to process an entire buffer in
 * parallel. */
 int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out, \
-        const size_t size, const size_t elem_size, size_t block_size, const int option) {
+        const size_t size, const size_t elem_size, size_t block_size) {
-    omp_size_t ii = 0;
+    size_t ii;
    int64_t err = 0;
    int64_t count, cum_count=0;
    size_t last_block_size;
@@ -1863,8 +1161,8 @@ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
    #pragma omp parallel for schedule(dynamic, 1) \
            private(count) reduction(+ : cum_count)
 #endif
-    for (ii = 0; ii < (omp_size_t)( size / block_size ); ii ++) {
+    for (ii = 0; ii < size / block_size; ii ++) {
-        count = fun(&C, block_size, elem_size, option);
+        count = fun(&C, block_size, elem_size);
        if (count < 0) err = count;
        cum_count += count;
    }
@@ -1872,7 +1170,7 @@ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
    last_block_size = size % block_size;
    last_block_size = last_block_size - last_block_size % BSHUF_BLOCKED_MULT;
    if (last_block_size) {
-        count = fun(&C, last_block_size, elem_size, option);
+        count = fun(&C, last_block_size, elem_size);
        if (count < 0) err = count;
        cum_count += count;
    }
@@ -1880,7 +1178,6 @@ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
    if (err < 0) return err;
    leftover_bytes = size % BSHUF_BLOCKED_MULT * elem_size;
    //this_iter;
    last_in = (char *) ioc_get_in(&C, &this_iter);
    ioc_set_next_in(&C, &this_iter, (void *) (last_in + leftover_bytes));
    last_out = (char *) ioc_get_out(&C, &this_iter);
@@ -1896,7 +1193,7 @@ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
 /* Bitshuffle a single block. */
 int64_t bshuf_bitshuffle_block(ioc_chain *C_ptr, \
-        const size_t size, const size_t elem_size, const int option) {
+        const size_t size, const size_t elem_size) {
    size_t this_iter;
    const void *in;
@@ -1919,7 +1216,7 @@ int64_t bshuf_bitshuffle_block(ioc_chain *C_ptr, \
 /* Bitunshuffle a single block. */
 int64_t bshuf_bitunshuffle_block(ioc_chain* C_ptr, \
-        const size_t size, const size_t elem_size, const int option) {
+        const size_t size, const size_t elem_size) {
    size_t this_iter;
@@ -1999,11 +1296,11 @@ uint32_t bshuf_read_uint32_BE(const void* buf) {
 */
 size_t bshuf_default_block_size(const size_t elem_size) {
-    // This function needs to be absolutely stable between versions.
+
-    // Otherwise encoded data will not be decodable.
+
    size_t block_size = BSHUF_TARGET_BLOCK_SIZE_B / elem_size;
-    // Ensure it is a required multiple.
+
    block_size = (block_size / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT;
    return MAX(block_size, BSHUF_MIN_RECOMMEND_BLOCK);
 }
@@ -2013,7 +1310,7 @@ int64_t bshuf_bitshuffle(const void* in, void* out, const size_t size,
        const size_t elem_size, size_t block_size) {
    return bshuf_blocked_wrap_fun(&bshuf_bitshuffle_block, in, out, size,
-            elem_size, block_size, 0/*option*/);
+            elem_size, block_size);
 }
@@ -2021,7 +1318,7 @@ int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size,
        const size_t elem_size, size_t block_size) {
    return bshuf_blocked_wrap_fun(&bshuf_bitunshuffle_block, in, out, size,
-            elem_size, block_size, 0/*option*/);
+            elem_size, block_size);
 }
@@ -18,8 +18,6 @@
 *      -1    : Failed to allocate memory.
 *      -11   : Missing SSE.
 *      -12   : Missing AVX.
 *      -13   : Missing Arm Neon.
 *      -14   : Missing AVX512.
 *      -80   : Input size not a multiple of 8.
 *      -81   : block_size not multiple of 8.
 *      -91   : Decompression error, wrong number of bytes processed.
@@ -30,12 +28,13 @@
 #ifndef BITSHUFFLE_CORE_H
 #define BITSHUFFLE_CORE_H
-// We assume GNU g++ defining `__cplusplus` has stdint.h
+
 #if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus)
 #include <stdint.h>
 #else
  typedef unsigned char       uint8_t;
  typedef unsigned short      uint16_t;
  typedef signed short        int16_t;
  typedef unsigned int        uint32_t;
  typedef signed int          int32_t;
  typedef unsigned long long  uint64_t;
@@ -45,11 +44,11 @@
 #include <stdlib.h>
-// These are usually set in the setup.py.
+
 #ifndef BSHUF_VERSION_MAJOR
 #define BSHUF_VERSION_MAJOR 0
-#define BSHUF_VERSION_MINOR 4
+#define BSHUF_VERSION_MINOR 3
-#define BSHUF_VERSION_POINT 0
+#define BSHUF_VERSION_POINT 4
 #endif
 #ifdef __cplusplus
@@ -68,18 +67,6 @@ extern "C" {
 int bshuf_using_SSE2(void);
 /* ---- bshuf_using_NEON ----
 *
 * Whether routines were compiled with the NEON instruction set.
 *
 * Returns
 * -------
 *  1 if using NEON, 0 otherwise.
 *
 */
 int bshuf_using_NEON(void);
 /* ---- bshuf_using_AVX2 ----
 *
 * Whether routines were compiled with the AVX2 instruction set.
@@ -92,18 +79,6 @@ int bshuf_using_NEON(void);
 int bshuf_using_AVX2(void);
 /* ---- bshuf_using_AVX512 ----
 *
 * Whether routines were compiled with the AVX512 instruction set.
 *
 * Returns
 * -------
 *  1 if using AVX512, 0 otherwise.
 *
 */
 int bshuf_using_AVX512(void);
 /* ---- bshuf_default_block_size ----
 *
 * The default block size as function of element size.
@@ -176,7 +151,7 @@ int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size,
        const size_t elem_size, size_t block_size);
 #ifdef __cplusplus
-} // extern "C"
+}
 #endif
-#endif  // BITSHUFFLE_CORE_H
+#endif
@@ -13,31 +13,19 @@
 #ifndef BITSHUFFLE_INTERNALS_H
 #define BITSHUFFLE_INTERNALS_H
 // We assume GNU g++ defining `__cplusplus` has stdint.h
 #if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus)
 #include <stdint.h>
 #else
  typedef unsigned char       uint8_t;
  typedef unsigned short      uint16_t;
  typedef unsigned int        uint32_t;
  typedef   signed int        int32_t;
  typedef unsigned long long  uint64_t;
  typedef long long           int64_t;
 #endif
 #include <stdlib.h>
 #include "iochain.h"
 // Constants.
 #ifndef BSHUF_MIN_RECOMMEND_BLOCK
 #define BSHUF_MIN_RECOMMEND_BLOCK 128
-#define BSHUF_BLOCKED_MULT 8    // Block sizes must be multiple of this.
+#define BSHUF_BLOCKED_MULT 8
 #define BSHUF_TARGET_BLOCK_SIZE_B 8192
 #endif
-// Macros.
+
 #define CHECK_ERR_FREE(count, buf) if (count < 0) { free(buf); return count; }
@@ -61,15 +49,15 @@ int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size,
 /* Function definition for worker functions that process a single block. */
 typedef int64_t (*bshufBlockFunDef)(ioc_chain* C_ptr,
-        const size_t size, const size_t elem_size, const int option);
+        const size_t size, const size_t elem_size);
 /* Wrap a function for processing a single block to process an entire buffer in
 * parallel. */
 int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
-        const size_t size, const size_t elem_size, size_t block_size, const int option);
+        const size_t size, const size_t elem_size, size_t block_size);
 #ifdef __cplusplus
-} // extern "C"
+}
 #endif
-#endif  // BITSHUFFLE_INTERNALS_H
+#endif
@@ -1,260 +0,0 @@
 /*
 * Bitshuffle HDF5 filter
 *
 * This file is part of Bitshuffle
 * Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
 * Website: http://www.github.com/kiyo-masui/bitshuffle
 * Created: 2014
 *
 * See LICENSE file for details about copyright and rights to use.
 *
 */
 #include "bitshuffle.h"
 #include "bshuf_h5filter.h"
 #define PUSH_ERR(func, minor, str)                                      \
    H5Epush1(__FILE__, func, __LINE__, H5E_PLINE, minor, str)
 // Prototypes from bitshuffle.c
 void bshuf_write_uint64_BE(void* buf, uint64_t num);
 uint64_t bshuf_read_uint64_BE(void* buf);
 void bshuf_write_uint32_BE(void* buf, uint32_t num);
 uint32_t bshuf_read_uint32_BE(const void* buf);
 // Only called on compression, not on reverse.
 /* set_local callback of the bitshuffle HDF5 filter.
 *
 * Reads the options currently attached to the filter on 'dcpl', prepends
 * three reserved slots (major version, minor version, element size of
 * 'type') and writes the combined array back with H5Pmodify_filter.
 * User options are validated: slot 3 (block size) must be a multiple of 8,
 * slot 4 (compression) must be 0 or a supported BSHUF_H5_COMPRESS_* value.
 *
 * 'space' is not used.
 *
 * Returns 1 on success and -1 on any failure (HDF5 call failed, invalid
 * element size, invalid block size, invalid compression selector).
 */
 herr_t bshuf_h5_set_local(hid_t dcpl, hid_t type, hid_t space){
    herr_t r;
    size_t ii;
    unsigned int elem_size;
    unsigned int flags;
    unsigned values[] = {0,0,0,0,0,0,0,0,0,0,0};
    unsigned tmp_values[] = {0,0,0,0,0,0,0,0};
    // Capacity for user options; values[] has room for exactly 3 reserved
    // slots plus this many.
    const size_t nuser_max = sizeof(tmp_values) / sizeof(tmp_values[0]);
    size_t nelements = nuser_max;
    char msg[80];
    (void) space;
    r = H5Pget_filter_by_id2(dcpl, BSHUF_H5FILTER, &flags, &nelements,
            tmp_values, 0, NULL, NULL);
    if(r<0) return -1;
    // nelements is written back by HDF5 and is not guaranteed here to stay
    // within the capacity that was passed in. Clamp it so that neither the
    // copy below nor H5Pmodify_filter can run past the end of values[].
    if (nelements > nuser_max) nelements = nuser_max;
    // First 3 slots reserved. Move any passed options to higher addresses.
    for (ii = 0; ii < nelements; ii++) {
        values[ii + 3] = tmp_values[ii];
    }
    nelements = 3 + nelements;
    values[0] = BSHUF_VERSION_MAJOR;
    values[1] = BSHUF_VERSION_MINOR;
    elem_size = H5Tget_size(type);
    if(elem_size == 0) {
        PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, 
                "Invalid element size.");
        return -1;
    }
    values[2] = elem_size;
    // Validate user supplied arguments.
    if (nelements > 3) {
        // values[] is unsigned, so only the multiple-of-8 test is meaningful.
        if (values[3] % 8) {
            snprintf(msg, sizeof(msg),
                    "Error in bitshuffle. Invalid block size: %u.",
                    values[3]);
            PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, msg);
            return -1;
        }
    }
    if (nelements > 4) {
        switch (values[4]) {
            case 0:
                break;
            case BSHUF_H5_COMPRESS_LZ4:
                break;
            #ifdef ZSTD_SUPPORT
            case BSHUF_H5_COMPRESS_ZSTD:
                break;
            #endif
            default:
                PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, 
                         "Invalid bitshuffle compression.");
                // Fail like the other validation errors instead of reporting
                // the error and then carrying on as if nothing happened.
                return -1;
        }
    }
    r = H5Pmodify_filter(dcpl, BSHUF_H5FILTER, flags, nelements, values);
    if(r<0) return -1;
    return 1;
 }
 /* Filter callback of the bitshuffle HDF5 filter.
 *
 * cd_values layout as used below: [2] element size in bytes, [3] block size
 * in elements (0 or absent: bshuf_default_block_size), [4] compression
 * selector (BSHUF_H5_COMPRESS_LZ4 / BSHUF_H5_COMPRESS_ZSTD, anything else:
 * plain bitshuffle), [5] ZSTD compression level (ZSTD builds only).
 *
 * With H5Z_FLAG_REVERSE set in 'flags' the chunk in *buf is
 * decompressed/unshuffled, otherwise it is shuffled/compressed. Compressed
 * chunks start with a 12-byte header: 8-byte big-endian uncompressed size
 * followed by a 4-byte big-endian block size in bytes.
 *
 * On success *buf is freed and replaced by a newly malloc'ed buffer,
 * *buf_size is set to its allocated size and the number of valid bytes is
 * returned. On failure an error is pushed, *buf is left untouched and 0 is
 * returned.
 */
 size_t bshuf_h5_filter(unsigned int flags, size_t cd_nelmts,
           const unsigned int cd_values[], size_t nbytes,
           size_t *buf_size, void **buf) {
    size_t size, elem_size;
    // bshuf_bitshuffle/bshuf_bitunshuffle return int64_t; storing the result
    // in a plain int would truncate byte counts above INT_MAX.
    int64_t err = -1;
    char msg[80];
    size_t block_size = 0;
    size_t buf_size_out = 0, nbytes_uncomp, nbytes_out;
    char* in_buf = *buf;
    void *out_buf;
    int use_codec;
    if (cd_nelmts < 3) {
        PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, 
                "Not enough parameters.");
        return 0;
    }
    elem_size = cd_values[2];
    // elem_size is used as a divisor several times below.
    if (elem_size == 0) {
        PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, 
                "Invalid element size.");
        return 0;
    }
 #ifdef ZSTD_SUPPORT
    // Slot 5 only exists when the caller supplied a compression level.
    // NOTE(review): 0 is passed otherwise; presumably that selects the
    // compressor's default level - confirm against bshuf_compress_zstd.
    const int comp_lvl = (cd_nelmts > 5) ? (int) cd_values[5] : 0;
 #endif
    // User specified block size.
    if (cd_nelmts > 3) block_size = cd_values[3];
    if (block_size == 0) block_size = bshuf_default_block_size(elem_size);
 #ifndef ZSTD_SUPPORT
    if (cd_nelmts > 4 && (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD)) {
        PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, 
                "ZSTD compression filter chosen but ZSTD support not installed.");
        return 0;
    }
 #endif
    // Compression in addition to bitshuffle.
    use_codec = cd_nelmts > 4 && (cd_values[4] == BSHUF_H5_COMPRESS_LZ4
            || cd_values[4] == BSHUF_H5_COMPRESS_ZSTD);
    if (use_codec) {
        if (flags & H5Z_FLAG_REVERSE) {
            // The 12-byte header must be present before it can be parsed.
            if (nbytes < 12) {
                PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, 
                        "Compressed chunk is shorter than its header.");
                return 0;
            }
            // First eight bytes is the number of bytes in the output buffer,
            // big endian.
            nbytes_uncomp = bshuf_read_uint64_BE(in_buf);
            // Override the block size with the one read from the header.
            block_size = bshuf_read_uint32_BE((const char*) in_buf + 8) / elem_size;
            // Skip over the header.
            in_buf += 12;
            buf_size_out = nbytes_uncomp;
        } else {
            nbytes_uncomp = nbytes;
            // Pick which compressions library to use
            if(cd_values[4] == BSHUF_H5_COMPRESS_LZ4) {
              buf_size_out = bshuf_compress_lz4_bound(nbytes_uncomp / elem_size, 
                  elem_size, block_size) + 12;
            }
 #ifdef ZSTD_SUPPORT
            else if (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD) {
              buf_size_out = bshuf_compress_zstd_bound(nbytes_uncomp / elem_size, 
                  elem_size, block_size) + 12;
            }
 #endif
        }
    } else {
        nbytes_uncomp = nbytes;
        buf_size_out = nbytes;
    }
    // TODO, remove this restriction by memcopying the extra.
    if (nbytes_uncomp % elem_size) {
        PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, 
                "Non integer number of elements.");
        return 0;
    }
    size = nbytes_uncomp / elem_size;
    out_buf = malloc(buf_size_out);
    if (out_buf == NULL) {
        PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, 
                "Could not allocate output buffer.");
        return 0;
    }
    if (use_codec) {
        if (flags & H5Z_FLAG_REVERSE) {
            // Bit unshuffle/decompress.
            // Pick which compressions library to use
            if(cd_values[4] == BSHUF_H5_COMPRESS_LZ4) {
              err = bshuf_decompress_lz4(in_buf, out_buf, size, elem_size, block_size);
            }
 #ifdef ZSTD_SUPPORT
            else if (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD) {
              err = bshuf_decompress_zstd(in_buf, out_buf, size, elem_size, block_size);
            }
 #endif
            nbytes_out = nbytes_uncomp;
        } else {
            // Bit shuffle/compress.
            // Write the header, described in
            // http://www.hdfgroup.org/services/filters/HDF5_LZ4.pdf.
            // Technically we should be using signed integers instead of
            // unsigned ones, however for valid inputs (positive numbers) these
            // have the same representation.
            bshuf_write_uint64_BE(out_buf, nbytes_uncomp);
            bshuf_write_uint32_BE((char*) out_buf + 8, block_size * elem_size);
            if(cd_values[4] == BSHUF_H5_COMPRESS_LZ4) {
                err = bshuf_compress_lz4(in_buf, (char*) out_buf + 12, size,
                        elem_size, block_size); 
            }
 #ifdef ZSTD_SUPPORT
            else if (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD) {
                err = bshuf_compress_zstd(in_buf, (char*) out_buf + 12, size,
                        elem_size, block_size, comp_lvl); 
            }
 #endif
            // Only meaningful when err >= 0; the error branch below never
            // reads it.
            nbytes_out = (size_t) err + 12;
        } 
    } else {
        if (flags & H5Z_FLAG_REVERSE) {
            // Bit unshuffle.
            err = bshuf_bitunshuffle(in_buf, out_buf, size, elem_size,
                    block_size);
        } else {
            // Bit shuffle.
            err = bshuf_bitshuffle(in_buf, out_buf, size, elem_size,
                    block_size);
        }
        nbytes_out = nbytes;
    }
    //printf("nb_in %d, nb_uncomp %d, nb_out %d, buf_out %d, block %d\n",
    //nbytes, nbytes_uncomp, nbytes_out, buf_size_out, block_size);
    if (err < 0) {
        snprintf(msg, sizeof(msg), "Error in bitshuffle with error code %lld.",
                (long long) err);
        PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, msg);
        free(out_buf);
        return 0;
    } else {
        free(*buf);
        *buf = out_buf;
        *buf_size = buf_size_out;
        return nbytes_out;
    }
 }
 /* Filter class descriptor passed to H5Zregister by
 * bshuf_register_h5filter. It ties the filter id BSHUF_H5FILTER to the
 * bshuf_h5_set_local and bshuf_h5_filter callbacks.
 * NOTE(review): the per-field notes marked "presumably" follow the usual
 * H5Z_class2_t member order - confirm against the HDF5 headers in use. */
 H5Z_class_t bshuf_H5Filter[1] = {{
    H5Z_CLASS_T_VERS,
    (H5Z_filter_t)(BSHUF_H5FILTER),
    1, 1,  /* presumably: encoder present, decoder present */
    "bitshuffle; see https://github.com/kiyo-masui/bitshuffle",
    NULL,  /* presumably: no can_apply callback */
    (H5Z_set_local_func_t)(bshuf_h5_set_local),
    (H5Z_func_t)(bshuf_h5_filter)
 }};
 /* Register bshuf_H5Filter with HDF5 via H5Zregister. The value returned by
 * H5Zregister is passed back unchanged; when it is negative an error is
 * additionally pushed onto the HDF5 error stack. */
 int bshuf_register_h5filter(void){
    const int status = H5Zregister(bshuf_H5Filter);
    if (status >= 0) {
        return status;
    }
    PUSH_ERR("bshuf_register_h5filter",
             H5E_CANTREGISTER, "Can't register bitshuffle filter");
    return status;
 }
@@ -1,67 +0,0 @@
 /*
 * Bitshuffle HDF5 filter
 *
 * This file is part of Bitshuffle
 * Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
 * Website: http://www.github.com/kiyo-masui/bitshuffle
 * Created: 2014
 *
 * See LICENSE file for details about copyright and rights to use.
 *
 *
 * Header File
 *
 * Filter Options
 * --------------
 *  block_size (option slot 0) : integer (optional)
 *      What block size to use (in elements not bytes). Default is 0,
 *      for which bitshuffle will pick a block size with a target of 8kb.
 *  Compression (option slot 1) : 0, BSHUF_H5_COMPRESS_LZ4 or BSHUF_H5_COMPRESS_ZSTD
 *      Whether to apply LZ4 (or, when built with ZSTD_SUPPORT, ZSTD) compression
 *      to the data after bitshuffling.
 *      This is much faster than applying compression as a second filter
 *      because it is done when the small block of data is already in the
 *      L1 cache.
 *
 *      For LZ4 compression, the compressed format of the data is the same as
 *      for the normal LZ4 filter described in
 *      http://www.hdfgroup.org/services/filters/HDF5_LZ4.pdf.
 *
 */
 #ifndef BSHUF_H5FILTER_H
 #define BSHUF_H5FILTER_H
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* Must be defined before hdf5.h is included so the HDF5 headers see it.
  * NOTE(review): presumably selects version 2 of the H5Z_class_t layout --
  * confirm against the HDF5 documentation. */
 #define H5Z_class_t_vers 2
 #include "hdf5.h"
 /* HDF5 filter identifier used for the bitshuffle filter. */
 #define BSHUF_H5FILTER 32008
 /* Values accepted in the "Compression" option slot described above. */
 #define BSHUF_H5_COMPRESS_LZ4 2
 #define BSHUF_H5_COMPRESS_ZSTD 3
 /* Filter class descriptor (one-element array) defined in the implementation
  * file; this is what bshuf_register_h5filter() hands to H5Zregister(). */
 extern H5Z_class_t bshuf_H5Filter[1];
 /* ---- bshuf_register_h5filter ----
 *
 * Register the bitshuffle HDF5 filter within the HDF5 library.
 *
 * Call this before using the bitshuffle HDF5 filter from C unless
 * using dynamically loaded filters.
 *
 * Returns the result of H5Zregister(); a negative value means the
 * registration failed.
 *
 */
 int bshuf_register_h5filter(void);
 #ifdef __cplusplus
 } // extern "C"
 #endif
 #endif // BSHUF_H5FILTER_H
@@ -1,5 +1,5 @@
 /*
- * IOchain - Distribute a chain of dependent IO events among threads.
+ * IOchain - Distribute a chain of dependant IO events amoung threads.
 *
 * This file is part of Bitshuffle
 * Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
@@ -81,9 +81,9 @@ void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr) {
    C->out_pl[(*this_iter + 1) % IOC_SIZE].ptr = out_ptr;
 #ifdef _OPENMP
    omp_unset_lock(&(C->out_pl[(*this_iter + 1) % IOC_SIZE].lock));
-    // *in_pl[this_iter]* lock released at the end of the iteration to avoid being
+
-    // overtaken by previous threads and having *out_pl[this_iter]* corrupted.
+
-    // Especially worried about thread 0, iteration 0.
+
    omp_unset_lock(&(C->in_pl[(*this_iter) % IOC_SIZE].lock));
 #endif
 }
@@ -1,5 +1,5 @@
 /*
- * IOchain - Distribute a chain of dependent IO events among threads.
+ * IOchain - Distribute a chain of dependant IO events amoung threads.
 *
 * This file is part of Bitshuffle
 * Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
@@ -90,5 +90,5 @@ void ioc_set_next_in(ioc_chain *C, size_t* this_iter, void* in_ptr);
 void * ioc_get_out(ioc_chain *C, size_t *this_iter);
 void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr);
-#endif  // IOCHAIN_H
+#endif
@@ -1,7 +1,7 @@
 /*
- *  LZ4 - Fast LZ compression algorithm
+   LZ4 - Fast LZ compression algorithm
- *  Header File
+   Header File
- *  Copyright (C) 2011-present, Yann Collet.
+   Copyright (C) 2011-2015, Yann Collet.
   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
@@ -29,744 +29,330 @@
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   You can contact the author at :
-    - LZ4 homepage : http://www.lz4.org
+   - LZ4 source repository : https://github.com/Cyan4973/lz4
-    - LZ4 source repository : https://github.com/lz4/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
 */
 #pragma once
 #if defined (__cplusplus)
 extern "C" {
 #endif
 #ifndef LZ4_H_2983827168210
 #define LZ4_H_2983827168210
 /* --- Dependency --- */
 #include <stddef.h>   /* size_t */
 /**
  Introduction
  LZ4 is a lossless compression algorithm, providing compression speed >500 MB/s per core,
  scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
  multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
  The LZ4 compression library provides in-memory compression and decompression functions.
  It gives full buffer control to user.
  Compression can be done in:
    - a single step (described as Simple Functions)
    - a single step, reusing a context (described in Advanced Functions)
    - unbounded multiple steps (described as Streaming compression)
  lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
  Decompressing such a compressed block requires additional metadata.
  Exact metadata depends on exact decompression function.
  For the typical case of LZ4_decompress_safe(),
  metadata includes block's compressed size, and maximum bound of decompressed size.
  Each application is free to encode and pass such metadata in whichever way it wants.
  lz4.h only handles blocks; it cannot generate Frames.
  Blocks are different from Frames (doc/lz4_Frame_format.md).
  Frames bundle both blocks and metadata in a specified manner.
  Embedding metadata is required for compressed data to be self-contained and portable.
  Frame format is delivered through a companion API, declared in lz4frame.h.
  The `lz4` CLI can only manage frames.
 */
 /*^***************************************************************
 *  Export parameters
 *****************************************************************/
 /*
-*  LZ4_DLL_EXPORT :
+ * lz4.h provides block compression functions, and gives full buffer control to programmer.
-*  Enable exporting of functions when building a Windows DLL
+ * If you need to generate inter-operable compressed data (respecting LZ4 frame specification),
-*  LZ4LIB_VISIBILITY :
+ * and can let the library handle its own memory, please use lz4frame.h instead.
 *  Control library symbols visibility.
 */
 #ifndef LZ4LIB_VISIBILITY
 #  if defined(__GNUC__) && (__GNUC__ >= 4)
 #    define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default")))
 #  else
 #    define LZ4LIB_VISIBILITY
 #  endif
 #endif
 #if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
 #  define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY
 #elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
 #  define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
 #else
 #  define LZ4LIB_API LZ4LIB_VISIBILITY
 #endif
-/*------   Version   ------*/
+/**************************************
 *  Version
 **************************************/
 #define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
-#define LZ4_VERSION_MINOR    9    /* for new (non-breaking) interface capabilities */
+#define LZ4_VERSION_MINOR    7    /* for new (non-breaking) interface capabilities */
-#define LZ4_VERSION_RELEASE  3    /* for tweaks, bug-fixes, or development */
+#define LZ4_VERSION_RELEASE  1    /* for tweaks, bug-fixes, or development */
 #define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
 int LZ4_versionNumber (void);
-#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE
+/**************************************
 #define LZ4_QUOTE(str) #str
 #define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
 #define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)
 LZ4LIB_API int LZ4_versionNumber (void);  /**< library version number; useful to check dll version */
 LZ4LIB_API const char* LZ4_versionString (void);   /**< library version string; useful to check dll version */
 /*-************************************
 *  Tuning parameter
 **************************************/
-/*!
+/*
 * LZ4_MEMORY_USAGE :
 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
- * Increasing memory usage improves compression ratio.
+ * Increasing memory usage improves compression ratio
- * Reduced memory usage may improve speed, thanks to better cache locality.
+ * Reduced memory usage can improve speed, due to cache effect
 * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
 */
 #ifndef LZ4_MEMORY_USAGE
 #define LZ4_MEMORY_USAGE 14
 #endif
-/*-************************************
+/**************************************
 *  Simple Functions
 **************************************/
-/*! LZ4_compress_default() :
+
- *  Compresses 'srcSize' bytes from buffer 'src'
+int LZ4_compress_default(const char* source, char* dest, int sourceSize, int maxDestSize);
- *  into already allocated 'dst' buffer of size 'dstCapacity'.
+int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize);
- *  Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
+
- *  It also runs faster, so it's a recommended setting.
+/*
- *  If the function cannot compress 'src' into a more limited 'dst' budget,
+LZ4_compress_default() :
- *  compression stops *immediately*, and the function result is zero.
+    Compresses 'sourceSize' bytes from buffer 'source'
- *  In which case, 'dst' content is undefined (invalid).
+    into already allocated 'dest' buffer of size 'maxDestSize'.
- *      srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
+    Compression is guaranteed to succeed if 'maxDestSize' >= LZ4_compressBound(sourceSize).
- *      dstCapacity : size of buffer 'dst' (which must be already allocated)
+    It also runs faster, so it's a recommended setting.
- *     @return  : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
+    If the function cannot compress 'source' into a more limited 'dest' budget,
- *                or 0 if compression fails
+    compression stops *immediately*, and the function result is zero.
- * Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
+    As a consequence, 'dest' content is not valid.
    This function never writes outside 'dest' buffer, nor read outside 'source' buffer.
        sourceSize  : Max supported value is LZ4_MAX_INPUT_SIZE
        maxDestSize : full or partial size of buffer 'dest' (which must be already allocated)
        return : the number of bytes written into buffer 'dest' (necessarily <= maxDestSize)
              or 0 if compression fails
 LZ4_decompress_safe() :
    compressedSize : is the precise full size of the compressed block.
    maxDecompressedSize : is the size of destination buffer, which must be already allocated.
    return : the number of bytes decompressed into destination buffer (necessarily <= maxDecompressedSize)
             If destination buffer is not large enough, decoding will stop and output an error code (<0).
             If the source stream is detected malformed, the function will stop decoding and return a negative result.
             This function is protected against buffer overflow exploits, including malicious data packets.
             It never writes outside output buffer, nor reads outside input buffer.
 */
 LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
 /*! LZ4_decompress_safe() :
 *  compressedSize : is the exact complete size of the compressed block.
 *  dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size.
 * @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
 *           If destination buffer is not large enough, decoding will stop and output an error code (negative value).
 *           If the source stream is detected malformed, the function will stop decoding and return a negative result.
 * Note 1 : This function is protected against malicious data packets :
 *          it will never write outside 'dst' buffer, nor read outside 'source' buffer,
 *          even if the compressed block is maliciously modified to order the decoder to do these actions.
 *          In such case, the decoder stops immediately, and considers the compressed block malformed.
 * Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them.
 *          The implementation is free to send / store / derive this information in whichever way is most beneficial.
 *          If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead.
 */
 LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
-/*-************************************
+/**************************************
 *  Advanced Functions
 **************************************/
 #define LZ4_MAX_INPUT_SIZE        0x7E000000   /* 2 113 929 216 bytes */
 #define LZ4_COMPRESSBOUND(isize)  ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
-/*! LZ4_compressBound() :
+/*
 LZ4_compressBound() :
    Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
    This function is primarily useful for memory allocation purposes (destination buffer size).
    Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
-    Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize)
+    Note that LZ4_compress_default() compress faster when dest buffer size is >= LZ4_compressBound(srcSize)
        inputSize  : max supported value is LZ4_MAX_INPUT_SIZE
        return : maximum output size in a "worst case" scenario
-              or 0, if input size is incorrect (too large or negative)
+              or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
 */
-LZ4LIB_API int LZ4_compressBound(int inputSize);
+int LZ4_compressBound(int inputSize);
-/*! LZ4_compress_fast() :
+/*
-    Same as LZ4_compress_default(), but allows selection of "acceleration" factor.
+LZ4_compress_fast() :
    Same as LZ4_compress_default(), but allows to select an "acceleration" factor.
    The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
    It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
    An acceleration value of "1" is the same as regular LZ4_compress_default()
-    Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c).
+    Values <= 0 will be replaced by ACCELERATION_DEFAULT (see lz4.c), which is 1.
    Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c).
 */
-LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+int LZ4_compress_fast (const char* source, char* dest, int sourceSize, int maxDestSize, int acceleration);
-/*! LZ4_compress_fast_extState() :
+/*
- *  Same as LZ4_compress_fast(), using an externally allocated memory space for its state.
+LZ4_compress_fast_extState() :
- *  Use LZ4_sizeofState() to know how much memory must be allocated,
+    Same compression function, just using an externally allocated memory space to store compression state.
- *  and allocate it on 8-bytes boundaries (using `malloc()` typically).
+    Use LZ4_sizeofState() to know how much memory must be allocated,
- *  Then, provide this buffer as `void* state` to compression function.
+    and allocate it on 8-bytes boundaries (using malloc() typically).
    Then, provide it as 'void* state' to compression function.
 */
-LZ4LIB_API int LZ4_sizeofState(void);
+int LZ4_sizeofState(void);
-LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+int LZ4_compress_fast_extState (void* state, const char* source, char* dest, int inputSize, int maxDestSize, int acceleration);
-/*! LZ4_compress_destSize() :
+/*
- *  Reverse the logic : compresses as much data as possible from 'src' buffer
+LZ4_compress_destSize() :
- *  into already allocated buffer 'dst', of size >= 'targetDestSize'.
+    Reverse the logic, by compressing as much data as possible from 'source' buffer
- *  This function either compresses the entire 'src' content into 'dst' if it's large enough,
+    into already allocated buffer 'dest' of size 'targetDestSize'.
- *  or fill 'dst' buffer completely with as much data as possible from 'src'.
+    This function either compresses the entire 'source' content into 'dest' if it's large enough,
- *  note: acceleration parameter is fixed to "default".
+    or fill 'dest' buffer completely with as much data as possible from 'source'.
- *
+        *sourceSizePtr : will be modified to indicate how many bytes where read from 'source' to fill 'dest'.
- * *srcSizePtr : will be modified to indicate how many bytes where read from 'src' to fill 'dst'.
+                         New value is necessarily <= old value.
- *               New value is necessarily <= input value.
+        return : Nb bytes written into 'dest' (necessarily <= targetDestSize)
- * @return : Nb bytes written into 'dst' (necessarily <= targetDestSize)
+              or 0 if compression fails
 *           or 0 if compression fails.
 *
 * Note : from v1.8.2 to v1.9.1, this function had a bug (fixed in v1.9.2+):
 *        the produced compressed content could, in specific circumstances,
 *        require to be decompressed into a destination buffer larger
 *        by at least 1 byte than the content to decompress.
 *        If an application uses `LZ4_compress_destSize()`,
 *        it's highly recommended to update liblz4 to v1.9.2 or better.
 *        If this can't be done or ensured,
 *        the receiving decompression function should provide
 *        a dstCapacity which is > decompressedSize, by at least 1 byte.
 *        See https://github.com/lz4/lz4/issues/859 for details
 */
-LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);
+int LZ4_compress_destSize (const char* source, char* dest, int* sourceSizePtr, int targetDestSize);
-/*! LZ4_decompress_safe_partial() :
+/*
- *  Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
+LZ4_decompress_fast() :
- *  into destination buffer 'dst' of size 'dstCapacity'.
+    originalSize : is the original and therefore uncompressed size
- *  Up to 'targetOutputSize' bytes will be decoded.
+    return : the number of bytes read from the source buffer (in other words, the compressed size)
- *  The function stops decoding on reaching this objective.
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
- *  This can be useful to boost performance
+             Destination buffer must be already allocated. Its size must be a minimum of 'originalSize' bytes.
- *  whenever only the beginning of a block is required.
+    note : This function fully respect memory boundaries for properly formed compressed data.
- *
+           It is a bit faster than LZ4_decompress_safe().
- * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
+           However, it does not provide any protection against intentionally modified data stream (malicious input).
- *           If source stream is detected malformed, function returns a negative result.
+           Use this function in trusted environment only (data to decode comes from a trusted source).
 *
 *  Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
 *
 *  Note 2 : targetOutputSize must be <= dstCapacity
 *
 *  Note 3 : this function effectively stops decoding on reaching targetOutputSize,
 *           so dstCapacity is kind of redundant.
 *           This is because in older versions of this function,
 *           decoding operation would still write complete sequences.
 *           Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
 *           it could write more bytes, though only up to dstCapacity.
 *           Some "margin" used to be required for this operation to work properly.
 *           Thankfully, this is no longer necessary.
 *           The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
 *
 *  Note 4 : If srcSize is the exact size of the block,
 *           then targetOutputSize can be any value,
 *           including larger than the block's decompressed size.
 *           The function will, at most, generate block's decompressed size.
 *
 *  Note 5 : If srcSize is _larger_ than block's compressed size,
 *           then targetOutputSize **MUST** be <= block's decompressed size.
 *           Otherwise, *silent corruption will occur*.
 */
-LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
+int LZ4_decompress_fast (const char* source, char* dest, int originalSize);
 /*
 LZ4_decompress_safe_partial() :
    This function decompress a compressed block of size 'compressedSize' at position 'source'
    into destination buffer 'dest' of size 'maxDecompressedSize'.
    The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached,
    reducing decompression time.
    return : the number of bytes decoded in the destination buffer (necessarily <= maxDecompressedSize)
       Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller.
             Always control how many bytes were decoded.
             If the source stream is detected malformed, the function will stop decoding and return a negative result.
             This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets
 */
 int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize);
-/*-*********************************************
+/***********************************************
 *  Streaming Compression Functions
 ***********************************************/
-typedef union LZ4_stream_u LZ4_stream_t;  /* incomplete type (defined later) */
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
-
+#define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U64 * sizeof(long long))
-LZ4LIB_API LZ4_stream_t* LZ4_createStream(void);
+/*
-LZ4LIB_API int           LZ4_freeStream (LZ4_stream_t* streamPtr);
+ * LZ4_stream_t
-
+ * information structure to track an LZ4 stream.
-/*! LZ4_resetStream_fast() : v1.9.0+
+ * important : init this structure content before first use !
- *  Use this to prepare an LZ4_stream_t for a new chain of dependent blocks
+ * note : only allocated directly the structure if you are statically linking LZ4
- *  (e.g., LZ4_compress_fast_continue()).
+ *        If you are using liblz4 as a DLL, please use below construction methods instead.
 *
 *  An LZ4_stream_t must be initialized once before usage.
 *  This is automatically done when created by LZ4_createStream().
 *  However, should the LZ4_stream_t be simply declared on stack (for example),
 *  it's necessary to initialize it first, using LZ4_initStream().
 *
 *  After init, start any new stream with LZ4_resetStream_fast().
 *  A same LZ4_stream_t can be re-used multiple times consecutively
 *  and compress multiple streams,
 *  provided that it starts each new stream with LZ4_resetStream_fast().
 *
 *  LZ4_resetStream_fast() is much faster than LZ4_initStream(),
 *  but is not compatible with memory regions containing garbage data.
 *
 *  Note: it's only useful to call LZ4_resetStream_fast()
 *        in the context of streaming compression.
 *        The *extState* functions perform their own resets.
 *        Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive.
 */
-LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
+typedef struct { long long table[LZ4_STREAMSIZE_U64]; } LZ4_stream_t;
-/*! LZ4_loadDict() :
+/*
- *  Use this function to reference a static dictionary into LZ4_stream_t.
+ * LZ4_resetStream
- *  The dictionary must remain available during compression.
+ * Use this function to init an allocated LZ4_stream_t structure
 *  LZ4_loadDict() triggers a reset, so any previous data will be forgotten.
 *  The same dictionary will have to be loaded on decompression side for successful decoding.
 *  Dictionary are useful for better compression of small data (KB range).
 *  While LZ4 accept any input as dictionary,
 *  results are generally better when using Zstandard's Dictionary Builder.
 *  Loading a size of 0 is allowed, and is the same as reset.
 * @return : loaded dictionary size, in bytes (necessarily <= 64 KB)
 */
-LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
+void LZ4_resetStream (LZ4_stream_t* streamPtr);
-/*! LZ4_compress_fast_continue() :
+/*
- *  Compress 'src' content using data from previously compressed blocks, for better compression ratio.
+ * LZ4_createStream will allocate and initialize an LZ4_stream_t structure
 * LZ4_freeStream releases its memory.
 * In the context of a DLL (liblz4), please use these methods rather than the static struct.
 * They are more future proof, in case of a change of LZ4_stream_t size.
 */
 LZ4_stream_t* LZ4_createStream(void);
 int           LZ4_freeStream (LZ4_stream_t* streamPtr);
 /*
 * LZ4_loadDict
 * Use this function to load a static dictionary into LZ4_stream.
 * Any previous data will be forgotten, only 'dictionary' will remain in memory.
 * Loading a size of 0 is allowed.
 * Return : dictionary size, in bytes (necessarily <= 64 KB)
 */
 int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
 /*
 * LZ4_compress_fast_continue
 * Compress buffer content 'src', using data from previously compressed blocks as dictionary to improve compression ratio.
 * Important : Previous data blocks are assumed to still be present and unmodified !
 * 'dst' buffer must be already allocated.
- *  If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
+ * If maxDstSize >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
- *
+ * If not, and if compressed data cannot fit into 'dst' buffer size, compression stops, and function returns a zero.
 * @return : size of compressed block
 *           or 0 if there is an error (typically, cannot fit into 'dst').
 *
 *  Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block.
 *           Each block has precise boundaries.
 *           Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata.
 *           It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together.
 *
 *  Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory !
 *
 *  Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB.
 *           Make sure that buffers are separated, by at least one byte.
 *           This construction ensures that each block only depends on previous block.
 *
 *  Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
 *
 *  Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed.
 */
-LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int maxDstSize, int acceleration);
-/*! LZ4_saveDict() :
+/*
- *  If last 64KB data cannot be guaranteed to remain available at its current memory location,
+ * LZ4_saveDict
- *  save it into a safer place (char* safeBuffer).
+ * If previously compressed data block is not guaranteed to remain available at its memory location
- *  This is schematically equivalent to a memcpy() followed by LZ4_loadDict(),
+ * save it into a safer place (char* safeBuffer)
- *  but is much faster, because LZ4_saveDict() doesn't need to rebuild tables.
+ * Note : you don't need to call LZ4_loadDict() afterwards,
- * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error.
+ *        dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue()
 * Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error
 */
-LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize);
+int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize);
-/*-**********************************************
+/************************************************
 *  Streaming Decompression Functions
 *  Bufferless synchronous API
 ************************************************/
 typedef union LZ4_streamDecode_u LZ4_streamDecode_t;   /* tracking context */
-/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() :
+#define LZ4_STREAMDECODESIZE_U64  4
 *  creation / destruction of streaming decompression tracking context.
 *  A tracking context can be re-used multiple times.
 */
 LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void);
 LZ4LIB_API int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
 /*! LZ4_setStreamDecode() :
 *  An LZ4_streamDecode_t context can be allocated once and re-used multiple times.
 *  Use this function to start decompression of a new stream of blocks.
 *  A dictionary can optionally be set. Use NULL or size 0 for a reset order.
 *  Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
 * @return : 1 if OK, 0 if error
 */
 LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
 /*! LZ4_decoderRingBufferSize() : v1.8.2+
 *  Note : in a ring buffer scenario (optional),
 *  blocks are presumed decompressed next to each other
 *  up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize),
 *  at which stage it resumes from beginning of ring buffer.
 *  When setting such a ring buffer for streaming decompression,
 *  provides the minimum size of this ring buffer
 *  to be compatible with any source respecting maxBlockSize condition.
 * @return : minimum ring buffer size,
 *           or 0 if there is an error (invalid maxBlockSize).
 */
 LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize);
 #define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize))  /* for static allocation; maxBlockSize presumed valid */
 /*! LZ4_decompress_*_continue() :
 *  These decoding functions allow decompression of consecutive blocks in "streaming" mode.
 *  A block is an unsplittable entity, it must be presented entirely to a decompression function.
 *  Decompression functions only accepts one block at a time.
 *  The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded.
 *  If less than 64KB of data has been decoded, all the data must be present.
 *
 *  Special : if decompression side sets a ring buffer, it must respect one of the following conditions :
 *  - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize).
 *    maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes.
 *    In which case, encoding and decoding buffers do not need to be synchronized.
 *    Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize.
 *  - Synchronized mode :
 *    Decompression buffer size is _exactly_ the same as compression buffer size,
 *    and follows exactly same update rule (block boundaries at same positions),
 *    and decoding function is provided with exact decompressed size of each block (exception for last block of the stream),
 *    _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB).
 *  - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
 *    In which case, encoding and decoding buffers do not need to be synchronized,
 *    and encoding ring buffer can have any size, including small ones ( < 64 KB).
 *
 *  Whenever these conditions are not possible,
 *  save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression,
 *  then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block.
 */
 LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity);
 /*! LZ4_decompress_*_usingDict() :
 *  These decoding functions work the same as
 *  a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue().
 *  They are stand-alone, and don't need an LZ4_streamDecode_t structure.
 *  Dictionary is presumed stable : it must remain accessible and unmodified during decompression.
 *  Performance tip : Decompression speed can be substantially increased
 *                    when dst == dictStart + dictSize.
 */
 LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapacity, const char* dictStart, int dictSize);
 #endif /* LZ4_H_2983827168210 */
 /*^*************************************
 * !!!!!!   STATIC LINKING ONLY   !!!!!!
 ***************************************/
 /*-****************************************************************************
 * Experimental section
 *
 * Symbols declared in this section must be considered unstable. Their
 * signatures or semantics may change, or they may be removed altogether in the
 * future. They are therefore only safe to depend on when the caller is
 * statically linked against the library.
 *
 * To protect against unsafe usage, not only are the declarations guarded,
 * the definitions are hidden by default
 * when building LZ4 as a shared/dynamic library.
 *
 * In order to access these declarations,
 * define LZ4_STATIC_LINKING_ONLY in your application
 * before including LZ4's headers.
 *
 * In order to make their implementations accessible dynamically, you must
 * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library.
 ******************************************************************************/
 #ifdef LZ4_STATIC_LINKING_ONLY
 #ifndef LZ4_STATIC_3504398509
 #define LZ4_STATIC_3504398509
 #ifdef LZ4_PUBLISH_STATIC_FUNCTIONS
 #define LZ4LIB_STATIC_API LZ4LIB_API
 #else
 #define LZ4LIB_STATIC_API
 #endif
 /*! LZ4_compress_fast_extState_fastReset() :
 *  A variant of LZ4_compress_fast_extState().
 *
 *  Using this variant avoids an expensive initialization step.
 *  It is only safe to call if the state buffer is known to be correctly initialized already
 *  (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized").
 *  From a high level, the difference is that
 *  this function initializes the provided state with a call to something like LZ4_resetStream_fast()
 *  while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream().
 */
 LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
 /*! LZ4_attach_dictionary() :
 *  This is an experimental API that allows
 *  efficient use of a static dictionary many times.
 *
 *  Rather than re-loading the dictionary buffer into a working context before
 *  each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
 *  working LZ4_stream_t, this function introduces a no-copy setup mechanism,
 *  in which the working stream references the dictionary stream in-place.
 *
 *  Several assumptions are made about the state of the dictionary stream.
 *  Currently, only streams which have been prepared by LZ4_loadDict() should
 *  be expected to work.
 *
 *  Alternatively, the provided dictionaryStream may be NULL,
 *  in which case any existing dictionary stream is unset.
 *
 *  If a dictionary is provided, it replaces any pre-existing stream history.
 *  The dictionary contents are the only history that can be referenced and
 *  logically immediately precede the data compressed in the first subsequent
 *  compression call.
 *
 *  The dictionary will only remain attached to the working stream through the
 *  first compression call, at the end of which it is cleared. The dictionary
 *  stream (and source buffer) must remain in-place / accessible / unchanged
 *  through the completion of the first compression call on the stream.
 */
 LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream);
 /*! In-place compression and decompression
 *
 * It's possible to have input and output sharing the same buffer,
 * for highly constrained memory environments.
 * In both cases, it requires input to lay at the end of the buffer,
 * and decompression to start at beginning of the buffer.
 * Buffer size must feature some margin, hence be larger than final size.
 *
 * |<------------------------buffer--------------------------------->|
 *                             |<-----------compressed data--------->|
 * |<-----------decompressed size------------------>|
 *                                                  |<----margin---->|
 *
 * This technique is more useful for decompression,
 * since decompressed size is typically larger,
 * and margin is short.
 *
 * In-place decompression will work inside any buffer
 * which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize).
 * This presumes that decompressedSize > compressedSize.
 * Otherwise, it means compression actually expanded data,
 * and it would be more efficient to store such data with a flag indicating it's not compressed.
 * This can happen when data is not compressible (already compressed, or encrypted).
 *
 * For in-place compression, margin is larger, as it must be able to cope with both
 * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX,
 * and data expansion, which can happen when input is not compressible.
 * As a consequence, buffer size requirements are much higher,
 * and memory savings offered by in-place compression are more limited.
 *
 * There are ways to limit this cost for compression :
 * - Reduce history size, by modifying LZ4_DISTANCE_MAX.
 *   Note that it is a compile-time constant, so all compressions will apply this limit.
 *   Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX,
 *   so it's a reasonable trick when inputs are known to be small.
 * - Require the compressor to deliver a "maximum compressed size".
 *   This is the `dstCapacity` parameter in `LZ4_compress*()`.
 *   When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail,
 *   in which case, the return code will be 0 (zero).
 *   The caller must be ready for these cases to happen,
 *   and typically design a backup scheme to send data uncompressed.
 * The combination of both techniques can significantly reduce
 * the amount of margin required for in-place compression.
 *
 * In-place compression can work in any buffer
 * which size is >= (maxCompressedSize)
 * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success.
 * LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX,
 * so it's possible to reduce memory requirements by playing with them.
 */
 #define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize)          (((compressedSize) >> 8) + 32)
 #define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize)   ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize))  /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */
 #ifndef LZ4_DISTANCE_MAX   /* history window size; can be user-defined at compile time */
 #  define LZ4_DISTANCE_MAX 65535   /* set to maximum value by default */
 #endif
 #define LZ4_COMPRESS_INPLACE_MARGIN                           (LZ4_DISTANCE_MAX + 32)   /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */
 #define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize)   ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN)  /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */
 #endif   /* LZ4_STATIC_3504398509 */
 #endif   /* LZ4_STATIC_LINKING_ONLY */
 #ifndef LZ4_H_98237428734687
 #define LZ4_H_98237428734687
 /*-************************************************************
 *  Private Definitions
 **************************************************************
 * Do not use these definitions directly.
 * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
 * Accessing members will expose user code to API and/or ABI break in future versions of the library.
 **************************************************************/
 #define LZ4_HASHLOG   (LZ4_MEMORY_USAGE-2)
 #define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
 #define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)       /* required as macro for static allocation */
 #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
 # include <stdint.h>
  typedef  int8_t  LZ4_i8;
  typedef uint8_t  LZ4_byte;
  typedef uint16_t LZ4_u16;
  typedef uint32_t LZ4_u32;
 #else
  typedef   signed char  LZ4_i8;
  typedef unsigned char  LZ4_byte;
  typedef unsigned short LZ4_u16;
  typedef unsigned int   LZ4_u32;
 #endif
 /* Internal state of a compression stream.
  * This is a private definition: it is visible only so that LZ4_stream_t can be
  * allocated statically, and its members must not be accessed by user code. */
 typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
 struct LZ4_stream_t_internal {
    LZ4_u32 hashTable[LZ4_HASH_SIZE_U32];   /* LZ4_HASH_SIZE_U32 == (1 << LZ4_HASHLOG) 32-bit entries */
    LZ4_u32 currentOffset;                  /* NOTE(review): presumably the running position within the stream - confirm in lz4.c */
    LZ4_u32 tableType;                      /* NOTE(review): presumably identifies how hashTable entries are encoded - confirm in lz4.c */
    const LZ4_byte* dictionary;             /* read-only pointer to dictionary bytes; not modified through this struct */
    const LZ4_stream_t_internal* dictCtx;   /* read-only pointer to another stream state of the same type */
    LZ4_u32 dictSize;                       /* NOTE(review): presumably the byte length paired with `dictionary` - confirm */
 };
 /* Internal state of a decompression stream: two (pointer, size) pairs.
  * Private definition; user code is not meant to read or write these members. */
 typedef struct {
    const LZ4_byte* externalDict;   /* NOTE(review): presumably the start of an external dictionary segment - confirm in lz4.c */
    size_t extDictSize;             /* size paired with externalDict */
    const LZ4_byte* prefixEnd;      /* NOTE(review): presumably the end of the previously decoded data - confirm in lz4.c */
    size_t prefixSize;              /* size paired with prefixEnd */
 } LZ4_streamDecode_t_internal;
 /*! LZ4_stream_t :
 *  Do not use below internal definitions directly !
 *  Declare or allocate an LZ4_stream_t instead.
 *  LZ4_stream_t can also be created using LZ4_createStream(), which is recommended.
 *  The structure definition can be convenient for static allocation
 *  (on stack, or as part of larger structure).
 *  Init this structure with LZ4_initStream() before first use.
 *  note : only use this definition in association with static linking !
 *  this definition is not API/ABI safe, and may change in future versions.
 */
 #define LZ4_STREAMSIZE       16416  /* static size, for inter-version compatibility */
 #define LZ4_STREAMSIZE_VOIDP (LZ4_STREAMSIZE / sizeof(void*))   /* same size counted in void* elements */
 union LZ4_stream_u {
    void* table[LZ4_STREAMSIZE_VOIDP];         /* array of void* whose element count is derived from LZ4_STREAMSIZE */
    LZ4_stream_t_internal internal_donotuse;   /* the actual stream state; as the name says, do not access it directly */
 }; /* previously typedef'd to LZ4_stream_t */
 /*! LZ4_initStream() : v1.9.0+
 *  An LZ4_stream_t structure must be initialized at least once.
 *  This is automatically done when invoking LZ4_createStream(),
 *  but it's not when the structure is simply declared on stack (for example).
 *
 *  Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t.
 *  It can also initialize any arbitrary buffer of sufficient size,
 *  and will @return a pointer of proper type upon initialization.
 *
 *  Note : initialization fails if size and alignment conditions are not respected.
 *         In which case, the function will @return NULL.
 *  Note2: An LZ4_stream_t structure guarantees correct alignment and size.
 *  Note3: Before v1.9.0, use LZ4_resetStream() instead
 */
 LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* buffer, size_t size);
 /*! LZ4_streamDecode_t :
 *  information structure to track an LZ4 stream during decompression.
 *  init this structure  using LZ4_setStreamDecode() before first use.
 *  note : only use in association with static linking !
 *         this definition is not API/ABI safe,
 *         and may change in a future version !
 */
 #define LZ4_STREAMDECODESIZE_U64 (4 + ((sizeof(void*)==16) ? 2 : 0) /*AS-400*/ )
 #define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
-union LZ4_streamDecode_u {
+typedef struct { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; } LZ4_streamDecode_t;
-    unsigned long long table[LZ4_STREAMDECODESIZE_U64];
+/*
-    LZ4_streamDecode_t_internal internal_donotuse;
+ * LZ4_streamDecode_t
-} ;   /* previously typedef'd to LZ4_streamDecode_t */
+ * information structure to track an LZ4 stream.
 * init this structure content using LZ4_setStreamDecode or memset() before first use !
 *
 * In the context of a DLL (liblz4) please prefer usage of construction methods below.
 * They are more future proof, in case of a change of LZ4_streamDecode_t size in the future.
 * LZ4_createStreamDecode will allocate and initialize an LZ4_streamDecode_t structure
 * LZ4_freeStreamDecode releases its memory.
 */
 LZ4_streamDecode_t* LZ4_createStreamDecode(void);
 int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
 /*
 * LZ4_setStreamDecode
 * Use this function to instruct where to find the dictionary.
 * Setting a size of 0 is allowed (same effect as reset).
 * Return : 1 if OK, 0 if error
 */
 int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
 /*
 *_continue() :
    These decoding functions allow decompression of multiple blocks in "streaming" mode.
    Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB)
    In the case of ring buffers, the decoding buffer must be either :
    - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions)
      In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB).
    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
      maxBlockSize is implementation dependent. It's the maximum size you intend to compress into a single block.
      In which case, encoding and decoding buffers do not need to be synchronized,
      and encoding ring buffer can have any size, including small ones ( < 64 KB).
    - _At least_ 64 KB + 8 bytes + maxBlockSize.
      In which case, encoding and decoding buffers do not need to be synchronized,
      and encoding ring buffer can have any size, including larger than decoding buffer.
    Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer,
    and indicate where it is saved using LZ4_setStreamDecode()
 */
 int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize);
 int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize);
 /*
 Advanced decoding functions :
 *_usingDict() :
    These decoding functions work the same as
    a combination of LZ4_setStreamDecode() followed by LZ4_decompress_x_continue()
    They are stand-alone. They don't need nor update an LZ4_streamDecode_t structure.
 */
 int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize);
 int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize);
-/*-************************************
+/**************************************
 *  Obsolete Functions
 **************************************/
-
+/* Deprecate Warnings */
-/*! Deprecation warnings
+/* Should these warnings messages be a problem,
- *
+   it is generally possible to disable them,
- *  Deprecated functions make the compiler generate a warning when invoked.
+   with -Wno-deprecated-declarations for gcc
- *  This is meant to invite users to update their source code.
+   or _CRT_SECURE_NO_WARNINGS in Visual for example.
- *  Should deprecation warnings be a problem, it is generally possible to disable them,
+   You can also define LZ4_DEPRECATE_WARNING_DEFBLOCK. */
- *  typically with -Wno-deprecated-declarations for gcc
+#ifndef LZ4_DEPRECATE_WARNING_DEFBLOCK
- *  or _CRT_SECURE_NO_WARNINGS in Visual.
+#  define LZ4_DEPRECATE_WARNING_DEFBLOCK
- *
+#  define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
- *  Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS
+#  if (LZ4_GCC_VERSION >= 405) || defined(__clang__)
- *  before including the header file.
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
- */
+#  elif (LZ4_GCC_VERSION >= 301)
-#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
 #  define LZ4_DEPRECATED(message)   /* disable deprecation warnings */
 #else
 #  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
 #    define LZ4_DEPRECATED(message) [[deprecated(message)]]
 #  elif defined(_MSC_VER)
 #    define LZ4_DEPRECATED(message) __declspec(deprecated(message))
 #  elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45))
 #    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
 #  elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31)
 #    define LZ4_DEPRECATED(message) __attribute__((deprecated))
 #  else
-#    pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler")
+#    pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
-#    define LZ4_DEPRECATED(message)   /* disabled */
+#    define LZ4_DEPRECATED(message)
 #  endif
-#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
+#endif /* LZ4_DEPRECATE_WARNING_DEFBLOCK */
-/*! Obsolete compression functions (since v1.7.3) */
+/* Obsolete compression functions */
-LZ4_DEPRECATED("use LZ4_compress_default() instead")       LZ4LIB_API int LZ4_compress               (const char* src, char* dest, int srcSize);
+/* These functions are planned to start generate warnings by r131 approximately */
-LZ4_DEPRECATED("use LZ4_compress_default() instead")       LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize);
+int LZ4_compress               (const char* source, char* dest, int sourceSize);
-LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
+int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
-LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
-LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
+int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
-LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue  (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
 int LZ4_compress_limitedOutput_continue  (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
-/*! Obsolete decompression functions (since v1.8.0) */
+/* Obsolete decompression functions */
-LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize);
+/* These function names are completely deprecated and must no longer be used.
-LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
+   They are only provided here for compatibility with older programs.
    - LZ4_uncompress is the same as LZ4_decompress_fast
    - LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe
   These function prototypes are now disabled; uncomment them only if you really need them.
   It is highly recommended to stop using these prototypes and migrate to maintained ones */
 /* int LZ4_uncompress (const char* source, char* dest, int outputSize); */
 /* int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); */
-/* Obsolete streaming functions (since v1.7.0)
+/* Obsolete streaming functions; use new streaming interface whenever possible */
- * degraded functionality; do not use!
+LZ4_DEPRECATED("use LZ4_createStream() instead") void* LZ4_create (char* inputBuffer);
- *
+LZ4_DEPRECATED("use LZ4_createStream() instead") int   LZ4_sizeofStreamState(void);
- * In order to perform streaming compression, these functions depended on data
+LZ4_DEPRECATED("use LZ4_resetStream() instead")  int   LZ4_resetStreamState(void* state, char* inputBuffer);
- * that is no longer tracked in the state. They have been preserved as well as
+LZ4_DEPRECATED("use LZ4_saveDict() instead")     char* LZ4_slideInputBuffer (void* state);
 * possible: using them will still produce a correct output. However, they don't
 * actually retain any history between compression calls. The compression ratio
 * achieved will therefore be no better than compressing each chunk
 * independently.
 */
 LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer);
 LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int   LZ4_sizeofStreamState(void);
 LZ4_DEPRECATED("Use LZ4_resetStream() instead")  LZ4LIB_API int   LZ4_resetStreamState(void* state, char* inputBuffer);
 LZ4_DEPRECATED("Use LZ4_saveDict() instead")     LZ4LIB_API char* LZ4_slideInputBuffer (void* state);
-/*! Obsolete streaming decoding functions (since v1.7.0) */
+/* Obsolete streaming decoding functions */
-LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
+LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
-LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
+LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
 /*! Obsolete LZ4_decompress_fast variants (since v1.9.0) :
 *  These functions used to be faster than LZ4_decompress_safe(),
 *  but this is no longer the case. They are now slower.
 *  This is because LZ4_decompress_fast() doesn't know the input size,
 *  and therefore must progress more cautiously into the input buffer to not read beyond the end of block.
 *  On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability.
 *  As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated.
 *
 *  The last remaining LZ4_decompress_fast() specificity is that
 *  it can decompress a block without knowing its compressed size.
 *  Such functionality can be achieved in a more secure manner
 *  by employing LZ4_decompress_safe_partial().
 *
 *  Parameters:
 *  originalSize : is the uncompressed size to regenerate.
 *                 `dst` must be already allocated, its size must be >= 'originalSize' bytes.
 * @return : number of bytes read from source buffer (== compressed size).
 *           The function expects to finish at block's end exactly.
 *           If the source stream is detected malformed, the function stops decoding and returns a negative result.
 *  note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer.
 *         However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds.
 *         Also, since match offsets are not validated, match reads from 'src' may underflow too.
 *         These issues never happen if input (compressed) data is correct.
 *         But they may happen if input data is invalid (error or intentional tampering).
 *         As a consequence, use these functions in trusted environments with trusted data **only**.
 */
 LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead")
 LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
 LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead")
 LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
 LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead")
 LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);
 /*! LZ4_resetStream() :
 *  An LZ4_stream_t structure must be initialized at least once.
 *  This is done with LZ4_initStream(), or LZ4_resetStream().
 *  Consider switching to LZ4_initStream(),
 *  invoking LZ4_resetStream() will trigger deprecation warnings in the future.
 */
 LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
 #endif /* LZ4_H_98237428734687 */
 #if defined (__cplusplus)
@@ -3,11 +3,12 @@
 * Author: Charles Mita
 */
 #include <hdf5.h>
 #include <stdio.h>
 #include <stdio.h>
 #include <hdf5.h>
 #include "err.h"
 struct error_stack_t {
 	char **files;
 	char **funcs;
@@ -29,10 +30,9 @@ static char *messages[ERR_MAX_STACK_SIZE] = {0};
 static struct error_stack_t stack = {files, funcs, lines, errors, messages, 0};
-void push_error_stack(const char *file, const char *func, int line, int err,
+
-                      const char *message) {
+void push_error_stack(const char *file, const char *func, int line, int err, const char *message) {
-  if (stack.size >= ERR_MAX_STACK_SIZE)
+	if (stack.size >= ERR_MAX_STACK_SIZE) return; /* unfortunate */
    return; /* unfortunate */
 	int idx = stack.size;
 	/* subtract 1 to ensure room for null byte in buffer */
@@ -45,11 +45,10 @@ void push_error_stack(const char *file, const char *func, int line, int err,
 	stack.size++;
 }
-herr_t h5e_walk_callback(unsigned int n, const struct H5E_error2_t *err,
+
-                         void *client_data) {
+herr_t h5e_walk_callback(unsigned int n, const struct H5E_error2_t *err, void *client_data) {
 	herr_t retval = 0;
-  /* only read the message for the innermost stack frame - the rest are just
+	/* only read the message for the innermost stack frame - the rest are just noise */
   * noise */
 	if (n == 0) {
 		char message[ERR_MAX_MESSAGE_LENGTH] = {0};
 		sprintf(message, "%.*s", ERR_MAX_MESSAGE_LENGTH - 1, err->desc);
@@ -60,6 +59,7 @@ herr_t h5e_walk_callback(unsigned int n, const struct H5E_error2_t *err,
 	return retval;
 }
 int h5e_error_callback(hid_t stack_id, void *client_data) {
 	int retval = 0;
 	herr_t err = 0;
@@ -71,29 +71,30 @@ done:
 	return retval;
 }
 /*
  * Forget every error recorded so far: the HDF5 default error stack is
  * cleared and the plugin's own stack is emptied by resetting its size to zero.
  */
 void reset_error_stack() {
 	H5Eclear2(H5E_DEFAULT); /* almost certainly unnecessary */
 	stack.size = 0;
 }
 void dump_error_stack(FILE *out) {
 	int idx = stack.size;
-  if (idx > 0)
+	if (idx > 0) fprintf(out, "Durin plugin error:\n");
    fprintf(out, "Durin plugin error:\n");
 	while (idx-- > 0) {
 		const char *file = stack.files[idx];
 		const char *func = stack.funcs[idx];
 		const char *message = stack.messages[idx];
 		const int line = stack.lines[idx];
 		if (message[0] != '\0') {
-      fprintf(out, "\t%s - line %d in %s:\n\t\t%s\n", file, line, func,
+			fprintf(out, "\t%s - line %d in %s:\n\t\t%s\n", file, line, func, message);
              message);
 		} else {
 			fprintf(out, "\t%s - line %d in %s\n", file, line, func);
 		}
 	}
 }
 int init_h5_error_handling() {
 	int retval = 0;
 	hid_t err = 0;
@@ -3,14 +3,17 @@
 * Author: Charles Mita
 */
 #ifndef NXS_XDS_ERR_H
 #define NXS_XDS_ERR_H
 #define ERR_MAX_FILENAME_LENGTH 64
 #define ERR_MAX_FUNCNAME_LENGTH 128
 #define ERR_MAX_MESSAGE_LENGTH 1024
 #define ERR_MAX_STACK_SIZE 128
 /* obtain __func__ from GCC if no C99 */
 #if __STDC_VERSION__ < 199901L
 # if __GNUC__ >= 2
@@ -39,8 +42,7 @@
 	goto target; \
 }
-void push_error_stack(const char *file, const char *func, int line, int err,
+void push_error_stack(const char *file, const char *func, int line, int err, const char *message);
                      const char *message);
 void dump_error_stack(FILE *out);
@@ -3,16 +3,18 @@
 * Author: Charles Mita
 */
 #include <hdf5.h>
 #include <hdf5_hl.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <strings.h>
 #include "err.h"
 #include "file.h"
 #include "err.h"
 #include "filters.h"
 void clear_det_visit_objects(struct det_visit_objects_t *objects) {
 	if (objects->nxdata) {
 		H5Oclose(objects->nxdata);
@@ -24,13 +26,16 @@ void clear_det_visit_objects(struct det_visit_objects_t *objects) {
 	}
 }
 /*
  * Release a dataset description: close the two HDF5 group handles it holds
  * (det_g_id and data_g_id), then free the structure itself.
  * The return values of H5Gclose are not checked.
  * `desc` must not be used after this call.
  */
 void free_ds_desc(struct ds_desc_t *desc) {
 	H5Gclose(desc->det_g_id);
 	H5Gclose(desc->data_g_id);
 	free(desc); /* last: the handles above are read from desc */
 }
-void free_nxs_desc(struct ds_desc_t *desc) { free_ds_desc(desc); }
+void free_nxs_desc(struct ds_desc_t *desc) {
 	free_ds_desc(desc);
 }
 void free_eiger_desc(struct ds_desc_t *desc) {
 	struct eiger_ds_desc_t *e_desc = (struct eiger_ds_desc_t *) desc;
@@ -38,7 +43,10 @@ void free_eiger_desc(struct ds_desc_t *desc) {
 	free_ds_desc(desc);
 }
-void free_opt_eiger_desc(struct ds_desc_t *desc) { free_eiger_desc(desc); }
+void free_opt_eiger_desc(struct ds_desc_t *desc) {
 	free_eiger_desc(desc);
 }
 double scale_from_units(const char* unit_string) {
 	if (strcasecmp("m", unit_string) == 0 ||
@@ -69,8 +77,7 @@ int get_nxs_dataset_dims(struct ds_desc_t *desc) {
 	int retval = 0;
 	int ndims = 0;
 	int width = 0;
-  g_id = desc->data_g_id;
+	g_id = desc->data_g_id;;
  ;
 	ds_id = H5Dopen2(g_id, "data", H5P_DEFAULT);
 	if (ds_id <= 0) {
@@ -103,13 +110,7 @@ int get_nxs_dataset_dims(struct ds_desc_t *desc) {
 		ERROR_JUMP(-1, close_space, "Error getting dataset dimensions");
 	}
  if ( H5Tequal(t_id,H5T_NATIVE_CHAR)>0 || H5Tequal(t_id,H5T_NATIVE_INT)>0 || H5Tequal(t_id,H5T_NATIVE_SHORT)>0 || H5Tequal(t_id,H5T_NATIVE_LONG)>0 || H5Tequal(t_id,H5T_NATIVE_LLONG)>0 ) {
    // signed
    desc->data_width = -width;
  } else {
    // unsigned
 	desc->data_width = width;
  }
 close_space:
 	H5Sclose(s_id);
@@ -121,8 +122,10 @@ done:
 	return retval;
 }
-int get_frame_simple(const struct ds_desc_t *desc, const char *name,
+int get_frame_simple(const struct ds_desc_t *desc,
-                     const hsize_t *frame_idx, const hsize_t *frame_size,
+		const char *name,
 		const hsize_t *frame_idx,
 		const hsize_t *frame_size,
 		void *buffer) {
 	int retval = 0;
@@ -145,8 +148,7 @@ int get_frame_simple(const struct ds_desc_t *desc, const char *name,
 	if (t_id <= 0) {
 		ERROR_JUMP(-1, close_type, "Error retrieving datatype");
 	}
-  err = H5Sselect_hyperslab(s_id, H5S_SELECT_SET, frame_idx, NULL, frame_size,
+	err = H5Sselect_hyperslab(s_id, H5S_SELECT_SET, frame_idx, NULL, frame_size, NULL);
                            NULL);
 	if (err < 0) {
 		ERROR_JUMP(-1, close_space, "Error seleting hyperslab");
 	}
@@ -172,8 +174,11 @@ done:
 	return retval;
 }
-int get_frame_from_chunk(const struct ds_desc_t *desc, const char *ds_name,
+
-                         const hsize_t *frame_idx, const hsize_t *frame_size,
+int get_frame_from_chunk(const struct ds_desc_t *desc,
 		const char *ds_name,
 		const hsize_t *frame_idx,
 		const hsize_t *frame_size,
 		void *buffer) {
 	hid_t d_id = 0;
@@ -181,14 +186,12 @@ int get_frame_from_chunk(const struct ds_desc_t *desc, const char *ds_name,
 	uint32_t c_filter_mask = 0;
 	hsize_t c_bytes;
 	void *c_buffer = NULL;
-  const struct opt_eiger_ds_desc_t *o_eiger_desc =
+	const struct opt_eiger_ds_desc_t *o_eiger_desc = (struct opt_eiger_ds_desc_t *) desc;
      (struct opt_eiger_ds_desc_t *)desc;
 	int retval = 0;
 	if (frame_idx[1] != 0 || frame_idx[2] != 0) {
 		char message[64];
-    sprintf(message,
+		sprintf(message, "Require frame selection starts at [n, 0, 0], not [n, %llu, %llu]",
            "Require frame selection starts at [n, 0, 0], not [n, %llu, %llu]",
 				frame_idx[1], frame_idx[2]);
 		ERROR_JUMP(-1, done, message);
 	}
@@ -200,16 +203,15 @@ int get_frame_from_chunk(const struct ds_desc_t *desc, const char *ds_name,
 		ERROR_JUMP(-1, done, message);
 	}
 	if (H5Dget_chunk_storage_size(d_id, c_offset, &c_bytes) < 0) {
 		char message[96];
-    sprintf(message, "Error reading chunk size from %.32s for frame %llu",
+		sprintf(message, "Error reading chunk size from %.32s for frame %llu", ds_name, frame_idx[0]);
            ds_name, frame_idx[0]);
 		ERROR_JUMP(-1, done, message);
 	}
 	if (c_bytes == 0) {
 		char message[96];
-    sprintf(message, "Target chunk %llu has zero size for dataset %.32s",
+		sprintf(message, "Target chunk %llu has zero size for dataset %.32s", frame_idx[0], ds_name);
            frame_idx[0], ds_name);
 		ERROR_JUMP(-1, done, message);
 	}
@@ -217,9 +219,7 @@ int get_frame_from_chunk(const struct ds_desc_t *desc, const char *ds_name,
 		c_buffer = malloc(c_bytes);
 		if (!c_buffer) {
 			char message[128];
-      sprintf(message,
+			sprintf(message, "Unable to allocate chunk buffer for dataset %.32s - frame %llu, size %llu bytes",
              "Unable to allocate chunk buffer for dataset %.32s - frame %llu, "
              "size %llu bytes",
 					ds_name, frame_idx[0], c_bytes);
 			ERROR_JUMP(-1, done, message);
 		}
@@ -227,46 +227,46 @@ int get_frame_from_chunk(const struct ds_desc_t *desc, const char *ds_name,
 		c_buffer = buffer;
 	}
-  if (H5Dread_chunk(d_id, H5P_DEFAULT, c_offset, &c_filter_mask, c_buffer) <
+	if (H5DOread_chunk(d_id, H5P_DEFAULT, c_offset, &c_filter_mask, c_buffer) < 0) {
      0) {
 		char message[128];
-    sprintf(message,
+		sprintf(message, "Error reading chunk %llu from dataset %.32s - size %llu bytes",
            "Error reading chunk %llu from dataset %.32s - size %llu bytes",
 				frame_idx[0], ds_name, c_bytes);
 		ERROR_JUMP(-1, done, message);
 	}
 	if (o_eiger_desc->bs_applied) {
-    if (bslz4_decompress(o_eiger_desc->bs_params, c_bytes, c_buffer,
+		if (bslz4_decompress(
-                         abs(desc->data_width) * frame_size[1] * frame_size[2],
+					o_eiger_desc->bs_params,
 					c_bytes,
 					c_buffer,
 					desc->data_width * frame_size[1] * frame_size[2],
 					buffer) < 0) {
 			char message[128];
-      sprintf(message,
+			sprintf(message, "Error processing chunk %llu from %.32s with bitshuffle_lz4",
              "Error processing chunk %llu from %.32s with bitshuffle_lz4",
 					frame_idx[0], ds_name);
 			ERROR_JUMP(-1, done, message);
 		}
 	}
 done:
-  if (c_buffer && (c_buffer != buffer))
+	if (c_buffer && (c_buffer != buffer)) free(c_buffer);
-    free(c_buffer);
+	if (d_id) H5Dclose(d_id);
  if (d_id)
    H5Dclose(d_id);
 	return retval;
 }
-int get_nxs_frame(const struct ds_desc_t *desc, const int nin, void *buffer) {
+
 int get_nxs_frame(
 		const struct ds_desc_t *desc,
 		const int n,
 		void *buffer) {
 	/* detector data are the two inner most indices */
 	/* TODO: handle ndims > 3 and select appropriately */
 	int retval = 0;
  int n = nin - desc->image_number_offset;
 	hsize_t frame_idx[3] = {n, 0, 0};
 	hsize_t frame_size[3] = {1, desc->dims[1], desc->dims[2]};
 	if (n < 0 || n >= desc->dims[0]) {
 		char message[64];
-    sprintf(message, "Selected frame %d is out of range valid range [0, %d]", n,
+		sprintf(message, "Selected frame %d is out of range valid range [0, %d]", n, (int) desc->dims[0] - 1);
            (int)desc->dims[0] - 1);
 		ERROR_JUMP(-1, done, message);
 	}
 	retval = get_frame_simple(desc, "data", frame_idx, frame_size, buffer);
@@ -277,10 +277,13 @@ done:
 	return retval;
 }
-int get_dectris_eiger_frame(const struct ds_desc_t *desc, int nin, void *buffer) {
+
 int get_dectris_eiger_frame(
 		const struct ds_desc_t *desc,
 		int n,
 		void *buffer) {
 	int retval = 0;
  int n = nin - desc->image_number_offset;
 	int block, frame_count, idx;
 	struct eiger_ds_desc_t *eiger_desc = (struct eiger_ds_desc_t*) desc;
 	char data_name[16] = {0};
@@ -289,22 +292,18 @@ int get_dectris_eiger_frame(const struct ds_desc_t *desc, int nin, void *buffer)
 	if (n < 0 || n >= desc->dims[0]) {
 		char message[64];
-    sprintf(message, "Selected frame %d is out of range valid range [0, %d]", n,
+		sprintf(message, "Selected frame %d is out of range valid range [0, %d]", n, (int) desc->dims[0] - 1);
            (int)desc->dims[0] - 1);
 		ERROR_JUMP(-1, done, message);
 	}
 	/* determine the relevant data block */
 	frame_count = 0;
 	block = 0;
-  while ((frame_count += eiger_desc->block_sizes[block]) <= n)
+	while ((frame_count += eiger_desc->block_sizes[block]) <= n) block++;
-    block++;
+	idx = n - (frame_count - eiger_desc->block_sizes[block]); /* index in current block */
  idx = n - (frame_count -
             eiger_desc->block_sizes[block]); /* index in current block */
 	frame_idx[0] = idx;
 	sprintf(data_name, "data_%06d", block + 1);
-  retval =
+	retval = eiger_desc->frame_func(desc, data_name, frame_idx, frame_size, buffer);
      eiger_desc->frame_func(desc, data_name, frame_idx, frame_size, buffer);
 	if (retval < 0) {
 		ERROR_JUMP(retval, done, "");
 	}
@@ -312,6 +311,7 @@ done:
 	return retval;
 }
 int get_dectris_eiger_dataset_dims(struct ds_desc_t *desc) {
 	int retval = 0;
 	int n_datas = 0;
@@ -323,8 +323,7 @@ int get_dectris_eiger_dataset_dims(struct ds_desc_t *desc) {
 	hsize_t dims[3] = {0};
 	struct eiger_ds_desc_t *eiger_desc = (struct eiger_ds_desc_t*) desc;
-  /* datasets are "data_%06d % n" - need to determine how many of these there
+	/* datasets are "data_%06d % n" - need to determine how many of these there are and what the ranges are */
   * are and what the ranges are */
 	sprintf(ds_name, "data_%06d", n_datas + 1);
 	while (H5Lexists(desc->data_g_id, ds_name, H5P_DEFAULT) > 0) {
@@ -340,7 +339,7 @@ int get_dectris_eiger_dataset_dims(struct ds_desc_t *desc) {
 		ds_id = H5Dopen2(desc->data_g_id, ds_name, H5P_DEFAULT);
 		if (ds_id < 0) {
 			char message[64];
-      sprintf(message, "Unable to open dataset %.16s", ds_name);
+			sprintf("Unable to open dataset %.16s", ds_name);
 			ERROR_JUMP(-1, loop_end, message);
 		}
 		t_id = H5Dget_type(ds_id);
@@ -356,16 +355,11 @@ int get_dectris_eiger_dataset_dims(struct ds_desc_t *desc) {
 		if (data_width <= 0) {
 			ERROR_JUMP(-1, close_space, "Unable to get type size");
 		}
    if ( H5Tequal(t_id,H5T_NATIVE_CHAR)>0 || H5Tequal(t_id,H5T_NATIVE_INT)>0 || H5Tequal(t_id,H5T_NATIVE_SHORT)>0 || H5Tequal(t_id,H5T_NATIVE_LONG)>0 || H5Tequal(t_id,H5T_NATIVE_LLONG)>0 ) {
      // signed
      data_width = -data_width;
    }
 		ndims = H5Sget_simple_extent_ndims(s_id);
 		if (ndims != 3) {
 			char message[64];
-      sprintf(message, "Dataset %.16s has rank %d, expected %d", ds_name, ndims,
+			sprintf(message, "Dataset %.16s has rank %d, expected %d", ds_name, ndims, 3);
              3);
 			ERROR_JUMP(-1, close_space, message);
 		}
 		if (H5Sget_simple_extent_dims(s_id, block_dims, NULL) < 0) {
@@ -385,8 +379,7 @@ int get_dectris_eiger_dataset_dims(struct ds_desc_t *desc) {
 close_dataset:
 		H5Dclose(ds_id);
 loop_end:
-    if (retval < 0)
+		if (retval < 0) break;
      break;
 	}
 	if (retval < 0) {
@@ -400,6 +393,7 @@ int get_dectris_eiger_dataset_dims(struct ds_desc_t *desc) {
 	return retval;
 }
 int read_pixel_info(hid_t g_id, const char *path, double *size) {
 	/*
 	 * NXdetector allows pixel size to be an array (for varied pixel size),
@@ -407,8 +401,7 @@ int read_pixel_info(hid_t g_id, const char *path, double *size) {
 	 * TODO: handle array case (return first value maybe?)
 	 */
-  /* read the scalar dataset value and scale according to the unit in the
+	/* read the scalar dataset value and scale according to the unit in the attribute */
   * attribute */
 	/* returned value is in metres */
 	int retval = 0;
 	herr_t err = 0;
@@ -417,15 +410,14 @@ int read_pixel_info(hid_t g_id, const char *path, double *size) {
 	ds_id = H5Dopen2(g_id, path, H5P_DEFAULT);
 	if (ds_id < 0) {
 		char message[64];
-    sprintf(message, "Error opening dataset %.32s", path);
+		sprintf("Error opening dataset %.32s", path);
 		ERROR_JUMP(-1, done, message);
 	}
-  err =
+	err = H5Dread(ds_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &value);
      H5Dread(ds_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &value);
 	if (err < 0) {
 		char message[64];
-    sprintf(message, "Error reading dataset %.32s", path);
+		sprintf("Error reading dataset %.32s", path);
 		ERROR_JUMP(-1, close_dataset, message);
 	}
@@ -437,10 +429,8 @@ int read_pixel_info(hid_t g_id, const char *path, double *size) {
 		double scale = 1;
 		a_id = H5Aopen(ds_id, "units", H5P_DEFAULT);
 		if (a_id < 0) {
-      char message[100];
+			char message[64];
-      sprintf(message,
+			sprintf("Error openeing units attribute for %.32s after existence check", path);
              "Error openeing units attribute for %.32s after existence check",
              path);
 			ERROR_JUMP(-1, close_dataset, message);
 		}
@@ -454,13 +444,11 @@ int read_pixel_info(hid_t g_id, const char *path, double *size) {
 			str_buffer = malloc(sizeof(char*));
 		} else {
 			str_size = H5Tget_size(t_id);
-      /* do not assume room has been left for null-byte in fixed length string
+			/* do not assume room has been left for null-byte in fixed length string */
       */
 			str_buffer = malloc(str_size + 1);
 		}
 		if (str_buffer == NULL) {
-      ERROR_JUMP(-1, close_datatype,
+			ERROR_JUMP(-1, close_datatype, "Unable to allocate space for variable length string");
                 "Unable to allocate space for variable length string");
 		}
 		mt_id = H5Tcopy(H5T_C_S1);
 		if (mt_id < 0) {
@@ -478,11 +466,9 @@ int read_pixel_info(hid_t g_id, const char *path, double *size) {
 			ERROR_JUMP(-1, close_mem_datatype, "Error reading units attribute");
 		}
 		/* ensure last byte is null */
-    if (str_size > 0)
+		if (str_size > 0) ((char*) str_buffer)[str_size] = '\0';
      ((char *)str_buffer)[str_size] = '\0';
-    scale = scale_from_units(str_size == -1 ? *(char **)str_buffer
+		scale = scale_from_units(str_size == -1 ? *(char**)str_buffer : (char*)str_buffer);
                                            : (char *)str_buffer);
 		value *= scale;
 		if (str_size == -1) {
@@ -505,16 +491,16 @@ int read_pixel_info(hid_t g_id, const char *path, double *size) {
 		H5Aclose(a_id);
 	} /* if H5Aexists(...) */
 close_dataset:
 	H5Dclose(ds_id);
 done:
-  if (retval == 0)
+	if (retval == 0) *size = value;
    *size = value;
 	return retval;
 }
-int get_nxs_pixel_info(const struct ds_desc_t *desc, double *x_size,
+
-                       double *y_size) {
+int get_nxs_pixel_info(const struct ds_desc_t *desc, double *x_size, double *y_size) {
 	int retval = 0;
 	if (read_pixel_info(desc->det_g_id, "x_pixel_size", x_size) < 0) {
 		ERROR_JUMP(-1, done, "");
@@ -526,21 +512,20 @@ done:
 	return retval;
 }
-int get_dectris_eiger_pixel_info(const struct ds_desc_t *desc, double *x_size,
+
-                                 double *y_size) {
+int get_dectris_eiger_pixel_info(const struct ds_desc_t *desc, double *x_size, double *y_size) {
 	int retval = 0;
-  if (read_pixel_info(desc->det_g_id, "detectorSpecific/x_pixel_size", x_size) <
+	if (read_pixel_info(desc->det_g_id, "detectorSpecific/x_pixel_size", x_size) < 0) {
      0) {
 		ERROR_JUMP(-1, done, "");
 	}
-  if (read_pixel_info(desc->det_g_id, "detectorSpecific/y_pixel_size", y_size) <
+	if (read_pixel_info(desc->det_g_id, "detectorSpecific/y_pixel_size", y_size) < 0) {
      0) {
 		ERROR_JUMP(-1, done, "");
 	}
 done:
 	return retval;
 }
 int get_nxs_pixel_mask(const struct ds_desc_t *desc, int *buffer) {
 	int retval = 0;
 	hid_t ds_id;
@@ -562,6 +547,7 @@ done:
 	return retval;
 }
 int get_dectris_eiger_pixel_mask(const struct ds_desc_t *desc, int *buffer) {
 	int retval = 0;
 	hid_t ds_id;
@@ -572,38 +558,10 @@ int get_dectris_eiger_pixel_mask(const struct ds_desc_t *desc, int *buffer) {
 		ERROR_JUMP(-1, done, "Error opening detectorSpecific/pixel_mask");
 	}
  // what if this is compressed?
  hid_t dcpl    = H5Dget_create_plist(ds_id);
  int n_filters = H5Pget_nfilters(dcpl);
  H5Z_filter_t    filter_id;
  if (n_filters>0) {
    unsigned int    flags;
    size_t          nelmts = 1;
    unsigned int    values_out[1] = {99};
    char            filter_name[80];
    for ( int i_filt = 0; i_filt < n_filters; i_filt++) {
      filter_id = H5Pget_filter(dcpl, i_filt, &flags, &nelmts, values_out, sizeof(filter_name), filter_name, NULL);
      if (filter_id>=0) {
        fprintf(stderr," filter #%d name =\"%s\"\n",(i_filt+1),filter_name);
      }
    }
  }
  int i0 = H5Zfilter_avail(BS_H5_FILTER_ID);
 	err = H5Dread(ds_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer);
 	if (err < 0) {
    if (n_filters>0) {
      ERROR_JUMP(-1, close_dataset, "Error reading detectorSpecific/pixel_mask with filter(s)");
    }
    else {
 		ERROR_JUMP(-1, close_dataset, "Error reading detectorSpecific/pixel_mask");
 	}
  }
  if (!i0 && H5Zfilter_avail(BS_H5_FILTER_ID)) {
    fprintf(stderr," bitshuffle filter is available now since H5Dread (of pixel-mask) triggered loading of the filter.\n");
  }
 close_dataset:
 	H5Dclose(ds_id);
@@ -611,19 +569,19 @@ done:
 	return retval;
 }
 int get_null_pixel_mask(const struct ds_desc_t *desc, int *buffer) {
 	hsize_t buffer_length = desc->dims[1] * desc->dims[2];
 	memset(buffer, 0, sizeof(*buffer) * buffer_length);
 	return 0;
 }
-herr_t det_visit_callback(hid_t root_id, const char *name,
+
-                          const H5O_info_t *info, void *op_data) {
+herr_t det_visit_callback(hid_t root_id, const char *name, const H5O_info_t *info, void *op_data) {
 	struct det_visit_objects_t *output_data = op_data;
 	hid_t g_id;
 	herr_t retval = 0;
-  if (info->type != H5O_TYPE_GROUP)
+	if (info->type != H5O_TYPE_GROUP) return 0;
    return 0;
 	g_id = H5Oopen(root_id, name, H5P_DEFAULT);
 	if (g_id < 0) {
 		char message[256];
@@ -633,9 +591,9 @@ herr_t det_visit_callback(hid_t root_id, const char *name,
 	/* check for an "NX_class" attribute */
 	{
-    char* buffer = (char*)malloc(1);
+		int str_size = 0;
-    buffer[0] = '\0';
+		void* buffer = NULL;
-    hid_t a_id, t_id;
+		hid_t a_id, t_id, mt_id;
 		if (H5Aexists(g_id, "NX_class") <= 0) {
 			/* not an error - just close group and allow continuation */
 			retval = 0;
@@ -645,8 +603,7 @@ herr_t det_visit_callback(hid_t root_id, const char *name,
 		if (a_id <= 0) {
 			char message[256];
 			sprintf(message,
-              "H5OVisit callback: Error opening NX_class attribute on %.128s "
+					"H5OVisit callback: Error opening NX_class attribute on %.128s after existence check",
              "after existence check",
 					name);
 			ERROR_JUMP(-1, close_group, message);
 		}
@@ -655,48 +612,61 @@ herr_t det_visit_callback(hid_t root_id, const char *name,
 		if (t_id < 0) {
 			ERROR_JUMP(-1, close_attr, "Error getting datatype");
 		}
-
+		if (H5Tis_variable_str(t_id) > 0) {
-    H5A_info_t a_info;
+			str_size = -1;
-    herr_t err = H5Aget_info(a_id, &a_info);
+			buffer = malloc(sizeof(char*));
-    if (err<0) {
+		} else {
-      ERROR_JUMP(-1, close_type,
+			str_size = H5Tget_size(t_id);
-                 "Unable to get attribute info for NX_class");
+			buffer = malloc(str_size + 1);
 		}
    else {
      if (a_info.cset != H5T_CSET_ASCII && a_info.cset != H5T_CSET_UTF8) {
        fprintf(stderr," %s : NX_class attribute info cset = unknown with size %d\n",name,(int) a_info.data_size);
      }
    }
    buffer = (char *)malloc(sizeof(char)*(H5Tget_size(t_id)+1));
 		if (!buffer) {
 			ERROR_JUMP(-1, close_type, "Error allocating string buffer");
 		}
-    if (H5Aread(a_id, t_id, buffer) < 0) {
+		mt_id = H5Tcopy(H5T_C_S1);
 		if (mt_id < 0) {
 			ERROR_JUMP(-1, free_buffer, "Error creating HDF5 String datatype");
 		}
 		if (H5Tset_size(mt_id, str_size == -1 ? H5T_VARIABLE : str_size) < 0) {
 			char message[64];
 			sprintf(message, "Error setting string datatype to size %d", str_size);
 			ERROR_JUMP(-1, close_mtype, message);
 		}
 		if (H5Aread(a_id, mt_id, buffer) < 0) {
 			char message[256];
-      sprintf(
+			sprintf(message,
          message,
 					"H5OVisit callback: Error reading NX_class attribute on group %.128s",
 					name);
-      ERROR_JUMP(-1, free_buffer, message);
+			ERROR_JUMP(-1, close_mtype, message);
 		}
-    /* ensure the buffer is null terminated */
+		/* at least one file has been seen where the NX_class attribute was not null terminated
-    buffer[H5Tget_size(t_id)] = '\0';
+		 * and extraneous bytes where being read by strcmp -  set the end byte to null
-
+		*/
 		if (str_size > 0) ((char*) buffer)[str_size] = '\0';
 		/* test for NXdata or NXdetector */
 		{
-      if      (strcmp("NXdata", buffer) == 0) {
+			char *nxclass = str_size > 0 ? (char*) buffer : *((char**) buffer);
 			if (strcmp("NXdata", nxclass) == 0) {
 				hid_t out_id = H5Gopen(root_id, name, H5P_DEFAULT);
 				output_data->nxdata = out_id;
-      }
+			} else if (strcmp("NXdetector", nxclass) == 0) {
      else if (strcmp("NXdetector", buffer) == 0) {
 				hid_t out_id = H5Gopen(root_id, name, H5P_DEFAULT);
 				output_data->nxdetector = out_id;
 			}
 		}
 		if (str_size == -1) {
 			hsize_t dims[1] = {1};
 			hid_t s_id = H5Screate_simple(1, dims, NULL);
 			H5Sselect_all(s_id);
 			H5Dvlen_reclaim(mt_id, s_id, H5P_DEFAULT, buffer);
 			H5Sclose(s_id);
 		}
 close_mtype:
 		H5Tclose(mt_id);
 free_buffer:
 		free(buffer);
 close_type:
@@ -714,7 +684,9 @@ done:
 	return retval;
 }
-int check_for_chunk_read(hid_t g_id, const char *ds_name,
+int check_for_chunk_read(
 		hid_t g_id,
 		const char* ds_name,
 		struct opt_eiger_ds_desc_t *desc) {
 	int retval = 0;
@@ -780,8 +752,10 @@ int check_for_chunk_read(hid_t g_id, const char *ds_name,
 	}
 	if (n_filters == 1) {
-    filter = H5Pget_filter2(dcpl, 0, &filter_flags, &cd_nelems, desc->bs_params,
+		filter = H5Pget_filter2(dcpl, 0, &filter_flags,
-                            name_len, filter_name, &filter_config);
+				&cd_nelems, desc->bs_params,
 				name_len, filter_name,
 				&filter_config);
 		if (filter < 0) {
 			ERROR_JUMP(-1, done, "Error retrieving filter information");
 		}
@@ -791,8 +765,7 @@ int check_for_chunk_read(hid_t g_id, const char *ds_name,
 		if (cd_nelems > BS_H5_N_PARAMS) {
 			char message[128];
 			sprintf(message,
-              "More than expected number of parameters to bitshuffle filter - "
+					"More than expected number of parameters to bitshuffle filter - expected %d, was %lu",
              "expected %d, was %lu",
 					BS_H5_N_PARAMS, cd_nelems);
 			ERROR_JUMP(-1, done, message);
 		}
@@ -804,17 +777,13 @@ int check_for_chunk_read(hid_t g_id, const char *ds_name,
 	retval = 1;
 done:
-  if (dcpl)
+	if (dcpl) H5Pclose(dcpl);
-    H5Pclose(dcpl);
+	if (s_id) H5Sclose(s_id);
-  if (s_id)
+	if (ds_id) H5Dclose(ds_id);
    H5Sclose(s_id);
  if (ds_id)
    H5Dclose(ds_id);
 	return retval;
 }
-int create_dataset_descriptor(struct ds_desc_t **desc,
+int create_dataset_descriptor(struct ds_desc_t **desc, struct det_visit_objects_t *visit_result) {
                              struct det_visit_objects_t *visit_result) {
 	int retval = 0;
 	hid_t g_id, ds_id;
 	int (*pxl_func)(const struct ds_desc_t*, double*, double*);
@@ -828,13 +797,11 @@ int create_dataset_descriptor(struct ds_desc_t **desc,
 	/* determine the pixel information location */
 	if (H5Lexists(g_id, "x_pixel_size", H5P_DEFAULT) > 0 &&
-      H5Lexists(g_id, "y_pixel_size", H5P_DEFAULT) > 0) {
+			H5Lexists(g_id, "y_pixel_size", H5P_DEFAULT)) {
 		pxl_func = &get_nxs_pixel_info;
 	} else if (H5Lexists(g_id, "detectorSpecific", H5P_DEFAULT) > 0 &&
-             H5Lexists(g_id, "detectorSpecific/x_pixel_size", H5P_DEFAULT) >
+			H5Lexists(g_id, "detectorSpecific/x_pixel_size", H5P_DEFAULT) > 0 &&
-                 0 &&
+			H5Lexists(g_id, "detectorSpecific/y_pixel_size", H5P_DEFAULT) > 0) {
             H5Lexists(g_id, "detectorSpecific/y_pixel_size", H5P_DEFAULT) >
                 0) {
 		pxl_func = &get_dectris_eiger_pixel_info;
 	} else {
 		ERROR_JUMP(-1, done, "Could not locate x_pixel_size and y_pixel_size");
@@ -848,9 +815,7 @@ int create_dataset_descriptor(struct ds_desc_t **desc,
 		pxl_mask_func = &get_dectris_eiger_pixel_mask;
 	} else {
 		pxl_mask_func = &get_null_pixel_mask;
-    fprintf(
+		fprintf(stderr, "WARNING: Could not find pixel mask - no masking will be applied\n");
        stderr,
        "WARNING: Could not find pixel mask - no masking will be applied\n");
 	}
 	/* determine where the data is stored and what strategy to use */
@@ -879,6 +844,7 @@ int create_dataset_descriptor(struct ds_desc_t **desc,
 		ERROR_JUMP(-1, done, "Could not locate detector dataset");
 	}
 	if (ds_prop_func == &get_dectris_eiger_dataset_dims) {
 		/* setup the "extra info" structs */
@@ -895,8 +861,7 @@ int create_dataset_descriptor(struct ds_desc_t **desc,
 		o_eiger_desc = malloc(sizeof(*o_eiger_desc));
 		if (!o_eiger_desc) {
 			free(eiger_desc);
-      ERROR_JUMP(-1, done,
+			ERROR_JUMP(-1, done, "Memory error creating data description for optimised Eiger");
                 "Memory error creating data description for optimised Eiger");
 		}
 		o_eiger_desc->base.frame_func = &get_frame_from_chunk;
@@ -936,13 +901,15 @@ done:
 	return retval;
 }
-int get_detector_info(const hid_t fid, struct ds_desc_t **desc) {
+
 int get_detector_info(
 		const hid_t fid,
 		struct ds_desc_t **desc) {
 	int retval = 0;
 	herr_t err = 0;
 	struct det_visit_objects_t objects = {0};
-  err =
+	err = H5Ovisit(fid, H5_INDEX_NAME, H5_ITER_INC, &det_visit_callback, &objects);
      H5Ovisit(fid, H5_INDEX_NAME, H5_ITER_INC, &det_visit_callback, &objects);
 	if (err < 0) {
 		clear_det_visit_objects(&objects);
 		ERROR_JUMP(-1, done, "Error during H5Ovisit callback");
@@ -958,6 +925,7 @@ int get_detector_info(const hid_t fid, struct ds_desc_t **desc) {
 		ERROR_JUMP(retval, done, "");
 	};
 done:
 	return retval;
 }
@@ -3,24 +3,24 @@
 * Author: Charles Mita
 */
 #ifndef NXS_XDS_FILE_H
 #define NXS_XDS_FILE_H
 #include <hdf5.h>
 #include "err.h"
 #include "filters.h"
-#include <hdf5.h>
+
 /*
  * Generic descriptor for an opened detector dataset: the HDF5 group handles,
  * the dataset extent and the function pointers used to read from it.
  * More specific descriptors embed this struct as a member named `base`
  * (see struct eiger_ds_desc_t) and callers cast a `struct ds_desc_t *` to
  * the specific type.
  */
 struct ds_desc_t {
 	/* group handle handed to read_pixel_info() when looking up the x/y pixel sizes */
 	hid_t det_g_id;
 	/* group handle from which the "data" / "data_%06d" datasets are opened */
 	hid_t data_g_id;
 	/* dataset extent: dims[0] is the number of frames, dims[1] x dims[2] the size of one frame */
 	hsize_t dims[3];
 	/* width of one data element; some code paths store it negated for signed
 	 * types and take abs() before use - NOTE(review): confirm which convention applies */
 	int data_width;
   /* subtracted from the image number passed in by the caller to obtain the frame index */
   int image_number_offset;
 	/* (desc, x_size, y_size) - same signature as get_nxs_pixel_info / get_dectris_eiger_pixel_info */
 	int (*get_pixel_properties)(const struct ds_desc_t*, double*, double*);
 	/* (desc, mask buffer) - same signature as get_nxs_pixel_mask / get_null_pixel_mask */
 	int (*get_pixel_mask)(const struct ds_desc_t*, int*);
 	/* (desc, frame number, output buffer) - same signature as get_nxs_frame / get_dectris_eiger_frame */
 	int (*get_data_frame)(const struct ds_desc_t*, const int, void*);
 	/* releases the descriptor - same signature as free_eiger_desc / free_opt_eiger_desc */
 	void (*free_desc)(struct ds_desc_t*);
   /* NOTE(review): flexible array member. ISO C does not permit a struct that
    * ends in one to be a member of another struct, yet this struct is embedded
    * as `base` in the eiger descriptors - verify this member is still needed. */
   int i2i[]; /* translation from the image number requested by XDS to the actual position in the HDF5 file */
 };
 struct nxs_ds_desc_t {
@@ -31,8 +31,7 @@ struct eiger_ds_desc_t {
 	struct ds_desc_t base;
 	int n_data_blocks;
 	int *block_sizes;
-  int (*frame_func)(const struct ds_desc_t *, const char *, const hsize_t *,
+	int (*frame_func)(const struct ds_desc_t*, const char*, const hsize_t*, const hsize_t*, void*);
                    const hsize_t *, void *);
 };
 struct opt_eiger_ds_desc_t {
@@ -4,21 +4,24 @@
 */
 #include <stdio.h>
 #include "bitshuffle.h"
 #include "err.h"
 #include "filters.h"
 #include "err.h"
 #include "bitshuffle.h"
 /* Required prototypes from bitshuffle.c but not included in header */
 uint64_t bshuf_read_uint64_BE(const void *buffer);
 uint32_t bshuf_read_uint32_BE(const void *buffer);
 /*
- * Derived from the h5 filter code from the bitshuffle project (not included
+ * Derived from the h5 filter code from the bitshuffle project (not included here)
 * here)
 */
-int bslz4_decompress(const unsigned int *bs_params, size_t in_size,
+int bslz4_decompress(
-                     void *in_buffer, size_t out_size, void *out_buffer) {
+		const unsigned int* bs_params,
 		size_t in_size,
 		void *in_buffer,
 		size_t out_size,
 		void *out_buffer) {
 	int retval = 0;
 	size_t size, elem_size, block_size, u_bytes;
@@ -28,8 +31,7 @@ int bslz4_decompress(const unsigned int *bs_params, size_t in_size,
 	if (u_bytes != out_size) {
 		char message[64];
-    sprintf(message, "Decompressed chunk is %lu bytes, expected %lu", u_bytes,
+		sprintf(message, "Decompressed chunk is %lu bytes, expected %lu", u_bytes, out_size);
            out_size);
 		ERROR_JUMP(-1, done, message);
 	}
@@ -42,13 +44,11 @@ int bslz4_decompress(const unsigned int *bs_params, size_t in_size,
 	size = u_bytes / elem_size;
 	if (bs_params[4] == BS_H5_PARAM_LZ4_COMPRESS) {
-    if (bshuf_decompress_lz4(in_buffer, out_buffer, size, elem_size,
+		if (bshuf_decompress_lz4(in_buffer, out_buffer, size, elem_size, block_size) < 0) {
                             block_size) < 0) {
 			ERROR_JUMP(-1, done, "Error performing bitshuffle_lz4 decompression");
 		}
 	} else {
-    if (bshuf_bitunshuffle(in_buffer, out_buffer, size, elem_size, block_size) <
+		if (bshuf_bitunshuffle(in_buffer, out_buffer, size, elem_size, block_size) < 0) {
        0) {
 			ERROR_JUMP(-1, done, "Error performing bit unshuffle");
 		}
 	}
@@ -10,7 +10,13 @@
 #define BS_H5_FILTER_ID 32008
 #define BS_H5_PARAM_LZ4_COMPRESS 2
-int bslz4_decompress(const unsigned int *bs_params, size_t in_size,
+
-                     void *in_buffer, size_t out_size, void *out_buffer);
+
 int bslz4_decompress(
 		const unsigned int* bs_params,
 		size_t in_size,
 		void *in_buffer,
 		size_t out_size,
 		void *out_buffer);
 #endif /* NXS_XDS_FILTER_H */
@@ -3,87 +3,55 @@
 * Author: Charles Mita
 */
 #include <hdf5.h>
 #include <stdlib.h>
 #include <string.h>
 #include "file.h"
 #include "filters.h"
 #include "plugin.h"
 #ifdef USE_BITSHUFFLE
 #include "bshuf_h5filter.h"
 #endif
-/* XDS does not provide an error callback facility, so just write to stderr
+/* XDS does not provide an error callback facility, so just write to stderr for now */
-   for now - generally regarded as poor practice */
+/* generally regarded as poor practice */
 #define ERROR_OUTPUT stderr
-/* mask bits loosely based on what Neggia does and what NeXus says should be
+
-   done basically - anything in the low byte (& 0xFF) means "ignore this"
+
-   Neggia uses the value -2 if bit 1, 2 or 3 are set */
+/* mask bits loosely based on what Neggia does and what NeXus says should be done */
-/* CV-GPhL-20210408: we want more control over the value non-masked
+/* basically - anything in the low byte (& 0xFF) means "ignore this" */
-   pixels should be set to. */
+/* Neggia usses the value -2 if bit 1, 2 or 3 are set */
-#define COPY_AND_MASK(in, value, setValue, out, size, mask)                    \
+#define COPY_AND_MASK(in, out, size, mask) \
 { \
 	int i; \
    if (value!=0) {                                                            \
      if (mask) {                                                              \
        for (i = 0; i < size; ++i) {                                           \
          out[i] = (in[i] == value) ? setValue : in[i];                        \
          if (mask[i] & 0xFF)                                                  \
            out[i] = -1;                                                       \
          if (mask[i] & 30)                                                    \
            out[i] = -2;                                                       \
        }                                                                      \
      } else {                                                                 \
        for (i = 0; i < size; i++) {                                           \
          out[i] = (in[i] == value) ? setValue : in[i];                        \
        }                                                                      \
      }                                                                        \
    } else {                                                                   \
 	if (mask) { \
 		for (i = 0; i < size; ++i) { \
 			out[i] = in[i]; \
-          if (mask[i] & 0xFF)                                                  \
+			if (mask[i] & 0xFF) out[i] = -1; \
-            out[i] = -1;                                                       \
+			if (mask[i] & 30) out[i] = -2; \
          if (mask[i] & 30)                                                    \
            out[i] = -2;                                                       \
 		} \
 	} else { \
 		for (i = 0; i < size; i++) { \
 			out[i] = in[i]; \
 		} \
 	} \
    }                                                                          \
 }
 #define APPLY_MASK(buffer, mask, size) \
 { \
 	int i; \
 	if (mask) { \
 		for (i = 0; i < size; ++i) { \
-        if (mask[i] & 0xFF)                                                    \
+			if (mask[i] & 0xFF) buffer[i] = -1; \
-          buffer[i] = -1;                                                      \
+			if (mask[i] & 30) buffer[i] = -2; \
        if (mask[i] & 30)                                                      \
          buffer[i] = -2;                                                      \
 		} \
 	} \
 }
 static hid_t file_id = 0;
 static struct ds_desc_t *data_desc = NULL;
 static int *mask_buffer = NULL;
 // CV-20240605: potentially provide a mapping from frame number (as
 //              requested by caller) to actual 2D slice within 3D data
 //              array.
 //
 //              This is defined by the environment variable
 //              DURIN_IMAGE2ORDINAL (see below).
 int *image2ordinal = NULL;
 int image2ordinal_debug = 0;
 int image2ordinal_imin = 0;
 int image2ordinal_imax = 0;
 void fill_info_array(int info[1024]) {
 	info[0] = DLS_CUSTOMER_ID;
@@ -91,183 +59,33 @@ void fill_info_array(int info[1024]) {
 	info[2] = VERSION_MINOR;
 	info[3] = VERSION_PATCH;
 	info[4] = VERSION_TIMESTAMP;
  info[5] = 0; // image number offset
  info[6] = -1; // marked pixels not already in pixel_mask: reset to this value
  char *cenv;
  cenv = getenv("DURIN_IMAGE_NUMBER_OFFSET");
  if (cenv!=NULL) {
    info[5] = atoi(cenv);
  }
  cenv = getenv("DURIN_RESET_UNMASKED_PIXEL");
  if (cenv!=NULL) {
    info[6] = atoi(cenv);
 }
-  cenv = getenv("DURIN_IMAGE2ORDINAL");
+int convert_to_int_and_mask(void *in_buffer, int d_width, int *out_buffer, int length, int *mask) {
  if (cenv!=NULL&&(!image2ordinal)) {
    char *denv = getenv("DURIN_IMAGE2ORDINAL_DEBUG");
    if (denv!=NULL) {
      image2ordinal_debug=1;
    }
    // <ordinal_start>,<image_1_start>-<image_1_end>,<image_2_start>-<image_2_end>,..,<image_N_start>-<image_N_end>
    if (image2ordinal_debug) printf("DURIN_IMAGE2ORDINAL = \"%s\"\n",cenv);
    const char outer_delimiters[] = ",";
    const char inner_delimiters[] = "-";
    char *found;
    char *outer_saveptr;
    char *inner_saveptr;
    int ordinal_start = 0;
    int ordinal = 0;
    int ntt = -1;
    found = strtok_r(cenv,outer_delimiters, &outer_saveptr);
    if (found!=NULL) {
      int tt[1000][2];
      while(found) {
        if (ordinal_start==0) {
          ordinal_start = atoi(found);
          ordinal = ordinal_start - 1;
        }
        else {
          char* s = strtok_r(found, inner_delimiters, &inner_saveptr);
          if (s!=NULL) {
            int i1 = atoi(s);
            s = strtok_r(NULL,inner_delimiters, &inner_saveptr);
            if (s!=NULL) {
              int i2 = atoi(s);
              ntt++;
              if (ntt<=1000) {
                tt[ntt][0] = i1;
                tt[ntt][1] = i2;
                for(int i=i1; i<=i2; ++i) {
                  ordinal++;
                  if (ordinal==1) {
                    image2ordinal_imin=i;
                    image2ordinal_imax=i;
                  }
                  else {
                    if (i<image2ordinal_imin) image2ordinal_imin=i;
                    if (i>image2ordinal_imax) image2ordinal_imax=i;
                  }
                }
              }
            }
          }
        }
        found = strtok_r(NULL,outer_delimiters,&outer_saveptr);
      }
      if (ordinal_start>0) {
        if (image2ordinal_debug) {
          printf("ordinal_start, end = %d %d\n",ordinal_start, ordinal);
          printf("imin, imax         = %d %d\n",image2ordinal_imin,image2ordinal_imax);
        }
        // allocate array to go from image number/id to ordinal:
        image2ordinal = malloc((image2ordinal_imax-image2ordinal_imin+1) * sizeof(image2ordinal_imin));
        int ordinal = ordinal_start - 1;
        for(int i=0; i<=ntt; i++) {
          for(int j=tt[i][0];j<=tt[i][1];j++) {
            ordinal++;
            //printf(" %d -> %d\n",ordinal,j);
            image2ordinal[j] = ordinal;
          }
        }
        if (image2ordinal&&image2ordinal_debug) {
          for(int i=image2ordinal_imin; i<=image2ordinal_imax; i++) {
            if (image2ordinal[i]>0) {
              printf(" %d -> %d\n",i,image2ordinal[i]);
            }
          }
        }
      }
    }
  }
 }
 int convert_to_int_and_mask(void *in_buffer, int width, int setValue, int *out_buffer,
                            int length, int *mask) {
 	/* transfer data to output buffer, performing data conversion as required */
 	int retval = 0;
 	/* TODO: decide how conversion of data should work */
-  /* Should we sign extend? Neggia doesn't (casts from uint*), but may be more
+	/* Should we sign extend? Neggia doesn't (casts from uint*), but may be more intuitive */
   * intuitive */
  int d_width = abs(width);
  // CV-20210407
  //   Dealing with a signed data array: no extra check for marker
  //   value needed (since data can already take advantage of the
  //   negative data range). It is unclear though why/when data would
  //   come in as a signed array in the first place ...
  if (width<0) {
 	if (d_width == sizeof(signed char)) {
      // 8-bit
 		signed char *in = in_buffer;
-      COPY_AND_MASK(in, 0, setValue, out_buffer, length, mask);
+		COPY_AND_MASK(in, out_buffer, length, mask);
 	} else if (d_width == sizeof(short)) {
      // 16-bit
 		short *in = in_buffer;
-      COPY_AND_MASK(in, 0, setValue, out_buffer, length, mask);
+		COPY_AND_MASK(in, out_buffer, length, mask);
 	} else if (d_width == sizeof(int)) {
      // 32-bit on common platforms (sizeof(int) == 4)
 		int *in = in_buffer;
-      COPY_AND_MASK(in, 0, setValue, out_buffer, length, mask);
+		COPY_AND_MASK(in, out_buffer, length, mask);
 	} else if (d_width == sizeof(long int)) {
      // long: 32-bit on ILP32/LLP64, 64-bit on LP64
 		long int *in = in_buffer;
-      COPY_AND_MASK(in, 0, setValue, out_buffer, length, mask);
+		COPY_AND_MASK(in, out_buffer, length, mask);
 	} else if (d_width == sizeof(long long int)) {
      // 64-bit
 		long long int *in = in_buffer;
-      COPY_AND_MASK(in, 0, setValue, out_buffer, length, mask);
+		COPY_AND_MASK(in, out_buffer, length, mask);
 	} else {
 		char message[128];
-      sprintf(message, "Unsupported conversion of data width %d to %ld (int)",
+		sprintf(message, "Unsupported conversion of data width %d to %ld (int)", d_width, sizeof(int));
              d_width, sizeof(int));
 		ERROR_JUMP(-1, done, message);
 	}
  }
  // CV-20210407
  //   Dealing with an unsigned data array: extra check for marker
  //   value required (to handle overloaded pixels correctly if wanted
  //   - see also DURIN_RESET_UNMASKED_PIXEL environment variable).
  else {
    if (d_width == sizeof(unsigned char)) {
      // 8-bit
      unsigned char *in = in_buffer;
      COPY_AND_MASK(in, UINT8_MAX, setValue, out_buffer, length, mask);
    } else if (d_width == sizeof(unsigned short)) {
      // 16-bit
      unsigned short *in = in_buffer;
      COPY_AND_MASK(in, UINT16_MAX, setValue, out_buffer, length, mask);
    } else if (d_width == sizeof(unsigned int)) {
      // 32-bit on common platforms (sizeof(unsigned int) == 4)
      unsigned int *in = in_buffer;
      COPY_AND_MASK(in, UINT16_MAX, setValue, out_buffer, length, mask);
    } else if (d_width == sizeof(unsigned long)) {
      // unsigned long: 32-bit on ILP32/LLP64, 64-bit on LP64
      unsigned long *in = in_buffer;
      COPY_AND_MASK(in, UINT32_MAX, setValue, out_buffer, length, mask);
    } else if (d_width == sizeof(unsigned long long)) {
      // 64-bit
      unsigned long long *in = in_buffer;
      COPY_AND_MASK(in, UINT32_MAX, setValue, out_buffer, length, mask);
    } else {
      char message[128];
      sprintf(message, "Unsupported conversion of data width %d to %ld (int)",
              d_width, sizeof(int));
      ERROR_JUMP(-1, done, message);
    }
  }
 done:
 	return retval;
 }
@@ -276,7 +94,10 @@ done:
 extern "C" {
 #endif
-void plugin_open(const char *filename, int info[1024], int *error_flag) {
+void plugin_open(
 		const char *filename,
 		int info[1024],
 		int *error_flag) {
 	int retval = 0;
 	*error_flag = 0;
@@ -290,12 +111,6 @@ void plugin_open(const char *filename, int info[1024], int *error_flag) {
 		ERROR_JUMP(-2, done, "Failed to configure HDF5 error handling");
 	}
 #ifdef USE_BITSHUFFLE
  if (bshuf_register_h5filter() < 0 ) {
    ERROR_JUMP(-2, done, "Failed to register bitshuffle filter");
  }
 #endif
 	fill_info_array(info);
 	file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);
 	if (file_id < 0) {
@@ -310,15 +125,11 @@ void plugin_open(const char *filename, int info[1024], int *error_flag) {
 		ERROR_JUMP(-4, done, "");
 	}
  data_desc->image_number_offset = info[5];
 	mask_buffer = malloc(data_desc->dims[1] * data_desc->dims[2] * sizeof(int));
 	if (mask_buffer) {
 		retval = data_desc->get_pixel_mask(data_desc, mask_buffer);
 		if (retval < 0) {
-      fprintf(
+			fprintf(ERROR_OUTPUT, "WARNING: Could not read pixel mask - no masking will be applied\n");
          ERROR_OUTPUT,
          "WARNING: Could not read pixel mask - no masking will be applied\n");
 			dump_error_stack(ERROR_OUTPUT);
 			free(mask_buffer);
 			mask_buffer = NULL;
@@ -326,10 +137,6 @@ void plugin_open(const char *filename, int info[1024], int *error_flag) {
 	}
 	retval = 0;
 #ifdef GPHL_COMPILE_DATE
  fprintf(ERROR_OUTPUT, "\n XDS HDF5/Durin plugin %d.%d.%d (DLS, 2018-2023; GPhL, 2020-2024 - built %d)\n", info[1], info[2], info[3], GPHL_COMPILE_DATE);
 #endif
 done:
 	*error_flag = retval;
 	if (retval < 0) {
@@ -341,23 +148,28 @@ done:
 	}
 }
-void plugin_get_header(int *nx, int *ny, int *nbytes, float *qx, float *qy,
+
-                       int *number_of_frames, int info[1024], int *error_flag) {
+void plugin_get_header(
 		int *nx, int *ny,
 		int *nbytes,
 		float *qx, float *qy,
 		int *number_of_frames,
 		int info[1024],
 		int *error_flag) {
 	int err = 0;
 	int retval = 0;
 	double x_pixel_size, y_pixel_size;
 	reset_error_stack();
 	fill_info_array(info);
-  err =
+	err = data_desc->get_pixel_properties(data_desc, &x_pixel_size, &y_pixel_size);
      data_desc->get_pixel_properties(data_desc, &x_pixel_size, &y_pixel_size);
 	if (err < 0) {
 		ERROR_JUMP(err, done, "Failed to retrieve pixel information");
 	}
 	*nx = data_desc->dims[2];
 	*ny = data_desc->dims[1];
-  *nbytes = abs(data_desc->data_width);
+	*nbytes = data_desc->data_width;
 	*number_of_frames = data_desc->dims[0];
 	*qx = (float) x_pixel_size;
 	*qy = (float) y_pixel_size;
@@ -369,8 +181,13 @@ done:
 	}
 }
-void plugin_get_data(int *frame_number, int *nx, int *ny, int *data_array,
+
-                     int info[1024], int *error_flag) {
+void plugin_get_data(
 		int *frame_number,
 		int *nx, int *ny,
 		int *data_array,
 		int info[1024],
 		int *error_flag) {
 	int retval = 0;
 	int frame_size_px = data_desc->dims[1] * data_desc->dims[2];
@@ -378,47 +195,25 @@ void plugin_get_data(int *frame_number, int *nx, int *ny, int *data_array,
 	fill_info_array(info);
 	void *buffer = NULL;
-  if (sizeof(*data_array) == abs(data_desc->data_width)) {
+	if (sizeof(*data_array) == data_desc->data_width) {
 		buffer = data_array;
 	} else {
-    buffer = malloc(abs(data_desc->data_width) * frame_size_px);
+		buffer = malloc(data_desc->data_width * frame_size_px);
 		if (!buffer) {
 			ERROR_JUMP(-1, done, "Unable to allocate data buffer");
 		}
 	}
-  int ordinal = *frame_number;
+	if (data_desc->get_data_frame(data_desc, (*frame_number) - 1, buffer) < 0) {
  if (image2ordinal) {
    if (ordinal < image2ordinal_imin || ordinal>image2ordinal_imax) {
 		char message[64] = {0};
      sprintf(message, "Failed to map frame %d to ordinals since outside of range %d - %d", ordinal,image2ordinal_imin,image2ordinal_imax);
      ERROR_JUMP(-2, done, message);
    }
    ordinal = image2ordinal[ordinal];
    if (ordinal!=*frame_number) {
      if (image2ordinal_debug) printf("fetching data from ordinal %d for frame %d\n",ordinal,*frame_number);
    }
  }
  if (data_desc->get_data_frame(data_desc, ordinal - 1, buffer) < 0) {
    char message[64] = {0};
    if (image2ordinal) {
      sprintf(message, "Failed to retrieve data for frame %d (ordinal %d)", *frame_number, ordinal);
    } else {
 		sprintf(message, "Failed to retrieve data for frame %d", *frame_number);
    }
 		ERROR_JUMP(-2, done, message);
 	}
 	if (buffer != data_array) {
-    if (convert_to_int_and_mask(buffer, data_desc->data_width, info[6], data_array,
+		if (convert_to_int_and_mask(buffer, data_desc->data_width, data_array, frame_size_px, mask_buffer) < 0) {
                                frame_size_px, mask_buffer) < 0) {
 			char message[64];
      if (image2ordinal) {
        sprintf(message, "Error converting data for frame %d (ordinal %d)", *frame_number, ordinal);
      } else {
 			sprintf(message, "Error converting data for frame %d", *frame_number);
      }
 			ERROR_JUMP(-2, done, message);
 		}
 	} else {
@@ -430,10 +225,10 @@ done:
 	if (retval < 0) {
 		dump_error_stack(ERROR_OUTPUT);
 	}
-  if (buffer && (buffer != data_array))
+	if (buffer && (buffer != data_array)) free(buffer);
    free(buffer);
 }
 void plugin_close(int *error_flag) {
 	if (file_id) {
 		if (H5Fclose(file_id) < 0) {
@@ -443,15 +238,7 @@ void plugin_close(int *error_flag) {
 	}
 	file_id = 0;
-  if (image2ordinal) {
+	if (mask_buffer) free(mask_buffer);
    free(image2ordinal);
    image2ordinal = NULL;
  }
  if (mask_buffer) {
    free(mask_buffer);
    mask_buffer = NULL;
  }
 	if (data_desc->free_desc) {
 		data_desc->free_desc(data_desc);
 		data_desc = NULL;
@@ -15,23 +15,39 @@
 extern "C" {
 #endif
-#define DLS_CUSTOMER_ID                                                        \
+#define DLS_CUSTOMER_ID 0x01 /* pretend we're Dectris, otherwise XDS doesn't work */
  0x01 /* pretend we're Dectris, otherwise XDS doesn't work */
 /* Plugin version numbers; fill_info_array() copies them into the info[]
  * array that is handed back to the caller. */
 #define VERSION_MAJOR 0
 #define VERSION_MINOR 0
 #define VERSION_PATCH 0
 #define VERSION_TIMESTAMP -1 /* good enough for Dectris apparently */
 void plugin_open(const char *filename, int info[1024], int *error_flag);
-void plugin_get_header(int *nx, int *ny, int *nbytes, float *qx, float *qy,
+void plugin_open(
-                       int *number_of_frames, int info[1024], int *error_flag);
+		const char *filename,
 		int info[1024],
 		int *error_flag);
 void plugin_get_header(
 		int *nx, int *ny,
 		int *nbytes,
 		float *qx, float *qy,
 		int *number_of_frames,
 		int info[1024],
 		int *error_flag);
 void plugin_get_data(
 		int *frame_number,
 		int *nx, int *ny,
 		int *data_array,
 		int info[1024],
 		int *error_flag);
 void plugin_get_data(int *frame_number, int *nx, int *ny, int *data_array,
                     int info[1024], int *error_flag);
 void plugin_close(int *error_flag);
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
@@ -1,8 +1,8 @@
 #include "err.h"
 #include "file.h"
 #include <hdf5.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <hdf5.h>
 #include "file.h"
 #include "err.h"
 #define COPY_AND_MASK(in, out, size, mask) \
 { \
@@ -10,10 +10,8 @@
 	if (mask) { \
 		for (i = 0; i < size; ++i) { \
 			out[i] = in[i]; \
-        if (mask[i] & 0xFE)                                                    \
+			if (mask[i] & 0xFE) out[i] = -2; \
-          out[i] = -2;                                                         \
+			if (mask[i] & 0x01) out[i] = -1; \
        if (mask[i] & 0x01)                                                    \
          out[i] = -1;                                                         \
 		} \
 	} else { \
 		for (i = 0; i < size; i++) { \
@@ -37,6 +35,7 @@ done:
 	return retval;
 }
 int main(int argc, char **argv) {
 	int err = 0;
 	int retval = 0;
@@ -59,8 +58,7 @@ int main(int argc, char **argv) {
 	}
 	fid = H5Fopen(test_file, H5F_ACC_RDONLY, H5P_DEFAULT);
-  if (fid < 0)
+	if (fid < 0) ERROR_JUMP(-1, done, "Error opening file");
    ERROR_JUMP(-1, done, "Error opening file");
 	err = get_detector_info(fid, &desc);
 	if (err < 0) {
@@ -127,15 +125,10 @@ int main(int argc, char **argv) {
 	}
 done:
-  if (fid > 0)
+	if (fid > 0) H5Fclose(fid);
-    H5Fclose(fid);
+	if (data) free(data);
-  if (data)
+	if (buffer && (data != buffer)) free(buffer);
-    free(data);
+	if (mask) free(mask);
-  if (buffer && (data != buffer))
+	if (retval != 0) dump_error_stack(stderr);
    free(buffer);
  if (mask)
    free(mask);
  if (retval != 0)
    dump_error_stack(stderr);
 	return retval;
 }
@@ -1,195 +0,0 @@
 #!/usr/bin/env python2
 # -*- coding: utf-8 -*-
 import json
 import mimetools
 import mimetypes
 import os
 import sys
 import urllib2
 def fail(msg, code=1):
    """Report a fatal error and terminate the script.

    Writes ``ERROR: <msg>`` followed by a newline to stderr, then exits the
    interpreter with status ``code`` (default 1).
    """
    text = "ERROR: %s\n" % msg
    sys.stderr.write(text)
    raise SystemExit(code)
 def api_request(url, token, method="GET", json_data=None, content_type=None):
    """Send one token-authenticated HTTP request and return ``(status, body)``.

    When ``json_data`` is not None it is serialized with ``json.dumps`` and
    sent with a ``application/json`` Content-Type.  Otherwise no body is sent
    and ``content_type``, if given, is used as the Content-Type header.

    An ``urllib2.HTTPError`` is not propagated: its status code and response
    body are returned in the same ``(status, body)`` shape.
    """
    request_headers = {"Authorization": "token %s" % token}
    payload = None
    if json_data is not None:
        payload = json.dumps(json_data)
        request_headers["Content-Type"] = "application/json"
    elif content_type is not None:
        request_headers["Content-Type"] = content_type
    req = urllib2.Request(url, data=payload, headers=request_headers)
    # Force the HTTP verb requested by the caller.
    req.get_method = lambda: method
    try:
        reply = urllib2.urlopen(req)
        content = reply.read()
        return reply.getcode(), content
    except urllib2.HTTPError as http_err:
        return http_err.code, http_err.read()
 def encode_multipart_formdata(fields, files):
    """Build a ``multipart/form-data`` request body.

    ``fields`` is an iterable of ``(name, value)`` pairs; ``files`` is an
    iterable of ``(name, filename, content, content_type)`` tuples.

    Returns ``(content_type, body)`` where ``content_type`` is the header
    value carrying the generated boundary and ``body`` is the CRLF-joined
    payload.
    """
    boundary = mimetools.choose_boundary()
    delimiter = "--" + boundary
    parts = []
    for field_name, field_value in fields:
        parts.extend([
            delimiter,
            'Content-Disposition: form-data; name="%s"' % field_name,
            "",
            field_value,
        ])
    for field_name, filename, content, mime in files:
        parts.extend([
            delimiter,
            'Content-Disposition: form-data; name="%s"; filename="%s"' % (field_name, filename),
            "Content-Type: %s" % mime,
            "",
            content,
        ])
    # Closing delimiter, followed by an empty entry so the body ends in CRLF.
    parts.extend([delimiter + "--", ""])
    body = "\r\n".join(parts)
    return "multipart/form-data; boundary=%s" % boundary, body
 def multipart_request(url, token, fields, files):
    """POST ``fields`` and ``files`` as multipart/form-data.

    The body and its Content-Type come from ``encode_multipart_formdata``.
    Returns ``(status, body)``; an ``urllib2.HTTPError`` is turned into the
    same tuple shape instead of being raised.
    """
    content_type, payload = encode_multipart_formdata(fields, files)
    req = urllib2.Request(
        url,
        data=payload,
        headers={
            "Authorization": "token %s" % token,
            "Content-Type": content_type,
        },
    )
    req.get_method = lambda: "POST"
    try:
        reply = urllib2.urlopen(req)
        return reply.getcode(), reply.read()
    except urllib2.HTTPError as http_err:
        return http_err.code, http_err.read()
 def get_release_by_tag(api_base, token, tag):
    """Look up the release attached to ``tag``.

    Returns the decoded JSON response on HTTP 200 and ``None`` on HTTP 404.
    Any other status is reported through ``fail``.
    """
    status, body = api_request(
        "%s/releases/tags/%s" % (api_base, tag), token, method="GET"
    )
    if status == 404:
        return None
    if status == 200:
        return json.loads(body)
    fail("failed to fetch release for tag %s: HTTP %s\n%s" % (tag, status, body))
 def create_release(api_base, token, tag):
    """Create a release for ``tag`` and return the decoded JSON response.

    The release is named after the tag and is neither a draft nor a
    prerelease.  Any status other than 200 or 201 is reported through
    ``fail``.
    """
    status, body = api_request(
        "%s/releases" % api_base,
        token,
        method="POST",
        json_data={
            "tag_name": tag,
            "name": tag,
            "draft": False,
            "prerelease": False,
        },
    )
    if status != 200 and status != 201:
        fail("failed to create release for tag %s: HTTP %s\n%s" % (tag, status, body))
    return json.loads(body)
 def ensure_release(api_base, token, tag):
    """Return the release for ``tag``, creating it first when it is missing.

    Progress is printed to stdout in both cases.
    """
    existing = get_release_by_tag(api_base, token, tag)
    if existing is None:
        print("Release for tag %s does not exist, creating it" % tag)
        created = create_release(api_base, token, tag)
        print("Created release id=%s" % created.get("id"))
        return created
    print("Release for tag %s already exists (id=%s)" % (tag, existing.get("id")))
    return existing
 def upload_asset(api_base, token, release_id, file_path):
    """Upload ``file_path`` as an asset of the release ``release_id``.

    The asset is named after the file's basename.  Its MIME type is guessed
    from that name and falls back to ``application/octet-stream``.  A missing
    file, or a response status other than 200/201, is reported through
    ``fail``.
    """
    if not os.path.isfile(file_path):
        fail("file not found: %s" % file_path)
    asset_name = os.path.basename(file_path)
    guessed_type, _encoding = mimetypes.guess_type(asset_name)
    mime_type = guessed_type or "application/octet-stream"
    with open(file_path, "rb") as handle:
        payload = handle.read()
    status, body = multipart_request(
        "%s/releases/%s/assets" % (api_base, release_id),
        token,
        fields=[("name", asset_name)],
        files=[("attachment", asset_name, payload, mime_type)],
    )
    if status != 200 and status != 201:
        fail("failed to upload asset %s: HTTP %s\n%s" % (asset_name, status, body))
    print("Uploaded asset: %s" % asset_name)
 def find_assets(build_dir):
    """List the plugin libraries found directly inside ``build_dir``.

    Returns the paths (``build_dir`` joined with the entry name) of all
    regular files whose name starts with ``libdurin-plugin.so``, ordered by
    entry name.
    """
    prefix = "libdurin-plugin.so"
    candidates = [
        os.path.join(build_dir, entry)
        for entry in sorted(os.listdir(build_dir))
        if entry.startswith(prefix)
    ]
    return [path for path in candidates if os.path.isfile(path)]
 def main():
    """Command-line entry point: publish the built plugin libraries.

    Expects exactly three arguments: the Gitea server URL, ``owner/repo`` and
    the tag.  Reads the API token from the ``GITEA_TOKEN`` environment
    variable, collects the libraries from the ``build`` directory, makes sure
    a release exists for the tag and uploads every library to it.
    """
    if len(sys.argv) != 4:
        prog = sys.argv[0]
        sys.stderr.write("Usage: %s <gitea-server> <owner/repo> <tag>\n" % prog)
        sys.stderr.write("Example: %s https://gitea.psi.ch mx/durin 1.0.0\n" % prog)
        sys.exit(1)
    _prog, server, repo, tag = sys.argv
    # Drop trailing slashes so the API URL below has no doubled separator.
    server = server.rstrip("/")
    token = os.environ.get("GITEA_TOKEN")
    if not token:
        fail("GITEA_TOKEN environment variable is not set")
    build_dir = "build"
    if not os.path.isdir(build_dir):
        fail("build directory not found: %s" % build_dir)
    assets = find_assets(build_dir)
    if not assets:
        fail("no libdurin-plugin.so* files found in %s" % build_dir)
    api_base = "%s/api/v1/repos/%s" % (server, repo)
    release_id = ensure_release(api_base, token, tag).get("id")
    if not release_id:
        fail("release id missing in API response")
    for asset_path in assets:
        upload_asset(api_base, token, release_id, asset_path)
    print("Done.")
 if __name__ == "__main__":
    main()