27 Commits

Author SHA1 Message Date
leonarski_f d39e9d7a99 Don't hide symbols in the shared library
Build Packages / Build (push) Successful in 31s
2026-04-08 16:03:30 +02:00
leonarski_f a3235390a4 README: Update
Build Packages / Build (push) Successful in 33s
2026-04-08 15:15:22 +02:00
leonarski_f 215e9471ad CI: Dedicated upload python script
Build Packages / Build (push) Successful in 31s
2026-04-08 14:58:53 +02:00
leonarski_f 7fb6117be7 CI: Try again upload
Build Packages / Build (push) Failing after 32s
2026-04-08 14:51:20 +02:00
leonarski_f a87f4a56e4 CI: Try again upload
Build Packages / Build (push) Failing after 29s
2026-04-08 14:48:56 +02:00
leonarski_f 942e3ae532 CI: Try again upload
Build Packages / Build (push) Failing after 39s
2026-04-08 14:46:36 +02:00
leonarski_f 11c9b581f2 Fix TOKEN
Build Packages / Build (push) Failing after 31s
2026-04-08 14:35:28 +02:00
leonarski_f a7b81b7db0 Trying with tag and uploading release
Build Packages / Build (push) Failing after 29s
2026-04-08 13:32:09 +02:00
leonarski_f 959636cbc3 Add version 1.0.0 2026-04-08 13:05:52 +02:00
leonarski_f 60135d9471 Include Node.js in the docker (needed for gitea) 2026-04-08 13:04:54 +02:00
leonarski_f dd8bb91931 Add gitea process
Build Packages / Build (push) Successful in 29s
2026-04-08 12:49:39 +02:00
leonarski_f 15e8781a74 Use CMake to build the plugin and HDF5 (internally) 2026-04-08 12:39:50 +02:00
leonarski_f cfe032b731 Remove need for hdf5_hl.h (so no need for unsupported configuration of HDF5) 2026-04-08 12:38:52 +02:00
leonarski_f 6421de97bb Improve Makefile 2026-04-08 10:32:20 +02:00
leonarski_f 69470cd374 Update bitshuffle/lz4 code + embed bshuf_h5filter code into the library 2026-04-08 10:16:46 +02:00
CV-GPhL 2bcd3074cc Null terminate buffer in file.c
Ensure the buffer is null terminated after allocation - otherwise correct results from strcmp below is undefined.
2026-02-04 11:20:21 +01:00
Clemens Vonrhein cfe0e78319 loop over all filters registered when printing 2024-12-18 16:46:14 +00:00
Clemens Vonrhein d7801cdb54 modified reading of NX_class attribute - to accommodate both
DATATYPE  H5T_STRING {
      STRSIZE 14;
      STRPAD H5T_STR_NULLTERM;
      CSET H5T_CSET_ASCII;
      CTYPE H5T_C_S1;
   }

and

   DATATYPE  H5T_STRING {
      STRSIZE H5T_VARIABLE;
      STRPAD H5T_STR_NULLTERM;
      CSET H5T_CSET_UTF8;
      CTYPE H5T_C_S1;
   }
2024-12-17 09:59:55 +00:00
Clemens Vonrhein 4cf18ceff4 Mechanism to read bitshuffle-compressed pixel-mask data - by
loading/using the normal bitshuffle source from

  https://github.com/kiyo-masui/bitshuffle

(see Makefile).
2024-12-16 15:05:38 +00:00
Clemens Vonrhein 44ba3cd278 Provide a mechanism to distinguish between an image number (as
understood on the XDS side) and the image ordinal (as stored in the
HDF data).

The 2D data arrays are not necessarily at image_nr_low=1 ;-)
2024-12-16 14:57:32 +00:00
CV-GPhL ca8806c912 Merge pull request #2 from CV-GPhL/CV-GPhL-patch-1
Update plugin.c to write compilation time if available/set
2024-03-14 16:43:22 +01:00
CV-GPhL 163d5bdfb1 Update plugin.c to write compilation time if available/set 2024-03-14 16:42:52 +01:00
CV-GPhL ca2c0ee0e7 Merge pull request #1 from DiamondLightSource/master
Catching up 20240223
2024-02-23 12:55:55 +01:00
Graeme Winter 808f5fbd42 Do not depend on the strings being NULL terminated (#29)
Fixes #28

Instead make them NULL terminated by reading one longer into buffer
2023-08-09 09:42:16 +01:00
Clemens Vonrhein 77d2b84957 Better handling of (usually) unsigned data arrays when calling routine
to convert to (signed) int and apply pixel mask. The environment
variable DURIN_RESET_UNMASKED_PIXEL can now be used to set non-masked
saturated pixels in order to process them correctly with e.g. XDS.
2021-04-08 14:46:48 +01:00
Clemens Vonrhein 0f27832f78 Resolved merge conflicts 2021-04-06 12:33:21 +01:00
Clemens Vonrhein 9e7d609f42 allow for image offset via DURIN_IMAGE_NUMBER_OFFSET environment
variable
2020-06-12 15:57:14 +01:00
20 changed files with 4624 additions and 1475 deletions
+35
View File
@@ -0,0 +1,35 @@
name: Build Packages
on:
push:
branches:
- '**'
tags:
- '**'
jobs:
build-library:
name: Build
runs-on: durin
steps:
- uses: actions/checkout@v4
- name: Build library
shell: bash
run: |
mkdir -p build
cd build
cmake -DCMAKE_BUILD_TYPE=Release ..
make -j
- name: Upload release assets to Gitea
if: github.ref_type == 'tag'
shell: bash
env:
GITEA_TOKEN: ${{ secrets.PIP_REPOSITORY_API_TOKEN }}
run: |
set -euo pipefail
python tools/gitea_release_upload.py \
"${{ github.server_url }}" \
"${{ github.repository }}" \
"${{ github.ref_name }}"
+56
View File
@@ -0,0 +1,56 @@
CMAKE_MINIMUM_REQUIRED(VERSION 3.19)
PROJECT(durin VERSION 1.0.0 LANGUAGES C)
include(FetchContent)
SET(CMAKE_C_FLAGS_RELEASE "-O3")
SET(CMAKE_C_STANDARD 99)
SET(CMAKE_C_STANDARD_REQUIRED ON)
SET(CMAKE_C_EXTENSIONS OFF)
SET(CMAKE_POSITION_INDEPENDENT_CODE ON)
SET(BUILD_SHARED_LIBS OFF)
set(HDF5_USE_STATIC_LIBRARIES TRUE)
SET(HDF5_BUILD_HL_LIB OFF)
SET(HDF5_ENABLE_THREADSAFE ON)
SET(HDF5_ENABLE_SZIP_SUPPORT OFF)
SET(HDF5_ENABLE_SZIP_ENCODING OFF)
SET(HDF5_BUILD_EXAMPLES OFF)
SET(HDF5_BUILD_CPP_LIB OFF)
SET(HDF5_ENABLE_Z_LIB_SUPPORT OFF)
SET(HDF5_EXTERNALLY_CONFIGURED 1)
INCLUDE_DIRECTORIES(bslz4/src)
FetchContent_Declare(hdf5
URL https://github.com/HDFGroup/hdf5/releases/download/hdf5_1.14.6/hdf5-1.14.6.tar.gz
DOWNLOAD_EXTRACT_TIMESTAMP FALSE
EXCLUDE_FROM_ALL)
FetchContent_MakeAvailable(hdf5)
ADD_LIBRARY(durin-plugin SHARED
src/plugin.c src/plugin.h
src/err.c src/err.h
src/filters.c src/filters.h
src/file.c src/file.h bslz4/src/bitshuffle.c bslz4/src/bitshuffle.h
bslz4/src/bitshuffle_core.c bslz4/src/bitshuffle_core.h
bslz4/src/bitshuffle_internals.h
bslz4/src/bshuf_h5filter.c bslz4/src/bshuf_h5filter.h
bslz4/src/iochain.c bslz4/src/iochain.h
bslz4/src/lz4.c bslz4/src/lz4.h
)
set_target_properties(durin-plugin PROPERTIES VERSION 1.0.0)
TARGET_COMPILE_DEFINITIONS(durin-plugin PRIVATE
H5_USE_110_API
USE_BITSHUFFLE)
TARGET_LINK_LIBRARIES(durin-plugin PRIVATE hdf5-static)
+43
View File
@@ -0,0 +1,43 @@
FROM registry.access.redhat.com/ubi7/ubi
ARG HDF5_TAG="hdf5_1.14.6"
LABEL authors="Filip Leonarski"
ARG CMAKE_VERSION=3.31.6
ARG NODE_MAJOR=16
RUN yum -y update && \
yum -y install \
gcc \
gcc-c++ \
git \
make \
tar \
gzip \
curl && \
yum clean all
# Install a recent Node.js (NodeSource). Change NODE_MAJOR if you want another major version.
RUN curl -fsSL https://rpm.nodesource.com/setup_${NODE_MAJOR}.x | bash - && \
yum -y install nodejs && \
yum clean all && \
node --version && npm --version && (corepack enable || true)
RUN set -eux; \
arch="$(uname -m)"; \
case "$arch" in \
x86_64) cmake_arch="x86_64" ;; \
aarch64) cmake_arch="aarch64" ;; \
*) echo "Unsupported architecture: $arch"; exit 1 ;; \
esac; \
curl -L "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-${cmake_arch}.tar.gz" \
-o /tmp/cmake.tar.gz; \
tar -xzf /tmp/cmake.tar.gz -C /opt; \
ln -s "/opt/cmake-${CMAKE_VERSION}-linux-${cmake_arch}/bin/cmake" /usr/local/bin/cmake; \
ln -s "/opt/cmake-${CMAKE_VERSION}-linux-${cmake_arch}/bin/ctest" /usr/local/bin/ctest; \
ln -s "/opt/cmake-${CMAKE_VERSION}-linux-${cmake_arch}/bin/cpack" /usr/local/bin/cpack; \
rm -f /tmp/cmake.tar.gz
# Default entrypoint prints tool versions and hints.
CMD ["/bin/bash", "-l"]
-55
View File
@@ -1,55 +0,0 @@
BUILD_DIR ?= ./build
SRC_DIR = ./src
TEST_DIR = ./test
INC_DIR = $(SRC_DIR)
BSLZ4_SRC_DIR = ./bslz4/src
BSLZ4_BUILD_DIR = ./bslz4/build
BSLZ4_INC_DIR = $(BSLZ4_SRC_DIR)
CC=h5cc
CFLAGS=-DH5_USE_110_API -Wall -g -O2 -fpic -I$(INC_DIR) -I$(BSLZ4_INC_DIR) -std=c99 -shlib
.PHONY: plugin
plugin: $(BUILD_DIR)/durin-plugin.so
.PHONY: all
all: plugin example test_plugin
.PHONY: example
example: $(BUILD_DIR)/example
.PHONY: test_plugin
test_plugin: $(BUILD_DIR)/test_plugin
$(BUILD_DIR)/test_plugin: $(TEST_DIR)/generic_data_plugin.f90 $(TEST_DIR)/test_generic_host.f90
mkdir -p $(BUILD_DIR)
gfortran -O -g -fopenmp -ldl $(TEST_DIR)/generic_data_plugin.f90 $(TEST_DIR)/test_generic_host.f90 -o $@ -J$(BUILD_DIR)
$(BUILD_DIR)/%.o: $(SRC_DIR)/%.c
mkdir -p $(BUILD_DIR)
$(CC) $(CFLAGS) -c $< -o $@
$(BSLZ4_BUILD_DIR)/%.o: $(BSLZ4_SRC_DIR)/%.c
mkdir -p $(BSLZ4_BUILD_DIR)
$(CC) $(CFLAGS) -c $< -o $@
$(BUILD_DIR)/bslz4.a: $(BSLZ4_BUILD_DIR)/lz4.o $(BSLZ4_BUILD_DIR)/bitshuffle.o \
$(BSLZ4_BUILD_DIR)/bitshuffle_core.o $(BSLZ4_BUILD_DIR)/iochain.o
mkdir -p $(BUILD_DIR)
ar rcs $@ $^
$(BUILD_DIR)/durin-plugin.so: $(BUILD_DIR)/plugin.o $(BUILD_DIR)/file.o $(BUILD_DIR)/err.o $(BUILD_DIR)/filters.o \
$(BUILD_DIR)/bslz4.a
mkdir -p $(BUILD_DIR)
$(CC) $(CFLAGS) -shared -noshlib $^ -o $(BUILD_DIR)/durin-plugin.so
$(BUILD_DIR)/example: $(BUILD_DIR)/test.o $(BUILD_DIR)/file.o $(BUILD_DIR)/err.o $(BUILD_DIR)/filters.o \
$(BUILD_DIR)/bslz4.a
mkdir -p $(BUILD_DIR)
$(CC) $(CFLAGS) $^ -o $(BUILD_DIR)/example
.PHONY: clean
clean:
rm -r $(BUILD_DIR)
rm -r $(BSLZ4_BUILD_DIR)
+22 -45
View File
@@ -8,14 +8,24 @@ See:
* https://www.dectris.com/features/features-eiger-x/hdf5-and-nexus
* https://strucbio.biologie.uni-konstanz.de/xdswiki
## Get Durin
## Paul Scherrer Institute fork
This fork is maintained by Paul Scherrer Institute.
The plugin is based on the code developed by the Diamond Light Source and modified by Global Phasing.
Modifications from PSI side:
* Using CMake for building the plugin
* HDF5 is built as part of the CMake process
* Bitshuffle/LZ4 is updated to the latest version
* HDF5 filter is automatically registered for virtual dataset HDF5 files
* Docker image to build the plugin on x86/RH7 is provided.
* Generated versioned shared library, so the used version can be tracked.
Latest release at https://github.com/DiamondLightSource/durin/releases/tag/2019v1
## Get Durin
Linux x86 version is automatically built on RHEL 7 and available from the Gitea release page.
## Usage
In your XDS.INP add:
```
LIB=[path to durin-plugin.so]
LIB=[path to libdurin-plugin.so]
NAME_TEMPLATE_OF_DATA_FRAMES=[data_path]/data_images_??????.h5
```
XDS will instruct the plugin to load `[data_path]/data_images_master.h5` and this must be the
@@ -27,55 +37,25 @@ the master file contains an `NXdata` or `NXdetector` group with either a dataset
series of datasets named `data_000001`, `data_000002`, etc.
## Requirements
* HDF5 Library (https://www.hdfgroup.org/downloads)
## Building
### Building HDF5 library
The HDF5 library used when building durin must have been compiled with specific switches enabled
to allow the durin plugin to be built and used.
Download the HDF5 source code (https://www.hdfgroup.org/downloads/hdf5/source-code) and extract
to any directory (referred to as `/hdf5_dir`), and run the following commands.
Requires CMake version 3.19 or later + GCC compiler. There is no need to build HDF5 separately.
To build:
```
cd /hdf5_dir
mkdir build
cd build
export CFLAGS=-fPIC
../configure --enable-threadsafe --enable-deprecated-symbols --enable-hl --enable-unsupported
make
make check
make install
cmake ..
make -j
```
The hdf5 tools and libraries should now be located in `/hdf5_dir/build/hdf5`
For reference, the plugin requires the thread-safe switch and the optimised chunk read function.
The chunk read function may be defined in the high level library instead of the regular library,
depending on the exact HDF5 version downloaded (hence the --enable-deprecated-symbols _and_ --enable-hl).
The unsupported flag enables building with both threadsafe and high-level enabled.
### Building durin plugin
The plugin makefile will use the "h5cc" compiler wrapper, provided by the HDF5 library, which
must be on your PATH.
Download or clone the plugin source code (https://github.com/DiamondLightSource/durin)
into any directory (referred to as `/durin_dir`) and run the following commands.
```
cd /durin_dir
PATH=/hdf5_dir/build/hdf5/bin:$PATH
make
```
The plugin is located at `/durin_dir/build/durin-plugin.so` and should be added to the
XDS.INP file as `LIB=/durin_dir/build/durin-plugin.so`
The plugin is located at `build/libdurin-plugin.so` and should be added to the
XDS.INP file as `LIB=<CURRENT_DIRECTORY>/build/libdurin-plugin.so`.
Alternatively, versioned copy is also provided, e.g. `libdurin-plugin.so.1.0.0`, allowing to track
the current version of the plugin.
## Example XDS.INP
```
DETECTOR=PILATUS MINIMUM_VALID_PIXEL_VALUE=0 OVERLOAD=4096
LIB=/opt/durin/build/durin-plugin.so
LIB=/opt/durin/build/libdurin-plugin.so
SENSOR_THICKNESS= 0.450
!SENSOR_MATERIAL / THICKNESS Si 0.450
!SILICON= 3.953379
@@ -95,6 +75,3 @@ TRUSTED_REGION= 0.0 1.41
DATA_RANGE= 1 600
JOB=XYCORR INIT COLSPOT IDXREF DEFPIX INTEGRATE CORRECT
```
N.B. the master file is needed, not the .nxs one which follows the
standard.
+139 -25
View File
@@ -14,23 +14,22 @@
#include "bitshuffle_internals.h"
#include "lz4.h"
#ifdef ZSTD_SUPPORT
#include "zstd.h"
#endif
#include <stdio.h>
#include <string.h>
#define BSHUF_LZ4_DECOMPRESS_FAST
// Macros.
#define CHECK_ERR_FREE_LZ(count, buf) if (count < 0) { \
free(buf); return count - 1000; }
/* Bitshuffle and compress a single block. */
int64_t bshuf_compress_lz4_block(ioc_chain *C_ptr, \
const size_t size, const size_t elem_size) {
const size_t size, const size_t elem_size, const int option) {
int64_t nbytes, count;
void *tmp_buf_bshuf;
@@ -42,7 +41,8 @@ int64_t bshuf_compress_lz4_block(ioc_chain *C_ptr, \
tmp_buf_bshuf = malloc(size * elem_size);
if (tmp_buf_bshuf == NULL) return -1;
tmp_buf_lz4 = malloc(LZ4_compressBound(size * elem_size));
int dst_capacity = LZ4_compressBound(size * elem_size);
tmp_buf_lz4 = malloc(dst_capacity);
if (tmp_buf_lz4 == NULL){
free(tmp_buf_bshuf);
return -1;
@@ -58,7 +58,7 @@ int64_t bshuf_compress_lz4_block(ioc_chain *C_ptr, \
free(tmp_buf_bshuf);
return count;
}
nbytes = LZ4_compress((const char*) tmp_buf_bshuf, (char*) tmp_buf_lz4, size * elem_size);
nbytes = LZ4_compress_default((const char*) tmp_buf_bshuf, (char*) tmp_buf_lz4, size * elem_size, dst_capacity);
free(tmp_buf_bshuf);
CHECK_ERR_FREE_LZ(nbytes, tmp_buf_lz4);
@@ -76,7 +76,7 @@ int64_t bshuf_compress_lz4_block(ioc_chain *C_ptr, \
/* Decompress and bitunshuffle a single block. */
int64_t bshuf_decompress_lz4_block(ioc_chain *C_ptr,
const size_t size, const size_t elem_size) {
const size_t size, const size_t elem_size, const int option) {
int64_t nbytes, count;
void *out, *tmp_buf;
@@ -96,14 +96,6 @@ int64_t bshuf_decompress_lz4_block(ioc_chain *C_ptr,
tmp_buf = malloc(size * elem_size);
if (tmp_buf == NULL) return -1;
#ifdef BSHUF_LZ4_DECOMPRESS_FAST
nbytes = LZ4_decompress_fast((const char*) in + 4, (char*) tmp_buf, size * elem_size);
CHECK_ERR_FREE_LZ(nbytes, tmp_buf);
if (nbytes != nbytes_from_header) {
free(tmp_buf);
return -91;
}
#else
nbytes = LZ4_decompress_safe((const char*) in + 4, (char *) tmp_buf, nbytes_from_header,
size * elem_size);
CHECK_ERR_FREE_LZ(nbytes, tmp_buf);
@@ -112,7 +104,7 @@ int64_t bshuf_decompress_lz4_block(ioc_chain *C_ptr,
return -91;
}
nbytes = nbytes_from_header;
#endif
count = bshuf_untrans_bit_elem(tmp_buf, out, size, elem_size);
CHECK_ERR_FREE(count, tmp_buf);
nbytes += 4;
@@ -121,6 +113,92 @@ int64_t bshuf_decompress_lz4_block(ioc_chain *C_ptr,
return nbytes;
}
#ifdef ZSTD_SUPPORT
/* Bitshuffle and compress a single block. */
int64_t bshuf_compress_zstd_block(ioc_chain *C_ptr, \
const size_t size, const size_t elem_size, const int comp_lvl) {
int64_t nbytes, count;
void *tmp_buf_bshuf;
void *tmp_buf_zstd;
size_t this_iter;
const void *in;
void *out;
tmp_buf_bshuf = malloc(size * elem_size);
if (tmp_buf_bshuf == NULL) return -1;
size_t tmp_buf_zstd_size = ZSTD_compressBound(size * elem_size);
tmp_buf_zstd = malloc(tmp_buf_zstd_size);
if (tmp_buf_zstd == NULL){
free(tmp_buf_bshuf);
return -1;
}
in = ioc_get_in(C_ptr, &this_iter);
ioc_set_next_in(C_ptr, &this_iter, (void*) ((char*) in + size * elem_size));
count = bshuf_trans_bit_elem(in, tmp_buf_bshuf, size, elem_size);
if (count < 0) {
free(tmp_buf_zstd);
free(tmp_buf_bshuf);
return count;
}
nbytes = ZSTD_compress(tmp_buf_zstd, tmp_buf_zstd_size, (const void*)tmp_buf_bshuf, size * elem_size, comp_lvl);
free(tmp_buf_bshuf);
CHECK_ERR_FREE_LZ(nbytes, tmp_buf_zstd);
out = ioc_get_out(C_ptr, &this_iter);
ioc_set_next_out(C_ptr, &this_iter, (void *) ((char *) out + nbytes + 4));
bshuf_write_uint32_BE(out, nbytes);
memcpy((char *) out + 4, tmp_buf_zstd, nbytes);
free(tmp_buf_zstd);
return nbytes + 4;
}
/* Decompress and bitunshuffle a single block. */
int64_t bshuf_decompress_zstd_block(ioc_chain *C_ptr,
const size_t size, const size_t elem_size, const int option) {
int64_t nbytes, count;
void *out, *tmp_buf;
const void *in;
size_t this_iter;
int32_t nbytes_from_header;
in = ioc_get_in(C_ptr, &this_iter);
nbytes_from_header = bshuf_read_uint32_BE(in);
ioc_set_next_in(C_ptr, &this_iter,
(void*) ((char*) in + nbytes_from_header + 4));
out = ioc_get_out(C_ptr, &this_iter);
ioc_set_next_out(C_ptr, &this_iter,
(void *) ((char *) out + size * elem_size));
tmp_buf = malloc(size * elem_size);
if (tmp_buf == NULL) return -1;
nbytes = ZSTD_decompress(tmp_buf, size * elem_size, (void *)((char *) in + 4), nbytes_from_header);
CHECK_ERR_FREE_LZ(nbytes, tmp_buf);
if (nbytes != size * elem_size) {
free(tmp_buf);
return -91;
}
nbytes = nbytes_from_header;
count = bshuf_untrans_bit_elem(tmp_buf, out, size, elem_size);
CHECK_ERR_FREE(count, tmp_buf);
nbytes += 4;
free(tmp_buf);
return nbytes;
}
#endif // ZSTD_SUPPORT
/* ---- Public functions ----
*
@@ -138,13 +216,13 @@ size_t bshuf_compress_lz4_bound(const size_t size,
}
if (block_size % BSHUF_BLOCKED_MULT) return -81;
// Note that each block gets a 4 byte header.
// Size of full blocks.
bound = (LZ4_compressBound(block_size * elem_size) + 4) * (size / block_size);
// Size of partial blocks, if any.
leftover = ((size % block_size) / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT;
if (leftover) bound += LZ4_compressBound(leftover * elem_size) + 4;
// Size of uncompressed data not fitting into any blocks.
bound += (size % BSHUF_BLOCKED_MULT) * elem_size;
return bound;
}
@@ -153,13 +231,49 @@ size_t bshuf_compress_lz4_bound(const size_t size,
int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size) {
return bshuf_blocked_wrap_fun(&bshuf_compress_lz4_block, in, out, size,
elem_size, block_size);
elem_size, block_size, 0/*option*/);
}
int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size) {
return bshuf_blocked_wrap_fun(&bshuf_decompress_lz4_block, in, out, size,
elem_size, block_size);
elem_size, block_size, 0/*option*/);
}
#ifdef ZSTD_SUPPORT
size_t bshuf_compress_zstd_bound(const size_t size,
const size_t elem_size, size_t block_size) {
size_t bound, leftover;
if (block_size == 0) {
block_size = bshuf_default_block_size(elem_size);
}
if (block_size % BSHUF_BLOCKED_MULT) return -81;
// Note that each block gets a 4 byte header.
// Size of full blocks.
bound = (ZSTD_compressBound(block_size * elem_size) + 4) * (size / block_size);
// Size of partial blocks, if any.
leftover = ((size % block_size) / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT;
if (leftover) bound += ZSTD_compressBound(leftover * elem_size) + 4;
// Size of uncompressed data not fitting into any blocks.
bound += (size % BSHUF_BLOCKED_MULT) * elem_size;
return bound;
}
int64_t bshuf_compress_zstd(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size, const int comp_lvl) {
return bshuf_blocked_wrap_fun(&bshuf_compress_zstd_block, in, out, size,
elem_size, block_size, comp_lvl);
}
int64_t bshuf_decompress_zstd(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size) {
return bshuf_blocked_wrap_fun(&bshuf_decompress_zstd_block, in, out, size,
elem_size, block_size, 0/*option*/);
}
#endif // ZSTD_SUPPORT
+89 -7
View File
@@ -35,6 +35,10 @@
extern "C" {
#endif
/*
* ---- LZ4 Interface ----
*/
/* ---- bshuf_compress_lz4_bound ----
*
* Bound on size of data compressed with *bshuf_compress_lz4*.
@@ -94,11 +98,6 @@ int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size, const s
* To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size*
* must patch the parameters used to compress the data.
*
* NOT TO BE USED WITH UNTRUSTED DATA: This routine uses the function
* LZ4_decompress_fast from LZ4, which does not protect against maliciously
* formed datasets. By modifying the compressed data, this function could be
* coerced into leaving the boundaries of the input buffer.
*
* Parameters
* ----------
* in : input buffer
@@ -116,8 +115,91 @@ int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size, const s
int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size);
/*
* ---- ZSTD Interface ----
*/
#ifdef ZSTD_SUPPORT
/* ---- bshuf_compress_zstd_bound ----
*
* Bound on size of data compressed with *bshuf_compress_zstd*.
*
* Parameters
* ----------
* size : number of elements in input
* elem_size : element size of typed data
* block_size : Process in blocks of this many elements. Pass 0 to
* select automatically (recommended).
*
* Returns
* -------
* Bound on compressed data size.
*
*/
size_t bshuf_compress_zstd_bound(const size_t size,
const size_t elem_size, size_t block_size);
/* ---- bshuf_compress_zstd ----
*
* Bitshuffled and compress the data using zstd.
*
* Transpose within elements, in blocks of data of *block_size* elements then
* compress the blocks using ZSTD. In the output buffer, each block is prefixed
* by a 4 byte integer giving the compressed size of that block.
*
* Output buffer must be large enough to hold the compressed data. This could
* be in principle substantially larger than the input buffer. Use the routine
* *bshuf_compress_zstd_bound* to get an upper limit.
*
* Parameters
* ----------
* in : input buffer, must be of size * elem_size bytes
* out : output buffer, must be large enough to hold data.
* size : number of elements in input
* elem_size : element size of typed data
* block_size : Process in blocks of this many elements. Pass 0 to
* select automatically (recommended).
* comp_lvl : compression level applied
*
* Returns
* -------
* number of bytes used in output buffer, negative error-code if failed.
*
*/
int64_t bshuf_compress_zstd(const void* in, void* out, const size_t size, const size_t
elem_size, size_t block_size, const int comp_lvl);
/* ---- bshuf_decompress_zstd ----
*
* Undo compression and bitshuffling.
*
* Decompress data then un-bitshuffle it in blocks of *block_size* elements.
*
* To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size*
* must patch the parameters used to compress the data.
*
* Parameters
* ----------
* in : input buffer
* out : output buffer, must be of size * elem_size bytes
* size : number of elements in input
* elem_size : element size of typed data
* block_size : Process in blocks of this many elements. Pass 0 to
* select automatically (recommended).
*
* Returns
* -------
* number of bytes consumed in *input* buffer, negative error-code if failed.
*
*/
int64_t bshuf_decompress_zstd(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size);
#endif // ZSTD_SUPPORT
#ifdef __cplusplus
}
} // extern "C"
#endif
#endif
#endif // BITSHUFFLE_H
+742 -39
View File
@@ -16,30 +16,55 @@
#include <string.h>
#if defined(__AVX512F__) && defined (__AVX512BW__) && defined(__AVX2__) && defined(__SSE2__)
#define USEAVX512
#endif
#if defined(__AVX2__) && defined (__SSE2__)
#define USEAVX2
#endif
#if defined(__SSE2__)
#if defined(__SSE2__) || defined(NO_WARN_X86_INTRINSICS)
#define USESSE2
#endif
#if defined(__ARM_NEON__) || (__ARM_NEON)
#ifdef __aarch64__
#define USEARMNEON
#endif
#endif
// Conditional includes for SSE2 and AVX2.
#ifdef USEAVX2
#include <immintrin.h>
#elif defined USESSE2
#include <emmintrin.h>
#elif defined USEARMNEON
#include <arm_neon.h>
#endif
#if defined(_OPENMP) && defined(_MSC_VER)
typedef int64_t omp_size_t;
#else
typedef size_t omp_size_t;
#endif
// Macros.
#define CHECK_MULT_EIGHT(n) if (n % 8) return -80;
#define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
/* ---- Functions indicating compile time instruction set. ---- */
int bshuf_using_NEON(void) {
#ifdef USEARMNEON
return 1;
#else
return 0;
#endif
}
int bshuf_using_SSE2(void) {
#ifdef USESSE2
return 1;
@@ -58,6 +83,14 @@ int bshuf_using_AVX2(void) {
}
int bshuf_using_AVX512(void) {
#ifdef USEAVX512
return 1;
#else
return 0;
#endif
}
/* ---- Worker code not requiring special instruction sets. ----
*
* The following code does not use any x86 specific vectorized instructions
@@ -131,8 +164,8 @@ int64_t bshuf_trans_byte_elem_remainder(const void* in, void* out, const size_t
CHECK_MULT_EIGHT(start);
if (size > start) {
// ii loop separated into 2 loops so the compiler can unroll
// the inner one.
for (ii = start; ii + 7 < size; ii += 8) {
for (jj = 0; jj < elem_size; jj++) {
for (kk = 0; kk < 8; kk++) {
@@ -348,10 +381,516 @@ int64_t bshuf_untrans_bit_elem_scal(const void* in, void* out, const size_t size
}
/* ---- Worker code that uses Arm NEON ----
*
* The following code makes use of the Arm NEON instruction set.
* NEON technology is the implementation of the ARM Advanced Single
* Instruction Multiple Data (SIMD) extension.
* The NEON unit is the component of the processor that executes SIMD instructions.
* It is also called the NEON Media Processing Engine (MPE).
*
*/
#ifdef USEARMNEON
/* Transpose bytes within elements for 16 bit elements. */
int64_t bshuf_trans_byte_elem_NEON_16(const void* in, void* out, const size_t size) {
size_t ii;
const char *in_b = (const char*) in;
char *out_b = (char*) out;
int8x16_t a0, b0, a1, b1;
for (ii=0; ii + 15 < size; ii += 16) {
a0 = vld1q_s8(in_b + 2*ii + 0*16);
b0 = vld1q_s8(in_b + 2*ii + 1*16);
a1 = vzip1q_s8(a0, b0);
b1 = vzip2q_s8(a0, b0);
a0 = vzip1q_s8(a1, b1);
b0 = vzip2q_s8(a1, b1);
a1 = vzip1q_s8(a0, b0);
b1 = vzip2q_s8(a0, b0);
a0 = vzip1q_s8(a1, b1);
b0 = vzip2q_s8(a1, b1);
vst1q_s8(out_b + 0*size + ii, a0);
vst1q_s8(out_b + 1*size + ii, b0);
}
return bshuf_trans_byte_elem_remainder(in, out, size, 2,
size - size % 16);
}
/* Transpose bytes within elements for 32 bit elements. */
int64_t bshuf_trans_byte_elem_NEON_32(const void* in, void* out, const size_t size) {
size_t ii;
const char *in_b;
char *out_b;
in_b = (const char*) in;
out_b = (char*) out;
int8x16_t a0, b0, c0, d0, a1, b1, c1, d1;
int64x2_t a2, b2, c2, d2;
for (ii=0; ii + 15 < size; ii += 16) {
a0 = vld1q_s8(in_b + 4*ii + 0*16);
b0 = vld1q_s8(in_b + 4*ii + 1*16);
c0 = vld1q_s8(in_b + 4*ii + 2*16);
d0 = vld1q_s8(in_b + 4*ii + 3*16);
a1 = vzip1q_s8(a0, b0);
b1 = vzip2q_s8(a0, b0);
c1 = vzip1q_s8(c0, d0);
d1 = vzip2q_s8(c0, d0);
a0 = vzip1q_s8(a1, b1);
b0 = vzip2q_s8(a1, b1);
c0 = vzip1q_s8(c1, d1);
d0 = vzip2q_s8(c1, d1);
a1 = vzip1q_s8(a0, b0);
b1 = vzip2q_s8(a0, b0);
c1 = vzip1q_s8(c0, d0);
d1 = vzip2q_s8(c0, d0);
a2 = vzip1q_s64(vreinterpretq_s64_s8(a1), vreinterpretq_s64_s8(c1));
b2 = vzip2q_s64(vreinterpretq_s64_s8(a1), vreinterpretq_s64_s8(c1));
c2 = vzip1q_s64(vreinterpretq_s64_s8(b1), vreinterpretq_s64_s8(d1));
d2 = vzip2q_s64(vreinterpretq_s64_s8(b1), vreinterpretq_s64_s8(d1));
vst1q_s64((int64_t *) (out_b + 0*size + ii), a2);
vst1q_s64((int64_t *) (out_b + 1*size + ii), b2);
vst1q_s64((int64_t *) (out_b + 2*size + ii), c2);
vst1q_s64((int64_t *) (out_b + 3*size + ii), d2);
}
return bshuf_trans_byte_elem_remainder(in, out, size, 4,
size - size % 16);
}
/* Transpose bytes within elements for 64 bit elements. */
int64_t bshuf_trans_byte_elem_NEON_64(const void* in, void* out, const size_t size) {
size_t ii;
const char* in_b = (const char*) in;
char* out_b = (char*) out;
int8x16_t a0, b0, c0, d0, e0, f0, g0, h0;
int8x16_t a1, b1, c1, d1, e1, f1, g1, h1;
for (ii=0; ii + 15 < size; ii += 16) {
a0 = vld1q_s8(in_b + 8*ii + 0*16);
b0 = vld1q_s8(in_b + 8*ii + 1*16);
c0 = vld1q_s8(in_b + 8*ii + 2*16);
d0 = vld1q_s8(in_b + 8*ii + 3*16);
e0 = vld1q_s8(in_b + 8*ii + 4*16);
f0 = vld1q_s8(in_b + 8*ii + 5*16);
g0 = vld1q_s8(in_b + 8*ii + 6*16);
h0 = vld1q_s8(in_b + 8*ii + 7*16);
a1 = vzip1q_s8 (a0, b0);
b1 = vzip2q_s8 (a0, b0);
c1 = vzip1q_s8 (c0, d0);
d1 = vzip2q_s8 (c0, d0);
e1 = vzip1q_s8 (e0, f0);
f1 = vzip2q_s8 (e0, f0);
g1 = vzip1q_s8 (g0, h0);
h1 = vzip2q_s8 (g0, h0);
a0 = vzip1q_s8 (a1, b1);
b0 = vzip2q_s8 (a1, b1);
c0 = vzip1q_s8 (c1, d1);
d0 = vzip2q_s8 (c1, d1);
e0 = vzip1q_s8 (e1, f1);
f0 = vzip2q_s8 (e1, f1);
g0 = vzip1q_s8 (g1, h1);
h0 = vzip2q_s8 (g1, h1);
a1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (c0));
b1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (c0));
c1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (b0), vreinterpretq_s32_s8 (d0));
d1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (b0), vreinterpretq_s32_s8 (d0));
e1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (g0));
f1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (g0));
g1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (f0), vreinterpretq_s32_s8 (h0));
h1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (f0), vreinterpretq_s32_s8 (h0));
a0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (a1), vreinterpretq_s64_s8 (e1));
b0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (a1), vreinterpretq_s64_s8 (e1));
c0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (b1), vreinterpretq_s64_s8 (f1));
d0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (b1), vreinterpretq_s64_s8 (f1));
e0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (c1), vreinterpretq_s64_s8 (g1));
f0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (c1), vreinterpretq_s64_s8 (g1));
g0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (d1), vreinterpretq_s64_s8 (h1));
h0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (d1), vreinterpretq_s64_s8 (h1));
vst1q_s8(out_b + 0*size + ii, a0);
vst1q_s8(out_b + 1*size + ii, b0);
vst1q_s8(out_b + 2*size + ii, c0);
vst1q_s8(out_b + 3*size + ii, d0);
vst1q_s8(out_b + 4*size + ii, e0);
vst1q_s8(out_b + 5*size + ii, f0);
vst1q_s8(out_b + 6*size + ii, g0);
vst1q_s8(out_b + 7*size + ii, h0);
}
return bshuf_trans_byte_elem_remainder(in, out, size, 8,
size - size % 16);
}
/* Transpose bytes within elements using best NEON algorithm available. */
int64_t bshuf_trans_byte_elem_NEON(const void* in, void* out, const size_t size,
const size_t elem_size) {
int64_t count;
// Trivial cases: power of 2 bytes.
switch (elem_size) {
case 1:
count = bshuf_copy(in, out, size, elem_size);
return count;
case 2:
count = bshuf_trans_byte_elem_NEON_16(in, out, size);
return count;
case 4:
count = bshuf_trans_byte_elem_NEON_32(in, out, size);
return count;
case 8:
count = bshuf_trans_byte_elem_NEON_64(in, out, size);
return count;
}
// Worst case: odd number of bytes. Turns out that this is faster for
// (odd * 2) byte elements as well (hence % 4).
if (elem_size % 4) {
count = bshuf_trans_byte_elem_scal(in, out, size, elem_size);
return count;
}
// Multiple of power of 2: transpose hierarchically.
{
size_t nchunk_elem;
void* tmp_buf = malloc(size * elem_size);
if (tmp_buf == NULL) return -1;
if ((elem_size % 8) == 0) {
nchunk_elem = elem_size / 8;
TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int64_t);
count = bshuf_trans_byte_elem_NEON_64(out, tmp_buf,
size * nchunk_elem);
bshuf_trans_elem(tmp_buf, out, 8, nchunk_elem, size);
} else if ((elem_size % 4) == 0) {
nchunk_elem = elem_size / 4;
TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int32_t);
count = bshuf_trans_byte_elem_NEON_32(out, tmp_buf,
size * nchunk_elem);
bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size);
} else {
// Not used since scalar algorithm is faster.
nchunk_elem = elem_size / 2;
TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t);
count = bshuf_trans_byte_elem_NEON_16(out, tmp_buf,
size * nchunk_elem);
bshuf_trans_elem(tmp_buf, out, 2, nchunk_elem, size);
}
free(tmp_buf);
return count;
}
}
/* Creates a mask made up of the most significant
* bit of each byte of 'input'
*/
int32_t move_byte_mask_neon(uint8x16_t input) {
return ( ((input[0] & 0x80) >> 7) | (((input[1] & 0x80) >> 7) << 1) | (((input[2] & 0x80) >> 7) << 2) | (((input[3] & 0x80) >> 7) << 3)
| (((input[4] & 0x80) >> 7) << 4) | (((input[5] & 0x80) >> 7) << 5) | (((input[6] & 0x80) >> 7) << 6) | (((input[7] & 0x80) >> 7) << 7)
| (((input[8] & 0x80) >> 7) << 8) | (((input[9] & 0x80) >> 7) << 9) | (((input[10] & 0x80) >> 7) << 10) | (((input[11] & 0x80) >> 7) << 11)
| (((input[12] & 0x80) >> 7) << 12) | (((input[13] & 0x80) >> 7) << 13) | (((input[14] & 0x80) >> 7) << 14) | (((input[15] & 0x80) >> 7) << 15)
);
}
/* Transpose bits within bytes. */
int64_t bshuf_trans_bit_byte_NEON(const void* in, void* out, const size_t size,
const size_t elem_size) {
size_t ii, kk;
const char* in_b = (const char*) in;
char* out_b = (char*) out;
uint16_t* out_ui16;
int64_t count;
size_t nbyte = elem_size * size;
CHECK_MULT_EIGHT(nbyte);
int16x8_t xmm;
int32_t bt;
for (ii = 0; ii + 15 < nbyte; ii += 16) {
xmm = vld1q_s16((int16_t *) (in_b + ii));
for (kk = 0; kk < 8; kk++) {
bt = move_byte_mask_neon((uint8x16_t) xmm);
xmm = vshlq_n_s16(xmm, 1);
out_ui16 = (uint16_t*) &out_b[((7 - kk) * nbyte + ii) / 8];
*out_ui16 = bt;
}
}
count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size,
nbyte - nbyte % 16);
return count;
}
/* Transpose bits within elements. */
int64_t bshuf_trans_bit_elem_NEON(const void* in, void* out, const size_t size,
const size_t elem_size) {
int64_t count;
CHECK_MULT_EIGHT(size);
void* tmp_buf = malloc(size * elem_size);
if (tmp_buf == NULL) return -1;
count = bshuf_trans_byte_elem_NEON(in, out, size, elem_size);
CHECK_ERR_FREE(count, tmp_buf);
count = bshuf_trans_bit_byte_NEON(out, tmp_buf, size, elem_size);
CHECK_ERR_FREE(count, tmp_buf);
count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size);
free(tmp_buf);
return count;
}
/* For data organized into a row for each bit (8 * elem_size rows), transpose
* the bytes. */
int64_t bshuf_trans_byte_bitrow_NEON(const void* in, void* out, const size_t size,
const size_t elem_size) {
size_t ii, jj;
const char* in_b = (const char*) in;
char* out_b = (char*) out;
CHECK_MULT_EIGHT(size);
size_t nrows = 8 * elem_size;
size_t nbyte_row = size / 8;
int8x16_t a0, b0, c0, d0, e0, f0, g0, h0;
int8x16_t a1, b1, c1, d1, e1, f1, g1, h1;
int64x1_t *as, *bs, *cs, *ds, *es, *fs, *gs, *hs;
for (ii = 0; ii + 7 < nrows; ii += 8) {
for (jj = 0; jj + 15 < nbyte_row; jj += 16) {
a0 = vld1q_s8(in_b + (ii + 0)*nbyte_row + jj);
b0 = vld1q_s8(in_b + (ii + 1)*nbyte_row + jj);
c0 = vld1q_s8(in_b + (ii + 2)*nbyte_row + jj);
d0 = vld1q_s8(in_b + (ii + 3)*nbyte_row + jj);
e0 = vld1q_s8(in_b + (ii + 4)*nbyte_row + jj);
f0 = vld1q_s8(in_b + (ii + 5)*nbyte_row + jj);
g0 = vld1q_s8(in_b + (ii + 6)*nbyte_row + jj);
h0 = vld1q_s8(in_b + (ii + 7)*nbyte_row + jj);
a1 = vzip1q_s8(a0, b0);
b1 = vzip1q_s8(c0, d0);
c1 = vzip1q_s8(e0, f0);
d1 = vzip1q_s8(g0, h0);
e1 = vzip2q_s8(a0, b0);
f1 = vzip2q_s8(c0, d0);
g1 = vzip2q_s8(e0, f0);
h1 = vzip2q_s8(g0, h0);
a0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (a1), vreinterpretq_s16_s8 (b1));
b0= (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (c1), vreinterpretq_s16_s8 (d1));
c0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (a1), vreinterpretq_s16_s8 (b1));
d0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (c1), vreinterpretq_s16_s8 (d1));
e0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (e1), vreinterpretq_s16_s8 (f1));
f0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (g1), vreinterpretq_s16_s8 (h1));
g0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (e1), vreinterpretq_s16_s8 (f1));
h0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (g1), vreinterpretq_s16_s8 (h1));
a1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (b0));
b1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (b0));
c1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (c0), vreinterpretq_s32_s8 (d0));
d1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (c0), vreinterpretq_s32_s8 (d0));
e1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (f0));
f1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (f0));
g1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (g0), vreinterpretq_s32_s8 (h0));
h1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (g0), vreinterpretq_s32_s8 (h0));
as = (int64x1_t *) &a1;
bs = (int64x1_t *) &b1;
cs = (int64x1_t *) &c1;
ds = (int64x1_t *) &d1;
es = (int64x1_t *) &e1;
fs = (int64x1_t *) &f1;
gs = (int64x1_t *) &g1;
hs = (int64x1_t *) &h1;
vst1_s64((int64_t *)(out_b + (jj + 0) * nrows + ii), *as);
vst1_s64((int64_t *)(out_b + (jj + 1) * nrows + ii), *(as + 1));
vst1_s64((int64_t *)(out_b + (jj + 2) * nrows + ii), *bs);
vst1_s64((int64_t *)(out_b + (jj + 3) * nrows + ii), *(bs + 1));
vst1_s64((int64_t *)(out_b + (jj + 4) * nrows + ii), *cs);
vst1_s64((int64_t *)(out_b + (jj + 5) * nrows + ii), *(cs + 1));
vst1_s64((int64_t *)(out_b + (jj + 6) * nrows + ii), *ds);
vst1_s64((int64_t *)(out_b + (jj + 7) * nrows + ii), *(ds + 1));
vst1_s64((int64_t *)(out_b + (jj + 8) * nrows + ii), *es);
vst1_s64((int64_t *)(out_b + (jj + 9) * nrows + ii), *(es + 1));
vst1_s64((int64_t *)(out_b + (jj + 10) * nrows + ii), *fs);
vst1_s64((int64_t *)(out_b + (jj + 11) * nrows + ii), *(fs + 1));
vst1_s64((int64_t *)(out_b + (jj + 12) * nrows + ii), *gs);
vst1_s64((int64_t *)(out_b + (jj + 13) * nrows + ii), *(gs + 1));
vst1_s64((int64_t *)(out_b + (jj + 14) * nrows + ii), *hs);
vst1_s64((int64_t *)(out_b + (jj + 15) * nrows + ii), *(hs + 1));
}
for (jj = nbyte_row - nbyte_row % 16; jj < nbyte_row; jj ++) {
out_b[jj * nrows + ii + 0] = in_b[(ii + 0)*nbyte_row + jj];
out_b[jj * nrows + ii + 1] = in_b[(ii + 1)*nbyte_row + jj];
out_b[jj * nrows + ii + 2] = in_b[(ii + 2)*nbyte_row + jj];
out_b[jj * nrows + ii + 3] = in_b[(ii + 3)*nbyte_row + jj];
out_b[jj * nrows + ii + 4] = in_b[(ii + 4)*nbyte_row + jj];
out_b[jj * nrows + ii + 5] = in_b[(ii + 5)*nbyte_row + jj];
out_b[jj * nrows + ii + 6] = in_b[(ii + 6)*nbyte_row + jj];
out_b[jj * nrows + ii + 7] = in_b[(ii + 7)*nbyte_row + jj];
}
}
return size * elem_size;
}
/* Shuffle bits within the bytes of eight element blocks. */
int64_t bshuf_shuffle_bit_eightelem_NEON(const void* in, void* out, const size_t size,
const size_t elem_size) {
CHECK_MULT_EIGHT(size);
// With a bit of care, this could be written such that such that it is
// in_buf = out_buf safe.
const char* in_b = (const char*) in;
uint16_t* out_ui16 = (uint16_t*) out;
size_t ii, jj, kk;
size_t nbyte = elem_size * size;
int16x8_t xmm;
int32_t bt;
if (elem_size % 2) {
bshuf_shuffle_bit_eightelem_scal(in, out, size, elem_size);
} else {
for (ii = 0; ii + 8 * elem_size - 1 < nbyte;
ii += 8 * elem_size) {
for (jj = 0; jj + 15 < 8 * elem_size; jj += 16) {
xmm = vld1q_s16((int16_t *) &in_b[ii + jj]);
for (kk = 0; kk < 8; kk++) {
bt = move_byte_mask_neon((uint8x16_t) xmm);
xmm = vshlq_n_s16(xmm, 1);
size_t ind = (ii + jj / 8 + (7 - kk) * elem_size);
out_ui16[ind / 2] = bt;
}
}
}
}
return size * elem_size;
}
/* Untranspose bits within elements. */
int64_t bshuf_untrans_bit_elem_NEON(const void* in, void* out, const size_t size,
const size_t elem_size) {
int64_t count;
CHECK_MULT_EIGHT(size);
void* tmp_buf = malloc(size * elem_size);
if (tmp_buf == NULL) return -1;
count = bshuf_trans_byte_bitrow_NEON(in, tmp_buf, size, elem_size);
CHECK_ERR_FREE(count, tmp_buf);
count = bshuf_shuffle_bit_eightelem_NEON(tmp_buf, out, size, elem_size);
free(tmp_buf);
return count;
}
#else // #ifdef USEARMNEON
int64_t bshuf_untrans_bit_elem_NEON(const void* in, void* out, const size_t size,
const size_t elem_size) {
return -13;
}
int64_t bshuf_trans_bit_elem_NEON(const void* in, void* out, const size_t size,
const size_t elem_size) {
return -13;
}
int64_t bshuf_trans_byte_bitrow_NEON(const void* in, void* out, const size_t size,
const size_t elem_size) {
return -13;
}
int64_t bshuf_trans_bit_byte_NEON(const void* in, void* out, const size_t size,
const size_t elem_size) {
return -13;
}
int64_t bshuf_trans_byte_elem_NEON(const void* in, void* out, const size_t size,
const size_t elem_size) {
return -13;
}
int64_t bshuf_trans_byte_elem_NEON_64(const void* in, void* out, const size_t size) {
return -13;
}
int64_t bshuf_trans_byte_elem_NEON_32(const void* in, void* out, const size_t size) {
return -13;
}
int64_t bshuf_trans_byte_elem_NEON_16(const void* in, void* out, const size_t size) {
return -13;
}
int64_t bshuf_shuffle_bit_eightelem_NEON(const void* in, void* out, const size_t size,
const size_t elem_size) {
return -13;
}
#endif
/* ---- Worker code that uses SSE2 ----
*
* The following code makes use of the SSE2 instruction set and specialized
* 16 byte registers. The SSE2 instructions are present on modern x86
* 16 byte registers. The SSE2 instructions are present on modern x86
* processors. The first Intel processor microarchitecture supporting SSE2 was
* Pentium 4 (2000).
*
@@ -512,7 +1051,7 @@ int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size,
int64_t count;
// Trivial cases: power of 2 bytes.
switch (elem_size) {
case 1:
count = bshuf_copy(in, out, size, elem_size);
@@ -528,14 +1067,14 @@ int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size,
return count;
}
// Worst case: odd number of bytes. Turns out that this is faster for
// (odd * 2) byte elements as well (hence % 4).
if (elem_size % 4) {
count = bshuf_trans_byte_elem_scal(in, out, size, elem_size);
return count;
}
// Multiple of power of 2: transpose hierarchically.
{
size_t nchunk_elem;
void* tmp_buf = malloc(size * elem_size);
@@ -554,7 +1093,7 @@ int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size,
size * nchunk_elem);
bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size);
} else {
// Not used since scalar algorithm is faster.
nchunk_elem = elem_size / 2;
TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t);
count = bshuf_trans_byte_elem_SSE_16(out, tmp_buf,
@@ -687,8 +1226,8 @@ int64_t bshuf_trans_byte_bitrow_SSE(const void* in, void* out, const size_t size
g1 = _mm_unpacklo_epi32(g0, h0);
h1 = _mm_unpackhi_epi32(g0, h0);
// We don't have a storeh instruction for integers, so interpret
// as a float. Have a storel (_mm_storel_epi64).
as = (__m128 *) &a1;
bs = (__m128 *) &b1;
cs = (__m128 *) &c1;
@@ -737,8 +1276,8 @@ int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t
CHECK_MULT_EIGHT(size);
// With a bit of care, this could be written such that such that it is
// in_buf = out_buf safe.
const char* in_b = (const char*) in;
uint16_t* out_ui16 = (uint16_t*) out;
@@ -788,7 +1327,7 @@ int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size,
return count;
}
#else
#else // #ifdef USESSE2
int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size,
@@ -842,7 +1381,7 @@ int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t
}
#endif
#endif // #ifdef USESSE2
/* ---- Code that requires AVX2. Intel Haswell (2013) and later. ---- */
@@ -857,7 +1396,6 @@ int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t
*/
#ifdef USEAVX2
/* Transpose bits within bytes. */
int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size,
const size_t elem_size) {
@@ -1014,8 +1552,8 @@ int64_t bshuf_shuffle_bit_eightelem_AVX(const void* in, void* out, const size_t
CHECK_MULT_EIGHT(size);
// With a bit of care, this could be written such that such that it is
// in_buf = out_buf safe.
const char* in_b = (const char*) in;
char* out_b = (char*) out;
@@ -1065,7 +1603,7 @@ int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size,
}
#else
#else // #ifdef USEAVX2
int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size,
const size_t elem_size) {
@@ -1096,19 +1634,179 @@ int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size,
return -12;
}
#endif
#endif // #ifdef USEAVX2
#ifdef USEAVX512
/* Transpose bits within bytes. */
int64_t bshuf_trans_bit_byte_AVX512(const void* in, void* out, const size_t size,
const size_t elem_size) {
size_t ii, kk;
const char* in_b = (const char*) in;
char* out_b = (char*) out;
size_t nbyte = elem_size * size;
int64_t count;
int64_t* out_i64;
__m512i zmm;
__mmask64 bt;
if (nbyte >= 64) {
const __m512i mask = _mm512_set1_epi8(0);
for (ii = 0; ii + 63 < nbyte; ii += 64) {
zmm = _mm512_loadu_si512((__m512i *) &in_b[ii]);
for (kk = 0; kk < 8; kk++) {
bt = _mm512_cmp_epi8_mask(zmm, mask, 1);
zmm = _mm512_slli_epi16(zmm, 1);
out_i64 = (int64_t*) &out_b[((7 - kk) * nbyte + ii) / 8];
*out_i64 = (int64_t)bt;
}
}
}
__m256i ymm;
int32_t bt32;
int32_t* out_i32;
size_t start = nbyte - nbyte % 64;
for (ii = start; ii + 31 < nbyte; ii += 32) {
ymm = _mm256_loadu_si256((__m256i *) &in_b[ii]);
for (kk = 0; kk < 8; kk++) {
bt32 = _mm256_movemask_epi8(ymm);
ymm = _mm256_slli_epi16(ymm, 1);
out_i32 = (int32_t*) &out_b[((7 - kk) * nbyte + ii) / 8];
*out_i32 = bt32;
}
}
count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size,
nbyte - nbyte % 64 % 32);
return count;
}
/* Transpose bits within elements. */
int64_t bshuf_trans_bit_elem_AVX512(const void* in, void* out, const size_t size,
const size_t elem_size) {
int64_t count;
CHECK_MULT_EIGHT(size);
void* tmp_buf = malloc(size * elem_size);
if (tmp_buf == NULL) return -1;
count = bshuf_trans_byte_elem_SSE(in, out, size, elem_size);
CHECK_ERR_FREE(count, tmp_buf);
count = bshuf_trans_bit_byte_AVX512(out, tmp_buf, size, elem_size);
CHECK_ERR_FREE(count, tmp_buf);
count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size);
free(tmp_buf);
return count;
}
/* Shuffle bits within the bytes of eight element blocks. */
int64_t bshuf_shuffle_bit_eightelem_AVX512(const void* in, void* out, const size_t size,
const size_t elem_size) {
CHECK_MULT_EIGHT(size);
// With a bit of care, this could be written such that such that it is
// in_buf = out_buf safe.
const char* in_b = (const char*) in;
char* out_b = (char*) out;
size_t ii, jj, kk;
size_t nbyte = elem_size * size;
__m512i zmm;
__mmask64 bt;
if (elem_size % 8) {
return bshuf_shuffle_bit_eightelem_AVX(in, out, size, elem_size);
} else {
const __m512i mask = _mm512_set1_epi8(0);
for (jj = 0; jj + 63 < 8 * elem_size; jj += 64) {
for (ii = 0; ii + 8 * elem_size - 1 < nbyte;
ii += 8 * elem_size) {
zmm = _mm512_loadu_si512((__m512i *) &in_b[ii + jj]);
for (kk = 0; kk < 8; kk++) {
bt = _mm512_cmp_epi8_mask(zmm, mask, 1);
zmm = _mm512_slli_epi16(zmm, 1);
size_t ind = (ii + jj / 8 + (7 - kk) * elem_size);
* (int64_t *) &out_b[ind] = bt;
}
}
}
}
return size * elem_size;
}
/* Untranspose bits within elements. */
int64_t bshuf_untrans_bit_elem_AVX512(const void* in, void* out, const size_t size,
const size_t elem_size) {
int64_t count;
CHECK_MULT_EIGHT(size);
void* tmp_buf = malloc(size * elem_size);
if (tmp_buf == NULL) return -1;
count = bshuf_trans_byte_bitrow_AVX(in, tmp_buf, size, elem_size);
CHECK_ERR_FREE(count, tmp_buf);
count = bshuf_shuffle_bit_eightelem_AVX512(tmp_buf, out, size, elem_size);
free(tmp_buf);
return count;
}
#else // #ifdef USEAVX512
int64_t bshuf_trans_bit_byte_AVX512(const void* in, void* out, const size_t size,
const size_t elem_size) {
return -14;
}
int64_t bshuf_trans_bit_elem_AVX512(const void* in, void* out, const size_t size,
const size_t elem_size) {
return -14;
}
int64_t bshuf_shuffle_bit_eightelem_AVX512(const void* in, void* out, const size_t size,
const size_t elem_size) {
return -14;
}
int64_t bshuf_untrans_bit_elem_AVX512(const void* in, void* out, const size_t size,
const size_t elem_size) {
return -14;
}
#endif
/* ---- Drivers selecting best instruction set at compile time. ---- */
int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size,
int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size,
const size_t elem_size) {
int64_t count;
#ifdef USEAVX2
#ifdef USEAVX512
count = bshuf_trans_bit_elem_AVX512(in, out, size, elem_size);
#elif defined USEAVX2
count = bshuf_trans_bit_elem_AVX(in, out, size, elem_size);
#elif defined(USESSE2)
count = bshuf_trans_bit_elem_SSE(in, out, size, elem_size);
#elif defined(USEARMNEON)
count = bshuf_trans_bit_elem_NEON(in, out, size, elem_size);
#else
count = bshuf_trans_bit_elem_scal(in, out, size, elem_size);
#endif
@@ -1116,14 +1814,18 @@ int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size,
}
int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size,
int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size,
const size_t elem_size) {
int64_t count;
#ifdef USEAVX2
#ifdef USEAVX512
count = bshuf_untrans_bit_elem_AVX512(in, out, size, elem_size);
#elif defined USEAVX2
count = bshuf_untrans_bit_elem_AVX(in, out, size, elem_size);
#elif defined(USESSE2)
count = bshuf_untrans_bit_elem_SSE(in, out, size, elem_size);
#elif defined(USEARMNEON)
count = bshuf_untrans_bit_elem_NEON(in, out, size, elem_size);
#else
count = bshuf_untrans_bit_elem_scal(in, out, size, elem_size);
#endif
@@ -1136,9 +1838,9 @@ int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size,
/* Wrap a function for processing a single block to process an entire buffer in
* parallel. */
int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out, \
const size_t size, const size_t elem_size, size_t block_size) {
const size_t size, const size_t elem_size, size_t block_size, const int option) {
size_t ii;
omp_size_t ii = 0;
int64_t err = 0;
int64_t count, cum_count=0;
size_t last_block_size;
@@ -1161,8 +1863,8 @@ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
#pragma omp parallel for schedule(dynamic, 1) \
private(count) reduction(+ : cum_count)
#endif
for (ii = 0; ii < size / block_size; ii ++) {
count = fun(&C, block_size, elem_size);
for (ii = 0; ii < (omp_size_t)( size / block_size ); ii ++) {
count = fun(&C, block_size, elem_size, option);
if (count < 0) err = count;
cum_count += count;
}
@@ -1170,7 +1872,7 @@ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
last_block_size = size % block_size;
last_block_size = last_block_size - last_block_size % BSHUF_BLOCKED_MULT;
if (last_block_size) {
count = fun(&C, last_block_size, elem_size);
count = fun(&C, last_block_size, elem_size, option);
if (count < 0) err = count;
cum_count += count;
}
@@ -1178,6 +1880,7 @@ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
if (err < 0) return err;
leftover_bytes = size % BSHUF_BLOCKED_MULT * elem_size;
//this_iter;
last_in = (char *) ioc_get_in(&C, &this_iter);
ioc_set_next_in(&C, &this_iter, (void *) (last_in + leftover_bytes));
last_out = (char *) ioc_get_out(&C, &this_iter);
@@ -1193,7 +1896,7 @@ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
/* Bitshuffle a single block. */
int64_t bshuf_bitshuffle_block(ioc_chain *C_ptr, \
const size_t size, const size_t elem_size) {
const size_t size, const size_t elem_size, const int option) {
size_t this_iter;
const void *in;
@@ -1201,7 +1904,7 @@ int64_t bshuf_bitshuffle_block(ioc_chain *C_ptr, \
int64_t count;
in = ioc_get_in(C_ptr, &this_iter);
ioc_set_next_in(C_ptr, &this_iter,
(void*) ((char*) in + size * elem_size));
@@ -1216,7 +1919,7 @@ int64_t bshuf_bitshuffle_block(ioc_chain *C_ptr, \
/* Bitunshuffle a single block. */
int64_t bshuf_bitunshuffle_block(ioc_chain* C_ptr, \
const size_t size, const size_t elem_size) {
const size_t size, const size_t elem_size, const int option) {
size_t this_iter;
@@ -1296,11 +1999,11 @@ uint32_t bshuf_read_uint32_BE(const void* buf) {
*/
size_t bshuf_default_block_size(const size_t elem_size) {
// This function needs to be absolutely stable between versions.
// Otherwise encoded data will not be decodable.
size_t block_size = BSHUF_TARGET_BLOCK_SIZE_B / elem_size;
// Ensure it is a required multiple.
block_size = (block_size / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT;
return MAX(block_size, BSHUF_MIN_RECOMMEND_BLOCK);
}
@@ -1310,7 +2013,7 @@ int64_t bshuf_bitshuffle(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size) {
return bshuf_blocked_wrap_fun(&bshuf_bitshuffle_block, in, out, size,
elem_size, block_size);
elem_size, block_size, 0/*option*/);
}
@@ -1318,7 +2021,7 @@ int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size) {
return bshuf_blocked_wrap_fun(&bshuf_bitunshuffle_block, in, out, size,
elem_size, block_size);
elem_size, block_size, 0/*option*/);
}
+33 -8
View File
@@ -18,6 +18,8 @@
* -1 : Failed to allocate memory.
* -11 : Missing SSE.
* -12 : Missing AVX.
* -13 : Missing Arm Neon.
* -14 : Missing AVX512.
* -80 : Input size not a multiple of 8.
* -81 : block_size not multiple of 8.
* -91 : Decompression error, wrong number of bytes processed.
@@ -28,15 +30,14 @@
#ifndef BITSHUFFLE_CORE_H
#define BITSHUFFLE_CORE_H
// We assume GNU g++ defining `__cplusplus` has stdint.h
#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus)
#include <stdint.h>
#else
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef signed short int16_t;
typedef unsigned int uint32_t;
typedef signed int int32_t;
typedef signed int int32_t;
typedef unsigned long long uint64_t;
typedef long long int64_t;
#endif
@@ -44,11 +45,11 @@
#include <stdlib.h>
// These are usually set in the setup.py.
#ifndef BSHUF_VERSION_MAJOR
#define BSHUF_VERSION_MAJOR 0
#define BSHUF_VERSION_MINOR 3
#define BSHUF_VERSION_POINT 4
#define BSHUF_VERSION_MINOR 4
#define BSHUF_VERSION_POINT 0
#endif
#ifdef __cplusplus
@@ -67,6 +68,18 @@ extern "C" {
int bshuf_using_SSE2(void);
/* ---- bshuf_using_NEON ----
*
* Whether routines where compiled with the NEON instruction set.
*
* Returns
* -------
* 1 if using NEON, 0 otherwise.
*
*/
int bshuf_using_NEON(void);
/* ---- bshuf_using_AVX2 ----
*
* Whether routines where compiled with the AVX2 instruction set.
@@ -79,6 +92,18 @@ int bshuf_using_SSE2(void);
int bshuf_using_AVX2(void);
/* ---- bshuf_using_AVX512 ----
*
* Whether routines where compiled with the AVX512 instruction set.
*
* Returns
* -------
* 1 if using AVX512, 0 otherwise.
*
*/
int bshuf_using_AVX512(void);
/* ---- bshuf_default_block_size ----
*
* The default block size as function of element size.
@@ -151,7 +176,7 @@ int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size);
#ifdef __cplusplus
}
} // extern "C"
#endif
#endif
#endif // BITSHUFFLE_CORE_H
+18 -6
View File
@@ -13,19 +13,31 @@
#ifndef BITSHUFFLE_INTERNALS_H
#define BITSHUFFLE_INTERNALS_H
// We assume GNU g++ defining `__cplusplus` has stdint.h
#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus)
#include <stdint.h>
#else
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef signed int int32_t;
typedef unsigned long long uint64_t;
typedef long long int64_t;
#endif
#include <stdlib.h>
#include "iochain.h"
// Constants.
#ifndef BSHUF_MIN_RECOMMEND_BLOCK
#define BSHUF_MIN_RECOMMEND_BLOCK 128
#define BSHUF_BLOCKED_MULT 8
#define BSHUF_BLOCKED_MULT 8 // Block sizes must be multiple of this.
#define BSHUF_TARGET_BLOCK_SIZE_B 8192
#endif
// Macros.
#define CHECK_ERR_FREE(count, buf) if (count < 0) { free(buf); return count; }
@@ -49,15 +61,15 @@ int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size,
/* Function definition for worker functions that process a single block. */
typedef int64_t (*bshufBlockFunDef)(ioc_chain* C_ptr,
const size_t size, const size_t elem_size);
const size_t size, const size_t elem_size, const int option);
/* Wrap a function for processing a single block to process an entire buffer in
* parallel. */
int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
const size_t size, const size_t elem_size, size_t block_size);
const size_t size, const size_t elem_size, size_t block_size, const int option);
#ifdef __cplusplus
}
} // extern "C"
#endif
#endif
#endif // BITSHUFFLE_INTERNALS_H
+260
View File
@@ -0,0 +1,260 @@
/*
* Bitshuffle HDF5 filter
*
* This file is part of Bitshuffle
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
* Website: http://www.github.com/kiyo-masui/bitshuffle
* Created: 2014
*
* See LICENSE file for details about copyright and rights to use.
*
*/
#include "bitshuffle.h"
#include "bshuf_h5filter.h"
#define PUSH_ERR(func, minor, str) \
H5Epush1(__FILE__, func, __LINE__, H5E_PLINE, minor, str)
// Prototypes from bitshuffle.c
void bshuf_write_uint64_BE(void* buf, uint64_t num);
uint64_t bshuf_read_uint64_BE(void* buf);
void bshuf_write_uint32_BE(void* buf, uint32_t num);
uint32_t bshuf_read_uint32_BE(const void* buf);
// Only called on compression, not on reverse.
herr_t bshuf_h5_set_local(hid_t dcpl, hid_t type, hid_t space){
herr_t r;
size_t ii;
unsigned int elem_size;
unsigned int flags;
size_t nelements = 8;
size_t nelem_max = 11;
unsigned values[] = {0,0,0,0,0,0,0,0,0,0,0};
unsigned tmp_values[] = {0,0,0,0,0,0,0,0};
char msg[80];
r = H5Pget_filter_by_id2(dcpl, BSHUF_H5FILTER, &flags, &nelements,
tmp_values, 0, NULL, NULL);
if(r<0) return -1;
// First 3 slots reserved. Move any passed options to higher addresses.
for (ii=0; ii < nelements && ii + 3 < nelem_max; ii++) {
values[ii + 3] = tmp_values[ii];
}
nelements = 3 + nelements;
values[0] = BSHUF_VERSION_MAJOR;
values[1] = BSHUF_VERSION_MINOR;
elem_size = H5Tget_size(type);
if(elem_size <= 0) {
PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK,
"Invalid element size.");
return -1;
}
values[2] = elem_size;
// Validate user supplied arguments.
if (nelements > 3) {
if (values[3] % 8 || values[3] < 0) {
sprintf(msg, "Error in bitshuffle. Invalid block size: %d.",
values[3]);
PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, msg);
return -1;
}
}
if (nelements > 4) {
switch (values[4]) {
case 0:
break;
case BSHUF_H5_COMPRESS_LZ4:
break;
#ifdef ZSTD_SUPPORT
case BSHUF_H5_COMPRESS_ZSTD:
break;
#endif
default:
PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK,
"Invalid bitshuffle compression.");
}
}
r = H5Pmodify_filter(dcpl, BSHUF_H5FILTER, flags, nelements, values);
if(r<0) return -1;
return 1;
}
size_t bshuf_h5_filter(unsigned int flags, size_t cd_nelmts,
const unsigned int cd_values[], size_t nbytes,
size_t *buf_size, void **buf) {
size_t size, elem_size;
int err = -1;
char msg[80];
size_t block_size = 0;
size_t buf_size_out, nbytes_uncomp, nbytes_out;
char* in_buf = *buf;
void *out_buf;
if (cd_nelmts < 3) {
PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK,
"Not enough parameters.");
return 0;
}
elem_size = cd_values[2];
#ifdef ZSTD_SUPPORT
const int comp_lvl = cd_values[5];
#endif
// User specified block size.
if (cd_nelmts > 3) block_size = cd_values[3];
if (block_size == 0) block_size = bshuf_default_block_size(elem_size);
#ifndef ZSTD_SUPPORT
if (cd_nelmts > 4 && (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD)) {
PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK,
"ZSTD compression filter chosen but ZSTD support not installed.");
return 0;
}
#endif
// Compression in addition to bitshuffle.
if (cd_nelmts > 4 && (cd_values[4] == BSHUF_H5_COMPRESS_LZ4 || cd_values[4] == BSHUF_H5_COMPRESS_ZSTD)) {
if (flags & H5Z_FLAG_REVERSE) {
// First eight bytes is the number of bytes in the output buffer,
// little endian.
nbytes_uncomp = bshuf_read_uint64_BE(in_buf);
// Override the block size with the one read from the header.
block_size = bshuf_read_uint32_BE((const char*) in_buf + 8) / elem_size;
// Skip over the header.
in_buf += 12;
buf_size_out = nbytes_uncomp;
} else {
nbytes_uncomp = nbytes;
// Pick which compressions library to use
if(cd_values[4] == BSHUF_H5_COMPRESS_LZ4) {
buf_size_out = bshuf_compress_lz4_bound(nbytes_uncomp / elem_size,
elem_size, block_size) + 12;
}
#ifdef ZSTD_SUPPORT
else if (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD) {
buf_size_out = bshuf_compress_zstd_bound(nbytes_uncomp / elem_size,
elem_size, block_size) + 12;
}
#endif
}
} else {
nbytes_uncomp = nbytes;
buf_size_out = nbytes;
}
// TODO, remove this restriction by memcopying the extra.
if (nbytes_uncomp % elem_size) {
PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK,
"Non integer number of elements.");
return 0;
}
size = nbytes_uncomp / elem_size;
out_buf = malloc(buf_size_out);
if (out_buf == NULL) {
PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK,
"Could not allocate output buffer.");
return 0;
}
if (cd_nelmts > 4 && (cd_values[4] == BSHUF_H5_COMPRESS_LZ4 || cd_values[4] == BSHUF_H5_COMPRESS_ZSTD)) {
if (flags & H5Z_FLAG_REVERSE) {
// Bit unshuffle/decompress.
// Pick which compressions library to use
if(cd_values[4] == BSHUF_H5_COMPRESS_LZ4) {
err = bshuf_decompress_lz4(in_buf, out_buf, size, elem_size, block_size);
}
#ifdef ZSTD_SUPPORT
else if (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD) {
err = bshuf_decompress_zstd(in_buf, out_buf, size, elem_size, block_size);
}
#endif
nbytes_out = nbytes_uncomp;
} else {
// Bit shuffle/compress.
// Write the header, described in
// http://www.hdfgroup.org/services/filters/HDF5_LZ4.pdf.
// Technically we should be using signed integers instead of
// unsigned ones, however for valid inputs (positive numbers) these
// have the same representation.
bshuf_write_uint64_BE(out_buf, nbytes_uncomp);
bshuf_write_uint32_BE((char*) out_buf + 8, block_size * elem_size);
if(cd_values[4] == BSHUF_H5_COMPRESS_LZ4) {
err = bshuf_compress_lz4(in_buf, (char*) out_buf + 12, size,
elem_size, block_size);
}
#ifdef ZSTD_SUPPORT
else if (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD) {
err = bshuf_compress_zstd(in_buf, (char*) out_buf + 12, size,
elem_size, block_size, comp_lvl);
}
#endif
nbytes_out = err + 12;
}
} else {
if (flags & H5Z_FLAG_REVERSE) {
// Bit unshuffle.
err = bshuf_bitunshuffle(in_buf, out_buf, size, elem_size,
block_size); } else {
// Bit shuffle.
err = bshuf_bitshuffle(in_buf, out_buf, size, elem_size,
block_size); } nbytes_out = nbytes; }
//printf("nb_in %d, nb_uncomp %d, nb_out %d, buf_out %d, block %d\n",
//nbytes, nbytes_uncomp, nbytes_out, buf_size_out, block_size);
if (err < 0) {
sprintf(msg, "Error in bitshuffle with error code %d.", err);
PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, msg);
free(out_buf);
return 0;
} else {
free(*buf);
*buf = out_buf;
*buf_size = buf_size_out;
return nbytes_out;
}
}
H5Z_class_t bshuf_H5Filter[1] = {{
H5Z_CLASS_T_VERS,
(H5Z_filter_t)(BSHUF_H5FILTER),
1, 1,
"bitshuffle; see https://github.com/kiyo-masui/bitshuffle",
NULL,
(H5Z_set_local_func_t)(bshuf_h5_set_local),
(H5Z_func_t)(bshuf_h5_filter)
}};
int bshuf_register_h5filter(void){
int retval;
retval = H5Zregister(bshuf_H5Filter);
if(retval<0){
PUSH_ERR("bshuf_register_h5filter",
H5E_CANTREGISTER, "Can't register bitshuffle filter");
}
return retval;
}
+67
View File
@@ -0,0 +1,67 @@
/*
* Bitshuffle HDF5 filter
*
* This file is part of Bitshuffle
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
* Website: http://www.github.com/kiyo-masui/bitshuffle
* Created: 2014
*
* See LICENSE file for details about copyright and rights to use.
*
*
* Header File
*
* Filter Options
* --------------
* block_size (option slot 0) : integer (optional)
* What block size to use (in elements not bytes). Default is 0,
* for which bitshuffle will pick a block size with a target of 8kb.
* Compression (option slot 1) : 0 or BSHUF_H5_COMPRESS_LZ4
* Whether to apply LZ4 compression to the data after bitshuffling.
* This is much faster than applying compression as a second filter
* because it is done when the small block of data is already in the
* L1 cache.
*
* For LZ4 compression, the compressed format of the data is the same as
* for the normal LZ4 filter described in
* http://www.hdfgroup.org/services/filters/HDF5_LZ4.pdf.
*
*/
#ifndef BSHUF_H5FILTER_H
#define BSHUF_H5FILTER_H
#ifdef __cplusplus
extern "C" {
#endif
#define H5Z_class_t_vers 2
#include "hdf5.h"
#define BSHUF_H5FILTER 32008
#define BSHUF_H5_COMPRESS_LZ4 2
#define BSHUF_H5_COMPRESS_ZSTD 3
extern H5Z_class_t bshuf_H5Filter[1];
/* ---- bshuf_register_h5filter ----
*
* Register the bitshuffle HDF5 filter within the HDF5 library.
*
* Call this before using the bitshuffle HDF5 filter from C unless
* using dynamically loaded filters.
*
*/
int bshuf_register_h5filter(void);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // BSHUF_H5FILTER_H
+4 -4
View File
@@ -1,5 +1,5 @@
/*
* IOchain - Distribute a chain of dependant IO events amoung threads.
* IOchain - Distribute a chain of dependent IO events among threads.
*
* This file is part of Bitshuffle
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
@@ -81,9 +81,9 @@ void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr) {
C->out_pl[(*this_iter + 1) % IOC_SIZE].ptr = out_ptr;
#ifdef _OPENMP
omp_unset_lock(&(C->out_pl[(*this_iter + 1) % IOC_SIZE].lock));
// *in_pl[this_iter]* lock released at the end of the iteration to avoid being
// overtaken by previous threads and having *out_pl[this_iter]* corrupted.
// Especially worried about thread 0, iteration 0.
omp_unset_lock(&(C->in_pl[(*this_iter) % IOC_SIZE].lock));
#endif
}
+3 -3
View File
@@ -1,5 +1,5 @@
/*
* IOchain - Distribute a chain of dependant IO events amoung threads.
* IOchain - Distribute a chain of dependent IO events among threads.
*
* This file is part of Bitshuffle
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
@@ -25,7 +25,7 @@
* Usage
* -----
* - Call `ioc_init` in serial block.
* - Each thread should create a local variable *size_t this_iter* and
* - Each thread should create a local variable *size_t this_iter* and
* pass its address to all function calls. Its value will be set
* inside the functions and is used to identify the thread.
* - Each thread must call each of the `ioc_get*` and `ioc_set*` methods
@@ -90,5 +90,5 @@ void ioc_set_next_in(ioc_chain *C, size_t* this_iter, void* in_ptr);
void * ioc_get_out(ioc_chain *C, size_t *this_iter);
void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr);
#endif
#endif // IOCHAIN_H
+1921 -941
View File
File diff suppressed because it is too large Load Diff
+671 -257
View File
File diff suppressed because it is too large Load Diff
+67 -46
View File
@@ -4,7 +4,6 @@
*/
#include <hdf5.h>
#include <hdf5_hl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -104,7 +103,13 @@ int get_nxs_dataset_dims(struct ds_desc_t *desc) {
ERROR_JUMP(-1, close_space, "Error getting dataset dimensions");
}
desc->data_width = width;
if ( H5Tequal(t_id,H5T_NATIVE_CHAR)>0 || H5Tequal(t_id,H5T_NATIVE_INT)>0 || H5Tequal(t_id,H5T_NATIVE_SHORT)>0 || H5Tequal(t_id,H5T_NATIVE_LONG)>0 || H5Tequal(t_id,H5T_NATIVE_LLONG)>0 ) {
// signed
desc->data_width = -width;
} else {
// unsigned
desc->data_width = width;
}
close_space:
H5Sclose(s_id);
@@ -222,7 +227,7 @@ int get_frame_from_chunk(const struct ds_desc_t *desc, const char *ds_name,
c_buffer = buffer;
}
if (H5DOread_chunk(d_id, H5P_DEFAULT, c_offset, &c_filter_mask, c_buffer) <
if (H5Dread_chunk(d_id, H5P_DEFAULT, c_offset, &c_filter_mask, c_buffer) <
0) {
char message[128];
sprintf(message,
@@ -233,7 +238,7 @@ int get_frame_from_chunk(const struct ds_desc_t *desc, const char *ds_name,
if (o_eiger_desc->bs_applied) {
if (bslz4_decompress(o_eiger_desc->bs_params, c_bytes, c_buffer,
desc->data_width * frame_size[1] * frame_size[2],
abs(desc->data_width) * frame_size[1] * frame_size[2],
buffer) < 0) {
char message[128];
sprintf(message,
@@ -251,10 +256,11 @@ done:
return retval;
}
int get_nxs_frame(const struct ds_desc_t *desc, const int n, void *buffer) {
int get_nxs_frame(const struct ds_desc_t *desc, const int nin, void *buffer) {
/* detector data are the two inner most indices */
/* TODO: handle ndims > 3 and select appropriately */
int retval = 0;
int n = nin - desc->image_number_offset;
hsize_t frame_idx[3] = {n, 0, 0};
hsize_t frame_size[3] = {1, desc->dims[1], desc->dims[2]};
if (n < 0 || n >= desc->dims[0]) {
@@ -271,9 +277,10 @@ done:
return retval;
}
int get_dectris_eiger_frame(const struct ds_desc_t *desc, int n, void *buffer) {
int get_dectris_eiger_frame(const struct ds_desc_t *desc, int nin, void *buffer) {
int retval = 0;
int n = nin - desc->image_number_offset;
int block, frame_count, idx;
struct eiger_ds_desc_t *eiger_desc = (struct eiger_ds_desc_t *)desc;
char data_name[16] = {0};
@@ -349,6 +356,10 @@ int get_dectris_eiger_dataset_dims(struct ds_desc_t *desc) {
if (data_width <= 0) {
ERROR_JUMP(-1, close_space, "Unable to get type size");
}
if ( H5Tequal(t_id,H5T_NATIVE_CHAR)>0 || H5Tequal(t_id,H5T_NATIVE_INT)>0 || H5Tequal(t_id,H5T_NATIVE_SHORT)>0 || H5Tequal(t_id,H5T_NATIVE_LONG)>0 || H5Tequal(t_id,H5T_NATIVE_LLONG)>0 ) {
// signed
data_width = -data_width;
}
ndims = H5Sget_simple_extent_ndims(s_id);
if (ndims != 3) {
@@ -561,9 +572,37 @@ int get_dectris_eiger_pixel_mask(const struct ds_desc_t *desc, int *buffer) {
ERROR_JUMP(-1, done, "Error opening detectorSpecific/pixel_mask");
}
// what if this is compressed?
hid_t dcpl = H5Dget_create_plist(ds_id);
int n_filters = H5Pget_nfilters(dcpl);
H5Z_filter_t filter_id;
if (n_filters>0) {
unsigned int flags;
size_t nelmts = 1;
unsigned int values_out[1] = {99};
char filter_name[80];
for ( int i_filt = 0; i_filt < n_filters; i_filt++) {
filter_id = H5Pget_filter(dcpl, i_filt, &flags, &nelmts, values_out, sizeof(filter_name), filter_name, NULL);
if (filter_id>=0) {
fprintf(stderr," filter #%d name =\"%s\"\n",(i_filt+1),filter_name);
}
}
}
int i0 = H5Zfilter_avail(BS_H5_FILTER_ID);
err = H5Dread(ds_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer);
if (err < 0) {
ERROR_JUMP(-1, close_dataset, "Error reading detectorSpecific/pixel_mask");
if (n_filters>0) {
ERROR_JUMP(-1, close_dataset, "Error reading detectorSpecific/pixel_mask with filter(s)");
}
else {
ERROR_JUMP(-1, close_dataset, "Error reading detectorSpecific/pixel_mask");
}
}
if (!i0 && H5Zfilter_avail(BS_H5_FILTER_ID)) {
fprintf(stderr," bitshuffle filter is available now since H5Dread (of pixel-mask) triggered loading of the filter.\n");
}
close_dataset:
@@ -594,9 +633,9 @@ herr_t det_visit_callback(hid_t root_id, const char *name,
/* check for an "NX_class" attribute */
{
int str_size = 0;
void *buffer = NULL;
hid_t a_id, t_id, mt_id;
char* buffer = (char*)malloc(1);
buffer[0] = '\0';
hid_t a_id, t_id;
if (H5Aexists(g_id, "NX_class") <= 0) {
/* not an error - just close group and allow continuation */
retval = 0;
@@ -616,39 +655,31 @@ herr_t det_visit_callback(hid_t root_id, const char *name,
if (t_id < 0) {
ERROR_JUMP(-1, close_attr, "Error getting datatype");
}
if (H5Tis_variable_str(t_id) > 0) {
str_size = -1;
buffer = malloc(sizeof(char *));
} else {
str_size = H5Tget_size(t_id);
buffer = malloc(str_size + 1);
H5A_info_t a_info;
herr_t err = H5Aget_info(a_id, &a_info);
if (err<0) {
ERROR_JUMP(-1, close_type,
"Unable to get attribute info for NX_class");
}
else {
if (a_info.cset != H5T_CSET_ASCII && a_info.cset != H5T_CSET_UTF8) {
fprintf(stderr," %s : NX_class attribute info cset = unknown with size %d\n",name,(int) a_info.data_size);
}
}
buffer = (char *)malloc(sizeof(char)*(H5Tget_size(t_id)+1));
if (!buffer) {
ERROR_JUMP(-1, close_type, "Error allocating string buffer");
}
mt_id = H5Tcopy(H5T_C_S1);
if (mt_id < 0) {
ERROR_JUMP(-1, free_buffer, "Error creating HDF5 String datatype");
}
// set the target string type to be one longer than the recorded string
// in keeping with the malloc'd buffer - if this is already a null
// terminated string then we will just have two nulls, if it is not
// then we won't clobber the last char in the buffer with a null in the
// H5Aread call
if (H5Tset_size(mt_id, str_size == -1 ? H5T_VARIABLE : str_size + 1) < 0) {
char message[64];
sprintf(message, "Error setting string datatype to size %d", str_size);
ERROR_JUMP(-1, close_mtype, message);
}
if (H5Aread(a_id, mt_id, buffer) < 0) {
if (H5Aread(a_id, t_id, buffer) < 0) {
char message[256];
sprintf(
message,
"H5OVisit callback: Error reading NX_class attribute on group %.128s",
name);
ERROR_JUMP(-1, close_mtype, message);
ERROR_JUMP(-1, free_buffer, message);
}
/* ensure the buffer is null terminated */
@@ -656,26 +687,16 @@ herr_t det_visit_callback(hid_t root_id, const char *name,
/* test for NXdata or NXdetector */
{
char *nxclass = str_size > 0 ? (char *)buffer : *((char **)buffer);
if (strcmp("NXdata", nxclass) == 0) {
if (strcmp("NXdata", buffer) == 0) {
hid_t out_id = H5Gopen(root_id, name, H5P_DEFAULT);
output_data->nxdata = out_id;
} else if (strcmp("NXdetector", nxclass) == 0) {
}
else if (strcmp("NXdetector", buffer) == 0) {
hid_t out_id = H5Gopen(root_id, name, H5P_DEFAULT);
output_data->nxdetector = out_id;
}
}
if (str_size == -1) {
hsize_t dims[1] = {1};
hid_t s_id = H5Screate_simple(1, dims, NULL);
H5Sselect_all(s_id);
H5Dvlen_reclaim(mt_id, s_id, H5P_DEFAULT, buffer);
H5Sclose(s_id);
}
close_mtype:
H5Tclose(mt_id);
free_buffer:
free(buffer);
close_type:
+2
View File
@@ -15,10 +15,12 @@ struct ds_desc_t {
hid_t data_g_id;
hsize_t dims[3];
int data_width;
int image_number_offset;
int (*get_pixel_properties)(const struct ds_desc_t *, double *, double *);
int (*get_pixel_mask)(const struct ds_desc_t *, int *);
int (*get_data_frame)(const struct ds_desc_t *, const int, void *);
void (*free_desc)(struct ds_desc_t *);
int i2i[]; // array to hold a translation from the image number requested by XDS and the actual position in the HDF5 file
};
struct nxs_ds_desc_t {
+257 -39
View File
@@ -5,11 +5,16 @@
#include <hdf5.h>
#include <stdlib.h>
#include <string.h>
#include "file.h"
#include "filters.h"
#include "plugin.h"
#ifdef USE_BITSHUFFLE
#include "bshuf_h5filter.h"
#endif
/* XDS does not provide an error callback facility, so just write to stderr
for now - generally regarded as poor practice */
#define ERROR_OUTPUT stderr
@@ -17,24 +22,41 @@
/* mask bits loosely based on what Neggia does and what NeXus says should be
done basically - anything in the low byte (& 0xFF) means "ignore this"
Neggia uses the value -2 if bit 1, 2 or 3 are set */
#define COPY_AND_MASK(in, out, size, mask) \
/* CV-GPhL-20210408: we want more control over the value non-masked
pixels should be set to. */
#define COPY_AND_MASK(in, value, setValue, out, size, mask) \
{ \
int i; \
if (mask) { \
for (i = 0; i < size; ++i) { \
out[i] = in[i]; \
if (mask[i] & 0xFF) \
out[i] = -1; \
if (mask[i] & 30) \
out[i] = -2; \
if (value!=0) { \
if (mask) { \
for (i = 0; i < size; ++i) { \
out[i] = (in[i] == value) ? setValue : in[i]; \
if (mask[i] & 0xFF) \
out[i] = -1; \
if (mask[i] & 30) \
out[i] = -2; \
} \
} else { \
for (i = 0; i < size; i++) { \
out[i] = (in[i] == value) ? setValue : in[i]; \
} \
} \
} else { \
for (i = 0; i < size; i++) { \
out[i] = in[i]; \
if (mask) { \
for (i = 0; i < size; ++i) { \
out[i] = in[i]; \
if (mask[i] & 0xFF) \
out[i] = -1; \
if (mask[i] & 30) \
out[i] = -2; \
} \
} else { \
for (i = 0; i < size; i++) { \
out[i] = in[i]; \
} \
} \
} \
}
#define APPLY_MASK(buffer, mask, size) \
{ \
int i; \
@@ -52,42 +74,200 @@ static hid_t file_id = 0;
static struct ds_desc_t *data_desc = NULL;
static int *mask_buffer = NULL;
// CV-20240605: potentially provide a mapping from frame number (as
// requested by caller) to actual 2D slice within 3D data
// array.
//
// This is defined by the environment variable
// DURIN_IMAGE2ORDINAL (see below).
int *image2ordinal = NULL;
int image2ordinal_debug = 0;
int image2ordinal_imin = 0;
int image2ordinal_imax = 0;
void fill_info_array(int info[1024]) {
info[0] = DLS_CUSTOMER_ID;
info[1] = VERSION_MAJOR;
info[2] = VERSION_MINOR;
info[3] = VERSION_PATCH;
info[4] = VERSION_TIMESTAMP;
info[5] = 0; // image number offset
info[6] = -1; // marked pixels not already in pixel_mask: reset to this value
char *cenv;
cenv = getenv("DURIN_IMAGE_NUMBER_OFFSET");
if (cenv!=NULL) {
info[5] = atoi(cenv);
}
cenv = getenv("DURIN_RESET_UNMASKED_PIXEL");
if (cenv!=NULL) {
info[6] = atoi(cenv);
}
cenv = getenv("DURIN_IMAGE2ORDINAL");
if (cenv!=NULL&&(!image2ordinal)) {
char *denv = getenv("DURIN_IMAGE2ORDINAL_DEBUG");
if (denv!=NULL) {
image2ordinal_debug=1;
}
// <ordinal_start>,<image_1_start>-<image_1_end>,<image_2_start>-<image_2_end>,..,<image_N_start>-<image_N_end>
if (image2ordinal_debug) printf("DURIN_IMAGE2ORDINAL = \"%s\"\n",cenv);
const char outer_delimiters[] = ",";
const char inner_delimiters[] = "-";
char *found;
char *outer_saveptr;
char *inner_saveptr;
int ordinal_start = 0;
int ordinal = 0;
int ntt = -1;
found = strtok_r(cenv,outer_delimiters, &outer_saveptr);
if (found!=NULL) {
int tt[1000][2];
while(found) {
if (ordinal_start==0) {
ordinal_start = atoi(found);
ordinal = ordinal_start - 1;
}
else {
char* s = strtok_r(found, inner_delimiters, &inner_saveptr);
if (s!=NULL) {
int i1 = atoi(s);
s = strtok_r(NULL,inner_delimiters, &inner_saveptr);
if (s!=NULL) {
int i2 = atoi(s);
ntt++;
if (ntt<=1000) {
tt[ntt][0] = i1;
tt[ntt][1] = i2;
for(int i=i1; i<=i2; ++i) {
ordinal++;
if (ordinal==1) {
image2ordinal_imin=i;
image2ordinal_imax=i;
}
else {
if (i<image2ordinal_imin) image2ordinal_imin=i;
if (i>image2ordinal_imax) image2ordinal_imax=i;
}
}
}
}
}
}
found = strtok_r(NULL,outer_delimiters,&outer_saveptr);
}
if (ordinal_start>0) {
if (image2ordinal_debug) {
printf("ordinal_start, end = %d %d\n",ordinal_start, ordinal);
printf("imin, imax = %d %d\n",image2ordinal_imin,image2ordinal_imax);
}
// allocate array to go from image number/id to ordinal:
image2ordinal = malloc((image2ordinal_imax-image2ordinal_imin+1) * sizeof(image2ordinal_imin));
int ordinal = ordinal_start - 1;
for(int i=0; i<=ntt; i++) {
for(int j=tt[i][0];j<=tt[i][1];j++) {
ordinal++;
//printf(" %d -> %d\n",ordinal,j);
image2ordinal[j] = ordinal;
}
}
if (image2ordinal&&image2ordinal_debug) {
for(int i=image2ordinal_imin; i<=image2ordinal_imax; i++) {
if (image2ordinal[i]>0) {
printf(" %d -> %d\n",i,image2ordinal[i]);
}
}
}
}
}
}
}
int convert_to_int_and_mask(void *in_buffer, int d_width, int *out_buffer,
int convert_to_int_and_mask(void *in_buffer, int width, int setValue, int *out_buffer,
int length, int *mask) {
/* transfer data to output buffer, performing data conversion as required */
int retval = 0;
/* TODO: decide how conversion of data should work */
/* Should we sign extend? Neggia doesn't (casts from uint*), but may be more
* intuitive */
if (d_width == sizeof(signed char)) {
signed char *in = in_buffer;
COPY_AND_MASK(in, out_buffer, length, mask);
} else if (d_width == sizeof(short)) {
short *in = in_buffer;
COPY_AND_MASK(in, out_buffer, length, mask);
} else if (d_width == sizeof(int)) {
int *in = in_buffer;
COPY_AND_MASK(in, out_buffer, length, mask);
} else if (d_width == sizeof(long int)) {
long int *in = in_buffer;
COPY_AND_MASK(in, out_buffer, length, mask);
} else if (d_width == sizeof(long long int)) {
long long int *in = in_buffer;
COPY_AND_MASK(in, out_buffer, length, mask);
} else {
char message[128];
sprintf(message, "Unsupported conversion of data width %d to %ld (int)",
d_width, sizeof(int));
ERROR_JUMP(-1, done, message);
int d_width = abs(width);
// CV-20210407
// Dealing with a signed data array: no extra check for marker
// value needed (since data can already take advantage of the
// negative data range). It is unclear though why/when data would
// come in as a signed array in the first place ...
if (width<0) {
if (d_width == sizeof(signed char)) {
// 8-bit
signed char *in = in_buffer;
COPY_AND_MASK(in, 0, setValue, out_buffer, length, mask);
} else if (d_width == sizeof(short)) {
// 16-bit
short *in = in_buffer;
COPY_AND_MASK(in, 0, setValue, out_buffer, length, mask);
} else if (d_width == sizeof(int)) {
// 16-bit
int *in = in_buffer;
COPY_AND_MASK(in, 0, setValue, out_buffer, length, mask);
} else if (d_width == sizeof(long int)) {
// 32-bit
long int *in = in_buffer;
COPY_AND_MASK(in, 0, setValue, out_buffer, length, mask);
} else if (d_width == sizeof(long long int)) {
// 64-bit
long long int *in = in_buffer;
COPY_AND_MASK(in, 0, setValue, out_buffer, length, mask);
} else {
char message[128];
sprintf(message, "Unsupported conversion of data width %d to %ld (int)",
d_width, sizeof(int));
ERROR_JUMP(-1, done, message);
}
}
// CV-20210407
// Dealing with an unsigned data array: extra check for marker
// value required (to handle overloaded pixels correctly if wanted
// - see also DURIN_RESET_UNMASKED_PIXEL environment variable).
else {
if (d_width == sizeof(unsigned char)) {
// 8-bit
unsigned char *in = in_buffer;
COPY_AND_MASK(in, UINT8_MAX, setValue, out_buffer, length, mask);
} else if (d_width == sizeof(unsigned short)) {
// 16-bit
unsigned short *in = in_buffer;
COPY_AND_MASK(in, UINT16_MAX, setValue, out_buffer, length, mask);
} else if (d_width == sizeof(unsigned int)) {
// 16-bit
unsigned int *in = in_buffer;
COPY_AND_MASK(in, UINT16_MAX, setValue, out_buffer, length, mask);
} else if (d_width == sizeof(unsigned long)) {
// 32-bit
unsigned long *in = in_buffer;
COPY_AND_MASK(in, UINT32_MAX, setValue, out_buffer, length, mask);
} else if (d_width == sizeof(unsigned long long)) {
// 64-bit
unsigned long long *in = in_buffer;
COPY_AND_MASK(in, UINT32_MAX, setValue, out_buffer, length, mask);
} else {
char message[128];
sprintf(message, "Unsupported conversion of data width %d to %ld (int)",
d_width, sizeof(int));
ERROR_JUMP(-1, done, message);
}
}
done:
return retval;
}
@@ -110,6 +290,12 @@ void plugin_open(const char *filename, int info[1024], int *error_flag) {
ERROR_JUMP(-2, done, "Failed to configure HDF5 error handling");
}
#ifdef USE_BITSHUFFLE
if (bshuf_register_h5filter() < 0 ) {
ERROR_JUMP(-2, done, "Failed to register bitshuffle filter");
}
#endif
fill_info_array(info);
file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);
if (file_id < 0) {
@@ -124,6 +310,8 @@ void plugin_open(const char *filename, int info[1024], int *error_flag) {
ERROR_JUMP(-4, done, "");
}
data_desc->image_number_offset = info[5];
mask_buffer = malloc(data_desc->dims[1] * data_desc->dims[2] * sizeof(int));
if (mask_buffer) {
retval = data_desc->get_pixel_mask(data_desc, mask_buffer);
@@ -138,6 +326,10 @@ void plugin_open(const char *filename, int info[1024], int *error_flag) {
}
retval = 0;
#ifdef GPHL_COMPILE_DATE
fprintf(ERROR_OUTPUT, "\n XDS HDF5/Durin plugin %d.%d.%d (DLS, 2018-2023; GPhL, 2020-2024 - built %d)\n", info[1], info[2], info[3], GPHL_COMPILE_DATE);
#endif
done:
*error_flag = retval;
if (retval < 0) {
@@ -165,7 +357,7 @@ void plugin_get_header(int *nx, int *ny, int *nbytes, float *qx, float *qy,
*nx = data_desc->dims[2];
*ny = data_desc->dims[1];
*nbytes = data_desc->data_width;
*nbytes = abs(data_desc->data_width);
*number_of_frames = data_desc->dims[0];
*qx = (float)x_pixel_size;
*qy = (float)y_pixel_size;
@@ -186,26 +378,47 @@ void plugin_get_data(int *frame_number, int *nx, int *ny, int *data_array,
fill_info_array(info);
void *buffer = NULL;
if (sizeof(*data_array) == data_desc->data_width) {
if (sizeof(*data_array) == abs(data_desc->data_width)) {
buffer = data_array;
} else {
buffer = malloc(data_desc->data_width * frame_size_px);
buffer = malloc(abs(data_desc->data_width) * frame_size_px);
if (!buffer) {
ERROR_JUMP(-1, done, "Unable to allocate data buffer");
}
}
if (data_desc->get_data_frame(data_desc, (*frame_number) - 1, buffer) < 0) {
int ordinal = *frame_number;
if (image2ordinal) {
if (ordinal < image2ordinal_imin || ordinal>image2ordinal_imax) {
char message[64] = {0};
sprintf(message, "Failed to map frame %d to ordinals since outside of range %d - %d", ordinal,image2ordinal_imin,image2ordinal_imax);
ERROR_JUMP(-2, done, message);
}
ordinal = image2ordinal[ordinal];
if (ordinal!=*frame_number) {
if (image2ordinal_debug) printf("fetching data from ordinal %d for frame %d\n",ordinal,*frame_number);
}
}
if (data_desc->get_data_frame(data_desc, ordinal - 1, buffer) < 0) {
char message[64] = {0};
sprintf(message, "Failed to retrieve data for frame %d", *frame_number);
if (image2ordinal) {
sprintf(message, "Failed to retrieve data for frame %d (ordinal %d)", *frame_number, ordinal);
} else {
sprintf(message, "Failed to retrieve data for frame %d", *frame_number);
}
ERROR_JUMP(-2, done, message);
}
if (buffer != data_array) {
if (convert_to_int_and_mask(buffer, data_desc->data_width, data_array,
if (convert_to_int_and_mask(buffer, data_desc->data_width, info[6], data_array,
frame_size_px, mask_buffer) < 0) {
char message[64];
sprintf(message, "Error converting data for frame %d", *frame_number);
if (image2ordinal) {
sprintf(message, "Error converting data for frame %d (ordinal %d)", *frame_number, ordinal);
} else {
sprintf(message, "Error converting data for frame %d", *frame_number);
}
ERROR_JUMP(-2, done, message);
}
} else {
@@ -230,6 +443,11 @@ void plugin_close(int *error_flag) {
}
file_id = 0;
if (image2ordinal) {
free(image2ordinal);
image2ordinal = NULL;
}
if (mask_buffer) {
free(mask_buffer);
mask_buffer = NULL;
+195
View File
@@ -0,0 +1,195 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import json
import mimetools
import mimetypes
import os
import sys
import urllib2
def fail(msg, code=1):
sys.stderr.write("ERROR: %s\n" % msg)
sys.exit(code)
def api_request(url, token, method="GET", json_data=None, content_type=None):
data = None
headers = {
"Authorization": "token %s" % token,
}
if json_data is not None:
data = json.dumps(json_data)
headers["Content-Type"] = "application/json"
elif content_type is not None:
headers["Content-Type"] = content_type
request = urllib2.Request(url, data=data, headers=headers)
request.get_method = lambda: method
try:
response = urllib2.urlopen(request)
body = response.read()
status = response.getcode()
return status, body
except urllib2.HTTPError as e:
return e.code, e.read()
def encode_multipart_formdata(fields, files):
boundary = mimetools.choose_boundary()
crlf = "\r\n"
lines = []
for key, value in fields:
lines.append("--" + boundary)
lines.append('Content-Disposition: form-data; name="%s"' % key)
lines.append("")
lines.append(value)
for key, filename, content, content_type in files:
lines.append("--" + boundary)
lines.append(
'Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filename)
)
lines.append("Content-Type: %s" % content_type)
lines.append("")
lines.append(content)
lines.append("--" + boundary + "--")
lines.append("")
body = crlf.join(lines)
content_type = "multipart/form-data; boundary=%s" % boundary
return content_type, body
def multipart_request(url, token, fields, files):
content_type, body = encode_multipart_formdata(fields, files)
headers = {
"Authorization": "token %s" % token,
"Content-Type": content_type,
}
request = urllib2.Request(url, data=body, headers=headers)
request.get_method = lambda: "POST"
try:
response = urllib2.urlopen(request)
return response.getcode(), response.read()
except urllib2.HTTPError as e:
return e.code, e.read()
def get_release_by_tag(api_base, token, tag):
url = "%s/releases/tags/%s" % (api_base, tag)
status, body = api_request(url, token, method="GET")
if status == 200:
return json.loads(body)
if status == 404:
return None
fail("failed to fetch release for tag %s: HTTP %s\n%s" % (tag, status, body))
def create_release(api_base, token, tag):
url = "%s/releases" % api_base
payload = {
"tag_name": tag,
"name": tag,
"draft": False,
"prerelease": False,
}
status, body = api_request(url, token, method="POST", json_data=payload)
if status not in (200, 201):
fail("failed to create release for tag %s: HTTP %s\n%s" % (tag, status, body))
return json.loads(body)
def ensure_release(api_base, token, tag):
release = get_release_by_tag(api_base, token, tag)
if release is not None:
print("Release for tag %s already exists (id=%s)" % (tag, release.get("id")))
return release
print("Release for tag %s does not exist, creating it" % tag)
release = create_release(api_base, token, tag)
print("Created release id=%s" % release.get("id"))
return release
def upload_asset(api_base, token, release_id, file_path):
if not os.path.isfile(file_path):
fail("file not found: %s" % file_path)
asset_name = os.path.basename(file_path)
mime_type = mimetypes.guess_type(asset_name)[0] or "application/octet-stream"
with open(file_path, "rb") as f:
content = f.read()
url = "%s/releases/%s/assets" % (api_base, release_id)
status, body = multipart_request(
url,
token,
fields=[("name", asset_name)],
files=[("attachment", asset_name, content, mime_type)],
)
if status not in (200, 201):
fail("failed to upload asset %s: HTTP %s\n%s" % (asset_name, status, body))
print("Uploaded asset: %s" % asset_name)
def find_assets(build_dir):
names = []
for name in sorted(os.listdir(build_dir)):
if name.startswith("libdurin-plugin.so"):
full = os.path.join(build_dir, name)
if os.path.isfile(full):
names.append(full)
return names
def main():
if len(sys.argv) != 4:
sys.stderr.write(
"Usage: %s <gitea-server> <owner/repo> <tag>\n" % sys.argv[0]
)
sys.stderr.write(
"Example: %s https://gitea.psi.ch mx/durin 1.0.0\n" % sys.argv[0]
)
sys.exit(1)
server = sys.argv[1].rstrip("/")
repo = sys.argv[2]
tag = sys.argv[3]
token = os.environ.get("GITEA_TOKEN")
if not token:
fail("GITEA_TOKEN environment variable is not set")
build_dir = "build"
if not os.path.isdir(build_dir):
fail("build directory not found: %s" % build_dir)
assets = find_assets(build_dir)
if not assets:
fail("no libdurin-plugin.so* files found in %s" % build_dir)
api_base = "%s/api/v1/repos/%s" % (server, repo)
release = ensure_release(api_base, token, tag)
release_id = release.get("id")
if not release_id:
fail("release id missing in API response")
for asset in assets:
upload_asset(api_base, token, release_id, asset)
print("Done.")
if __name__ == "__main__":
main()