Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 6ec2cd7437 |
@@ -1,35 +0,0 @@
|
|||||||
name: Build Packages
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- '**'
|
|
||||||
tags:
|
|
||||||
- '**'
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build-library:
|
|
||||||
name: Build
|
|
||||||
runs-on: durin
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Build library
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
mkdir -p build
|
|
||||||
cd build
|
|
||||||
cmake -DCMAKE_BUILD_TYPE=Release ..
|
|
||||||
make -j
|
|
||||||
|
|
||||||
- name: Upload release assets to Gitea
|
|
||||||
if: github.ref_type == 'tag'
|
|
||||||
shell: bash
|
|
||||||
env:
|
|
||||||
GITEA_TOKEN: ${{ secrets.PIP_REPOSITORY_API_TOKEN }}
|
|
||||||
run: |
|
|
||||||
set -euo pipefail
|
|
||||||
python tools/gitea_release_upload.py \
|
|
||||||
"${{ github.server_url }}" \
|
|
||||||
"${{ github.repository }}" \
|
|
||||||
"${{ github.ref_name }}"
|
|
||||||
@@ -1,56 +0,0 @@
|
|||||||
CMAKE_MINIMUM_REQUIRED(VERSION 3.19)
|
|
||||||
|
|
||||||
PROJECT(durin VERSION 1.0.0 LANGUAGES C)
|
|
||||||
|
|
||||||
include(FetchContent)
|
|
||||||
|
|
||||||
SET(CMAKE_C_FLAGS_RELEASE "-O3")
|
|
||||||
|
|
||||||
SET(CMAKE_C_STANDARD 99)
|
|
||||||
SET(CMAKE_C_STANDARD_REQUIRED ON)
|
|
||||||
SET(CMAKE_C_EXTENSIONS OFF)
|
|
||||||
|
|
||||||
SET(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
|
||||||
|
|
||||||
SET(BUILD_SHARED_LIBS OFF)
|
|
||||||
|
|
||||||
set(HDF5_USE_STATIC_LIBRARIES TRUE)
|
|
||||||
|
|
||||||
SET(HDF5_BUILD_HL_LIB OFF)
|
|
||||||
|
|
||||||
SET(HDF5_ENABLE_THREADSAFE ON)
|
|
||||||
SET(HDF5_ENABLE_SZIP_SUPPORT OFF)
|
|
||||||
SET(HDF5_ENABLE_SZIP_ENCODING OFF)
|
|
||||||
SET(HDF5_BUILD_EXAMPLES OFF)
|
|
||||||
SET(HDF5_BUILD_CPP_LIB OFF)
|
|
||||||
SET(HDF5_ENABLE_Z_LIB_SUPPORT OFF)
|
|
||||||
SET(HDF5_EXTERNALLY_CONFIGURED 1)
|
|
||||||
|
|
||||||
INCLUDE_DIRECTORIES(bslz4/src)
|
|
||||||
|
|
||||||
FetchContent_Declare(hdf5
|
|
||||||
URL https://github.com/HDFGroup/hdf5/releases/download/hdf5_1.14.6/hdf5-1.14.6.tar.gz
|
|
||||||
DOWNLOAD_EXTRACT_TIMESTAMP FALSE
|
|
||||||
EXCLUDE_FROM_ALL)
|
|
||||||
|
|
||||||
FetchContent_MakeAvailable(hdf5)
|
|
||||||
|
|
||||||
ADD_LIBRARY(durin-plugin SHARED
|
|
||||||
src/plugin.c src/plugin.h
|
|
||||||
src/err.c src/err.h
|
|
||||||
src/filters.c src/filters.h
|
|
||||||
src/file.c src/file.h bslz4/src/bitshuffle.c bslz4/src/bitshuffle.h
|
|
||||||
bslz4/src/bitshuffle_core.c bslz4/src/bitshuffle_core.h
|
|
||||||
bslz4/src/bitshuffle_internals.h
|
|
||||||
bslz4/src/bshuf_h5filter.c bslz4/src/bshuf_h5filter.h
|
|
||||||
bslz4/src/iochain.c bslz4/src/iochain.h
|
|
||||||
bslz4/src/lz4.c bslz4/src/lz4.h
|
|
||||||
)
|
|
||||||
|
|
||||||
set_target_properties(durin-plugin PROPERTIES VERSION 1.0.0)
|
|
||||||
|
|
||||||
TARGET_COMPILE_DEFINITIONS(durin-plugin PRIVATE
|
|
||||||
H5_USE_110_API
|
|
||||||
USE_BITSHUFFLE)
|
|
||||||
|
|
||||||
TARGET_LINK_LIBRARIES(durin-plugin PRIVATE hdf5-static)
|
|
||||||
-43
@@ -1,43 +0,0 @@
|
|||||||
FROM registry.access.redhat.com/ubi7/ubi
|
|
||||||
|
|
||||||
ARG HDF5_TAG="hdf5_1.14.6"
|
|
||||||
|
|
||||||
LABEL authors="Filip Leonarski"
|
|
||||||
|
|
||||||
ARG CMAKE_VERSION=3.31.6
|
|
||||||
ARG NODE_MAJOR=16
|
|
||||||
|
|
||||||
RUN yum -y update && \
|
|
||||||
yum -y install \
|
|
||||||
gcc \
|
|
||||||
gcc-c++ \
|
|
||||||
git \
|
|
||||||
make \
|
|
||||||
tar \
|
|
||||||
gzip \
|
|
||||||
curl && \
|
|
||||||
yum clean all
|
|
||||||
|
|
||||||
# Install a recent Node.js (NodeSource). Change NODE_MAJOR if you want another major version.
|
|
||||||
RUN curl -fsSL https://rpm.nodesource.com/setup_${NODE_MAJOR}.x | bash - && \
|
|
||||||
yum -y install nodejs && \
|
|
||||||
yum clean all && \
|
|
||||||
node --version && npm --version && (corepack enable || true)
|
|
||||||
|
|
||||||
RUN set -eux; \
|
|
||||||
arch="$(uname -m)"; \
|
|
||||||
case "$arch" in \
|
|
||||||
x86_64) cmake_arch="x86_64" ;; \
|
|
||||||
aarch64) cmake_arch="aarch64" ;; \
|
|
||||||
*) echo "Unsupported architecture: $arch"; exit 1 ;; \
|
|
||||||
esac; \
|
|
||||||
curl -L "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-${cmake_arch}.tar.gz" \
|
|
||||||
-o /tmp/cmake.tar.gz; \
|
|
||||||
tar -xzf /tmp/cmake.tar.gz -C /opt; \
|
|
||||||
ln -s "/opt/cmake-${CMAKE_VERSION}-linux-${cmake_arch}/bin/cmake" /usr/local/bin/cmake; \
|
|
||||||
ln -s "/opt/cmake-${CMAKE_VERSION}-linux-${cmake_arch}/bin/ctest" /usr/local/bin/ctest; \
|
|
||||||
ln -s "/opt/cmake-${CMAKE_VERSION}-linux-${cmake_arch}/bin/cpack" /usr/local/bin/cpack; \
|
|
||||||
rm -f /tmp/cmake.tar.gz
|
|
||||||
|
|
||||||
# Default entrypoint prints tool versions and hints.
|
|
||||||
CMD ["/bin/bash", "-l"]
|
|
||||||
@@ -0,0 +1,55 @@
|
|||||||
|
BUILD_DIR ?= ./build
|
||||||
|
SRC_DIR = ./src
|
||||||
|
TEST_DIR = ./test
|
||||||
|
INC_DIR = $(SRC_DIR)
|
||||||
|
|
||||||
|
BSLZ4_SRC_DIR = ./bslz4/src
|
||||||
|
BSLZ4_BUILD_DIR = ./bslz4/build
|
||||||
|
BSLZ4_INC_DIR = $(BSLZ4_SRC_DIR)
|
||||||
|
|
||||||
|
CC=h5cc
|
||||||
|
CFLAGS=-DH5_USE_110_API -Wall -g -O2 -fpic -I$(INC_DIR) -I$(BSLZ4_INC_DIR) -std=c89 -shlib
|
||||||
|
|
||||||
|
.PHONY: plugin
|
||||||
|
plugin: $(BUILD_DIR)/durin-plugin.so
|
||||||
|
|
||||||
|
.PHONY: all
|
||||||
|
all: plugin example test_plugin
|
||||||
|
|
||||||
|
.PHONY: example
|
||||||
|
example: $(BUILD_DIR)/example
|
||||||
|
|
||||||
|
.PHONY: test_plugin
|
||||||
|
test_plugin: $(BUILD_DIR)/test_plugin
|
||||||
|
|
||||||
|
$(BUILD_DIR)/test_plugin: $(TEST_DIR)/generic_data_plugin.f90 $(TEST_DIR)/test_generic_host.f90
|
||||||
|
mkdir -p $(BUILD_DIR)
|
||||||
|
gfortran -O -g -fopenmp -ldl $(TEST_DIR)/generic_data_plugin.f90 $(TEST_DIR)/test_generic_host.f90 -o $@ -J$(BUILD_DIR)
|
||||||
|
|
||||||
|
$(BUILD_DIR)/%.o: $(SRC_DIR)/%.c
|
||||||
|
mkdir -p $(BUILD_DIR)
|
||||||
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
|
$(BSLZ4_BUILD_DIR)/%.o: $(BSLZ4_SRC_DIR)/%.c
|
||||||
|
mkdir -p $(BSLZ4_BUILD_DIR)
|
||||||
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
|
$(BUILD_DIR)/bslz4.a: $(BSLZ4_BUILD_DIR)/lz4.o $(BSLZ4_BUILD_DIR)/bitshuffle.o \
|
||||||
|
$(BSLZ4_BUILD_DIR)/bitshuffle_core.o $(BSLZ4_BUILD_DIR)/iochain.o
|
||||||
|
mkdir -p $(BUILD_DIR)
|
||||||
|
ar rcs $@ $^
|
||||||
|
|
||||||
|
$(BUILD_DIR)/durin-plugin.so: $(BUILD_DIR)/plugin.o $(BUILD_DIR)/file.o $(BUILD_DIR)/err.o $(BUILD_DIR)/filters.o \
|
||||||
|
$(BUILD_DIR)/bslz4.a
|
||||||
|
mkdir -p $(BUILD_DIR)
|
||||||
|
$(CC) $(CFLAGS) -shared $^ -o $(BUILD_DIR)/durin-plugin.so
|
||||||
|
|
||||||
|
$(BUILD_DIR)/example: $(BUILD_DIR)/test.o $(BUILD_DIR)/file.o $(BUILD_DIR)/err.o $(BUILD_DIR)/filters.o \
|
||||||
|
$(BUILD_DIR)/bslz4.a
|
||||||
|
mkdir -p $(BUILD_DIR)
|
||||||
|
$(CC) $(CFLAGS) $^ -o $(BUILD_DIR)/example
|
||||||
|
|
||||||
|
.PHONY: clean
|
||||||
|
clean:
|
||||||
|
rm -r $(BUILD_DIR)
|
||||||
|
rm -r $(BSLZ4_BUILD_DIR)
|
||||||
@@ -8,24 +8,14 @@ See:
|
|||||||
* https://www.dectris.com/features/features-eiger-x/hdf5-and-nexus
|
* https://www.dectris.com/features/features-eiger-x/hdf5-and-nexus
|
||||||
* https://strucbio.biologie.uni-konstanz.de/xdswiki
|
* https://strucbio.biologie.uni-konstanz.de/xdswiki
|
||||||
|
|
||||||
## Paul Scherrer Institute fork
|
|
||||||
This fork is maintained by Paul Scherrer Institute.
|
|
||||||
The plugin is based on the code developed by the Diamond Light Source and modified by Global Phasing.
|
|
||||||
Modifications from PSI side:
|
|
||||||
* Using CMake for building the plugin
|
|
||||||
* HDF5 is built as part of the CMake process
|
|
||||||
* Bitshuffle/LZ4 is updated to the latest version
|
|
||||||
* HDF5 filter is automatically registered for virtual dataset HDF5 files
|
|
||||||
* Docker image to build the plugin on x86/RH7 is provided.
|
|
||||||
* Generated versioned shared library, so the used version can be tracked.
|
|
||||||
|
|
||||||
## Get Durin
|
## Get Durin
|
||||||
Linux x86 version is automatically built on RHEL 7 and available from the Gitea release page.
|
|
||||||
|
Latest release at https://github.com/DiamondLightSource/durin/releases/tag/2019v1
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
In your XDS.INP add:
|
In your XDS.INP add:
|
||||||
```
|
```
|
||||||
LIB=[path to libdurin-plugin.so]
|
LIB=[path to durin-plugin.so]
|
||||||
NAME_TEMPLATE_OF_DATA_FRAMES=[data_path]/data_images_??????.h5
|
NAME_TEMPLATE_OF_DATA_FRAMES=[data_path]/data_images_??????.h5
|
||||||
```
|
```
|
||||||
XDS will instruct the plugin to load `[data_path]/data_images_master.h5` and this must be the
|
XDS will instruct the plugin to load `[data_path]/data_images_master.h5` and this must be the
|
||||||
@@ -37,25 +27,55 @@ the master file contains an `NXdata` or `NXdetector` group with either a dataset
|
|||||||
series of datasets named `data_000001`, `data_000002`, etc.
|
series of datasets named `data_000001`, `data_000002`, etc.
|
||||||
|
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
* HDF5 Library (https://www.hdfgroup.org/downloads)
|
||||||
|
|
||||||
|
|
||||||
## Building
|
## Building
|
||||||
Requires CMake version 3.19 or later + GCC compiler. There is no need to build HDF5 separately.
|
|
||||||
To build:
|
### Building HDF5 library
|
||||||
|
The HDF5 library used when building durin must have been compiled with specific switches enabled
|
||||||
|
to allow the durin plugin to be built and used.
|
||||||
|
|
||||||
|
Download the HDF5 source code (https://www.hdfgroup.org/downloads/hdf5/source-code) and extract
|
||||||
|
to any directory (referred to as `/hdf5_dir`), and run the following commands.
|
||||||
```
|
```
|
||||||
|
cd /hdf5_dir
|
||||||
mkdir build
|
mkdir build
|
||||||
cd build
|
cd build
|
||||||
cmake ..
|
export CFLAGS=-fPIC
|
||||||
make -j
|
../configure --enable-threadsafe --enable-deprecated-symbols --enable-hl --enable-unsupported
|
||||||
|
make
|
||||||
|
make check
|
||||||
|
make install
|
||||||
```
|
```
|
||||||
|
The hdf5 tools and libraries should now be located in `/hdf5_dir/build/hdf5`
|
||||||
|
|
||||||
|
For reference, the plugin requires the thread-safe switch and the optimised chunk read function.
|
||||||
|
The chunk read function may be defined in the high level library instead of the regular library,
|
||||||
|
depending on the exact HDF5 version downloaded (hence the --enable-deprecated-symbols _and_ --enable-hl).
|
||||||
|
The unsupported flag enables building with both threadsafe and high-level enabled.
|
||||||
|
|
||||||
|
|
||||||
|
### Building durin plugin
|
||||||
|
The plugin makefile will use the "h5cc" compiler wrapper, provided by the HDF5 library, which
|
||||||
|
must be on your PATH.
|
||||||
|
Download or clone the plugin source code (https://github.com/DiamondLightSource/durin)
|
||||||
|
into any directory (referred to as `/durin_dir`) and run the following commands.
|
||||||
|
```
|
||||||
|
cd /durin_dir
|
||||||
|
PATH=/hdf5_dir/build/hdf5/bin:$PATH
|
||||||
|
make
|
||||||
|
```
|
||||||
|
The plugin is located at `/durin_dir/build/durin-plugin.so` and should be added to the
|
||||||
|
XDS.INP file as `LIB=/durin_dir/build/durin-plugin.so`
|
||||||
|
|
||||||
|
|
||||||
The plugin is located at `build/libdurin-plugin.so` and should be added to the
|
|
||||||
XDS.INP file as `LIB=<CURRENT_DIRECTORY>/build/libdurin-plugin.so`.
|
|
||||||
Alternatively, versioned copy is also provided, e.g. `libdurin-plugin.so.1.0.0`, allowing to track
|
|
||||||
the current version of the plugin.
|
|
||||||
|
|
||||||
## Example XDS.INP
|
## Example XDS.INP
|
||||||
```
|
```
|
||||||
DETECTOR=PILATUS MINIMUM_VALID_PIXEL_VALUE=0 OVERLOAD=4096
|
DETECTOR=PILATUS MINIMUM_VALID_PIXEL_VALUE=0 OVERLOAD=4096
|
||||||
LIB=/opt/durin/build/libdurin-plugin.so
|
LIB=/opt/durin/build/durin-plugin.so
|
||||||
SENSOR_THICKNESS= 0.450
|
SENSOR_THICKNESS= 0.450
|
||||||
!SENSOR_MATERIAL / THICKNESS Si 0.450
|
!SENSOR_MATERIAL / THICKNESS Si 0.450
|
||||||
!SILICON= 3.953379
|
!SILICON= 3.953379
|
||||||
@@ -75,3 +95,6 @@ TRUSTED_REGION= 0.0 1.41
|
|||||||
DATA_RANGE= 1 600
|
DATA_RANGE= 1 600
|
||||||
JOB=XYCORR INIT COLSPOT IDXREF DEFPIX INTEGRATE CORRECT
|
JOB=XYCORR INIT COLSPOT IDXREF DEFPIX INTEGRATE CORRECT
|
||||||
```
|
```
|
||||||
|
|
||||||
|
N.B. the master file is needed, not the .nxs one which follows the
|
||||||
|
standard.
|
||||||
|
|||||||
+25
-139
@@ -14,22 +14,23 @@
|
|||||||
#include "bitshuffle_internals.h"
|
#include "bitshuffle_internals.h"
|
||||||
#include "lz4.h"
|
#include "lz4.h"
|
||||||
|
|
||||||
#ifdef ZSTD_SUPPORT
|
|
||||||
#include "zstd.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
|
||||||
// Macros.
|
|
||||||
|
|
||||||
|
#define BSHUF_LZ4_DECOMPRESS_FAST
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#define CHECK_ERR_FREE_LZ(count, buf) if (count < 0) { \
|
#define CHECK_ERR_FREE_LZ(count, buf) if (count < 0) { \
|
||||||
free(buf); return count - 1000; }
|
free(buf); return count - 1000; }
|
||||||
|
|
||||||
|
|
||||||
/* Bitshuffle and compress a single block. */
|
/* Bitshuffle and compress a single block. */
|
||||||
int64_t bshuf_compress_lz4_block(ioc_chain *C_ptr, \
|
int64_t bshuf_compress_lz4_block(ioc_chain *C_ptr, \
|
||||||
const size_t size, const size_t elem_size, const int option) {
|
const size_t size, const size_t elem_size) {
|
||||||
|
|
||||||
int64_t nbytes, count;
|
int64_t nbytes, count;
|
||||||
void *tmp_buf_bshuf;
|
void *tmp_buf_bshuf;
|
||||||
@@ -41,8 +42,7 @@ int64_t bshuf_compress_lz4_block(ioc_chain *C_ptr, \
|
|||||||
tmp_buf_bshuf = malloc(size * elem_size);
|
tmp_buf_bshuf = malloc(size * elem_size);
|
||||||
if (tmp_buf_bshuf == NULL) return -1;
|
if (tmp_buf_bshuf == NULL) return -1;
|
||||||
|
|
||||||
int dst_capacity = LZ4_compressBound(size * elem_size);
|
tmp_buf_lz4 = malloc(LZ4_compressBound(size * elem_size));
|
||||||
tmp_buf_lz4 = malloc(dst_capacity);
|
|
||||||
if (tmp_buf_lz4 == NULL){
|
if (tmp_buf_lz4 == NULL){
|
||||||
free(tmp_buf_bshuf);
|
free(tmp_buf_bshuf);
|
||||||
return -1;
|
return -1;
|
||||||
@@ -58,7 +58,7 @@ int64_t bshuf_compress_lz4_block(ioc_chain *C_ptr, \
|
|||||||
free(tmp_buf_bshuf);
|
free(tmp_buf_bshuf);
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
nbytes = LZ4_compress_default((const char*) tmp_buf_bshuf, (char*) tmp_buf_lz4, size * elem_size, dst_capacity);
|
nbytes = LZ4_compress((const char*) tmp_buf_bshuf, (char*) tmp_buf_lz4, size * elem_size);
|
||||||
free(tmp_buf_bshuf);
|
free(tmp_buf_bshuf);
|
||||||
CHECK_ERR_FREE_LZ(nbytes, tmp_buf_lz4);
|
CHECK_ERR_FREE_LZ(nbytes, tmp_buf_lz4);
|
||||||
|
|
||||||
@@ -76,7 +76,7 @@ int64_t bshuf_compress_lz4_block(ioc_chain *C_ptr, \
|
|||||||
|
|
||||||
/* Decompress and bitunshuffle a single block. */
|
/* Decompress and bitunshuffle a single block. */
|
||||||
int64_t bshuf_decompress_lz4_block(ioc_chain *C_ptr,
|
int64_t bshuf_decompress_lz4_block(ioc_chain *C_ptr,
|
||||||
const size_t size, const size_t elem_size, const int option) {
|
const size_t size, const size_t elem_size) {
|
||||||
|
|
||||||
int64_t nbytes, count;
|
int64_t nbytes, count;
|
||||||
void *out, *tmp_buf;
|
void *out, *tmp_buf;
|
||||||
@@ -96,6 +96,14 @@ int64_t bshuf_decompress_lz4_block(ioc_chain *C_ptr,
|
|||||||
tmp_buf = malloc(size * elem_size);
|
tmp_buf = malloc(size * elem_size);
|
||||||
if (tmp_buf == NULL) return -1;
|
if (tmp_buf == NULL) return -1;
|
||||||
|
|
||||||
|
#ifdef BSHUF_LZ4_DECOMPRESS_FAST
|
||||||
|
nbytes = LZ4_decompress_fast((const char*) in + 4, (char*) tmp_buf, size * elem_size);
|
||||||
|
CHECK_ERR_FREE_LZ(nbytes, tmp_buf);
|
||||||
|
if (nbytes != nbytes_from_header) {
|
||||||
|
free(tmp_buf);
|
||||||
|
return -91;
|
||||||
|
}
|
||||||
|
#else
|
||||||
nbytes = LZ4_decompress_safe((const char*) in + 4, (char *) tmp_buf, nbytes_from_header,
|
nbytes = LZ4_decompress_safe((const char*) in + 4, (char *) tmp_buf, nbytes_from_header,
|
||||||
size * elem_size);
|
size * elem_size);
|
||||||
CHECK_ERR_FREE_LZ(nbytes, tmp_buf);
|
CHECK_ERR_FREE_LZ(nbytes, tmp_buf);
|
||||||
@@ -104,7 +112,7 @@ int64_t bshuf_decompress_lz4_block(ioc_chain *C_ptr,
|
|||||||
return -91;
|
return -91;
|
||||||
}
|
}
|
||||||
nbytes = nbytes_from_header;
|
nbytes = nbytes_from_header;
|
||||||
|
#endif
|
||||||
count = bshuf_untrans_bit_elem(tmp_buf, out, size, elem_size);
|
count = bshuf_untrans_bit_elem(tmp_buf, out, size, elem_size);
|
||||||
CHECK_ERR_FREE(count, tmp_buf);
|
CHECK_ERR_FREE(count, tmp_buf);
|
||||||
nbytes += 4;
|
nbytes += 4;
|
||||||
@@ -113,92 +121,6 @@ int64_t bshuf_decompress_lz4_block(ioc_chain *C_ptr,
|
|||||||
return nbytes;
|
return nbytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef ZSTD_SUPPORT
|
|
||||||
/* Bitshuffle and compress a single block. */
|
|
||||||
int64_t bshuf_compress_zstd_block(ioc_chain *C_ptr, \
|
|
||||||
const size_t size, const size_t elem_size, const int comp_lvl) {
|
|
||||||
|
|
||||||
int64_t nbytes, count;
|
|
||||||
void *tmp_buf_bshuf;
|
|
||||||
void *tmp_buf_zstd;
|
|
||||||
size_t this_iter;
|
|
||||||
const void *in;
|
|
||||||
void *out;
|
|
||||||
|
|
||||||
tmp_buf_bshuf = malloc(size * elem_size);
|
|
||||||
if (tmp_buf_bshuf == NULL) return -1;
|
|
||||||
|
|
||||||
size_t tmp_buf_zstd_size = ZSTD_compressBound(size * elem_size);
|
|
||||||
tmp_buf_zstd = malloc(tmp_buf_zstd_size);
|
|
||||||
if (tmp_buf_zstd == NULL){
|
|
||||||
free(tmp_buf_bshuf);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
in = ioc_get_in(C_ptr, &this_iter);
|
|
||||||
ioc_set_next_in(C_ptr, &this_iter, (void*) ((char*) in + size * elem_size));
|
|
||||||
|
|
||||||
count = bshuf_trans_bit_elem(in, tmp_buf_bshuf, size, elem_size);
|
|
||||||
if (count < 0) {
|
|
||||||
free(tmp_buf_zstd);
|
|
||||||
free(tmp_buf_bshuf);
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
nbytes = ZSTD_compress(tmp_buf_zstd, tmp_buf_zstd_size, (const void*)tmp_buf_bshuf, size * elem_size, comp_lvl);
|
|
||||||
free(tmp_buf_bshuf);
|
|
||||||
CHECK_ERR_FREE_LZ(nbytes, tmp_buf_zstd);
|
|
||||||
|
|
||||||
out = ioc_get_out(C_ptr, &this_iter);
|
|
||||||
ioc_set_next_out(C_ptr, &this_iter, (void *) ((char *) out + nbytes + 4));
|
|
||||||
|
|
||||||
bshuf_write_uint32_BE(out, nbytes);
|
|
||||||
memcpy((char *) out + 4, tmp_buf_zstd, nbytes);
|
|
||||||
|
|
||||||
free(tmp_buf_zstd);
|
|
||||||
|
|
||||||
return nbytes + 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* Decompress and bitunshuffle a single block. */
|
|
||||||
int64_t bshuf_decompress_zstd_block(ioc_chain *C_ptr,
|
|
||||||
const size_t size, const size_t elem_size, const int option) {
|
|
||||||
|
|
||||||
int64_t nbytes, count;
|
|
||||||
void *out, *tmp_buf;
|
|
||||||
const void *in;
|
|
||||||
size_t this_iter;
|
|
||||||
int32_t nbytes_from_header;
|
|
||||||
|
|
||||||
in = ioc_get_in(C_ptr, &this_iter);
|
|
||||||
nbytes_from_header = bshuf_read_uint32_BE(in);
|
|
||||||
ioc_set_next_in(C_ptr, &this_iter,
|
|
||||||
(void*) ((char*) in + nbytes_from_header + 4));
|
|
||||||
|
|
||||||
out = ioc_get_out(C_ptr, &this_iter);
|
|
||||||
ioc_set_next_out(C_ptr, &this_iter,
|
|
||||||
(void *) ((char *) out + size * elem_size));
|
|
||||||
|
|
||||||
tmp_buf = malloc(size * elem_size);
|
|
||||||
if (tmp_buf == NULL) return -1;
|
|
||||||
|
|
||||||
nbytes = ZSTD_decompress(tmp_buf, size * elem_size, (void *)((char *) in + 4), nbytes_from_header);
|
|
||||||
CHECK_ERR_FREE_LZ(nbytes, tmp_buf);
|
|
||||||
if (nbytes != size * elem_size) {
|
|
||||||
free(tmp_buf);
|
|
||||||
return -91;
|
|
||||||
}
|
|
||||||
|
|
||||||
nbytes = nbytes_from_header;
|
|
||||||
count = bshuf_untrans_bit_elem(tmp_buf, out, size, elem_size);
|
|
||||||
CHECK_ERR_FREE(count, tmp_buf);
|
|
||||||
nbytes += 4;
|
|
||||||
|
|
||||||
free(tmp_buf);
|
|
||||||
return nbytes;
|
|
||||||
}
|
|
||||||
#endif // ZSTD_SUPPORT
|
|
||||||
|
|
||||||
|
|
||||||
/* ---- Public functions ----
|
/* ---- Public functions ----
|
||||||
*
|
*
|
||||||
@@ -216,13 +138,13 @@ size_t bshuf_compress_lz4_bound(const size_t size,
|
|||||||
}
|
}
|
||||||
if (block_size % BSHUF_BLOCKED_MULT) return -81;
|
if (block_size % BSHUF_BLOCKED_MULT) return -81;
|
||||||
|
|
||||||
// Note that each block gets a 4 byte header.
|
|
||||||
// Size of full blocks.
|
|
||||||
bound = (LZ4_compressBound(block_size * elem_size) + 4) * (size / block_size);
|
bound = (LZ4_compressBound(block_size * elem_size) + 4) * (size / block_size);
|
||||||
// Size of partial blocks, if any.
|
|
||||||
leftover = ((size % block_size) / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT;
|
leftover = ((size % block_size) / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT;
|
||||||
if (leftover) bound += LZ4_compressBound(leftover * elem_size) + 4;
|
if (leftover) bound += LZ4_compressBound(leftover * elem_size) + 4;
|
||||||
// Size of uncompressed data not fitting into any blocks.
|
|
||||||
bound += (size % BSHUF_BLOCKED_MULT) * elem_size;
|
bound += (size % BSHUF_BLOCKED_MULT) * elem_size;
|
||||||
return bound;
|
return bound;
|
||||||
}
|
}
|
||||||
@@ -231,49 +153,13 @@ size_t bshuf_compress_lz4_bound(const size_t size,
|
|||||||
int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size,
|
int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size,
|
||||||
const size_t elem_size, size_t block_size) {
|
const size_t elem_size, size_t block_size) {
|
||||||
return bshuf_blocked_wrap_fun(&bshuf_compress_lz4_block, in, out, size,
|
return bshuf_blocked_wrap_fun(&bshuf_compress_lz4_block, in, out, size,
|
||||||
elem_size, block_size, 0/*option*/);
|
elem_size, block_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size,
|
int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size,
|
||||||
const size_t elem_size, size_t block_size) {
|
const size_t elem_size, size_t block_size) {
|
||||||
return bshuf_blocked_wrap_fun(&bshuf_decompress_lz4_block, in, out, size,
|
return bshuf_blocked_wrap_fun(&bshuf_decompress_lz4_block, in, out, size,
|
||||||
elem_size, block_size, 0/*option*/);
|
elem_size, block_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef ZSTD_SUPPORT
|
|
||||||
size_t bshuf_compress_zstd_bound(const size_t size,
|
|
||||||
const size_t elem_size, size_t block_size) {
|
|
||||||
|
|
||||||
size_t bound, leftover;
|
|
||||||
|
|
||||||
if (block_size == 0) {
|
|
||||||
block_size = bshuf_default_block_size(elem_size);
|
|
||||||
}
|
|
||||||
if (block_size % BSHUF_BLOCKED_MULT) return -81;
|
|
||||||
|
|
||||||
// Note that each block gets a 4 byte header.
|
|
||||||
// Size of full blocks.
|
|
||||||
bound = (ZSTD_compressBound(block_size * elem_size) + 4) * (size / block_size);
|
|
||||||
// Size of partial blocks, if any.
|
|
||||||
leftover = ((size % block_size) / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT;
|
|
||||||
if (leftover) bound += ZSTD_compressBound(leftover * elem_size) + 4;
|
|
||||||
// Size of uncompressed data not fitting into any blocks.
|
|
||||||
bound += (size % BSHUF_BLOCKED_MULT) * elem_size;
|
|
||||||
return bound;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int64_t bshuf_compress_zstd(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size, size_t block_size, const int comp_lvl) {
|
|
||||||
return bshuf_blocked_wrap_fun(&bshuf_compress_zstd_block, in, out, size,
|
|
||||||
elem_size, block_size, comp_lvl);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int64_t bshuf_decompress_zstd(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size, size_t block_size) {
|
|
||||||
return bshuf_blocked_wrap_fun(&bshuf_decompress_zstd_block, in, out, size,
|
|
||||||
elem_size, block_size, 0/*option*/);
|
|
||||||
}
|
|
||||||
#endif // ZSTD_SUPPORT
|
|
||||||
|
|||||||
+7
-89
@@ -35,10 +35,6 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
|
||||||
* ---- LZ4 Interface ----
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ---- bshuf_compress_lz4_bound ----
|
/* ---- bshuf_compress_lz4_bound ----
|
||||||
*
|
*
|
||||||
* Bound on size of data compressed with *bshuf_compress_lz4*.
|
* Bound on size of data compressed with *bshuf_compress_lz4*.
|
||||||
@@ -98,6 +94,11 @@ int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size, const s
|
|||||||
* To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size*
|
* To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size*
|
||||||
* must patch the parameters used to compress the data.
|
* must patch the parameters used to compress the data.
|
||||||
*
|
*
|
||||||
|
* NOT TO BE USED WITH UNTRUSTED DATA: This routine uses the function
|
||||||
|
* LZ4_decompress_fast from LZ4, which does not protect against maliciously
|
||||||
|
* formed datasets. By modifying the compressed data, this function could be
|
||||||
|
* coerced into leaving the boundaries of the input buffer.
|
||||||
|
*
|
||||||
* Parameters
|
* Parameters
|
||||||
* ----------
|
* ----------
|
||||||
* in : input buffer
|
* in : input buffer
|
||||||
@@ -115,91 +116,8 @@ int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size, const s
|
|||||||
int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size,
|
int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size,
|
||||||
const size_t elem_size, size_t block_size);
|
const size_t elem_size, size_t block_size);
|
||||||
|
|
||||||
/*
|
|
||||||
* ---- ZSTD Interface ----
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifdef ZSTD_SUPPORT
|
|
||||||
/* ---- bshuf_compress_zstd_bound ----
|
|
||||||
*
|
|
||||||
* Bound on size of data compressed with *bshuf_compress_zstd*.
|
|
||||||
*
|
|
||||||
* Parameters
|
|
||||||
* ----------
|
|
||||||
* size : number of elements in input
|
|
||||||
* elem_size : element size of typed data
|
|
||||||
* block_size : Process in blocks of this many elements. Pass 0 to
|
|
||||||
* select automatically (recommended).
|
|
||||||
*
|
|
||||||
* Returns
|
|
||||||
* -------
|
|
||||||
* Bound on compressed data size.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
size_t bshuf_compress_zstd_bound(const size_t size,
|
|
||||||
const size_t elem_size, size_t block_size);
|
|
||||||
|
|
||||||
/* ---- bshuf_compress_zstd ----
|
|
||||||
*
|
|
||||||
* Bitshuffled and compress the data using zstd.
|
|
||||||
*
|
|
||||||
* Transpose within elements, in blocks of data of *block_size* elements then
|
|
||||||
* compress the blocks using ZSTD. In the output buffer, each block is prefixed
|
|
||||||
* by a 4 byte integer giving the compressed size of that block.
|
|
||||||
*
|
|
||||||
* Output buffer must be large enough to hold the compressed data. This could
|
|
||||||
* be in principle substantially larger than the input buffer. Use the routine
|
|
||||||
* *bshuf_compress_zstd_bound* to get an upper limit.
|
|
||||||
*
|
|
||||||
* Parameters
|
|
||||||
* ----------
|
|
||||||
* in : input buffer, must be of size * elem_size bytes
|
|
||||||
* out : output buffer, must be large enough to hold data.
|
|
||||||
* size : number of elements in input
|
|
||||||
* elem_size : element size of typed data
|
|
||||||
* block_size : Process in blocks of this many elements. Pass 0 to
|
|
||||||
* select automatically (recommended).
|
|
||||||
* comp_lvl : compression level applied
|
|
||||||
*
|
|
||||||
* Returns
|
|
||||||
* -------
|
|
||||||
* number of bytes used in output buffer, negative error-code if failed.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
int64_t bshuf_compress_zstd(const void* in, void* out, const size_t size, const size_t
|
|
||||||
elem_size, size_t block_size, const int comp_lvl);
|
|
||||||
|
|
||||||
|
|
||||||
/* ---- bshuf_decompress_zstd ----
|
|
||||||
*
|
|
||||||
* Undo compression and bitshuffling.
|
|
||||||
*
|
|
||||||
* Decompress data then un-bitshuffle it in blocks of *block_size* elements.
|
|
||||||
*
|
|
||||||
* To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size*
|
|
||||||
* must patch the parameters used to compress the data.
|
|
||||||
*
|
|
||||||
* Parameters
|
|
||||||
* ----------
|
|
||||||
* in : input buffer
|
|
||||||
* out : output buffer, must be of size * elem_size bytes
|
|
||||||
* size : number of elements in input
|
|
||||||
* elem_size : element size of typed data
|
|
||||||
* block_size : Process in blocks of this many elements. Pass 0 to
|
|
||||||
* select automatically (recommended).
|
|
||||||
*
|
|
||||||
* Returns
|
|
||||||
* -------
|
|
||||||
* number of bytes consumed in *input* buffer, negative error-code if failed.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
int64_t bshuf_decompress_zstd(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size, size_t block_size);
|
|
||||||
|
|
||||||
#endif // ZSTD_SUPPORT
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern "C"
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif // BITSHUFFLE_H
|
#endif
|
||||||
|
|||||||
+39
-742
@@ -16,55 +16,30 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
|
||||||
#if defined(__AVX512F__) && defined (__AVX512BW__) && defined(__AVX2__) && defined(__SSE2__)
|
|
||||||
#define USEAVX512
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__AVX2__) && defined (__SSE2__)
|
#if defined(__AVX2__) && defined (__SSE2__)
|
||||||
#define USEAVX2
|
#define USEAVX2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__SSE2__) || defined(NO_WARN_X86_INTRINSICS)
|
#if defined(__SSE2__)
|
||||||
#define USESSE2
|
#define USESSE2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__ARM_NEON__) || (__ARM_NEON)
|
|
||||||
#ifdef __aarch64__
|
|
||||||
#define USEARMNEON
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Conditional includes for SSE2 and AVX2.
|
|
||||||
#ifdef USEAVX2
|
#ifdef USEAVX2
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#elif defined USESSE2
|
#elif defined USESSE2
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
#elif defined USEARMNEON
|
|
||||||
#include <arm_neon.h>
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(_OPENMP) && defined(_MSC_VER)
|
|
||||||
typedef int64_t omp_size_t;
|
|
||||||
#else
|
|
||||||
typedef size_t omp_size_t;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Macros.
|
|
||||||
#define CHECK_MULT_EIGHT(n) if (n % 8) return -80;
|
#define CHECK_MULT_EIGHT(n) if (n % 8) return -80;
|
||||||
#define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
|
#define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
|
||||||
|
|
||||||
|
|
||||||
/* ---- Functions indicating compile time instruction set. ---- */
|
/* ---- Functions indicating compile time instruction set. ---- */
|
||||||
|
|
||||||
int bshuf_using_NEON(void) {
|
|
||||||
#ifdef USEARMNEON
|
|
||||||
return 1;
|
|
||||||
#else
|
|
||||||
return 0;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int bshuf_using_SSE2(void) {
|
int bshuf_using_SSE2(void) {
|
||||||
#ifdef USESSE2
|
#ifdef USESSE2
|
||||||
return 1;
|
return 1;
|
||||||
@@ -83,14 +58,6 @@ int bshuf_using_AVX2(void) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int bshuf_using_AVX512(void) {
|
|
||||||
#ifdef USEAVX512
|
|
||||||
return 1;
|
|
||||||
#else
|
|
||||||
return 0;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---- Worker code not requiring special instruction sets. ----
|
/* ---- Worker code not requiring special instruction sets. ----
|
||||||
*
|
*
|
||||||
* The following code does not use any x86 specific vectorized instructions
|
* The following code does not use any x86 specific vectorized instructions
|
||||||
@@ -164,8 +131,8 @@ int64_t bshuf_trans_byte_elem_remainder(const void* in, void* out, const size_t
|
|||||||
CHECK_MULT_EIGHT(start);
|
CHECK_MULT_EIGHT(start);
|
||||||
|
|
||||||
if (size > start) {
|
if (size > start) {
|
||||||
// ii loop separated into 2 loops so the compiler can unroll
|
|
||||||
// the inner one.
|
|
||||||
for (ii = start; ii + 7 < size; ii += 8) {
|
for (ii = start; ii + 7 < size; ii += 8) {
|
||||||
for (jj = 0; jj < elem_size; jj++) {
|
for (jj = 0; jj < elem_size; jj++) {
|
||||||
for (kk = 0; kk < 8; kk++) {
|
for (kk = 0; kk < 8; kk++) {
|
||||||
@@ -381,516 +348,10 @@ int64_t bshuf_untrans_bit_elem_scal(const void* in, void* out, const size_t size
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* ---- Worker code that uses Arm NEON ----
|
|
||||||
*
|
|
||||||
* The following code makes use of the Arm NEON instruction set.
|
|
||||||
* NEON technology is the implementation of the ARM Advanced Single
|
|
||||||
* Instruction Multiple Data (SIMD) extension.
|
|
||||||
* The NEON unit is the component of the processor that executes SIMD instructions.
|
|
||||||
* It is also called the NEON Media Processing Engine (MPE).
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifdef USEARMNEON
|
|
||||||
|
|
||||||
/* Transpose bytes within elements for 16 bit elements. */
|
|
||||||
int64_t bshuf_trans_byte_elem_NEON_16(const void* in, void* out, const size_t size) {
|
|
||||||
|
|
||||||
size_t ii;
|
|
||||||
const char *in_b = (const char*) in;
|
|
||||||
char *out_b = (char*) out;
|
|
||||||
int8x16_t a0, b0, a1, b1;
|
|
||||||
|
|
||||||
for (ii=0; ii + 15 < size; ii += 16) {
|
|
||||||
a0 = vld1q_s8(in_b + 2*ii + 0*16);
|
|
||||||
b0 = vld1q_s8(in_b + 2*ii + 1*16);
|
|
||||||
|
|
||||||
a1 = vzip1q_s8(a0, b0);
|
|
||||||
b1 = vzip2q_s8(a0, b0);
|
|
||||||
|
|
||||||
a0 = vzip1q_s8(a1, b1);
|
|
||||||
b0 = vzip2q_s8(a1, b1);
|
|
||||||
|
|
||||||
a1 = vzip1q_s8(a0, b0);
|
|
||||||
b1 = vzip2q_s8(a0, b0);
|
|
||||||
|
|
||||||
a0 = vzip1q_s8(a1, b1);
|
|
||||||
b0 = vzip2q_s8(a1, b1);
|
|
||||||
|
|
||||||
vst1q_s8(out_b + 0*size + ii, a0);
|
|
||||||
vst1q_s8(out_b + 1*size + ii, b0);
|
|
||||||
}
|
|
||||||
|
|
||||||
return bshuf_trans_byte_elem_remainder(in, out, size, 2,
|
|
||||||
size - size % 16);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* Transpose bytes within elements for 32 bit elements. */
|
|
||||||
int64_t bshuf_trans_byte_elem_NEON_32(const void* in, void* out, const size_t size) {
|
|
||||||
|
|
||||||
size_t ii;
|
|
||||||
const char *in_b;
|
|
||||||
char *out_b;
|
|
||||||
in_b = (const char*) in;
|
|
||||||
out_b = (char*) out;
|
|
||||||
int8x16_t a0, b0, c0, d0, a1, b1, c1, d1;
|
|
||||||
int64x2_t a2, b2, c2, d2;
|
|
||||||
|
|
||||||
for (ii=0; ii + 15 < size; ii += 16) {
|
|
||||||
a0 = vld1q_s8(in_b + 4*ii + 0*16);
|
|
||||||
b0 = vld1q_s8(in_b + 4*ii + 1*16);
|
|
||||||
c0 = vld1q_s8(in_b + 4*ii + 2*16);
|
|
||||||
d0 = vld1q_s8(in_b + 4*ii + 3*16);
|
|
||||||
|
|
||||||
a1 = vzip1q_s8(a0, b0);
|
|
||||||
b1 = vzip2q_s8(a0, b0);
|
|
||||||
c1 = vzip1q_s8(c0, d0);
|
|
||||||
d1 = vzip2q_s8(c0, d0);
|
|
||||||
|
|
||||||
a0 = vzip1q_s8(a1, b1);
|
|
||||||
b0 = vzip2q_s8(a1, b1);
|
|
||||||
c0 = vzip1q_s8(c1, d1);
|
|
||||||
d0 = vzip2q_s8(c1, d1);
|
|
||||||
|
|
||||||
a1 = vzip1q_s8(a0, b0);
|
|
||||||
b1 = vzip2q_s8(a0, b0);
|
|
||||||
c1 = vzip1q_s8(c0, d0);
|
|
||||||
d1 = vzip2q_s8(c0, d0);
|
|
||||||
|
|
||||||
a2 = vzip1q_s64(vreinterpretq_s64_s8(a1), vreinterpretq_s64_s8(c1));
|
|
||||||
b2 = vzip2q_s64(vreinterpretq_s64_s8(a1), vreinterpretq_s64_s8(c1));
|
|
||||||
c2 = vzip1q_s64(vreinterpretq_s64_s8(b1), vreinterpretq_s64_s8(d1));
|
|
||||||
d2 = vzip2q_s64(vreinterpretq_s64_s8(b1), vreinterpretq_s64_s8(d1));
|
|
||||||
|
|
||||||
vst1q_s64((int64_t *) (out_b + 0*size + ii), a2);
|
|
||||||
vst1q_s64((int64_t *) (out_b + 1*size + ii), b2);
|
|
||||||
vst1q_s64((int64_t *) (out_b + 2*size + ii), c2);
|
|
||||||
vst1q_s64((int64_t *) (out_b + 3*size + ii), d2);
|
|
||||||
}
|
|
||||||
|
|
||||||
return bshuf_trans_byte_elem_remainder(in, out, size, 4,
|
|
||||||
size - size % 16);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* Transpose bytes within elements for 64 bit elements. */
|
|
||||||
int64_t bshuf_trans_byte_elem_NEON_64(const void* in, void* out, const size_t size) {
|
|
||||||
|
|
||||||
size_t ii;
|
|
||||||
const char* in_b = (const char*) in;
|
|
||||||
char* out_b = (char*) out;
|
|
||||||
int8x16_t a0, b0, c0, d0, e0, f0, g0, h0;
|
|
||||||
int8x16_t a1, b1, c1, d1, e1, f1, g1, h1;
|
|
||||||
|
|
||||||
for (ii=0; ii + 15 < size; ii += 16) {
|
|
||||||
a0 = vld1q_s8(in_b + 8*ii + 0*16);
|
|
||||||
b0 = vld1q_s8(in_b + 8*ii + 1*16);
|
|
||||||
c0 = vld1q_s8(in_b + 8*ii + 2*16);
|
|
||||||
d0 = vld1q_s8(in_b + 8*ii + 3*16);
|
|
||||||
e0 = vld1q_s8(in_b + 8*ii + 4*16);
|
|
||||||
f0 = vld1q_s8(in_b + 8*ii + 5*16);
|
|
||||||
g0 = vld1q_s8(in_b + 8*ii + 6*16);
|
|
||||||
h0 = vld1q_s8(in_b + 8*ii + 7*16);
|
|
||||||
|
|
||||||
a1 = vzip1q_s8 (a0, b0);
|
|
||||||
b1 = vzip2q_s8 (a0, b0);
|
|
||||||
c1 = vzip1q_s8 (c0, d0);
|
|
||||||
d1 = vzip2q_s8 (c0, d0);
|
|
||||||
e1 = vzip1q_s8 (e0, f0);
|
|
||||||
f1 = vzip2q_s8 (e0, f0);
|
|
||||||
g1 = vzip1q_s8 (g0, h0);
|
|
||||||
h1 = vzip2q_s8 (g0, h0);
|
|
||||||
|
|
||||||
a0 = vzip1q_s8 (a1, b1);
|
|
||||||
b0 = vzip2q_s8 (a1, b1);
|
|
||||||
c0 = vzip1q_s8 (c1, d1);
|
|
||||||
d0 = vzip2q_s8 (c1, d1);
|
|
||||||
e0 = vzip1q_s8 (e1, f1);
|
|
||||||
f0 = vzip2q_s8 (e1, f1);
|
|
||||||
g0 = vzip1q_s8 (g1, h1);
|
|
||||||
h0 = vzip2q_s8 (g1, h1);
|
|
||||||
|
|
||||||
a1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (c0));
|
|
||||||
b1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (c0));
|
|
||||||
c1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (b0), vreinterpretq_s32_s8 (d0));
|
|
||||||
d1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (b0), vreinterpretq_s32_s8 (d0));
|
|
||||||
e1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (g0));
|
|
||||||
f1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (g0));
|
|
||||||
g1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (f0), vreinterpretq_s32_s8 (h0));
|
|
||||||
h1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (f0), vreinterpretq_s32_s8 (h0));
|
|
||||||
|
|
||||||
a0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (a1), vreinterpretq_s64_s8 (e1));
|
|
||||||
b0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (a1), vreinterpretq_s64_s8 (e1));
|
|
||||||
c0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (b1), vreinterpretq_s64_s8 (f1));
|
|
||||||
d0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (b1), vreinterpretq_s64_s8 (f1));
|
|
||||||
e0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (c1), vreinterpretq_s64_s8 (g1));
|
|
||||||
f0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (c1), vreinterpretq_s64_s8 (g1));
|
|
||||||
g0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (d1), vreinterpretq_s64_s8 (h1));
|
|
||||||
h0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (d1), vreinterpretq_s64_s8 (h1));
|
|
||||||
|
|
||||||
vst1q_s8(out_b + 0*size + ii, a0);
|
|
||||||
vst1q_s8(out_b + 1*size + ii, b0);
|
|
||||||
vst1q_s8(out_b + 2*size + ii, c0);
|
|
||||||
vst1q_s8(out_b + 3*size + ii, d0);
|
|
||||||
vst1q_s8(out_b + 4*size + ii, e0);
|
|
||||||
vst1q_s8(out_b + 5*size + ii, f0);
|
|
||||||
vst1q_s8(out_b + 6*size + ii, g0);
|
|
||||||
vst1q_s8(out_b + 7*size + ii, h0);
|
|
||||||
}
|
|
||||||
|
|
||||||
return bshuf_trans_byte_elem_remainder(in, out, size, 8,
|
|
||||||
size - size % 16);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* Transpose bytes within elements using best NEON algorithm available. */
|
|
||||||
int64_t bshuf_trans_byte_elem_NEON(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size) {
|
|
||||||
|
|
||||||
int64_t count;
|
|
||||||
|
|
||||||
// Trivial cases: power of 2 bytes.
|
|
||||||
switch (elem_size) {
|
|
||||||
case 1:
|
|
||||||
count = bshuf_copy(in, out, size, elem_size);
|
|
||||||
return count;
|
|
||||||
case 2:
|
|
||||||
count = bshuf_trans_byte_elem_NEON_16(in, out, size);
|
|
||||||
return count;
|
|
||||||
case 4:
|
|
||||||
count = bshuf_trans_byte_elem_NEON_32(in, out, size);
|
|
||||||
return count;
|
|
||||||
case 8:
|
|
||||||
count = bshuf_trans_byte_elem_NEON_64(in, out, size);
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Worst case: odd number of bytes. Turns out that this is faster for
|
|
||||||
// (odd * 2) byte elements as well (hence % 4).
|
|
||||||
if (elem_size % 4) {
|
|
||||||
count = bshuf_trans_byte_elem_scal(in, out, size, elem_size);
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Multiple of power of 2: transpose hierarchically.
|
|
||||||
{
|
|
||||||
size_t nchunk_elem;
|
|
||||||
void* tmp_buf = malloc(size * elem_size);
|
|
||||||
if (tmp_buf == NULL) return -1;
|
|
||||||
|
|
||||||
if ((elem_size % 8) == 0) {
|
|
||||||
nchunk_elem = elem_size / 8;
|
|
||||||
TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int64_t);
|
|
||||||
count = bshuf_trans_byte_elem_NEON_64(out, tmp_buf,
|
|
||||||
size * nchunk_elem);
|
|
||||||
bshuf_trans_elem(tmp_buf, out, 8, nchunk_elem, size);
|
|
||||||
} else if ((elem_size % 4) == 0) {
|
|
||||||
nchunk_elem = elem_size / 4;
|
|
||||||
TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int32_t);
|
|
||||||
count = bshuf_trans_byte_elem_NEON_32(out, tmp_buf,
|
|
||||||
size * nchunk_elem);
|
|
||||||
bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size);
|
|
||||||
} else {
|
|
||||||
// Not used since scalar algorithm is faster.
|
|
||||||
nchunk_elem = elem_size / 2;
|
|
||||||
TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t);
|
|
||||||
count = bshuf_trans_byte_elem_NEON_16(out, tmp_buf,
|
|
||||||
size * nchunk_elem);
|
|
||||||
bshuf_trans_elem(tmp_buf, out, 2, nchunk_elem, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
free(tmp_buf);
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* Creates a mask made up of the most significant
|
|
||||||
* bit of each byte of 'input'
|
|
||||||
*/
|
|
||||||
int32_t move_byte_mask_neon(uint8x16_t input) {
|
|
||||||
|
|
||||||
return ( ((input[0] & 0x80) >> 7) | (((input[1] & 0x80) >> 7) << 1) | (((input[2] & 0x80) >> 7) << 2) | (((input[3] & 0x80) >> 7) << 3)
|
|
||||||
| (((input[4] & 0x80) >> 7) << 4) | (((input[5] & 0x80) >> 7) << 5) | (((input[6] & 0x80) >> 7) << 6) | (((input[7] & 0x80) >> 7) << 7)
|
|
||||||
| (((input[8] & 0x80) >> 7) << 8) | (((input[9] & 0x80) >> 7) << 9) | (((input[10] & 0x80) >> 7) << 10) | (((input[11] & 0x80) >> 7) << 11)
|
|
||||||
| (((input[12] & 0x80) >> 7) << 12) | (((input[13] & 0x80) >> 7) << 13) | (((input[14] & 0x80) >> 7) << 14) | (((input[15] & 0x80) >> 7) << 15)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Transpose bits within bytes. */
|
|
||||||
int64_t bshuf_trans_bit_byte_NEON(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size) {
|
|
||||||
|
|
||||||
size_t ii, kk;
|
|
||||||
const char* in_b = (const char*) in;
|
|
||||||
char* out_b = (char*) out;
|
|
||||||
uint16_t* out_ui16;
|
|
||||||
|
|
||||||
int64_t count;
|
|
||||||
|
|
||||||
size_t nbyte = elem_size * size;
|
|
||||||
|
|
||||||
CHECK_MULT_EIGHT(nbyte);
|
|
||||||
|
|
||||||
int16x8_t xmm;
|
|
||||||
int32_t bt;
|
|
||||||
|
|
||||||
for (ii = 0; ii + 15 < nbyte; ii += 16) {
|
|
||||||
xmm = vld1q_s16((int16_t *) (in_b + ii));
|
|
||||||
for (kk = 0; kk < 8; kk++) {
|
|
||||||
bt = move_byte_mask_neon((uint8x16_t) xmm);
|
|
||||||
xmm = vshlq_n_s16(xmm, 1);
|
|
||||||
out_ui16 = (uint16_t*) &out_b[((7 - kk) * nbyte + ii) / 8];
|
|
||||||
*out_ui16 = bt;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size,
|
|
||||||
nbyte - nbyte % 16);
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* Transpose bits within elements. */
|
|
||||||
int64_t bshuf_trans_bit_elem_NEON(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size) {
|
|
||||||
|
|
||||||
int64_t count;
|
|
||||||
|
|
||||||
CHECK_MULT_EIGHT(size);
|
|
||||||
|
|
||||||
void* tmp_buf = malloc(size * elem_size);
|
|
||||||
if (tmp_buf == NULL) return -1;
|
|
||||||
|
|
||||||
count = bshuf_trans_byte_elem_NEON(in, out, size, elem_size);
|
|
||||||
CHECK_ERR_FREE(count, tmp_buf);
|
|
||||||
count = bshuf_trans_bit_byte_NEON(out, tmp_buf, size, elem_size);
|
|
||||||
CHECK_ERR_FREE(count, tmp_buf);
|
|
||||||
count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size);
|
|
||||||
|
|
||||||
free(tmp_buf);
|
|
||||||
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* For data organized into a row for each bit (8 * elem_size rows), transpose
|
|
||||||
* the bytes. */
|
|
||||||
int64_t bshuf_trans_byte_bitrow_NEON(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size) {
|
|
||||||
|
|
||||||
size_t ii, jj;
|
|
||||||
const char* in_b = (const char*) in;
|
|
||||||
char* out_b = (char*) out;
|
|
||||||
|
|
||||||
CHECK_MULT_EIGHT(size);
|
|
||||||
|
|
||||||
size_t nrows = 8 * elem_size;
|
|
||||||
size_t nbyte_row = size / 8;
|
|
||||||
|
|
||||||
int8x16_t a0, b0, c0, d0, e0, f0, g0, h0;
|
|
||||||
int8x16_t a1, b1, c1, d1, e1, f1, g1, h1;
|
|
||||||
int64x1_t *as, *bs, *cs, *ds, *es, *fs, *gs, *hs;
|
|
||||||
|
|
||||||
for (ii = 0; ii + 7 < nrows; ii += 8) {
|
|
||||||
for (jj = 0; jj + 15 < nbyte_row; jj += 16) {
|
|
||||||
a0 = vld1q_s8(in_b + (ii + 0)*nbyte_row + jj);
|
|
||||||
b0 = vld1q_s8(in_b + (ii + 1)*nbyte_row + jj);
|
|
||||||
c0 = vld1q_s8(in_b + (ii + 2)*nbyte_row + jj);
|
|
||||||
d0 = vld1q_s8(in_b + (ii + 3)*nbyte_row + jj);
|
|
||||||
e0 = vld1q_s8(in_b + (ii + 4)*nbyte_row + jj);
|
|
||||||
f0 = vld1q_s8(in_b + (ii + 5)*nbyte_row + jj);
|
|
||||||
g0 = vld1q_s8(in_b + (ii + 6)*nbyte_row + jj);
|
|
||||||
h0 = vld1q_s8(in_b + (ii + 7)*nbyte_row + jj);
|
|
||||||
|
|
||||||
a1 = vzip1q_s8(a0, b0);
|
|
||||||
b1 = vzip1q_s8(c0, d0);
|
|
||||||
c1 = vzip1q_s8(e0, f0);
|
|
||||||
d1 = vzip1q_s8(g0, h0);
|
|
||||||
e1 = vzip2q_s8(a0, b0);
|
|
||||||
f1 = vzip2q_s8(c0, d0);
|
|
||||||
g1 = vzip2q_s8(e0, f0);
|
|
||||||
h1 = vzip2q_s8(g0, h0);
|
|
||||||
|
|
||||||
a0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (a1), vreinterpretq_s16_s8 (b1));
|
|
||||||
b0= (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (c1), vreinterpretq_s16_s8 (d1));
|
|
||||||
c0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (a1), vreinterpretq_s16_s8 (b1));
|
|
||||||
d0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (c1), vreinterpretq_s16_s8 (d1));
|
|
||||||
e0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (e1), vreinterpretq_s16_s8 (f1));
|
|
||||||
f0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (g1), vreinterpretq_s16_s8 (h1));
|
|
||||||
g0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (e1), vreinterpretq_s16_s8 (f1));
|
|
||||||
h0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (g1), vreinterpretq_s16_s8 (h1));
|
|
||||||
|
|
||||||
a1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (b0));
|
|
||||||
b1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (b0));
|
|
||||||
c1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (c0), vreinterpretq_s32_s8 (d0));
|
|
||||||
d1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (c0), vreinterpretq_s32_s8 (d0));
|
|
||||||
e1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (f0));
|
|
||||||
f1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (f0));
|
|
||||||
g1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (g0), vreinterpretq_s32_s8 (h0));
|
|
||||||
h1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (g0), vreinterpretq_s32_s8 (h0));
|
|
||||||
|
|
||||||
as = (int64x1_t *) &a1;
|
|
||||||
bs = (int64x1_t *) &b1;
|
|
||||||
cs = (int64x1_t *) &c1;
|
|
||||||
ds = (int64x1_t *) &d1;
|
|
||||||
es = (int64x1_t *) &e1;
|
|
||||||
fs = (int64x1_t *) &f1;
|
|
||||||
gs = (int64x1_t *) &g1;
|
|
||||||
hs = (int64x1_t *) &h1;
|
|
||||||
|
|
||||||
vst1_s64((int64_t *)(out_b + (jj + 0) * nrows + ii), *as);
|
|
||||||
vst1_s64((int64_t *)(out_b + (jj + 1) * nrows + ii), *(as + 1));
|
|
||||||
vst1_s64((int64_t *)(out_b + (jj + 2) * nrows + ii), *bs);
|
|
||||||
vst1_s64((int64_t *)(out_b + (jj + 3) * nrows + ii), *(bs + 1));
|
|
||||||
vst1_s64((int64_t *)(out_b + (jj + 4) * nrows + ii), *cs);
|
|
||||||
vst1_s64((int64_t *)(out_b + (jj + 5) * nrows + ii), *(cs + 1));
|
|
||||||
vst1_s64((int64_t *)(out_b + (jj + 6) * nrows + ii), *ds);
|
|
||||||
vst1_s64((int64_t *)(out_b + (jj + 7) * nrows + ii), *(ds + 1));
|
|
||||||
vst1_s64((int64_t *)(out_b + (jj + 8) * nrows + ii), *es);
|
|
||||||
vst1_s64((int64_t *)(out_b + (jj + 9) * nrows + ii), *(es + 1));
|
|
||||||
vst1_s64((int64_t *)(out_b + (jj + 10) * nrows + ii), *fs);
|
|
||||||
vst1_s64((int64_t *)(out_b + (jj + 11) * nrows + ii), *(fs + 1));
|
|
||||||
vst1_s64((int64_t *)(out_b + (jj + 12) * nrows + ii), *gs);
|
|
||||||
vst1_s64((int64_t *)(out_b + (jj + 13) * nrows + ii), *(gs + 1));
|
|
||||||
vst1_s64((int64_t *)(out_b + (jj + 14) * nrows + ii), *hs);
|
|
||||||
vst1_s64((int64_t *)(out_b + (jj + 15) * nrows + ii), *(hs + 1));
|
|
||||||
}
|
|
||||||
for (jj = nbyte_row - nbyte_row % 16; jj < nbyte_row; jj ++) {
|
|
||||||
out_b[jj * nrows + ii + 0] = in_b[(ii + 0)*nbyte_row + jj];
|
|
||||||
out_b[jj * nrows + ii + 1] = in_b[(ii + 1)*nbyte_row + jj];
|
|
||||||
out_b[jj * nrows + ii + 2] = in_b[(ii + 2)*nbyte_row + jj];
|
|
||||||
out_b[jj * nrows + ii + 3] = in_b[(ii + 3)*nbyte_row + jj];
|
|
||||||
out_b[jj * nrows + ii + 4] = in_b[(ii + 4)*nbyte_row + jj];
|
|
||||||
out_b[jj * nrows + ii + 5] = in_b[(ii + 5)*nbyte_row + jj];
|
|
||||||
out_b[jj * nrows + ii + 6] = in_b[(ii + 6)*nbyte_row + jj];
|
|
||||||
out_b[jj * nrows + ii + 7] = in_b[(ii + 7)*nbyte_row + jj];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return size * elem_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* Shuffle bits within the bytes of eight element blocks. */
|
|
||||||
int64_t bshuf_shuffle_bit_eightelem_NEON(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size) {
|
|
||||||
|
|
||||||
CHECK_MULT_EIGHT(size);
|
|
||||||
|
|
||||||
// With a bit of care, this could be written such that such that it is
|
|
||||||
// in_buf = out_buf safe.
|
|
||||||
const char* in_b = (const char*) in;
|
|
||||||
uint16_t* out_ui16 = (uint16_t*) out;
|
|
||||||
|
|
||||||
size_t ii, jj, kk;
|
|
||||||
size_t nbyte = elem_size * size;
|
|
||||||
|
|
||||||
int16x8_t xmm;
|
|
||||||
int32_t bt;
|
|
||||||
|
|
||||||
if (elem_size % 2) {
|
|
||||||
bshuf_shuffle_bit_eightelem_scal(in, out, size, elem_size);
|
|
||||||
} else {
|
|
||||||
for (ii = 0; ii + 8 * elem_size - 1 < nbyte;
|
|
||||||
ii += 8 * elem_size) {
|
|
||||||
for (jj = 0; jj + 15 < 8 * elem_size; jj += 16) {
|
|
||||||
xmm = vld1q_s16((int16_t *) &in_b[ii + jj]);
|
|
||||||
for (kk = 0; kk < 8; kk++) {
|
|
||||||
bt = move_byte_mask_neon((uint8x16_t) xmm);
|
|
||||||
xmm = vshlq_n_s16(xmm, 1);
|
|
||||||
size_t ind = (ii + jj / 8 + (7 - kk) * elem_size);
|
|
||||||
out_ui16[ind / 2] = bt;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return size * elem_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* Untranspose bits within elements. */
|
|
||||||
int64_t bshuf_untrans_bit_elem_NEON(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size) {
|
|
||||||
|
|
||||||
int64_t count;
|
|
||||||
|
|
||||||
CHECK_MULT_EIGHT(size);
|
|
||||||
|
|
||||||
void* tmp_buf = malloc(size * elem_size);
|
|
||||||
if (tmp_buf == NULL) return -1;
|
|
||||||
|
|
||||||
count = bshuf_trans_byte_bitrow_NEON(in, tmp_buf, size, elem_size);
|
|
||||||
CHECK_ERR_FREE(count, tmp_buf);
|
|
||||||
count = bshuf_shuffle_bit_eightelem_NEON(tmp_buf, out, size, elem_size);
|
|
||||||
|
|
||||||
free(tmp_buf);
|
|
||||||
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
#else // #ifdef USEARMNEON
|
|
||||||
|
|
||||||
int64_t bshuf_untrans_bit_elem_NEON(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size) {
|
|
||||||
return -13;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int64_t bshuf_trans_bit_elem_NEON(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size) {
|
|
||||||
return -13;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int64_t bshuf_trans_byte_bitrow_NEON(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size) {
|
|
||||||
return -13;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int64_t bshuf_trans_bit_byte_NEON(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size) {
|
|
||||||
return -13;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int64_t bshuf_trans_byte_elem_NEON(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size) {
|
|
||||||
return -13;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int64_t bshuf_trans_byte_elem_NEON_64(const void* in, void* out, const size_t size) {
|
|
||||||
return -13;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int64_t bshuf_trans_byte_elem_NEON_32(const void* in, void* out, const size_t size) {
|
|
||||||
return -13;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int64_t bshuf_trans_byte_elem_NEON_16(const void* in, void* out, const size_t size) {
|
|
||||||
return -13;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int64_t bshuf_shuffle_bit_eightelem_NEON(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size) {
|
|
||||||
return -13;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* ---- Worker code that uses SSE2 ----
|
/* ---- Worker code that uses SSE2 ----
|
||||||
*
|
*
|
||||||
* The following code makes use of the SSE2 instruction set and specialized
|
* The following code makes use of the SSE2 instruction set and specialized
|
||||||
* 16 byte registers. The SSE2 instructions are present on modern x86
|
* 16 byte registers. The SSE2 instructions are present on modern x86
|
||||||
* processors. The first Intel processor microarchitecture supporting SSE2 was
|
* processors. The first Intel processor microarchitecture supporting SSE2 was
|
||||||
* Pentium 4 (2000).
|
* Pentium 4 (2000).
|
||||||
*
|
*
|
||||||
@@ -1051,7 +512,7 @@ int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size,
|
|||||||
|
|
||||||
int64_t count;
|
int64_t count;
|
||||||
|
|
||||||
// Trivial cases: power of 2 bytes.
|
|
||||||
switch (elem_size) {
|
switch (elem_size) {
|
||||||
case 1:
|
case 1:
|
||||||
count = bshuf_copy(in, out, size, elem_size);
|
count = bshuf_copy(in, out, size, elem_size);
|
||||||
@@ -1067,14 +528,14 @@ int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size,
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Worst case: odd number of bytes. Turns out that this is faster for
|
|
||||||
// (odd * 2) byte elements as well (hence % 4).
|
|
||||||
if (elem_size % 4) {
|
if (elem_size % 4) {
|
||||||
count = bshuf_trans_byte_elem_scal(in, out, size, elem_size);
|
count = bshuf_trans_byte_elem_scal(in, out, size, elem_size);
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Multiple of power of 2: transpose hierarchically.
|
|
||||||
{
|
{
|
||||||
size_t nchunk_elem;
|
size_t nchunk_elem;
|
||||||
void* tmp_buf = malloc(size * elem_size);
|
void* tmp_buf = malloc(size * elem_size);
|
||||||
@@ -1093,7 +554,7 @@ int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size,
|
|||||||
size * nchunk_elem);
|
size * nchunk_elem);
|
||||||
bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size);
|
bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size);
|
||||||
} else {
|
} else {
|
||||||
// Not used since scalar algorithm is faster.
|
|
||||||
nchunk_elem = elem_size / 2;
|
nchunk_elem = elem_size / 2;
|
||||||
TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t);
|
TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t);
|
||||||
count = bshuf_trans_byte_elem_SSE_16(out, tmp_buf,
|
count = bshuf_trans_byte_elem_SSE_16(out, tmp_buf,
|
||||||
@@ -1226,8 +687,8 @@ int64_t bshuf_trans_byte_bitrow_SSE(const void* in, void* out, const size_t size
|
|||||||
g1 = _mm_unpacklo_epi32(g0, h0);
|
g1 = _mm_unpacklo_epi32(g0, h0);
|
||||||
h1 = _mm_unpackhi_epi32(g0, h0);
|
h1 = _mm_unpackhi_epi32(g0, h0);
|
||||||
|
|
||||||
// We don't have a storeh instruction for integers, so interpret
|
|
||||||
// as a float. Have a storel (_mm_storel_epi64).
|
|
||||||
as = (__m128 *) &a1;
|
as = (__m128 *) &a1;
|
||||||
bs = (__m128 *) &b1;
|
bs = (__m128 *) &b1;
|
||||||
cs = (__m128 *) &c1;
|
cs = (__m128 *) &c1;
|
||||||
@@ -1276,8 +737,8 @@ int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t
|
|||||||
|
|
||||||
CHECK_MULT_EIGHT(size);
|
CHECK_MULT_EIGHT(size);
|
||||||
|
|
||||||
// With a bit of care, this could be written such that such that it is
|
|
||||||
// in_buf = out_buf safe.
|
|
||||||
const char* in_b = (const char*) in;
|
const char* in_b = (const char*) in;
|
||||||
uint16_t* out_ui16 = (uint16_t*) out;
|
uint16_t* out_ui16 = (uint16_t*) out;
|
||||||
|
|
||||||
@@ -1327,7 +788,7 @@ int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size,
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
#else // #ifdef USESSE2
|
#else
|
||||||
|
|
||||||
|
|
||||||
int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size,
|
int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size,
|
||||||
@@ -1381,7 +842,7 @@ int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif // #ifdef USESSE2
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/* ---- Code that requires AVX2. Intel Haswell (2013) and later. ---- */
|
/* ---- Code that requires AVX2. Intel Haswell (2013) and later. ---- */
|
||||||
@@ -1396,6 +857,7 @@ int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef USEAVX2
|
#ifdef USEAVX2
|
||||||
|
|
||||||
/* Transpose bits within bytes. */
|
/* Transpose bits within bytes. */
|
||||||
int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size,
|
int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size,
|
||||||
const size_t elem_size) {
|
const size_t elem_size) {
|
||||||
@@ -1552,8 +1014,8 @@ int64_t bshuf_shuffle_bit_eightelem_AVX(const void* in, void* out, const size_t
|
|||||||
|
|
||||||
CHECK_MULT_EIGHT(size);
|
CHECK_MULT_EIGHT(size);
|
||||||
|
|
||||||
// With a bit of care, this could be written such that such that it is
|
|
||||||
// in_buf = out_buf safe.
|
|
||||||
const char* in_b = (const char*) in;
|
const char* in_b = (const char*) in;
|
||||||
char* out_b = (char*) out;
|
char* out_b = (char*) out;
|
||||||
|
|
||||||
@@ -1603,7 +1065,7 @@ int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#else // #ifdef USEAVX2
|
#else
|
||||||
|
|
||||||
int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size,
|
int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size,
|
||||||
const size_t elem_size) {
|
const size_t elem_size) {
|
||||||
@@ -1634,179 +1096,19 @@ int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size,
|
|||||||
return -12;
|
return -12;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // #ifdef USEAVX2
|
|
||||||
|
|
||||||
#ifdef USEAVX512
|
|
||||||
|
|
||||||
/* Transpose bits within bytes. */
|
|
||||||
int64_t bshuf_trans_bit_byte_AVX512(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size) {
|
|
||||||
|
|
||||||
size_t ii, kk;
|
|
||||||
const char* in_b = (const char*) in;
|
|
||||||
char* out_b = (char*) out;
|
|
||||||
size_t nbyte = elem_size * size;
|
|
||||||
int64_t count;
|
|
||||||
|
|
||||||
int64_t* out_i64;
|
|
||||||
__m512i zmm;
|
|
||||||
__mmask64 bt;
|
|
||||||
if (nbyte >= 64) {
|
|
||||||
const __m512i mask = _mm512_set1_epi8(0);
|
|
||||||
|
|
||||||
for (ii = 0; ii + 63 < nbyte; ii += 64) {
|
|
||||||
zmm = _mm512_loadu_si512((__m512i *) &in_b[ii]);
|
|
||||||
for (kk = 0; kk < 8; kk++) {
|
|
||||||
bt = _mm512_cmp_epi8_mask(zmm, mask, 1);
|
|
||||||
zmm = _mm512_slli_epi16(zmm, 1);
|
|
||||||
out_i64 = (int64_t*) &out_b[((7 - kk) * nbyte + ii) / 8];
|
|
||||||
*out_i64 = (int64_t)bt;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
__m256i ymm;
|
|
||||||
int32_t bt32;
|
|
||||||
int32_t* out_i32;
|
|
||||||
size_t start = nbyte - nbyte % 64;
|
|
||||||
for (ii = start; ii + 31 < nbyte; ii += 32) {
|
|
||||||
ymm = _mm256_loadu_si256((__m256i *) &in_b[ii]);
|
|
||||||
for (kk = 0; kk < 8; kk++) {
|
|
||||||
bt32 = _mm256_movemask_epi8(ymm);
|
|
||||||
ymm = _mm256_slli_epi16(ymm, 1);
|
|
||||||
out_i32 = (int32_t*) &out_b[((7 - kk) * nbyte + ii) / 8];
|
|
||||||
*out_i32 = bt32;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size,
|
|
||||||
nbyte - nbyte % 64 % 32);
|
|
||||||
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* Transpose bits within elements. */
|
|
||||||
int64_t bshuf_trans_bit_elem_AVX512(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size) {
|
|
||||||
|
|
||||||
int64_t count;
|
|
||||||
|
|
||||||
CHECK_MULT_EIGHT(size);
|
|
||||||
|
|
||||||
void* tmp_buf = malloc(size * elem_size);
|
|
||||||
if (tmp_buf == NULL) return -1;
|
|
||||||
|
|
||||||
count = bshuf_trans_byte_elem_SSE(in, out, size, elem_size);
|
|
||||||
CHECK_ERR_FREE(count, tmp_buf);
|
|
||||||
count = bshuf_trans_bit_byte_AVX512(out, tmp_buf, size, elem_size);
|
|
||||||
CHECK_ERR_FREE(count, tmp_buf);
|
|
||||||
count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size);
|
|
||||||
|
|
||||||
free(tmp_buf);
|
|
||||||
|
|
||||||
return count;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Shuffle bits within the bytes of eight element blocks. */
|
|
||||||
int64_t bshuf_shuffle_bit_eightelem_AVX512(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size) {
|
|
||||||
|
|
||||||
CHECK_MULT_EIGHT(size);
|
|
||||||
|
|
||||||
// With a bit of care, this could be written such that such that it is
|
|
||||||
// in_buf = out_buf safe.
|
|
||||||
const char* in_b = (const char*) in;
|
|
||||||
char* out_b = (char*) out;
|
|
||||||
|
|
||||||
size_t ii, jj, kk;
|
|
||||||
size_t nbyte = elem_size * size;
|
|
||||||
|
|
||||||
__m512i zmm;
|
|
||||||
__mmask64 bt;
|
|
||||||
|
|
||||||
if (elem_size % 8) {
|
|
||||||
return bshuf_shuffle_bit_eightelem_AVX(in, out, size, elem_size);
|
|
||||||
} else {
|
|
||||||
const __m512i mask = _mm512_set1_epi8(0);
|
|
||||||
for (jj = 0; jj + 63 < 8 * elem_size; jj += 64) {
|
|
||||||
for (ii = 0; ii + 8 * elem_size - 1 < nbyte;
|
|
||||||
ii += 8 * elem_size) {
|
|
||||||
zmm = _mm512_loadu_si512((__m512i *) &in_b[ii + jj]);
|
|
||||||
for (kk = 0; kk < 8; kk++) {
|
|
||||||
bt = _mm512_cmp_epi8_mask(zmm, mask, 1);
|
|
||||||
zmm = _mm512_slli_epi16(zmm, 1);
|
|
||||||
size_t ind = (ii + jj / 8 + (7 - kk) * elem_size);
|
|
||||||
* (int64_t *) &out_b[ind] = bt;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
return size * elem_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Untranspose bits within elements. */
|
|
||||||
int64_t bshuf_untrans_bit_elem_AVX512(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size) {
|
|
||||||
|
|
||||||
int64_t count;
|
|
||||||
|
|
||||||
CHECK_MULT_EIGHT(size);
|
|
||||||
|
|
||||||
void* tmp_buf = malloc(size * elem_size);
|
|
||||||
if (tmp_buf == NULL) return -1;
|
|
||||||
|
|
||||||
count = bshuf_trans_byte_bitrow_AVX(in, tmp_buf, size, elem_size);
|
|
||||||
CHECK_ERR_FREE(count, tmp_buf);
|
|
||||||
count = bshuf_shuffle_bit_eightelem_AVX512(tmp_buf, out, size, elem_size);
|
|
||||||
|
|
||||||
free(tmp_buf);
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
#else // #ifdef USEAVX512
|
|
||||||
|
|
||||||
int64_t bshuf_trans_bit_byte_AVX512(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size) {
|
|
||||||
|
|
||||||
return -14;
|
|
||||||
}
|
|
||||||
|
|
||||||
int64_t bshuf_trans_bit_elem_AVX512(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size) {
|
|
||||||
return -14;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
int64_t bshuf_shuffle_bit_eightelem_AVX512(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size) {
|
|
||||||
return -14;
|
|
||||||
}
|
|
||||||
|
|
||||||
int64_t bshuf_untrans_bit_elem_AVX512(const void* in, void* out, const size_t size,
|
|
||||||
const size_t elem_size) {
|
|
||||||
return -14;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/* ---- Drivers selecting best instruction set at compile time. ---- */
|
/* ---- Drivers selecting best instruction set at compile time. ---- */
|
||||||
|
|
||||||
int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size,
|
int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size,
|
||||||
const size_t elem_size) {
|
const size_t elem_size) {
|
||||||
|
|
||||||
int64_t count;
|
int64_t count;
|
||||||
#ifdef USEAVX512
|
#ifdef USEAVX2
|
||||||
count = bshuf_trans_bit_elem_AVX512(in, out, size, elem_size);
|
|
||||||
#elif defined USEAVX2
|
|
||||||
count = bshuf_trans_bit_elem_AVX(in, out, size, elem_size);
|
count = bshuf_trans_bit_elem_AVX(in, out, size, elem_size);
|
||||||
#elif defined(USESSE2)
|
#elif defined(USESSE2)
|
||||||
count = bshuf_trans_bit_elem_SSE(in, out, size, elem_size);
|
count = bshuf_trans_bit_elem_SSE(in, out, size, elem_size);
|
||||||
#elif defined(USEARMNEON)
|
|
||||||
count = bshuf_trans_bit_elem_NEON(in, out, size, elem_size);
|
|
||||||
#else
|
#else
|
||||||
count = bshuf_trans_bit_elem_scal(in, out, size, elem_size);
|
count = bshuf_trans_bit_elem_scal(in, out, size, elem_size);
|
||||||
#endif
|
#endif
|
||||||
@@ -1814,18 +1116,14 @@ int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size,
|
int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size,
|
||||||
const size_t elem_size) {
|
const size_t elem_size) {
|
||||||
|
|
||||||
int64_t count;
|
int64_t count;
|
||||||
#ifdef USEAVX512
|
#ifdef USEAVX2
|
||||||
count = bshuf_untrans_bit_elem_AVX512(in, out, size, elem_size);
|
|
||||||
#elif defined USEAVX2
|
|
||||||
count = bshuf_untrans_bit_elem_AVX(in, out, size, elem_size);
|
count = bshuf_untrans_bit_elem_AVX(in, out, size, elem_size);
|
||||||
#elif defined(USESSE2)
|
#elif defined(USESSE2)
|
||||||
count = bshuf_untrans_bit_elem_SSE(in, out, size, elem_size);
|
count = bshuf_untrans_bit_elem_SSE(in, out, size, elem_size);
|
||||||
#elif defined(USEARMNEON)
|
|
||||||
count = bshuf_untrans_bit_elem_NEON(in, out, size, elem_size);
|
|
||||||
#else
|
#else
|
||||||
count = bshuf_untrans_bit_elem_scal(in, out, size, elem_size);
|
count = bshuf_untrans_bit_elem_scal(in, out, size, elem_size);
|
||||||
#endif
|
#endif
|
||||||
@@ -1838,9 +1136,9 @@ int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size,
|
|||||||
/* Wrap a function for processing a single block to process an entire buffer in
|
/* Wrap a function for processing a single block to process an entire buffer in
|
||||||
* parallel. */
|
* parallel. */
|
||||||
int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out, \
|
int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out, \
|
||||||
const size_t size, const size_t elem_size, size_t block_size, const int option) {
|
const size_t size, const size_t elem_size, size_t block_size) {
|
||||||
|
|
||||||
omp_size_t ii = 0;
|
size_t ii;
|
||||||
int64_t err = 0;
|
int64_t err = 0;
|
||||||
int64_t count, cum_count=0;
|
int64_t count, cum_count=0;
|
||||||
size_t last_block_size;
|
size_t last_block_size;
|
||||||
@@ -1863,8 +1161,8 @@ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
|
|||||||
#pragma omp parallel for schedule(dynamic, 1) \
|
#pragma omp parallel for schedule(dynamic, 1) \
|
||||||
private(count) reduction(+ : cum_count)
|
private(count) reduction(+ : cum_count)
|
||||||
#endif
|
#endif
|
||||||
for (ii = 0; ii < (omp_size_t)( size / block_size ); ii ++) {
|
for (ii = 0; ii < size / block_size; ii ++) {
|
||||||
count = fun(&C, block_size, elem_size, option);
|
count = fun(&C, block_size, elem_size);
|
||||||
if (count < 0) err = count;
|
if (count < 0) err = count;
|
||||||
cum_count += count;
|
cum_count += count;
|
||||||
}
|
}
|
||||||
@@ -1872,7 +1170,7 @@ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
|
|||||||
last_block_size = size % block_size;
|
last_block_size = size % block_size;
|
||||||
last_block_size = last_block_size - last_block_size % BSHUF_BLOCKED_MULT;
|
last_block_size = last_block_size - last_block_size % BSHUF_BLOCKED_MULT;
|
||||||
if (last_block_size) {
|
if (last_block_size) {
|
||||||
count = fun(&C, last_block_size, elem_size, option);
|
count = fun(&C, last_block_size, elem_size);
|
||||||
if (count < 0) err = count;
|
if (count < 0) err = count;
|
||||||
cum_count += count;
|
cum_count += count;
|
||||||
}
|
}
|
||||||
@@ -1880,7 +1178,6 @@ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
|
|||||||
if (err < 0) return err;
|
if (err < 0) return err;
|
||||||
|
|
||||||
leftover_bytes = size % BSHUF_BLOCKED_MULT * elem_size;
|
leftover_bytes = size % BSHUF_BLOCKED_MULT * elem_size;
|
||||||
//this_iter;
|
|
||||||
last_in = (char *) ioc_get_in(&C, &this_iter);
|
last_in = (char *) ioc_get_in(&C, &this_iter);
|
||||||
ioc_set_next_in(&C, &this_iter, (void *) (last_in + leftover_bytes));
|
ioc_set_next_in(&C, &this_iter, (void *) (last_in + leftover_bytes));
|
||||||
last_out = (char *) ioc_get_out(&C, &this_iter);
|
last_out = (char *) ioc_get_out(&C, &this_iter);
|
||||||
@@ -1896,7 +1193,7 @@ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
|
|||||||
|
|
||||||
/* Bitshuffle a single block. */
|
/* Bitshuffle a single block. */
|
||||||
int64_t bshuf_bitshuffle_block(ioc_chain *C_ptr, \
|
int64_t bshuf_bitshuffle_block(ioc_chain *C_ptr, \
|
||||||
const size_t size, const size_t elem_size, const int option) {
|
const size_t size, const size_t elem_size) {
|
||||||
|
|
||||||
size_t this_iter;
|
size_t this_iter;
|
||||||
const void *in;
|
const void *in;
|
||||||
@@ -1904,7 +1201,7 @@ int64_t bshuf_bitshuffle_block(ioc_chain *C_ptr, \
|
|||||||
int64_t count;
|
int64_t count;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
in = ioc_get_in(C_ptr, &this_iter);
|
in = ioc_get_in(C_ptr, &this_iter);
|
||||||
ioc_set_next_in(C_ptr, &this_iter,
|
ioc_set_next_in(C_ptr, &this_iter,
|
||||||
(void*) ((char*) in + size * elem_size));
|
(void*) ((char*) in + size * elem_size));
|
||||||
@@ -1919,7 +1216,7 @@ int64_t bshuf_bitshuffle_block(ioc_chain *C_ptr, \
|
|||||||
|
|
||||||
/* Bitunshuffle a single block. */
|
/* Bitunshuffle a single block. */
|
||||||
int64_t bshuf_bitunshuffle_block(ioc_chain* C_ptr, \
|
int64_t bshuf_bitunshuffle_block(ioc_chain* C_ptr, \
|
||||||
const size_t size, const size_t elem_size, const int option) {
|
const size_t size, const size_t elem_size) {
|
||||||
|
|
||||||
|
|
||||||
size_t this_iter;
|
size_t this_iter;
|
||||||
@@ -1999,11 +1296,11 @@ uint32_t bshuf_read_uint32_BE(const void* buf) {
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
size_t bshuf_default_block_size(const size_t elem_size) {
|
size_t bshuf_default_block_size(const size_t elem_size) {
|
||||||
// This function needs to be absolutely stable between versions.
|
|
||||||
// Otherwise encoded data will not be decodable.
|
|
||||||
|
|
||||||
size_t block_size = BSHUF_TARGET_BLOCK_SIZE_B / elem_size;
|
size_t block_size = BSHUF_TARGET_BLOCK_SIZE_B / elem_size;
|
||||||
// Ensure it is a required multiple.
|
|
||||||
block_size = (block_size / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT;
|
block_size = (block_size / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT;
|
||||||
return MAX(block_size, BSHUF_MIN_RECOMMEND_BLOCK);
|
return MAX(block_size, BSHUF_MIN_RECOMMEND_BLOCK);
|
||||||
}
|
}
|
||||||
@@ -2013,7 +1310,7 @@ int64_t bshuf_bitshuffle(const void* in, void* out, const size_t size,
|
|||||||
const size_t elem_size, size_t block_size) {
|
const size_t elem_size, size_t block_size) {
|
||||||
|
|
||||||
return bshuf_blocked_wrap_fun(&bshuf_bitshuffle_block, in, out, size,
|
return bshuf_blocked_wrap_fun(&bshuf_bitshuffle_block, in, out, size,
|
||||||
elem_size, block_size, 0/*option*/);
|
elem_size, block_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -2021,7 +1318,7 @@ int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size,
|
|||||||
const size_t elem_size, size_t block_size) {
|
const size_t elem_size, size_t block_size) {
|
||||||
|
|
||||||
return bshuf_blocked_wrap_fun(&bshuf_bitunshuffle_block, in, out, size,
|
return bshuf_blocked_wrap_fun(&bshuf_bitunshuffle_block, in, out, size,
|
||||||
elem_size, block_size, 0/*option*/);
|
elem_size, block_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -18,8 +18,6 @@
|
|||||||
* -1 : Failed to allocate memory.
|
* -1 : Failed to allocate memory.
|
||||||
* -11 : Missing SSE.
|
* -11 : Missing SSE.
|
||||||
* -12 : Missing AVX.
|
* -12 : Missing AVX.
|
||||||
* -13 : Missing Arm Neon.
|
|
||||||
* -14 : Missing AVX512.
|
|
||||||
* -80 : Input size not a multiple of 8.
|
* -80 : Input size not a multiple of 8.
|
||||||
* -81 : block_size not multiple of 8.
|
* -81 : block_size not multiple of 8.
|
||||||
* -91 : Decompression error, wrong number of bytes processed.
|
* -91 : Decompression error, wrong number of bytes processed.
|
||||||
@@ -30,14 +28,15 @@
|
|||||||
#ifndef BITSHUFFLE_CORE_H
|
#ifndef BITSHUFFLE_CORE_H
|
||||||
#define BITSHUFFLE_CORE_H
|
#define BITSHUFFLE_CORE_H
|
||||||
|
|
||||||
// We assume GNU g++ defining `__cplusplus` has stdint.h
|
|
||||||
#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus)
|
#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus)
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#else
|
#else
|
||||||
typedef unsigned char uint8_t;
|
typedef unsigned char uint8_t;
|
||||||
typedef unsigned short uint16_t;
|
typedef unsigned short uint16_t;
|
||||||
|
typedef signed short int16_t;
|
||||||
typedef unsigned int uint32_t;
|
typedef unsigned int uint32_t;
|
||||||
typedef signed int int32_t;
|
typedef signed int int32_t;
|
||||||
typedef unsigned long long uint64_t;
|
typedef unsigned long long uint64_t;
|
||||||
typedef long long int64_t;
|
typedef long long int64_t;
|
||||||
#endif
|
#endif
|
||||||
@@ -45,11 +44,11 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
|
||||||
// These are usually set in the setup.py.
|
|
||||||
#ifndef BSHUF_VERSION_MAJOR
|
#ifndef BSHUF_VERSION_MAJOR
|
||||||
#define BSHUF_VERSION_MAJOR 0
|
#define BSHUF_VERSION_MAJOR 0
|
||||||
#define BSHUF_VERSION_MINOR 4
|
#define BSHUF_VERSION_MINOR 3
|
||||||
#define BSHUF_VERSION_POINT 0
|
#define BSHUF_VERSION_POINT 4
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
@@ -68,18 +67,6 @@ extern "C" {
|
|||||||
int bshuf_using_SSE2(void);
|
int bshuf_using_SSE2(void);
|
||||||
|
|
||||||
|
|
||||||
/* ---- bshuf_using_NEON ----
|
|
||||||
*
|
|
||||||
* Whether routines where compiled with the NEON instruction set.
|
|
||||||
*
|
|
||||||
* Returns
|
|
||||||
* -------
|
|
||||||
* 1 if using NEON, 0 otherwise.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
int bshuf_using_NEON(void);
|
|
||||||
|
|
||||||
|
|
||||||
/* ---- bshuf_using_AVX2 ----
|
/* ---- bshuf_using_AVX2 ----
|
||||||
*
|
*
|
||||||
* Whether routines where compiled with the AVX2 instruction set.
|
* Whether routines where compiled with the AVX2 instruction set.
|
||||||
@@ -92,18 +79,6 @@ int bshuf_using_NEON(void);
|
|||||||
int bshuf_using_AVX2(void);
|
int bshuf_using_AVX2(void);
|
||||||
|
|
||||||
|
|
||||||
/* ---- bshuf_using_AVX512 ----
|
|
||||||
*
|
|
||||||
* Whether routines where compiled with the AVX512 instruction set.
|
|
||||||
*
|
|
||||||
* Returns
|
|
||||||
* -------
|
|
||||||
* 1 if using AVX512, 0 otherwise.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
int bshuf_using_AVX512(void);
|
|
||||||
|
|
||||||
|
|
||||||
/* ---- bshuf_default_block_size ----
|
/* ---- bshuf_default_block_size ----
|
||||||
*
|
*
|
||||||
* The default block size as function of element size.
|
* The default block size as function of element size.
|
||||||
@@ -176,7 +151,7 @@ int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size,
|
|||||||
const size_t elem_size, size_t block_size);
|
const size_t elem_size, size_t block_size);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern "C"
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif // BITSHUFFLE_CORE_H
|
#endif
|
||||||
|
|||||||
@@ -13,31 +13,19 @@
|
|||||||
#ifndef BITSHUFFLE_INTERNALS_H
|
#ifndef BITSHUFFLE_INTERNALS_H
|
||||||
#define BITSHUFFLE_INTERNALS_H
|
#define BITSHUFFLE_INTERNALS_H
|
||||||
|
|
||||||
// We assume GNU g++ defining `__cplusplus` has stdint.h
|
|
||||||
#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus)
|
|
||||||
#include <stdint.h>
|
|
||||||
#else
|
|
||||||
typedef unsigned char uint8_t;
|
|
||||||
typedef unsigned short uint16_t;
|
|
||||||
typedef unsigned int uint32_t;
|
|
||||||
typedef signed int int32_t;
|
|
||||||
typedef unsigned long long uint64_t;
|
|
||||||
typedef long long int64_t;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include "iochain.h"
|
#include "iochain.h"
|
||||||
|
|
||||||
|
|
||||||
// Constants.
|
|
||||||
#ifndef BSHUF_MIN_RECOMMEND_BLOCK
|
#ifndef BSHUF_MIN_RECOMMEND_BLOCK
|
||||||
#define BSHUF_MIN_RECOMMEND_BLOCK 128
|
#define BSHUF_MIN_RECOMMEND_BLOCK 128
|
||||||
#define BSHUF_BLOCKED_MULT 8 // Block sizes must be multiple of this.
|
#define BSHUF_BLOCKED_MULT 8
|
||||||
#define BSHUF_TARGET_BLOCK_SIZE_B 8192
|
#define BSHUF_TARGET_BLOCK_SIZE_B 8192
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
// Macros.
|
|
||||||
#define CHECK_ERR_FREE(count, buf) if (count < 0) { free(buf); return count; }
|
#define CHECK_ERR_FREE(count, buf) if (count < 0) { free(buf); return count; }
|
||||||
|
|
||||||
|
|
||||||
@@ -61,15 +49,15 @@ int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size,
|
|||||||
|
|
||||||
/* Function definition for worker functions that process a single block. */
|
/* Function definition for worker functions that process a single block. */
|
||||||
typedef int64_t (*bshufBlockFunDef)(ioc_chain* C_ptr,
|
typedef int64_t (*bshufBlockFunDef)(ioc_chain* C_ptr,
|
||||||
const size_t size, const size_t elem_size, const int option);
|
const size_t size, const size_t elem_size);
|
||||||
|
|
||||||
/* Wrap a function for processing a single block to process an entire buffer in
|
/* Wrap a function for processing a single block to process an entire buffer in
|
||||||
* parallel. */
|
* parallel. */
|
||||||
int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
|
int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
|
||||||
const size_t size, const size_t elem_size, size_t block_size, const int option);
|
const size_t size, const size_t elem_size, size_t block_size);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern "C"
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif // BITSHUFFLE_INTERNALS_H
|
#endif
|
||||||
|
|||||||
@@ -1,260 +0,0 @@
|
|||||||
/*
|
|
||||||
* Bitshuffle HDF5 filter
|
|
||||||
*
|
|
||||||
* This file is part of Bitshuffle
|
|
||||||
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
|
|
||||||
* Website: http://www.github.com/kiyo-masui/bitshuffle
|
|
||||||
* Created: 2014
|
|
||||||
*
|
|
||||||
* See LICENSE file for details about copyright and rights to use.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "bitshuffle.h"
|
|
||||||
#include "bshuf_h5filter.h"
|
|
||||||
|
|
||||||
|
|
||||||
#define PUSH_ERR(func, minor, str) \
|
|
||||||
H5Epush1(__FILE__, func, __LINE__, H5E_PLINE, minor, str)
|
|
||||||
|
|
||||||
|
|
||||||
// Prototypes from bitshuffle.c
|
|
||||||
void bshuf_write_uint64_BE(void* buf, uint64_t num);
|
|
||||||
uint64_t bshuf_read_uint64_BE(void* buf);
|
|
||||||
void bshuf_write_uint32_BE(void* buf, uint32_t num);
|
|
||||||
uint32_t bshuf_read_uint32_BE(const void* buf);
|
|
||||||
|
|
||||||
|
|
||||||
// Only called on compression, not on reverse.
|
|
||||||
herr_t bshuf_h5_set_local(hid_t dcpl, hid_t type, hid_t space){
|
|
||||||
|
|
||||||
herr_t r;
|
|
||||||
size_t ii;
|
|
||||||
|
|
||||||
unsigned int elem_size;
|
|
||||||
|
|
||||||
unsigned int flags;
|
|
||||||
size_t nelements = 8;
|
|
||||||
size_t nelem_max = 11;
|
|
||||||
unsigned values[] = {0,0,0,0,0,0,0,0,0,0,0};
|
|
||||||
unsigned tmp_values[] = {0,0,0,0,0,0,0,0};
|
|
||||||
char msg[80];
|
|
||||||
|
|
||||||
r = H5Pget_filter_by_id2(dcpl, BSHUF_H5FILTER, &flags, &nelements,
|
|
||||||
tmp_values, 0, NULL, NULL);
|
|
||||||
if(r<0) return -1;
|
|
||||||
|
|
||||||
// First 3 slots reserved. Move any passed options to higher addresses.
|
|
||||||
for (ii=0; ii < nelements && ii + 3 < nelem_max; ii++) {
|
|
||||||
values[ii + 3] = tmp_values[ii];
|
|
||||||
}
|
|
||||||
|
|
||||||
nelements = 3 + nelements;
|
|
||||||
|
|
||||||
values[0] = BSHUF_VERSION_MAJOR;
|
|
||||||
values[1] = BSHUF_VERSION_MINOR;
|
|
||||||
|
|
||||||
elem_size = H5Tget_size(type);
|
|
||||||
if(elem_size <= 0) {
|
|
||||||
PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK,
|
|
||||||
"Invalid element size.");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
values[2] = elem_size;
|
|
||||||
|
|
||||||
// Validate user supplied arguments.
|
|
||||||
if (nelements > 3) {
|
|
||||||
if (values[3] % 8 || values[3] < 0) {
|
|
||||||
sprintf(msg, "Error in bitshuffle. Invalid block size: %d.",
|
|
||||||
values[3]);
|
|
||||||
PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, msg);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (nelements > 4) {
|
|
||||||
switch (values[4]) {
|
|
||||||
case 0:
|
|
||||||
break;
|
|
||||||
case BSHUF_H5_COMPRESS_LZ4:
|
|
||||||
break;
|
|
||||||
#ifdef ZSTD_SUPPORT
|
|
||||||
case BSHUF_H5_COMPRESS_ZSTD:
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
default:
|
|
||||||
PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK,
|
|
||||||
"Invalid bitshuffle compression.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
r = H5Pmodify_filter(dcpl, BSHUF_H5FILTER, flags, nelements, values);
|
|
||||||
if(r<0) return -1;
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
size_t bshuf_h5_filter(unsigned int flags, size_t cd_nelmts,
|
|
||||||
const unsigned int cd_values[], size_t nbytes,
|
|
||||||
size_t *buf_size, void **buf) {
|
|
||||||
|
|
||||||
size_t size, elem_size;
|
|
||||||
int err = -1;
|
|
||||||
char msg[80];
|
|
||||||
size_t block_size = 0;
|
|
||||||
size_t buf_size_out, nbytes_uncomp, nbytes_out;
|
|
||||||
char* in_buf = *buf;
|
|
||||||
void *out_buf;
|
|
||||||
|
|
||||||
if (cd_nelmts < 3) {
|
|
||||||
PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK,
|
|
||||||
"Not enough parameters.");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
elem_size = cd_values[2];
|
|
||||||
#ifdef ZSTD_SUPPORT
|
|
||||||
const int comp_lvl = cd_values[5];
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// User specified block size.
|
|
||||||
if (cd_nelmts > 3) block_size = cd_values[3];
|
|
||||||
|
|
||||||
if (block_size == 0) block_size = bshuf_default_block_size(elem_size);
|
|
||||||
|
|
||||||
#ifndef ZSTD_SUPPORT
|
|
||||||
if (cd_nelmts > 4 && (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD)) {
|
|
||||||
PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK,
|
|
||||||
"ZSTD compression filter chosen but ZSTD support not installed.");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Compression in addition to bitshuffle.
|
|
||||||
if (cd_nelmts > 4 && (cd_values[4] == BSHUF_H5_COMPRESS_LZ4 || cd_values[4] == BSHUF_H5_COMPRESS_ZSTD)) {
|
|
||||||
if (flags & H5Z_FLAG_REVERSE) {
|
|
||||||
// First eight bytes is the number of bytes in the output buffer,
|
|
||||||
// little endian.
|
|
||||||
nbytes_uncomp = bshuf_read_uint64_BE(in_buf);
|
|
||||||
// Override the block size with the one read from the header.
|
|
||||||
block_size = bshuf_read_uint32_BE((const char*) in_buf + 8) / elem_size;
|
|
||||||
// Skip over the header.
|
|
||||||
in_buf += 12;
|
|
||||||
buf_size_out = nbytes_uncomp;
|
|
||||||
} else {
|
|
||||||
nbytes_uncomp = nbytes;
|
|
||||||
// Pick which compressions library to use
|
|
||||||
if(cd_values[4] == BSHUF_H5_COMPRESS_LZ4) {
|
|
||||||
buf_size_out = bshuf_compress_lz4_bound(nbytes_uncomp / elem_size,
|
|
||||||
elem_size, block_size) + 12;
|
|
||||||
}
|
|
||||||
#ifdef ZSTD_SUPPORT
|
|
||||||
else if (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD) {
|
|
||||||
buf_size_out = bshuf_compress_zstd_bound(nbytes_uncomp / elem_size,
|
|
||||||
elem_size, block_size) + 12;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
nbytes_uncomp = nbytes;
|
|
||||||
buf_size_out = nbytes;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO, remove this restriction by memcopying the extra.
|
|
||||||
if (nbytes_uncomp % elem_size) {
|
|
||||||
PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK,
|
|
||||||
"Non integer number of elements.");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
size = nbytes_uncomp / elem_size;
|
|
||||||
|
|
||||||
out_buf = malloc(buf_size_out);
|
|
||||||
if (out_buf == NULL) {
|
|
||||||
PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK,
|
|
||||||
"Could not allocate output buffer.");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cd_nelmts > 4 && (cd_values[4] == BSHUF_H5_COMPRESS_LZ4 || cd_values[4] == BSHUF_H5_COMPRESS_ZSTD)) {
|
|
||||||
if (flags & H5Z_FLAG_REVERSE) {
|
|
||||||
// Bit unshuffle/decompress.
|
|
||||||
// Pick which compressions library to use
|
|
||||||
if(cd_values[4] == BSHUF_H5_COMPRESS_LZ4) {
|
|
||||||
err = bshuf_decompress_lz4(in_buf, out_buf, size, elem_size, block_size);
|
|
||||||
}
|
|
||||||
#ifdef ZSTD_SUPPORT
|
|
||||||
else if (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD) {
|
|
||||||
err = bshuf_decompress_zstd(in_buf, out_buf, size, elem_size, block_size);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
nbytes_out = nbytes_uncomp;
|
|
||||||
} else {
|
|
||||||
// Bit shuffle/compress.
|
|
||||||
// Write the header, described in
|
|
||||||
// http://www.hdfgroup.org/services/filters/HDF5_LZ4.pdf.
|
|
||||||
// Technically we should be using signed integers instead of
|
|
||||||
// unsigned ones, however for valid inputs (positive numbers) these
|
|
||||||
// have the same representation.
|
|
||||||
bshuf_write_uint64_BE(out_buf, nbytes_uncomp);
|
|
||||||
bshuf_write_uint32_BE((char*) out_buf + 8, block_size * elem_size);
|
|
||||||
if(cd_values[4] == BSHUF_H5_COMPRESS_LZ4) {
|
|
||||||
err = bshuf_compress_lz4(in_buf, (char*) out_buf + 12, size,
|
|
||||||
elem_size, block_size);
|
|
||||||
}
|
|
||||||
#ifdef ZSTD_SUPPORT
|
|
||||||
else if (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD) {
|
|
||||||
err = bshuf_compress_zstd(in_buf, (char*) out_buf + 12, size,
|
|
||||||
elem_size, block_size, comp_lvl);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
nbytes_out = err + 12;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (flags & H5Z_FLAG_REVERSE) {
|
|
||||||
// Bit unshuffle.
|
|
||||||
err = bshuf_bitunshuffle(in_buf, out_buf, size, elem_size,
|
|
||||||
block_size); } else {
|
|
||||||
// Bit shuffle.
|
|
||||||
err = bshuf_bitshuffle(in_buf, out_buf, size, elem_size,
|
|
||||||
block_size); } nbytes_out = nbytes; }
|
|
||||||
//printf("nb_in %d, nb_uncomp %d, nb_out %d, buf_out %d, block %d\n",
|
|
||||||
//nbytes, nbytes_uncomp, nbytes_out, buf_size_out, block_size);
|
|
||||||
|
|
||||||
if (err < 0) {
|
|
||||||
sprintf(msg, "Error in bitshuffle with error code %d.", err);
|
|
||||||
PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, msg);
|
|
||||||
free(out_buf);
|
|
||||||
return 0;
|
|
||||||
} else {
|
|
||||||
free(*buf);
|
|
||||||
*buf = out_buf;
|
|
||||||
*buf_size = buf_size_out;
|
|
||||||
|
|
||||||
return nbytes_out;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
H5Z_class_t bshuf_H5Filter[1] = {{
|
|
||||||
H5Z_CLASS_T_VERS,
|
|
||||||
(H5Z_filter_t)(BSHUF_H5FILTER),
|
|
||||||
1, 1,
|
|
||||||
"bitshuffle; see https://github.com/kiyo-masui/bitshuffle",
|
|
||||||
NULL,
|
|
||||||
(H5Z_set_local_func_t)(bshuf_h5_set_local),
|
|
||||||
(H5Z_func_t)(bshuf_h5_filter)
|
|
||||||
}};
|
|
||||||
|
|
||||||
|
|
||||||
int bshuf_register_h5filter(void){
|
|
||||||
|
|
||||||
int retval;
|
|
||||||
|
|
||||||
retval = H5Zregister(bshuf_H5Filter);
|
|
||||||
if(retval<0){
|
|
||||||
PUSH_ERR("bshuf_register_h5filter",
|
|
||||||
H5E_CANTREGISTER, "Can't register bitshuffle filter");
|
|
||||||
}
|
|
||||||
return retval;
|
|
||||||
}
|
|
||||||
@@ -1,67 +0,0 @@
|
|||||||
/*
|
|
||||||
* Bitshuffle HDF5 filter
|
|
||||||
*
|
|
||||||
* This file is part of Bitshuffle
|
|
||||||
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
|
|
||||||
* Website: http://www.github.com/kiyo-masui/bitshuffle
|
|
||||||
* Created: 2014
|
|
||||||
*
|
|
||||||
* See LICENSE file for details about copyright and rights to use.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* Header File
|
|
||||||
*
|
|
||||||
* Filter Options
|
|
||||||
* --------------
|
|
||||||
* block_size (option slot 0) : integer (optional)
|
|
||||||
* What block size to use (in elements not bytes). Default is 0,
|
|
||||||
* for which bitshuffle will pick a block size with a target of 8kb.
|
|
||||||
* Compression (option slot 1) : 0 or BSHUF_H5_COMPRESS_LZ4
|
|
||||||
* Whether to apply LZ4 compression to the data after bitshuffling.
|
|
||||||
* This is much faster than applying compression as a second filter
|
|
||||||
* because it is done when the small block of data is already in the
|
|
||||||
* L1 cache.
|
|
||||||
*
|
|
||||||
* For LZ4 compression, the compressed format of the data is the same as
|
|
||||||
* for the normal LZ4 filter described in
|
|
||||||
* http://www.hdfgroup.org/services/filters/HDF5_LZ4.pdf.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef BSHUF_H5FILTER_H
|
|
||||||
#define BSHUF_H5FILTER_H
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define H5Z_class_t_vers 2
|
|
||||||
#include "hdf5.h"
|
|
||||||
|
|
||||||
|
|
||||||
#define BSHUF_H5FILTER 32008
|
|
||||||
|
|
||||||
|
|
||||||
#define BSHUF_H5_COMPRESS_LZ4 2
|
|
||||||
#define BSHUF_H5_COMPRESS_ZSTD 3
|
|
||||||
|
|
||||||
|
|
||||||
extern H5Z_class_t bshuf_H5Filter[1];
|
|
||||||
|
|
||||||
|
|
||||||
/* ---- bshuf_register_h5filter ----
|
|
||||||
*
|
|
||||||
* Register the bitshuffle HDF5 filter within the HDF5 library.
|
|
||||||
*
|
|
||||||
* Call this before using the bitshuffle HDF5 filter from C unless
|
|
||||||
* using dynamically loaded filters.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
int bshuf_register_h5filter(void);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} // extern "C"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif // BSHUF_H5FILTER_H
|
|
||||||
+4
-4
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* IOchain - Distribute a chain of dependent IO events among threads.
|
* IOchain - Distribute a chain of dependant IO events amoung threads.
|
||||||
*
|
*
|
||||||
* This file is part of Bitshuffle
|
* This file is part of Bitshuffle
|
||||||
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
|
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
|
||||||
@@ -81,9 +81,9 @@ void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr) {
|
|||||||
C->out_pl[(*this_iter + 1) % IOC_SIZE].ptr = out_ptr;
|
C->out_pl[(*this_iter + 1) % IOC_SIZE].ptr = out_ptr;
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
omp_unset_lock(&(C->out_pl[(*this_iter + 1) % IOC_SIZE].lock));
|
omp_unset_lock(&(C->out_pl[(*this_iter + 1) % IOC_SIZE].lock));
|
||||||
// *in_pl[this_iter]* lock released at the end of the iteration to avoid being
|
|
||||||
// overtaken by previous threads and having *out_pl[this_iter]* corrupted.
|
|
||||||
// Especially worried about thread 0, iteration 0.
|
|
||||||
omp_unset_lock(&(C->in_pl[(*this_iter) % IOC_SIZE].lock));
|
omp_unset_lock(&(C->in_pl[(*this_iter) % IOC_SIZE].lock));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|||||||
+3
-3
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* IOchain - Distribute a chain of dependent IO events among threads.
|
* IOchain - Distribute a chain of dependant IO events amoung threads.
|
||||||
*
|
*
|
||||||
* This file is part of Bitshuffle
|
* This file is part of Bitshuffle
|
||||||
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
|
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
|
||||||
@@ -25,7 +25,7 @@
|
|||||||
* Usage
|
* Usage
|
||||||
* -----
|
* -----
|
||||||
* - Call `ioc_init` in serial block.
|
* - Call `ioc_init` in serial block.
|
||||||
* - Each thread should create a local variable *size_t this_iter* and
|
* - Each thread should create a local variable *size_t this_iter* and
|
||||||
* pass its address to all function calls. Its value will be set
|
* pass its address to all function calls. Its value will be set
|
||||||
* inside the functions and is used to identify the thread.
|
* inside the functions and is used to identify the thread.
|
||||||
* - Each thread must call each of the `ioc_get*` and `ioc_set*` methods
|
* - Each thread must call each of the `ioc_get*` and `ioc_set*` methods
|
||||||
@@ -90,5 +90,5 @@ void ioc_set_next_in(ioc_chain *C, size_t* this_iter, void* in_ptr);
|
|||||||
void * ioc_get_out(ioc_chain *C, size_t *this_iter);
|
void * ioc_get_out(ioc_chain *C, size_t *this_iter);
|
||||||
void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr);
|
void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr);
|
||||||
|
|
||||||
#endif // IOCHAIN_H
|
#endif
|
||||||
|
|
||||||
|
|||||||
+917
-1897
File diff suppressed because it is too large
Load Diff
+250
-664
File diff suppressed because it is too large
Load Diff
@@ -3,18 +3,19 @@
|
|||||||
* Author: Charles Mita
|
* Author: Charles Mita
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hdf5.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <hdf5.h>
|
||||||
#include "err.h"
|
#include "err.h"
|
||||||
|
|
||||||
|
|
||||||
struct error_stack_t {
|
struct error_stack_t {
|
||||||
char **files;
|
char **files;
|
||||||
char **funcs;
|
char **funcs;
|
||||||
int *lines;
|
int *lines;
|
||||||
int *errors;
|
int *errors;
|
||||||
char **messages;
|
char **messages;
|
||||||
int size;
|
int size;
|
||||||
};
|
};
|
||||||
|
|
||||||
static char files_buffer[ERR_MAX_FILENAME_LENGTH * ERR_MAX_STACK_SIZE] = {0};
|
static char files_buffer[ERR_MAX_FILENAME_LENGTH * ERR_MAX_STACK_SIZE] = {0};
|
||||||
@@ -29,89 +30,89 @@ static char *messages[ERR_MAX_STACK_SIZE] = {0};
|
|||||||
|
|
||||||
static struct error_stack_t stack = {files, funcs, lines, errors, messages, 0};
|
static struct error_stack_t stack = {files, funcs, lines, errors, messages, 0};
|
||||||
|
|
||||||
void push_error_stack(const char *file, const char *func, int line, int err,
|
|
||||||
const char *message) {
|
|
||||||
if (stack.size >= ERR_MAX_STACK_SIZE)
|
|
||||||
return; /* unfortunate */
|
|
||||||
int idx = stack.size;
|
|
||||||
|
|
||||||
/* subtract 1 to ensure room for null byte in buffer */
|
void push_error_stack(const char *file, const char *func, int line, int err, const char *message) {
|
||||||
sprintf(stack.funcs[idx], "%.*s", ERR_MAX_FUNCNAME_LENGTH - 1, func);
|
if (stack.size >= ERR_MAX_STACK_SIZE) return; /* unfortunate */
|
||||||
sprintf(stack.files[idx], "%.*s", ERR_MAX_FILENAME_LENGTH - 1, file);
|
int idx = stack.size;
|
||||||
sprintf(stack.messages[idx], "%.*s", ERR_MAX_MESSAGE_LENGTH - 1, message);
|
|
||||||
stack.lines[idx] = line;
|
|
||||||
stack.errors[idx] = err;
|
|
||||||
|
|
||||||
stack.size++;
|
/* subtract 1 to ensure room for null byte in buffer */
|
||||||
|
sprintf(stack.funcs[idx], "%.*s", ERR_MAX_FUNCNAME_LENGTH - 1, func);
|
||||||
|
sprintf(stack.files[idx], "%.*s", ERR_MAX_FILENAME_LENGTH - 1, file);
|
||||||
|
sprintf(stack.messages[idx], "%.*s", ERR_MAX_MESSAGE_LENGTH - 1, message);
|
||||||
|
stack.lines[idx] = line;
|
||||||
|
stack.errors[idx] = err;
|
||||||
|
|
||||||
|
stack.size++;
|
||||||
}
|
}
|
||||||
|
|
||||||
herr_t h5e_walk_callback(unsigned int n, const struct H5E_error2_t *err,
|
|
||||||
void *client_data) {
|
herr_t h5e_walk_callback(unsigned int n, const struct H5E_error2_t *err, void *client_data) {
|
||||||
herr_t retval = 0;
|
herr_t retval = 0;
|
||||||
/* only read the message for the innermost stack frame - the rest are just
|
/* only read the message for the innermost stack frame - the rest are just noise */
|
||||||
* noise */
|
if (n == 0) {
|
||||||
if (n == 0) {
|
char message[ERR_MAX_MESSAGE_LENGTH] = {0};
|
||||||
char message[ERR_MAX_MESSAGE_LENGTH] = {0};
|
sprintf(message, "%.*s", ERR_MAX_MESSAGE_LENGTH - 1, err->desc);
|
||||||
sprintf(message, "%.*s", ERR_MAX_MESSAGE_LENGTH - 1, err->desc);
|
push_error_stack(err->file_name, err->func_name, err->line, -1, message);
|
||||||
push_error_stack(err->file_name, err->func_name, err->line, -1, message);
|
} else {
|
||||||
} else {
|
push_error_stack(err->file_name, err->func_name, err->line, -1, "");
|
||||||
push_error_stack(err->file_name, err->func_name, err->line, -1, "");
|
}
|
||||||
}
|
return retval;
|
||||||
return retval;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int h5e_error_callback(hid_t stack_id, void *client_data) {
|
int h5e_error_callback(hid_t stack_id, void *client_data) {
|
||||||
int retval = 0;
|
int retval = 0;
|
||||||
herr_t err = 0;
|
herr_t err = 0;
|
||||||
err = H5Ewalk2(stack_id, H5E_WALK_UPWARD, &h5e_walk_callback, client_data);
|
err = H5Ewalk2(stack_id, H5E_WALK_UPWARD, &h5e_walk_callback, client_data);
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
ERROR_JUMP(err, done, "Error walking HDF5 Error stack");
|
ERROR_JUMP(err, done, "Error walking HDF5 Error stack");
|
||||||
}
|
}
|
||||||
done:
|
done:
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void reset_error_stack() {
|
void reset_error_stack() {
|
||||||
stack.size = 0;
|
stack.size = 0;
|
||||||
H5Eclear2(H5E_DEFAULT); /* almost certainly unnecessary */
|
H5Eclear2(H5E_DEFAULT); /* almost certainly unnecessary */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void dump_error_stack(FILE *out) {
|
void dump_error_stack(FILE *out) {
|
||||||
int idx = stack.size;
|
int idx = stack.size;
|
||||||
if (idx > 0)
|
if (idx > 0) fprintf(out, "Durin plugin error:\n");
|
||||||
fprintf(out, "Durin plugin error:\n");
|
while (idx-- > 0) {
|
||||||
while (idx-- > 0) {
|
const char *file = stack.files[idx];
|
||||||
const char *file = stack.files[idx];
|
const char *func = stack.funcs[idx];
|
||||||
const char *func = stack.funcs[idx];
|
const char *message = stack.messages[idx];
|
||||||
const char *message = stack.messages[idx];
|
const int line = stack.lines[idx];
|
||||||
const int line = stack.lines[idx];
|
if (message[0] != '\0') {
|
||||||
if (message[0] != '\0') {
|
fprintf(out, "\t%s - line %d in %s:\n\t\t%s\n", file, line, func, message);
|
||||||
fprintf(out, "\t%s - line %d in %s:\n\t\t%s\n", file, line, func,
|
} else {
|
||||||
message);
|
fprintf(out, "\t%s - line %d in %s\n", file, line, func);
|
||||||
} else {
|
}
|
||||||
fprintf(out, "\t%s - line %d in %s\n", file, line, func);
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int init_h5_error_handling() {
|
int init_h5_error_handling() {
|
||||||
int retval = 0;
|
int retval = 0;
|
||||||
hid_t err = 0;
|
hid_t err = 0;
|
||||||
if ((err = H5Eset_auto2(H5E_DEFAULT, &h5e_error_callback, NULL)) < 0) {
|
if ((err = H5Eset_auto2(H5E_DEFAULT, &h5e_error_callback, NULL)) < 0) {
|
||||||
ERROR_JUMP(err, done, "Error configuring HDF5 error callback");
|
ERROR_JUMP(err, done, "Error configuring HDF5 error callback");
|
||||||
}
|
}
|
||||||
done:
|
done:
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
int init_error_handling() {
|
int init_error_handling() {
|
||||||
int retval = 0;
|
int retval = 0;
|
||||||
int idx = 0;
|
int idx = 0;
|
||||||
while (idx < ERR_MAX_STACK_SIZE) {
|
while (idx < ERR_MAX_STACK_SIZE) {
|
||||||
stack.files[idx] = files_buffer + (idx * ERR_MAX_FILENAME_LENGTH);
|
stack.files[idx] = files_buffer + (idx * ERR_MAX_FILENAME_LENGTH);
|
||||||
stack.funcs[idx] = funcs_buffer + (idx * ERR_MAX_FUNCNAME_LENGTH);
|
stack.funcs[idx] = funcs_buffer + (idx * ERR_MAX_FUNCNAME_LENGTH);
|
||||||
stack.messages[idx] = messages_buffer + (idx * ERR_MAX_MESSAGE_LENGTH);
|
stack.messages[idx] = messages_buffer + (idx * ERR_MAX_MESSAGE_LENGTH);
|
||||||
idx++;
|
idx++;
|
||||||
}
|
}
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,44 +3,46 @@
|
|||||||
* Author: Charles Mita
|
* Author: Charles Mita
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#ifndef NXS_XDS_ERR_H
|
#ifndef NXS_XDS_ERR_H
|
||||||
#define NXS_XDS_ERR_H
|
#define NXS_XDS_ERR_H
|
||||||
|
|
||||||
|
|
||||||
#define ERR_MAX_FILENAME_LENGTH 64
|
#define ERR_MAX_FILENAME_LENGTH 64
|
||||||
#define ERR_MAX_FUNCNAME_LENGTH 128
|
#define ERR_MAX_FUNCNAME_LENGTH 128
|
||||||
#define ERR_MAX_MESSAGE_LENGTH 1024
|
#define ERR_MAX_MESSAGE_LENGTH 1024
|
||||||
#define ERR_MAX_STACK_SIZE 128
|
#define ERR_MAX_STACK_SIZE 128
|
||||||
|
|
||||||
|
|
||||||
/* obtain __func__ from GCC if no C99 */
|
/* obtain __func__ from GCC if no C99 */
|
||||||
#if __STDC_VERSION__ < 199901L
|
#if __STDC_VERSION__ < 199901L
|
||||||
#if __GNUC__ >= 2
|
# if __GNUC__ >= 2
|
||||||
#define __func__ __FUNCTION__
|
# define __func__ __FUNCTION__
|
||||||
#else
|
# else
|
||||||
#define __func__ "<unknown>"
|
# define __func__ "<unknown>"
|
||||||
#endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if __GNUC__ >= 2
|
#if __GNUC__ >= 2
|
||||||
#define __line__ __LINE__
|
# define __line__ __LINE__
|
||||||
#else
|
#else
|
||||||
#define __line__ 0
|
# define __line__ 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if __GNUC__ >= 2
|
#if __GNUC__ >= 2
|
||||||
#define __file__ __FILE__
|
# define __file__ __FILE__
|
||||||
#else
|
#else
|
||||||
#define __file__ "unknown"
|
# define __file__ "unknown"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define ERROR_JUMP(err, target, message) \
|
#define ERROR_JUMP(err, target, message) \
|
||||||
{ \
|
{ \
|
||||||
push_error_stack(__file__, __func__, __line__, err, message); \
|
push_error_stack(__file__, __func__, __line__, err, message); \
|
||||||
retval = err; \
|
retval = err; \
|
||||||
goto target; \
|
goto target; \
|
||||||
}
|
}
|
||||||
|
|
||||||
void push_error_stack(const char *file, const char *func, int line, int err,
|
void push_error_stack(const char *file, const char *func, int line, int err, const char *message);
|
||||||
const char *message);
|
|
||||||
|
|
||||||
void dump_error_stack(FILE *out);
|
void dump_error_stack(FILE *out);
|
||||||
|
|
||||||
|
|||||||
+758
-790
File diff suppressed because it is too large
Load Diff
+21
-22
@@ -3,49 +3,48 @@
|
|||||||
* Author: Charles Mita
|
* Author: Charles Mita
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#ifndef NXS_XDS_FILE_H
|
#ifndef NXS_XDS_FILE_H
|
||||||
#define NXS_XDS_FILE_H
|
#define NXS_XDS_FILE_H
|
||||||
|
|
||||||
|
#include <hdf5.h>
|
||||||
#include "err.h"
|
#include "err.h"
|
||||||
#include "filters.h"
|
#include "filters.h"
|
||||||
#include <hdf5.h>
|
|
||||||
|
|
||||||
struct ds_desc_t {
|
struct ds_desc_t {
|
||||||
hid_t det_g_id;
|
hid_t det_g_id;
|
||||||
hid_t data_g_id;
|
hid_t data_g_id;
|
||||||
hsize_t dims[3];
|
hsize_t dims[3];
|
||||||
int data_width;
|
int data_width;
|
||||||
int image_number_offset;
|
int (*get_pixel_properties)(const struct ds_desc_t*, double*, double*);
|
||||||
int (*get_pixel_properties)(const struct ds_desc_t *, double *, double *);
|
int (*get_pixel_mask)(const struct ds_desc_t*, int*);
|
||||||
int (*get_pixel_mask)(const struct ds_desc_t *, int *);
|
int (*get_data_frame)(const struct ds_desc_t*, const int, void*);
|
||||||
int (*get_data_frame)(const struct ds_desc_t *, const int, void *);
|
void (*free_desc)(struct ds_desc_t*);
|
||||||
void (*free_desc)(struct ds_desc_t *);
|
|
||||||
int i2i[]; // array to hold a translation from the image number requested by XDS and the actual position in the HDF5 file
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct nxs_ds_desc_t {
|
struct nxs_ds_desc_t {
|
||||||
struct ds_desc_t base;
|
struct ds_desc_t base;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct eiger_ds_desc_t {
|
struct eiger_ds_desc_t {
|
||||||
struct ds_desc_t base;
|
struct ds_desc_t base;
|
||||||
int n_data_blocks;
|
int n_data_blocks;
|
||||||
int *block_sizes;
|
int *block_sizes;
|
||||||
int (*frame_func)(const struct ds_desc_t *, const char *, const hsize_t *,
|
int (*frame_func)(const struct ds_desc_t*, const char*, const hsize_t*, const hsize_t*, void*);
|
||||||
const hsize_t *, void *);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct opt_eiger_ds_desc_t {
|
struct opt_eiger_ds_desc_t {
|
||||||
struct eiger_ds_desc_t base;
|
struct eiger_ds_desc_t base;
|
||||||
int bs_applied;
|
int bs_applied;
|
||||||
unsigned int bs_params[BS_H5_N_PARAMS];
|
unsigned int bs_params[BS_H5_N_PARAMS];
|
||||||
};
|
};
|
||||||
|
|
||||||
int get_detector_info(const hid_t fid, struct ds_desc_t **desc);
|
int get_detector_info(const hid_t fid, struct ds_desc_t **desc);
|
||||||
|
|
||||||
struct det_visit_objects_t {
|
struct det_visit_objects_t {
|
||||||
hid_t nxdata;
|
hid_t nxdata;
|
||||||
hid_t nxdetector;
|
hid_t nxdetector;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* NXS_XDS_FILE_H */
|
#endif /* NXS_XDS_FILE_H */
|
||||||
|
|||||||
+36
-36
@@ -4,55 +4,55 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#include "bitshuffle.h"
|
|
||||||
#include "err.h"
|
|
||||||
#include "filters.h"
|
#include "filters.h"
|
||||||
|
#include "err.h"
|
||||||
|
#include "bitshuffle.h"
|
||||||
|
|
||||||
/* Required prototypes from bitshuffle.c but not included in header */
|
/* Required prototypes from bitshuffle.c but not included in header */
|
||||||
uint64_t bshuf_read_uint64_BE(const void *buffer);
|
uint64_t bshuf_read_uint64_BE(const void *buffer);
|
||||||
uint32_t bshuf_read_uint32_BE(const void *buffer);
|
uint32_t bshuf_read_uint32_BE(const void *buffer);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Derived from the h5 filter code from the bitshuffle project (not included
|
* Derived from the h5 filter code from the bitshuffle project (not included here)
|
||||||
* here)
|
|
||||||
*/
|
*/
|
||||||
int bslz4_decompress(const unsigned int *bs_params, size_t in_size,
|
int bslz4_decompress(
|
||||||
void *in_buffer, size_t out_size, void *out_buffer) {
|
const unsigned int* bs_params,
|
||||||
|
size_t in_size,
|
||||||
|
void *in_buffer,
|
||||||
|
size_t out_size,
|
||||||
|
void *out_buffer) {
|
||||||
|
|
||||||
int retval = 0;
|
int retval = 0;
|
||||||
size_t size, elem_size, block_size, u_bytes;
|
size_t size, elem_size, block_size, u_bytes;
|
||||||
|
|
||||||
elem_size = bs_params[2];
|
elem_size = bs_params[2];
|
||||||
u_bytes = bshuf_read_uint64_BE(in_buffer);
|
u_bytes = bshuf_read_uint64_BE(in_buffer);
|
||||||
|
|
||||||
if (u_bytes != out_size) {
|
if (u_bytes != out_size) {
|
||||||
char message[64];
|
char message[64];
|
||||||
sprintf(message, "Decompressed chunk is %lu bytes, expected %lu", u_bytes,
|
sprintf(message, "Decompressed chunk is %lu bytes, expected %lu", u_bytes, out_size);
|
||||||
out_size);
|
ERROR_JUMP(-1, done, message);
|
||||||
ERROR_JUMP(-1, done, message);
|
}
|
||||||
}
|
|
||||||
|
|
||||||
block_size = bshuf_read_uint32_BE((const char *)in_buffer + 8) / elem_size;
|
block_size = bshuf_read_uint32_BE((const char *) in_buffer + 8) / elem_size;
|
||||||
if (!block_size) {
|
if (!block_size) {
|
||||||
ERROR_JUMP(-1, done, "Read block bitshuffle lz4 block size as 0");
|
ERROR_JUMP(-1, done, "Read block bitshuffle lz4 block size as 0");
|
||||||
}
|
}
|
||||||
/* skip over header */
|
/* skip over header */
|
||||||
in_buffer += 12;
|
in_buffer += 12;
|
||||||
size = u_bytes / elem_size;
|
size = u_bytes / elem_size;
|
||||||
|
|
||||||
if (bs_params[4] == BS_H5_PARAM_LZ4_COMPRESS) {
|
if (bs_params[4] == BS_H5_PARAM_LZ4_COMPRESS) {
|
||||||
if (bshuf_decompress_lz4(in_buffer, out_buffer, size, elem_size,
|
if (bshuf_decompress_lz4(in_buffer, out_buffer, size, elem_size, block_size) < 0) {
|
||||||
block_size) < 0) {
|
ERROR_JUMP(-1, done, "Error performing bitshuffle_lz4 decompression");
|
||||||
ERROR_JUMP(-1, done, "Error performing bitshuffle_lz4 decompression");
|
}
|
||||||
}
|
} else {
|
||||||
} else {
|
if (bshuf_bitunshuffle(in_buffer, out_buffer, size, elem_size, block_size) < 0) {
|
||||||
if (bshuf_bitunshuffle(in_buffer, out_buffer, size, elem_size, block_size) <
|
ERROR_JUMP(-1, done, "Error performing bit unshuffle");
|
||||||
0) {
|
}
|
||||||
ERROR_JUMP(-1, done, "Error performing bit unshuffle");
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
done:
|
done:
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|||||||
+8
-2
@@ -10,7 +10,13 @@
|
|||||||
#define BS_H5_FILTER_ID 32008
|
#define BS_H5_FILTER_ID 32008
|
||||||
#define BS_H5_PARAM_LZ4_COMPRESS 2
|
#define BS_H5_PARAM_LZ4_COMPRESS 2
|
||||||
|
|
||||||
int bslz4_decompress(const unsigned int *bs_params, size_t in_size,
|
|
||||||
void *in_buffer, size_t out_size, void *out_buffer);
|
|
||||||
|
int bslz4_decompress(
|
||||||
|
const unsigned int* bs_params,
|
||||||
|
size_t in_size,
|
||||||
|
void *in_buffer,
|
||||||
|
size_t out_size,
|
||||||
|
void *out_buffer);
|
||||||
|
|
||||||
#endif /* NXS_XDS_FILTER_H */
|
#endif /* NXS_XDS_FILTER_H */
|
||||||
|
|||||||
+194
-404
@@ -3,462 +3,252 @@
|
|||||||
* Author: Charles Mita
|
* Author: Charles Mita
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#include <hdf5.h>
|
#include <hdf5.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
#include "file.h"
|
#include "file.h"
|
||||||
#include "filters.h"
|
#include "filters.h"
|
||||||
#include "plugin.h"
|
#include "plugin.h"
|
||||||
|
|
||||||
#ifdef USE_BITSHUFFLE
|
|
||||||
#include "bshuf_h5filter.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* XDS does not provide an error callback facility, so just write to stderr
|
/* XDS does not provide an error callback facility, so just write to stderr for now */
|
||||||
for now - generally regarded as poor practice */
|
/* generally regarded as poor practice */
|
||||||
#define ERROR_OUTPUT stderr
|
#define ERROR_OUTPUT stderr
|
||||||
|
|
||||||
/* mask bits loosely based on what Neggia does and what NeXus says should be
|
|
||||||
done basically - anything in the low byte (& 0xFF) means "ignore this"
|
|
||||||
Neggia uses the value -2 if bit 1, 2 or 3 are set */
|
/* mask bits loosely based on what Neggia does and what NeXus says should be done */
|
||||||
/* CV-GPhL-20210408: we want more control over the value non-masked
|
/* basically - anything in the low byte (& 0xFF) means "ignore this" */
|
||||||
pixels should be set to. */
|
/* Neggia usses the value -2 if bit 1, 2 or 3 are set */
|
||||||
#define COPY_AND_MASK(in, value, setValue, out, size, mask) \
|
#define COPY_AND_MASK(in, out, size, mask) \
|
||||||
{ \
|
{ \
|
||||||
int i; \
|
int i; \
|
||||||
if (value!=0) { \
|
if (mask) { \
|
||||||
if (mask) { \
|
for (i = 0; i < size; ++i) { \
|
||||||
for (i = 0; i < size; ++i) { \
|
out[i] = in[i]; \
|
||||||
out[i] = (in[i] == value) ? setValue : in[i]; \
|
if (mask[i] & 0xFF) out[i] = -1; \
|
||||||
if (mask[i] & 0xFF) \
|
if (mask[i] & 30) out[i] = -2; \
|
||||||
out[i] = -1; \
|
} \
|
||||||
if (mask[i] & 30) \
|
} else { \
|
||||||
out[i] = -2; \
|
for (i = 0; i < size; i++) { \
|
||||||
} \
|
out[i] = in[i]; \
|
||||||
} else { \
|
} \
|
||||||
for (i = 0; i < size; i++) { \
|
} \
|
||||||
out[i] = (in[i] == value) ? setValue : in[i]; \
|
}
|
||||||
} \
|
|
||||||
} \
|
#define APPLY_MASK(buffer, mask, size) \
|
||||||
} else { \
|
{ \
|
||||||
if (mask) { \
|
int i; \
|
||||||
for (i = 0; i < size; ++i) { \
|
if (mask) { \
|
||||||
out[i] = in[i]; \
|
for (i = 0; i < size; ++i) { \
|
||||||
if (mask[i] & 0xFF) \
|
if (mask[i] & 0xFF) buffer[i] = -1; \
|
||||||
out[i] = -1; \
|
if (mask[i] & 30) buffer[i] = -2; \
|
||||||
if (mask[i] & 30) \
|
} \
|
||||||
out[i] = -2; \
|
} \
|
||||||
} \
|
}
|
||||||
} else { \
|
|
||||||
for (i = 0; i < size; i++) { \
|
|
||||||
out[i] = in[i]; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
#define APPLY_MASK(buffer, mask, size) \
|
|
||||||
{ \
|
|
||||||
int i; \
|
|
||||||
if (mask) { \
|
|
||||||
for (i = 0; i < size; ++i) { \
|
|
||||||
if (mask[i] & 0xFF) \
|
|
||||||
buffer[i] = -1; \
|
|
||||||
if (mask[i] & 30) \
|
|
||||||
buffer[i] = -2; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
static hid_t file_id = 0;
|
static hid_t file_id = 0;
|
||||||
static struct ds_desc_t *data_desc = NULL;
|
static struct ds_desc_t *data_desc = NULL;
|
||||||
static int *mask_buffer = NULL;
|
static int *mask_buffer = NULL;
|
||||||
|
|
||||||
// CV-20240605: potentially provide a mapping from frame number (as
|
|
||||||
// requested by caller) to actual 2D slice within 3D data
|
|
||||||
// array.
|
|
||||||
//
|
|
||||||
// This is defined by the environment variable
|
|
||||||
// DURIN_IMAGE2ORDINAL (see below).
|
|
||||||
int *image2ordinal = NULL;
|
|
||||||
int image2ordinal_debug = 0;
|
|
||||||
int image2ordinal_imin = 0;
|
|
||||||
int image2ordinal_imax = 0;
|
|
||||||
|
|
||||||
void fill_info_array(int info[1024]) {
|
void fill_info_array(int info[1024]) {
|
||||||
info[0] = DLS_CUSTOMER_ID;
|
info[0] = DLS_CUSTOMER_ID;
|
||||||
info[1] = VERSION_MAJOR;
|
info[1] = VERSION_MAJOR;
|
||||||
info[2] = VERSION_MINOR;
|
info[2] = VERSION_MINOR;
|
||||||
info[3] = VERSION_PATCH;
|
info[3] = VERSION_PATCH;
|
||||||
info[4] = VERSION_TIMESTAMP;
|
info[4] = VERSION_TIMESTAMP;
|
||||||
info[5] = 0; // image number offset
|
|
||||||
info[6] = -1; // marked pixels not already in pixel_mask: reset to this value
|
|
||||||
|
|
||||||
char *cenv;
|
|
||||||
cenv = getenv("DURIN_IMAGE_NUMBER_OFFSET");
|
|
||||||
if (cenv!=NULL) {
|
|
||||||
info[5] = atoi(cenv);
|
|
||||||
}
|
|
||||||
cenv = getenv("DURIN_RESET_UNMASKED_PIXEL");
|
|
||||||
if (cenv!=NULL) {
|
|
||||||
info[6] = atoi(cenv);
|
|
||||||
}
|
|
||||||
|
|
||||||
cenv = getenv("DURIN_IMAGE2ORDINAL");
|
|
||||||
if (cenv!=NULL&&(!image2ordinal)) {
|
|
||||||
|
|
||||||
char *denv = getenv("DURIN_IMAGE2ORDINAL_DEBUG");
|
|
||||||
if (denv!=NULL) {
|
|
||||||
image2ordinal_debug=1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// <ordinal_start>,<image_1_start>-<image_1_end>,<image_2_start>-<image_2_end>,..,<image_N_start>-<image_N_end>
|
|
||||||
if (image2ordinal_debug) printf("DURIN_IMAGE2ORDINAL = \"%s\"\n",cenv);
|
|
||||||
|
|
||||||
const char outer_delimiters[] = ",";
|
|
||||||
const char inner_delimiters[] = "-";
|
|
||||||
|
|
||||||
char *found;
|
|
||||||
|
|
||||||
char *outer_saveptr;
|
|
||||||
char *inner_saveptr;
|
|
||||||
|
|
||||||
int ordinal_start = 0;
|
|
||||||
int ordinal = 0;
|
|
||||||
int ntt = -1;
|
|
||||||
found = strtok_r(cenv,outer_delimiters, &outer_saveptr);
|
|
||||||
if (found!=NULL) {
|
|
||||||
int tt[1000][2];
|
|
||||||
while(found) {
|
|
||||||
if (ordinal_start==0) {
|
|
||||||
ordinal_start = atoi(found);
|
|
||||||
ordinal = ordinal_start - 1;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
char* s = strtok_r(found, inner_delimiters, &inner_saveptr);
|
|
||||||
if (s!=NULL) {
|
|
||||||
int i1 = atoi(s);
|
|
||||||
s = strtok_r(NULL,inner_delimiters, &inner_saveptr);
|
|
||||||
if (s!=NULL) {
|
|
||||||
int i2 = atoi(s);
|
|
||||||
ntt++;
|
|
||||||
if (ntt<=1000) {
|
|
||||||
tt[ntt][0] = i1;
|
|
||||||
tt[ntt][1] = i2;
|
|
||||||
for(int i=i1; i<=i2; ++i) {
|
|
||||||
ordinal++;
|
|
||||||
if (ordinal==1) {
|
|
||||||
image2ordinal_imin=i;
|
|
||||||
image2ordinal_imax=i;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if (i<image2ordinal_imin) image2ordinal_imin=i;
|
|
||||||
if (i>image2ordinal_imax) image2ordinal_imax=i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
found = strtok_r(NULL,outer_delimiters,&outer_saveptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ordinal_start>0) {
|
|
||||||
if (image2ordinal_debug) {
|
|
||||||
printf("ordinal_start, end = %d %d\n",ordinal_start, ordinal);
|
|
||||||
printf("imin, imax = %d %d\n",image2ordinal_imin,image2ordinal_imax);
|
|
||||||
}
|
|
||||||
|
|
||||||
// allocate array to go from image number/id to ordinal:
|
|
||||||
image2ordinal = malloc((image2ordinal_imax-image2ordinal_imin+1) * sizeof(image2ordinal_imin));
|
|
||||||
int ordinal = ordinal_start - 1;
|
|
||||||
for(int i=0; i<=ntt; i++) {
|
|
||||||
for(int j=tt[i][0];j<=tt[i][1];j++) {
|
|
||||||
ordinal++;
|
|
||||||
//printf(" %d -> %d\n",ordinal,j);
|
|
||||||
image2ordinal[j] = ordinal;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (image2ordinal&&image2ordinal_debug) {
|
|
||||||
for(int i=image2ordinal_imin; i<=image2ordinal_imax; i++) {
|
|
||||||
if (image2ordinal[i]>0) {
|
|
||||||
printf(" %d -> %d\n",i,image2ordinal[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int convert_to_int_and_mask(void *in_buffer, int width, int setValue, int *out_buffer,
|
int convert_to_int_and_mask(void *in_buffer, int d_width, int *out_buffer, int length, int *mask) {
|
||||||
int length, int *mask) {
|
/* transfer data to output buffer, performing data conversion as required */
|
||||||
/* transfer data to output buffer, performing data conversion as required */
|
int retval = 0;
|
||||||
int retval = 0;
|
/* TODO: decide how conversion of data should work */
|
||||||
/* TODO: decide how conversion of data should work */
|
/* Should we sign extend? Neggia doesn't (casts from uint*), but may be more intuitive */
|
||||||
/* Should we sign extend? Neggia doesn't (casts from uint*), but may be more
|
if (d_width == sizeof(signed char)) {
|
||||||
* intuitive */
|
signed char *in = in_buffer;
|
||||||
|
COPY_AND_MASK(in, out_buffer, length, mask);
|
||||||
int d_width = abs(width);
|
} else if (d_width == sizeof(short)) {
|
||||||
|
short *in = in_buffer;
|
||||||
// CV-20210407
|
COPY_AND_MASK(in, out_buffer, length, mask);
|
||||||
// Dealing with a signed data array: no extra check for marker
|
} else if (d_width == sizeof(int)) {
|
||||||
// value needed (since data can already take advantage of the
|
int *in = in_buffer;
|
||||||
// negative data range). It is unclear though why/when data would
|
COPY_AND_MASK(in, out_buffer, length, mask);
|
||||||
// come in as a signed array in the first place ...
|
} else if (d_width == sizeof(long int)) {
|
||||||
if (width<0) {
|
long int *in = in_buffer;
|
||||||
if (d_width == sizeof(signed char)) {
|
COPY_AND_MASK(in, out_buffer, length, mask);
|
||||||
// 8-bit
|
} else if (d_width == sizeof(long long int)) {
|
||||||
signed char *in = in_buffer;
|
long long int *in = in_buffer;
|
||||||
COPY_AND_MASK(in, 0, setValue, out_buffer, length, mask);
|
COPY_AND_MASK(in, out_buffer, length, mask);
|
||||||
} else if (d_width == sizeof(short)) {
|
} else {
|
||||||
// 16-bit
|
char message[128];
|
||||||
short *in = in_buffer;
|
sprintf(message, "Unsupported conversion of data width %d to %ld (int)", d_width, sizeof(int));
|
||||||
COPY_AND_MASK(in, 0, setValue, out_buffer, length, mask);
|
ERROR_JUMP(-1, done, message);
|
||||||
} else if (d_width == sizeof(int)) {
|
}
|
||||||
// 16-bit
|
|
||||||
int *in = in_buffer;
|
|
||||||
COPY_AND_MASK(in, 0, setValue, out_buffer, length, mask);
|
|
||||||
} else if (d_width == sizeof(long int)) {
|
|
||||||
// 32-bit
|
|
||||||
long int *in = in_buffer;
|
|
||||||
COPY_AND_MASK(in, 0, setValue, out_buffer, length, mask);
|
|
||||||
} else if (d_width == sizeof(long long int)) {
|
|
||||||
// 64-bit
|
|
||||||
long long int *in = in_buffer;
|
|
||||||
COPY_AND_MASK(in, 0, setValue, out_buffer, length, mask);
|
|
||||||
} else {
|
|
||||||
char message[128];
|
|
||||||
sprintf(message, "Unsupported conversion of data width %d to %ld (int)",
|
|
||||||
d_width, sizeof(int));
|
|
||||||
ERROR_JUMP(-1, done, message);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// CV-20210407
|
|
||||||
// Dealing with an unsigned data array: extra check for marker
|
|
||||||
// value required (to handle overloaded pixels correctly if wanted
|
|
||||||
// - see also DURIN_RESET_UNMASKED_PIXEL environment variable).
|
|
||||||
else {
|
|
||||||
if (d_width == sizeof(unsigned char)) {
|
|
||||||
// 8-bit
|
|
||||||
unsigned char *in = in_buffer;
|
|
||||||
COPY_AND_MASK(in, UINT8_MAX, setValue, out_buffer, length, mask);
|
|
||||||
} else if (d_width == sizeof(unsigned short)) {
|
|
||||||
// 16-bit
|
|
||||||
unsigned short *in = in_buffer;
|
|
||||||
COPY_AND_MASK(in, UINT16_MAX, setValue, out_buffer, length, mask);
|
|
||||||
} else if (d_width == sizeof(unsigned int)) {
|
|
||||||
// 16-bit
|
|
||||||
unsigned int *in = in_buffer;
|
|
||||||
COPY_AND_MASK(in, UINT16_MAX, setValue, out_buffer, length, mask);
|
|
||||||
} else if (d_width == sizeof(unsigned long)) {
|
|
||||||
// 32-bit
|
|
||||||
unsigned long *in = in_buffer;
|
|
||||||
COPY_AND_MASK(in, UINT32_MAX, setValue, out_buffer, length, mask);
|
|
||||||
} else if (d_width == sizeof(unsigned long long)) {
|
|
||||||
// 64-bit
|
|
||||||
unsigned long long *in = in_buffer;
|
|
||||||
COPY_AND_MASK(in, UINT32_MAX, setValue, out_buffer, length, mask);
|
|
||||||
} else {
|
|
||||||
char message[128];
|
|
||||||
sprintf(message, "Unsupported conversion of data width %d to %ld (int)",
|
|
||||||
d_width, sizeof(int));
|
|
||||||
ERROR_JUMP(-1, done, message);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
done:
|
done:
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void plugin_open(const char *filename, int info[1024], int *error_flag) {
|
void plugin_open(
|
||||||
int retval = 0;
|
const char *filename,
|
||||||
*error_flag = 0;
|
int info[1024],
|
||||||
|
int *error_flag) {
|
||||||
|
int retval = 0;
|
||||||
|
*error_flag = 0;
|
||||||
|
|
||||||
init_error_handling();
|
init_error_handling();
|
||||||
|
|
||||||
if (H5dont_atexit() < 0) {
|
if (H5dont_atexit() < 0) {
|
||||||
ERROR_JUMP(-2, done, "Failed configuring HDF5 library behaviour");
|
ERROR_JUMP(-2, done, "Failed configuring HDF5 library behaviour");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (init_h5_error_handling() < 0) {
|
if (init_h5_error_handling() < 0) {
|
||||||
ERROR_JUMP(-2, done, "Failed to configure HDF5 error handling");
|
ERROR_JUMP(-2, done, "Failed to configure HDF5 error handling");
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef USE_BITSHUFFLE
|
fill_info_array(info);
|
||||||
if (bshuf_register_h5filter() < 0 ) {
|
file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);
|
||||||
ERROR_JUMP(-2, done, "Failed to register bitshuffle filter");
|
if (file_id < 0) {
|
||||||
}
|
char message[128] = {0};
|
||||||
#endif
|
sprintf(message, "Could not open %.100s", filename);
|
||||||
|
ERROR_JUMP(-4, done, message);
|
||||||
|
}
|
||||||
|
|
||||||
fill_info_array(info);
|
reset_error_stack();
|
||||||
file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);
|
retval = get_detector_info(file_id, &data_desc);
|
||||||
if (file_id < 0) {
|
if (retval < 0) {
|
||||||
char message[128] = {0};
|
ERROR_JUMP(-4, done, "");
|
||||||
sprintf(message, "Could not open %.100s", filename);
|
}
|
||||||
ERROR_JUMP(-4, done, message);
|
|
||||||
}
|
|
||||||
|
|
||||||
reset_error_stack();
|
mask_buffer = malloc(data_desc->dims[1] * data_desc->dims[2] * sizeof(int));
|
||||||
retval = get_detector_info(file_id, &data_desc);
|
if (mask_buffer) {
|
||||||
if (retval < 0) {
|
retval = data_desc->get_pixel_mask(data_desc, mask_buffer);
|
||||||
ERROR_JUMP(-4, done, "");
|
if (retval < 0) {
|
||||||
}
|
fprintf(ERROR_OUTPUT, "WARNING: Could not read pixel mask - no masking will be applied\n");
|
||||||
|
dump_error_stack(ERROR_OUTPUT);
|
||||||
data_desc->image_number_offset = info[5];
|
free(mask_buffer);
|
||||||
|
mask_buffer = NULL;
|
||||||
mask_buffer = malloc(data_desc->dims[1] * data_desc->dims[2] * sizeof(int));
|
}
|
||||||
if (mask_buffer) {
|
}
|
||||||
retval = data_desc->get_pixel_mask(data_desc, mask_buffer);
|
retval = 0;
|
||||||
if (retval < 0) {
|
|
||||||
fprintf(
|
|
||||||
ERROR_OUTPUT,
|
|
||||||
"WARNING: Could not read pixel mask - no masking will be applied\n");
|
|
||||||
dump_error_stack(ERROR_OUTPUT);
|
|
||||||
free(mask_buffer);
|
|
||||||
mask_buffer = NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
retval = 0;
|
|
||||||
|
|
||||||
#ifdef GPHL_COMPILE_DATE
|
|
||||||
fprintf(ERROR_OUTPUT, "\n XDS HDF5/Durin plugin %d.%d.%d (DLS, 2018-2023; GPhL, 2020-2024 - built %d)\n", info[1], info[2], info[3], GPHL_COMPILE_DATE);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
done:
|
done:
|
||||||
*error_flag = retval;
|
*error_flag = retval;
|
||||||
if (retval < 0) {
|
if (retval < 0) {
|
||||||
if ((data_desc) && (data_desc->free_desc)) {
|
if ((data_desc) && (data_desc->free_desc)) {
|
||||||
data_desc->free_desc(data_desc);
|
data_desc->free_desc(data_desc);
|
||||||
data_desc = NULL;
|
data_desc = NULL;
|
||||||
}
|
}
|
||||||
dump_error_stack(ERROR_OUTPUT);
|
dump_error_stack(ERROR_OUTPUT);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void plugin_get_header(int *nx, int *ny, int *nbytes, float *qx, float *qy,
|
|
||||||
int *number_of_frames, int info[1024], int *error_flag) {
|
|
||||||
int err = 0;
|
|
||||||
int retval = 0;
|
|
||||||
double x_pixel_size, y_pixel_size;
|
|
||||||
reset_error_stack();
|
|
||||||
fill_info_array(info);
|
|
||||||
|
|
||||||
err =
|
void plugin_get_header(
|
||||||
data_desc->get_pixel_properties(data_desc, &x_pixel_size, &y_pixel_size);
|
int *nx, int *ny,
|
||||||
if (err < 0) {
|
int *nbytes,
|
||||||
ERROR_JUMP(err, done, "Failed to retrieve pixel information");
|
float *qx, float *qy,
|
||||||
}
|
int *number_of_frames,
|
||||||
|
int info[1024],
|
||||||
|
int *error_flag) {
|
||||||
|
int err = 0;
|
||||||
|
int retval = 0;
|
||||||
|
double x_pixel_size, y_pixel_size;
|
||||||
|
reset_error_stack();
|
||||||
|
fill_info_array(info);
|
||||||
|
|
||||||
*nx = data_desc->dims[2];
|
err = data_desc->get_pixel_properties(data_desc, &x_pixel_size, &y_pixel_size);
|
||||||
*ny = data_desc->dims[1];
|
if (err < 0) {
|
||||||
*nbytes = abs(data_desc->data_width);
|
ERROR_JUMP(err, done, "Failed to retrieve pixel information");
|
||||||
*number_of_frames = data_desc->dims[0];
|
}
|
||||||
*qx = (float)x_pixel_size;
|
|
||||||
*qy = (float)y_pixel_size;
|
*nx = data_desc->dims[2];
|
||||||
|
*ny = data_desc->dims[1];
|
||||||
|
*nbytes = data_desc->data_width;
|
||||||
|
*number_of_frames = data_desc->dims[0];
|
||||||
|
*qx = (float) x_pixel_size;
|
||||||
|
*qy = (float) y_pixel_size;
|
||||||
|
|
||||||
done:
|
done:
|
||||||
*error_flag = retval;
|
*error_flag = retval;
|
||||||
if (retval < 0) {
|
if (retval < 0) {
|
||||||
dump_error_stack(ERROR_OUTPUT);
|
dump_error_stack(ERROR_OUTPUT);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void plugin_get_data(int *frame_number, int *nx, int *ny, int *data_array,
|
|
||||||
int info[1024], int *error_flag) {
|
|
||||||
|
|
||||||
int retval = 0;
|
void plugin_get_data(
|
||||||
int frame_size_px = data_desc->dims[1] * data_desc->dims[2];
|
int *frame_number,
|
||||||
reset_error_stack();
|
int *nx, int *ny,
|
||||||
fill_info_array(info);
|
int *data_array,
|
||||||
|
int info[1024],
|
||||||
|
int *error_flag) {
|
||||||
|
|
||||||
void *buffer = NULL;
|
int retval = 0;
|
||||||
if (sizeof(*data_array) == abs(data_desc->data_width)) {
|
int frame_size_px = data_desc->dims[1] * data_desc->dims[2];
|
||||||
buffer = data_array;
|
reset_error_stack();
|
||||||
} else {
|
fill_info_array(info);
|
||||||
buffer = malloc(abs(data_desc->data_width) * frame_size_px);
|
|
||||||
if (!buffer) {
|
|
||||||
ERROR_JUMP(-1, done, "Unable to allocate data buffer");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int ordinal = *frame_number;
|
void *buffer = NULL;
|
||||||
if (image2ordinal) {
|
if (sizeof(*data_array) == data_desc->data_width) {
|
||||||
if (ordinal < image2ordinal_imin || ordinal>image2ordinal_imax) {
|
buffer = data_array;
|
||||||
char message[64] = {0};
|
} else {
|
||||||
sprintf(message, "Failed to map frame %d to ordinals since outside of range %d - %d", ordinal,image2ordinal_imin,image2ordinal_imax);
|
buffer = malloc(data_desc->data_width * frame_size_px);
|
||||||
ERROR_JUMP(-2, done, message);
|
if (!buffer) {
|
||||||
}
|
ERROR_JUMP(-1, done, "Unable to allocate data buffer");
|
||||||
ordinal = image2ordinal[ordinal];
|
}
|
||||||
if (ordinal!=*frame_number) {
|
}
|
||||||
if (image2ordinal_debug) printf("fetching data from ordinal %d for frame %d\n",ordinal,*frame_number);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (data_desc->get_data_frame(data_desc, ordinal - 1, buffer) < 0) {
|
if (data_desc->get_data_frame(data_desc, (*frame_number) - 1, buffer) < 0) {
|
||||||
char message[64] = {0};
|
char message[64] = {0};
|
||||||
if (image2ordinal) {
|
sprintf(message, "Failed to retrieve data for frame %d", *frame_number);
|
||||||
sprintf(message, "Failed to retrieve data for frame %d (ordinal %d)", *frame_number, ordinal);
|
ERROR_JUMP(-2, done, message);
|
||||||
} else {
|
}
|
||||||
sprintf(message, "Failed to retrieve data for frame %d", *frame_number);
|
|
||||||
}
|
|
||||||
ERROR_JUMP(-2, done, message);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (buffer != data_array) {
|
if (buffer != data_array) {
|
||||||
if (convert_to_int_and_mask(buffer, data_desc->data_width, info[6], data_array,
|
if (convert_to_int_and_mask(buffer, data_desc->data_width, data_array, frame_size_px, mask_buffer) < 0) {
|
||||||
frame_size_px, mask_buffer) < 0) {
|
char message[64];
|
||||||
char message[64];
|
sprintf(message, "Error converting data for frame %d", *frame_number);
|
||||||
if (image2ordinal) {
|
ERROR_JUMP(-2, done, message);
|
||||||
sprintf(message, "Error converting data for frame %d (ordinal %d)", *frame_number, ordinal);
|
}
|
||||||
} else {
|
} else {
|
||||||
sprintf(message, "Error converting data for frame %d", *frame_number);
|
APPLY_MASK(data_array, mask_buffer, frame_size_px);
|
||||||
}
|
}
|
||||||
ERROR_JUMP(-2, done, message);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
APPLY_MASK(data_array, mask_buffer, frame_size_px);
|
|
||||||
}
|
|
||||||
|
|
||||||
done:
|
done:
|
||||||
*error_flag = retval;
|
*error_flag = retval;
|
||||||
if (retval < 0) {
|
if (retval < 0) {
|
||||||
dump_error_stack(ERROR_OUTPUT);
|
dump_error_stack(ERROR_OUTPUT);
|
||||||
}
|
}
|
||||||
if (buffer && (buffer != data_array))
|
if (buffer && (buffer != data_array)) free(buffer);
|
||||||
free(buffer);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void plugin_close(int *error_flag) {
|
void plugin_close(int *error_flag) {
|
||||||
if (file_id) {
|
if (file_id) {
|
||||||
if (H5Fclose(file_id) < 0) {
|
if (H5Fclose(file_id) < 0) {
|
||||||
/* TODO: backtrace */
|
/* TODO: backtrace */
|
||||||
*error_flag = -1;
|
*error_flag = -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
file_id = 0;
|
file_id = 0;
|
||||||
|
|
||||||
if (image2ordinal) {
|
if (mask_buffer) {
|
||||||
free(image2ordinal);
|
free(mask_buffer);
|
||||||
image2ordinal = NULL;
|
mask_buffer = NULL;
|
||||||
}
|
}
|
||||||
|
if (data_desc->free_desc) {
|
||||||
if (mask_buffer) {
|
data_desc->free_desc(data_desc);
|
||||||
free(mask_buffer);
|
data_desc = NULL;
|
||||||
mask_buffer = NULL;
|
}
|
||||||
}
|
if (H5close() < 0) {
|
||||||
if (data_desc->free_desc) {
|
*error_flag = -1;
|
||||||
data_desc->free_desc(data_desc);
|
}
|
||||||
data_desc = NULL;
|
|
||||||
}
|
|
||||||
if (H5close() < 0) {
|
|
||||||
*error_flag = -1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|||||||
+23
-7
@@ -15,23 +15,39 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define DLS_CUSTOMER_ID \
|
#define DLS_CUSTOMER_ID 0x01 /* pretend we're Dectris, otherwise XDS doesn't work */
|
||||||
0x01 /* pretend we're Dectris, otherwise XDS doesn't work */
|
|
||||||
#define VERSION_MAJOR 0
|
#define VERSION_MAJOR 0
|
||||||
#define VERSION_MINOR 0
|
#define VERSION_MINOR 0
|
||||||
#define VERSION_PATCH 0
|
#define VERSION_PATCH 0
|
||||||
#define VERSION_TIMESTAMP -1 /* good enough for Dectris apparantely */
|
#define VERSION_TIMESTAMP -1 /* good enough for Dectris apparantely */
|
||||||
|
|
||||||
void plugin_open(const char *filename, int info[1024], int *error_flag);
|
|
||||||
|
|
||||||
void plugin_get_header(int *nx, int *ny, int *nbytes, float *qx, float *qy,
|
void plugin_open(
|
||||||
int *number_of_frames, int info[1024], int *error_flag);
|
const char *filename,
|
||||||
|
int info[1024],
|
||||||
|
int *error_flag);
|
||||||
|
|
||||||
|
|
||||||
|
void plugin_get_header(
|
||||||
|
int *nx, int *ny,
|
||||||
|
int *nbytes,
|
||||||
|
float *qx, float *qy,
|
||||||
|
int *number_of_frames,
|
||||||
|
int info[1024],
|
||||||
|
int *error_flag);
|
||||||
|
|
||||||
|
|
||||||
|
void plugin_get_data(
|
||||||
|
int *frame_number,
|
||||||
|
int *nx, int *ny,
|
||||||
|
int *data_array,
|
||||||
|
int info[1024],
|
||||||
|
int *error_flag);
|
||||||
|
|
||||||
void plugin_get_data(int *frame_number, int *nx, int *ny, int *data_array,
|
|
||||||
int info[1024], int *error_flag);
|
|
||||||
|
|
||||||
void plugin_close(int *error_flag);
|
void plugin_close(int *error_flag);
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} /* extern "C" */
|
} /* extern "C" */
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
+112
-119
@@ -1,141 +1,134 @@
|
|||||||
#include "err.h"
|
|
||||||
#include "file.h"
|
|
||||||
#include <hdf5.h>
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#include <hdf5.h>
|
||||||
|
#include "file.h"
|
||||||
|
#include "err.h"
|
||||||
|
|
||||||
#define COPY_AND_MASK(in, out, size, mask) \
|
#define COPY_AND_MASK(in, out, size, mask) \
|
||||||
{ \
|
{ \
|
||||||
int i; \
|
int i; \
|
||||||
if (mask) { \
|
if (mask) { \
|
||||||
for (i = 0; i < size; ++i) { \
|
for (i = 0; i < size; ++i) { \
|
||||||
out[i] = in[i]; \
|
out[i] = in[i]; \
|
||||||
if (mask[i] & 0xFE) \
|
if (mask[i] & 0xFE) out[i] = -2; \
|
||||||
out[i] = -2; \
|
if (mask[i] & 0x01) out[i] = -1; \
|
||||||
if (mask[i] & 0x01) \
|
} \
|
||||||
out[i] = -1; \
|
} else { \
|
||||||
} \
|
for (i = 0; i < size; i++) { \
|
||||||
} else { \
|
out[i] = in[i]; \
|
||||||
for (i = 0; i < size; i++) { \
|
} \
|
||||||
out[i] = in[i]; \
|
} \
|
||||||
} \
|
}
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
int parse_args(int argc, char **argv, char **file_name, int *frame_idx) {
|
int parse_args(int argc, char **argv, char **file_name, int *frame_idx) {
|
||||||
int retval = 0;
|
int retval = 0;
|
||||||
if (argc == 2) {
|
if (argc == 2) {
|
||||||
*frame_idx = 0;
|
*frame_idx = 0;
|
||||||
*file_name = argv[1];
|
*file_name = argv[1];
|
||||||
} else if (argc >= 2) {
|
} else if (argc >= 2) {
|
||||||
*file_name = argv[1];
|
*file_name = argv[1];
|
||||||
*frame_idx = atoi(argv[2]);
|
*frame_idx = atoi(argv[2]);
|
||||||
} else {
|
} else {
|
||||||
ERROR_JUMP(-1, done, "Require filename argument");
|
ERROR_JUMP(-1, done, "Require filename argument");
|
||||||
}
|
}
|
||||||
done:
|
done:
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
int err = 0;
|
int err = 0;
|
||||||
int retval = 0;
|
int retval = 0;
|
||||||
char *test_file = "";
|
char *test_file = "";
|
||||||
struct ds_desc_t *desc;
|
struct ds_desc_t *desc;
|
||||||
int dims[3] = {0};
|
int dims[3] = {0};
|
||||||
hid_t fid = 0;
|
hid_t fid = 0;
|
||||||
int frame_idx = 0;
|
int frame_idx = 0;
|
||||||
int *mask = NULL;
|
int *mask = NULL;
|
||||||
int *data = NULL;
|
int *data = NULL;
|
||||||
void *buffer = NULL;
|
void *buffer = NULL;
|
||||||
|
|
||||||
init_error_handling();
|
init_error_handling();
|
||||||
if (init_h5_error_handling() < 0) {
|
if (init_h5_error_handling() < 0) {
|
||||||
ERROR_JUMP(-1, done, "");
|
ERROR_JUMP(-1, done, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (parse_args(argc, argv, &test_file, &frame_idx) < 0) {
|
if (parse_args(argc, argv, &test_file, &frame_idx) < 0) {
|
||||||
ERROR_JUMP(-1, done, "Failure parsing arguments");
|
ERROR_JUMP(-1, done, "Failure parsing arguments");
|
||||||
}
|
}
|
||||||
|
|
||||||
fid = H5Fopen(test_file, H5F_ACC_RDONLY, H5P_DEFAULT);
|
fid = H5Fopen(test_file, H5F_ACC_RDONLY, H5P_DEFAULT);
|
||||||
if (fid < 0)
|
if (fid < 0) ERROR_JUMP(-1, done, "Error opening file");
|
||||||
ERROR_JUMP(-1, done, "Error opening file");
|
|
||||||
|
|
||||||
err = get_detector_info(fid, &desc);
|
err = get_detector_info(fid, &desc);
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
ERROR_JUMP(err, done, "");
|
ERROR_JUMP(err, done, "");
|
||||||
}
|
}
|
||||||
dims[0] = desc->dims[0];
|
dims[0] = desc->dims[0];
|
||||||
dims[1] = desc->dims[1];
|
dims[1] = desc->dims[1];
|
||||||
dims[2] = desc->dims[2];
|
dims[2] = desc->dims[2];
|
||||||
|
|
||||||
printf("Dims: %d, %d, %d\n", dims[0], dims[1], dims[2]);
|
printf("Dims: %d, %d, %d\n", dims[0], dims[1], dims[2]);
|
||||||
|
|
||||||
mask = malloc(dims[1] * dims[2] * sizeof(*mask));
|
mask = malloc(dims[1] * dims[2] * sizeof(*mask));
|
||||||
if (!mask) {
|
if (!mask) {
|
||||||
ERROR_JUMP(err, done, "Failed to allocate space for pixel mask");
|
ERROR_JUMP(err, done, "Failed to allocate space for pixel mask");
|
||||||
}
|
}
|
||||||
err = desc->get_pixel_mask(desc, mask);
|
err = desc->get_pixel_mask(desc, mask);
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
ERROR_JUMP(err, done, "");
|
ERROR_JUMP(err, done, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
data = malloc(dims[1] * dims[2] * sizeof(*data));
|
data = malloc(dims[1] * dims[2] * sizeof(*data));
|
||||||
if (sizeof(*data) != desc->data_width) {
|
if (sizeof(*data) != desc->data_width) {
|
||||||
buffer = malloc(dims[1] * dims[2] * desc->data_width);
|
buffer = malloc(dims[1] * dims[2] * desc->data_width);
|
||||||
} else {
|
} else {
|
||||||
buffer = data;
|
buffer = data;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = desc->get_data_frame(desc, frame_idx, buffer);
|
err = desc->get_data_frame(desc, frame_idx, buffer);
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
ERROR_JUMP(err, done, "");
|
ERROR_JUMP(err, done, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (buffer != data) {
|
if (buffer != data) {
|
||||||
if (desc->data_width == sizeof(signed char)) {
|
if (desc->data_width == sizeof(signed char)) {
|
||||||
signed char *in = buffer;
|
signed char *in = buffer;
|
||||||
COPY_AND_MASK(in, data, dims[1] * dims[2], mask);
|
COPY_AND_MASK(in, data, dims[1] * dims[2], mask);
|
||||||
} else if (desc->data_width == sizeof(short)) {
|
} else if (desc->data_width == sizeof(short)) {
|
||||||
short *in = buffer;
|
short *in = buffer;
|
||||||
COPY_AND_MASK(in, data, dims[1] * dims[2], mask);
|
COPY_AND_MASK(in, data, dims[1] * dims[2], mask);
|
||||||
} else if (desc->data_width == sizeof(int)) {
|
} else if (desc->data_width == sizeof(int)) {
|
||||||
int *in = buffer;
|
int *in = buffer;
|
||||||
COPY_AND_MASK(in, data, dims[1] * dims[2], mask);
|
COPY_AND_MASK(in, data, dims[1] * dims[2], mask);
|
||||||
} else if (desc->data_width == sizeof(long int)) {
|
} else if (desc->data_width == sizeof(long int)) {
|
||||||
long int *in = buffer;
|
long int *in = buffer;
|
||||||
COPY_AND_MASK(in, data, dims[1] * dims[2], mask);
|
COPY_AND_MASK(in, data, dims[1] * dims[2], mask);
|
||||||
} else if (desc->data_width == sizeof(long long int)) {
|
} else if (desc->data_width == sizeof(long long int)) {
|
||||||
long long int *in = buffer;
|
long long int *in = buffer;
|
||||||
COPY_AND_MASK(in, data, dims[1] * dims[2], mask);
|
COPY_AND_MASK(in, data, dims[1] * dims[2], mask);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
int i, j;
|
int i, j;
|
||||||
int max_i = 30;
|
int max_i = 30;
|
||||||
int max_j = 10;
|
int max_j = 10;
|
||||||
max_j = max_j < dims[1] ? max_j : dims[1];
|
max_j = max_j < dims[1] ? max_j : dims[1];
|
||||||
max_i = max_i < dims[2] ? max_i : dims[2];
|
max_i = max_i < dims[2] ? max_i : dims[2];
|
||||||
for (j = 0; j < max_j; j++) {
|
for (j = 0; j < max_j; j++) {
|
||||||
for (i = 0; i < max_i; i++) {
|
for (i = 0; i < max_i; i++) {
|
||||||
printf("%3d ", data[i + j * dims[2]]);
|
printf("%3d ", data[i + j*dims[2]]);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
done:
|
done:
|
||||||
if (fid > 0)
|
if (fid > 0) H5Fclose(fid);
|
||||||
H5Fclose(fid);
|
if (data) free(data);
|
||||||
if (data)
|
if (buffer && (data != buffer)) free(buffer);
|
||||||
free(data);
|
if (mask) free(mask);
|
||||||
if (buffer && (data != buffer))
|
if (retval != 0) dump_error_stack(stderr);
|
||||||
free(buffer);
|
return retval;
|
||||||
if (mask)
|
|
||||||
free(mask);
|
|
||||||
if (retval != 0)
|
|
||||||
dump_error_stack(stderr);
|
|
||||||
return retval;
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,195 +0,0 @@
|
|||||||
#!/usr/bin/env python2
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
import json
|
|
||||||
import mimetools
|
|
||||||
import mimetypes
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import urllib2
|
|
||||||
|
|
||||||
|
|
||||||
def fail(msg, code=1):
|
|
||||||
sys.stderr.write("ERROR: %s\n" % msg)
|
|
||||||
sys.exit(code)
|
|
||||||
|
|
||||||
|
|
||||||
def api_request(url, token, method="GET", json_data=None, content_type=None):
|
|
||||||
data = None
|
|
||||||
headers = {
|
|
||||||
"Authorization": "token %s" % token,
|
|
||||||
}
|
|
||||||
|
|
||||||
if json_data is not None:
|
|
||||||
data = json.dumps(json_data)
|
|
||||||
headers["Content-Type"] = "application/json"
|
|
||||||
elif content_type is not None:
|
|
||||||
headers["Content-Type"] = content_type
|
|
||||||
|
|
||||||
request = urllib2.Request(url, data=data, headers=headers)
|
|
||||||
request.get_method = lambda: method
|
|
||||||
|
|
||||||
try:
|
|
||||||
response = urllib2.urlopen(request)
|
|
||||||
body = response.read()
|
|
||||||
status = response.getcode()
|
|
||||||
return status, body
|
|
||||||
except urllib2.HTTPError as e:
|
|
||||||
return e.code, e.read()
|
|
||||||
|
|
||||||
|
|
||||||
def encode_multipart_formdata(fields, files):
|
|
||||||
boundary = mimetools.choose_boundary()
|
|
||||||
crlf = "\r\n"
|
|
||||||
lines = []
|
|
||||||
|
|
||||||
for key, value in fields:
|
|
||||||
lines.append("--" + boundary)
|
|
||||||
lines.append('Content-Disposition: form-data; name="%s"' % key)
|
|
||||||
lines.append("")
|
|
||||||
lines.append(value)
|
|
||||||
|
|
||||||
for key, filename, content, content_type in files:
|
|
||||||
lines.append("--" + boundary)
|
|
||||||
lines.append(
|
|
||||||
'Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filename)
|
|
||||||
)
|
|
||||||
lines.append("Content-Type: %s" % content_type)
|
|
||||||
lines.append("")
|
|
||||||
lines.append(content)
|
|
||||||
|
|
||||||
lines.append("--" + boundary + "--")
|
|
||||||
lines.append("")
|
|
||||||
body = crlf.join(lines)
|
|
||||||
content_type = "multipart/form-data; boundary=%s" % boundary
|
|
||||||
return content_type, body
|
|
||||||
|
|
||||||
|
|
||||||
def multipart_request(url, token, fields, files):
|
|
||||||
content_type, body = encode_multipart_formdata(fields, files)
|
|
||||||
headers = {
|
|
||||||
"Authorization": "token %s" % token,
|
|
||||||
"Content-Type": content_type,
|
|
||||||
}
|
|
||||||
|
|
||||||
request = urllib2.Request(url, data=body, headers=headers)
|
|
||||||
request.get_method = lambda: "POST"
|
|
||||||
|
|
||||||
try:
|
|
||||||
response = urllib2.urlopen(request)
|
|
||||||
return response.getcode(), response.read()
|
|
||||||
except urllib2.HTTPError as e:
|
|
||||||
return e.code, e.read()
|
|
||||||
|
|
||||||
|
|
||||||
def get_release_by_tag(api_base, token, tag):
|
|
||||||
url = "%s/releases/tags/%s" % (api_base, tag)
|
|
||||||
status, body = api_request(url, token, method="GET")
|
|
||||||
if status == 200:
|
|
||||||
return json.loads(body)
|
|
||||||
if status == 404:
|
|
||||||
return None
|
|
||||||
fail("failed to fetch release for tag %s: HTTP %s\n%s" % (tag, status, body))
|
|
||||||
|
|
||||||
|
|
||||||
def create_release(api_base, token, tag):
|
|
||||||
url = "%s/releases" % api_base
|
|
||||||
payload = {
|
|
||||||
"tag_name": tag,
|
|
||||||
"name": tag,
|
|
||||||
"draft": False,
|
|
||||||
"prerelease": False,
|
|
||||||
}
|
|
||||||
status, body = api_request(url, token, method="POST", json_data=payload)
|
|
||||||
if status not in (200, 201):
|
|
||||||
fail("failed to create release for tag %s: HTTP %s\n%s" % (tag, status, body))
|
|
||||||
return json.loads(body)
|
|
||||||
|
|
||||||
|
|
||||||
def ensure_release(api_base, token, tag):
|
|
||||||
release = get_release_by_tag(api_base, token, tag)
|
|
||||||
if release is not None:
|
|
||||||
print("Release for tag %s already exists (id=%s)" % (tag, release.get("id")))
|
|
||||||
return release
|
|
||||||
|
|
||||||
print("Release for tag %s does not exist, creating it" % tag)
|
|
||||||
release = create_release(api_base, token, tag)
|
|
||||||
print("Created release id=%s" % release.get("id"))
|
|
||||||
return release
|
|
||||||
|
|
||||||
|
|
||||||
def upload_asset(api_base, token, release_id, file_path):
|
|
||||||
if not os.path.isfile(file_path):
|
|
||||||
fail("file not found: %s" % file_path)
|
|
||||||
|
|
||||||
asset_name = os.path.basename(file_path)
|
|
||||||
mime_type = mimetypes.guess_type(asset_name)[0] or "application/octet-stream"
|
|
||||||
|
|
||||||
with open(file_path, "rb") as f:
|
|
||||||
content = f.read()
|
|
||||||
|
|
||||||
url = "%s/releases/%s/assets" % (api_base, release_id)
|
|
||||||
status, body = multipart_request(
|
|
||||||
url,
|
|
||||||
token,
|
|
||||||
fields=[("name", asset_name)],
|
|
||||||
files=[("attachment", asset_name, content, mime_type)],
|
|
||||||
)
|
|
||||||
|
|
||||||
if status not in (200, 201):
|
|
||||||
fail("failed to upload asset %s: HTTP %s\n%s" % (asset_name, status, body))
|
|
||||||
|
|
||||||
print("Uploaded asset: %s" % asset_name)
|
|
||||||
|
|
||||||
|
|
||||||
def find_assets(build_dir):
|
|
||||||
names = []
|
|
||||||
for name in sorted(os.listdir(build_dir)):
|
|
||||||
if name.startswith("libdurin-plugin.so"):
|
|
||||||
full = os.path.join(build_dir, name)
|
|
||||||
if os.path.isfile(full):
|
|
||||||
names.append(full)
|
|
||||||
return names
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
if len(sys.argv) != 4:
|
|
||||||
sys.stderr.write(
|
|
||||||
"Usage: %s <gitea-server> <owner/repo> <tag>\n" % sys.argv[0]
|
|
||||||
)
|
|
||||||
sys.stderr.write(
|
|
||||||
"Example: %s https://gitea.psi.ch mx/durin 1.0.0\n" % sys.argv[0]
|
|
||||||
)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
server = sys.argv[1].rstrip("/")
|
|
||||||
repo = sys.argv[2]
|
|
||||||
tag = sys.argv[3]
|
|
||||||
|
|
||||||
token = os.environ.get("GITEA_TOKEN")
|
|
||||||
if not token:
|
|
||||||
fail("GITEA_TOKEN environment variable is not set")
|
|
||||||
|
|
||||||
build_dir = "build"
|
|
||||||
if not os.path.isdir(build_dir):
|
|
||||||
fail("build directory not found: %s" % build_dir)
|
|
||||||
|
|
||||||
assets = find_assets(build_dir)
|
|
||||||
if not assets:
|
|
||||||
fail("no libdurin-plugin.so* files found in %s" % build_dir)
|
|
||||||
|
|
||||||
api_base = "%s/api/v1/repos/%s" % (server, repo)
|
|
||||||
|
|
||||||
release = ensure_release(api_base, token, tag)
|
|
||||||
release_id = release.get("id")
|
|
||||||
if not release_id:
|
|
||||||
fail("release id missing in API response")
|
|
||||||
|
|
||||||
for asset in assets:
|
|
||||||
upload_asset(api_base, token, release_id, asset)
|
|
||||||
|
|
||||||
print("Done.")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
Reference in New Issue
Block a user