cafe-1.12.5 release

This commit is contained in:
2021-03-16 09:30:56 +01:00
parent 7504b0a539
commit 7ba4128448
106 changed files with 185064 additions and 183247 deletions

View File

@@ -0,0 +1,123 @@
/*
* Bitshuffle - Filter for improving compression of typed binary data.
*
* This file is part of Bitshuffle
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
* Website: http://www.github.com/kiyo-masui/bitshuffle
* Created: 2014
*
* See LICENSE file for details about copyright and rights to use.
*
*
* Header File
*
* Worker routines return an int64_t which is the number of bytes processed
* if positive or an error code if negative.
*
* Error codes:
* -1 : Failed to allocate memory.
* -11 : Missing SSE.
* -12 : Missing AVX.
* -80 : Input size not a multiple of 8.
* -81 : block_size not multiple of 8.
* -91 : Decompression error, wrong number of bytes processed.
* -1YYY : Error internal to compression routine with error code -YYY.
*/
#ifndef BITSHUFFLE_H
#define BITSHUFFLE_H
#include <stdlib.h>
#include "bitshuffle_core.h"
#ifdef __cplusplus
extern "C" {
#endif
/* ---- bshuf_compress_lz4_bound ----
*
* Bound on size of data compressed with *bshuf_compress_lz4*.
*
* Use this to size the output buffer handed to *bshuf_compress_lz4*. The
* parameters are documented identically for both routines.
*
* Parameters
* ----------
* size : number of elements in input
* elem_size : element size of typed data
* block_size : Process in blocks of this many elements. Pass 0 to
* select automatically (recommended).
*
* Returns
* -------
* Bound on compressed data size, i.e. an upper limit on the output buffer
* space needed by *bshuf_compress_lz4*.
*
*/
size_t bshuf_compress_lz4_bound(const size_t size,
const size_t elem_size, size_t block_size);
/* ---- bshuf_compress_lz4 ----
*
* Bitshuffle and compress the data using LZ4.
*
* Transpose within elements, in blocks of data of *block_size* elements, then
* compress the blocks using LZ4. In the output buffer, each block is prefixed
* by a 4 byte integer giving the compressed size of that block.
*
* Output buffer must be large enough to hold the compressed data. This could
* in principle be substantially larger than the input buffer. Use the routine
* *bshuf_compress_lz4_bound* to get an upper limit.
*
* Parameters
* ----------
* in : input buffer, must be of size * elem_size bytes
* out : output buffer, must be large enough to hold data.
* size : number of elements in input
* elem_size : element size of typed data
* block_size : Process in blocks of this many elements. Pass 0 to
* select automatically (recommended).
*
* Returns
* -------
* number of bytes used in output buffer, negative error-code if failed
* (see the error codes listed at the top of this file).
*
*/
int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size, const size_t
elem_size, size_t block_size);
/* ---- bshuf_decompress_lz4 ----
*
* Undo compression and bitshuffling.
*
* Decompress data then un-bitshuffle it in blocks of *block_size* elements.
*
* To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size*
* must match the parameters used to compress the data.
*
* NOT TO BE USED WITH UNTRUSTED DATA: This routine uses the function
* LZ4_decompress_fast from LZ4, which does not protect against maliciously
* formed datasets. By modifying the compressed data, this function could be
* coerced into leaving the boundaries of the input buffer.
*
* Parameters
* ----------
* in : input buffer (the compressed data)
* out : output buffer, must be of size * elem_size bytes
* size : number of elements in the decompressed data (see *out*); must be
* the value that was passed when compressing
* elem_size : element size of typed data
* block_size : Process in blocks of this many elements. Pass 0 to
* select automatically (recommended).
*
* Returns
* -------
* number of bytes consumed in *input* buffer, negative error-code if failed
* (see the error codes listed at the top of this file).
*
*/
int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // BITSHUFFLE_H

View File

@@ -0,0 +1,156 @@
/*
* Bitshuffle - Filter for improving compression of typed binary data.
*
* This file is part of Bitshuffle
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
* Website: http://www.github.com/kiyo-masui/bitshuffle
* Created: 2014
*
* See LICENSE file for details about copyright and rights to use.
*
*
* Header File
*
* Worker routines return an int64_t which is the number of bytes processed
* if positive or an error code if negative.
*
* Error codes:
* -1 : Failed to allocate memory.
* -11 : Missing SSE.
* -12 : Missing AVX.
* -80 : Input size not a multiple of 8.
* -81 : block_size not multiple of 8.
* -91 : Decompression error, wrong number of bytes processed.
* -1YYY : Error internal to compression routine with error code -YYY.
*/
#ifndef BITSHUFFLE_CORE_H
#define BITSHUFFLE_CORE_H
/* Fixed-width integer types.
 * Any C++ compiler (GNU g++ is assumed to ship stdint.h) and any C compiler
 * reporting C99 or later gets them from <stdint.h>; everything else falls
 * back to the hand-written typedefs below. */
#if defined(__cplusplus)
#  include <stdint.h>
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199900L)
#  include <stdint.h>
#else
typedef unsigned char      uint8_t;
typedef unsigned short     uint16_t;
typedef unsigned int       uint32_t;
typedef signed int         int32_t;
typedef unsigned long long uint64_t;
typedef long long          int64_t;
#endif
#include <stdlib.h>
/* Library version components. They are usually supplied by setup.py; the
 * values below apply only when BSHUF_VERSION_MAJOR was not defined already. */
#if !defined(BSHUF_VERSION_MAJOR)
#  define BSHUF_VERSION_MAJOR 0
#  define BSHUF_VERSION_MINOR 3
#  define BSHUF_VERSION_POINT 4
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* ---- bshuf_using_SSE2 ----
*
* Whether routines were compiled with the SSE2 instruction set.
*
* Returns
* -------
* 1 if using SSE2, 0 otherwise.
*
*/
int bshuf_using_SSE2(void);
/* ---- bshuf_using_AVX2 ----
*
* Whether routines were compiled with the AVX2 instruction set.
*
* Returns
* -------
* 1 if using AVX2, 0 otherwise.
*
*/
int bshuf_using_AVX2(void);
/* ---- bshuf_default_block_size ----
*
* The default block size as function of element size.
*
* This is the block size used by the blocked routines (any routine
* taking a *block_size* argument) when the block_size is not provided
* (zero is passed).
*
* The results of this routine are guaranteed to be stable such that
* shuffled/compressed data can always be decompressed.
*
* Parameters
* ----------
* elem_size : element size of data to be shuffled/compressed.
*
* Returns
* -------
* The default block size, i.e. the value the blocked routines substitute
* for a zero *block_size*.
*
*/
size_t bshuf_default_block_size(const size_t elem_size);
/* ---- bshuf_bitshuffle ----
*
* Bitshuffle the data.
*
* Transpose the bits within elements, in blocks of *block_size*
* elements.
*
* Parameters
* ----------
* in : input buffer, must be of size * elem_size bytes
* out : output buffer, must be of size * elem_size bytes
* size : number of elements in input
* elem_size : element size of typed data
* block_size : Do transpose in blocks of this many elements. Pass 0 to
* select automatically (recommended).
*
* Returns
* -------
* number of bytes processed, negative error-code if failed
* (see the error codes listed at the top of this file).
*
*/
int64_t bshuf_bitshuffle(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size);
/* ---- bshuf_bitunshuffle ----
*
* Unshuffle bitshuffled data; the inverse of *bshuf_bitshuffle*.
*
* Untranspose the bits within elements, in blocks of *block_size*
* elements.
*
* To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size*
* must match the parameters used to shuffle the data.
*
* Parameters
* ----------
* in : input buffer, must be of size * elem_size bytes
* out : output buffer, must be of size * elem_size bytes
* size : number of elements in input
* elem_size : element size of typed data
* block_size : Do transpose in blocks of this many elements. Pass 0 to
* select automatically (recommended).
*
* Returns
* -------
* number of bytes processed, negative error-code if failed
* (see the error codes listed at the top of this file).
*
*/
int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // BITSHUFFLE_CORE_H

View File

@@ -0,0 +1,75 @@
/*
* Bitshuffle - Filter for improving compression of typed binary data.
*
* This file is part of Bitshuffle
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
* Website: http://www.github.com/kiyo-masui/bitshuffle
* Created: 2014
*
* See LICENSE file for details about copyright and rights to use.
*/
#ifndef BITSHUFFLE_INTERNALS_H
#define BITSHUFFLE_INTERNALS_H
/* Fixed-width integer types: use <stdint.h> under C++ (GNU g++ is assumed to
 * provide it) or under C99 and later; otherwise declare them by hand. */
#if defined(__cplusplus)
#  include <stdint.h>
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199900L)
#  include <stdint.h>
#else
typedef unsigned char      uint8_t;
typedef unsigned short     uint16_t;
typedef unsigned int       uint32_t;
typedef signed int         int32_t;
typedef unsigned long long uint64_t;
typedef long long          int64_t;
#endif
#include <stdlib.h>
#include "iochain.h"
/* Constants.
 * All three are defined together, and only when BSHUF_MIN_RECOMMEND_BLOCK has
 * not been defined beforehand. */
#if !defined(BSHUF_MIN_RECOMMEND_BLOCK)
#  define BSHUF_MIN_RECOMMEND_BLOCK 128
/* Block sizes must be multiple of this. */
#  define BSHUF_BLOCKED_MULT 8
#  define BSHUF_TARGET_BLOCK_SIZE_B 8192
#endif
// Macros.
/* CHECK_ERR_FREE(count, buf)
 *
 * If *count* is negative, free *buf* and return *count* from the enclosing
 * function; otherwise do nothing.
 *
 * The expansion is wrapped in do { ... } while (0) so the macro acts as a
 * single statement (safe as the unbraced body of an if/else), and *count* is
 * parenthesized so that expression arguments keep their intended precedence.
 * Invoke it with a trailing semicolon. *count* is evaluated twice on the error
 * path, so pass an expression without side effects. */
#define CHECK_ERR_FREE(count, buf) \
    do { if ((count) < 0) { free(buf); return (count); } } while (0)
#ifdef __cplusplus
extern "C" {
#endif
/* ---- Utility functions for internal use only ---- */
/* NOTE(review): not documented in this header. Judging by name and signature
 * this presumably transposes the bits of *size* elements of *elem_size* bytes
 * from *in* into *out*, returning a byte count or a negative error code like
 * the public routines - confirm against the implementation. */
int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size,
const size_t elem_size);
/* Read a 32 bit unsigned integer (4 bytes) from a buffer in big endian order. */
uint32_t bshuf_read_uint32_BE(const void* buf);
/* Write the 32 bit unsigned integer *num* (4 bytes) to a buffer in big endian
 * order. */
void bshuf_write_uint32_BE(void* buf, uint32_t num);
/* NOTE(review): not documented in this header. Presumably the inverse of
 * bshuf_trans_bit_elem with the same argument and return conventions -
 * confirm against the implementation. */
int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size,
const size_t elem_size);
/* Function pointer type for worker functions that process a single block.
 * A worker receives the IO chain together with a *size* and an *elem_size*,
 * and returns an int64_t. Functions of this type are what
 * bshuf_blocked_wrap_fun takes as its *fun* argument.
 * NOTE(review): *size* is presumably the number of elements in the block and
 * the return value a byte count or negative error code - confirm against the
 * implementation. */
typedef int64_t (*bshufBlockFunDef)(ioc_chain* C_ptr,
const size_t size, const size_t elem_size);
/* Wrap a function for processing a single block to process an entire buffer in
 * parallel.
 *
 * fun : single-block worker (see bshufBlockFunDef).
 * in, out : input and output buffers.
 * size, elem_size, block_size : NOTE(review): presumably carry the same
 * meaning as in the public blocked routines (element count, element size,
 * elements per block with 0 selecting the default) - confirm against the
 * implementation. */
int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
const size_t size, const size_t elem_size, size_t block_size);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // BITSHUFFLE_INTERNALS_H

View File

@@ -0,0 +1,59 @@
/*
* Bitshuffle HDF5 filter
*
* This file is part of Bitshuffle
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
* Website: http://www.github.com/kiyo-masui/bitshuffle
* Created: 2014
*
* See LICENSE file for details about copyright and rights to use.
*
*
* Header File
*
* Filter Options
* --------------
* block_size (option slot 0) : integer (optional)
* What block size to use (in elements not bytes). Default is 0,
* for which bitshuffle will pick a block size with a target of 8kb.
* Compression (option slot 1) : 0 or BSHUF_H5_COMPRESS_LZ4
* Whether to apply LZ4 compression to the data after bitshuffling.
* This is much faster than applying compression as a second filter
* because it is done when the small block of data is already in the
* L1 cache.
*
* For LZ4 compression, the compressed format of the data is the same as
* for the normal LZ4 filter described in
* http://www.hdfgroup.org/services/filters/HDF5_LZ4.pdf.
*
*/
#ifndef BSHUF_H5FILTER_H
#define BSHUF_H5FILTER_H
/* Defined ahead of the hdf5.h include.
 * NOTE(review): presumably selects version 2 of the H5Z_class_t layout -
 * confirm against the HDF5 documentation. */
#define H5Z_class_t_vers 2
#include "hdf5.h"
/* Identifier of the bitshuffle filter.
 * NOTE(review): presumably the filter ID under which bitshuffle is registered
 * with HDF5 - confirm. */
#define BSHUF_H5FILTER 32008
/* Value for filter option slot 1 that requests LZ4 compression after
 * bitshuffling (see "Filter Options" at the top of this file). */
#define BSHUF_H5_COMPRESS_LZ4 2
/* Filter class object for the bitshuffle filter; defined outside this header. */
extern H5Z_class_t bshuf_H5Filter[1];
/* ---- bshuf_register_h5filter ----
*
* Register the bitshuffle HDF5 filter within the HDF5 library.
*
* Call this before using the bitshuffle HDF5 filter from C unless
* using dynamically loaded filters.
*
* Returns
* -------
* An int status. NOTE(review): the meaning of the value is not documented
* in this header - confirm against the implementation.
*
*/
int bshuf_register_h5filter(void);
#endif // BSHUF_H5FILTER_H

View File

@@ -0,0 +1,94 @@
/*
* IOchain - Distribute a chain of dependent IO events among threads.
*
* This file is part of Bitshuffle
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
* Website: http://www.github.com/kiyo-masui/bitshuffle
* Created: 2014
*
* See LICENSE file for details about copyright and rights to use.
*
*
* Header File
*
* Similar in concept to a queue. Each task includes reading an input
* and writing output, but the location of the input/output (the pointers)
* depend on the previous item in the chain.
*
* This is designed for parallelizing blocked compression/decompression IO,
* where the destination of a compressed block depends on the compressed size
* of all previous blocks.
*
* Implemented with OpenMP locks.
*
*
* Usage
* -----
* - Call `ioc_init` in serial block.
* - Each thread should create a local variable *size_t this_iter* and
* pass its address to all function calls. Its value will be set
* inside the functions and is used to identify the thread.
* - Each thread must call each of the `ioc_get*` and `ioc_set*` methods
* exactly once per iteration, starting with `ioc_get_in` and ending
* with `ioc_set_next_out`.
* - The order (`ioc_get_in`, `ioc_set_next_in`, *work*, `ioc_get_out`,
* `ioc_set_next_out`, *work*) is most efficient.
* - Have each thread call `ioc_end_pop` (NOTE: no function of that name is
* declared in this header; this step may be stale).
* - `ioc_get_in` is blocked until the previous entry's
* `ioc_set_next_in` is called.
* - `ioc_get_out` is blocked until the previous entry's
* `ioc_set_next_out` is called.
* - There are no blocks on the very first iteration.
* - Call `ioc_destroy` in serial block.
* - Safe for num_threads >= IOC_SIZE (but less efficient).
*
*/
#ifndef IOCHAIN_H
#define IOCHAIN_H
#include <stdlib.h>
#ifdef _OPENMP
#include <omp.h>
#endif
/* Number of slots in each of the in_pl / out_pl arrays of ioc_chain. */
#define IOC_SIZE 33
/* A writable pointer paired with an OpenMP lock. The lock member exists only
 * when compiling with OpenMP (_OPENMP defined). Used for the output slots of
 * ioc_chain. */
typedef struct ioc_ptr_and_lock {
#ifdef _OPENMP
omp_lock_t lock;
#endif
void *ptr;
} ptr_and_lock;
/* A pointer to const data paired with an OpenMP lock. The lock member exists
 * only when compiling with OpenMP (_OPENMP defined). Used for the input slots
 * of ioc_chain. */
typedef struct ioc_const_ptr_and_lock {
#ifdef _OPENMP
omp_lock_t lock;
#endif
const void *ptr;
} const_ptr_and_lock;
/* Chain state: IOC_SIZE input slots and IOC_SIZE output slots (each a pointer
 * with its own lock in OpenMP builds), the counter *next* and, in OpenMP
 * builds only, the lock *next_lock*.
 * NOTE(review): *next* is presumably the index of the next iteration to be
 * handed out, guarded by *next_lock* - confirm against the implementation. */
typedef struct ioc_chain {
#ifdef _OPENMP
omp_lock_t next_lock;
#endif
size_t next;
const_ptr_and_lock in_pl[IOC_SIZE];
ptr_and_lock out_pl[IOC_SIZE];
} ioc_chain;
/* Initialise chain *C*; call in a serial block (first step of the usage
 * sequence described at the top of this file).
 * NOTE(review): in_ptr_0 / out_ptr_0 are presumably the input and output
 * pointers for the first iteration - confirm against the implementation. */
void ioc_init(ioc_chain *C, const void *in_ptr_0, void *out_ptr_0);
/* Counterpart of ioc_init; call in a serial block once the chain is no longer
 * in use (last step of the usage sequence at the top of this file). */
void ioc_destroy(ioc_chain *C);
/* First call of each iteration: returns the input pointer for this iteration.
 * Blocks until the previous entry's ioc_set_next_in has been called (there is
 * no block on the very first iteration). *this_iter is a per-thread variable
 * whose value is set inside the chain functions. */
const void * ioc_get_in(ioc_chain *C, size_t *this_iter);
/* Supplies *in_ptr* as the input location for the next entry in the chain;
 * the next entry's ioc_get_in is blocked until this has been called.
 * NOTE(review): in_ptr is a non-const void* although the input slots and
 * ioc_get_in use const void* - check whether const was intended. */
void ioc_set_next_in(ioc_chain *C, size_t* this_iter, void* in_ptr);
/* Returns the output pointer for this iteration. Blocks until the previous
 * entry's ioc_set_next_out has been called (there is no block on the very
 * first iteration). */
void * ioc_get_out(ioc_chain *C, size_t *this_iter);
/* Last call of each iteration: supplies *out_ptr* as the output location for
 * the next entry in the chain; the next entry's ioc_get_out is blocked until
 * this has been called. */
void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr);
#endif // IOCHAIN_H