Add bitshuffle h5 filters

This commit is contained in:
2020-05-15 12:12:11 +02:00
parent cd2b318783
commit e8de187c41
2 changed files with 277 additions and 0 deletions
+218
View File
@@ -0,0 +1,218 @@
/*
* Bitshuffle HDF5 filter
*
* This file is part of Bitshuffle
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
* Website: http://www.github.com/kiyo-masui/bitshuffle
* Created: 2014
*
* See LICENSE file for details about copyright and rights to use.
*
*/
#include "bitshuffle.h"
#include "bshuf_h5filter.h"
#define PUSH_ERR(func, minor, str) \
H5Epush1(__FILE__, func, __LINE__, H5E_PLINE, minor, str)
// Prototypes from bitshuffle.c
void bshuf_write_uint64_BE(void* buf, uint64_t num);
uint64_t bshuf_read_uint64_BE(void* buf);
void bshuf_write_uint32_BE(void* buf, uint32_t num);
uint32_t bshuf_read_uint32_BE(const void* buf);
// Only called on compresion, not on reverse.
herr_t bshuf_h5_set_local(hid_t dcpl, hid_t type, hid_t space){
herr_t r;
size_t ii;
unsigned int elem_size;
unsigned int flags;
size_t nelements = 8;
size_t nelem_max = 11;
unsigned values[] = {0,0,0,0,0,0,0,0,0,0,0};
unsigned tmp_values[] = {0,0,0,0,0,0,0,0};
char msg[80];
r = H5Pget_filter_by_id2(dcpl, BSHUF_H5FILTER, &flags, &nelements,
tmp_values, 0, NULL, NULL);
if(r<0) return -1;
// First 3 slots reserved. Move any passed options to higher addresses.
for (ii=0; ii < nelements && ii + 3 < nelem_max; ii++) {
values[ii + 3] = tmp_values[ii];
}
nelements = 3 + nelements;
values[0] = BSHUF_VERSION_MAJOR;
values[1] = BSHUF_VERSION_MINOR;
elem_size = H5Tget_size(type);
if(elem_size <= 0) {
PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK,
"Invalid element size.");
return -1;
}
values[2] = elem_size;
// Validate user supplied arguments.
if (nelements > 3) {
if (values[3] % 8 || values[3] < 0) {
sprintf(msg, "Error in bitshuffle. Invalid block size: %d.",
values[3]);
PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, msg);
return -1;
}
}
if (nelements > 4) {
switch (values[4]) {
case 0:
break;
case BSHUF_H5_COMPRESS_LZ4:
break;
default:
PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK,
"Invalid bitshuffle compression.");
}
}
r = H5Pmodify_filter(dcpl, BSHUF_H5FILTER, flags, nelements, values);
if(r<0) return -1;
return 1;
}
size_t bshuf_h5_filter(unsigned int flags, size_t cd_nelmts,
const unsigned int cd_values[], size_t nbytes,
size_t *buf_size, void **buf) {
size_t size, elem_size;
int err;
char msg[80];
size_t block_size = 0;
size_t buf_size_out, nbytes_uncomp, nbytes_out;
char* in_buf = *buf;
void *out_buf;
if (cd_nelmts < 3) {
PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK,
"Not enough parameters.");
return 0;
}
elem_size = cd_values[2];
// User specified block size.
if (cd_nelmts > 3) block_size = cd_values[3];
if (block_size == 0) block_size = bshuf_default_block_size(elem_size);
// Compression in addition to bitshiffle.
if (cd_nelmts > 4 && cd_values[4] == BSHUF_H5_COMPRESS_LZ4) {
if (flags & H5Z_FLAG_REVERSE) {
// First eight bytes is the number of bytes in the output buffer,
// little endian.
nbytes_uncomp = bshuf_read_uint64_BE(in_buf);
// Override the block size with the one read from the header.
block_size = bshuf_read_uint32_BE((const char*) in_buf + 8) / elem_size;
// Skip over the header.
in_buf += 12;
buf_size_out = nbytes_uncomp;
} else {
nbytes_uncomp = nbytes;
buf_size_out = bshuf_compress_lz4_bound(nbytes_uncomp / elem_size,
elem_size, block_size) + 12;
}
} else {
nbytes_uncomp = nbytes;
buf_size_out = nbytes;
}
// TODO, remove this restriction by memcopying the extra.
if (nbytes_uncomp % elem_size) {
PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK,
"Non integer number of elements.");
return 0;
}
size = nbytes_uncomp / elem_size;
out_buf = malloc(buf_size_out);
if (out_buf == NULL) {
PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK,
"Could not allocate output buffer.");
return 0;
}
if (cd_nelmts > 4 && cd_values[4] == BSHUF_H5_COMPRESS_LZ4) {
if (flags & H5Z_FLAG_REVERSE) {
// Bit unshuffle/decompress.
err = bshuf_decompress_lz4(in_buf, out_buf, size, elem_size, block_size);
nbytes_out = nbytes_uncomp;
} else {
// Bit shuffle/compress.
// Write the header, described in
// http://www.hdfgroup.org/services/filters/HDF5_LZ4.pdf.
// Techincally we should be using signed integers instead of
// unsigned ones, however for valid inputs (positive numbers) these
// have the same representation.
bshuf_write_uint64_BE(out_buf, nbytes_uncomp);
bshuf_write_uint32_BE((char*) out_buf + 8, block_size * elem_size);
err = bshuf_compress_lz4(in_buf, (char*) out_buf + 12, size,
elem_size, block_size); nbytes_out = err + 12; } } else {
if (flags & H5Z_FLAG_REVERSE) {
// Bit unshuffle.
err = bshuf_bitunshuffle(in_buf, out_buf, size, elem_size,
block_size); } else {
// Bit shuffle.
err = bshuf_bitshuffle(in_buf, out_buf, size, elem_size,
block_size); } nbytes_out = nbytes; }
//printf("nb_in %d, nb_uncomp %d, nb_out %d, buf_out %d, block %d\n",
//nbytes, nbytes_uncomp, nbytes_out, buf_size_out, block_size);
if (err < 0) {
sprintf(msg, "Error in bitshuffle with error code %d.", err);
PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, msg);
free(out_buf);
return 0;
} else {
free(*buf);
*buf = out_buf;
*buf_size = buf_size_out;
return nbytes_out;
}
}
H5Z_class_t bshuf_H5Filter[1] = {{
H5Z_CLASS_T_VERS,
(H5Z_filter_t)(BSHUF_H5FILTER),
1, 1,
"bitshuffle; see https://github.com/kiyo-masui/bitshuffle",
NULL,
(H5Z_set_local_func_t)(bshuf_h5_set_local),
(H5Z_func_t)(bshuf_h5_filter)
}};
int bshuf_register_h5filter(void){
int retval;
retval = H5Zregister(bshuf_H5Filter);
if(retval<0){
PUSH_ERR("bshuf_register_h5filter",
H5E_CANTREGISTER, "Can't register bitshuffle filter");
}
return retval;
}
+59
View File
@@ -0,0 +1,59 @@
/*
* Bitshuffle HDF5 filter
*
* This file is part of Bitshuffle
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
* Website: http://www.github.com/kiyo-masui/bitshuffle
* Created: 2014
*
* See LICENSE file for details about copyright and rights to use.
*
*
* Header File
*
* Filter Options
* --------------
* block_size (option slot 0) : interger (optional)
* What block size to use (in elements not bytes). Default is 0,
* for which bitshuffle will pick a block size with a target of 8kb.
* Compression (option slot 1) : 0 or BSHUF_H5_COMPRESS_LZ4
* Whether to apply LZ4 compression to the data after bitshuffling.
* This is much faster than applying compression as a second filter
* because it is done when the small block of data is already in the
* L1 cache.
*
* For LZ4 compression, the compressed format of the data is the same as
* for the normal LZ4 filter described in
* http://www.hdfgroup.org/services/filters/HDF5_LZ4.pdf.
*
*/
#ifndef BSHUF_H5FILTER_H
#define BSHUF_H5FILTER_H
#define H5Z_class_t_vers 2
#include "hdf5.h"
#define BSHUF_H5FILTER 32008
#define BSHUF_H5_COMPRESS_LZ4 2
extern H5Z_class_t bshuf_H5Filter[1];
/* ---- bshuf_register_h5filter ----
*
* Register the bitshuffle HDF5 filter within the HDF5 library.
*
* Call this before using the bitshuffle HDF5 filter from C unless
* using dynamically loaded filters.
*
*/
int bshuf_register_h5filter(void);
#endif // BSHUF_H5FILTER_H