From 38660b17fdba1611ea55fe411263e9c4d3b2f9c1 Mon Sep 17 00:00:00 2001 From: Charles Mita Date: Fri, 17 Aug 2018 11:02:36 +0100 Subject: [PATCH] Fixes to allow bitshuffle-lz4 to compile with -std=c89 Removes all "//" style comments, adds a missing typedef for "int16_t" (although these are not checked for correct width) and removes duplicated "intX_t" typedefs. It would be preferable to detect GCC (with its extensions) and use its definitions of fixed-with integers if not compiling with C99. --- bslz4/src/bitshuffle.c | 14 ++++----- bslz4/src/bitshuffle.h | 4 +-- bslz4/src/bitshuffle_core.c | 53 ++++++++++++++++---------------- bslz4/src/bitshuffle_core.h | 11 ++++--- bslz4/src/bitshuffle_internals.h | 20 +++--------- bslz4/src/iochain.c | 6 ++-- bslz4/src/iochain.h | 4 +-- bslz4/src/lz4.c | 1 - 8 files changed, 50 insertions(+), 63 deletions(-) diff --git a/bslz4/src/bitshuffle.c b/bslz4/src/bitshuffle.c index 54ff045..3b8815b 100644 --- a/bslz4/src/bitshuffle.c +++ b/bslz4/src/bitshuffle.c @@ -18,12 +18,12 @@ #include -// Constants. -// Use fast decompression instead of safe decompression for LZ4. + + #define BSHUF_LZ4_DECOMPRESS_FAST -// Macros. + #define CHECK_ERR_FREE_LZ(count, buf) if (count < 0) { \ free(buf); return count - 1000; } @@ -138,13 +138,13 @@ size_t bshuf_compress_lz4_bound(const size_t size, } if (block_size % BSHUF_BLOCKED_MULT) return -81; - // Note that each block gets a 4 byte header. - // Size of full blocks. + + bound = (LZ4_compressBound(block_size * elem_size) + 4) * (size / block_size); - // Size of partial blocks, if any. + leftover = ((size % block_size) / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT; if (leftover) bound += LZ4_compressBound(leftover * elem_size) + 4; - // Size of uncompressed data not fitting into any blocks. + bound += (size % BSHUF_BLOCKED_MULT) * elem_size; return bound; } diff --git a/bslz4/src/bitshuffle.h b/bslz4/src/bitshuffle.h index 3df95f4..dc87e9e 100644 --- a/bslz4/src/bitshuffle.h +++ b/bslz4/src/bitshuffle.h @@ -117,7 +117,7 @@ int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size, const size_t elem_size, size_t block_size); #ifdef __cplusplus -} // extern "C" +} #endif -#endif // BITSHUFFLE_H +#endif diff --git a/bslz4/src/bitshuffle_core.c b/bslz4/src/bitshuffle_core.c index 583e4fe..1a184d6 100644 --- a/bslz4/src/bitshuffle_core.c +++ b/bslz4/src/bitshuffle_core.c @@ -25,7 +25,7 @@ #endif -// Conditional includes for SSE2 and AVX2. + #ifdef USEAVX2 #include #elif defined USESSE2 @@ -33,7 +33,7 @@ #endif -// Macros. + #define CHECK_MULT_EIGHT(n) if (n % 8) return -80; #define MAX(X,Y) ((X) > (Y) ? (X) : (Y)) @@ -131,8 +131,8 @@ int64_t bshuf_trans_byte_elem_remainder(const void* in, void* out, const size_t CHECK_MULT_EIGHT(start); if (size > start) { - // ii loop separated into 2 loops so the compiler can unroll - // the inner one. + + for (ii = start; ii + 7 < size; ii += 8) { for (jj = 0; jj < elem_size; jj++) { for (kk = 0; kk < 8; kk++) { @@ -351,7 +351,7 @@ int64_t bshuf_untrans_bit_elem_scal(const void* in, void* out, const size_t size /* ---- Worker code that uses SSE2 ---- * * The following code makes use of the SSE2 instruction set and specialized - * 16 byte registers. The SSE2 instructions are present on modern x86 + * 16 byte registers. The SSE2 instructions are present on modern x86 * processors. The first Intel processor microarchitecture supporting SSE2 was * Pentium 4 (2000). * @@ -512,7 +512,7 @@ int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size, int64_t count; - // Trivial cases: power of 2 bytes. + switch (elem_size) { case 1: count = bshuf_copy(in, out, size, elem_size); @@ -528,14 +528,14 @@ int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size, return count; } - // Worst case: odd number of bytes. Turns out that this is faster for - // (odd * 2) byte elements as well (hence % 4). + + if (elem_size % 4) { count = bshuf_trans_byte_elem_scal(in, out, size, elem_size); return count; } - // Multiple of power of 2: transpose hierarchically. + { size_t nchunk_elem; void* tmp_buf = malloc(size * elem_size); @@ -554,7 +554,7 @@ int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size, size * nchunk_elem); bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size); } else { - // Not used since scalar algorithm is faster. + nchunk_elem = elem_size / 2; TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t); count = bshuf_trans_byte_elem_SSE_16(out, tmp_buf, @@ -687,8 +687,8 @@ int64_t bshuf_trans_byte_bitrow_SSE(const void* in, void* out, const size_t size g1 = _mm_unpacklo_epi32(g0, h0); h1 = _mm_unpackhi_epi32(g0, h0); - // We don't have a storeh instruction for integers, so interpret - // as a float. Have a storel (_mm_storel_epi64). + + as = (__m128 *) &a1; bs = (__m128 *) &b1; cs = (__m128 *) &c1; @@ -737,8 +737,8 @@ int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t CHECK_MULT_EIGHT(size); - // With a bit of care, this could be written such that such that it is - // in_buf = out_buf safe. + + const char* in_b = (const char*) in; uint16_t* out_ui16 = (uint16_t*) out; @@ -788,7 +788,7 @@ int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size, return count; } -#else // #ifdef USESSE2 +#else int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size, @@ -842,7 +842,7 @@ int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t } -#endif // #ifdef USESSE2 +#endif /* ---- Code that requires AVX2. Intel Haswell (2013) and later. ---- */ @@ -1014,8 +1014,8 @@ int64_t bshuf_shuffle_bit_eightelem_AVX(const void* in, void* out, const size_t CHECK_MULT_EIGHT(size); - // With a bit of care, this could be written such that such that it is - // in_buf = out_buf safe. + + const char* in_b = (const char*) in; char* out_b = (char*) out; @@ -1065,7 +1065,7 @@ int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size, } -#else // #ifdef USEAVX2 +#else int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size, const size_t elem_size) { @@ -1096,12 +1096,12 @@ int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size, return -12; } -#endif // #ifdef USEAVX2 +#endif /* ---- Drivers selecting best instruction set at compile time. ---- */ -int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size, +int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size, const size_t elem_size) { int64_t count; @@ -1116,7 +1116,7 @@ int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size, } -int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size, +int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size, const size_t elem_size) { int64_t count; @@ -1178,7 +1178,6 @@ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out, if (err < 0) return err; leftover_bytes = size % BSHUF_BLOCKED_MULT * elem_size; - //this_iter; last_in = (char *) ioc_get_in(&C, &this_iter); ioc_set_next_in(&C, &this_iter, (void *) (last_in + leftover_bytes)); last_out = (char *) ioc_get_out(&C, &this_iter); @@ -1202,7 +1201,7 @@ int64_t bshuf_bitshuffle_block(ioc_chain *C_ptr, \ int64_t count; - + in = ioc_get_in(C_ptr, &this_iter); ioc_set_next_in(C_ptr, &this_iter, (void*) ((char*) in + size * elem_size)); @@ -1297,11 +1296,11 @@ uint32_t bshuf_read_uint32_BE(const void* buf) { */ size_t bshuf_default_block_size(const size_t elem_size) { - // This function needs to be absolutely stable between versions. - // Otherwise encoded data will not be decodable. + + size_t block_size = BSHUF_TARGET_BLOCK_SIZE_B / elem_size; - // Ensure it is a required multiple. + block_size = (block_size / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT; return MAX(block_size, BSHUF_MIN_RECOMMEND_BLOCK); } diff --git a/bslz4/src/bitshuffle_core.h b/bslz4/src/bitshuffle_core.h index 4516ef4..8996d91 100644 --- a/bslz4/src/bitshuffle_core.h +++ b/bslz4/src/bitshuffle_core.h @@ -28,14 +28,15 @@ #ifndef BITSHUFFLE_CORE_H #define BITSHUFFLE_CORE_H -// We assume GNU g++ defining `__cplusplus` has stdint.h + #if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus) #include #else typedef unsigned char uint8_t; typedef unsigned short uint16_t; + typedef signed short int16_t; typedef unsigned int uint32_t; - typedef signed int int32_t; + typedef signed int int32_t; typedef unsigned long long uint64_t; typedef long long int64_t; #endif @@ -43,7 +44,7 @@ #include -// These are usually set in the setup.py. + #ifndef BSHUF_VERSION_MAJOR #define BSHUF_VERSION_MAJOR 0 #define BSHUF_VERSION_MINOR 3 @@ -150,7 +151,7 @@ int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size, const size_t elem_size, size_t block_size); #ifdef __cplusplus -} // extern "C" +} #endif -#endif // BITSHUFFLE_CORE_H +#endif diff --git a/bslz4/src/bitshuffle_internals.h b/bslz4/src/bitshuffle_internals.h index e039925..7b5b8a5 100644 --- a/bslz4/src/bitshuffle_internals.h +++ b/bslz4/src/bitshuffle_internals.h @@ -13,31 +13,19 @@ #ifndef BITSHUFFLE_INTERNALS_H #define BITSHUFFLE_INTERNALS_H -// We assume GNU g++ defining `__cplusplus` has stdint.h -#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus) -#include -#else - typedef unsigned char uint8_t; - typedef unsigned short uint16_t; - typedef unsigned int uint32_t; - typedef signed int int32_t; - typedef unsigned long long uint64_t; - typedef long long int64_t; -#endif #include #include "iochain.h" -// Constants. #ifndef BSHUF_MIN_RECOMMEND_BLOCK #define BSHUF_MIN_RECOMMEND_BLOCK 128 -#define BSHUF_BLOCKED_MULT 8 // Block sizes must be multiple of this. +#define BSHUF_BLOCKED_MULT 8 #define BSHUF_TARGET_BLOCK_SIZE_B 8192 #endif -// Macros. + #define CHECK_ERR_FREE(count, buf) if (count < 0) { free(buf); return count; } @@ -69,7 +57,7 @@ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out, const size_t size, const size_t elem_size, size_t block_size); #ifdef __cplusplus -} // extern "C" +} #endif -#endif // BITSHUFFLE_INTERNALS_H +#endif diff --git a/bslz4/src/iochain.c b/bslz4/src/iochain.c index baa9729..5c5eddd 100644 --- a/bslz4/src/iochain.c +++ b/bslz4/src/iochain.c @@ -81,9 +81,9 @@ void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr) { C->out_pl[(*this_iter + 1) % IOC_SIZE].ptr = out_ptr; #ifdef _OPENMP omp_unset_lock(&(C->out_pl[(*this_iter + 1) % IOC_SIZE].lock)); - // *in_pl[this_iter]* lock released at the end of the iteration to avoid being - // overtaken by previous threads and having *out_pl[this_iter]* corrupted. - // Especially worried about thread 0, iteration 0. + + + omp_unset_lock(&(C->in_pl[(*this_iter) % IOC_SIZE].lock)); #endif } diff --git a/bslz4/src/iochain.h b/bslz4/src/iochain.h index 4e225d1..6a398ff 100644 --- a/bslz4/src/iochain.h +++ b/bslz4/src/iochain.h @@ -25,7 +25,7 @@ * Usage * ----- * - Call `ioc_init` in serial block. - * - Each thread should create a local variable *size_t this_iter* and + * - Each thread should create a local variable *size_t this_iter* and * pass its address to all function calls. Its value will be set * inside the functions and is used to identify the thread. * - Each thread must call each of the `ioc_get*` and `ioc_set*` methods @@ -90,5 +90,5 @@ void ioc_set_next_in(ioc_chain *C, size_t* this_iter, void* in_ptr); void * ioc_get_out(ioc_chain *C, size_t *this_iter); void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr); -#endif // IOCHAIN_H +#endif diff --git a/bslz4/src/lz4.c b/bslz4/src/lz4.c index 08cf6b5..31f489d 100644 --- a/bslz4/src/lz4.c +++ b/bslz4/src/lz4.c @@ -825,7 +825,6 @@ _next_match: /* Match description too long : reduce it */ matchLength = (15-1) + (oMaxMatch-op) * 255; } - //printf("offset %5i, matchLength%5i \n", (int)(ip-match), matchLength + MINMATCH); ip += MINMATCH + matchLength; if (matchLength>=ML_MASK)