Fixes to allow bitshuffle-lz4 to compile with -std=c89
Removes all "//" style comments, adds a missing typedef for "int16_t" (although these typedefs are not checked for correct width), and removes duplicated "intX_t" typedefs. It would be preferable to detect GCC (with its extensions) and use its definitions of fixed-width integers when not compiling as C99.
This commit is contained in:
@@ -18,12 +18,12 @@
|
||||
#include <string.h>
|
||||
|
||||
|
||||
// Constants.
|
||||
// Use fast decompression instead of safe decompression for LZ4.
|
||||
|
||||
|
||||
#define BSHUF_LZ4_DECOMPRESS_FAST
|
||||
|
||||
|
||||
// Macros.
|
||||
|
||||
#define CHECK_ERR_FREE_LZ(count, buf) if (count < 0) { \
|
||||
free(buf); return count - 1000; }
|
||||
|
||||
@@ -138,13 +138,13 @@ size_t bshuf_compress_lz4_bound(const size_t size,
|
||||
}
|
||||
if (block_size % BSHUF_BLOCKED_MULT) return -81;
|
||||
|
||||
// Note that each block gets a 4 byte header.
|
||||
// Size of full blocks.
|
||||
|
||||
|
||||
bound = (LZ4_compressBound(block_size * elem_size) + 4) * (size / block_size);
|
||||
// Size of partial blocks, if any.
|
||||
|
||||
leftover = ((size % block_size) / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT;
|
||||
if (leftover) bound += LZ4_compressBound(leftover * elem_size) + 4;
|
||||
// Size of uncompressed data not fitting into any blocks.
|
||||
|
||||
bound += (size % BSHUF_BLOCKED_MULT) * elem_size;
|
||||
return bound;
|
||||
}
|
||||
|
||||
@@ -117,7 +117,7 @@ int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size,
|
||||
const size_t elem_size, size_t block_size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // BITSHUFFLE_H
|
||||
#endif
|
||||
|
||||
+26
-27
@@ -25,7 +25,7 @@
|
||||
#endif
|
||||
|
||||
|
||||
// Conditional includes for SSE2 and AVX2.
|
||||
|
||||
#ifdef USEAVX2
|
||||
#include <immintrin.h>
|
||||
#elif defined USESSE2
|
||||
@@ -33,7 +33,7 @@
|
||||
#endif
|
||||
|
||||
|
||||
// Macros.
|
||||
|
||||
#define CHECK_MULT_EIGHT(n) if (n % 8) return -80;
|
||||
#define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
|
||||
|
||||
@@ -131,8 +131,8 @@ int64_t bshuf_trans_byte_elem_remainder(const void* in, void* out, const size_t
|
||||
CHECK_MULT_EIGHT(start);
|
||||
|
||||
if (size > start) {
|
||||
// ii loop separated into 2 loops so the compiler can unroll
|
||||
// the inner one.
|
||||
|
||||
|
||||
for (ii = start; ii + 7 < size; ii += 8) {
|
||||
for (jj = 0; jj < elem_size; jj++) {
|
||||
for (kk = 0; kk < 8; kk++) {
|
||||
@@ -351,7 +351,7 @@ int64_t bshuf_untrans_bit_elem_scal(const void* in, void* out, const size_t size
|
||||
/* ---- Worker code that uses SSE2 ----
|
||||
*
|
||||
* The following code makes use of the SSE2 instruction set and specialized
|
||||
* 16 byte registers. The SSE2 instructions are present on modern x86
|
||||
* 16 byte registers. The SSE2 instructions are present on modern x86
|
||||
* processors. The first Intel processor microarchitecture supporting SSE2 was
|
||||
* Pentium 4 (2000).
|
||||
*
|
||||
@@ -512,7 +512,7 @@ int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size,
|
||||
|
||||
int64_t count;
|
||||
|
||||
// Trivial cases: power of 2 bytes.
|
||||
|
||||
switch (elem_size) {
|
||||
case 1:
|
||||
count = bshuf_copy(in, out, size, elem_size);
|
||||
@@ -528,14 +528,14 @@ int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size,
|
||||
return count;
|
||||
}
|
||||
|
||||
// Worst case: odd number of bytes. Turns out that this is faster for
|
||||
// (odd * 2) byte elements as well (hence % 4).
|
||||
|
||||
|
||||
if (elem_size % 4) {
|
||||
count = bshuf_trans_byte_elem_scal(in, out, size, elem_size);
|
||||
return count;
|
||||
}
|
||||
|
||||
// Multiple of power of 2: transpose hierarchically.
|
||||
|
||||
{
|
||||
size_t nchunk_elem;
|
||||
void* tmp_buf = malloc(size * elem_size);
|
||||
@@ -554,7 +554,7 @@ int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size,
|
||||
size * nchunk_elem);
|
||||
bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size);
|
||||
} else {
|
||||
// Not used since scalar algorithm is faster.
|
||||
|
||||
nchunk_elem = elem_size / 2;
|
||||
TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t);
|
||||
count = bshuf_trans_byte_elem_SSE_16(out, tmp_buf,
|
||||
@@ -687,8 +687,8 @@ int64_t bshuf_trans_byte_bitrow_SSE(const void* in, void* out, const size_t size
|
||||
g1 = _mm_unpacklo_epi32(g0, h0);
|
||||
h1 = _mm_unpackhi_epi32(g0, h0);
|
||||
|
||||
// We don't have a storeh instruction for integers, so interpret
|
||||
// as a float. Have a storel (_mm_storel_epi64).
|
||||
|
||||
|
||||
as = (__m128 *) &a1;
|
||||
bs = (__m128 *) &b1;
|
||||
cs = (__m128 *) &c1;
|
||||
@@ -737,8 +737,8 @@ int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t
|
||||
|
||||
CHECK_MULT_EIGHT(size);
|
||||
|
||||
// With a bit of care, this could be written such that such that it is
|
||||
// in_buf = out_buf safe.
|
||||
|
||||
|
||||
const char* in_b = (const char*) in;
|
||||
uint16_t* out_ui16 = (uint16_t*) out;
|
||||
|
||||
@@ -788,7 +788,7 @@ int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size,
|
||||
return count;
|
||||
}
|
||||
|
||||
#else // #ifdef USESSE2
|
||||
#else
|
||||
|
||||
|
||||
int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size,
|
||||
@@ -842,7 +842,7 @@ int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t
|
||||
}
|
||||
|
||||
|
||||
#endif // #ifdef USESSE2
|
||||
#endif
|
||||
|
||||
|
||||
/* ---- Code that requires AVX2. Intel Haswell (2013) and later. ---- */
|
||||
@@ -1014,8 +1014,8 @@ int64_t bshuf_shuffle_bit_eightelem_AVX(const void* in, void* out, const size_t
|
||||
|
||||
CHECK_MULT_EIGHT(size);
|
||||
|
||||
// With a bit of care, this could be written such that such that it is
|
||||
// in_buf = out_buf safe.
|
||||
|
||||
|
||||
const char* in_b = (const char*) in;
|
||||
char* out_b = (char*) out;
|
||||
|
||||
@@ -1065,7 +1065,7 @@ int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size,
|
||||
}
|
||||
|
||||
|
||||
#else // #ifdef USEAVX2
|
||||
#else
|
||||
|
||||
int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size,
|
||||
const size_t elem_size) {
|
||||
@@ -1096,12 +1096,12 @@ int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size,
|
||||
return -12;
|
||||
}
|
||||
|
||||
#endif // #ifdef USEAVX2
|
||||
#endif
|
||||
|
||||
|
||||
/* ---- Drivers selecting best instruction set at compile time. ---- */
|
||||
|
||||
int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size,
|
||||
int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size,
|
||||
const size_t elem_size) {
|
||||
|
||||
int64_t count;
|
||||
@@ -1116,7 +1116,7 @@ int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size,
|
||||
}
|
||||
|
||||
|
||||
int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size,
|
||||
int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size,
|
||||
const size_t elem_size) {
|
||||
|
||||
int64_t count;
|
||||
@@ -1178,7 +1178,6 @@ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
|
||||
if (err < 0) return err;
|
||||
|
||||
leftover_bytes = size % BSHUF_BLOCKED_MULT * elem_size;
|
||||
//this_iter;
|
||||
last_in = (char *) ioc_get_in(&C, &this_iter);
|
||||
ioc_set_next_in(&C, &this_iter, (void *) (last_in + leftover_bytes));
|
||||
last_out = (char *) ioc_get_out(&C, &this_iter);
|
||||
@@ -1202,7 +1201,7 @@ int64_t bshuf_bitshuffle_block(ioc_chain *C_ptr, \
|
||||
int64_t count;
|
||||
|
||||
|
||||
|
||||
|
||||
in = ioc_get_in(C_ptr, &this_iter);
|
||||
ioc_set_next_in(C_ptr, &this_iter,
|
||||
(void*) ((char*) in + size * elem_size));
|
||||
@@ -1297,11 +1296,11 @@ uint32_t bshuf_read_uint32_BE(const void* buf) {
|
||||
*/
|
||||
|
||||
size_t bshuf_default_block_size(const size_t elem_size) {
|
||||
// This function needs to be absolutely stable between versions.
|
||||
// Otherwise encoded data will not be decodable.
|
||||
|
||||
|
||||
|
||||
size_t block_size = BSHUF_TARGET_BLOCK_SIZE_B / elem_size;
|
||||
// Ensure it is a required multiple.
|
||||
|
||||
block_size = (block_size / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT;
|
||||
return MAX(block_size, BSHUF_MIN_RECOMMEND_BLOCK);
|
||||
}
|
||||
|
||||
@@ -28,14 +28,15 @@
|
||||
#ifndef BITSHUFFLE_CORE_H
|
||||
#define BITSHUFFLE_CORE_H
|
||||
|
||||
// We assume GNU g++ defining `__cplusplus` has stdint.h
|
||||
|
||||
#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus)
|
||||
#include <stdint.h>
|
||||
#else
|
||||
typedef unsigned char uint8_t;
|
||||
typedef unsigned short uint16_t;
|
||||
typedef signed short int16_t;
|
||||
typedef unsigned int uint32_t;
|
||||
typedef signed int int32_t;
|
||||
typedef signed int int32_t;
|
||||
typedef unsigned long long uint64_t;
|
||||
typedef long long int64_t;
|
||||
#endif
|
||||
@@ -43,7 +44,7 @@
|
||||
#include <stdlib.h>
|
||||
|
||||
|
||||
// These are usually set in the setup.py.
|
||||
|
||||
#ifndef BSHUF_VERSION_MAJOR
|
||||
#define BSHUF_VERSION_MAJOR 0
|
||||
#define BSHUF_VERSION_MINOR 3
|
||||
@@ -150,7 +151,7 @@ int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size,
|
||||
const size_t elem_size, size_t block_size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // BITSHUFFLE_CORE_H
|
||||
#endif
|
||||
|
||||
@@ -13,31 +13,19 @@
|
||||
#ifndef BITSHUFFLE_INTERNALS_H
|
||||
#define BITSHUFFLE_INTERNALS_H
|
||||
|
||||
// We assume GNU g++ defining `__cplusplus` has stdint.h
|
||||
#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus)
|
||||
#include <stdint.h>
|
||||
#else
|
||||
typedef unsigned char uint8_t;
|
||||
typedef unsigned short uint16_t;
|
||||
typedef unsigned int uint32_t;
|
||||
typedef signed int int32_t;
|
||||
typedef unsigned long long uint64_t;
|
||||
typedef long long int64_t;
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "iochain.h"
|
||||
|
||||
|
||||
// Constants.
|
||||
#ifndef BSHUF_MIN_RECOMMEND_BLOCK
|
||||
#define BSHUF_MIN_RECOMMEND_BLOCK 128
|
||||
#define BSHUF_BLOCKED_MULT 8 // Block sizes must be multiple of this.
|
||||
#define BSHUF_BLOCKED_MULT 8
|
||||
#define BSHUF_TARGET_BLOCK_SIZE_B 8192
|
||||
#endif
|
||||
|
||||
|
||||
// Macros.
|
||||
|
||||
#define CHECK_ERR_FREE(count, buf) if (count < 0) { free(buf); return count; }
|
||||
|
||||
|
||||
@@ -69,7 +57,7 @@ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
|
||||
const size_t size, const size_t elem_size, size_t block_size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // BITSHUFFLE_INTERNALS_H
|
||||
#endif
|
||||
|
||||
+3
-3
@@ -81,9 +81,9 @@ void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr) {
|
||||
C->out_pl[(*this_iter + 1) % IOC_SIZE].ptr = out_ptr;
|
||||
#ifdef _OPENMP
|
||||
omp_unset_lock(&(C->out_pl[(*this_iter + 1) % IOC_SIZE].lock));
|
||||
// *in_pl[this_iter]* lock released at the end of the iteration to avoid being
|
||||
// overtaken by previous threads and having *out_pl[this_iter]* corrupted.
|
||||
// Especially worried about thread 0, iteration 0.
|
||||
|
||||
|
||||
|
||||
omp_unset_lock(&(C->in_pl[(*this_iter) % IOC_SIZE].lock));
|
||||
#endif
|
||||
}
|
||||
|
||||
+2
-2
@@ -25,7 +25,7 @@
|
||||
* Usage
|
||||
* -----
|
||||
* - Call `ioc_init` in serial block.
|
||||
* - Each thread should create a local variable *size_t this_iter* and
|
||||
* - Each thread should create a local variable *size_t this_iter* and
|
||||
* pass its address to all function calls. Its value will be set
|
||||
* inside the functions and is used to identify the thread.
|
||||
* - Each thread must call each of the `ioc_get*` and `ioc_set*` methods
|
||||
@@ -90,5 +90,5 @@ void ioc_set_next_in(ioc_chain *C, size_t* this_iter, void* in_ptr);
|
||||
void * ioc_get_out(ioc_chain *C, size_t *this_iter);
|
||||
void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr);
|
||||
|
||||
#endif // IOCHAIN_H
|
||||
#endif
|
||||
|
||||
|
||||
@@ -825,7 +825,6 @@ _next_match:
|
||||
/* Match description too long : reduce it */
|
||||
matchLength = (15-1) + (oMaxMatch-op) * 255;
|
||||
}
|
||||
//printf("offset %5i, matchLength%5i \n", (int)(ip-match), matchLength + MINMATCH);
|
||||
ip += MINMATCH + matchLength;
|
||||
|
||||
if (matchLength>=ML_MASK)
|
||||
|
||||
Reference in New Issue
Block a user