Fixes to allow bitshuffle-lz4 to compile with -std=c89

Removes all "//" style comments, adds a missing typedef
for "int16_t" (although the fallback typedefs are not checked for
correct width) and removes duplicated "intX_t" typedefs.

It would be preferable to detect GCC (with its extensions) and use
its definitions of fixed-width integers if not compiling with C99.
This commit is contained in:
Charles Mita
2018-08-17 11:02:36 +01:00
parent 369948795b
commit 38660b17fd
8 changed files with 50 additions and 63 deletions
+7 -7
View File
@@ -18,12 +18,12 @@
#include <string.h>
// Constants.
// Use fast decompression instead of safe decompression for LZ4.
#define BSHUF_LZ4_DECOMPRESS_FAST
// Macros.
#define CHECK_ERR_FREE_LZ(count, buf) if (count < 0) { \
free(buf); return count - 1000; }
@@ -138,13 +138,13 @@ size_t bshuf_compress_lz4_bound(const size_t size,
}
if (block_size % BSHUF_BLOCKED_MULT) return -81;
// Note that each block gets a 4 byte header.
// Size of full blocks.
bound = (LZ4_compressBound(block_size * elem_size) + 4) * (size / block_size);
// Size of partial blocks, if any.
leftover = ((size % block_size) / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT;
if (leftover) bound += LZ4_compressBound(leftover * elem_size) + 4;
// Size of uncompressed data not fitting into any blocks.
bound += (size % BSHUF_BLOCKED_MULT) * elem_size;
return bound;
}
+2 -2
View File
@@ -117,7 +117,7 @@ int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size);
#ifdef __cplusplus
} // extern "C"
}
#endif
#endif // BITSHUFFLE_H
#endif
+26 -27
View File
@@ -25,7 +25,7 @@
#endif
// Conditional includes for SSE2 and AVX2.
#ifdef USEAVX2
#include <immintrin.h>
#elif defined USESSE2
@@ -33,7 +33,7 @@
#endif
// Macros.
#define CHECK_MULT_EIGHT(n) if (n % 8) return -80;
#define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
@@ -131,8 +131,8 @@ int64_t bshuf_trans_byte_elem_remainder(const void* in, void* out, const size_t
CHECK_MULT_EIGHT(start);
if (size > start) {
// ii loop separated into 2 loops so the compiler can unroll
// the inner one.
for (ii = start; ii + 7 < size; ii += 8) {
for (jj = 0; jj < elem_size; jj++) {
for (kk = 0; kk < 8; kk++) {
@@ -351,7 +351,7 @@ int64_t bshuf_untrans_bit_elem_scal(const void* in, void* out, const size_t size
/* ---- Worker code that uses SSE2 ----
*
* The following code makes use of the SSE2 instruction set and specialized
* 16 byte registers. The SSE2 instructions are present on modern x86
* 16 byte registers. The SSE2 instructions are present on modern x86
* processors. The first Intel processor microarchitecture supporting SSE2 was
* Pentium 4 (2000).
*
@@ -512,7 +512,7 @@ int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size,
int64_t count;
// Trivial cases: power of 2 bytes.
switch (elem_size) {
case 1:
count = bshuf_copy(in, out, size, elem_size);
@@ -528,14 +528,14 @@ int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size,
return count;
}
// Worst case: odd number of bytes. Turns out that this is faster for
// (odd * 2) byte elements as well (hence % 4).
if (elem_size % 4) {
count = bshuf_trans_byte_elem_scal(in, out, size, elem_size);
return count;
}
// Multiple of power of 2: transpose hierarchically.
{
size_t nchunk_elem;
void* tmp_buf = malloc(size * elem_size);
@@ -554,7 +554,7 @@ int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size,
size * nchunk_elem);
bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size);
} else {
// Not used since scalar algorithm is faster.
nchunk_elem = elem_size / 2;
TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t);
count = bshuf_trans_byte_elem_SSE_16(out, tmp_buf,
@@ -687,8 +687,8 @@ int64_t bshuf_trans_byte_bitrow_SSE(const void* in, void* out, const size_t size
g1 = _mm_unpacklo_epi32(g0, h0);
h1 = _mm_unpackhi_epi32(g0, h0);
// We don't have a storeh instruction for integers, so interpret
// as a float. Have a storel (_mm_storel_epi64).
as = (__m128 *) &a1;
bs = (__m128 *) &b1;
cs = (__m128 *) &c1;
@@ -737,8 +737,8 @@ int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t
CHECK_MULT_EIGHT(size);
// With a bit of care, this could be written such that such that it is
// in_buf = out_buf safe.
const char* in_b = (const char*) in;
uint16_t* out_ui16 = (uint16_t*) out;
@@ -788,7 +788,7 @@ int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size,
return count;
}
#else // #ifdef USESSE2
#else
int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size,
@@ -842,7 +842,7 @@ int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t
}
#endif // #ifdef USESSE2
#endif
/* ---- Code that requires AVX2. Intel Haswell (2013) and later. ---- */
@@ -1014,8 +1014,8 @@ int64_t bshuf_shuffle_bit_eightelem_AVX(const void* in, void* out, const size_t
CHECK_MULT_EIGHT(size);
// With a bit of care, this could be written such that such that it is
// in_buf = out_buf safe.
const char* in_b = (const char*) in;
char* out_b = (char*) out;
@@ -1065,7 +1065,7 @@ int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size,
}
#else // #ifdef USEAVX2
#else
int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size,
const size_t elem_size) {
@@ -1096,12 +1096,12 @@ int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size,
return -12;
}
#endif // #ifdef USEAVX2
#endif
/* ---- Drivers selecting best instruction set at compile time. ---- */
int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size,
int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size,
const size_t elem_size) {
int64_t count;
@@ -1116,7 +1116,7 @@ int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size,
}
int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size,
int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size,
const size_t elem_size) {
int64_t count;
@@ -1178,7 +1178,6 @@ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
if (err < 0) return err;
leftover_bytes = size % BSHUF_BLOCKED_MULT * elem_size;
//this_iter;
last_in = (char *) ioc_get_in(&C, &this_iter);
ioc_set_next_in(&C, &this_iter, (void *) (last_in + leftover_bytes));
last_out = (char *) ioc_get_out(&C, &this_iter);
@@ -1202,7 +1201,7 @@ int64_t bshuf_bitshuffle_block(ioc_chain *C_ptr, \
int64_t count;
in = ioc_get_in(C_ptr, &this_iter);
ioc_set_next_in(C_ptr, &this_iter,
(void*) ((char*) in + size * elem_size));
@@ -1297,11 +1296,11 @@ uint32_t bshuf_read_uint32_BE(const void* buf) {
*/
size_t bshuf_default_block_size(const size_t elem_size) {
// This function needs to be absolutely stable between versions.
// Otherwise encoded data will not be decodable.
size_t block_size = BSHUF_TARGET_BLOCK_SIZE_B / elem_size;
// Ensure it is a required multiple.
block_size = (block_size / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT;
return MAX(block_size, BSHUF_MIN_RECOMMEND_BLOCK);
}
+6 -5
View File
@@ -28,14 +28,15 @@
#ifndef BITSHUFFLE_CORE_H
#define BITSHUFFLE_CORE_H
// We assume GNU g++ defining `__cplusplus` has stdint.h
#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus)
#include <stdint.h>
#else
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef signed short int16_t;
typedef unsigned int uint32_t;
typedef signed int int32_t;
typedef signed int int32_t;
typedef unsigned long long uint64_t;
typedef long long int64_t;
#endif
@@ -43,7 +44,7 @@
#include <stdlib.h>
// These are usually set in the setup.py.
#ifndef BSHUF_VERSION_MAJOR
#define BSHUF_VERSION_MAJOR 0
#define BSHUF_VERSION_MINOR 3
@@ -150,7 +151,7 @@ int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size);
#ifdef __cplusplus
} // extern "C"
}
#endif
#endif // BITSHUFFLE_CORE_H
#endif
+4 -16
View File
@@ -13,31 +13,19 @@
#ifndef BITSHUFFLE_INTERNALS_H
#define BITSHUFFLE_INTERNALS_H
// We assume GNU g++ defining `__cplusplus` has stdint.h
#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus)
#include <stdint.h>
#else
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef signed int int32_t;
typedef unsigned long long uint64_t;
typedef long long int64_t;
#endif
#include <stdlib.h>
#include "iochain.h"
// Constants.
#ifndef BSHUF_MIN_RECOMMEND_BLOCK
#define BSHUF_MIN_RECOMMEND_BLOCK 128
#define BSHUF_BLOCKED_MULT 8 // Block sizes must be multiple of this.
#define BSHUF_BLOCKED_MULT 8
#define BSHUF_TARGET_BLOCK_SIZE_B 8192
#endif
// Macros.
#define CHECK_ERR_FREE(count, buf) if (count < 0) { free(buf); return count; }
@@ -69,7 +57,7 @@ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
const size_t size, const size_t elem_size, size_t block_size);
#ifdef __cplusplus
} // extern "C"
}
#endif
#endif // BITSHUFFLE_INTERNALS_H
#endif
+3 -3
View File
@@ -81,9 +81,9 @@ void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr) {
C->out_pl[(*this_iter + 1) % IOC_SIZE].ptr = out_ptr;
#ifdef _OPENMP
omp_unset_lock(&(C->out_pl[(*this_iter + 1) % IOC_SIZE].lock));
// *in_pl[this_iter]* lock released at the end of the iteration to avoid being
// overtaken by previous threads and having *out_pl[this_iter]* corrupted.
// Especially worried about thread 0, iteration 0.
omp_unset_lock(&(C->in_pl[(*this_iter) % IOC_SIZE].lock));
#endif
}
+2 -2
View File
@@ -25,7 +25,7 @@
* Usage
* -----
* - Call `ioc_init` in serial block.
* - Each thread should create a local variable *size_t this_iter* and
* - Each thread should create a local variable *size_t this_iter* and
* pass its address to all function calls. Its value will be set
* inside the functions and is used to identify the thread.
* - Each thread must call each of the `ioc_get*` and `ioc_set*` methods
@@ -90,5 +90,5 @@ void ioc_set_next_in(ioc_chain *C, size_t* this_iter, void* in_ptr);
void * ioc_get_out(ioc_chain *C, size_t *this_iter);
void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr);
#endif // IOCHAIN_H
#endif
-1
View File
@@ -825,7 +825,6 @@ _next_match:
/* Match description too long : reduce it */
matchLength = (15-1) + (oMaxMatch-op) * 255;
}
//printf("offset %5i, matchLength%5i \n", (int)(ip-match), matchLength + MINMATCH);
ip += MINMATCH + matchLength;
if (matchLength>=ML_MASK)