Fixes to allow bitshuffle-lz4 to compile with -std=c89

Removes all "//" style comments, adds a missing typedef for "int16_t" (although the fallback typedefs are not checked for correct width) and removes duplicated "intX_t" typedefs. It would be preferable to detect GCC (with its extensions) and use its definitions of fixed-width integers if not compiling with C99.
2018-08-17 11:02:36 +01:00
parent 369948795b
commit 38660b17fd
8 changed files with 50 additions and 63 deletions
@@ -18,12 +18,12 @@
 #include <string.h>


-// Constants.
-// Use fast decompression instead of safe decompression for LZ4.
+
+
 #define BSHUF_LZ4_DECOMPRESS_FAST


-// Macros.
+
 #define CHECK_ERR_FREE_LZ(count, buf) if (count < 0) {                      \
    free(buf); return count - 1000; }

@@ -138,13 +138,13 @@ size_t bshuf_compress_lz4_bound(const size_t size,
    }
    if (block_size % BSHUF_BLOCKED_MULT) return -81;

-    // Note that each block gets a 4 byte header.
-    // Size of full blocks.
+
+
    bound = (LZ4_compressBound(block_size * elem_size) + 4) * (size / block_size);
-    // Size of partial blocks, if any.
+
    leftover = ((size % block_size) / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT;
    if (leftover) bound += LZ4_compressBound(leftover * elem_size) + 4;
-    // Size of uncompressed data not fitting into any blocks.
+
    bound += (size % BSHUF_BLOCKED_MULT) * elem_size;
    return bound;
 }
@@ -117,7 +117,7 @@ int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size,
        const size_t elem_size, size_t block_size);

 #ifdef __cplusplus
-} // extern "C"
+}
 #endif

-#endif  // BITSHUFFLE_H
+#endif
@@ -25,7 +25,7 @@
 #endif


-// Conditional includes for SSE2 and AVX2.
+
 #ifdef USEAVX2
 #include <immintrin.h>
 #elif defined USESSE2
@@ -33,7 +33,7 @@
 #endif


-// Macros.
+
 #define CHECK_MULT_EIGHT(n) if (n % 8) return -80;
 #define MAX(X,Y) ((X) > (Y) ? (X) : (Y))

@@ -131,8 +131,8 @@ int64_t bshuf_trans_byte_elem_remainder(const void* in, void* out, const size_t
    CHECK_MULT_EIGHT(start);

    if (size > start) {
-        // ii loop separated into 2 loops so the compiler can unroll
-        // the inner one.
+
+
        for (ii = start; ii + 7 < size; ii += 8) {
            for (jj = 0; jj < elem_size; jj++) {
                for (kk = 0; kk < 8; kk++) {
@@ -351,7 +351,7 @@ int64_t bshuf_untrans_bit_elem_scal(const void* in, void* out, const size_t size
 /* ---- Worker code that uses SSE2 ----
 *
 * The following code makes use of the SSE2 instruction set and specialized
- * 16 byte registers. The SSE2 instructions are present on modern x86 
+ * 16 byte registers. The SSE2 instructions are present on modern x86
 * processors. The first Intel processor microarchitecture supporting SSE2 was
 * Pentium 4 (2000).
 *
@@ -512,7 +512,7 @@ int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size,

    int64_t count;

-    // Trivial cases: power of 2 bytes.
+
    switch (elem_size) {
        case 1:
            count = bshuf_copy(in, out, size, elem_size);
@@ -528,14 +528,14 @@ int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size,
            return count;
    }

-    // Worst case: odd number of bytes. Turns out that this is faster for
-    // (odd * 2) byte elements as well (hence % 4).
+
+
    if (elem_size % 4) {
        count = bshuf_trans_byte_elem_scal(in, out, size, elem_size);
        return count;
    }

-    // Multiple of power of 2: transpose hierarchically.
+
    {
        size_t nchunk_elem;
        void* tmp_buf = malloc(size * elem_size);
@@ -554,7 +554,7 @@ int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size,
                    size * nchunk_elem);
            bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size);
        } else {
-            // Not used since scalar algorithm is faster.
+
            nchunk_elem = elem_size / 2;
            TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t);
            count = bshuf_trans_byte_elem_SSE_16(out, tmp_buf,
@@ -687,8 +687,8 @@ int64_t bshuf_trans_byte_bitrow_SSE(const void* in, void* out, const size_t size
            g1 = _mm_unpacklo_epi32(g0, h0);
            h1 = _mm_unpackhi_epi32(g0, h0);

-            // We don't have a storeh instruction for integers, so interpret
-            // as a float. Have a storel (_mm_storel_epi64).
+
+
            as = (__m128 *) &a1;
            bs = (__m128 *) &b1;
            cs = (__m128 *) &c1;
@@ -737,8 +737,8 @@ int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t

    CHECK_MULT_EIGHT(size);

-    // With a bit of care, this could be written such that such that it is
-    // in_buf = out_buf safe.
+
+
    const char* in_b = (const char*) in;
    uint16_t* out_ui16 = (uint16_t*) out;

@@ -788,7 +788,7 @@ int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size,
    return count;
 }

-#else // #ifdef USESSE2
+#else


 int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size,
@@ -842,7 +842,7 @@ int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t
 }


-#endif // #ifdef USESSE2
+#endif


 /* ---- Code that requires AVX2. Intel Haswell (2013) and later. ---- */
@@ -1014,8 +1014,8 @@ int64_t bshuf_shuffle_bit_eightelem_AVX(const void* in, void* out, const size_t

    CHECK_MULT_EIGHT(size);

-    // With a bit of care, this could be written such that such that it is
-    // in_buf = out_buf safe.
+
+
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;

@@ -1065,7 +1065,7 @@ int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size,
 }


-#else // #ifdef USEAVX2
+#else

 int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size,
         const size_t elem_size) {
@@ -1096,12 +1096,12 @@ int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size,
    return -12;
 }

-#endif // #ifdef USEAVX2
+#endif


 /* ---- Drivers selecting best instruction set at compile time. ---- */

-int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size, 
+int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size,
        const size_t elem_size) {

    int64_t count;
@@ -1116,7 +1116,7 @@ int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size,
 }


-int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size, 
+int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size,
        const size_t elem_size) {

    int64_t count;
@@ -1178,7 +1178,6 @@ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
    if (err < 0) return err;

    leftover_bytes = size % BSHUF_BLOCKED_MULT * elem_size;
-    //this_iter;
    last_in = (char *) ioc_get_in(&C, &this_iter);
    ioc_set_next_in(&C, &this_iter, (void *) (last_in + leftover_bytes));
    last_out = (char *) ioc_get_out(&C, &this_iter);
@@ -1202,7 +1201,7 @@ int64_t bshuf_bitshuffle_block(ioc_chain *C_ptr, \
    int64_t count;


-    
+
    in = ioc_get_in(C_ptr, &this_iter);
    ioc_set_next_in(C_ptr, &this_iter,
            (void*) ((char*) in + size * elem_size));
@@ -1297,11 +1296,11 @@ uint32_t bshuf_read_uint32_BE(const void* buf) {
 */

 size_t bshuf_default_block_size(const size_t elem_size) {
-    // This function needs to be absolutely stable between versions.
-    // Otherwise encoded data will not be decodable.
+
+

    size_t block_size = BSHUF_TARGET_BLOCK_SIZE_B / elem_size;
-    // Ensure it is a required multiple.
+
    block_size = (block_size / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT;
    return MAX(block_size, BSHUF_MIN_RECOMMEND_BLOCK);
 }
@@ -28,14 +28,15 @@
 #ifndef BITSHUFFLE_CORE_H
 #define BITSHUFFLE_CORE_H

-// We assume GNU g++ defining `__cplusplus` has stdint.h
+
 #if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus)
 #include <stdint.h>
 #else
  typedef unsigned char       uint8_t;
  typedef unsigned short      uint16_t;
+  typedef signed short        int16_t;
  typedef unsigned int        uint32_t;
-  typedef   signed int        int32_t;
+  typedef signed int          int32_t;
  typedef unsigned long long  uint64_t;
  typedef long long           int64_t;
 #endif
@@ -43,7 +44,7 @@
 #include <stdlib.h>


-// These are usually set in the setup.py.
+
 #ifndef BSHUF_VERSION_MAJOR
 #define BSHUF_VERSION_MAJOR 0
 #define BSHUF_VERSION_MINOR 3
@@ -150,7 +151,7 @@ int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size,
        const size_t elem_size, size_t block_size);

 #ifdef __cplusplus
-} // extern "C"
+}
 #endif

-#endif  // BITSHUFFLE_CORE_H
+#endif
@@ -13,31 +13,19 @@
 #ifndef BITSHUFFLE_INTERNALS_H
 #define BITSHUFFLE_INTERNALS_H

-// We assume GNU g++ defining `__cplusplus` has stdint.h
-#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus)
-#include <stdint.h>
-#else
-  typedef unsigned char       uint8_t;
-  typedef unsigned short      uint16_t;
-  typedef unsigned int        uint32_t;
-  typedef   signed int        int32_t;
-  typedef unsigned long long  uint64_t;
-  typedef long long           int64_t;
-#endif

 #include <stdlib.h>
 #include "iochain.h"


-// Constants.
 #ifndef BSHUF_MIN_RECOMMEND_BLOCK
 #define BSHUF_MIN_RECOMMEND_BLOCK 128
-#define BSHUF_BLOCKED_MULT 8    // Block sizes must be multiple of this.
+#define BSHUF_BLOCKED_MULT 8
 #define BSHUF_TARGET_BLOCK_SIZE_B 8192
 #endif


-// Macros.
+
 #define CHECK_ERR_FREE(count, buf) if (count < 0) { free(buf); return count; }


@@ -69,7 +57,7 @@ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
        const size_t size, const size_t elem_size, size_t block_size);

 #ifdef __cplusplus
-} // extern "C"
+}
 #endif

-#endif  // BITSHUFFLE_INTERNALS_H
+#endif
@@ -81,9 +81,9 @@ void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr) {
    C->out_pl[(*this_iter + 1) % IOC_SIZE].ptr = out_ptr;
 #ifdef _OPENMP
    omp_unset_lock(&(C->out_pl[(*this_iter + 1) % IOC_SIZE].lock));
-    // *in_pl[this_iter]* lock released at the end of the iteration to avoid being
-    // overtaken by previous threads and having *out_pl[this_iter]* corrupted.
-    // Especially worried about thread 0, iteration 0.
+
+
+
    omp_unset_lock(&(C->in_pl[(*this_iter) % IOC_SIZE].lock));
 #endif
 }
@@ -25,7 +25,7 @@
 * Usage
 * -----
 *  - Call `ioc_init` in serial block.
- *  - Each thread should create a local variable *size_t this_iter* and 
+ *  - Each thread should create a local variable *size_t this_iter* and
 *    pass its address to all function calls. Its value will be set
 *    inside the functions and is used to identify the thread.
 *  - Each thread must call each of the `ioc_get*` and `ioc_set*` methods
@@ -90,5 +90,5 @@ void ioc_set_next_in(ioc_chain *C, size_t* this_iter, void* in_ptr);
 void * ioc_get_out(ioc_chain *C, size_t *this_iter);
 void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr);

-#endif  // IOCHAIN_H
+#endif

@@ -825,7 +825,6 @@ _next_match:
                /* Match description too long : reduce it */
                matchLength = (15-1) + (oMaxMatch-op) * 255;
            }
-            //printf("offset %5i, matchLength%5i \n", (int)(ip-match), matchLength + MINMATCH);
            ip += MINMATCH + matchLength;

            if (matchLength>=ML_MASK)