// Copyright (2019-2023) Paul Scherrer Institute

#include "JFJochCompressor.h"

#include <cstdint>
#include <cstring>
#include <stdexcept>
#include <vector>

#include <lz4.h>
#include <zstd.h>

#include "../common/JFJochException.h"

// Big-endian header writers provided by the bitshuffle C library
extern "C" {
void bshuf_write_uint64_BE(void* buf, uint64_t num);
void bshuf_write_uint32_BE(void* buf, uint32_t num);
}

JFJochBitShuffleCompressor::JFJochBitShuffleCompressor(CompressionAlgorithm in_algorithm) {
    algorithm = in_algorithm;
}

size_t JFJochBitShuffleCompressor::CompressBlock(char *dest, const char *source, size_t nelements, size_t elem_size) {
    // Precondition: nelements <= DefaultBlockSize and nelements is a multiple of BSHUF_BLOCKED_MULT
    const char *src_ptr;

    // Bit-transpose (bitshuffle) the block into tmp_space before entropy coding
    int64_t bshuf_ret = bitshuf_encode_block(tmp_space.data(), source, scratch, nelements, elem_size);
    if (bshuf_ret < 0)
        throw JFJochException(JFJochExceptionCategory::Compression, "bshuf_trans_bit_elem error");
    src_ptr = tmp_space.data();

    size_t compressed_size;
    size_t src_size = nelements * elem_size;

    switch (algorithm) {
        case CompressionAlgorithm::BSHUF_LZ4:
            compressed_size = LZ4_compress_default(src_ptr, dest + 4, src_size, LZ4_compressBound(src_size));
            break;
        case CompressionAlgorithm::BSHUF_ZSTD:
            compressed_size = ZSTD_compress(dest + 4, ZSTD_compressBound(src_size), src_ptr, src_size, 0);
            if (ZSTD_isError(compressed_size))
                throw JFJochException(JFJochExceptionCategory::Compression, ZSTD_getErrorName(compressed_size));
            break;
        case CompressionAlgorithm::BSHUF_ZSTD_RLE:
            try {
                compressed_size = zstd_compressor.Compress(((uint8_t *) dest) + 4, (uint64_t *) src_ptr,
                                                           src_size, src_size);
            } catch (const std::runtime_error &e) {
                throw JFJochException(JFJochExceptionCategory::ZSTDCompressionError, e.what());
            }
            break;
        default:
            throw JFJochException(JFJochExceptionCategory::Compression, "Algorithm not supported");
    }

    // Each block is prefixed with its compressed size as a 4-byte big-endian integer
    bshuf_write_uint32_BE(dest, compressed_size);
    return compressed_size + 4;
}

std::vector<uint8_t> JFJochBitShuffleCompressor::Compress(const void *source, size_t nelements, size_t elem_size) {
    std::vector<uint8_t> tmp(MaxCompressedSize(algorithm, nelements, elem_size));
    size_t tmp_size = Compress(tmp.data(), source, nelements, elem_size);
    tmp.resize(tmp_size);
    return tmp;
}

size_t JFJochBitShuffleCompressor::Compress(void *dest, const void *source, size_t nelements, size_t elem_size) {
    auto c_dest = (char *) dest;
    auto c_source = (const char *) source;

    static_assert(DefaultBlockSize % BSHUF_BLOCKED_MULT == 0,
                  "DefaultBlockSize must be a multiple of BSHUF_BLOCKED_MULT");

    if (algorithm == CompressionAlgorithm::NO_COMPRESSION) {
        // Trivial case if no compression - copy content
        memcpy(dest, source, nelements * elem_size);
        return nelements * elem_size;
    }

    // 12-byte bitshuffle header: total uncompressed size (8 bytes BE) followed by block size in bytes (4 bytes BE)
    bshuf_write_uint64_BE(c_dest, nelements * elem_size);
    bshuf_write_uint32_BE(c_dest + 8, DefaultBlockSize * elem_size);

    if (tmp_space.size() < DefaultBlockSize * elem_size)
        tmp_space.resize(DefaultBlockSize * elem_size);

    size_t num_full_blocks = nelements / DefaultBlockSize;
    size_t remainder_size = nelements - num_full_blocks * DefaultBlockSize;

    size_t compressed_size = 12;

    for (size_t i = 0; i < num_full_blocks; i++)
        compressed_size += CompressBlock(c_dest + compressed_size,
                                         c_source + i * DefaultBlockSize * elem_size,
                                         DefaultBlockSize, elem_size);

    // Remaining elements form a shorter block, truncated to a multiple of BSHUF_BLOCKED_MULT
    size_t last_block_size = remainder_size - remainder_size % BSHUF_BLOCKED_MULT;
    if (last_block_size > 0)
        compressed_size += CompressBlock(c_dest + compressed_size,
                                         c_source + num_full_blocks * DefaultBlockSize * elem_size,
                                         last_block_size, elem_size);

    // Elements that do not fill a multiple of BSHUF_BLOCKED_MULT are appended uncompressed
    size_t leftover_bytes = (remainder_size % BSHUF_BLOCKED_MULT) * elem_size;
    if (leftover_bytes > 0) {
        memcpy(c_dest + compressed_size,
               c_source + (num_full_blocks * DefaultBlockSize + last_block_size) * elem_size,
               leftover_bytes);
        compressed_size += leftover_bytes;
    }

    return compressed_size;
}
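
// Illustrative usage sketch (not part of the original source): it assumes the
// CompressionAlgorithm enum values referenced above and the std::vector<uint8_t>
// overload of Compress() declared in JFJochCompressor.h; "image" and its
// dimensions are placeholder names for a raw detector frame of 16-bit pixels.
//
//     std::vector<uint16_t> image(1024 * 512, 0);
//     JFJochBitShuffleCompressor compressor(CompressionAlgorithm::BSHUF_LZ4);
//     std::vector<uint8_t> compressed =
//             compressor.Compress(image.data(), image.size(), sizeof(uint16_t));
//     // "compressed" now holds the 12-byte bitshuffle header followed by the
//     // per-block streams, each prefixed with its 4-byte big-endian size.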