Files
Jungfraujoch/compression/JFJochCompressor.cpp
T
leonarski_fandClaude Opus 4.8 7e7a73062c Compression: add BSHUF_ZSTD_RLE_HUFF (RLE runs + Huffman literals)
New CompressionAlgorithm that emits a standard Zstandard frame: zero/0xFF runs
become RLE_Blocks (like BSHUF_ZSTD_RLE) and literal regions become
Compressed_Blocks with per-block adaptive Huffman literals and no sequences
(Number_of_Sequences=0). Short runs are absorbed into the literal stream;
incompressible literals fall back to Raw_Blocks so the worst case stays within
ZSTD_compressBound.

The Huffman tree + bitstream are produced by zstd's own HUF_compress{1,4}X_repeat
(the same calls ZSTD_compressLiterals uses); only the frame/block/literals-section
framing is hand-written, with comments citing zstd_compression_format.md so it can
be checked clause by clause. Output decodes with stock ZSTD_decompress, so no
reader changes are needed (decode routes like BSHUF_ZSTD).

On sparse diffraction this gives ~12% smaller files than bitshuffle/LZ4 at about
the same end-to-end speed, sitting between LZ4 and full ZSTD; for maximum ratio
use BSHUF_ZSTD. Robust on any input: tests round-trip pure zeros, Poisson(10),
Mersenne-Twister noise (checked against the size bound), an extreme-sparsity mask,
and a real lyso image through stock ZSTD_decompress.

API: exposed as "bszstd_rlehuf"; regenerate the Python/TS clients (update_version.sh)
to surface the new value there.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-27 14:41:46 +02:00

147 lines
6.7 KiB
C++

// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#include "JFJochCompressor.h"
#include <stdexcept>
#include <cstring>
#include <bitshuffle/bitshuffle_internals.h>
#include <bitshuffle_hperf/bitshuffle.h>
#include <zstd.h>
#include <lz4/lz4.h>
#include "../common/JFJochException.h"
extern "C" {
void bshuf_write_uint64_BE(void* buf, uint64_t num);
}
// Necessary condition for BlockSize() to be a valid bitshuffle block: with elem_size a power of
// two, a byte target that is a multiple of BSHUF_BLOCKED_MULT keeps the element count a multiple
// of BSHUF_BLOCKED_MULT too.
static_assert(JFJochBitShuffleCompressor::DefaultBlockSizeBytes(CompressionAlgorithm::BSHUF_LZ4) % BSHUF_BLOCKED_MULT == 0
&& JFJochBitShuffleCompressor::DefaultBlockSizeBytes(CompressionAlgorithm::BSHUF_ZSTD) % BSHUF_BLOCKED_MULT == 0,
"block byte target must be a multiple of the bitshuffle block multiple");
// Worst-case size of one compressed block, including its 4-byte length prefix. Mirrors the
// per-block term of MaxCompressedSize(), so a dest sized to MaxCompressedSize() never fails.
static size_t MaxCompressedBlockSize(CompressionAlgorithm algorithm, size_t src_size) {
switch (algorithm) {
case CompressionAlgorithm::BSHUF_LZ4:
return LZ4_compressBound(src_size) + 4;
case CompressionAlgorithm::BSHUF_ZSTD:
case CompressionAlgorithm::BSHUF_ZSTD_RLE:
case CompressionAlgorithm::BSHUF_ZSTD_RLE_HUFF:
return ZSTD_compressBound(src_size) + 4;
default:
return src_size + 4;
}
}
JFJochBitShuffleCompressor::JFJochBitShuffleCompressor(CompressionAlgorithm in_algorithm) {
algorithm = in_algorithm;
}
size_t JFJochBitShuffleCompressor::CompressBlock(char *dest, const char *source, size_t nelements, size_t elem_size) {
// Assert nelements < block_size
const char *src_ptr;
int64_t bshuf_ret = bitshuf_encode_block(tmp_space.data(), source, scratch.data(), nelements, elem_size);
if (bshuf_ret < 0)
throw JFJochException(JFJochExceptionCategory::Compression, "bshuf_trans_bit_elem error");
src_ptr = tmp_space.data();
size_t compressed_size;
size_t src_size = nelements * elem_size;
switch (algorithm) {
case CompressionAlgorithm::BSHUF_LZ4:
compressed_size = LZ4_compress_default(src_ptr, dest + 4, src_size, LZ4_compressBound(src_size));
break;
case CompressionAlgorithm::BSHUF_ZSTD:
compressed_size = ZSTD_compress(dest + 4, ZSTD_compressBound(src_size), src_ptr, src_size, 0);
if (ZSTD_isError(compressed_size))
throw(JFJochException(JFJochExceptionCategory::Compression, ZSTD_getErrorName(compressed_size)));
break;
case CompressionAlgorithm::BSHUF_ZSTD_RLE:
try {
compressed_size = zstd_compressor.Compress(((uint8_t *) dest) + 4, (uint64_t *) src_ptr,
src_size, src_size);
} catch (const std::runtime_error &e) {
throw JFJochException(JFJochExceptionCategory::ZSTDCompressionError, e.what());
}
break;
case CompressionAlgorithm::BSHUF_ZSTD_RLE_HUFF:
compressed_size = huff_compressor.Compress(((uint8_t *) dest) + 4, (const uint64_t *) src_ptr, src_size);
break;
default:
throw JFJochException(JFJochExceptionCategory::Compression, "Algorithm not supported");
}
bshuf_write_uint32_BE(dest, compressed_size);
return compressed_size + 4;
}
std::vector<uint8_t> JFJochBitShuffleCompressor::Compress(const void *source, size_t nelements, size_t elem_size) {
std::vector<uint8_t> tmp(MaxCompressedSize(algorithm, nelements, elem_size));
size_t tmp_size = Compress(tmp.data(), tmp.size(), source, nelements, elem_size);
tmp.resize(tmp_size);
return tmp;
}
size_t JFJochBitShuffleCompressor::Compress(void *dest, size_t dest_size, const void *source, size_t nelements, size_t elem_size) {
auto c_dest = (char *) dest;
auto c_source = (char *) source;
if (algorithm == CompressionAlgorithm::NO_COMPRESSION) {
// Trivial case if no compression - copy content
if (nelements * elem_size > dest_size)
throw CompressionBufferTooSmallException("compressed output exceeds destination buffer");
memcpy(dest, source, nelements * elem_size);
return nelements * elem_size;
}
if (dest_size < 12)
throw CompressionBufferTooSmallException("compressed output exceeds destination buffer");
const size_t block_size = BlockSize(algorithm, elem_size);
bshuf_write_uint64_BE(c_dest, nelements * elem_size);
bshuf_write_uint32_BE(c_dest + 8, block_size * elem_size);
if (tmp_space.size() < block_size * elem_size)
tmp_space.resize(block_size * elem_size);
if (scratch.size() < block_size * elem_size)
scratch.resize(block_size * elem_size);
size_t num_full_blocks = nelements / block_size;
size_t reminder_size = nelements - num_full_blocks * block_size;
size_t compressed_size = 12;
// Blocks are small relative to the image, so before each one we just check that the
// remaining space still covers that block's worst case, and throw if not.
for (int i = 0; i < num_full_blocks; i++) {
if (compressed_size + MaxCompressedBlockSize(algorithm, block_size * elem_size) > dest_size)
throw CompressionBufferTooSmallException("compressed output exceeds destination buffer");
compressed_size += CompressBlock(c_dest + compressed_size,
c_source + i * block_size * elem_size, block_size, elem_size);
}
size_t last_block_size = reminder_size - reminder_size % BSHUF_BLOCKED_MULT;
if (last_block_size > 0) {
if (compressed_size + MaxCompressedBlockSize(algorithm, last_block_size * elem_size) > dest_size)
throw CompressionBufferTooSmallException("compressed output exceeds destination buffer");
compressed_size += CompressBlock(c_dest + compressed_size,
c_source + num_full_blocks * block_size * elem_size, last_block_size, elem_size);
}
size_t leftover_bytes = (reminder_size % BSHUF_BLOCKED_MULT) * elem_size;
if (leftover_bytes > 0) {
if (compressed_size + leftover_bytes > dest_size)
throw CompressionBufferTooSmallException("compressed output exceeds destination buffer");
memcpy(c_dest + compressed_size, c_source + (num_full_blocks * block_size + last_block_size) * elem_size, leftover_bytes);
compressed_size += leftover_bytes;
}
return compressed_size;
}