New CompressionAlgorithm that emits a standard Zstandard frame: zero/0xFF runs
become RLE_Blocks (like BSHUF_ZSTD_RLE) and literal regions become
Compressed_Blocks with per-block adaptive Huffman literals and no sequences
(Number_of_Sequences=0). Short runs are absorbed into the literal stream;
incompressible literals fall back to Raw_Blocks so the worst case stays within
ZSTD_compressBound.
The Huffman tree + bitstream are produced by zstd's own HUF_compress{1,4}X_repeat
(the same calls ZSTD_compressLiterals uses); only the frame/block/literals-section
framing is hand-written, with comments citing zstd_compression_format.md so it can
be checked clause by clause. Output decodes with stock ZSTD_decompress, so no
reader changes are needed (decode routes like BSHUF_ZSTD).
On sparse diffraction data this gives ~12% smaller files than bitshuffle/LZ4 at about
the same end-to-end speed, sitting between LZ4 and full ZSTD; for maximum ratio
use BSHUF_ZSTD. Robust on any input: tests round-trip pure zeros, Poisson(10),
Mersenne-Twister noise (checked against the size bound), an extreme-sparsity mask,
and a real lyso image through stock ZSTD_decompress.
API: exposed as "bszstd_rlehuf"; regenerate the Python/TS clients (update_version.sh)
to surface the new value there.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
60 lines
2.5 KiB
C++
60 lines
2.5 KiB
C++
// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
|
|
// SPDX-License-Identifier: GPL-3.0-only
|
|
|
|
#pragma once
|
|
|
|
#include <bitshuffle/bitshuffle.h>

#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <string>
#include <vector>

#include "CompressionAlgorithmEnum.h"
#include "MaxCompressedSize.h"

#include "JFJochZstdCompressor.h"
#include "JFJochZstdHuffCompressor.h"
|
|
|
|
class JFJochBitShuffleCompressor {
|
|
JFJochZstdCompressor zstd_compressor;
|
|
JFJochZstdHuffCompressor huff_compressor;
|
|
CompressionAlgorithm algorithm;
|
|
std::vector<char> tmp_space;
|
|
std::vector<char> scratch;
|
|
|
|
size_t CompressBlock(char *dest, const char * source, size_t nelements, size_t elem_size);
|
|
public:
|
|
// The bitshuffle block size is chosen as a byte target rather than a fixed element count, so
|
|
// the per-block working set - and thus the cache behaviour - stays constant across pixel bit
|
|
// depths. The target is per-algorithm: LZ4 favours a small, cache-resident block (throughput),
|
|
// ZSTD/RLE a large block (ratio). The element block size scales inversely with elem_size,
|
|
// which is always a power of two here, so the result is a power of two >= 8 (a valid bitshuffle
|
|
// block, multiple of 8). E.g. ZSTD 128 kB -> 65536 elem at 16-bit, 32768 at 32-bit.
|
|
constexpr static size_t DefaultBlockSizeBytes(CompressionAlgorithm algorithm) {
|
|
return algorithm == CompressionAlgorithm::BSHUF_LZ4 ? 16384 : 131072;
|
|
}
|
|
constexpr static size_t BlockSize(CompressionAlgorithm algorithm, size_t elem_size) {
|
|
return DefaultBlockSizeBytes(algorithm) / elem_size;
|
|
}
|
|
|
|
explicit JFJochBitShuffleCompressor(CompressionAlgorithm algorithm);
|
|
|
|
template<class T>
|
|
size_t Compress(void *dest, size_t dest_size, const std::vector<T> &src) {
|
|
return Compress(dest, dest_size, src.data(), src.size(), sizeof(T));
|
|
};
|
|
|
|
template<class T>
|
|
std::vector<uint8_t> Compress(const std::vector<T> &src) {
|
|
return Compress(src.data(), src.size(), sizeof(T));
|
|
}
|
|
std::vector<uint8_t> Compress(const void* source, size_t nelements, size_t elem_size);
|
|
// Throws CompressionBufferTooSmallException if the compressed output would not fit dest_size.
|
|
size_t Compress(void *dest, size_t dest_size, const void* source, size_t nelements, size_t elem_size);
|
|
};
|
|
|
|
/// Applies bshuf_bitshuffle to `input` and returns the shuffled data as a new vector of the
/// same length.
///
/// @param input       elements to shuffle; sizeof(T) is passed as the element size.
/// @param block_size  block size forwarded unchanged to bshuf_bitshuffle.
/// @return vector with input.size() elements holding the bitshuffled data.
/// @throws std::runtime_error if bshuf_bitshuffle reports a negative (error) status, so a
///         failed shuffle is never returned as if it were valid output.
template <class T> std::vector<T> bitshuffle(const std::vector<T> &input, size_t block_size) {
    std::vector<T> ret(input.size());

    // bshuf_bitshuffle signals failure through a negative return value; the status used to be
    // discarded, which silently handed back a value-initialised vector on error.
    const int64_t status = bshuf_bitshuffle(input.data(), ret.data(), input.size(), sizeof(T), block_size);
    if (status < 0)
        throw std::runtime_error("bshuf_bitshuffle failed with error code "
                                 + std::to_string(static_cast<long long>(status)));

    return ret;
}
|