// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-only // This file hand-assembles a standard Zstandard frame so it can be decoded by stock // ZSTD_decompress. Every structure below follows the Zstandard compression format spec: // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md // Comments cite that document's section names so the framing can be checked clause by clause. // Only the framing is hand-written; the Huffman tree + bitstream inside each Compressed_Block's // Literals_Section are produced by zstd's own HUF_compress*X_repeat (so those bytes are trusted). #include "JFJochZstdHuffCompressor.h" #include #include #include extern "C" { #include size_t HUF_compress1X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int flags); } namespace { // --- "Zstandard frames" / "Frame_Header" --- constexpr uint32_t MAGIC_NUMBER = 0xFD2FB528u; // Frame_Header_Descriptor: Frame_Content_Size_flag = 2 (4-byte size), Single_Segment_flag = 1 // (=> no Window_Descriptor; Window_Size = Frame_Content_Size), no checksum, no dictionary. constexpr uint8_t FRAME_DESCRIPTOR = 0xA0; // --- "Blocks": Block_Type field of Block_Header --- constexpr uint32_t BLOCK_RAW = 0; constexpr uint32_t BLOCK_RLE = 1; constexpr uint32_t BLOCK_COMPRESSED = 2; // --- "Literals_Section_Header": Literals_Block_Type field --- constexpr uint8_t LITERALS_COMPRESSED = 2; // carries a Huffman_Tree_Description constexpr uint8_t LITERALS_TREELESS = 3; // reuses the tree from a previous Compressed block constexpr size_t CHUNK = 65536; // cap each block's regenerated size well under Block_Maximum_Size (128 KiB) constexpr size_t RUN_MIN = 64; // runs shorter than this are absorbed into the literal stream constexpr unsigned LIT_LOG = 11; // Huffman table-log limit for literals, per the format } JFJochZstdHuffCompressor::JFJochZstdHuffCompressor() : ctable(HUF_CTABLE_SIZE_ST(255)), entwksp(HUF_WORKSPACE_SIZE_U64) {} // append the low nbytes of v, little-endian (Zstd integer fields are little-endian) void JFJochZstdHuffCompressor::put_le(uint64_t v, int nbytes) { for (int i = 0; i < nbytes; i++) out.push_back((uint8_t)(v >> (8 * i))); } // Block_Header (3 bytes, little-endian): [ Last_Block:1 | Block_Type:2 | Block_Size:21 ]. // Last_Block (bit 0) is left 0 here and OR-ed onto the final block afterwards. Returns the // offset of the header so that final block can be marked. size_t JFJochZstdHuffCompressor::blk_hdr(uint32_t type, uint32_t size) { size_t off = out.size(); put_le((uint64_t)(type << 1) | ((uint64_t)size << 3), 3); return off; } // RLE_Block: Block_Size is the number of repeats (the regenerated size); Block_Content is the // single repeated byte. Split into <=CHUNK pieces so Block_Size stays under Block_Maximum_Size. void JFJochZstdHuffCompressor::emit_run(uint8_t value, size_t nbytes, size_t &last_off) { for (size_t off = 0; off < nbytes; off += CHUNK) { size_t c = std::min(CHUNK, nbytes - off); last_off = blk_hdr(BLOCK_RLE, (uint32_t)c); out.push_back(value); } } // Emit one Compressed_Block whose Literals_Section holds n Huffman-coded literal bytes and whose // Sequences_Section is empty (Number_of_Sequences = 0). Falls back to a Raw_Block if Huffman does // not help. The Huffman payload (tree + streams, or streams only when reusing) is produced by zstd. void JFJochZstdHuffCompressor::emit_lit_chunk(const uint8_t *lits, size_t n, size_t &last_off) { HUF_CElt *ct = reinterpret_cast(ctable.data()); HUF_repeat rep = (HUF_repeat)repeat_state; // Size_Format also selects the stream count: format 0 = 1 stream, formats 1/2/3 = 4 streams. // Use a single stream only for small inputs (matches zstd's own ZSTD_compressLiterals heuristic). const bool single = n < 256; const size_t wkspSize = entwksp.size() * sizeof(uint64_t); size_t hs = single ? HUF_compress1X_repeat(hufbuf.data(), hufbuf.size(), lits, n, 255, LIT_LOG, entwksp.data(), wkspSize, ct, &rep, 0) : HUF_compress4X_repeat(hufbuf.data(), hufbuf.size(), lits, n, 255, LIT_LOG, entwksp.data(), wkspSize, ct, &rep, 0); // Literals_Section_Header, read as a little-endian value, is laid out as // [ Literals_Block_Type:2 | Size_Format:2 | Regenerated_Size | Compressed_Size ]. // Size_Format (chosen from Regenerated_Size n) sets the header length and field widths: // n < 1 KiB -> 3-byte header, 10-bit fields (format 0 single-stream, else 1) // n < 16 KiB -> 4-byte header, 14-bit fields (format 2) // else -> 5-byte header, 18-bit fields (format 3) // Regenerated_Size starts at bit 4; Compressed_Size starts right after it, at bit (4 + width). int size_format, lh_bytes, regen_width; if (n < 1024) { size_format = single ? 0 : 1; lh_bytes = 3; regen_width = 10; } else if (n < 16384) { size_format = 2; lh_bytes = 4; regen_width = 14; } else { size_format = 3; lh_bytes = 5; regen_width = 18; } // hs == 1 is HUF's "single-symbol alphabet" signal (not a usable Huffman section); only keep // the Huffman block when it is well-formed and actually smaller than a Raw_Block. if (hs > 1 && !HUF_isError(hs) && (size_t)(lh_bytes + 1) + hs < n) { repeat_state = HUF_repeat_check; // the table now in ct is valid -> reuse it next time // HUF_compress*_repeat leaves *repeat != none only when it reused the previous table. uint8_t lit_type = (rep != HUF_repeat_none) ? LITERALS_TREELESS : LITERALS_COMPRESSED; uint64_t hdr = (uint64_t)lit_type | ((uint64_t)size_format << 2) | ((uint64_t)n << 4) | ((uint64_t)hs << (4 + regen_width)); last_off = blk_hdr(BLOCK_COMPRESSED, (uint32_t)(lh_bytes + hs + 1)); put_le(hdr, lh_bytes); // Literals_Section_Header out.insert(out.end(), hufbuf.data(), hufbuf.data() + hs); // Huffman tree + stream(s) out.push_back(0x00); // Sequences_Section: Number_of_Sequences = 0 } else { repeat_state = HUF_repeat_none; // discard any table HUF rebuilt but we did not emit last_off = blk_hdr(BLOCK_RAW, (uint32_t)n); out.insert(out.end(), lits, lits + n); } } // src = one bitshuffled block (src_size bytes, a multiple of 8). Emits a complete Zstd frame. size_t JFJochZstdHuffCompressor::Compress(uint8_t *dst, const uint64_t *src, size_t src_size) { const size_t W = src_size / 8; out.clear(); literals.clear(); segs.clear(); repeat_state = HUF_repeat_none; if (hufbuf.size() < src_size + 1024) hufbuf.resize(src_size + 1024); // Pass 1: classify 8-byte words into runs (0x00 / 0xFF) and literals. Runs >= RUN_MIN become // RLE_Blocks; shorter runs are folded into the literal stream (where 0x00 costs ~1 Huffman bit). size_t lit_start = 0; auto close_lit = [&]() { if (literals.size() > lit_start) { segs.push_back({2, literals.size() - lit_start, lit_start}); lit_start = literals.size(); } }; for (size_t i = 0; i < W; ) { if (src[i] == 0 || src[i] == UINT64_MAX) { uint64_t val = src[i]; size_t j = i; while (j < W && src[j] == val) j++; size_t bytes = (j - i) * 8; uint8_t v = val ? 0xFF : 0x00; if (bytes >= RUN_MIN) { close_lit(); segs.push_back({(uint8_t)(val ? 1 : 0), bytes, 0}); } else literals.insert(literals.end(), bytes, v); i = j; } else { const uint8_t *wb = reinterpret_cast(&src[i]); literals.insert(literals.end(), wb, wb + 8); i++; } } close_lit(); // Frame_Header: Magic_Number + Frame_Header_Descriptor + Frame_Content_Size (4 bytes). put_le(MAGIC_NUMBER, 4); out.push_back(FRAME_DESCRIPTOR); put_le(src_size, 4); // Pass 2: emit the blocks in order. size_t last_off = 0; bool emitted = false; for (const Seg &s : segs) { emitted = true; if (s.type == 2) for (size_t off = 0; off < s.bytes; off += CHUNK) emit_lit_chunk(literals.data() + s.lit_off + off, std::min(CHUNK, s.bytes - off), last_off); else emit_run(s.type == 1 ? 0xFF : 0x00, s.bytes, last_off); } if (!emitted) last_off = blk_hdr(BLOCK_RAW, 0); // empty input -> a single empty block out[last_off] |= 1; // set Last_Block on the final block memcpy(dst, out.data(), out.size()); return out.size(); }