blob: 11ef859f149673611ec8ab219f22b716badfe67f [file] [log] [blame]
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package zstd
import (
"encoding/binary"
)
// readLiterals reads and decompresses the literals from data at off.
// The literals are appended to outbuf, which is returned.
// Also returns the new input offset. RFC 3.1.1.3.1.
func (r *Reader) readLiterals(data block, off int, outbuf []byte) (int, []byte, error) {
if off >= len(data) {
return 0, nil, r.makeEOFError(off)
}
// Literals section header. RFC 3.1.1.3.1.1.
hdr := data[off]
off++
if (hdr&3) == 0 || (hdr&3) == 1 {
return r.readRawRLELiterals(data, off, hdr, outbuf)
} else {
return r.readHuffLiterals(data, off, hdr, outbuf)
}
}
// readRawRLELiterals reads and decompresses a Raw_Literals_Block or
// a RLE_Literals_Block. RFC 3.1.1.3.1.1.
func (r *Reader) readRawRLELiterals(data block, off int, hdr byte, outbuf []byte) (int, []byte, error) {
raw := (hdr & 3) == 0
var regeneratedSize int
switch (hdr >> 2) & 3 {
case 0, 2:
regeneratedSize = int(hdr >> 3)
case 1:
if off >= len(data) {
return 0, nil, r.makeEOFError(off)
}
regeneratedSize = int(hdr>>4) + (int(data[off]) << 4)
off++
case 3:
if off+1 >= len(data) {
return 0, nil, r.makeEOFError(off)
}
regeneratedSize = int(hdr>>4) + (int(data[off]) << 4) + (int(data[off+1]) << 12)
off += 2
}
// We are going to use the entire literal block in the output.
// The maximum size of one decompressed block is 128K,
// so we can't have more literals than that.
if regeneratedSize > 128<<10 {
return 0, nil, r.makeError(off, "literal size too large")
}
if raw {
// RFC 3.1.1.3.1.2.
if off+regeneratedSize > len(data) {
return 0, nil, r.makeError(off, "raw literal size too large")
}
outbuf = append(outbuf, data[off:off+regeneratedSize]...)
off += regeneratedSize
} else {
// RFC 3.1.1.3.1.3.
if off >= len(data) {
return 0, nil, r.makeError(off, "RLE literal missing")
}
rle := data[off]
off++
for i := 0; i < regeneratedSize; i++ {
outbuf = append(outbuf, rle)
}
}
return off, outbuf, nil
}
// readHuffLiterals reads and decompresses a Compressed_Literals_Block or
// a Treeless_Literals_Block. RFC 3.1.1.3.1.4.
func (r *Reader) readHuffLiterals(data block, off int, hdr byte, outbuf []byte) (int, []byte, error) {
var (
regeneratedSize int
compressedSize int
streams int
)
switch (hdr >> 2) & 3 {
case 0, 1:
if off+1 >= len(data) {
return 0, nil, r.makeEOFError(off)
}
regeneratedSize = (int(hdr) >> 4) | ((int(data[off]) & 0x3f) << 4)
compressedSize = (int(data[off]) >> 6) | (int(data[off+1]) << 2)
off += 2
if ((hdr >> 2) & 3) == 0 {
streams = 1
} else {
streams = 4
}
case 2:
if off+2 >= len(data) {
return 0, nil, r.makeEOFError(off)
}
regeneratedSize = (int(hdr) >> 4) | (int(data[off]) << 4) | ((int(data[off+1]) & 3) << 12)
compressedSize = (int(data[off+1]) >> 2) | (int(data[off+2]) << 6)
off += 3
streams = 4
case 3:
if off+3 >= len(data) {
return 0, nil, r.makeEOFError(off)
}
regeneratedSize = (int(hdr) >> 4) | (int(data[off]) << 4) | ((int(data[off+1]) & 0x3f) << 12)
compressedSize = (int(data[off+1]) >> 6) | (int(data[off+2]) << 2) | (int(data[off+3]) << 10)
off += 4
streams = 4
}
// We are going to use the entire literal block in the output.
// The maximum size of one decompressed block is 128K,
// so we can't have more literals than that.
if regeneratedSize > 128<<10 {
return 0, nil, r.makeError(off, "literal size too large")
}
roff := off + compressedSize
if roff > len(data) || roff < 0 {
return 0, nil, r.makeEOFError(off)
}
totalStreamsSize := compressedSize
if (hdr & 3) == 2 {
// Compressed_Literals_Block.
// Read new huffman tree.
if len(r.huffmanTable) < 1<<maxHuffmanBits {
r.huffmanTable = make([]uint16, 1<<maxHuffmanBits)
}
huffmanTableBits, hoff, err := r.readHuff(data, off, r.huffmanTable)
if err != nil {
return 0, nil, err
}
r.huffmanTableBits = huffmanTableBits
if totalStreamsSize < hoff-off {
return 0, nil, r.makeError(off, "Huffman table too big")
}
totalStreamsSize -= hoff - off
off = hoff
} else {
// Treeless_Literals_Block
// Reuse previous Huffman tree.
if r.huffmanTableBits == 0 {
return 0, nil, r.makeError(off, "missing literals Huffman tree")
}
}
// Decompress compressedSize bytes of data at off using the
// Huffman tree.
var err error
if streams == 1 {
outbuf, err = r.readLiteralsOneStream(data, off, totalStreamsSize, regeneratedSize, outbuf)
} else {
outbuf, err = r.readLiteralsFourStreams(data, off, totalStreamsSize, regeneratedSize, outbuf)
}
if err != nil {
return 0, nil, err
}
return roff, outbuf, nil
}
// readLiteralsOneStream reads a single stream of compressed literals.
func (r *Reader) readLiteralsOneStream(data block, off, compressedSize, regeneratedSize int, outbuf []byte) ([]byte, error) {
// We let the reverse bit reader read earlier bytes,
// because the Huffman table ignores bits that it doesn't need.
rbr, err := r.makeReverseBitReader(data, off+compressedSize-1, off-2)
if err != nil {
return nil, err
}
huffTable := r.huffmanTable
huffBits := uint32(r.huffmanTableBits)
huffMask := (uint32(1) << huffBits) - 1
for i := 0; i < regeneratedSize; i++ {
if !rbr.fetch(uint8(huffBits)) {
return nil, rbr.makeError("literals Huffman stream out of bits")
}
var t uint16
idx := (rbr.bits >> (rbr.cnt - huffBits)) & huffMask
t = huffTable[idx]
outbuf = append(outbuf, byte(t>>8))
rbr.cnt -= uint32(t & 0xff)
}
return outbuf, nil
}
// readLiteralsFourStreams reads four interleaved streams of
// compressed literals.
func (r *Reader) readLiteralsFourStreams(data block, off, totalStreamsSize, regeneratedSize int, outbuf []byte) ([]byte, error) {
// Read the jump table to find out where the streams are.
// RFC 3.1.1.3.1.6.
if off+5 >= len(data) {
return nil, r.makeEOFError(off)
}
if totalStreamsSize < 6 {
return nil, r.makeError(off, "total streams size too small for jump table")
}
// RFC 3.1.1.3.1.6.
// "The decompressed size of each stream is equal to (Regenerated_Size+3)/4,
// except for the last stream, which may be up to 3 bytes smaller,
// to reach a total decompressed size as specified in Regenerated_Size."
regeneratedStreamSize := (regeneratedSize + 3) / 4
if regeneratedSize < regeneratedStreamSize*3 {
return nil, r.makeError(off, "regenerated size too small to decode streams")
}
streamSize1 := binary.LittleEndian.Uint16(data[off:])
streamSize2 := binary.LittleEndian.Uint16(data[off+2:])
streamSize3 := binary.LittleEndian.Uint16(data[off+4:])
off += 6
tot := uint64(streamSize1) + uint64(streamSize2) + uint64(streamSize3)
if tot > uint64(totalStreamsSize)-6 {
return nil, r.makeEOFError(off)
}
streamSize4 := uint32(totalStreamsSize) - 6 - uint32(tot)
off--
off1 := off + int(streamSize1)
start1 := off + 1
off2 := off1 + int(streamSize2)
start2 := off1 + 1
off3 := off2 + int(streamSize3)
start3 := off2 + 1
off4 := off3 + int(streamSize4)
start4 := off3 + 1
// We let the reverse bit readers read earlier bytes,
// because the Huffman tables ignore bits that they don't need.
rbr1, err := r.makeReverseBitReader(data, off1, start1-2)
if err != nil {
return nil, err
}
rbr2, err := r.makeReverseBitReader(data, off2, start2-2)
if err != nil {
return nil, err
}
rbr3, err := r.makeReverseBitReader(data, off3, start3-2)
if err != nil {
return nil, err
}
rbr4, err := r.makeReverseBitReader(data, off4, start4-2)
if err != nil {
return nil, err
}
out1 := len(outbuf)
out2 := out1 + regeneratedStreamSize
out3 := out2 + regeneratedStreamSize
out4 := out3 + regeneratedStreamSize
regeneratedStreamSize4 := regeneratedSize - regeneratedStreamSize*3
outbuf = append(outbuf, make([]byte, regeneratedSize)...)
huffTable := r.huffmanTable
huffBits := uint32(r.huffmanTableBits)
huffMask := (uint32(1) << huffBits) - 1
for i := 0; i < regeneratedStreamSize; i++ {
use4 := i < regeneratedStreamSize4
fetchHuff := func(rbr *reverseBitReader) (uint16, error) {
if !rbr.fetch(uint8(huffBits)) {
return 0, rbr.makeError("literals Huffman stream out of bits")
}
idx := (rbr.bits >> (rbr.cnt - huffBits)) & huffMask
return huffTable[idx], nil
}
t1, err := fetchHuff(&rbr1)
if err != nil {
return nil, err
}
t2, err := fetchHuff(&rbr2)
if err != nil {
return nil, err
}
t3, err := fetchHuff(&rbr3)
if err != nil {
return nil, err
}
if use4 {
t4, err := fetchHuff(&rbr4)
if err != nil {
return nil, err
}
outbuf[out4] = byte(t4 >> 8)
out4++
rbr4.cnt -= uint32(t4 & 0xff)
}
outbuf[out1] = byte(t1 >> 8)
out1++
rbr1.cnt -= uint32(t1 & 0xff)
outbuf[out2] = byte(t2 >> 8)
out2++
rbr2.cnt -= uint32(t2 & 0xff)
outbuf[out3] = byte(t3 >> 8)
out3++
rbr3.cnt -= uint32(t3 & 0xff)
}
return outbuf, nil
}