blob: 5b0035e1d085702e1e7a2ec610a95951dcbb003b [file] [log] [blame]
// Copyright 2019 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package flatecut
import (
"bytes"
"compress/flate"
"io"
"io/ioutil"
"testing"
"github.com/google/wuffs/internal/testcut"
)
func TestCut(t *testing.T) {
testcut.Test(t, SmallestValidMaxEncodedLen, Cut, newReader, []string{
"artificial/deflate-backref-crosses-blocks.deflate",
"artificial/deflate-distance-32768.deflate",
"romeo.txt.deflate",
"romeo.txt.fixed-huff.deflate",
})
}
func BenchmarkCut(b *testing.B) {
testcut.Benchmark(b, SmallestValidMaxEncodedLen, Cut, newReader,
"pi.txt.zlib", 2, 4, 100003)
}
func newReader(r io.Reader) (io.ReadCloser, error) {
return flate.NewReader(r), nil
}
func decodeFlate(src []byte) string {
dst, err := ioutil.ReadAll(flate.NewReader(bytes.NewReader(src)))
if err != nil {
return err.Error()
}
return string(dst)
}
func TestHuffmanDecode(t *testing.T) {
// This exercises the "ABCDEFGH" example from RFC 1951 section 3.2.2,
// discussed in the "type huffman" doc comment.
const src = "HEADFACE"
codes := []string{
'A': " 010",
'B': " 011",
'C': " 100",
'D': " 101",
'E': " 110",
'F': " 00",
'G': "1110",
'H': "1111",
}
encoded := []byte(nil)
encBits := uint32(0)
encNBits := uint32(0)
for _, s := range src {
for _, c := range codes[s] {
if c == ' ' {
continue
}
if c == '1' {
encBits |= 1 << encNBits
}
encNBits++
if encNBits == 8 {
encoded = append(encoded, uint8(encBits))
encBits = 0
encNBits = 0
}
}
}
if encNBits > 0 {
encoded = append(encoded, uint8(encBits))
}
h := huffman{
counts: [maxCodeBits + 1]uint32{
2: 1, 3: 5, 4: 2,
},
symbols: [maxNumCodes]int32{
0: 'F', 1: 'A', 2: 'B', 3: 'C', 4: 'D', 5: 'E', 6: 'G', 7: 'H',
},
}
h.constructLookUpTable()
b := &bitstream{
bytes: encoded,
}
decoded := []byte(nil)
for _ = range src {
c := h.decode(b)
if c < 0 {
decoded = append(decoded, '!')
break
}
decoded = append(decoded, uint8(c))
}
if got, want := string(decoded), src; got != want {
t.Fatalf("got %q, want %q", got, want)
}
}
// A DEFLATE encoding of the first 64 bytes of the AUTHORS file in the
// repository root directory.
//
// The encoding uses a Dynamic Huffman block, but one whose H-D Huffman
// distance tree is degenerate (there's a mapping for the "0" code but no
// mapping for the "1" code) and unused.
//
// The first 25-30 bytes contain the encoded H-L and H-D Huffman trees. The
// last 30-35 bytes contain the actual payload, encoded with H-L.
//
// The high 7 bits of the final byte is unused padding.
const (
degenerateHuffmanDec = "# This is the official list of Wuffs authors for copyright purpo"
degenerateHuffmanEnc = "" +
"\x05\xc0\xc1\x09\xc5\x30\x0c\x03\xd0\x55\x04\x7f\xa5\x0f\x3d\x87" +
"\x50\xd5\x82\x80\x82\xed\x1c\xba\x7d\xdf\x0f\xff\x50\x41\x85\x8e" +
"\x1b\x26\x35\x35\x16\x96\xaa\x61\xe2\x3a\x64\x61\x9c\x0e\x67\x81" +
"\x4e\x4c\xef\x37\xf5\x44\x63\x9f\xdc\xfe\x00"
)
func TestReplaceHuffmanWithStored(t *testing.T) {
const dec = degenerateHuffmanDec
const enc = degenerateHuffmanEnc
if (len(dec) != 64) || (len(enc) != 59) {
panic("inconsistent const string lengths")
}
if got, want := decodeFlate([]byte(enc)), dec; got != want {
t.Fatalf("before Cut: got %q, want %q", got, want)
}
for i := 4; i <= 59; i += 5 {
b := []byte(enc)
encLen, decLen, err := Cut(nil, b, i)
if err != nil {
t.Errorf("i=%d: %v", i, err)
continue
}
if encLen < 1 {
t.Errorf("i=%d: encLen: got %d, want >= 1", i, encLen)
continue
} else if encLen > len(enc) {
t.Errorf("i=%d: encLen: got %d, want <= %d", i, encLen, len(enc))
continue
}
// If we can make some progress (decLen > 0), even if the input uses a
// Huffman block, one option is to re-encode to a single Stored block
// (for 5 bytes of overhead). It's single because len(dec) < 0xFFFF.
// Regardless of whether the cut form uses a Huffman or Stored block,
// we should be able to produce at least i-5 bytes of decoded output.
if (decLen > 0) && (i > 5) && (decLen < i-5) {
t.Errorf("i=%d: decLen: got %d, want >= %d", i, decLen, i-5)
continue
} else if decLen > len(dec) {
t.Errorf("i=%d: decLen: got %d, want <= %d", i, decLen, len(dec))
continue
}
if got, want := decodeFlate(b[:encLen]), dec[:decLen]; got != want {
t.Errorf("i=%d: after Cut: got %q, want %q", i, got, want)
continue
}
// Check that we are using a space-efficient block type.
{
got := (b[0] >> 1) & 3
want := uint8(0xFF)
switch i {
case 4:
want = 1 // Static Huffman, for a decLen of 0.
case 9:
want = 0 // Stored.
case 59:
want = 2 // Dynamic Huffman.
default:
continue
}
if got != want {
t.Errorf("i=%d: block type: got %d, want %d", i, got, want)
}
}
}
}
func TestDegenerateHuffmanUnused(t *testing.T) {
const dec = degenerateHuffmanDec
const enc = degenerateHuffmanEnc
// Cutting 1 byte off the end of the encoded form will lead to cutting n
// bytes off the decoded form. Coincidentally, n equals 1, even though each
// decoded byte (8 bits) is packed into smaller number of bits, as most of
// the final encoded byte's bits are unused padding.
const n = 1
b := []byte(enc)
encLen, decLen, err := Cut(nil, b, len(enc)-1)
if err != nil {
t.Fatalf("Cut: %v", err)
} else if encLen != len(enc)-1 {
t.Fatalf("encLen: got %d, want %d", encLen, len(enc)-1)
} else if decLen != len(dec)-n {
t.Fatalf("decLen: got %d, want %d", decLen, len(dec)-n)
}
if got, want := decodeFlate(b[:encLen]), dec[:decLen]; got != want {
t.Fatalf("after Cut: got %q, want %q", got, want)
}
}