Encode copies of length 65, 66 or 67 as 5 bytes, not 6.
The benchmarks don't show a big change either way, but the output is
shorter, and it matches what the C++ snappy code does.
benchmark old MB/s new MB/s speedup
BenchmarkWordsEncode1e1-8 5.77 5.77 1.00x
BenchmarkWordsEncode1e2-8 47.15 47.26 1.00x
BenchmarkWordsEncode1e3-8 180.77 183.25 1.01x
BenchmarkWordsEncode1e4-8 202.01 198.96 0.98x
BenchmarkWordsEncode1e5-8 145.66 144.68 0.99x
BenchmarkWordsEncode1e6-8 174.12 172.31 0.99x
BenchmarkRandomEncode-8 4522.91 4495.78 0.99x
Benchmark_ZFlat0-8 359.70 359.79 1.00x
Benchmark_ZFlat1-8 181.18 181.82 1.00x
Benchmark_ZFlat2-8 4612.52 4557.46 0.99x
Benchmark_ZFlat3-8 85.65 84.82 0.99x
Benchmark_ZFlat4-8 559.51 558.52 1.00x
Benchmark_ZFlat5-8 354.88 352.91 0.99x
Benchmark_ZFlat6-8 156.14 156.26 1.00x
Benchmark_ZFlat7-8 148.18 148.12 1.00x
Benchmark_ZFlat8-8 162.68 162.21 1.00x
Benchmark_ZFlat9-8 141.81 142.32 1.00x
Benchmark_ZFlat10-8 399.79 401.94 1.01x
Benchmark_ZFlat11-8 237.43 235.91 0.99x
diff --git a/encode.go b/encode.go
index 38ebe95..939fb58 100644
--- a/encode.go
+++ b/encode.go
@@ -55,26 +55,42 @@
// emitCopy writes a copy chunk and returns the number of bytes written.
func emitCopy(dst []byte, offset, length int32) int {
i := 0
- for length > 0 {
- x := length - 4
- if 0 <= x && x < 1<<3 && offset < 1<<11 {
- dst[i+0] = uint8(offset>>8)&0x07<<5 | uint8(x)<<2 | tagCopy1
- dst[i+1] = uint8(offset)
- i += 2
- break
- }
-
- x = length
- if x > 1<<6 {
- x = 1 << 6
- }
- dst[i+0] = uint8(x-1)<<2 | tagCopy2
+ // The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The
+ // threshold for this loop is a little higher (at 68 = 64 + 4), and the
+ // length emitted down below is is a little lower (at 60 = 64 - 4), because
+ // it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed
+ // by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as
+ // a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as
+ // 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a
+ // tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an
+ // encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1.
+ for length >= 68 {
+ // Emit a length 64 copy, encoded as 3 bytes.
+ dst[i+0] = 63<<2 | tagCopy2
dst[i+1] = uint8(offset)
dst[i+2] = uint8(offset >> 8)
i += 3
- length -= x
+ length -= 64
}
- return i
+ if length > 64 {
+ // Emit a length 60 copy, encoded as 3 bytes.
+ dst[i+0] = 59<<2 | tagCopy2
+ dst[i+1] = uint8(offset)
+ dst[i+2] = uint8(offset >> 8)
+ i += 3
+ length -= 60
+ }
+ if length >= 12 || offset >= 2048 {
+ // Emit the remaining copy, encoded as 3 bytes.
+ dst[i+0] = uint8(length-1)<<2 | tagCopy2
+ dst[i+1] = uint8(offset)
+ dst[i+2] = uint8(offset >> 8)
+ return i + 3
+ }
+ // Emit the remaining copy, encoded as 2 bytes.
+ dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
+ dst[i+1] = uint8(offset)
+ return i + 2
}
// Encode returns the encoded form of src. The returned slice may be a sub-
diff --git a/snappy_test.go b/snappy_test.go
index 65c0013..3d70a26 100644
--- a/snappy_test.go
+++ b/snappy_test.go
@@ -553,20 +553,36 @@
defer w.Close()
w.Write([]byte("abcd")) // Not compressible.
w.Flush()
- w.Write(bytes.Repeat([]byte{'A'}, 100)) // Compressible.
+ w.Write(bytes.Repeat([]byte{'A'}, 150)) // Compressible.
w.Flush()
+ // The next chunk is also compressible, but a naive, greedy encoding of the
+ // overall length 67 copy as a length 64 copy (the longest expressible as a
+ // tagCopy1 or tagCopy2) plus a length 3 remainder would be two 3-byte
+ // tagCopy2 tags (6 bytes), since the minimum length for a tagCopy1 is 4
+ // bytes. Instead, we could do it shorter, in 5 bytes: a 3-byte tagCopy2
+ // (of length 60) and a 2-byte tagCopy1 (of length 7).
+ w.Write(bytes.Repeat([]byte{'B'}, 68))
+ w.Flush()
+
got := buf.String()
want := strings.Join([]string{
magicChunk,
"\x01\x08\x00\x00", // Uncompressed chunk, 8 bytes long (including 4 byte checksum).
"\x68\x10\xe6\xb6", // Checksum.
"\x61\x62\x63\x64", // Uncompressed payload: "abcd".
- "\x00\x0d\x00\x00", // Compressed chunk, 13 bytes long (including 4 byte checksum).
- "\x37\xcb\xbc\x9d", // Checksum.
- "\x64", // Compressed payload: Uncompressed length (varint encoded): 100.
+ "\x00\x11\x00\x00", // Compressed chunk, 17 bytes long (including 4 byte checksum).
+ "\x5f\xeb\xf2\x10", // Checksum.
+ "\x96\x01", // Compressed payload: Uncompressed length (varint encoded): 150.
"\x00\x41", // Compressed payload: tagLiteral, length=1, "A".
"\xfe\x01\x00", // Compressed payload: tagCopy2, length=64, offset=1.
- "\x8a\x01\x00", // Compressed payload: tagCopy2, length=35, offset=1.
+ "\xfe\x01\x00", // Compressed payload: tagCopy2, length=64, offset=1.
+ "\x52\x01\x00", // Compressed payload: tagCopy2, length=21, offset=1.
+ "\x00\x0c\x00\x00", // Compressed chunk, 12 bytes long (including 4 byte checksum).
+ "\x27\x50\xe4\x4e", // Checksum.
+ "\x44", // Compressed payload: Uncompressed length (varint encoded): 68.
+ "\x00\x42", // Compressed payload: tagLiteral, length=1, "B".
+ "\xee\x01\x00", // Compressed payload: tagCopy2, length=60, offset=1.
+ "\x0d\x01", // Compressed payload: tagCopy1, length=7, offset=1.
}, "")
if got != want {
t.Fatalf("\ngot: % x\nwant: % x", got, want)