Encode copies of length 65, 66 or 67 as 5 bytes, not 6. The benchmarks don't show a big change either way, but the output is shorter, and it matches what the C++ snappy code does. benchmark old MB/s new MB/s speedup BenchmarkWordsEncode1e1-8 5.77 5.77 1.00x BenchmarkWordsEncode1e2-8 47.15 47.26 1.00x BenchmarkWordsEncode1e3-8 180.77 183.25 1.01x BenchmarkWordsEncode1e4-8 202.01 198.96 0.98x BenchmarkWordsEncode1e5-8 145.66 144.68 0.99x BenchmarkWordsEncode1e6-8 174.12 172.31 0.99x BenchmarkRandomEncode-8 4522.91 4495.78 0.99x Benchmark_ZFlat0-8 359.70 359.79 1.00x Benchmark_ZFlat1-8 181.18 181.82 1.00x Benchmark_ZFlat2-8 4612.52 4557.46 0.99x Benchmark_ZFlat3-8 85.65 84.82 0.99x Benchmark_ZFlat4-8 559.51 558.52 1.00x Benchmark_ZFlat5-8 354.88 352.91 0.99x Benchmark_ZFlat6-8 156.14 156.26 1.00x Benchmark_ZFlat7-8 148.18 148.12 1.00x Benchmark_ZFlat8-8 162.68 162.21 1.00x Benchmark_ZFlat9-8 141.81 142.32 1.00x Benchmark_ZFlat10-8 399.79 401.94 1.01x Benchmark_ZFlat11-8 237.43 235.91 0.99x

commit: d1f56d22221367c22b310feca1ba49a062f3d646 [log] [tgz]
author: Nigel Tao <nigeltao@golang.org> Sat Apr 02 16:22:40 2016 +1100
committer: Nigel Tao <nigeltao@golang.org> Sat Apr 02 16:22:40 2016 +1100
tree: a81053aa146b5bffb46d53d1d50895570625d95f
parent: 5f1c01d9f64b941dd9582c638279d046eda6ca31 [diff]
diff --git a/encode.go b/encode.go
index 38ebe95..939fb58 100644
--- a/encode.go
+++ b/encode.go

@@ -55,26 +55,42 @@
 // emitCopy writes a copy chunk and returns the number of bytes written.
 func emitCopy(dst []byte, offset, length int32) int {
 	i := 0
-	for length > 0 {
-		x := length - 4
-		if 0 <= x && x < 1<<3 && offset < 1<<11 {
-			dst[i+0] = uint8(offset>>8)&0x07<<5 | uint8(x)<<2 | tagCopy1
-			dst[i+1] = uint8(offset)
-			i += 2
-			break
-		}
-
-		x = length
-		if x > 1<<6 {
-			x = 1 << 6
-		}
-		dst[i+0] = uint8(x-1)<<2 | tagCopy2
+	// The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The
+	// threshold for this loop is a little higher (at 68 = 64 + 4), and the
+	// length emitted down below is is a little lower (at 60 = 64 - 4), because
+	// it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed
+	// by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as
+	// a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as
+	// 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a
+	// tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an
+	// encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1.
+	for length >= 68 {
+		// Emit a length 64 copy, encoded as 3 bytes.
+		dst[i+0] = 63<<2 | tagCopy2
 		dst[i+1] = uint8(offset)
 		dst[i+2] = uint8(offset >> 8)
 		i += 3
-		length -= x
+		length -= 64
 	}
-	return i
+	if length > 64 {
+		// Emit a length 60 copy, encoded as 3 bytes.
+		dst[i+0] = 59<<2 | tagCopy2
+		dst[i+1] = uint8(offset)
+		dst[i+2] = uint8(offset >> 8)
+		i += 3
+		length -= 60
+	}
+	if length >= 12 || offset >= 2048 {
+		// Emit the remaining copy, encoded as 3 bytes.
+		dst[i+0] = uint8(length-1)<<2 | tagCopy2
+		dst[i+1] = uint8(offset)
+		dst[i+2] = uint8(offset >> 8)
+		return i + 3
+	}
+	// Emit the remaining copy, encoded as 2 bytes.
+	dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
+	dst[i+1] = uint8(offset)
+	return i + 2
 }
 
 // Encode returns the encoded form of src. The returned slice may be a sub-

diff --git a/snappy_test.go b/snappy_test.go
index 65c0013..3d70a26 100644
--- a/snappy_test.go
+++ b/snappy_test.go

@@ -553,20 +553,36 @@
 	defer w.Close()
 	w.Write([]byte("abcd")) // Not compressible.
 	w.Flush()
-	w.Write(bytes.Repeat([]byte{'A'}, 100)) // Compressible.
+	w.Write(bytes.Repeat([]byte{'A'}, 150)) // Compressible.
 	w.Flush()
+	// The next chunk is also compressible, but a naive, greedy encoding of the
+	// overall length 67 copy as a length 64 copy (the longest expressible as a
+	// tagCopy1 or tagCopy2) plus a length 3 remainder would be two 3-byte
+	// tagCopy2 tags (6 bytes), since the minimum length for a tagCopy1 is 4
+	// bytes. Instead, we could do it shorter, in 5 bytes: a 3-byte tagCopy2
+	// (of length 60) and a 2-byte tagCopy1 (of length 7).
+	w.Write(bytes.Repeat([]byte{'B'}, 68))
+	w.Flush()
+
 	got := buf.String()
 	want := strings.Join([]string{
 		magicChunk,
 		"\x01\x08\x00\x00", // Uncompressed chunk, 8 bytes long (including 4 byte checksum).
 		"\x68\x10\xe6\xb6", // Checksum.
 		"\x61\x62\x63\x64", // Uncompressed payload: "abcd".
-		"\x00\x0d\x00\x00", // Compressed chunk, 13 bytes long (including 4 byte checksum).
-		"\x37\xcb\xbc\x9d", // Checksum.
-		"\x64",             // Compressed payload: Uncompressed length (varint encoded): 100.
+		"\x00\x11\x00\x00", // Compressed chunk, 17 bytes long (including 4 byte checksum).
+		"\x5f\xeb\xf2\x10", // Checksum.
+		"\x96\x01",         // Compressed payload: Uncompressed length (varint encoded): 150.
 		"\x00\x41",         // Compressed payload: tagLiteral, length=1, "A".
 		"\xfe\x01\x00",     // Compressed payload: tagCopy2,   length=64, offset=1.
-		"\x8a\x01\x00",     // Compressed payload: tagCopy2,   length=35, offset=1.
+		"\xfe\x01\x00",     // Compressed payload: tagCopy2,   length=64, offset=1.
+		"\x52\x01\x00",     // Compressed payload: tagCopy2,   length=21, offset=1.
+		"\x00\x0c\x00\x00", // Compressed chunk, 12 bytes long (including 4 byte checksum).
+		"\x27\x50\xe4\x4e", // Checksum.
+		"\x44",             // Compressed payload: Uncompressed length (varint encoded): 68.
+		"\x00\x42",         // Compressed payload: tagLiteral, length=1, "B".
+		"\xee\x01\x00",     // Compressed payload: tagCopy2,   length=60, offset=1.
+		"\x0d\x01",         // Compressed payload: tagCopy1,   length=7, offset=1.
 	}, "")
 	if got != want {
 		t.Fatalf("\ngot:  % x\nwant: % x", got, want)
commit	d1f56d22221367c22b310feca1ba49a062f3d646	[log] [tgz]
author	Nigel Tao <nigeltao@golang.org>	Sat Apr 02 16:22:40 2016 +1100
committer	Nigel Tao <nigeltao@golang.org>	Sat Apr 02 16:22:40 2016 +1100
tree	a81053aa146b5bffb46d53d1d50895570625d95f
parent	5f1c01d9f64b941dd9582c638279d046eda6ca31 [diff]