Change the encoder's hash table values from int32 to uint16.
Doing s/int32/uint16/ in "var table [maxTableSize]int32" halves the table
from 64 KiB to 32 KiB, saving 32 KiB of stack space that needed zeroing.
maxTableSize is 1<<14, or 16384.
We couldn't do this before, in commit cc71ae7c, since we didn't have
maxBlockSize = 65536 at the time; without that bound, table values (offsets
into src) could have exceeded the range of a uint16.
benchmark old MB/s new MB/s speedup
BenchmarkWordsEncode1e1-4 468.81 465.82 0.99x
BenchmarkWordsEncode1e2-4 32.62 60.23 1.85x
BenchmarkWordsEncode1e3-4 138.36 174.61 1.26x
BenchmarkWordsEncode1e4-4 170.79 172.43 1.01x
BenchmarkWordsEncode1e5-4 131.02 134.25 1.02x
BenchmarkWordsEncode1e6-4 149.33 153.05 1.02x
BenchmarkRandomEncode-4 5933.57 6846.03 1.15x
Benchmark_ZFlat0-4 306.98 310.74 1.01x
Benchmark_ZFlat1-4 194.65 198.74 1.02x
Benchmark_ZFlat2-4 6784.51 8110.98 1.20x
Benchmark_ZFlat3-4 64.06 123.43 1.93x
Benchmark_ZFlat4-4 2102.05 2224.84 1.06x
Benchmark_ZFlat5-4 303.89 307.19 1.01x
Benchmark_ZFlat6-4 132.74 136.37 1.03x
Benchmark_ZFlat7-4 126.50 130.72 1.03x
Benchmark_ZFlat8-4 140.35 143.61 1.02x
Benchmark_ZFlat9-4 121.73 125.62 1.03x
Benchmark_ZFlat10-4 360.89 365.62 1.01x
Benchmark_ZFlat11-4 195.92 199.96 1.02x
diff --git a/encode.go b/encode.go
index 9055405..1a8a821 100644
--- a/encode.go
+++ b/encode.go
@@ -178,13 +178,15 @@
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlock(dst, src []byte) (d int) {
// Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
+ // The table element type is uint16, as s < sLimit and sLimit < len(src)
+ // and len(src) <= maxBlockSize and maxBlockSize == 65536.
const maxTableSize = 1 << 14
shift, tableSize := uint32(32-8), 1<<8
for tableSize < maxTableSize && tableSize < len(src) {
shift--
tableSize *= 2
}
- var table [maxTableSize]int32
+ var table [maxTableSize]uint16
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
@@ -228,7 +230,7 @@
goto emitRemainder
}
candidate = int(table[nextHash])
- table[nextHash] = int32(s)
+ table[nextHash] = uint16(s)
nextHash = hash(load32(src, nextS), shift)
if load32(src, s) == load32(src, candidate) {
break
@@ -269,10 +271,10 @@
// three load32 calls.
x := load64(src, s-1)
prevHash := hash(uint32(x>>0), shift)
- table[prevHash] = int32(s - 1)
+ table[prevHash] = uint16(s - 1)
currHash := hash(uint32(x>>8), shift)
candidate = int(table[currHash])
- table[currHash] = int32(s)
+ table[currHash] = uint16(s)
if uint32(x>>8) != load32(src, candidate) {
nextHash = hash(uint32(x>>16), shift)
s++