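Make a small s/uint/uint32/ decoder optimization for literal lengths.
The literal length is now computed as int(x) + 1 instead of int(x + 1), so
that the increment is performed in int rather than wrapping around in
uint32 arithmetic.
I'm not entirely sure why the benchmark numbers improve as much as they
do, but I'll take it.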
benchmark old MB/s new MB/s speedup
BenchmarkWordsDecode1e1-8 482.12 485.84 1.01x
BenchmarkWordsDecode1e2-8 372.28 421.86 1.13x
BenchmarkWordsDecode1e3-8 482.21 525.44 1.09x
BenchmarkWordsDecode1e4-8 339.46 360.87 1.06x
BenchmarkWordsDecode1e5-8 264.90 270.42 1.02x
BenchmarkWordsDecode1e6-8 284.27 290.98 1.02x
Benchmark_UFlat0-8 511.15 544.02 1.06x
Benchmark_UFlat1-8 431.52 450.03 1.04x
Benchmark_UFlat2-8 15208.70 15099.07 0.99x
Benchmark_UFlat3-8 805.02 871.78 1.08x
Benchmark_UFlat4-8 2631.19 2980.30 1.13x
Benchmark_UFlat5-8 501.62 535.45 1.07x
Benchmark_UFlat6-8 271.30 278.13 1.03x
Benchmark_UFlat7-8 265.19 272.14 1.03x
Benchmark_UFlat8-8 282.54 288.80 1.02x
Benchmark_UFlat9-8 256.39 262.69 1.02x
Benchmark_UFlat10-8 590.37 640.96 1.09x
Benchmark_UFlat11-8 339.13 357.01 1.05x
diff --git a/decode.go b/decode.go
index e99b20f..666e170 100644
--- a/decode.go
+++ b/decode.go
@@ -63,7 +63,7 @@
for s < len(src) {
switch src[s] & 0x03 {
case tagLiteral:
- x := uint(src[s] >> 2)
+ x := uint32(src[s] >> 2)
switch {
case x < 60:
s++
@@ -72,27 +72,27 @@
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return nil, ErrCorrupt
}
- x = uint(src[s-1])
+ x = uint32(src[s-1])
case x == 61:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return nil, ErrCorrupt
}
- x = uint(src[s-2]) | uint(src[s-1])<<8
+ x = uint32(src[s-2]) | uint32(src[s-1])<<8
case x == 62:
s += 4
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return nil, ErrCorrupt
}
- x = uint(src[s-3]) | uint(src[s-2])<<8 | uint(src[s-1])<<16
+ x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
case x == 63:
s += 5
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return nil, ErrCorrupt
}
- x = uint(src[s-4]) | uint(src[s-3])<<8 | uint(src[s-2])<<16 | uint(src[s-1])<<24
+ x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
}
- length = int(x + 1)
+ length = int(x) + 1
if length <= 0 {
return nil, errUnsupportedLiteralLength
}