Have flatecut use a 64-bit bit buffer, not a 32-bit one
name old time/op new time/op delta
pkg:github.com/google/wuffs/lib/flatecut goos:linux goarch:amd64
Cut-56 919µs ± 0% 867µs ± 0% -5.70% (p=0.008 n=5+5)
pkg:github.com/google/wuffs/lib/zlibcut goos:linux goarch:amd64
Cut-56 2.34ms ± 1% 2.28ms ± 0% -2.46% (p=0.008 n=5+5)
diff --git a/lib/flatecut/flatecut.go b/lib/flatecut/flatecut.go
index b574222..f359a52 100644
--- a/lib/flatecut/flatecut.go
+++ b/lib/flatecut/flatecut.go
@@ -84,9 +84,11 @@
mostNegativeInt32 = -0x80000000
)
-func loadU32LE(b []byte) uint32 {
- _ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
- return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
+func loadU64LE(b []byte) uint64 {
+ _ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
+ return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
+ uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+
}
type bitstream struct {
@@ -96,7 +98,7 @@
// The low nBits bits of the 'bits' field hold the next bits (in LSB-first
// order).
- bits uint32
+ bits uint64
nBits uint32
}
@@ -105,13 +107,13 @@
if b.index >= len(b.bytes) {
return mostNegativeInt32
}
- b.bits |= uint32(b.bytes[b.index]) << b.nBits
+ b.bits |= uint64(b.bytes[b.index]) << b.nBits
b.nBits += 8
b.index++
}
mask := ((uint32(1)) << nBits) - 1
- ret := b.bits & mask
+ ret := uint32(b.bits) & mask
b.bits >>= nBits
b.nBits -= nBits
return int32(ret)
@@ -162,15 +164,15 @@
func (h *huffman) decode(b *bitstream) int32 {
if b.nBits >= 8 {
// No-op.
- } else if b.index < (len(b.bytes) - 4) {
+ } else if b.index < (len(b.bytes) - 8) {
// This is "Variant 4" of
// https://fgiesen.wordpress.com/2018/02/20/reading-bits-in-far-too-many-ways-part-2/
- u := loadU32LE(b.bytes[b.index:])
+ u := loadU64LE(b.bytes[b.index:])
b.bits |= u << b.nBits
- b.index += int((31 - b.nBits) >> 3)
- b.nBits |= 24
+ b.index += int((63 - b.nBits) >> 3)
+ b.nBits |= 56
} else if b.index < len(b.bytes) {
- b.bits |= uint32(b.bytes[b.index]) << b.nBits
+ b.bits |= uint64(b.bytes[b.index]) << b.nBits
b.nBits += 8
b.index++
} else {
@@ -202,12 +204,12 @@
if b.index >= len(b.bytes) {
return mostNegativeInt32
}
- b.bits = uint32(b.bytes[b.index])
+ b.bits = uint64(b.bytes[b.index])
b.nBits = 8
b.index++
}
- code |= b.bits & 1
+ code |= uint32(b.bits & 1)
b.bits >>= 1
b.nBits -= 1