Tweak flatecut's huffman.decode to share a single look-up table fast path

name    old time/op  new time/op  delta
pkg:github.com/google/wuffs/lib/flatecut goos:linux goarch:amd64
Cut-56  1.05ms ± 0%  1.03ms ± 0%  -2.32%  (p=0.008 n=5+5)
pkg:github.com/google/wuffs/lib/zlibcut goos:linux goarch:amd64
Cut-56  2.48ms ± 0%  2.44ms ± 0%  -1.44%  (p=0.008 n=5+5)
diff --git a/lib/flatecut/flatecut.go b/lib/flatecut/flatecut.go
index c9ef653..b0b7391 100644
--- a/lib/flatecut/flatecut.go
+++ b/lib/flatecut/flatecut.go
@@ -156,26 +156,23 @@
 
 func (h *huffman) decode(b *bitstream) int32 {
 	if b.nBits >= 8 {
-		if x := h.lookUpTable[b.bits&0xFF]; x != 0 {
-			n := x >> 16
-			b.bits >>= n
-			b.nBits -= n
-			return int32(x & 0xFFFF)
-		}
-	}
-
-	if b.index < len(b.bytes) {
+		// No-op: b.bits already holds the 8 bits that the table look-up needs.
+	} else if b.index < len(b.bytes) {
 		b.bits |= uint32(b.bytes[b.index]) << b.nBits
 		b.nBits += 8
 		b.index++
-		if x := h.lookUpTable[b.bits&0xFF]; x != 0 {
-			n := x >> 16
-			b.bits >>= n
-			b.nBits -= n
-			return int32(x & 0xFFFF)
-		}
+	} else {
+		goto slow
 	}
 
+	if x := h.lookUpTable[b.bits&0xFF]; x != 0 {
+		n := x >> 16
+		b.bits >>= n
+		b.nBits -= n
+		return int32(x & 0xFFFF)
+	}
+
+slow:
 	return h.slowDecode(b)
 }