Eliminate a branch in std/deflate decoding
name old speed new speed delta
wuffs_deflate_decode_1k_full_init/clang5 139MB/s ± 1% 151MB/s ± 1% +9.19% (p=0.000 n=28+30)
wuffs_deflate_decode_1k_part_init/clang5 169MB/s ± 1% 187MB/s ± 1% +11.15% (p=0.000 n=28+28)
wuffs_deflate_decode_10k_full_init/clang5 200MB/s ± 0% 242MB/s ± 1% +20.99% (p=0.000 n=28+28)
wuffs_deflate_decode_10k_part_init/clang5 205MB/s ± 1% 249MB/s ± 1% +21.51% (p=0.000 n=29+29)
wuffs_deflate_decode_100k_just_one_read/clang5 255MB/s ± 1% 280MB/s ± 1% +9.86% (p=0.000 n=29+30)
wuffs_deflate_decode_100k_many_big_reads/clang5 218MB/s ± 0% 241MB/s ± 1% +10.58% (p=0.000 n=27+24)
wuffs_deflate_decode_1k_full_init/gcc7 155MB/s ± 1% 155MB/s ± 2% ~ (p=0.521 n=28+29)
wuffs_deflate_decode_1k_part_init/gcc7 190MB/s ± 1% 191MB/s ± 1% +0.51% (p=0.000 n=27+28)
wuffs_deflate_decode_10k_full_init/gcc7 265MB/s ± 1% 266MB/s ± 1% +0.54% (p=0.000 n=29+30)
wuffs_deflate_decode_10k_part_init/gcc7 272MB/s ± 1% 274MB/s ± 1% +0.73% (p=0.000 n=29+30)
wuffs_deflate_decode_100k_just_one_read/gcc7 317MB/s ± 0% 318MB/s ± 0% +0.58% (p=0.000 n=28+28)
wuffs_deflate_decode_100k_many_big_reads/gcc7 252MB/s ± 1% 253MB/s ± 0% +0.64% (p=0.000 n=29+27)
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index ec72042..4842522 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -6336,6 +6336,10 @@
wuffs_deflate__huffs_table_size //
WUFFS_BASE__POTENTIALLY_UNUSED = 1024;
+static const uint32_t //
+ wuffs_deflate__huffs_table_mask //
+ WUFFS_BASE__POTENTIALLY_UNUSED = 1023;
+
// ---------------- Private Initializer Prototypes
// ---------------- Private Function Prototypes
@@ -7504,13 +7508,9 @@
}
v_redir_top = ((v_table_entry >> 8) & 65535);
v_redir_mask = ((((uint32_t)(1)) << ((v_table_entry >> 4) & 15)) - 1);
- if ((v_redir_top + (v_bits & v_redir_mask)) >= 1024) {
- status =
- wuffs_deflate__error__internal_error_inconsistent_huffman_decoder_state;
- goto exit;
- }
- v_table_entry = self->private_data
- .f_huffs[0][(v_redir_top + (v_bits & v_redir_mask))];
+ v_table_entry =
+ self->private_data
+ .f_huffs[0][((v_redir_top + (v_bits & v_redir_mask)) & 1023)];
v_table_entry_n_bits = (v_table_entry & 15);
v_bits >>= v_table_entry_n_bits;
v_n_bits -= v_table_entry_n_bits;
@@ -7589,13 +7589,9 @@
}
v_redir_top = ((v_table_entry >> 8) & 65535);
v_redir_mask = ((((uint32_t)(1)) << ((v_table_entry >> 4) & 15)) - 1);
- if ((v_redir_top + (v_bits & v_redir_mask)) >= 1024) {
- status =
- wuffs_deflate__error__internal_error_inconsistent_huffman_decoder_state;
- goto exit;
- }
- v_table_entry = self->private_data
- .f_huffs[1][(v_redir_top + (v_bits & v_redir_mask))];
+ v_table_entry =
+ self->private_data
+ .f_huffs[1][((v_redir_top + (v_bits & v_redir_mask)) & 1023)];
v_table_entry_n_bits = (v_table_entry & 15);
v_bits >>= v_table_entry_n_bits;
v_n_bits -= v_table_entry_n_bits;
@@ -7838,14 +7834,9 @@
v_redir_top = ((v_table_entry >> 8) & 65535);
v_redir_mask = ((((uint32_t)(1)) << ((v_table_entry >> 4) & 15)) - 1);
while (true) {
- if ((v_redir_top + (v_bits & v_redir_mask)) >= 1024) {
- status =
- wuffs_deflate__error__internal_error_inconsistent_huffman_decoder_state;
- goto exit;
- }
v_table_entry =
self->private_data
- .f_huffs[0][(v_redir_top + (v_bits & v_redir_mask))];
+ .f_huffs[0][((v_redir_top + (v_bits & v_redir_mask)) & 1023)];
v_table_entry_n_bits = (v_table_entry & 15);
if (v_n_bits >= v_table_entry_n_bits) {
v_bits >>= v_table_entry_n_bits;
@@ -7946,14 +7937,9 @@
v_redir_top = ((v_table_entry >> 8) & 65535);
v_redir_mask = ((((uint32_t)(1)) << ((v_table_entry >> 4) & 15)) - 1);
while (true) {
- if ((v_redir_top + (v_bits & v_redir_mask)) >= 1024) {
- status =
- wuffs_deflate__error__internal_error_inconsistent_huffman_decoder_state;
- goto exit;
- }
v_table_entry =
self->private_data
- .f_huffs[1][(v_redir_top + (v_bits & v_redir_mask))];
+ .f_huffs[1][((v_redir_top + (v_bits & v_redir_mask)) & 1023)];
v_table_entry_n_bits = (v_table_entry & 15);
if (v_n_bits >= v_table_entry_n_bits) {
v_bits >>= v_table_entry_n_bits;
diff --git a/std/deflate/decode_deflate.wuffs b/std/deflate/decode_deflate.wuffs
index d5dd5dd..a91e9b7 100644
--- a/std/deflate/decode_deflate.wuffs
+++ b/std/deflate/decode_deflate.wuffs
@@ -60,6 +60,7 @@
// TODO: replace the magic "big enough" 1024 with something more principled,
// perhaps discovered via an exhaustive search.
pri const huffs_table_size base.u32 = 1024
+pri const huffs_table_mask base.u32 = 1023
pub struct decoder?(
// These fields yield src's bits in Least Significant Bits order.
diff --git a/std/deflate/decode_huffman_fast.wuffs b/std/deflate/decode_huffman_fast.wuffs
index a459373..d98af1c 100644
--- a/std/deflate/decode_huffman_fast.wuffs
+++ b/std/deflate/decode_huffman_fast.wuffs
@@ -130,11 +130,7 @@
redir_top = (table_entry >> 8) & 0xFFFF
redir_mask = ((1 as base.u32) << ((table_entry >> 4) & 0x0F)) - 1
- if (redir_top + (bits & redir_mask)) >= huffs_table_size {
- return "#internal error: inconsistent Huffman decoder state"
- }
- assert (redir_top + (bits & redir_mask)) < 1024 via "a < b: a < c; c <= b"(c:huffs_table_size)
- table_entry = this.huffs[0][redir_top + (bits & redir_mask)]
+ table_entry = this.huffs[0][(redir_top + (bits & redir_mask)) & huffs_table_mask]
table_entry_n_bits = table_entry & 0x0F
bits >>= table_entry_n_bits
n_bits -= table_entry_n_bits
@@ -244,11 +240,7 @@
redir_top = (table_entry >> 8) & 0xFFFF
redir_mask = ((1 as base.u32) << ((table_entry >> 4) & 0x0F)) - 1
- if (redir_top + (bits & redir_mask)) >= huffs_table_size {
- return "#internal error: inconsistent Huffman decoder state"
- }
- assert (redir_top + (bits & redir_mask)) < 1024 via "a < b: a < c; c <= b"(c:huffs_table_size)
- table_entry = this.huffs[1][redir_top + (bits & redir_mask)]
+ table_entry = this.huffs[1][(redir_top + (bits & redir_mask)) & huffs_table_mask]
table_entry_n_bits = table_entry & 0x0F
bits >>= table_entry_n_bits
n_bits -= table_entry_n_bits
diff --git a/std/deflate/decode_huffman_slow.wuffs b/std/deflate/decode_huffman_slow.wuffs
index 39385fa..2fb085f 100644
--- a/std/deflate/decode_huffman_slow.wuffs
+++ b/std/deflate/decode_huffman_slow.wuffs
@@ -79,11 +79,7 @@
redir_top = (table_entry >> 8) & 0xFFFF
redir_mask = ((1 as base.u32) << ((table_entry >> 4) & 0x0F)) - 1
while true {
- if (redir_top + (bits & redir_mask)) >= huffs_table_size {
- return "#internal error: inconsistent Huffman decoder state"
- }
- assert (redir_top + (bits & redir_mask)) < 1024 via "a < b: a < c; c <= b"(c:huffs_table_size)
- table_entry = this.huffs[0][redir_top + (bits & redir_mask)]
+ table_entry = this.huffs[0][(redir_top + (bits & redir_mask)) & huffs_table_mask]
table_entry_n_bits = table_entry & 0x0F
if n_bits >= table_entry_n_bits {
bits >>= table_entry_n_bits
@@ -161,11 +157,7 @@
redir_top = (table_entry >> 8) & 0xFFFF
redir_mask = ((1 as base.u32) << ((table_entry >> 4) & 0x0F)) - 1
while true {
- if (redir_top + (bits & redir_mask)) >= huffs_table_size {
- return "#internal error: inconsistent Huffman decoder state"
- }
- assert (redir_top + (bits & redir_mask)) < 1024 via "a < b: a < c; c <= b"(c:huffs_table_size)
- table_entry = this.huffs[1][redir_top + (bits & redir_mask)]
+ table_entry = this.huffs[1][(redir_top + (bits & redir_mask)) & huffs_table_mask]
table_entry_n_bits = table_entry & 0x0F
if n_bits >= table_entry_n_bits {
bits >>= table_entry_n_bits