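Refine the lzw.decoder.prefixes element type

The prefixes field moves from the decoder's second field group
(private_data in the generated C) to the first (private_impl), and its
element type is refined from base.u16 to base.u16[..4095]. The "& 4095"
mask applied when following a prefix link is dropped accordingly.

Benchmark results: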
name old speed new speed delta
wuffs_gif_decode_1k_bw/clang5 445MB/s ± 1% 413MB/s ± 1% -7.11% (p=0.008 n=5+5)
wuffs_gif_decode_1k_color/clang5 192MB/s ± 0% 185MB/s ± 0% -3.82% (p=0.008 n=5+5)
wuffs_gif_decode_10k_bgra/clang5 748MB/s ± 1% 742MB/s ± 0% ~ (p=0.151 n=5+5)
wuffs_gif_decode_10k_indexed/clang5 202MB/s ± 1% 198MB/s ± 1% -1.69% (p=0.008 n=5+5)
wuffs_gif_decode_20k/clang5 251MB/s ± 0% 251MB/s ± 1% ~ (p=1.000 n=4+5)
wuffs_gif_decode_100k_artificial/clang5 545MB/s ± 1% 563MB/s ± 1% +3.21% (p=0.008 n=5+5)
wuffs_gif_decode_100k_realistic/clang5 226MB/s ± 1% 228MB/s ± 0% +0.96% (p=0.016 n=5+5)
wuffs_gif_decode_1000k/clang5 230MB/s ± 1% 232MB/s ± 0% +1.25% (p=0.008 n=5+5)
wuffs_gif_decode_anim_screencap/clang5 1.07GB/s ± 1% 1.11GB/s ± 1% +3.52% (p=0.008 n=5+5)
wuffs_gif_decode_1k_bw/gcc7 467MB/s ± 0% 486MB/s ± 1% +4.11% (p=0.008 n=5+5)
wuffs_gif_decode_1k_color/gcc7 196MB/s ± 0% 186MB/s ± 1% -4.69% (p=0.008 n=5+5)
wuffs_gif_decode_10k_bgra/gcc7 639MB/s ± 1% 629MB/s ± 1% -1.69% (p=0.008 n=5+5)
wuffs_gif_decode_10k_indexed/gcc7 202MB/s ± 1% 199MB/s ± 0% -1.13% (p=0.016 n=5+5)
wuffs_gif_decode_20k/gcc7 243MB/s ± 1% 243MB/s ± 0% ~ (p=0.730 n=5+4)
wuffs_gif_decode_100k_artificial/gcc7 510MB/s ± 0% 510MB/s ± 1% ~ (p=1.000 n=5+5)
wuffs_gif_decode_100k_realistic/gcc7 211MB/s ± 0% 212MB/s ± 0% +0.55% (p=0.008 n=5+5)
wuffs_gif_decode_1000k/gcc7 214MB/s ± 0% 215MB/s ± 0% ~ (p=0.056 n=5+5)
wuffs_gif_decode_anim_screencap/gcc7 1.03GB/s ± 1% 1.05GB/s ± 3% ~ (p=0.151 n=5+5)
wuffs_lzw_decode_20k/clang5 267MB/s ± 2% 272MB/s ± 0% +1.79% (p=0.008 n=5+5)
wuffs_lzw_decode_100k/clang5 451MB/s ± 2% 469MB/s ± 1% +3.99% (p=0.008 n=5+5)
wuffs_lzw_decode_20k/gcc7 268MB/s ± 0% 269MB/s ± 0% +0.40% (p=0.032 n=5+5)
wuffs_lzw_decode_100k/gcc7 483MB/s ± 1% 483MB/s ± 0% ~ (p=1.000 n=5+5)
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 1a60316..ea79258 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -3093,6 +3093,7 @@
uint32_t f_output_ri;
uint32_t f_output_wi;
uint32_t f_read_from_return_value;
+ uint16_t f_prefixes[4096];
uint32_t p_decode_io_writer[1];
uint32_t p_write_to[1];
@@ -3100,7 +3101,6 @@
struct {
uint8_t f_suffixes[4096][8];
- uint16_t f_prefixes[4096];
uint16_t f_lm1s[4096];
uint8_t f_output[8199];
@@ -8352,15 +8352,15 @@
v_lm1_a = ((self->private_data.f_lm1s[v_prev_code] + 1) & 4095);
self->private_data.f_lm1s[v_save_code] = v_lm1_a;
if ((v_lm1_a % 8) != 0) {
- self->private_data.f_prefixes[v_save_code] =
- self->private_data.f_prefixes[v_prev_code];
+ self->private_impl.f_prefixes[v_save_code] =
+ self->private_impl.f_prefixes[v_prev_code];
memcpy(self->private_data.f_suffixes[v_save_code],
self->private_data.f_suffixes[v_prev_code],
sizeof(self->private_data.f_suffixes[v_save_code]));
self->private_data.f_suffixes[v_save_code][(v_lm1_a % 8)] =
((uint8_t)(v_code));
} else {
- self->private_data.f_prefixes[v_save_code] =
+ self->private_impl.f_prefixes[v_save_code] =
((uint16_t)(v_prev_code));
self->private_data.f_suffixes[v_save_code][0] = ((uint8_t)(v_code));
}
@@ -8398,7 +8398,7 @@
}
v_steps -= 1;
v_o = ((v_o - 8) & 8191);
- v_c = (((uint32_t)(self->private_data.f_prefixes[v_c])) & 4095);
+ v_c = ((uint32_t)(self->private_impl.f_prefixes[v_c]));
}
label_1_break:;
v_first_byte = self->private_data.f_suffixes[v_c][0];
@@ -8410,15 +8410,15 @@
v_lm1_b = ((self->private_data.f_lm1s[v_prev_code] + 1) & 4095);
self->private_data.f_lm1s[v_save_code] = v_lm1_b;
if ((v_lm1_b % 8) != 0) {
- self->private_data.f_prefixes[v_save_code] =
- self->private_data.f_prefixes[v_prev_code];
+ self->private_impl.f_prefixes[v_save_code] =
+ self->private_impl.f_prefixes[v_prev_code];
memcpy(self->private_data.f_suffixes[v_save_code],
self->private_data.f_suffixes[v_prev_code],
sizeof(self->private_data.f_suffixes[v_save_code]));
self->private_data.f_suffixes[v_save_code][(v_lm1_b % 8)] =
v_first_byte;
} else {
- self->private_data.f_prefixes[v_save_code] =
+ self->private_impl.f_prefixes[v_save_code] =
((uint16_t)(v_prev_code));
self->private_data.f_suffixes[v_save_code][0] =
((uint8_t)(v_first_byte));
diff --git a/std/lzw/decode_lzw.wuffs b/std/lzw/decode_lzw.wuffs
index 04469c3..3e08741 100644
--- a/std/lzw/decode_lzw.wuffs
+++ b/std/lzw/decode_lzw.wuffs
@@ -53,11 +53,13 @@
// might as well save it explicitly as a decoder field.
read_from_return_value base.u32,
+ // read_from per-code state.
+ prefixes array[4096] base.u16[..4095],
+
util base.utility,
)(
// read_from per-code state.
suffixes array[4096] array[8] base.u8,
- prefixes array[4096] base.u16,
// lm1s is the "length minus 1"s of the values for the implicit key-value
// table in this decoder. See std/lzw/README.md for more detail.
lm1s array[4096] base.u16,
@@ -262,7 +264,7 @@
// This line is essentially "o -= 8". The "& 8191" is a no-op
// in practice, but is necessary for the overflow checker.
o = (o ~mod- 8) & 8191
- c = (this.prefixes[c] as base.u32) & 4095
+ c = this.prefixes[c] as base.u32
}
first_byte = this.suffixes[c][0]