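Refine the lzw.decoder.prefixes element type

The prefixes array's element type changes from base.u16 to
base.u16[..4095], and the "& 4095" mask previously applied to the value
read from this.prefixes[c] is dropped. The field also moves from the
decoder's second field list to its first, so in the generated C code
f_prefixes moves from private_data to private_impl.

Benchmark results before and after this change: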

name                                             old speed      new speed      delta

wuffs_gif_decode_1k_bw/clang5                     445MB/s ± 1%   413MB/s ± 1%  -7.11%  (p=0.008 n=5+5)
wuffs_gif_decode_1k_color/clang5                  192MB/s ± 0%   185MB/s ± 0%  -3.82%  (p=0.008 n=5+5)
wuffs_gif_decode_10k_bgra/clang5                  748MB/s ± 1%   742MB/s ± 0%    ~     (p=0.151 n=5+5)
wuffs_gif_decode_10k_indexed/clang5               202MB/s ± 1%   198MB/s ± 1%  -1.69%  (p=0.008 n=5+5)
wuffs_gif_decode_20k/clang5                       251MB/s ± 0%   251MB/s ± 1%    ~     (p=1.000 n=4+5)
wuffs_gif_decode_100k_artificial/clang5           545MB/s ± 1%   563MB/s ± 1%  +3.21%  (p=0.008 n=5+5)
wuffs_gif_decode_100k_realistic/clang5            226MB/s ± 1%   228MB/s ± 0%  +0.96%  (p=0.016 n=5+5)
wuffs_gif_decode_1000k/clang5                     230MB/s ± 1%   232MB/s ± 0%  +1.25%  (p=0.008 n=5+5)
wuffs_gif_decode_anim_screencap/clang5           1.07GB/s ± 1%  1.11GB/s ± 1%  +3.52%  (p=0.008 n=5+5)

wuffs_gif_decode_1k_bw/gcc7                       467MB/s ± 0%   486MB/s ± 1%  +4.11%  (p=0.008 n=5+5)
wuffs_gif_decode_1k_color/gcc7                    196MB/s ± 0%   186MB/s ± 1%  -4.69%  (p=0.008 n=5+5)
wuffs_gif_decode_10k_bgra/gcc7                    639MB/s ± 1%   629MB/s ± 1%  -1.69%  (p=0.008 n=5+5)
wuffs_gif_decode_10k_indexed/gcc7                 202MB/s ± 1%   199MB/s ± 0%  -1.13%  (p=0.016 n=5+5)
wuffs_gif_decode_20k/gcc7                         243MB/s ± 1%   243MB/s ± 0%    ~     (p=0.730 n=5+4)
wuffs_gif_decode_100k_artificial/gcc7             510MB/s ± 0%   510MB/s ± 1%    ~     (p=1.000 n=5+5)
wuffs_gif_decode_100k_realistic/gcc7              211MB/s ± 0%   212MB/s ± 0%  +0.55%  (p=0.008 n=5+5)
wuffs_gif_decode_1000k/gcc7                       214MB/s ± 0%   215MB/s ± 0%    ~     (p=0.056 n=5+5)
wuffs_gif_decode_anim_screencap/gcc7             1.03GB/s ± 1%  1.05GB/s ± 3%    ~     (p=0.151 n=5+5)

wuffs_lzw_decode_20k/clang5                       267MB/s ± 2%   272MB/s ± 0%  +1.79%  (p=0.008 n=5+5)
wuffs_lzw_decode_100k/clang5                      451MB/s ± 2%   469MB/s ± 1%  +3.99%  (p=0.008 n=5+5)

wuffs_lzw_decode_20k/gcc7                         268MB/s ± 0%   269MB/s ± 0%  +0.40%  (p=0.032 n=5+5)
wuffs_lzw_decode_100k/gcc7                        483MB/s ± 1%   483MB/s ± 0%    ~     (p=1.000 n=5+5)
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 1a60316..ea79258 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -3093,6 +3093,7 @@
     uint32_t f_output_ri;
     uint32_t f_output_wi;
     uint32_t f_read_from_return_value;
+    uint16_t f_prefixes[4096];
 
     uint32_t p_decode_io_writer[1];
     uint32_t p_write_to[1];
@@ -3100,7 +3101,6 @@
 
   struct {
     uint8_t f_suffixes[4096][8];
-    uint16_t f_prefixes[4096];
     uint16_t f_lm1s[4096];
     uint8_t f_output[8199];
 
@@ -8352,15 +8352,15 @@
         v_lm1_a = ((self->private_data.f_lm1s[v_prev_code] + 1) & 4095);
         self->private_data.f_lm1s[v_save_code] = v_lm1_a;
         if ((v_lm1_a % 8) != 0) {
-          self->private_data.f_prefixes[v_save_code] =
-              self->private_data.f_prefixes[v_prev_code];
+          self->private_impl.f_prefixes[v_save_code] =
+              self->private_impl.f_prefixes[v_prev_code];
           memcpy(self->private_data.f_suffixes[v_save_code],
                  self->private_data.f_suffixes[v_prev_code],
                  sizeof(self->private_data.f_suffixes[v_save_code]));
           self->private_data.f_suffixes[v_save_code][(v_lm1_a % 8)] =
               ((uint8_t)(v_code));
         } else {
-          self->private_data.f_prefixes[v_save_code] =
+          self->private_impl.f_prefixes[v_save_code] =
               ((uint16_t)(v_prev_code));
           self->private_data.f_suffixes[v_save_code][0] = ((uint8_t)(v_code));
         }
@@ -8398,7 +8398,7 @@
         }
         v_steps -= 1;
         v_o = ((v_o - 8) & 8191);
-        v_c = (((uint32_t)(self->private_data.f_prefixes[v_c])) & 4095);
+        v_c = ((uint32_t)(self->private_impl.f_prefixes[v_c]));
       }
     label_1_break:;
       v_first_byte = self->private_data.f_suffixes[v_c][0];
@@ -8410,15 +8410,15 @@
         v_lm1_b = ((self->private_data.f_lm1s[v_prev_code] + 1) & 4095);
         self->private_data.f_lm1s[v_save_code] = v_lm1_b;
         if ((v_lm1_b % 8) != 0) {
-          self->private_data.f_prefixes[v_save_code] =
-              self->private_data.f_prefixes[v_prev_code];
+          self->private_impl.f_prefixes[v_save_code] =
+              self->private_impl.f_prefixes[v_prev_code];
           memcpy(self->private_data.f_suffixes[v_save_code],
                  self->private_data.f_suffixes[v_prev_code],
                  sizeof(self->private_data.f_suffixes[v_save_code]));
           self->private_data.f_suffixes[v_save_code][(v_lm1_b % 8)] =
               v_first_byte;
         } else {
-          self->private_data.f_prefixes[v_save_code] =
+          self->private_impl.f_prefixes[v_save_code] =
               ((uint16_t)(v_prev_code));
           self->private_data.f_suffixes[v_save_code][0] =
               ((uint8_t)(v_first_byte));
diff --git a/std/lzw/decode_lzw.wuffs b/std/lzw/decode_lzw.wuffs
index 04469c3..3e08741 100644
--- a/std/lzw/decode_lzw.wuffs
+++ b/std/lzw/decode_lzw.wuffs
@@ -53,11 +53,13 @@
 	// might as well save it explicitly as a decoder field.
 	read_from_return_value base.u32,
 
+	// read_from per-code state.
+	prefixes array[4096] base.u16[..4095],
+
 	util base.utility,
 )(
 	// read_from per-code state.
 	suffixes array[4096] array[8] base.u8,
-	prefixes array[4096] base.u16,
 	// lm1s is the "length minus 1"s of the values for the implicit key-value
 	// table in this decoder. See std/lzw/README.md for more detail.
 	lm1s array[4096] base.u16,
@@ -262,7 +264,7 @@
 				// This line is essentially "o -= 8". The "& 8191" is a no-op
 				// in practice, but is necessary for the overflow checker.
 				o = (o ~mod- 8) & 8191
-				c = (this.prefixes[c] as base.u32) & 4095
+				c = this.prefixes[c] as base.u32
 			}
 			first_byte = this.suffixes[c][0]