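Work around clang memset / branch performance

The generated code previously called memset from two separate branches:
one zeroing only self->private_impl, the other zeroing all of *self. It
now picks the pointer and size first and issues a single memset(p, 0, n)
call. In the benchmarks below, clang5 is faster on most benchmarks while
gcc7 is mostly unchanged (two small regressions: gif_decode_1k_bw and
lzw_decode_20k).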

name                                             old speed      new speed      delta

wuffs_deflate_decode_1k/clang5                    163MB/s ± 0%   163MB/s ± 0%    ~     (p=0.151 n=5+5)
wuffs_deflate_decode_10k/clang5                   230MB/s ± 1%   234MB/s ± 0%  +1.60%  (p=0.008 n=5+5)
wuffs_deflate_decode_100k_just_one_read/clang5    262MB/s ± 1%   265MB/s ± 0%  +0.97%  (p=0.008 n=5+5)
wuffs_deflate_decode_100k_many_big_reads/clang5   220MB/s ± 0%   221MB/s ± 0%  +0.59%  (p=0.008 n=5+5)

wuffs_deflate_decode_1k/gcc7                      193MB/s ± 1%   194MB/s ± 0%    ~     (p=0.151 n=5+5)
wuffs_deflate_decode_10k/gcc7                     276MB/s ± 1%   276MB/s ± 1%    ~     (p=0.690 n=5+5)
wuffs_deflate_decode_100k_just_one_read/gcc7      325MB/s ± 0%   324MB/s ± 1%    ~     (p=0.690 n=5+5)
wuffs_deflate_decode_100k_many_big_reads/gcc7     258MB/s ± 1%   258MB/s ± 0%    ~     (p=0.690 n=5+5)

wuffs_gif_decode_1k_bw/clang5                     430MB/s ± 1%   445MB/s ± 1%  +3.58%  (p=0.008 n=5+5)
wuffs_gif_decode_1k_color/clang5                  186MB/s ± 1%   192MB/s ± 0%  +3.17%  (p=0.008 n=5+5)
wuffs_gif_decode_10k_bgra/clang5                  729MB/s ± 1%   748MB/s ± 1%  +2.53%  (p=0.008 n=5+5)
wuffs_gif_decode_10k_indexed/clang5               196MB/s ± 1%   202MB/s ± 1%  +2.71%  (p=0.008 n=5+5)
wuffs_gif_decode_20k/clang5                       242MB/s ± 2%   251MB/s ± 0%  +3.54%  (p=0.016 n=5+4)
wuffs_gif_decode_100k_artificial/clang5           539MB/s ± 1%   545MB/s ± 1%    ~     (p=0.095 n=5+5)
wuffs_gif_decode_100k_realistic/clang5            221MB/s ± 0%   226MB/s ± 1%  +2.32%  (p=0.008 n=5+5)
wuffs_gif_decode_1000k/clang5                     224MB/s ± 0%   230MB/s ± 1%  +2.28%  (p=0.008 n=5+5)
wuffs_gif_decode_anim_screencap/clang5           1.06GB/s ± 0%  1.07GB/s ± 1%  +1.31%  (p=0.008 n=5+5)

wuffs_gif_decode_1k_bw/gcc7                       478MB/s ± 0%   467MB/s ± 0%  -2.27%  (p=0.008 n=5+5)
wuffs_gif_decode_1k_color/gcc7                    194MB/s ± 0%   196MB/s ± 0%  +0.94%  (p=0.008 n=5+5)
wuffs_gif_decode_10k_bgra/gcc7                    635MB/s ± 1%   639MB/s ± 1%    ~     (p=0.056 n=5+5)
wuffs_gif_decode_10k_indexed/gcc7                 200MB/s ± 1%   202MB/s ± 1%    ~     (p=0.095 n=5+5)
wuffs_gif_decode_20k/gcc7                         242MB/s ± 1%   243MB/s ± 1%    ~     (p=0.421 n=5+5)
wuffs_gif_decode_100k_artificial/gcc7             509MB/s ± 0%   510MB/s ± 0%    ~     (p=1.000 n=5+5)
wuffs_gif_decode_100k_realistic/gcc7              210MB/s ± 0%   211MB/s ± 0%  +0.50%  (p=0.016 n=5+5)
wuffs_gif_decode_1000k/gcc7                       214MB/s ± 0%   214MB/s ± 0%    ~     (p=0.151 n=5+5)
wuffs_gif_decode_anim_screencap/gcc7             1.03GB/s ± 0%  1.03GB/s ± 1%    ~     (p=0.421 n=5+5)

wuffs_lzw_decode_20k/clang5                       263MB/s ± 1%   267MB/s ± 2%    ~     (p=0.151 n=5+5)
wuffs_lzw_decode_100k/clang5                      427MB/s ± 1%   451MB/s ± 2%  +5.61%  (p=0.008 n=5+5)

wuffs_lzw_decode_20k/gcc7                         270MB/s ± 0%   268MB/s ± 0%  -0.66%  (p=0.008 n=5+5)
wuffs_lzw_decode_100k/gcc7                        481MB/s ± 1%   483MB/s ± 1%    ~     (p=0.222 n=5+5)
diff --git a/cmd/wuffs-c/internal/cgen/cgen.go b/cmd/wuffs-c/internal/cgen/cgen.go
index 3c9249c..f4bffc0 100644
--- a/cmd/wuffs-c/internal/cgen/cgen.go
+++ b/cmd/wuffs-c/internal/cgen/cgen.go
@@ -998,22 +998,26 @@
 	b.writes("if (!self) { return wuffs_base__error__bad_receiver; }\n")
 
 	b.writes("if (sizeof(*self) != sizeof_star_self) {\n")
-	b.writes("return wuffs_base__error__bad_sizeof_receiver;\n")
+	b.writes("  return wuffs_base__error__bad_sizeof_receiver;\n")
 	b.writes("}\n")
 	b.writes("if (((wuffs_version >> 32) != WUFFS_VERSION_MAJOR) || " +
 		"(((wuffs_version >> 16) & 0xFFFF) > WUFFS_VERSION_MINOR)) {\n")
-	b.writes("return wuffs_base__error__bad_wuffs_version;\n")
+	b.writes("  return wuffs_base__error__bad_wuffs_version;\n")
 	b.writes("}\n\n")
 
 	b.writes("if ((initialize_flags & WUFFS_INITIALIZE__ALREADY_ZEROED) != 0) {\n")
-	b.writes("if (self->private_impl.magic != 0) {\n")
-	b.writes("return wuffs_base__error__initialize_falsely_claimed_already_zeroed;\n")
-	b.writes("}\n")
-	b.writes("} else if ((initialize_flags & WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) != 0) {\n")
-	b.writes("memset(&(self->private_impl), 0, sizeof(self->private_impl));\n")
+	b.writes("  if (self->private_impl.magic != 0) {\n")
+	b.writes("    return wuffs_base__error__initialize_falsely_claimed_already_zeroed;\n")
+	b.writes("  }\n")
 	b.writes("} else {\n")
-	b.writes("memset(self, 0, sizeof(*self));\n")
-	b.writes("initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;\n")
+	b.writes("  void* p = &(self->private_impl);\n")
+	b.writes("  size_t n = sizeof(self->private_impl);\n")
+	b.writes("  if ((initialize_flags & WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) == 0) {\n")
+	b.writes("    p = self;\n")
+	b.writes("    n = sizeof(*self);\n")
+	b.writes("    initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;\n")
+	b.writes("  }\n")
+	b.writes("  memset(p, 0, n);\n")
 	b.writes("}\n\n")
 
 	// Call any ctors on sub-structs.
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index a4ebb28..1a60316 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -4998,12 +4998,16 @@
     if (self->private_impl.magic != 0) {
       return wuffs_base__error__initialize_falsely_claimed_already_zeroed;
     }
-  } else if ((initialize_flags &
-              WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) != 0) {
-    memset(&(self->private_impl), 0, sizeof(self->private_impl));
   } else {
-    memset(self, 0, sizeof(*self));
-    initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+    void* p = &(self->private_impl);
+    size_t n = sizeof(self->private_impl);
+    if ((initialize_flags &
+         WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) == 0) {
+      p = self;
+      n = sizeof(*self);
+      initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+    }
+    memset(p, 0, n);
   }
 
   self->private_impl.magic = WUFFS_BASE__MAGIC;
@@ -6005,12 +6009,16 @@
     if (self->private_impl.magic != 0) {
       return wuffs_base__error__initialize_falsely_claimed_already_zeroed;
     }
-  } else if ((initialize_flags &
-              WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) != 0) {
-    memset(&(self->private_impl), 0, sizeof(self->private_impl));
   } else {
-    memset(self, 0, sizeof(*self));
-    initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+    void* p = &(self->private_impl);
+    size_t n = sizeof(self->private_impl);
+    if ((initialize_flags &
+         WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) == 0) {
+      p = self;
+      n = sizeof(*self);
+      initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+    }
+    memset(p, 0, n);
   }
 
   self->private_impl.magic = WUFFS_BASE__MAGIC;
@@ -6291,12 +6299,16 @@
     if (self->private_impl.magic != 0) {
       return wuffs_base__error__initialize_falsely_claimed_already_zeroed;
     }
-  } else if ((initialize_flags &
-              WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) != 0) {
-    memset(&(self->private_impl), 0, sizeof(self->private_impl));
   } else {
-    memset(self, 0, sizeof(*self));
-    initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+    void* p = &(self->private_impl);
+    size_t n = sizeof(self->private_impl);
+    if ((initialize_flags &
+         WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) == 0) {
+      p = self;
+      n = sizeof(*self);
+      initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+    }
+    memset(p, 0, n);
   }
 
   self->private_impl.magic = WUFFS_BASE__MAGIC;
@@ -8094,12 +8106,16 @@
     if (self->private_impl.magic != 0) {
       return wuffs_base__error__initialize_falsely_claimed_already_zeroed;
     }
-  } else if ((initialize_flags &
-              WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) != 0) {
-    memset(&(self->private_impl), 0, sizeof(self->private_impl));
   } else {
-    memset(self, 0, sizeof(*self));
-    initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+    void* p = &(self->private_impl);
+    size_t n = sizeof(self->private_impl);
+    if ((initialize_flags &
+         WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) == 0) {
+      p = self;
+      n = sizeof(*self);
+      initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+    }
+    memset(p, 0, n);
   }
 
   self->private_impl.magic = WUFFS_BASE__MAGIC;
@@ -8678,12 +8694,16 @@
     if (self->private_impl.magic != 0) {
       return wuffs_base__error__initialize_falsely_claimed_already_zeroed;
     }
-  } else if ((initialize_flags &
-              WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) != 0) {
-    memset(&(self->private_impl), 0, sizeof(self->private_impl));
   } else {
-    memset(self, 0, sizeof(*self));
-    initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+    void* p = &(self->private_impl);
+    size_t n = sizeof(self->private_impl);
+    if ((initialize_flags &
+         WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) == 0) {
+      p = self;
+      n = sizeof(*self);
+      initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+    }
+    memset(p, 0, n);
   }
 
   {
@@ -10839,12 +10859,16 @@
     if (self->private_impl.magic != 0) {
       return wuffs_base__error__initialize_falsely_claimed_already_zeroed;
     }
-  } else if ((initialize_flags &
-              WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) != 0) {
-    memset(&(self->private_impl), 0, sizeof(self->private_impl));
   } else {
-    memset(self, 0, sizeof(*self));
-    initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+    void* p = &(self->private_impl);
+    size_t n = sizeof(self->private_impl);
+    if ((initialize_flags &
+         WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) == 0) {
+      p = self;
+      n = sizeof(*self);
+      initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+    }
+    memset(p, 0, n);
   }
 
   {
@@ -11320,12 +11344,16 @@
     if (self->private_impl.magic != 0) {
       return wuffs_base__error__initialize_falsely_claimed_already_zeroed;
     }
-  } else if ((initialize_flags &
-              WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) != 0) {
-    memset(&(self->private_impl), 0, sizeof(self->private_impl));
   } else {
-    memset(self, 0, sizeof(*self));
-    initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+    void* p = &(self->private_impl);
+    size_t n = sizeof(self->private_impl);
+    if ((initialize_flags &
+         WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) == 0) {
+      p = self;
+      n = sizeof(*self);
+      initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+    }
+    memset(p, 0, n);
   }
 
   {