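Work around clang memset / branch performance

The generated initialize code used to pick between two separate memset
calls with an if / else-if / else chain: one zeroing only
self->private_impl (when WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED
is set) and one zeroing all of *self. It now chooses the pointer and length
first and then makes a single memset(p, 0, n) call. The set of bytes zeroed
and the handling of WUFFS_INITIALIZE__ALREADY_ZEROED are unchanged.

In the benchmarks below, most clang5 results improve, while the gcc7 results
are mostly unchanged (a few small shifts in either direction).
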
name old speed new speed delta
wuffs_deflate_decode_1k/clang5 163MB/s ± 0% 163MB/s ± 0% ~ (p=0.151 n=5+5)
wuffs_deflate_decode_10k/clang5 230MB/s ± 1% 234MB/s ± 0% +1.60% (p=0.008 n=5+5)
wuffs_deflate_decode_100k_just_one_read/clang5 262MB/s ± 1% 265MB/s ± 0% +0.97% (p=0.008 n=5+5)
wuffs_deflate_decode_100k_many_big_reads/clang5 220MB/s ± 0% 221MB/s ± 0% +0.59% (p=0.008 n=5+5)
wuffs_deflate_decode_1k/gcc7 193MB/s ± 1% 194MB/s ± 0% ~ (p=0.151 n=5+5)
wuffs_deflate_decode_10k/gcc7 276MB/s ± 1% 276MB/s ± 1% ~ (p=0.690 n=5+5)
wuffs_deflate_decode_100k_just_one_read/gcc7 325MB/s ± 0% 324MB/s ± 1% ~ (p=0.690 n=5+5)
wuffs_deflate_decode_100k_many_big_reads/gcc7 258MB/s ± 1% 258MB/s ± 0% ~ (p=0.690 n=5+5)
wuffs_gif_decode_1k_bw/clang5 430MB/s ± 1% 445MB/s ± 1% +3.58% (p=0.008 n=5+5)
wuffs_gif_decode_1k_color/clang5 186MB/s ± 1% 192MB/s ± 0% +3.17% (p=0.008 n=5+5)
wuffs_gif_decode_10k_bgra/clang5 729MB/s ± 1% 748MB/s ± 1% +2.53% (p=0.008 n=5+5)
wuffs_gif_decode_10k_indexed/clang5 196MB/s ± 1% 202MB/s ± 1% +2.71% (p=0.008 n=5+5)
wuffs_gif_decode_20k/clang5 242MB/s ± 2% 251MB/s ± 0% +3.54% (p=0.016 n=5+4)
wuffs_gif_decode_100k_artificial/clang5 539MB/s ± 1% 545MB/s ± 1% ~ (p=0.095 n=5+5)
wuffs_gif_decode_100k_realistic/clang5 221MB/s ± 0% 226MB/s ± 1% +2.32% (p=0.008 n=5+5)
wuffs_gif_decode_1000k/clang5 224MB/s ± 0% 230MB/s ± 1% +2.28% (p=0.008 n=5+5)
wuffs_gif_decode_anim_screencap/clang5 1.06GB/s ± 0% 1.07GB/s ± 1% +1.31% (p=0.008 n=5+5)
wuffs_gif_decode_1k_bw/gcc7 478MB/s ± 0% 467MB/s ± 0% -2.27% (p=0.008 n=5+5)
wuffs_gif_decode_1k_color/gcc7 194MB/s ± 0% 196MB/s ± 0% +0.94% (p=0.008 n=5+5)
wuffs_gif_decode_10k_bgra/gcc7 635MB/s ± 1% 639MB/s ± 1% ~ (p=0.056 n=5+5)
wuffs_gif_decode_10k_indexed/gcc7 200MB/s ± 1% 202MB/s ± 1% ~ (p=0.095 n=5+5)
wuffs_gif_decode_20k/gcc7 242MB/s ± 1% 243MB/s ± 1% ~ (p=0.421 n=5+5)
wuffs_gif_decode_100k_artificial/gcc7 509MB/s ± 0% 510MB/s ± 0% ~ (p=1.000 n=5+5)
wuffs_gif_decode_100k_realistic/gcc7 210MB/s ± 0% 211MB/s ± 0% +0.50% (p=0.016 n=5+5)
wuffs_gif_decode_1000k/gcc7 214MB/s ± 0% 214MB/s ± 0% ~ (p=0.151 n=5+5)
wuffs_gif_decode_anim_screencap/gcc7 1.03GB/s ± 0% 1.03GB/s ± 1% ~ (p=0.421 n=5+5)
wuffs_lzw_decode_20k/clang5 263MB/s ± 1% 267MB/s ± 2% ~ (p=0.151 n=5+5)
wuffs_lzw_decode_100k/clang5 427MB/s ± 1% 451MB/s ± 2% +5.61% (p=0.008 n=5+5)
wuffs_lzw_decode_20k/gcc7 270MB/s ± 0% 268MB/s ± 0% -0.66% (p=0.008 n=5+5)
wuffs_lzw_decode_100k/gcc7 481MB/s ± 1% 483MB/s ± 1% ~ (p=0.222 n=5+5)
diff --git a/cmd/wuffs-c/internal/cgen/cgen.go b/cmd/wuffs-c/internal/cgen/cgen.go
index 3c9249c..f4bffc0 100644
--- a/cmd/wuffs-c/internal/cgen/cgen.go
+++ b/cmd/wuffs-c/internal/cgen/cgen.go
@@ -998,22 +998,26 @@
b.writes("if (!self) { return wuffs_base__error__bad_receiver; }\n")
b.writes("if (sizeof(*self) != sizeof_star_self) {\n")
- b.writes("return wuffs_base__error__bad_sizeof_receiver;\n")
+ b.writes(" return wuffs_base__error__bad_sizeof_receiver;\n")
b.writes("}\n")
b.writes("if (((wuffs_version >> 32) != WUFFS_VERSION_MAJOR) || " +
"(((wuffs_version >> 16) & 0xFFFF) > WUFFS_VERSION_MINOR)) {\n")
- b.writes("return wuffs_base__error__bad_wuffs_version;\n")
+ b.writes(" return wuffs_base__error__bad_wuffs_version;\n")
b.writes("}\n\n")
b.writes("if ((initialize_flags & WUFFS_INITIALIZE__ALREADY_ZEROED) != 0) {\n")
- b.writes("if (self->private_impl.magic != 0) {\n")
- b.writes("return wuffs_base__error__initialize_falsely_claimed_already_zeroed;\n")
- b.writes("}\n")
- b.writes("} else if ((initialize_flags & WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) != 0) {\n")
- b.writes("memset(&(self->private_impl), 0, sizeof(self->private_impl));\n")
+ b.writes(" if (self->private_impl.magic != 0) {\n")
+ b.writes(" return wuffs_base__error__initialize_falsely_claimed_already_zeroed;\n")
+ b.writes(" }\n")
b.writes("} else {\n")
- b.writes("memset(self, 0, sizeof(*self));\n")
- b.writes("initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;\n")
+ b.writes(" void* p = &(self->private_impl);\n")
+ b.writes(" size_t n = sizeof(self->private_impl);\n")
+ b.writes(" if ((initialize_flags & WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) == 0) {\n")
+ b.writes(" p = self;\n")
+ b.writes(" n = sizeof(*self);\n")
+ b.writes(" initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;\n")
+ b.writes(" }\n")
+ b.writes(" memset(p, 0, n);\n")
b.writes("}\n\n")
// Call any ctors on sub-structs.
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index a4ebb28..1a60316 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -4998,12 +4998,16 @@
if (self->private_impl.magic != 0) {
return wuffs_base__error__initialize_falsely_claimed_already_zeroed;
}
- } else if ((initialize_flags &
- WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) != 0) {
- memset(&(self->private_impl), 0, sizeof(self->private_impl));
} else {
- memset(self, 0, sizeof(*self));
- initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+ void* p = &(self->private_impl);
+ size_t n = sizeof(self->private_impl);
+ if ((initialize_flags &
+ WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) == 0) {
+ p = self;
+ n = sizeof(*self);
+ initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+ }
+ memset(p, 0, n);
}
self->private_impl.magic = WUFFS_BASE__MAGIC;
@@ -6005,12 +6009,16 @@
if (self->private_impl.magic != 0) {
return wuffs_base__error__initialize_falsely_claimed_already_zeroed;
}
- } else if ((initialize_flags &
- WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) != 0) {
- memset(&(self->private_impl), 0, sizeof(self->private_impl));
} else {
- memset(self, 0, sizeof(*self));
- initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+ void* p = &(self->private_impl);
+ size_t n = sizeof(self->private_impl);
+ if ((initialize_flags &
+ WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) == 0) {
+ p = self;
+ n = sizeof(*self);
+ initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+ }
+ memset(p, 0, n);
}
self->private_impl.magic = WUFFS_BASE__MAGIC;
@@ -6291,12 +6299,16 @@
if (self->private_impl.magic != 0) {
return wuffs_base__error__initialize_falsely_claimed_already_zeroed;
}
- } else if ((initialize_flags &
- WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) != 0) {
- memset(&(self->private_impl), 0, sizeof(self->private_impl));
} else {
- memset(self, 0, sizeof(*self));
- initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+ void* p = &(self->private_impl);
+ size_t n = sizeof(self->private_impl);
+ if ((initialize_flags &
+ WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) == 0) {
+ p = self;
+ n = sizeof(*self);
+ initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+ }
+ memset(p, 0, n);
}
self->private_impl.magic = WUFFS_BASE__MAGIC;
@@ -8094,12 +8106,16 @@
if (self->private_impl.magic != 0) {
return wuffs_base__error__initialize_falsely_claimed_already_zeroed;
}
- } else if ((initialize_flags &
- WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) != 0) {
- memset(&(self->private_impl), 0, sizeof(self->private_impl));
} else {
- memset(self, 0, sizeof(*self));
- initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+ void* p = &(self->private_impl);
+ size_t n = sizeof(self->private_impl);
+ if ((initialize_flags &
+ WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) == 0) {
+ p = self;
+ n = sizeof(*self);
+ initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+ }
+ memset(p, 0, n);
}
self->private_impl.magic = WUFFS_BASE__MAGIC;
@@ -8678,12 +8694,16 @@
if (self->private_impl.magic != 0) {
return wuffs_base__error__initialize_falsely_claimed_already_zeroed;
}
- } else if ((initialize_flags &
- WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) != 0) {
- memset(&(self->private_impl), 0, sizeof(self->private_impl));
} else {
- memset(self, 0, sizeof(*self));
- initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+ void* p = &(self->private_impl);
+ size_t n = sizeof(self->private_impl);
+ if ((initialize_flags &
+ WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) == 0) {
+ p = self;
+ n = sizeof(*self);
+ initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+ }
+ memset(p, 0, n);
}
{
@@ -10839,12 +10859,16 @@
if (self->private_impl.magic != 0) {
return wuffs_base__error__initialize_falsely_claimed_already_zeroed;
}
- } else if ((initialize_flags &
- WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) != 0) {
- memset(&(self->private_impl), 0, sizeof(self->private_impl));
} else {
- memset(self, 0, sizeof(*self));
- initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+ void* p = &(self->private_impl);
+ size_t n = sizeof(self->private_impl);
+ if ((initialize_flags &
+ WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) == 0) {
+ p = self;
+ n = sizeof(*self);
+ initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+ }
+ memset(p, 0, n);
}
{
@@ -11320,12 +11344,16 @@
if (self->private_impl.magic != 0) {
return wuffs_base__error__initialize_falsely_claimed_already_zeroed;
}
- } else if ((initialize_flags &
- WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) != 0) {
- memset(&(self->private_impl), 0, sizeof(self->private_impl));
} else {
- memset(self, 0, sizeof(*self));
- initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+ void* p = &(self->private_impl);
+ size_t n = sizeof(self->private_impl);
+ if ((initialize_flags &
+ WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED) == 0) {
+ p = self;
+ n = sizeof(*self);
+ initialize_flags |= WUFFS_INITIALIZE__ALREADY_ZEROED;
+ }
+ memset(p, 0, n);
}
{