Add mark/since/count_since methods to io_reader and io_writer

name                                             old speed     new speed     delta

wuffs_deflate_decode_1k_full_init/clang5         149MB/s ± 0%  151MB/s ± 1%  +1.46%  (p=0.000 n=9+7)
wuffs_deflate_decode_1k_part_init/clang5         183MB/s ± 1%  188MB/s ± 2%  +2.40%  (p=0.000 n=10+10)
wuffs_deflate_decode_10k_full_init/clang5        236MB/s ± 0%  242MB/s ± 1%  +2.56%  (p=0.000 n=10+10)
wuffs_deflate_decode_10k_part_init/clang5        243MB/s ± 0%  249MB/s ± 0%  +2.36%  (p=0.000 n=9+9)
wuffs_deflate_decode_100k_just_one_read/clang5   279MB/s ± 0%  288MB/s ± 1%  +3.28%  (p=0.000 n=9+8)
wuffs_deflate_decode_100k_many_big_reads/clang5  232MB/s ± 0%  240MB/s ± 1%  +3.76%  (p=0.000 n=9+9)

wuffs_deflate_decode_1k_full_init/gcc7           152MB/s ± 1%  152MB/s ± 1%    ~     (p=0.661 n=10+9)
wuffs_deflate_decode_1k_part_init/gcc7           188MB/s ± 0%  187MB/s ± 1%  -0.58%  (p=0.013 n=10+9)
wuffs_deflate_decode_10k_full_init/gcc7          260MB/s ± 0%  260MB/s ± 0%    ~     (p=0.393 n=10+10)
wuffs_deflate_decode_10k_part_init/gcc7          268MB/s ± 0%  268MB/s ± 0%    ~     (p=0.971 n=10+10)
wuffs_deflate_decode_100k_just_one_read/gcc7     312MB/s ± 1%  312MB/s ± 0%    ~     (p=0.720 n=10+9)
wuffs_deflate_decode_100k_many_big_reads/gcc7    247MB/s ± 1%  247MB/s ± 0%  -0.26%  (p=0.022 n=10+9)
diff --git a/internal/cgen/base/io-private.h b/internal/cgen/base/io-private.h
index 088f3ce..4802c91 100644
--- a/internal/cgen/base/io-private.h
+++ b/internal/cgen/base/io-private.h
@@ -22,6 +22,22 @@
          (buf.data.len >= buf.meta.wi) && (buf.meta.wi >= buf.meta.ri);
 }
 
+static inline uint64_t  //
+wuffs_base__io__count_since(uint64_t mark, uint64_t index) {
+  if (index >= mark) {
+    return index - mark;
+  }
+  return 0;
+}
+
+static inline wuffs_base__slice_u8  //
+wuffs_base__io__since(uint64_t mark, uint64_t index, uint8_t* ptr) {
+  if (index >= mark) {
+    return wuffs_base__make_slice_u8(ptr + mark, index - mark);
+  }
+  return wuffs_base__make_slice_u8(NULL, 0);
+}
+
 // TODO: wuffs_base__io_reader__is_eof is no longer used by Wuffs per se, but
 // it might be handy to programs that use Wuffs. Either delete it, or promote
 // it to the public API.
diff --git a/internal/cgen/builtin.go b/internal/cgen/builtin.go
index 636637e..40848b7 100644
--- a/internal/cgen/builtin.go
+++ b/internal/cgen/builtin.go
@@ -144,6 +144,18 @@
 		b.writes("(iop_a_src > io0_a_src)")
 		return nil
 
+	case t.IDCountSince:
+		b.printf("(a_src.private_impl.buf ? wuffs_base__io__count_since(")
+		if err := g.writeExpr(b, args[0].AsArg().Value(), depth); err != nil {
+			return err
+		}
+		b.printf(", iop_a_src - a_src.private_impl.buf->data.ptr) : 0)")
+		return nil
+
+	case t.IDMark:
+		b.printf("(a_src.private_impl.buf ? ((uint64_t)(iop_a_src - a_src.private_impl.buf->data.ptr)) : 0)")
+		return nil
+
 	case t.IDPosition:
 		b.printf("(a_src.private_impl.buf ? wuffs_base__u64__sat_add(" +
 			"a_src.private_impl.buf->meta.pos, ((uint64_t)(iop_a_src - a_src.private_impl.buf->data.ptr))) : 0)")
@@ -158,6 +170,14 @@
 		b.printf("wuffs_base__io_reader__set_mark(&%ssrc, iop_a_src)", aPrefix)
 		return nil
 
+	case t.IDSince:
+		b.printf("(a_src.private_impl.buf ? wuffs_base__io__since(")
+		if err := g.writeExpr(b, args[0].AsArg().Value(), depth); err != nil {
+			return err
+		}
+		b.printf(", iop_a_src - a_src.private_impl.buf->data.ptr, a_src.private_impl.buf->data.ptr) : wuffs_base__make_slice_u8(NULL, 0))")
+		return nil
+
 	case t.IDSinceMark, t.IDSinceMarkLength:
 		prefix, name := aPrefix, "src"
 		if recv.Operator() == 0 {
@@ -245,6 +265,18 @@
 		b.printf("wuffs_base__io_writer__copy_n_from_slice(&iop_a_dst, io1_a_dst,")
 		return g.writeArgs(b, args, depth)
 
+	case t.IDCountSince:
+		b.printf("(a_dst.private_impl.buf ? wuffs_base__io__count_since(")
+		if err := g.writeExpr(b, args[0].AsArg().Value(), depth); err != nil {
+			return err
+		}
+		b.printf(", iop_a_dst - a_dst.private_impl.buf->data.ptr) : 0)")
+		return nil
+
+	case t.IDMark:
+		b.printf("(a_dst.private_impl.buf ? ((uint64_t)(iop_a_dst - a_dst.private_impl.buf->data.ptr)) : 0)")
+		return nil
+
 	case t.IDPosition:
 		b.printf("(a_dst.private_impl.buf ? wuffs_base__u64__sat_add(" +
 			"a_dst.private_impl.buf->meta.pos, iop_a_dst - a_dst.private_impl.buf->data.ptr) : 0)")
@@ -258,6 +290,14 @@
 		b.printf("wuffs_base__io_writer__set_mark(&%sdst, iop_a_dst)", aPrefix)
 		return nil
 
+	case t.IDSince:
+		b.printf("(a_dst.private_impl.buf ? wuffs_base__io__since(")
+		if err := g.writeExpr(b, args[0].AsArg().Value(), depth); err != nil {
+			return err
+		}
+		b.printf(", iop_a_dst - a_dst.private_impl.buf->data.ptr, a_dst.private_impl.buf->data.ptr) : wuffs_base__make_slice_u8(NULL, 0))")
+		return nil
+
 	case t.IDSinceMark, t.IDSinceMarkLength:
 		prefix, name := aPrefix, "dst"
 		if recv.Operator() == 0 {
diff --git a/internal/cgen/data.go b/internal/cgen/data.go
index 8845f95..621cbd6 100644
--- a/internal/cgen/data.go
+++ b/internal/cgen/data.go
@@ -163,15 +163,15 @@
 	""
 
 const baseIOPrivateH = "" +
-	"// ---------------- I/O\n\nstatic inline bool  //\nwuffs_base__io_buffer__is_valid(wuffs_base__io_buffer buf) {\n  return (buf.data.ptr || (buf.data.len == 0)) &&\n         (buf.data.len >= buf.meta.wi) && (buf.meta.wi >= buf.meta.ri);\n}\n\n// TODO: wuffs_base__io_reader__is_eof is no longer used by Wuffs per se, but\n// it might be handy to programs that use Wuffs. Either delete it, or promote\n// it to the public API.\n//\n// If making this function public (i.e. moving it to base-header.h), it also\n// needs to allow NULL (i.e. implicit, callee-calculated) mark/limit.\n\nstatic inline bool  //\nwuffs_base__io_reader__is_eof(wuffs_base__io_reader o) {\n  wuffs_base__io_buffer* buf = o.private_impl.buf;\n  return buf && buf->meta.closed &&\n         (buf->data.ptr + buf->meta.wi == o.private_impl.limit);\n}\n\nstatic inline bool  //\nwuffs_base__io_reader__is_valid(wuffs_base__io_reader o) {\n  wuffs_base__io_buffer* buf = o.private_impl.buf;\n  // Note: if making this function public (i.e. moving it to base-header.h), it\n  // also " +
-	"needs to allow NULL (i.e. implicit, callee-calculated) mark/limit.\n  return buf ? ((buf->data.ptr <= o.private_impl.mark) &&\n                (o.private_impl.mark <= o.private_impl.limit) &&\n                (o.private_impl.limit <= buf->data.ptr + buf->data.len))\n             : ((o.private_impl.mark == NULL) &&\n                (o.private_impl.limit == NULL));\n}\n\nstatic inline bool  //\nwuffs_base__io_writer__is_valid(wuffs_base__io_writer o) {\n  wuffs_base__io_buffer* buf = o.private_impl.buf;\n  // Note: if making this function public (i.e. moving it to base-header.h), it\n  // also needs to allow NULL (i.e. implicit, callee-calculated) mark/limit.\n  return buf ? ((buf->data.ptr <= o.private_impl.mark) &&\n                (o.private_impl.mark <= o.private_impl.limit) &&\n                (o.private_impl.limit <= buf->data.ptr + buf->data.len))\n             : ((o.private_impl.mark == NULL) &&\n                (o.private_impl.limit == NULL));\n}\n\nstatic inline uint32_t  //\nwuffs_base__io_writer__copy_n_from_history(uin" +
-	"t8_t** ptr_iop_w,\n                                           uint8_t* io0_w,\n                                           uint8_t* io1_w,\n                                           uint32_t length,\n                                           uint32_t distance) {\n  if (!distance) {\n    return 0;\n  }\n  uint8_t* p = *ptr_iop_w;\n  if ((size_t)(p - io0_w) < (size_t)(distance)) {\n    return 0;\n  }\n  uint8_t* q = p - distance;\n  size_t n = (size_t)(io1_w - p);\n  if ((size_t)(length) > n) {\n    length = (uint32_t)(n);\n  } else {\n    n = (size_t)(length);\n  }\n  // TODO: unrolling by 3 seems best for the std/deflate benchmarks, but that\n  // is mostly because 3 is the minimum length for the deflate format. This\n  // function implementation shouldn't overfit to that one format. Perhaps the\n  // copy_n_from_history Wuffs method should also take an unroll hint argument,\n  // and the cgen can look if that argument is the constant expression '3'.\n  //\n  // See also wuffs_base__io_writer__copy_n_from_history_fast below.\n  //\n  " +
-	"// Alternatively, or additionally, have a sloppy_copy_n_from_history method\n  // that copies 8 bytes at a time, possibly writing more than length bytes?\n  for (; n >= 3; n -= 3) {\n    *p++ = *q++;\n    *p++ = *q++;\n    *p++ = *q++;\n  }\n  for (; n; n--) {\n    *p++ = *q++;\n  }\n  *ptr_iop_w = p;\n  return length;\n}\n\n// wuffs_base__io_writer__copy_n_from_history_fast is like the\n// wuffs_base__io_writer__copy_n_from_history function above, but has stronger\n// pre-conditions. The caller needs to prove that:\n//  - distance >  0\n//  - distance <= (*ptr_iop_w - io0_w)\n//  - length   <= (io1_w      - *ptr_iop_w)\nstatic inline uint32_t  //\nwuffs_base__io_writer__copy_n_from_history_fast(uint8_t** ptr_iop_w,\n                                                uint8_t* io0_w,\n                                                uint8_t* io1_w,\n                                                uint32_t length,\n                                                uint32_t distance) {\n  uint8_t* p = *ptr_iop_w;\n  uint8_t* q = p - distance;\n " +
-	" uint32_t n = length;\n  for (; n >= 3; n -= 3) {\n    *p++ = *q++;\n    *p++ = *q++;\n    *p++ = *q++;\n  }\n  for (; n; n--) {\n    *p++ = *q++;\n  }\n  *ptr_iop_w = p;\n  return length;\n}\n\nstatic inline uint32_t  //\nwuffs_base__io_writer__copy_n_from_reader(uint8_t** ptr_iop_w,\n                                          uint8_t* io1_w,\n                                          uint32_t length,\n                                          uint8_t** ptr_iop_r,\n                                          uint8_t* io1_r) {\n  uint8_t* iop_w = *ptr_iop_w;\n  size_t n = length;\n  if (n > ((size_t)(io1_w - iop_w))) {\n    n = (size_t)(io1_w - iop_w);\n  }\n  uint8_t* iop_r = *ptr_iop_r;\n  if (n > ((size_t)(io1_r - iop_r))) {\n    n = (size_t)(io1_r - iop_r);\n  }\n  if (n > 0) {\n    memmove(iop_w, iop_r, n);\n    *ptr_iop_w += n;\n    *ptr_iop_r += n;\n  }\n  return (uint32_t)(n);\n}\n\nstatic inline uint64_t  //\nwuffs_base__io_writer__copy_from_slice(uint8_t** ptr_iop_w,\n                                       uint8_t* io1_w,\n                 " +
-	"                      wuffs_base__slice_u8 src) {\n  uint8_t* iop_w = *ptr_iop_w;\n  size_t n = src.len;\n  if (n > ((size_t)(io1_w - iop_w))) {\n    n = (size_t)(io1_w - iop_w);\n  }\n  if (n > 0) {\n    memmove(iop_w, src.ptr, n);\n    *ptr_iop_w += n;\n  }\n  return (uint64_t)(n);\n}\n\nstatic inline uint32_t  //\nwuffs_base__io_writer__copy_n_from_slice(uint8_t** ptr_iop_w,\n                                         uint8_t* io1_w,\n                                         uint32_t length,\n                                         wuffs_base__slice_u8 src) {\n  uint8_t* iop_w = *ptr_iop_w;\n  size_t n = src.len;\n  if (n > length) {\n    n = length;\n  }\n  if (n > ((size_t)(io1_w - iop_w))) {\n    n = (size_t)(io1_w - iop_w);\n  }\n  if (n > 0) {\n    memmove(iop_w, src.ptr, n);\n    *ptr_iop_w += n;\n  }\n  return (uint32_t)(n);\n}\n\nstatic inline wuffs_base__empty_struct  //\nwuffs_base__io_reader__set(wuffs_base__io_reader* o,\n                           wuffs_base__io_buffer* b,\n                           uint8_t** ptr_iop_r,\n        " +
-	"                   uint8_t** ptr_io1_r,\n                           wuffs_base__slice_u8 data) {\n  b->data = data;\n  b->meta.wi = data.len;\n  b->meta.ri = 0;\n  b->meta.pos = 0;\n  b->meta.closed = false;\n\n  o->private_impl.buf = b;\n  o->private_impl.mark = data.ptr;\n  o->private_impl.limit = data.ptr + data.len;\n  *ptr_iop_r = data.ptr;\n  *ptr_io1_r = data.ptr + data.len;\n\n  wuffs_base__empty_struct ret;\n  ret.private_impl = 0;\n  return ret;\n}\n\nstatic inline wuffs_base__empty_struct  //\nwuffs_base__io_reader__set_limit(wuffs_base__io_reader* o,\n                                 uint8_t* iop_r,\n                                 uint64_t limit) {\n  if (o && (((size_t)(o->private_impl.limit - iop_r)) > limit)) {\n    o->private_impl.limit = iop_r + limit;\n  }\n\n  wuffs_base__empty_struct ret;\n  ret.private_impl = 0;\n  return ret;\n}\n\nstatic inline wuffs_base__empty_struct  //\nwuffs_base__io_reader__set_mark(wuffs_base__io_reader* o, uint8_t* mark) {\n  o->private_impl.mark = mark;\n\n  wuffs_base__empty_struct ret;\n  ret." +
-	"private_impl = 0;\n  return ret;\n}\n\nstatic inline wuffs_base__slice_u8  //\nwuffs_base__io_reader__take(uint8_t** ptr_iop_r, uint8_t* io1_r, uint64_t n) {\n  if (n <= ((size_t)(io1_r - *ptr_iop_r))) {\n    uint8_t* p = *ptr_iop_r;\n    *ptr_iop_r += n;\n    return wuffs_base__make_slice_u8(p, n);\n  }\n  return wuffs_base__make_slice_u8(NULL, 0);\n}\n\nstatic inline wuffs_base__empty_struct  //\nwuffs_base__io_writer__set(wuffs_base__io_writer* o,\n                           wuffs_base__io_buffer* b,\n                           uint8_t** ptr_iop_w,\n                           uint8_t** ptr_io1_w,\n                           wuffs_base__slice_u8 data) {\n  b->data = data;\n  b->meta.wi = 0;\n  b->meta.ri = 0;\n  b->meta.pos = 0;\n  b->meta.closed = false;\n\n  o->private_impl.buf = b;\n  o->private_impl.mark = data.ptr;\n  o->private_impl.limit = data.ptr + data.len;\n  *ptr_iop_w = data.ptr;\n  *ptr_io1_w = data.ptr + data.len;\n\n  wuffs_base__empty_struct ret;\n  ret.private_impl = 0;\n  return ret;\n}\n\nstatic inline wuffs_base__empty_str" +
-	"uct  //\nwuffs_base__io_writer__set_mark(wuffs_base__io_writer* o, uint8_t* mark) {\n  o->private_impl.mark = mark;\n\n  wuffs_base__empty_struct ret;\n  ret.private_impl = 0;\n  return ret;\n}\n\n" +
+	"// ---------------- I/O\n\nstatic inline bool  //\nwuffs_base__io_buffer__is_valid(wuffs_base__io_buffer buf) {\n  return (buf.data.ptr || (buf.data.len == 0)) &&\n         (buf.data.len >= buf.meta.wi) && (buf.meta.wi >= buf.meta.ri);\n}\n\nstatic inline uint64_t  //\nwuffs_base__io__count_since(uint64_t mark, uint64_t index) {\n  if (index >= mark) {\n    return index - mark;\n  }\n  return 0;\n}\n\nstatic inline wuffs_base__slice_u8  //\nwuffs_base__io__since(uint64_t mark, uint64_t index, uint8_t* ptr) {\n  if (index >= mark) {\n    return wuffs_base__make_slice_u8(ptr + mark, index - mark);\n  }\n  return wuffs_base__make_slice_u8(NULL, 0);\n}\n\n// TODO: wuffs_base__io_reader__is_eof is no longer used by Wuffs per se, but\n// it might be handy to programs that use Wuffs. Either delete it, or promote\n// it to the public API.\n//\n// If making this function public (i.e. moving it to base-header.h), it also\n// needs to allow NULL (i.e. implicit, callee-calculated) mark/limit.\n\nstatic inline bool  //\nwuffs_base__io_reader__is_eof(wuf" +
+	"fs_base__io_reader o) {\n  wuffs_base__io_buffer* buf = o.private_impl.buf;\n  return buf && buf->meta.closed &&\n         (buf->data.ptr + buf->meta.wi == o.private_impl.limit);\n}\n\nstatic inline bool  //\nwuffs_base__io_reader__is_valid(wuffs_base__io_reader o) {\n  wuffs_base__io_buffer* buf = o.private_impl.buf;\n  // Note: if making this function public (i.e. moving it to base-header.h), it\n  // also needs to allow NULL (i.e. implicit, callee-calculated) mark/limit.\n  return buf ? ((buf->data.ptr <= o.private_impl.mark) &&\n                (o.private_impl.mark <= o.private_impl.limit) &&\n                (o.private_impl.limit <= buf->data.ptr + buf->data.len))\n             : ((o.private_impl.mark == NULL) &&\n                (o.private_impl.limit == NULL));\n}\n\nstatic inline bool  //\nwuffs_base__io_writer__is_valid(wuffs_base__io_writer o) {\n  wuffs_base__io_buffer* buf = o.private_impl.buf;\n  // Note: if making this function public (i.e. moving it to base-header.h), it\n  // also needs to allow NULL (i.e. implicit," +
+	" callee-calculated) mark/limit.\n  return buf ? ((buf->data.ptr <= o.private_impl.mark) &&\n                (o.private_impl.mark <= o.private_impl.limit) &&\n                (o.private_impl.limit <= buf->data.ptr + buf->data.len))\n             : ((o.private_impl.mark == NULL) &&\n                (o.private_impl.limit == NULL));\n}\n\nstatic inline uint32_t  //\nwuffs_base__io_writer__copy_n_from_history(uint8_t** ptr_iop_w,\n                                           uint8_t* io0_w,\n                                           uint8_t* io1_w,\n                                           uint32_t length,\n                                           uint32_t distance) {\n  if (!distance) {\n    return 0;\n  }\n  uint8_t* p = *ptr_iop_w;\n  if ((size_t)(p - io0_w) < (size_t)(distance)) {\n    return 0;\n  }\n  uint8_t* q = p - distance;\n  size_t n = (size_t)(io1_w - p);\n  if ((size_t)(length) > n) {\n    length = (uint32_t)(n);\n  } else {\n    n = (size_t)(length);\n  }\n  // TODO: unrolling by 3 seems best for the std/deflate benchmarks," +
+	" but that\n  // is mostly because 3 is the minimum length for the deflate format. This\n  // function implementation shouldn't overfit to that one format. Perhaps the\n  // copy_n_from_history Wuffs method should also take an unroll hint argument,\n  // and the cgen can look if that argument is the constant expression '3'.\n  //\n  // See also wuffs_base__io_writer__copy_n_from_history_fast below.\n  //\n  // Alternatively, or additionally, have a sloppy_copy_n_from_history method\n  // that copies 8 bytes at a time, possibly writing more than length bytes?\n  for (; n >= 3; n -= 3) {\n    *p++ = *q++;\n    *p++ = *q++;\n    *p++ = *q++;\n  }\n  for (; n; n--) {\n    *p++ = *q++;\n  }\n  *ptr_iop_w = p;\n  return length;\n}\n\n// wuffs_base__io_writer__copy_n_from_history_fast is like the\n// wuffs_base__io_writer__copy_n_from_history function above, but has stronger\n// pre-conditions. The caller needs to prove that:\n//  - distance >  0\n//  - distance <= (*ptr_iop_w - io0_w)\n//  - length   <= (io1_w      - *ptr_iop_w)\nstatic inline" +
+	" uint32_t  //\nwuffs_base__io_writer__copy_n_from_history_fast(uint8_t** ptr_iop_w,\n                                                uint8_t* io0_w,\n                                                uint8_t* io1_w,\n                                                uint32_t length,\n                                                uint32_t distance) {\n  uint8_t* p = *ptr_iop_w;\n  uint8_t* q = p - distance;\n  uint32_t n = length;\n  for (; n >= 3; n -= 3) {\n    *p++ = *q++;\n    *p++ = *q++;\n    *p++ = *q++;\n  }\n  for (; n; n--) {\n    *p++ = *q++;\n  }\n  *ptr_iop_w = p;\n  return length;\n}\n\nstatic inline uint32_t  //\nwuffs_base__io_writer__copy_n_from_reader(uint8_t** ptr_iop_w,\n                                          uint8_t* io1_w,\n                                          uint32_t length,\n                                          uint8_t** ptr_iop_r,\n                                          uint8_t* io1_r) {\n  uint8_t* iop_w = *ptr_iop_w;\n  size_t n = length;\n  if (n > ((size_t)(io1_w - iop_w))) {\n    n = (size_t)(io" +
+	"1_w - iop_w);\n  }\n  uint8_t* iop_r = *ptr_iop_r;\n  if (n > ((size_t)(io1_r - iop_r))) {\n    n = (size_t)(io1_r - iop_r);\n  }\n  if (n > 0) {\n    memmove(iop_w, iop_r, n);\n    *ptr_iop_w += n;\n    *ptr_iop_r += n;\n  }\n  return (uint32_t)(n);\n}\n\nstatic inline uint64_t  //\nwuffs_base__io_writer__copy_from_slice(uint8_t** ptr_iop_w,\n                                       uint8_t* io1_w,\n                                       wuffs_base__slice_u8 src) {\n  uint8_t* iop_w = *ptr_iop_w;\n  size_t n = src.len;\n  if (n > ((size_t)(io1_w - iop_w))) {\n    n = (size_t)(io1_w - iop_w);\n  }\n  if (n > 0) {\n    memmove(iop_w, src.ptr, n);\n    *ptr_iop_w += n;\n  }\n  return (uint64_t)(n);\n}\n\nstatic inline uint32_t  //\nwuffs_base__io_writer__copy_n_from_slice(uint8_t** ptr_iop_w,\n                                         uint8_t* io1_w,\n                                         uint32_t length,\n                                         wuffs_base__slice_u8 src) {\n  uint8_t* iop_w = *ptr_iop_w;\n  size_t n = src.len;\n  if (n > length) " +
+	"{\n    n = length;\n  }\n  if (n > ((size_t)(io1_w - iop_w))) {\n    n = (size_t)(io1_w - iop_w);\n  }\n  if (n > 0) {\n    memmove(iop_w, src.ptr, n);\n    *ptr_iop_w += n;\n  }\n  return (uint32_t)(n);\n}\n\nstatic inline wuffs_base__empty_struct  //\nwuffs_base__io_reader__set(wuffs_base__io_reader* o,\n                           wuffs_base__io_buffer* b,\n                           uint8_t** ptr_iop_r,\n                           uint8_t** ptr_io1_r,\n                           wuffs_base__slice_u8 data) {\n  b->data = data;\n  b->meta.wi = data.len;\n  b->meta.ri = 0;\n  b->meta.pos = 0;\n  b->meta.closed = false;\n\n  o->private_impl.buf = b;\n  o->private_impl.mark = data.ptr;\n  o->private_impl.limit = data.ptr + data.len;\n  *ptr_iop_r = data.ptr;\n  *ptr_io1_r = data.ptr + data.len;\n\n  wuffs_base__empty_struct ret;\n  ret.private_impl = 0;\n  return ret;\n}\n\nstatic inline wuffs_base__empty_struct  //\nwuffs_base__io_reader__set_limit(wuffs_base__io_reader* o,\n                                 uint8_t* iop_r,\n                        " +
+	"         uint64_t limit) {\n  if (o && (((size_t)(o->private_impl.limit - iop_r)) > limit)) {\n    o->private_impl.limit = iop_r + limit;\n  }\n\n  wuffs_base__empty_struct ret;\n  ret.private_impl = 0;\n  return ret;\n}\n\nstatic inline wuffs_base__empty_struct  //\nwuffs_base__io_reader__set_mark(wuffs_base__io_reader* o, uint8_t* mark) {\n  o->private_impl.mark = mark;\n\n  wuffs_base__empty_struct ret;\n  ret.private_impl = 0;\n  return ret;\n}\n\nstatic inline wuffs_base__slice_u8  //\nwuffs_base__io_reader__take(uint8_t** ptr_iop_r, uint8_t* io1_r, uint64_t n) {\n  if (n <= ((size_t)(io1_r - *ptr_iop_r))) {\n    uint8_t* p = *ptr_iop_r;\n    *ptr_iop_r += n;\n    return wuffs_base__make_slice_u8(p, n);\n  }\n  return wuffs_base__make_slice_u8(NULL, 0);\n}\n\nstatic inline wuffs_base__empty_struct  //\nwuffs_base__io_writer__set(wuffs_base__io_writer* o,\n                           wuffs_base__io_buffer* b,\n                           uint8_t** ptr_iop_w,\n                           uint8_t** ptr_io1_w,\n                           wuffs_" +
+	"base__slice_u8 data) {\n  b->data = data;\n  b->meta.wi = 0;\n  b->meta.ri = 0;\n  b->meta.pos = 0;\n  b->meta.closed = false;\n\n  o->private_impl.buf = b;\n  o->private_impl.mark = data.ptr;\n  o->private_impl.limit = data.ptr + data.len;\n  *ptr_iop_w = data.ptr;\n  *ptr_io1_w = data.ptr + data.len;\n\n  wuffs_base__empty_struct ret;\n  ret.private_impl = 0;\n  return ret;\n}\n\nstatic inline wuffs_base__empty_struct  //\nwuffs_base__io_writer__set_mark(wuffs_base__io_writer* o, uint8_t* mark) {\n  o->private_impl.mark = mark;\n\n  wuffs_base__empty_struct ret;\n  ret.private_impl = 0;\n  return ret;\n}\n\n" +
 	"" +
 	"// ---------------- I/O (Utility)\n\n#define wuffs_base__utility__null_io_reader wuffs_base__null_io_reader\n#define wuffs_base__utility__null_io_writer wuffs_base__null_io_writer\n" +
 	""
diff --git a/internal/cgen/resume.go b/internal/cgen/resume.go
index 09bf05c..1ade456 100644
--- a/internal/cgen/resume.go
+++ b/internal/cgen/resume.go
@@ -236,8 +236,14 @@
 }
 
 func (h *resumabilityHelper) doAssign(r resumabilities, n *a.Assign, depth uint32) error {
-	if err := h.doExpr(r, n.RHS()); err != nil {
-		return err
+	if n.Operator() == t.IDEqQuestion {
+		if err := h.doExpr1(r, n.RHS(), subExprFilterNone, 0); err != nil {
+			return err
+		}
+	} else {
+		if err := h.doExpr(r, n.RHS()); err != nil {
+			return err
+		}
 	}
 
 	if n.LHS() == nil {
@@ -302,12 +308,14 @@
 				}
 				return nil
 			}
+			panic("TODO: unreachable; delete")
 			processOnlySubExprs = true
 
 		case subExprFilterAfterCoroutine:
 			if n.Effect().Coroutine() {
 				return nil
 			}
+			panic("TODO: unreachable; delete")
 		}
 	}
 
diff --git a/lang/builtin/builtin.go b/lang/builtin/builtin.go
index 045aae3..d96706c 100644
--- a/lang/builtin/builtin.go
+++ b/lang/builtin/builtin.go
@@ -217,10 +217,13 @@
 	"io_reader.peek_u64le() u64",
 
 	"io_reader.available() u64",
+	"io_reader.count_since(mark u64) u64",
+	"io_reader.mark() u64",
 	"io_reader.position() u64",
 	"io_reader.set!(s slice u8, closed bool)", // TODO: remove, as it's no longer used?
 	"io_reader.set_limit!(l u64)",             // TODO: remove, as it's no longer used?
 	"io_reader.set_mark!()",
+	"io_reader.since(mark u64) slice u8",
 	"io_reader.since_mark() slice u8",
 	"io_reader.take!(n u64) slice u8",
 
@@ -272,10 +275,13 @@
 	"io_writer.write_fast_u64le!(x u64)",
 
 	"io_writer.available() u64",
+	"io_writer.count_since(mark u64) u64",
+	"io_writer.mark() u64",
 	"io_writer.position() u64",
 	"io_writer.set!(s slice u8)",  // TODO: remove, as it's no longer used?
 	"io_writer.set_limit!(l u64)", // TODO: remove, as it's no longer used?
 	"io_writer.set_mark!()",
+	"io_writer.since(mark u64) slice u8",
 	"io_writer.since_mark() slice u8",
 
 	"io_writer.copy_from_slice!(s slice u8) u64",
diff --git a/lang/token/list.go b/lang/token/list.go
index 85cb5c5..f10c57a 100644
--- a/lang/token/list.go
+++ b/lang/token/list.go
@@ -460,14 +460,17 @@
 	IDDecodeFrameOptions = ID(0x158)
 
 	IDCanUndoByte     = ID(0x160)
-	IDPosition        = ID(0x161)
-	IDSetLimit        = ID(0x162)
-	IDSetMark         = ID(0x163)
-	IDSinceMark       = ID(0x164)
-	IDSinceMarkLength = ID(0x165)
-	IDSkip            = ID(0x166)
-	IDSkipFast        = ID(0x167)
-	IDTake            = ID(0x168)
+	IDCountSince      = ID(0x161)
+	IDMark            = ID(0x162)
+	IDPosition        = ID(0x163)
+	IDSetLimit        = ID(0x164)
+	IDSetMark         = ID(0x165)
+	IDSince           = ID(0x166)
+	IDSinceMark       = ID(0x167)
+	IDSinceMarkLength = ID(0x168)
+	IDSkip            = ID(0x169)
+	IDSkipFast        = ID(0x16A)
+	IDTake            = ID(0x16B)
 
 	IDCopyFromSlice        = ID(0x170)
 	IDCopyNFromHistory     = ID(0x171)
@@ -792,9 +795,12 @@
 	IDDecodeFrameOptions: "decode_frame_options",
 
 	IDCanUndoByte:     "can_undo_byte",
+	IDCountSince:      "count_since",
+	IDMark:            "mark",
 	IDPosition:        "position",
 	IDSetLimit:        "set_limit",
 	IDSetMark:         "set_mark",
+	IDSince:           "since",
 	IDSinceMark:       "since_mark",
 	IDSinceMarkLength: "since_mark_length",
 	IDSkip:            "skip",
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 5d1eda0..b07b06f 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -4526,6 +4526,22 @@
          (buf.data.len >= buf.meta.wi) && (buf.meta.wi >= buf.meta.ri);
 }
 
+static inline uint64_t  //
+wuffs_base__io__count_since(uint64_t mark, uint64_t index) {
+  if (index >= mark) {
+    return index - mark;
+  }
+  return 0;
+}
+
+static inline wuffs_base__slice_u8  //
+wuffs_base__io__since(uint64_t mark, uint64_t index, uint8_t* ptr) {
+  if (index >= mark) {
+    return wuffs_base__make_slice_u8(ptr + mark, index - mark);
+  }
+  return wuffs_base__make_slice_u8(NULL, 0);
+}
+
 // TODO: wuffs_base__io_reader__is_eof is no longer used by Wuffs per se, but
 // it might be handy to programs that use Wuffs. Either delete it, or promote
 // it to the public API.
@@ -6506,6 +6522,7 @@
   self->private_impl.active_coroutine = 0;
   wuffs_base__status status = NULL;
 
+  uint64_t v_mark = 0;
   wuffs_base__status v_status = NULL;
   wuffs_base__slice_u8 v_written = {0};
   uint64_t v_n_copied = 0;
@@ -6536,7 +6553,9 @@
     WUFFS_BASE__COROUTINE_SUSPENSION_POINT_0;
 
     while (true) {
-      wuffs_base__io_writer__set_mark(&a_dst, iop_a_dst);
+      v_mark = (a_dst.private_impl.buf
+                    ? ((uint64_t)(iop_a_dst - a_dst.private_impl.buf->data.ptr))
+                    : 0);
       {
         if (a_dst.private_impl.buf) {
           a_dst.private_impl.buf->meta.wi =
@@ -6560,9 +6579,12 @@
         }
         goto ok;
       }
-      v_written = wuffs_base__make_slice_u8(
-          a_dst.private_impl.mark,
-          (size_t)(iop_a_dst - a_dst.private_impl.mark));
+      v_written =
+          (a_dst.private_impl.buf
+               ? wuffs_base__io__since(
+                     v_mark, iop_a_dst - a_dst.private_impl.buf->data.ptr,
+                     a_dst.private_impl.buf->data.ptr)
+               : wuffs_base__make_slice_u8(NULL, 0));
       if (((uint64_t)(v_written.len)) >= 32768) {
         v_written = wuffs_base__slice_u8__suffix(v_written, 32768);
         wuffs_base__slice_u8__copy_from_slice(
@@ -11548,6 +11570,7 @@
   uint8_t v_c = 0;
   uint8_t v_flags = 0;
   uint16_t v_xlen = 0;
+  uint64_t v_mark = 0;
   uint32_t v_checksum_got = 0;
   uint32_t v_decoded_length_got = 0;
   wuffs_base__status v_status = NULL;
@@ -11751,7 +11774,9 @@
       goto exit;
     }
     while (true) {
-      wuffs_base__io_writer__set_mark(&a_dst, iop_a_dst);
+      v_mark = (a_dst.private_impl.buf
+                    ? ((uint64_t)(iop_a_dst - a_dst.private_impl.buf->data.ptr))
+                    : 0);
       {
         if (a_dst.private_impl.buf) {
           a_dst.private_impl.buf->meta.wi =
@@ -11776,11 +11801,17 @@
       if (!self->private_impl.f_ignore_checksum) {
         v_checksum_got = wuffs_crc32__ieee_hasher__update(
             &self->private_data.f_checksum,
-            wuffs_base__make_slice_u8(
-                a_dst.private_impl.mark,
-                (size_t)(iop_a_dst - a_dst.private_impl.mark)));
+            (a_dst.private_impl.buf
+                 ? wuffs_base__io__since(
+                       v_mark, iop_a_dst - a_dst.private_impl.buf->data.ptr,
+                       a_dst.private_impl.buf->data.ptr)
+                 : wuffs_base__make_slice_u8(NULL, 0)));
         v_decoded_length_got += ((uint32_t)(
-            (((uint64_t)(iop_a_dst - a_dst.private_impl.mark)) & 4294967295)));
+            ((a_dst.private_impl.buf
+                  ? wuffs_base__io__count_since(
+                        v_mark, iop_a_dst - a_dst.private_impl.buf->data.ptr)
+                  : 0) &
+             4294967295)));
       }
       if (wuffs_base__status__is_ok(v_status)) {
         goto label_2_break;
@@ -12038,6 +12069,7 @@
   uint32_t v_checksum_got = 0;
   wuffs_base__status v_status = NULL;
   uint32_t v_checksum_want = 0;
+  uint64_t v_mark = 0;
 
   uint8_t* iop_a_dst = NULL;
   uint8_t* io0_a_dst WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
@@ -12124,7 +12156,9 @@
       goto exit;
     }
     while (true) {
-      wuffs_base__io_writer__set_mark(&a_dst, iop_a_dst);
+      v_mark = (a_dst.private_impl.buf
+                    ? ((uint64_t)(iop_a_dst - a_dst.private_impl.buf->data.ptr))
+                    : 0);
       {
         if (a_dst.private_impl.buf) {
           a_dst.private_impl.buf->meta.wi =
@@ -12149,9 +12183,11 @@
       if (!self->private_impl.f_ignore_checksum) {
         v_checksum_got = wuffs_adler32__hasher__update(
             &self->private_data.f_checksum,
-            wuffs_base__make_slice_u8(
-                a_dst.private_impl.mark,
-                (size_t)(iop_a_dst - a_dst.private_impl.mark)));
+            (a_dst.private_impl.buf
+                 ? wuffs_base__io__since(
+                       v_mark, iop_a_dst - a_dst.private_impl.buf->data.ptr,
+                       a_dst.private_impl.buf->data.ptr)
+                 : wuffs_base__make_slice_u8(NULL, 0)));
       }
       if (wuffs_base__status__is_ok(v_status)) {
         goto label_0_break;
diff --git a/std/deflate/decode_deflate.wuffs b/std/deflate/decode_deflate.wuffs
index a91e9b7..89117ab 100644
--- a/std/deflate/decode_deflate.wuffs
+++ b/std/deflate/decode_deflate.wuffs
@@ -139,21 +139,22 @@
 }
 
 pub func decoder.decode_io_writer?(dst base.io_writer, src base.io_reader, workbuf slice base.u8) {
+	var mark         base.u64
 	var status       base.status
 	var written      slice base.u8
 	var n_copied     base.u64
 	var already_full base.u32[..0x8000]
 
 	while true {
-		args.dst.set_mark!()
+		mark = args.dst.mark()
 		status =? this.decode_blocks?(dst:args.dst, src:args.src)
 		if not status.is_suspension() {
 			return status
 		}
-		// TODO: should "since_mark" be "since_mark!", as the return value lets
-		// you modify the state of args.dst, so future mutations (via the
-		// slice) can change the veracity of any args.dst assertions?
-		written = args.dst.since_mark()
+		// TODO: should "since" be "since!", as the return value lets you
+		// modify the state of args.dst, so future mutations (via the slice)
+		// can change the veracity of any args.dst assertions?
+		written = args.dst.since(mark:mark)
 		// Append written, the decoded output, to the history ringbuffer.
 		if written.length() >= 0x8000 {
 			// If written is longer than the ringbuffer, we can ignore the
diff --git a/std/gzip/decode_gzip.wuffs b/std/gzip/decode_gzip.wuffs
index 297b191..49a7cb6 100644
--- a/std/gzip/decode_gzip.wuffs
+++ b/std/gzip/decode_gzip.wuffs
@@ -46,6 +46,7 @@
 	var c                   base.u8
 	var flags               base.u8
 	var xlen                base.u16
+	var mark                base.u64
 	var checksum_got        base.u32
 	var decoded_length_got  base.u32
 	var status              base.status
@@ -111,11 +112,11 @@
 
 	// Decode and checksum the DEFLATE-encoded payload.
 	while true {
-		args.dst.set_mark!()
+		mark = args.dst.mark()
 		status =? this.flate.decode_io_writer?(dst:args.dst, src:args.src, workbuf:args.workbuf)
 		if not this.ignore_checksum {
-			checksum_got = this.checksum.update!(x:args.dst.since_mark())
-			decoded_length_got ~mod+= ((args.dst.since_mark().length() & 0xFFFFFFFF) as base.u32)
+			checksum_got = this.checksum.update!(x:args.dst.since(mark:mark))
+			decoded_length_got ~mod+= ((args.dst.count_since(mark:mark) & 0xFFFFFFFF) as base.u32)
 		}
 		if status.is_ok() {
 			break
diff --git a/std/zlib/decode_zlib.wuffs b/std/zlib/decode_zlib.wuffs
index b7b5817..fcf6c65 100644
--- a/std/zlib/decode_zlib.wuffs
+++ b/std/zlib/decode_zlib.wuffs
@@ -49,6 +49,7 @@
 	var checksum_got  base.u32
 	var status        base.status
 	var checksum_want base.u32
+	var mark          base.u64
 
 	x = args.src.read_u16be?()
 	if ((x >> 8) & 0x0F) <> 0x08 {
@@ -66,10 +67,10 @@
 
 	// Decode and checksum the DEFLATE-encoded payload.
 	while true {
-		args.dst.set_mark!()
+		mark = args.dst.mark()
 		status =? this.flate.decode_io_writer?(dst:args.dst, src:args.src, workbuf:args.workbuf)
 		if not this.ignore_checksum {
-			checksum_got = this.checksum.update!(x:args.dst.since_mark())
+			checksum_got = this.checksum.update!(x:args.dst.since(mark:mark))
 		}
 		if status.is_ok() {
 			break