Add io_writer.history_available method

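Also change copy_n_from_history to measure history from the start of the
buffer (buf->data.ptr) instead of from the buffer mark. The new
history_available method returns the distance from the buffer start to
the writer's current pointer, and the slow deflate decoder now uses it
in place of since_mark().length().

The benchmarks below show a slowdown with clang5 (roughly 6% to 13%),
while gcc7 stays within about 2%.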

name                                             old speed     new speed     delta

wuffs_deflate_decode_1k_full_init/clang5         151MB/s ± 0%  134MB/s ± 1%  -11.11%  (p=0.000 n=10+10)
wuffs_deflate_decode_1k_part_init/clang5         187MB/s ± 0%  162MB/s ± 1%  -13.11%  (p=0.000 n=10+10)
wuffs_deflate_decode_10k_full_init/clang5        241MB/s ± 0%  220MB/s ± 0%   -8.76%  (p=0.000 n=10+10)
wuffs_deflate_decode_10k_part_init/clang5        248MB/s ± 0%  225MB/s ± 0%   -9.12%  (p=0.000 n=10+9)
wuffs_deflate_decode_100k_just_one_read/clang5   288MB/s ± 0%  270MB/s ± 0%   -6.16%  (p=0.000 n=10+10)
wuffs_deflate_decode_100k_many_big_reads/clang5  239MB/s ± 0%  225MB/s ± 0%   -5.84%  (p=0.000 n=10+10)

wuffs_deflate_decode_1k_full_init/gcc7           153MB/s ± 1%  151MB/s ± 1%   -1.13%  (p=0.000 n=10+9)
wuffs_deflate_decode_1k_part_init/gcc7           187MB/s ± 1%  186MB/s ± 1%     ~     (p=0.052 n=10+10)
wuffs_deflate_decode_10k_full_init/gcc7          261MB/s ± 0%  256MB/s ± 1%   -1.83%  (p=0.000 n=10+10)
wuffs_deflate_decode_10k_part_init/gcc7          269MB/s ± 1%  264MB/s ± 1%   -1.62%  (p=0.000 n=10+10)
wuffs_deflate_decode_100k_just_one_read/gcc7     313MB/s ± 0%  310MB/s ± 1%   -0.69%  (p=0.000 n=9+10)
wuffs_deflate_decode_100k_many_big_reads/gcc7    247MB/s ± 0%  247MB/s ± 0%     ~     (p=0.089 n=10+10)
diff --git a/internal/cgen/builtin.go b/internal/cgen/builtin.go
index 9b43902..9276a38 100644
--- a/internal/cgen/builtin.go
+++ b/internal/cgen/builtin.go
@@ -245,7 +245,7 @@
 			suffix = "_fast"
 		}
 		b.printf("wuffs_base__io_writer__copy_n_from_history%s("+
-			"&iop_a_dst, %sdst.private_impl.mark, io1_a_dst",
+			"&iop_a_dst, %sdst.private_impl.buf->data.ptr, io1_a_dst",
 			suffix, aPrefix)
 		for _, o := range args {
 			b.writeb(',')
@@ -281,6 +281,10 @@
 		b.printf(", iop_a_dst - a_dst.private_impl.buf->data.ptr) : 0)")
 		return nil
 
+	case t.IDHistoryAvailable:
+		b.printf("((uint64_t)(iop_%s%s - %s%s.private_impl.buf->data.ptr))", prefix, name, prefix, name)
+		return nil
+
 	case t.IDMark:
 		b.printf("(a_dst.private_impl.buf ? ((uint64_t)(iop_a_dst - a_dst.private_impl.buf->data.ptr)) : 0)")
 		return nil
diff --git a/lang/builtin/builtin.go b/lang/builtin/builtin.go
index d96706c..25ab7c7 100644
--- a/lang/builtin/builtin.go
+++ b/lang/builtin/builtin.go
@@ -276,6 +276,7 @@
 
 	"io_writer.available() u64",
 	"io_writer.count_since(mark u64) u64",
+	"io_writer.history_available() u64",
 	"io_writer.mark() u64",
 	"io_writer.position() u64",
 	"io_writer.set!(s slice u8)",  // TODO: remove, as it's no longer used?
diff --git a/lang/token/list.go b/lang/token/list.go
index f10c57a..53756ba 100644
--- a/lang/token/list.go
+++ b/lang/token/list.go
@@ -459,18 +459,19 @@
 
 	IDDecodeFrameOptions = ID(0x158)
 
-	IDCanUndoByte     = ID(0x160)
-	IDCountSince      = ID(0x161)
-	IDMark            = ID(0x162)
-	IDPosition        = ID(0x163)
-	IDSetLimit        = ID(0x164)
-	IDSetMark         = ID(0x165)
-	IDSince           = ID(0x166)
-	IDSinceMark       = ID(0x167)
-	IDSinceMarkLength = ID(0x168)
-	IDSkip            = ID(0x169)
-	IDSkipFast        = ID(0x16A)
-	IDTake            = ID(0x16B)
+	IDCanUndoByte      = ID(0x160)
+	IDCountSince       = ID(0x161)
+	IDHistoryAvailable = ID(0x162)
+	IDMark             = ID(0x163)
+	IDPosition         = ID(0x164)
+	IDSetLimit         = ID(0x165)
+	IDSetMark          = ID(0x166)
+	IDSince            = ID(0x167)
+	IDSinceMark        = ID(0x168)
+	IDSinceMarkLength  = ID(0x169)
+	IDSkip             = ID(0x16A)
+	IDSkipFast         = ID(0x16B)
+	IDTake             = ID(0x16C)
 
 	IDCopyFromSlice        = ID(0x170)
 	IDCopyNFromHistory     = ID(0x171)
@@ -794,18 +795,19 @@
 
 	IDDecodeFrameOptions: "decode_frame_options",
 
-	IDCanUndoByte:     "can_undo_byte",
-	IDCountSince:      "count_since",
-	IDMark:            "mark",
-	IDPosition:        "position",
-	IDSetLimit:        "set_limit",
-	IDSetMark:         "set_mark",
-	IDSince:           "since",
-	IDSinceMark:       "since_mark",
-	IDSinceMarkLength: "since_mark_length",
-	IDSkip:            "skip",
-	IDSkipFast:        "skip_fast",
-	IDTake:            "take",
+	IDCanUndoByte:      "can_undo_byte",
+	IDCountSince:       "count_since",
+	IDHistoryAvailable: "history_available",
+	IDMark:             "mark",
+	IDPosition:         "position",
+	IDSetLimit:         "set_limit",
+	IDSetMark:          "set_mark",
+	IDSince:            "since",
+	IDSinceMark:        "since_mark",
+	IDSinceMarkLength:  "since_mark_length",
+	IDSkip:             "skip",
+	IDSkipFast:         "skip_fast",
+	IDTake:             "take",
 
 	IDCopyFromSlice:        "copy_from_slice",
 	IDCopyNFromHistory:     "copy_n_from_history",
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index c5e9a9b..e0a0e06 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -7753,7 +7753,7 @@
         }
       }
       wuffs_base__io_writer__copy_n_from_history_fast(
-          &iop_a_dst, a_dst.private_impl.mark, io1_a_dst, v_length,
+          &iop_a_dst, a_dst.private_impl.buf->data.ptr, io1_a_dst, v_length,
           (v_dist_minus_1 + 1));
       goto label_2_break;
     }
@@ -8078,10 +8078,10 @@
       }
       while (true) {
         if (((uint64_t)((v_dist_minus_1 + 1))) >
-            ((uint64_t)(iop_a_dst - a_dst.private_impl.mark))) {
-          v_hdist =
-              ((uint32_t)((((uint64_t)((v_dist_minus_1 + 1))) -
-                           ((uint64_t)(iop_a_dst - a_dst.private_impl.mark)))));
+            ((uint64_t)(iop_a_dst - a_dst.private_impl.buf->data.ptr))) {
+          v_hdist = ((uint32_t)(
+              (((uint64_t)((v_dist_minus_1 + 1))) -
+               ((uint64_t)(iop_a_dst - a_dst.private_impl.buf->data.ptr)))));
           if (v_length > v_hdist) {
             v_length -= v_hdist;
             v_hlen = v_hdist;
@@ -8140,7 +8140,7 @@
           }
         }
         v_n_copied = wuffs_base__io_writer__copy_n_from_history(
-            &iop_a_dst, a_dst.private_impl.mark, io1_a_dst, v_length,
+            &iop_a_dst, a_dst.private_impl.buf->data.ptr, io1_a_dst, v_length,
             (v_dist_minus_1 + 1));
         if (v_length <= v_n_copied) {
           v_length = 0;
diff --git a/std/deflate/decode_huffman_slow.wuffs b/std/deflate/decode_huffman_slow.wuffs
index 2fb085f..2c66e9d 100644
--- a/std/deflate/decode_huffman_slow.wuffs
+++ b/std/deflate/decode_huffman_slow.wuffs
@@ -203,11 +203,11 @@
 
 		while true {
 			// Copy from this.history.
-			if ((dist_minus_1 + 1) as base.u64) > args.dst.since_mark().length() {
+			if ((dist_minus_1 + 1) as base.u64) > args.dst.history_available() {
 				// Set (hlen, hdist) to be the length-distance pair to copy
 				// from this.history, and (length, distance) to be the
 				// remaining length-distance pair to copy from args.dst.
-				hdist = (((dist_minus_1 + 1) as base.u64) - args.dst.since_mark().length()) as base.u32
+				hdist = (((dist_minus_1 + 1) as base.u64) - args.dst.history_available()) as base.u32
 				if length > hdist {
 					assert hdist < length via "a < b: b > a"()
 					assert hdist < 0x8000 via "a < b: a < c; c <= b"(c:length)