Have flatecut emit Stored blocks if shorter
diff --git a/lib/flatecut/flatecut.go b/lib/flatecut/flatecut.go
index 07919bb..0557168 100644
--- a/lib/flatecut/flatecut.go
+++ b/lib/flatecut/flatecut.go
@@ -34,6 +34,7 @@
 
 	errInternalInconsistentDecodedLen = errors.New("flatecut: internal: inconsistent decodedLen")
 	errInternalNoProgress             = errors.New("flatecut: internal: no progress")
+	errInternalReplaceWithSingleBlock = errors.New("flatecut: internal: replace with single block")
 	errInternalSomeProgress           = errors.New("flatecut: internal: some progress")
 
 	errInvalidBadBlockLength = errors.New("flatecut: invalid input: bad block length")
@@ -324,15 +325,47 @@
 	}
 }
 
-// cutEmpty sets encoding[:2] to contain valid DEFLATE-encoded data. Decoding
-// that data produces zero bytes.
-func cutEmpty(encoded []byte, maxEncodedLen int) (encodedLen int, decodedLen int, retErr error) {
+// cutSingleBlock is an implementation of Cut whose output consists of a single
+// DEFLATE block.
+//
+// If maxEncodedLen is sufficiently large, this will be a Stored block (i.e. a
+// header followed by literal bytes). Otherwise, it will set encoding[:2] so
+// that decoding produces zero bytes.
+//
+// A precondition is that maxEncodedLen >= SmallestValidMaxEncodedLen.
+func cutSingleBlock(encoded []byte, maxEncodedLen int) (encodedLen int, decodedLen int, retErr error) {
 	if maxEncodedLen < SmallestValidMaxEncodedLen {
 		panic("unreachable")
 	}
+
+	// Try re-encoding as a single Stored block: up to 0xFFFF literal bytes,
+	// with a 5 byte prefix.
+	if maxEncodedLen > 5 {
+		n := maxEncodedLen - 5
+		if n > 0xFFFF {
+			n = 0xFFFF
+		}
+
+		buf := make([]byte, n)
+		n, err := io.ReadFull(flate.NewReader(bytes.NewReader(encoded)), buf)
+		if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
+			return 0, 0, err
+		}
+
+		if n > 0 {
+			encoded[0] = 0x01 // finalBlock = true, blockType = 0 (Stored).
+			encoded[1] = uint8(n >> 0)
+			encoded[2] = uint8(n >> 8)
+			encoded[3] = ^encoded[1]
+			encoded[4] = ^encoded[2]
+			copy(encoded[5:], buf[:n])
+			return n + 5, n, nil
+		}
+	}
+
 	// Set encoded[:2] to hold:
 	//  - 1 bit   ...._...._...._...1  finalBlock   = true.
-	//  - 2 bits  ...._...._...._.01.  blockType    = 1 (static Huffman).
+	//  - 2 bits  ...._...._...._.01.  blockType    = 1 (Static Huffman).
 	//  - 7 bits  ...._..00_0000_0...  litLenSymbol = 256 (end-of-block).
 	//  - 6 bits  0000_00.._...._....  padding.
 	encoded[0] = 0x03
@@ -438,9 +471,9 @@
 		case 0:
 			err = c.doStored()
 		case 1:
-			err = c.doStaticHuffman()
+			err = c.doStaticHuffman(prevFinalBlockIndex < 0)
 		case 2:
-			err = c.doDynamicHuffman()
+			err = c.doDynamicHuffman(prevFinalBlockIndex < 0)
 		case 3:
 			return 0, 0, errInvalidBadBlockType
 		}
@@ -460,7 +493,7 @@
 
 		case errInternalNoProgress:
 			if prevFinalBlockIndex < 0 {
-				return cutEmpty(c.bits.bytes, c.maxEncodedLen)
+				return cutSingleBlock(c.bits.bytes, c.maxEncodedLen)
 			}
 
 			// Un-read to just before the finalBlock bit.
@@ -482,6 +515,9 @@
 			mask := uint32(1) << n
 			c.bits.bytes[finalBlockIndex-1] |= uint8(mask)
 
+		case errInternalReplaceWithSingleBlock:
+			return cutSingleBlock(c.bits.bytes, c.maxEncodedLen)
+
 		default:
 			return 0, 0, err
 
@@ -543,7 +579,7 @@
 	return errInternalSomeProgress
 }
 
-func (c *cutter) doStaticHuffman() error {
+func (c *cutter) doStaticHuffman(isFirstBlock bool) error {
 	const (
 		numLCodes = 288
 		numDCodes = 32
@@ -568,10 +604,10 @@
 		lengths[i] = 5
 	}
 
-	return c.doHuffman(lengths[:numLCodes], lengths[numLCodes:])
+	return c.doHuffman(isFirstBlock, lengths[:numLCodes], lengths[numLCodes:])
 }
 
-func (c *cutter) doDynamicHuffman() error {
+func (c *cutter) doDynamicHuffman(isFirstBlock bool) error {
 	numLCodes := 257 + c.bits.take(5)
 	if numLCodes < 0 {
 		return errInvalidNotEnoughData
@@ -644,10 +680,10 @@
 		}
 	}
 
-	return c.doHuffman(lengths[:numLCodes], lengths[numLCodes:])
+	return c.doHuffman(isFirstBlock, lengths[:numLCodes], lengths[numLCodes:])
 }
 
-func (c *cutter) doHuffman(lLengths []uint32, dLengths []uint32) error {
+func (c *cutter) doHuffman(isFirstBlock bool, lLengths []uint32, dLengths []uint32) error {
 	err := error(nil)
 	if c.endCodeBits, c.endCodeNBits, err = c.lHuff.construct(lLengths); err != nil {
 		return err
@@ -730,6 +766,19 @@
 	if checkpointIndex < 0 {
 		return errInternalNoProgress
 	}
+
+	// If we're the first block in the DEFLATE stream, check if we would be
+	// better off replacing the Huffman block with a Stored block.
+	if isFirstBlock && (c.maxEncodedLen > 5) {
+		n := c.maxEncodedLen - 5
+		if n > 0xFFFF {
+			n = 0xFFFF
+		}
+		if c.decodedLen < int32(n) {
+			return errInternalReplaceWithSingleBlock
+		}
+	}
+
 	c.bits.index = checkpointIndex
 	c.bits.nBits = checkpointNBits
 	for c.bits.nBits >= 8 {
diff --git a/lib/flatecut/flatecut_test.go b/lib/flatecut/flatecut_test.go
index 5ba070e..5b0035e 100644
--- a/lib/flatecut/flatecut_test.go
+++ b/lib/flatecut/flatecut_test.go
@@ -42,6 +42,14 @@
 	return flate.NewReader(r), nil
 }
 
+func decodeFlate(src []byte) string {
+	dst, err := ioutil.ReadAll(flate.NewReader(bytes.NewReader(src)))
+	if err != nil {
+		return err.Error()
+	}
+	return string(dst)
+}
+
 func TestHuffmanDecode(t *testing.T) {
 	// This exercises the "ABCDEFGH" example from RFC 1951 section 3.2.2,
 	// discussed in the "type huffman" doc comment.
@@ -112,38 +120,112 @@
 	}
 }
 
-func TestDegenerateHuffmanUnused(t *testing.T) {
-	decode := func(src []byte) string {
-		dst, err := ioutil.ReadAll(
-			flate.NewReader(bytes.NewReader(src)))
-		if err != nil {
-			return err.Error()
-		}
-		return string(dst)
-	}
+// A DEFLATE encoding of the first 64 bytes of the AUTHORS file in the
+// repository root directory.
+//
+// The encoding uses a Dynamic Huffman block, but one whose H-D Huffman
+// distance tree is degenerate (there's a mapping for the "0" code but no
+// mapping for the "1" code) and unused.
+//
+// The first 25-30 bytes contain the encoded H-L and H-D Huffman trees. The
+// last 30-35 bytes contain the actual payload, encoded with H-L.
+//
+// The high 7 bits of the final byte is unused padding.
+const (
+	degenerateHuffmanDec = "# This is the official list of Wuffs authors for copyright purpo"
+	degenerateHuffmanEnc = "" +
+		"\x05\xc0\xc1\x09\xc5\x30\x0c\x03\xd0\x55\x04\x7f\xa5\x0f\x3d\x87" +
+		"\x50\xd5\x82\x80\x82\xed\x1c\xba\x7d\xdf\x0f\xff\x50\x41\x85\x8e" +
+		"\x1b\x26\x35\x35\x16\x96\xaa\x61\xe2\x3a\x64\x61\x9c\x0e\x67\x81" +
+		"\x4e\x4c\xef\x37\xf5\x44\x63\x9f\xdc\xfe\x00"
+)
 
-	full, err := ioutil.ReadFile("../../test/data/artificial/deflate-degenerate-huffman-unused.deflate")
-	if err != nil {
-		t.Fatalf("ReadFile: %v", err)
+func TestReplaceHuffmanWithStored(t *testing.T) {
+	const dec = degenerateHuffmanDec
+	const enc = degenerateHuffmanEnc
+	if (len(dec) != 64) || (len(enc) != 59) {
+		panic("inconsistent const string lengths")
 	}
-	if len(full) != 15 {
-		t.Fatalf("len(full): got %d, want %d", len(full), 15)
-	}
-
-	if got, want := decode(full), "foo"; got != want {
+	if got, want := decodeFlate([]byte(enc)), dec; got != want {
 		t.Fatalf("before Cut: got %q, want %q", got, want)
 	}
 
-	if encLen, decLen, err := Cut(nil, full, 14); err != nil {
+	for i := 4; i <= 59; i += 5 {
+		b := []byte(enc)
+		encLen, decLen, err := Cut(nil, b, i)
+		if err != nil {
+			t.Errorf("i=%d: %v", i, err)
+			continue
+		}
+		if encLen < 1 {
+			t.Errorf("i=%d: encLen: got %d, want >= 1", i, encLen)
+			continue
+		} else if encLen > len(enc) {
+			t.Errorf("i=%d: encLen: got %d, want <= %d", i, encLen, len(enc))
+			continue
+		}
+		// If we can make some progress (decLen > 0), even if the input uses a
+		// Huffman block, one option is to re-encode to a single Stored block
+		// (for 5 bytes of overhead). It's single because len(dec) < 0xFFFF.
+		// Regardless of whether the cut form uses a Huffman or Stored block,
+		// we should be able to produce at least i-5 bytes of decoded output.
+		if (decLen > 0) && (i > 5) && (decLen < i-5) {
+			t.Errorf("i=%d: decLen: got %d, want >= %d", i, decLen, i-5)
+			continue
+		} else if decLen > len(dec) {
+			t.Errorf("i=%d: decLen: got %d, want <= %d", i, decLen, len(dec))
+			continue
+		}
+
+		if got, want := decodeFlate(b[:encLen]), dec[:decLen]; got != want {
+			t.Errorf("i=%d: after Cut: got %q, want %q", i, got, want)
+			continue
+		}
+
+		// Check that we are using a space-efficient block type.
+		{
+			got := (b[0] >> 1) & 3
+			want := uint8(0xFF)
+
+			switch i {
+			case 4:
+				want = 1 // Static Huffman, for a decLen of 0.
+			case 9:
+				want = 0 // Stored.
+			case 59:
+				want = 2 // Dynamic Huffman.
+			default:
+				continue
+			}
+
+			if got != want {
+				t.Errorf("i=%d: block type: got %d, want %d", i, got, want)
+			}
+		}
+	}
+}
+
+func TestDegenerateHuffmanUnused(t *testing.T) {
+	const dec = degenerateHuffmanDec
+	const enc = degenerateHuffmanEnc
+
+	// Cutting 1 byte off the end of the encoded form will lead to cutting n
+	// bytes off the decoded form. Coincidentally, n equals 1, even though each
+	// decoded byte (8 bits) is packed into smaller number of bits, as most of
+	// the final encoded byte's bits are unused padding.
+	const n = 1
+
+	b := []byte(enc)
+	encLen, decLen, err := Cut(nil, b, len(enc)-1)
+	if err != nil {
 		t.Fatalf("Cut: %v", err)
-	} else if encLen != 14 {
-		t.Fatalf("encLen: got %d, want %d", encLen, 14)
-	} else if decLen != 2 {
-		t.Fatalf("decLen: got %d, want %d", decLen, 2)
+	} else if encLen != len(enc)-1 {
+		t.Fatalf("encLen: got %d, want %d", encLen, len(enc)-1)
+	} else if decLen != len(dec)-n {
+		t.Fatalf("decLen: got %d, want %d", decLen, len(dec)-n)
 	}
 
-	if got, want := decode(full[:14]), "fo"; got != want {
+	if got, want := decodeFlate(b[:encLen]), dec[:decLen]; got != want {
 		t.Fatalf("after Cut: got %q, want %q", got, want)
 	}
-
 }