Update zopfli to current google state.
diff --git a/Makefile b/Makefile
index c31bd80..b3a0ac4 100644
--- a/Makefile
+++ b/Makefile
@@ -17,7 +17,7 @@
 
 try.o: try.c try.h
 
-deflate.o: $(ZOPFLI)deflate.c $(ZOPFLI)deflate.h $(ZOPFLI)blocksplitter.h $(ZOPFLI)lz77.h $(ZOPFLI)squeeze.h $(ZOPFLI)tree.h $(ZOPFLI)zopfli.h $(ZOPFLI)cache.h $(ZOPFLI)hash.h $(ZOPFLI)util.h
+deflate.o: $(ZOPFLI)deflate.c $(ZOPFLI)deflate.h $(ZOPFLI)blocksplitter.h $(ZOPFLI)lz77.h $(ZOPFLI)squeeze.h $(ZOPFLI)tree.h $(ZOPFLI)zopfli.h $(ZOPFLI)cache.h $(ZOPFLI)hash.h $(ZOPFLI)util.h $(ZOPFLI)symbols.h
 	$(CC) $(CFLAGS) -c $(ZOPFLI)deflate.c
 
 blocksplitter.o: $(ZOPFLI)blocksplitter.c $(ZOPFLI)blocksplitter.h $(ZOPFLI)deflate.h $(ZOPFLI)lz77.h $(ZOPFLI)squeeze.h $(ZOPFLI)tree.h $(ZOPFLI)util.h $(ZOPFLI)zopfli.h $(ZOPFLI)cache.h $(ZOPFLI)hash.h
@@ -26,7 +26,7 @@
 tree.o: $(ZOPFLI)tree.c $(ZOPFLI)tree.h $(ZOPFLI)katajainen.h $(ZOPFLI)util.h
 	$(CC) $(CFLAGS) -c $(ZOPFLI)tree.c
 
-lz77.o: $(ZOPFLI)lz77.c $(ZOPFLI)lz77.h $(ZOPFLI)util.h $(ZOPFLI)cache.h $(ZOPFLI)hash.h $(ZOPFLI)zopfli.h
+lz77.o: $(ZOPFLI)lz77.c $(ZOPFLI)lz77.h $(ZOPFLI)util.h $(ZOPFLI)cache.h $(ZOPFLI)hash.h $(ZOPFLI)zopfli.h $(ZOPFLI)symbols.h
 	$(CC) $(CFLAGS) -c $(ZOPFLI)lz77.c
 
 cache.o: $(ZOPFLI)cache.c $(ZOPFLI)cache.h $(ZOPFLI)util.h
@@ -38,7 +38,7 @@
 util.o: $(ZOPFLI)util.c $(ZOPFLI)util.h
 	$(CC) $(CFLAGS) -c $(ZOPFLI)util.c
 
-squeeze.o: $(ZOPFLI)squeeze.c $(ZOPFLI)squeeze.h $(ZOPFLI)blocksplitter.h $(ZOPFLI)deflate.h $(ZOPFLI)tree.h $(ZOPFLI)util.h $(ZOPFLI)zopfli.h $(ZOPFLI)lz77.h $(ZOPFLI)cache.h $(ZOPFLI)hash.h
+squeeze.o: $(ZOPFLI)squeeze.c $(ZOPFLI)squeeze.h $(ZOPFLI)blocksplitter.h $(ZOPFLI)deflate.h $(ZOPFLI)tree.h $(ZOPFLI)util.h $(ZOPFLI)zopfli.h $(ZOPFLI)lz77.h $(ZOPFLI)cache.h $(ZOPFLI)hash.h $(ZOPFLI)symbols.h
 	$(CC) $(CFLAGS) -c $(ZOPFLI)squeeze.c
 
 katajainen.o: $(ZOPFLI)katajainen.c $(ZOPFLI)katajainen.h
diff --git a/zopfli/CONTRIBUTING.md b/zopfli/CONTRIBUTING.md
new file mode 100644
index 0000000..1ba8539
--- /dev/null
+++ b/zopfli/CONTRIBUTING.md
@@ -0,0 +1,24 @@
+Want to contribute? Great! First, read this page (including the small print at the end).
+
+### Before you contribute
+Before we can use your code, you must sign the
+[Google Individual Contributor License Agreement](https://developers.google.com/open-source/cla/individual?csw=1)
+(CLA), which you can do online. The CLA is necessary mainly because you own the
+copyright to your changes, even after your contribution becomes part of our
+codebase, so we need your permission to use and distribute your code. We also
+need to be sure of various other things—for instance that you'll tell us if you
+know that your code infringes on other people's patents. You don't have to sign
+the CLA until after you've submitted your code for review and a member has
+approved it, but you must do it before we can put your code into our codebase.
+Before you start working on a larger contribution, you should get in touch with
+us first through the issue tracker with your idea so that we can help out and
+possibly guide you. Coordinating up front makes it much easier to avoid
+frustration later on.
+
+### Code reviews
+All submissions, including submissions by project members, require review. We
+use Github pull requests for this purpose.
+
+### The small print
+Contributions made by corporations are covered by a different agreement than
+the one above, the Software Grant and Corporate Contributor License Agreement.
diff --git a/zopfli/CONTRIBUTORS b/zopfli/CONTRIBUTORS
index a1800be..6eccb31 100644
--- a/zopfli/CONTRIBUTORS
+++ b/zopfli/CONTRIBUTORS
@@ -1,7 +1,9 @@
 Mark Adler
 Jyrki Alakuijala
 Frédéric Kayser
+Jeffrey Lim
 Daniel Reed
 Huzaifa Sidhpurwala
 Péter Szabó
 Lode Vandevenne
+Derek Buitenhuis
diff --git a/zopfli/README b/zopfli/README
index b28b189..37f6fc3 100644
--- a/zopfli/README
+++ b/zopfli/README
@@ -25,8 +25,15 @@
 create very well compressed gzip files. Currently the makefile builds this
 program with the library statically linked in.
 
-To build the binary, use "make". To build the library as a shared Linux library,
-use "make libzopfli". The source code of Zopfli is under src/zopfli.
+The source code of Zopfli is under src/zopfli. Build instructions:
+
+To build zopfli, compile all .c source files under src/zopfli to a single binary
+with C, and link to the standard C math library, e.g.:
+gcc src/zopfli/*.c -O2 -W -Wall -Wextra -Wno-unused-function -ansi -pedantic -lm -o zopfli
+
+A makefile is provided as well, but only for linux. Use "make" to build the
+binary, "make libzopfli" to build it as a shared library. For other platforms,
+please use the build instructions above instead.
 
 Zopfli Compression Algorithm was created by Lode Vandevenne and Jyrki
 Alakuijala, based on an algorithm by Jyrki Alakuijala.
diff --git a/zopfli/src/zopfli/blocksplitter.c b/zopfli/src/zopfli/blocksplitter.c
index 68f5ff3..161783d 100644
--- a/zopfli/src/zopfli/blocksplitter.c
+++ b/zopfli/src/zopfli/blocksplitter.c
@@ -24,7 +24,6 @@
 #include <stdlib.h>
 
 #include "deflate.h"
-#include "lz77.h"
 #include "squeeze.h"
 #include "tree.h"
 #include "util.h"
@@ -39,9 +38,10 @@
 /*
 Finds minimum of function f(i) where is is of type size_t, f(i) is of type
 double, i is in range start-end (excluding end).
+Outputs the minimum value in *smallest and returns the index of this value.
 */
 static size_t FindMinimum(FindMinimumFun f, void* context,
-                          size_t start, size_t end) {
+                          size_t start, size_t end, double* smallest) {
   if (end - start < 1024) {
     double best = ZOPFLI_LARGE_FLOAT;
     size_t result = start;
@@ -53,6 +53,7 @@
         result = i;
       }
     }
+    *smallest = best;
     return result;
   } else {
     /* Try to find minimum faster by recursively checking multiple points. */
@@ -88,6 +89,7 @@
       pos = p[besti];
       lastbest = best;
     }
+    *smallest = lastbest;
     return pos;
 #undef NUM
   }
@@ -103,16 +105,13 @@
 lstart: start of block
 lend: end of block (not inclusive)
 */
-static double EstimateCost(const unsigned short* litlens,
-                           const unsigned short* dists,
+static double EstimateCost(const ZopfliLZ77Store* lz77,
                            size_t lstart, size_t lend) {
-  return ZopfliCalculateBlockSize(litlens, dists, lstart, lend, 2);
+  return ZopfliCalculateBlockSizeAutoType(lz77, lstart, lend);
 }
 
 typedef struct SplitCostContext {
-  const unsigned short* litlens;
-  const unsigned short* dists;
-  size_t llsize;
+  const ZopfliLZ77Store* lz77;
   size_t start;
   size_t end;
 } SplitCostContext;
@@ -125,8 +124,7 @@
 */
 static double SplitCost(size_t i, void* context) {
   SplitCostContext* c = (SplitCostContext*)context;
-  return EstimateCost(c->litlens, c->dists, c->start, i) +
-      EstimateCost(c->litlens, c->dists, i, c->end);
+  return EstimateCost(c->lz77, c->start, i) + EstimateCost(c->lz77, i, c->end);
 }
 
 static void AddSorted(size_t value, size_t** out, size_t* outsize) {
@@ -147,9 +145,8 @@
 /*
 Prints the block split points as decimal and hex values in the terminal.
 */
-static void PrintBlockSplitPoints(const unsigned short* litlens,
-                                  const unsigned short* dists,
-                                  size_t llsize, const size_t* lz77splitpoints,
+static void PrintBlockSplitPoints(const ZopfliLZ77Store* lz77,
+                                  const size_t* lz77splitpoints,
                                   size_t nlz77points) {
   size_t* splitpoints = 0;
   size_t npoints = 0;
@@ -158,8 +155,8 @@
   index values. */
   size_t pos = 0;
   if (nlz77points > 0) {
-    for (i = 0; i < llsize; i++) {
-      size_t length = dists[i] == 0 ? 1 : litlens[i];
+    for (i = 0; i < lz77->size; i++) {
+      size_t length = lz77->dists[i] == 0 ? 1 : lz77->litlens[i];
       if (lz77splitpoints[npoints] == i) {
         ZOPFLI_APPEND_DATA(pos, &splitpoints, &npoints);
         if (npoints == nlz77points) break;
@@ -186,7 +183,7 @@
 Finds next block to try to split, the largest of the available ones.
 The largest is chosen to make sure that if only a limited amount of blocks is
 requested, their sizes are spread evenly.
-llsize: the size of the LL77 data, which is the size of the done array here.
+lz77size: the size of the LL77 data, which is the size of the done array here.
 done: array indicating which blocks starting at that position are no longer
     splittable (splitting them increases rather than decreases cost).
 splitpoints: the splitpoints found so far.
@@ -196,7 +193,7 @@
 returns 1 if a block was found, 0 if no block found (all are done).
 */
 static int FindLargestSplittableBlock(
-    size_t llsize, const unsigned char* done,
+    size_t lz77size, const unsigned char* done,
     const size_t* splitpoints, size_t npoints,
     size_t* lstart, size_t* lend) {
   size_t longest = 0;
@@ -204,7 +201,7 @@
   size_t i;
   for (i = 0; i <= npoints; i++) {
     size_t start = i == 0 ? 0 : splitpoints[i - 1];
-    size_t end = i == npoints ? llsize - 1 : splitpoints[i];
+    size_t end = i == npoints ? lz77size - 1 : splitpoints[i];
     if (!done[start] && end - start > longest) {
       *lstart = start;
       *lend = end;
@@ -216,9 +213,7 @@
 }
 
 void ZopfliBlockSplitLZ77(const ZopfliOptions* options,
-                          const unsigned short* litlens,
-                          const unsigned short* dists,
-                          size_t llsize, size_t maxblocks,
+                          const ZopfliLZ77Store* lz77, size_t maxblocks,
                           size_t** splitpoints, size_t* npoints) {
   size_t lstart, lend;
   size_t i;
@@ -227,14 +222,14 @@
   unsigned char* done;
   double splitcost, origcost;
 
-  if (llsize < 10) return;  /* This code fails on tiny files. */
+  if (lz77->size < 10) return;  /* This code fails on tiny files. */
 
-  done = (unsigned char*)malloc(llsize);
+  done = (unsigned char*)malloc(lz77->size);
   if (!done) exit(-1); /* Allocation failed. */
-  for (i = 0; i < llsize; i++) done[i] = 0;
+  for (i = 0; i < lz77->size; i++) done[i] = 0;
 
   lstart = 0;
-  lend = llsize;
+  lend = lz77->size;
   for (;;) {
     SplitCostContext c;
 
@@ -242,20 +237,16 @@
       break;
     }
 
-    c.litlens = litlens;
-    c.dists = dists;
-    c.llsize = llsize;
+    c.lz77 = lz77;
     c.start = lstart;
     c.end = lend;
     assert(lstart < lend);
-    llpos = FindMinimum(SplitCost, &c, lstart + 1, lend);
+    llpos = FindMinimum(SplitCost, &c, lstart + 1, lend, &splitcost);
 
     assert(llpos > lstart);
     assert(llpos < lend);
 
-    splitcost = EstimateCost(litlens, dists, lstart, llpos) +
-        EstimateCost(litlens, dists, llpos, lend);
-    origcost = EstimateCost(litlens, dists, lstart, lend);
+    origcost = EstimateCost(lz77, lstart, lend);
 
     if (splitcost > origcost || llpos == lstart + 1 || llpos == lend) {
       done[lstart] = 1;
@@ -265,7 +256,7 @@
     }
 
     if (!FindLargestSplittableBlock(
-        llsize, done, *splitpoints, *npoints, &lstart, &lend)) {
+        lz77->size, done, *splitpoints, *npoints, &lstart, &lend)) {
       break;  /* No further split will probably reduce compression. */
     }
 
@@ -275,7 +266,7 @@
   }
 
   if (options->verbose) {
-    PrintBlockSplitPoints(litlens, dists, llsize, *splitpoints, *npoints);
+    PrintBlockSplitPoints(lz77, *splitpoints, *npoints);
   }
 
   free(done);
@@ -290,25 +281,22 @@
   size_t* lz77splitpoints = 0;
   size_t nlz77points = 0;
   ZopfliLZ77Store store;
+  ZopfliHash hash;
+  ZopfliHash* h = &hash;
 
-  ZopfliInitLZ77Store(&store);
-
-  s.options = options;
-  s.blockstart = instart;
-  s.blockend = inend;
-#ifdef ZOPFLI_LONGEST_MATCH_CACHE
-  s.lmc = 0;
-#endif
+  ZopfliInitLZ77Store(in, &store);
+  ZopfliInitBlockState(options, instart, inend, 0, &s);
+  ZopfliAllocHash(ZOPFLI_WINDOW_SIZE, h);
 
   *npoints = 0;
   *splitpoints = 0;
 
   /* Unintuitively, Using a simple LZ77 method here instead of ZopfliLZ77Optimal
   results in better blocks. */
-  ZopfliLZ77Greedy(&s, in, instart, inend, &store);
+  ZopfliLZ77Greedy(&s, in, instart, inend, &store, h);
 
   ZopfliBlockSplitLZ77(options,
-                       store.litlens, store.dists, store.size, maxblocks,
+                       &store, maxblocks,
                        &lz77splitpoints, &nlz77points);
 
   /* Convert LZ77 positions to positions in the uncompressed input. */
@@ -326,7 +314,9 @@
   assert(*npoints == nlz77points);
 
   free(lz77splitpoints);
+  ZopfliCleanBlockState(&s);
   ZopfliCleanLZ77Store(&store);
+  ZopfliCleanHash(h);
 }
 
 void ZopfliBlockSplitSimple(const unsigned char* in,
diff --git a/zopfli/src/zopfli/blocksplitter.h b/zopfli/src/zopfli/blocksplitter.h
index 6791702..d1d622f 100644
--- a/zopfli/src/zopfli/blocksplitter.h
+++ b/zopfli/src/zopfli/blocksplitter.h
@@ -30,21 +30,17 @@
 
 #include <stdlib.h>
 
+#include "lz77.h"
 #include "zopfli.h"
 
 
 /*
 Does blocksplitting on LZ77 data.
 The output splitpoints are indices in the LZ77 data.
-litlens: lz77 lit/lengths
-dists: lz77 distances
-llsize: size of litlens and dists
 maxblocks: set a limit to the amount of blocks. Set to 0 to mean no limit.
 */
 void ZopfliBlockSplitLZ77(const ZopfliOptions* options,
-                          const unsigned short* litlens,
-                          const unsigned short* dists,
-                          size_t llsize, size_t maxblocks,
+                          const ZopfliLZ77Store* lz77, size_t maxblocks,
                           size_t** splitpoints, size_t* npoints);
 
 /*
diff --git a/zopfli/src/zopfli/cache.c b/zopfli/src/zopfli/cache.c
index 88a49ac..f5559c3 100644
--- a/zopfli/src/zopfli/cache.c
+++ b/zopfli/src/zopfli/cache.c
@@ -31,6 +31,12 @@
   lmc->dist = (unsigned short*)malloc(sizeof(unsigned short) * blocksize);
   /* Rather large amount of memory. */
   lmc->sublen = (unsigned char*)malloc(ZOPFLI_CACHE_LENGTH * 3 * blocksize);
+  if(lmc->sublen == NULL) {
+    fprintf(stderr,
+        "Error: Out of memory. Tried allocating %lu bytes of memory.\n",
+        ZOPFLI_CACHE_LENGTH * 3 * blocksize);
+    exit (EXIT_FAILURE);
+  }
 
   /* length > 0 and dist 0 is invalid combination, which indicates on purpose
   that this cache value is not filled in yet. */
diff --git a/zopfli/src/zopfli/deflate.c b/zopfli/src/zopfli/deflate.c
index 4b0724b..abe7360 100644
--- a/zopfli/src/zopfli/deflate.c
+++ b/zopfli/src/zopfli/deflate.c
@@ -24,8 +24,8 @@
 #include <stdlib.h>
 
 #include "blocksplitter.h"
-#include "lz77.h"
 #include "squeeze.h"
+#include "symbols.h"
 #include "tree.h"
 
 /*
@@ -294,8 +294,7 @@
 end code 256. expected_data_size is the uncompressed block size, used for
 assert, but you can set it to 0 to not do the assertion.
 */
-static void AddLZ77Data(const unsigned short* litlens,
-                        const unsigned short* dists,
+static void AddLZ77Data(const ZopfliLZ77Store* lz77,
                         size_t lstart, size_t lend,
                         size_t expected_data_size,
                         const unsigned* ll_symbols, const unsigned* ll_lengths,
@@ -306,8 +305,8 @@
   size_t i;
 
   for (i = lstart; i < lend; i++) {
-    unsigned dist = dists[i];
-    unsigned litlen = litlens[i];
+    unsigned dist = lz77->dists[i];
+    unsigned litlen = lz77->litlens[i];
     if (dist == 0) {
       assert(litlen < 256);
       assert(ll_lengths[litlen] > 0);
@@ -343,29 +342,83 @@
 }
 
 /*
-Calculates size of the part after the header and tree of an LZ77 block, in bits.
+Same as CalculateBlockSymbolSize, but for block size smaller than histogram
+size.
 */
-static size_t CalculateBlockSymbolSize(const unsigned* ll_lengths,
-                                       const unsigned* d_lengths,
-                                       const unsigned short* litlens,
-                                       const unsigned short* dists,
-                                       size_t lstart, size_t lend) {
+static size_t CalculateBlockSymbolSizeSmall(const unsigned* ll_lengths,
+                                            const unsigned* d_lengths,
+                                            const ZopfliLZ77Store* lz77,
+                                            size_t lstart, size_t lend) {
   size_t result = 0;
   size_t i;
   for (i = lstart; i < lend; i++) {
-    if (dists[i] == 0) {
-      result += ll_lengths[litlens[i]];
+    assert(i < lz77->size);
+    assert(lz77->litlens[i] < 259);
+    if (lz77->dists[i] == 0) {
+      result += ll_lengths[lz77->litlens[i]];
     } else {
-      result += ll_lengths[ZopfliGetLengthSymbol(litlens[i])];
-      result += d_lengths[ZopfliGetDistSymbol(dists[i])];
-      result += ZopfliGetLengthExtraBits(litlens[i]);
-      result += ZopfliGetDistExtraBits(dists[i]);
+      int ll_symbol = ZopfliGetLengthSymbol(lz77->litlens[i]);
+      int d_symbol = ZopfliGetDistSymbol(lz77->dists[i]);
+      result += ll_lengths[ll_symbol];
+      result += d_lengths[d_symbol];
+      result += ZopfliGetLengthSymbolExtraBits(ll_symbol);
+      result += ZopfliGetDistSymbolExtraBits(d_symbol);
     }
   }
   result += ll_lengths[256]; /*end symbol*/
   return result;
 }
 
+/*
+Same as CalculateBlockSymbolSize, but with the histogram provided by the caller.
+*/
+static size_t CalculateBlockSymbolSizeGivenCounts(const size_t* ll_counts,
+                                                  const size_t* d_counts,
+                                                  const unsigned* ll_lengths,
+                                                  const unsigned* d_lengths,
+                                                  const ZopfliLZ77Store* lz77,
+                                                  size_t lstart, size_t lend) {
+  size_t result = 0;
+  size_t i;
+  if (lstart + ZOPFLI_NUM_LL * 3 > lend) {
+    return CalculateBlockSymbolSizeSmall(
+        ll_lengths, d_lengths, lz77, lstart, lend);
+  } else {
+    for (i = 0; i < 256; i++) {
+      result += ll_lengths[i] * ll_counts[i];
+    }
+    for (i = 257; i < 286; i++) {
+      result += ll_lengths[i] * ll_counts[i];
+      result += ZopfliGetLengthSymbolExtraBits(i) * ll_counts[i];
+    }
+    for (i = 0; i < 30; i++) {
+      result += d_lengths[i] * d_counts[i];
+      result += ZopfliGetDistSymbolExtraBits(i) * d_counts[i];
+    }
+    result += ll_lengths[256]; /*end symbol*/
+    return result;
+  }
+}
+
+/*
+Calculates size of the part after the header and tree of an LZ77 block, in bits.
+*/
+static size_t CalculateBlockSymbolSize(const unsigned* ll_lengths,
+                                       const unsigned* d_lengths,
+                                       const ZopfliLZ77Store* lz77,
+                                       size_t lstart, size_t lend) {
+  if (lstart + ZOPFLI_NUM_LL * 3 > lend) {
+    return CalculateBlockSymbolSizeSmall(
+        ll_lengths, d_lengths, lz77, lstart, lend);
+  } else {
+    size_t ll_counts[ZOPFLI_NUM_LL];
+    size_t d_counts[ZOPFLI_NUM_D];
+    ZopfliLZ77GetHistogram(lz77, lstart, lend, ll_counts, d_counts);
+    return CalculateBlockSymbolSizeGivenCounts(
+        ll_counts, d_counts, ll_lengths, d_lengths, lz77, lstart, lend);
+  }
+}
+
 static size_t AbsDiff(size_t x, size_t y) {
   if (x > y)
     return x - y;
@@ -374,9 +427,9 @@
 }
 
 /*
-Change the population counts in a way that the consequent Hufmann tree
-compression, especially its rle-part will be more likely to compress this data
-more efficiently. length containts the size of the histogram.
+Changes the population counts in a way that the consequent Huffman tree
+compression, especially its rle-part, will be more likely to compress this data
+more efficiently. length contains the size of the histogram.
 */
 void OptimizeHuffmanForRle(int length, size_t* counts) {
   int i, k, stride;
@@ -465,49 +518,150 @@
 }
 
 /*
+Tries out OptimizeHuffmanForRle for this block, if the result is smaller,
+uses it, otherwise keeps the original. Returns size of encoded tree and data in
+bits, not including the 3-bit block header.
+*/
+static double TryOptimizeHuffmanForRle(
+    const ZopfliLZ77Store* lz77, size_t lstart, size_t lend,
+    const size_t* ll_counts, const size_t* d_counts,
+    unsigned* ll_lengths, unsigned* d_lengths) {
+  size_t ll_counts2[ZOPFLI_NUM_LL];
+  size_t d_counts2[ZOPFLI_NUM_D];
+  unsigned ll_lengths2[ZOPFLI_NUM_LL];
+  unsigned d_lengths2[ZOPFLI_NUM_D];
+  double treesize;
+  double datasize;
+  double treesize2;
+  double datasize2;
+
+  treesize = CalculateTreeSize(ll_lengths, d_lengths);
+  datasize = CalculateBlockSymbolSizeGivenCounts(ll_counts, d_counts,
+      ll_lengths, d_lengths, lz77, lstart, lend);
+
+  memcpy(ll_counts2, ll_counts, sizeof(ll_counts2));
+  memcpy(d_counts2, d_counts, sizeof(d_counts2));
+  OptimizeHuffmanForRle(ZOPFLI_NUM_LL, ll_counts2);
+  OptimizeHuffmanForRle(ZOPFLI_NUM_D, d_counts2);
+  ZopfliCalculateBitLengths(ll_counts2, ZOPFLI_NUM_LL, 15, ll_lengths2);
+  ZopfliCalculateBitLengths(d_counts2, ZOPFLI_NUM_D, 15, d_lengths2);
+  PatchDistanceCodesForBuggyDecoders(d_lengths2);
+
+  treesize2 = CalculateTreeSize(ll_lengths2, d_lengths2);
+  datasize2 = CalculateBlockSymbolSizeGivenCounts(ll_counts, d_counts,
+      ll_lengths2, d_lengths2, lz77, lstart, lend);
+
+  if (treesize2 + datasize2 < treesize + datasize) {
+    memcpy(ll_lengths, ll_lengths2, sizeof(ll_lengths2));
+    memcpy(d_lengths, d_lengths2, sizeof(d_lengths2));
+    return treesize2 + datasize2;
+  }
+  return treesize + datasize;
+}
+
+/*
 Calculates the bit lengths for the symbols for dynamic blocks. Chooses bit
 lengths that give the smallest size of tree encoding + encoding of all the
 symbols to have smallest output size. This are not necessarily the ideal Huffman
-bit lengths.
+bit lengths. Returns size of encoded tree and data in bits, not including the
+3-bit block header.
 */
-static void GetDynamicLengths(const unsigned short* litlens,
-                              const unsigned short* dists,
-                              size_t lstart, size_t lend,
-                              unsigned* ll_lengths, unsigned* d_lengths) {
-  size_t ll_counts[288];
-  size_t d_counts[32];
+static double GetDynamicLengths(const ZopfliLZ77Store* lz77,
+                                size_t lstart, size_t lend,
+                                unsigned* ll_lengths, unsigned* d_lengths) {
+  size_t ll_counts[ZOPFLI_NUM_LL];
+  size_t d_counts[ZOPFLI_NUM_D];
 
-  ZopfliLZ77Counts(litlens, dists, lstart, lend, ll_counts, d_counts);
-  OptimizeHuffmanForRle(288, ll_counts);
-  OptimizeHuffmanForRle(32, d_counts);
-  ZopfliCalculateBitLengths(ll_counts, 288, 15, ll_lengths);
-  ZopfliCalculateBitLengths(d_counts, 32, 15, d_lengths);
+  ZopfliLZ77GetHistogram(lz77, lstart, lend, ll_counts, d_counts);
+  ll_counts[256] = 1;  /* End symbol. */
+  ZopfliCalculateBitLengths(ll_counts, ZOPFLI_NUM_LL, 15, ll_lengths);
+  ZopfliCalculateBitLengths(d_counts, ZOPFLI_NUM_D, 15, d_lengths);
   PatchDistanceCodesForBuggyDecoders(d_lengths);
+  return TryOptimizeHuffmanForRle(
+      lz77, lstart, lend, ll_counts, d_counts, ll_lengths, d_lengths);
 }
 
-double ZopfliCalculateBlockSize(const unsigned short* litlens,
-                                const unsigned short* dists,
+double ZopfliCalculateBlockSize(const ZopfliLZ77Store* lz77,
                                 size_t lstart, size_t lend, int btype) {
-  unsigned ll_lengths[288];
-  unsigned d_lengths[32];
+  unsigned ll_lengths[ZOPFLI_NUM_LL];
+  unsigned d_lengths[ZOPFLI_NUM_D];
 
   double result = 3; /* bfinal and btype bits */
 
-  assert(btype == 1 || btype == 2); /* This is not for uncompressed blocks. */
-
-  if(btype == 1) {
+  if (btype == 0) {
+    size_t length = ZopfliLZ77GetByteRange(lz77, lstart, lend);
+    size_t rem = length % 65535;
+    size_t blocks = length / 65535 + (rem ? 1 : 0);
+    /* An uncompressed block must actually be split into multiple blocks if it's
+       larger than 65535 bytes long. Eeach block header is 5 bytes: 3 bits,
+       padding, LEN and NLEN (potential less padding for first one ignored). */
+    return blocks * 5 * 8 + length * 8;
+  } if (btype == 1) {
     GetFixedTree(ll_lengths, d_lengths);
+    result += CalculateBlockSymbolSize(
+        ll_lengths, d_lengths, lz77, lstart, lend);
   } else {
-    GetDynamicLengths(litlens, dists, lstart, lend, ll_lengths, d_lengths);
-    result += CalculateTreeSize(ll_lengths, d_lengths);
+    result += GetDynamicLengths(lz77, lstart, lend, ll_lengths, d_lengths);
   }
 
-  result += CalculateBlockSymbolSize(
-      ll_lengths, d_lengths, litlens, dists, lstart, lend);
-
   return result;
 }
 
+double ZopfliCalculateBlockSizeAutoType(const ZopfliLZ77Store* lz77,
+                                        size_t lstart, size_t lend) {
+  double uncompressedcost = ZopfliCalculateBlockSize(lz77, lstart, lend, 0);
+  /* Don't do the expensive fixed cost calculation for larger blocks that are
+     unlikely to use it. */
+  double fixedcost = (lz77->size > 1000) ?
+      uncompressedcost : ZopfliCalculateBlockSize(lz77, lstart, lend, 1);
+  double dyncost = ZopfliCalculateBlockSize(lz77, lstart, lend, 2);
+  return (uncompressedcost < fixedcost && uncompressedcost < dyncost)
+      ? uncompressedcost
+      : (fixedcost < dyncost ? fixedcost : dyncost);
+}
+
+/* Since an uncompressed block can be max 65535 in size, it actually adds
+multible blocks if needed. */
+static void AddNonCompressedBlock(const ZopfliOptions* options, int final,
+                                  const unsigned char* in, size_t instart,
+                                  size_t inend,
+                                  unsigned char* bp,
+                                  unsigned char** out, size_t* outsize) {
+  size_t pos = instart;
+  (void)options;
+  for (;;) {
+    size_t i;
+    unsigned short blocksize = 65535;
+    unsigned short nlen;
+    int currentfinal;
+
+    if (pos + blocksize > inend) blocksize = inend - pos;
+    currentfinal = pos + blocksize >= inend;
+
+    nlen = ~blocksize;
+
+    AddBit(final && currentfinal, bp, out, outsize);
+    /* BTYPE 00 */
+    AddBit(0, bp, out, outsize);
+    AddBit(0, bp, out, outsize);
+
+    /* Any bits of input up to the next byte boundary are ignored. */
+    *bp = 0;
+
+    ZOPFLI_APPEND_DATA(blocksize % 256, out, outsize);
+    ZOPFLI_APPEND_DATA((blocksize / 256) % 256, out, outsize);
+    ZOPFLI_APPEND_DATA(nlen % 256, out, outsize);
+    ZOPFLI_APPEND_DATA((nlen / 256) % 256, out, outsize);
+
+    for (i = 0; i < blocksize; i++) {
+      ZOPFLI_APPEND_DATA(in[pos + i], out, outsize);
+    }
+
+    if (currentfinal) break;
+    pos += blocksize;
+  }
+}
+
 /*
 Adds a deflate block with the given LZ77 data to the output.
 options: global program options
@@ -526,20 +680,27 @@
 outsize: dynamic output array size
 */
 static void AddLZ77Block(const ZopfliOptions* options, int btype, int final,
-                         const unsigned short* litlens,
-                         const unsigned short* dists,
+                         const ZopfliLZ77Store* lz77,
                          size_t lstart, size_t lend,
                          size_t expected_data_size,
                          unsigned char* bp,
                          unsigned char** out, size_t* outsize) {
-  unsigned ll_lengths[288];
-  unsigned d_lengths[32];
-  unsigned ll_symbols[288];
-  unsigned d_symbols[32];
+  unsigned ll_lengths[ZOPFLI_NUM_LL];
+  unsigned d_lengths[ZOPFLI_NUM_D];
+  unsigned ll_symbols[ZOPFLI_NUM_LL];
+  unsigned d_symbols[ZOPFLI_NUM_D];
   size_t detect_block_size = *outsize;
   size_t compressed_size;
   size_t uncompressed_size = 0;
   size_t i;
+  if (btype == 0) {
+    size_t length = ZopfliLZ77GetByteRange(lz77, lstart, lend);
+    size_t pos = lstart == lend ? 0 : lz77->pos[lstart];
+    size_t end = pos + length;
+    AddNonCompressedBlock(options, final,
+                          lz77->data, pos, end, bp, out, outsize);
+    return;
+  }
 
   AddBit(final, bp, out, outsize);
   AddBit(btype & 1, bp, out, outsize);
@@ -553,7 +714,7 @@
     unsigned detect_tree_size;
     assert(btype == 2);
 
-    GetDynamicLengths(litlens, dists, lstart, lend, ll_lengths, d_lengths);
+    GetDynamicLengths(lz77, lstart, lend, ll_lengths, d_lengths);
 
     detect_tree_size = *outsize;
     AddDynamicTree(ll_lengths, d_lengths, bp, out, outsize);
@@ -562,18 +723,18 @@
     }
   }
 
-  ZopfliLengthsToSymbols(ll_lengths, 288, 15, ll_symbols);
-  ZopfliLengthsToSymbols(d_lengths, 32, 15, d_symbols);
+  ZopfliLengthsToSymbols(ll_lengths, ZOPFLI_NUM_LL, 15, ll_symbols);
+  ZopfliLengthsToSymbols(d_lengths, ZOPFLI_NUM_D, 15, d_symbols);
 
   detect_block_size = *outsize;
-  AddLZ77Data(litlens, dists, lstart, lend, expected_data_size,
+  AddLZ77Data(lz77, lstart, lend, expected_data_size,
               ll_symbols, ll_lengths, d_symbols, d_lengths,
               bp, out, outsize);
   /* End symbol. */
   AddHuffmanBits(ll_symbols[256], ll_lengths[256], bp, out, outsize);
 
   for (i = lstart; i < lend; i++) {
-    uncompressed_size += dists[i] == 0 ? 1 : litlens[i];
+    uncompressed_size += lz77->dists[i] == 0 ? 1 : lz77->litlens[i];
   }
   compressed_size = *outsize - detect_block_size;
   if (options->verbose) {
@@ -583,236 +744,59 @@
   }
 }
 
-static void DeflateDynamicBlock(const ZopfliOptions* options, int final,
-                                const unsigned char* in,
-                                size_t instart, size_t inend,
-                                unsigned char* bp,
-                                unsigned char** out, size_t* outsize) {
-  ZopfliBlockState s;
-  size_t blocksize = inend - instart;
-  ZopfliLZ77Store store;
-  int btype = 2;
-
-  ZopfliInitLZ77Store(&store);
-
-  s.options = options;
-  s.blockstart = instart;
-  s.blockend = inend;
-#ifdef ZOPFLI_LONGEST_MATCH_CACHE
-  s.lmc = (ZopfliLongestMatchCache*)malloc(sizeof(ZopfliLongestMatchCache));
-  ZopfliInitCache(blocksize, s.lmc);
-#endif
-
-  ZopfliLZ77Optimal(&s, in, instart, inend, &store);
-
-  /* For small block, encoding with fixed tree can be smaller. For large block,
-  don't bother doing this expensive test, dynamic tree will be better.*/
-  if (store.size < 1000) {
-    double dyncost, fixedcost;
-    ZopfliLZ77Store fixedstore;
-    ZopfliInitLZ77Store(&fixedstore);
-    ZopfliLZ77OptimalFixed(&s, in, instart, inend, &fixedstore);
-    dyncost = ZopfliCalculateBlockSize(store.litlens, store.dists,
-        0, store.size, 2);
-    fixedcost = ZopfliCalculateBlockSize(fixedstore.litlens, fixedstore.dists,
-        0, fixedstore.size, 1);
-    if (fixedcost < dyncost) {
-      btype = 1;
-      ZopfliCleanLZ77Store(&store);
-      store = fixedstore;
-    } else {
-      ZopfliCleanLZ77Store(&fixedstore);
-    }
-  }
-
-  AddLZ77Block(s.options, btype, final,
-               store.litlens, store.dists, 0, store.size,
-               blocksize, bp, out, outsize);
-
-#ifdef ZOPFLI_LONGEST_MATCH_CACHE
-  ZopfliCleanCache(s.lmc);
-  free(s.lmc);
-#endif
-  ZopfliCleanLZ77Store(&store);
-}
-
-static void DeflateFixedBlock(const ZopfliOptions* options, int final,
-                              const unsigned char* in,
-                              size_t instart, size_t inend,
-                              unsigned char* bp,
-                              unsigned char** out, size_t* outsize) {
-  ZopfliBlockState s;
-  size_t blocksize = inend - instart;
-  ZopfliLZ77Store store;
-
-  ZopfliInitLZ77Store(&store);
-
-  s.options = options;
-  s.blockstart = instart;
-  s.blockend = inend;
-#ifdef ZOPFLI_LONGEST_MATCH_CACHE
-  s.lmc = (ZopfliLongestMatchCache*)malloc(sizeof(ZopfliLongestMatchCache));
-  ZopfliInitCache(blocksize, s.lmc);
-#endif
-
-  ZopfliLZ77OptimalFixed(&s, in, instart, inend, &store);
-
-  AddLZ77Block(s.options, 1, final, store.litlens, store.dists, 0, store.size,
-               blocksize, bp, out, outsize);
-
-#ifdef ZOPFLI_LONGEST_MATCH_CACHE
-  ZopfliCleanCache(s.lmc);
-  free(s.lmc);
-#endif
-  ZopfliCleanLZ77Store(&store);
-}
-
-static void DeflateNonCompressedBlock(const ZopfliOptions* options, int final,
-                                      const unsigned char* in, size_t instart,
-                                      size_t inend,
-                                      unsigned char* bp,
-                                      unsigned char** out, size_t* outsize) {
-  size_t i;
-  size_t blocksize = inend - instart;
-  unsigned short nlen = ~blocksize;
-
-  (void)options;
-  assert(blocksize < 65536);  /* Non compressed blocks are max this size. */
-
-  AddBit(final, bp, out, outsize);
-  /* BTYPE 00 */
-  AddBit(0, bp, out, outsize);
-  AddBit(0, bp, out, outsize);
-
-  /* Any bits of input up to the next byte boundary are ignored. */
-  *bp = 0;
-
-  ZOPFLI_APPEND_DATA(blocksize % 256, out, outsize);
-  ZOPFLI_APPEND_DATA((blocksize / 256) % 256, out, outsize);
-  ZOPFLI_APPEND_DATA(nlen % 256, out, outsize);
-  ZOPFLI_APPEND_DATA((nlen / 256) % 256, out, outsize);
-
-  for (i = instart; i < inend; i++) {
-    ZOPFLI_APPEND_DATA(in[i], out, outsize);
-  }
-}
-
-static void DeflateBlock(const ZopfliOptions* options,
-                         int btype, int final,
-                         const unsigned char* in, size_t instart, size_t inend,
-                         unsigned char* bp,
-                         unsigned char** out, size_t* outsize) {
-  if (btype == 0) {
-    DeflateNonCompressedBlock(
-        options, final, in, instart, inend, bp, out, outsize);
-  } else if (btype == 1) {
-     DeflateFixedBlock(options, final, in, instart, inend, bp, out, outsize);
-  } else {
-    assert (btype == 2);
-    DeflateDynamicBlock(options, final, in, instart, inend, bp, out, outsize);
-  }
-}
-
-/*
-Does squeeze strategy where first block splitting is done, then each block is
-squeezed.
-Parameters: see description of the ZopfliDeflate function.
-*/
-static void DeflateSplittingFirst(const ZopfliOptions* options,
-                                  int btype, int final,
-                                  const unsigned char* in,
-                                  size_t instart, size_t inend,
-                                  unsigned char* bp,
-                                  unsigned char** out, size_t* outsize) {
-  size_t i;
-  size_t* splitpoints = 0;
-  size_t npoints = 0;
-  if (btype == 0) {
-    ZopfliBlockSplitSimple(in, instart, inend, 65535, &splitpoints, &npoints);
-  } else if (btype == 1) {
-    /* If all blocks are fixed tree, splitting into separate blocks only
-    increases the total size. Leave npoints at 0, this represents 1 block. */
-  } else {
-    ZopfliBlockSplit(options, in, instart, inend,
-                     options->blocksplittingmax, &splitpoints, &npoints);
-  }
-
-  for (i = 0; i <= npoints; i++) {
-    size_t start = i == 0 ? instart : splitpoints[i - 1];
-    size_t end = i == npoints ? inend : splitpoints[i];
-    DeflateBlock(options, btype, i == npoints && final, in, start, end,
-                 bp, out, outsize);
-  }
-
-  free(splitpoints);
-}
-
-/*
-Does squeeze strategy where first the best possible lz77 is done, and then based
-on that data, block splitting is done.
-Parameters: see description of the ZopfliDeflate function.
-*/
-static void DeflateSplittingLast(const ZopfliOptions* options,
-                                 int btype, int final,
-                                 const unsigned char* in,
-                                 size_t instart, size_t inend,
+static void AddLZ77BlockAutoType(const ZopfliOptions* options, int final,
+                                 const ZopfliLZ77Store* lz77,
+                                 size_t lstart, size_t lend,
+                                 size_t expected_data_size,
                                  unsigned char* bp,
                                  unsigned char** out, size_t* outsize) {
-  size_t i;
-  ZopfliBlockState s;
-  ZopfliLZ77Store store;
-  size_t* splitpoints = 0;
-  size_t npoints = 0;
+  double uncompressedcost = ZopfliCalculateBlockSize(lz77, lstart, lend, 0);
+  double fixedcost = ZopfliCalculateBlockSize(lz77, lstart, lend, 1);
+  double dyncost = ZopfliCalculateBlockSize(lz77, lstart, lend, 2);
 
-  if (btype == 0) {
-    /* This function only supports LZ77 compression. DeflateSplittingFirst
-       supports the special case of noncompressed data. Punt it to that one. */
-    DeflateSplittingFirst(options, btype, final,
-                          in, instart, inend,
-                          bp, out, outsize);
+  /* Whether to perform the expensive calculation of creating an optimal block
+  with fixed huffman tree to check if smaller. Only do this for small blocks or
+  blocks which already are pretty good with fixed huffman tree. */
+  int expensivefixed = (lz77->size < 1000) || fixedcost <= dyncost * 1.1;
+
+  ZopfliLZ77Store fixedstore;
+  if (lstart == lend) {
+    /* Smallest empty block is represented by fixed block */
+    AddBits(final, 1, bp, out, outsize);
+    AddBits(1, 2, bp, out, outsize);  /* btype 01 */
+    AddBits(0, 7, bp, out, outsize);  /* end symbol has code 0000000 */
+    return;
   }
-  assert(btype == 1 || btype == 2);
+  ZopfliInitLZ77Store(lz77->data, &fixedstore);
+  if (expensivefixed) {
+    /* Recalculate the LZ77 with ZopfliLZ77OptimalFixed */
+    size_t instart = lz77->pos[lstart];
+    size_t inend = instart + ZopfliLZ77GetByteRange(lz77, lstart, lend);
 
-  ZopfliInitLZ77Store(&store);
+    ZopfliBlockState s;
+    ZopfliInitBlockState(options, instart, inend, 1, &s);
+    ZopfliLZ77OptimalFixed(&s, lz77->data, instart, inend, &fixedstore);
+    fixedcost = ZopfliCalculateBlockSize(&fixedstore, 0, fixedstore.size, 1);
+    ZopfliCleanBlockState(&s);
+  }
 
-  s.options = options;
-  s.blockstart = instart;
-  s.blockend = inend;
-#ifdef ZOPFLI_LONGEST_MATCH_CACHE
-  s.lmc = (ZopfliLongestMatchCache*)malloc(sizeof(ZopfliLongestMatchCache));
-  ZopfliInitCache(inend - instart, s.lmc);
-#endif
-
-  if (btype == 2) {
-    ZopfliLZ77Optimal(&s, in, instart, inend, &store);
+  if (uncompressedcost < fixedcost && uncompressedcost < dyncost) {
+    AddLZ77Block(options, 0, final, lz77, lstart, lend,
+                 expected_data_size, bp, out, outsize);
+  } else if (fixedcost < dyncost) {
+    if (expensivefixed) {
+      AddLZ77Block(options, 1, final, &fixedstore, 0, fixedstore.size,
+                   expected_data_size, bp, out, outsize);
+    } else {
+      AddLZ77Block(options, 1, final, lz77, lstart, lend,
+                   expected_data_size, bp, out, outsize);
+    }
   } else {
-    assert (btype == 1);
-    ZopfliLZ77OptimalFixed(&s, in, instart, inend, &store);
+    AddLZ77Block(options, 2, final, lz77, lstart, lend,
+                 expected_data_size, bp, out, outsize);
   }
 
-  if (btype == 1) {
-    /* If all blocks are fixed tree, splitting into separate blocks only
-    increases the total size. Leave npoints at 0, this represents 1 block. */
-  } else {
-    ZopfliBlockSplitLZ77(options, store.litlens, store.dists, store.size,
-                         options->blocksplittingmax, &splitpoints, &npoints);
-  }
-
-  for (i = 0; i <= npoints; i++) {
-    size_t start = i == 0 ? 0 : splitpoints[i - 1];
-    size_t end = i == npoints ? store.size : splitpoints[i];
-    AddLZ77Block(options, btype, i == npoints && final,
-                 store.litlens, store.dists, start, end, 0,
-                 bp, out, outsize);
-  }
-
-#ifdef ZOPFLI_LONGEST_MATCH_CACHE
-  ZopfliCleanCache(s.lmc);
-  free(s.lmc);
-#endif
-
-  ZopfliCleanLZ77Store(&store);
-  free(splitpoints);
+  ZopfliCleanLZ77Store(&fixedstore);
 }
 
 /*
@@ -828,39 +812,120 @@
                        const unsigned char* in, size_t instart, size_t inend,
                        unsigned char* bp, unsigned char** out,
                        size_t* outsize) {
-  if (options->blocksplitting) {
-    if (options->blocksplittinglast) {
-      DeflateSplittingLast(options, btype, final, in, instart, inend,
-                           bp, out, outsize);
-    } else {
-      DeflateSplittingFirst(options, btype, final, in, instart, inend,
-                            bp, out, outsize);
-    }
-  } else {
-    DeflateBlock(options, btype, final, in, instart, inend, bp, out, outsize);
+  size_t i;
+  /* byte coordinates rather than lz77 index */
+  size_t* splitpoints_uncompressed = 0;
+  size_t npoints = 0;
+  size_t* splitpoints = 0;
+  double totalcost = 0;
+  ZopfliLZ77Store lz77;
+
+  /* If btype=2 is specified, it tries all block types. If a lesser btype is
+  given, then however it forces that one. Neither of the lesser types needs
+  block splitting as they have no dynamic huffman trees. */
+  if (btype == 0) {
+    AddNonCompressedBlock(options, final, in, instart, inend, bp, out, outsize);
+    return;
+  } else if (btype == 1) {
+    ZopfliLZ77Store store;
+    ZopfliBlockState s;
+    ZopfliInitLZ77Store(in, &store);
+    ZopfliInitBlockState(options, instart, inend, 1, &s);
+
+    ZopfliLZ77OptimalFixed(&s, in, instart, inend, &store);
+    AddLZ77Block(options, btype, final, &store, 0, store.size, 0,
+                 bp, out, outsize);
+
+    ZopfliCleanBlockState(&s);
+    ZopfliCleanLZ77Store(&store);
+    return;
   }
+
+
+  if (options->blocksplitting) {
+    ZopfliBlockSplit(options, in, instart, inend,
+                     options->blocksplittingmax,
+                     &splitpoints_uncompressed, &npoints);
+    splitpoints = (size_t*)malloc(sizeof(*splitpoints) * npoints);
+  }
+
+  ZopfliInitLZ77Store(in, &lz77);
+
+  for (i = 0; i <= npoints; i++) {
+    size_t start = i == 0 ? instart : splitpoints_uncompressed[i - 1];
+    size_t end = i == npoints ? inend : splitpoints_uncompressed[i];
+    ZopfliBlockState s;
+    ZopfliLZ77Store store;
+    ZopfliInitLZ77Store(in, &store);
+    ZopfliInitBlockState(options, start, end, 1, &s);
+    ZopfliLZ77Optimal(&s, in, start, end, options->numiterations, &store);
+    totalcost += ZopfliCalculateBlockSizeAutoType(&store, 0, store.size);
+
+    ZopfliAppendLZ77Store(&store, &lz77);
+    if (i < npoints) splitpoints[i] = lz77.size;
+
+    ZopfliCleanBlockState(&s);
+    ZopfliCleanLZ77Store(&store);
+  }
+
+  /* Second block splitting attempt */
+  if (options->blocksplitting && npoints > 1) {
+    size_t* splitpoints2 = 0;
+    size_t npoints2 = 0;
+    double totalcost2 = 0;
+
+    ZopfliBlockSplitLZ77(options, &lz77,
+                         options->blocksplittingmax, &splitpoints2, &npoints2);
+
+    for (i = 0; i <= npoints2; i++) {
+      size_t start = i == 0 ? 0 : splitpoints2[i - 1];
+      size_t end = i == npoints2 ? lz77.size : splitpoints2[i];
+      totalcost2 += ZopfliCalculateBlockSizeAutoType(&lz77, start, end);
+    }
+
+    if (totalcost2 < totalcost) {
+      free(splitpoints);
+      splitpoints = splitpoints2;
+      npoints = npoints2;
+    } else {
+      free(splitpoints2);
+    }
+  }
+
+  for (i = 0; i <= npoints; i++) {
+    size_t start = i == 0 ? 0 : splitpoints[i - 1];
+    size_t end = i == npoints ? lz77.size : splitpoints[i];
+    AddLZ77BlockAutoType(options, i == npoints && final,
+                         &lz77, start, end, 0,
+                         bp, out, outsize);
+  }
+
+  ZopfliCleanLZ77Store(&lz77);
+  free(splitpoints);
+  free(splitpoints_uncompressed);
 }
 
 void ZopfliDeflate(const ZopfliOptions* options, int btype, int final,
                    const unsigned char* in, size_t insize,
                    unsigned char* bp, unsigned char** out, size_t* outsize) {
+ size_t offset = *outsize;
 #if ZOPFLI_MASTER_BLOCK_SIZE == 0
   ZopfliDeflatePart(options, btype, final, in, 0, insize, bp, out, outsize);
 #else
   size_t i = 0;
-  while (i < insize) {
+  do {
     int masterfinal = (i + ZOPFLI_MASTER_BLOCK_SIZE >= insize);
     int final2 = final && masterfinal;
     size_t size = masterfinal ? insize - i : ZOPFLI_MASTER_BLOCK_SIZE;
     ZopfliDeflatePart(options, btype, final2,
                       in, i, i + size, bp, out, outsize);
     i += size;
-  }
+  } while (i < insize);
 #endif
   if (options->verbose) {
     fprintf(stderr,
-            "Original Size: %d, Deflate: %d, Compression: %f%% Removed\n",
-            (int)insize, (int)*outsize,
-            100.0 * (double)(insize - *outsize) / (double)insize);
+            "Original Size: %lu, Deflate: %lu, Compression: %f%% Removed\n",
+            (unsigned long)insize, (unsigned long)(*outsize - offset),
+            100.0 * (double)(insize - (*outsize - offset)) / (double)insize);
   }
 }
diff --git a/zopfli/src/zopfli/deflate.h b/zopfli/src/zopfli/deflate.h
index 189c77a..fcd9ddc 100644
--- a/zopfli/src/zopfli/deflate.h
+++ b/zopfli/src/zopfli/deflate.h
@@ -25,6 +25,7 @@
 "squeeze" LZ77 compression backend.
 */
 
+#include "lz77.h"
 #include "zopfli.h"
 
 #ifdef __cplusplus
@@ -75,10 +76,15 @@
 lstart: start of block
 lend: end of block (not inclusive)
 */
-double ZopfliCalculateBlockSize(const unsigned short* litlens,
-                                const unsigned short* dists,
+double ZopfliCalculateBlockSize(const ZopfliLZ77Store* lz77,
                                 size_t lstart, size_t lend, int btype);
 
+/*
+Calculates block size in bits, automatically using the best btype.
+*/
+double ZopfliCalculateBlockSizeAutoType(const ZopfliLZ77Store* lz77,
+                                        size_t lstart, size_t lend);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/zopfli/src/zopfli/hash.c b/zopfli/src/zopfli/hash.c
index a3b294f..3025d1e 100644
--- a/zopfli/src/zopfli/hash.c
+++ b/zopfli/src/zopfli/hash.c
@@ -26,13 +26,26 @@
 #define HASH_SHIFT 5
 #define HASH_MASK 32767
 
-void ZopfliInitHash(size_t window_size, ZopfliHash* h) {
-  size_t i;
-
-  h->val = 0;
+void ZopfliAllocHash(size_t window_size, ZopfliHash* h) {
   h->head = (int*)malloc(sizeof(*h->head) * 65536);
   h->prev = (unsigned short*)malloc(sizeof(*h->prev) * window_size);
   h->hashval = (int*)malloc(sizeof(*h->hashval) * window_size);
+
+#ifdef ZOPFLI_HASH_SAME
+  h->same = (unsigned short*)malloc(sizeof(*h->same) * window_size);
+#endif
+
+#ifdef ZOPFLI_HASH_SAME_HASH
+  h->head2 = (int*)malloc(sizeof(*h->head2) * 65536);
+  h->prev2 = (unsigned short*)malloc(sizeof(*h->prev2) * window_size);
+  h->hashval2 = (int*)malloc(sizeof(*h->hashval2) * window_size);
+#endif
+}
+
+void ZopfliResetHash(size_t window_size, ZopfliHash* h) {
+  size_t i;
+
+  h->val = 0;
   for (i = 0; i < 65536; i++) {
     h->head[i] = -1;  /* -1 indicates no head so far. */
   }
@@ -42,7 +55,6 @@
   }
 
 #ifdef ZOPFLI_HASH_SAME
-  h->same = (unsigned short*)malloc(sizeof(*h->same) * window_size);
   for (i = 0; i < window_size; i++) {
     h->same[i] = 0;
   }
@@ -50,9 +62,6 @@
 
 #ifdef ZOPFLI_HASH_SAME_HASH
   h->val2 = 0;
-  h->head2 = (int*)malloc(sizeof(*h->head2) * 65536);
-  h->prev2 = (unsigned short*)malloc(sizeof(*h->prev2) * window_size);
-  h->hashval2 = (int*)malloc(sizeof(*h->hashval2) * window_size);
   for (i = 0; i < 65536; i++) {
     h->head2[i] = -1;
   }
@@ -129,7 +138,6 @@
 
 void ZopfliWarmupHash(const unsigned char* array, size_t pos, size_t end,
                 ZopfliHash* h) {
-  (void)end;
   UpdateHashValue(h, array[pos + 0]);
-  UpdateHashValue(h, array[pos + 1]);
+  if (pos + 1 < end) UpdateHashValue(h, array[pos + 1]);
 }
diff --git a/zopfli/src/zopfli/hash.h b/zopfli/src/zopfli/hash.h
index 79c2479..e59c1d4 100644
--- a/zopfli/src/zopfli/hash.h
+++ b/zopfli/src/zopfli/hash.h
@@ -27,16 +27,16 @@
 #include "util.h"
 
 typedef struct ZopfliHash {
-  int* head;  /* Hash value to index of its most recent occurance. */
-  unsigned short* prev;  /* Index to index of prev. occurance of same hash. */
+  int* head;  /* Hash value to index of its most recent occurrence. */
+  unsigned short* prev;  /* Index to index of prev. occurrence of same hash. */
   int* hashval;  /* Index to hash value at this index. */
   int val;  /* Current hash value. */
 
 #ifdef ZOPFLI_HASH_SAME_HASH
   /* Fields with similar purpose as the above hash, but for the second hash with
   a value that is calculated differently.  */
-  int* head2;  /* Hash value to index of its most recent occurance. */
-  unsigned short* prev2;  /* Index to index of prev. occurance of same hash. */
+  int* head2;  /* Hash value to index of its most recent occurrence. */
+  unsigned short* prev2;  /* Index to index of prev. occurrence of same hash. */
   int* hashval2;  /* Index to hash value at this index. */
   int val2;  /* Current hash value. */
 #endif
@@ -46,10 +46,13 @@
 #endif
 } ZopfliHash;
 
-/* Allocates and initializes all fields of ZopfliHash. */
-void ZopfliInitHash(size_t window_size, ZopfliHash* h);
+/* Allocates ZopfliHash memory. */
+void ZopfliAllocHash(size_t window_size, ZopfliHash* h);
 
-/* Frees all fields of ZopfliHash. */
+/* Resets all fields of ZopfliHash. */
+void ZopfliResetHash(size_t window_size, ZopfliHash* h);
+
+/* Frees ZopfliHash memory. */
 void ZopfliCleanHash(ZopfliHash* h);
 
 /*
diff --git a/zopfli/src/zopfli/katajainen.c b/zopfli/src/zopfli/katajainen.c
index 783ea08..1459017 100644
--- a/zopfli/src/zopfli/katajainen.c
+++ b/zopfli/src/zopfli/katajainen.c
@@ -26,6 +26,7 @@
 #include "katajainen.h"
 #include <assert.h>
 #include <stdlib.h>
+#include <limits.h>
 
 typedef struct Node Node;
 
@@ -36,16 +37,13 @@
   size_t weight;  /* Total weight (symbol count) of this chain. */
   Node* tail;  /* Previous node(s) of this chain, or 0 if none. */
   int count;  /* Leaf symbol index, or number of leaves before this chain. */
-  char inuse;  /* Tracking for garbage collection. */
 };
 
 /*
 Memory pool for nodes.
 */
 typedef struct NodePool {
-  Node* nodes;  /* The pool. */
-  Node* next;  /* Pointer to a possibly free node in the pool. */
-  int size;  /* Size of the memory pool. */
+  Node* next;  /* Pointer to a free node in the pool. */
 } NodePool;
 
 /*
@@ -55,41 +53,9 @@
   node->weight = weight;
   node->count = count;
   node->tail = tail;
-  node->inuse = 1;
 }
 
 /*
-Finds a free location in the memory pool. Performs garbage collection if needed.
-lists: If given, used to mark in-use nodes during garbage collection.
-maxbits: Size of lists.
-pool: Memory pool to get free node from.
-*/
-static Node* GetFreeNode(Node* (*lists)[2], int maxbits, NodePool* pool) {
-  for (;;) {
-    if (pool->next >= &pool->nodes[pool->size]) {
-      /* Garbage collection. */
-      int i;
-      for (i = 0; i < pool->size; i++) {
-        pool->nodes[i].inuse = 0;
-      }
-      if (lists) {
-        for (i = 0; i < maxbits * 2; i++) {
-          Node* node;
-          for (node = lists[i / 2][i % 2]; node; node = node->tail) {
-            node->inuse = 1;
-          }
-        }
-      }
-      pool->next = &pool->nodes[0];
-    }
-    if (!pool->next->inuse) break;  /* Found one. */
-    pool->next++;
-  }
-  return pool->next++;
-}
-
-
-/*
 Performs a Boundary Package-Merge step. Puts a new chain in the given list. The
 new chain is, depending on the weights, a leaf or a combination of two chains
 from the previous list.
@@ -99,18 +65,16 @@
 numsymbols: Number of leaves.
 pool: the node memory pool.
 index: The index of the list in which a new chain or leaf is required.
-final: Whether this is the last time this function is called. If it is then it
-  is no more needed to recursively call self.
 */
-static void BoundaryPM(Node* (*lists)[2], int maxbits,
-    Node* leaves, int numsymbols, NodePool* pool, int index, char final) {
+static void BoundaryPM(Node* (*lists)[2], Node* leaves, int numsymbols,
+                       NodePool* pool, int index) {
   Node* newchain;
   Node* oldchain;
   int lastcount = lists[index][1]->count;  /* Count of last chain of list. */
 
   if (index == 0 && lastcount >= numsymbols) return;
 
-  newchain = GetFreeNode(lists, maxbits, pool);
+  newchain = pool->next++;
   oldchain = lists[index][1];
 
   /* These are set up before the recursive calls below, so that there is a list
@@ -129,15 +93,31 @@
           newchain);
     } else {
       InitNode(sum, lastcount, lists[index - 1][1], newchain);
-      if (!final) {
-        /* Two lookahead chains of previous list used up, create new ones. */
-        BoundaryPM(lists, maxbits, leaves, numsymbols, pool, index - 1, 0);
-        BoundaryPM(lists, maxbits, leaves, numsymbols, pool, index - 1, 0);
-      }
+      /* Two lookahead chains of previous list used up, create new ones. */
+      BoundaryPM(lists, leaves, numsymbols, pool, index - 1);
+      BoundaryPM(lists, leaves, numsymbols, pool, index - 1);
     }
   }
 }
 
+static void BoundaryPMFinal(Node* (*lists)[2],
+    Node* leaves, int numsymbols, NodePool* pool, int index) {
+  int lastcount = lists[index][1]->count;  /* Count of last chain of list. */
+
+  size_t sum = lists[index - 1][0]->weight + lists[index - 1][1]->weight;
+
+  if (lastcount < numsymbols && sum > leaves[lastcount].weight) {
+    Node* newchain = pool->next;
+    Node* oldchain = lists[index][1]->tail;
+
+    lists[index][1] = newchain;
+    newchain->count = lastcount + 1;
+    newchain->tail = oldchain;
+  } else {
+    lists[index][1]->tail = lists[index - 1][1];
+  }
+}
+
 /*
 Initializes each list with as lookahead chains the two leaves with lowest
 weights.
@@ -145,8 +125,8 @@
 static void InitLists(
     NodePool* pool, const Node* leaves, int maxbits, Node* (*lists)[2]) {
   int i;
-  Node* node0 = GetFreeNode(0, maxbits, pool);
-  Node* node1 = GetFreeNode(0, maxbits, pool);
+  Node* node0 = pool->next++;
+  Node* node1 = pool->next++;
   InitNode(leaves[0].weight, 1, 0, node0);
   InitNode(leaves[1].weight, 2, 0, node1);
   for (i = 0; i < maxbits; i++) {
@@ -161,12 +141,24 @@
 chain: Chain to extract the bit length from (last chain from last list).
 */
 static void ExtractBitLengths(Node* chain, Node* leaves, unsigned* bitlengths) {
+  int counts[16] = {0};
+  unsigned end = 16;
+  unsigned ptr = 15;
+  unsigned value = 1;
   Node* node;
+  int val;
+
   for (node = chain; node; node = node->tail) {
-    int i;
-    for (i = 0; i < node->count; i++) {
-      bitlengths[leaves[i].count]++;
+    counts[--end] = node->count;
+  }
+
+  val = counts[15];
+  while (ptr >= end) {
+    for (; val > counts[ptr - 1]; val--) {
+      bitlengths[leaves[val - 1].count] = value;
     }
+    ptr--;
+    value++;
   }
 }
 
@@ -183,6 +175,7 @@
   int i;
   int numsymbols = 0;  /* Amount of symbols with frequency > 0. */
   int numBoundaryPMRuns;
+  Node* nodes;
 
   /* Array of lists of chains. Each list requires only two lookahead chains at
   a time, so each list is a array of two Node*'s. */
@@ -219,17 +212,35 @@
     free(leaves);
     return 0;  /* Only one symbol, give it bitlength 1, not 0. OK. */
   }
+  if (numsymbols == 2) {
+    bitlengths[leaves[0].count]++;
+    bitlengths[leaves[1].count]++;
+    free(leaves);
+    return 0;
+  }
 
-  /* Sort the leaves from lightest to heaviest. */
+  /* Sort the leaves from lightest to heaviest. Add count into the same
+  variable for stable sorting. */
+  for (i = 0; i < numsymbols; i++) {
+    if (leaves[i].weight >=
+        ((size_t)1 << (sizeof(leaves[0].weight) * CHAR_BIT - 9))) {
+      free(leaves);
+      return 1;  /* Error, we need 9 bits for the count. */
+    }
+    leaves[i].weight = (leaves[i].weight << 9) | leaves[i].count;
+  }
   qsort(leaves, numsymbols, sizeof(Node), LeafComparator);
+  for (i = 0; i < numsymbols; i++) {
+    leaves[i].weight >>= 9;
+  }
+
+  if (numsymbols - 1 < maxbits) {
+    maxbits = numsymbols - 1;
+  }
 
   /* Initialize node memory pool. */
-  pool.size = 2 * maxbits * (maxbits + 1);
-  pool.nodes = (Node*)malloc(pool.size * sizeof(*pool.nodes));
-  pool.next = pool.nodes;
-  for (i = 0; i < pool.size; i++) {
-    pool.nodes[i].inuse = 0;
-  }
+  nodes = (Node*)malloc(maxbits * 2 * numsymbols * sizeof(Node));
+  pool.next = nodes;
 
   lists = (Node* (*)[2])malloc(maxbits * sizeof(*lists));
   InitLists(&pool, leaves, maxbits, lists);
@@ -237,15 +248,15 @@
   /* In the last list, 2 * numsymbols - 2 active chains need to be created. Two
   are already created in the initialization. Each BoundaryPM run creates one. */
   numBoundaryPMRuns = 2 * numsymbols - 4;
-  for (i = 0; i < numBoundaryPMRuns; i++) {
-    char final = i == numBoundaryPMRuns - 1;
-    BoundaryPM(lists, maxbits, leaves, numsymbols, &pool, maxbits - 1, final);
+  for (i = 0; i < numBoundaryPMRuns - 1; i++) {
+    BoundaryPM(lists, leaves, numsymbols, &pool, maxbits - 1);
   }
+  BoundaryPMFinal(lists, leaves, numsymbols, &pool, maxbits - 1);
 
   ExtractBitLengths(lists[maxbits - 1][1], leaves, bitlengths);
 
   free(lists);
   free(leaves);
-  free(pool.nodes);
+  free(nodes);
   return 0;  /* OK. */
 }
diff --git a/zopfli/src/zopfli/katajainen.h b/zopfli/src/zopfli/katajainen.h
index ee8a91e..5927350 100644
--- a/zopfli/src/zopfli/katajainen.h
+++ b/zopfli/src/zopfli/katajainen.h
@@ -30,7 +30,7 @@
 of 0, and if only a single symbol occurs at least once, its bitlength will be 1,
 and not 0 as would theoretically be needed for a single symbol.
 
-frequencies: The amount of occurances of each symbol.
+frequencies: The amount of occurrences of each symbol.
 n: The amount of symbols.
 maxbits: Maximum bit length, inclusive.
 bitlengths: Output, the bitlengths for the symbol prefix codes.
diff --git a/zopfli/src/zopfli/lz77.c b/zopfli/src/zopfli/lz77.c
index 26186b4..9df899d 100644
--- a/zopfli/src/zopfli/lz77.c
+++ b/zopfli/src/zopfli/lz77.c
@@ -18,37 +18,76 @@
 */
 
 #include "lz77.h"
+#include "symbols.h"
 #include "util.h"
 
 #include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
 
-void ZopfliInitLZ77Store(ZopfliLZ77Store* store) {
+void ZopfliInitLZ77Store(const unsigned char* data, ZopfliLZ77Store* store) {
   store->size = 0;
   store->litlens = 0;
   store->dists = 0;
+  store->pos = 0;
+  store->data = data;
+  store->ll_symbol = 0;
+  store->d_symbol = 0;
+  store->ll_counts = 0;
+  store->d_counts = 0;
 }
 
 void ZopfliCleanLZ77Store(ZopfliLZ77Store* store) {
   free(store->litlens);
   free(store->dists);
+  free(store->pos);
+  free(store->ll_symbol);
+  free(store->d_symbol);
+  free(store->ll_counts);
+  free(store->d_counts);
+}
+
+static size_t CeilDiv(size_t a, size_t b) {
+  return (a + b - 1) / b;
 }
 
 void ZopfliCopyLZ77Store(
     const ZopfliLZ77Store* source, ZopfliLZ77Store* dest) {
   size_t i;
+  size_t llsize = ZOPFLI_NUM_LL * CeilDiv(source->size, ZOPFLI_NUM_LL);
+  size_t dsize = ZOPFLI_NUM_D * CeilDiv(source->size, ZOPFLI_NUM_D);
   ZopfliCleanLZ77Store(dest);
+  ZopfliInitLZ77Store(source->data, dest);
   dest->litlens =
       (unsigned short*)malloc(sizeof(*dest->litlens) * source->size);
   dest->dists = (unsigned short*)malloc(sizeof(*dest->dists) * source->size);
+  dest->pos = (size_t*)malloc(sizeof(*dest->pos) * source->size);
+  dest->ll_symbol =
+      (unsigned short*)malloc(sizeof(*dest->ll_symbol) * source->size);
+  dest->d_symbol =
+      (unsigned short*)malloc(sizeof(*dest->d_symbol) * source->size);
+  dest->ll_counts = (size_t*)malloc(sizeof(*dest->ll_counts) * llsize);
+  dest->d_counts = (size_t*)malloc(sizeof(*dest->d_counts) * dsize);
 
-  if (!dest->litlens || !dest->dists) exit(-1); /* Allocation failed. */
+  /* Allocation failed. */
+  if (!dest->litlens || !dest->dists) exit(-1);
+  if (!dest->pos) exit(-1);
+  if (!dest->ll_symbol || !dest->d_symbol) exit(-1);
+  if (!dest->ll_counts || !dest->d_counts) exit(-1);
 
   dest->size = source->size;
   for (i = 0; i < source->size; i++) {
     dest->litlens[i] = source->litlens[i];
     dest->dists[i] = source->dists[i];
+    dest->pos[i] = source->pos[i];
+    dest->ll_symbol[i] = source->ll_symbol[i];
+    dest->d_symbol[i] = source->d_symbol[i];
+  }
+  for (i = 0; i < llsize; i++) {
+    dest->ll_counts[i] = source->ll_counts[i];
+  }
+  for (i = 0; i < dsize; i++) {
+    dest->d_counts[i] = source->d_counts[i];
   }
 }
 
@@ -57,10 +96,149 @@
 context must be a ZopfliLZ77Store*.
 */
 void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist,
-                           ZopfliLZ77Store* store) {
-  size_t size2 = store->size;  /* Needed for using ZOPFLI_APPEND_DATA twice. */
+                           size_t pos, ZopfliLZ77Store* store) {
+  size_t i;
+  /* Needed for using ZOPFLI_APPEND_DATA multiple times. */
+  size_t origsize = store->size;
+  size_t llstart = ZOPFLI_NUM_LL * (origsize / ZOPFLI_NUM_LL);
+  size_t dstart = ZOPFLI_NUM_D * (origsize / ZOPFLI_NUM_D);
+
+  /* Everytime the index wraps around, a new cumulative histogram is made: we're
+  keeping one histogram value per LZ77 symbol rather than a full histogram for
+  each to save memory. */
+  if (origsize % ZOPFLI_NUM_LL == 0) {
+    size_t llsize = origsize;
+    for (i = 0; i < ZOPFLI_NUM_LL; i++) {
+      ZOPFLI_APPEND_DATA(
+          origsize == 0 ? 0 : store->ll_counts[origsize - ZOPFLI_NUM_LL + i],
+          &store->ll_counts, &llsize);
+    }
+  }
+  if (origsize % ZOPFLI_NUM_D == 0) {
+    size_t dsize = origsize;
+    for (i = 0; i < ZOPFLI_NUM_D; i++) {
+      ZOPFLI_APPEND_DATA(
+          origsize == 0 ? 0 : store->d_counts[origsize - ZOPFLI_NUM_D + i],
+          &store->d_counts, &dsize);
+    }
+  }
+
   ZOPFLI_APPEND_DATA(length, &store->litlens, &store->size);
-  ZOPFLI_APPEND_DATA(dist, &store->dists, &size2);
+  store->size = origsize;
+  ZOPFLI_APPEND_DATA(dist, &store->dists, &store->size);
+  store->size = origsize;
+  ZOPFLI_APPEND_DATA(pos, &store->pos, &store->size);
+  assert(length < 259);
+
+  if (dist == 0) {
+    store->size = origsize;
+    ZOPFLI_APPEND_DATA(length, &store->ll_symbol, &store->size);
+    store->size = origsize;
+    ZOPFLI_APPEND_DATA(0, &store->d_symbol, &store->size);
+    store->ll_counts[llstart + length]++;
+  } else {
+    store->size = origsize;
+    ZOPFLI_APPEND_DATA(ZopfliGetLengthSymbol(length),
+                       &store->ll_symbol, &store->size);
+    store->size = origsize;
+    ZOPFLI_APPEND_DATA(ZopfliGetDistSymbol(dist),
+                       &store->d_symbol, &store->size);
+    store->ll_counts[llstart + ZopfliGetLengthSymbol(length)]++;
+    store->d_counts[dstart + ZopfliGetDistSymbol(dist)]++;
+  }
+}
+
+void ZopfliAppendLZ77Store(const ZopfliLZ77Store* store,
+                           ZopfliLZ77Store* target) {
+  size_t i;
+  for (i = 0; i < store->size; i++) {
+    ZopfliStoreLitLenDist(store->litlens[i], store->dists[i],
+                          store->pos[i], target);
+  }
+}
+
+size_t ZopfliLZ77GetByteRange(const ZopfliLZ77Store* lz77,
+                              size_t lstart, size_t lend) {
+  size_t l = lend - 1;
+  if (lstart == lend) return 0;
+  return lz77->pos[l] + ((lz77->dists[l] == 0) ?
+      1 : lz77->litlens[l]) - lz77->pos[lstart];
+}
+
+static void ZopfliLZ77GetHistogramAt(const ZopfliLZ77Store* lz77, size_t lpos,
+                                     size_t* ll_counts, size_t* d_counts) {
+  /* The real histogram is created by using the histogram for this chunk, but
+  all superfluous values of this chunk subtracted. */
+  size_t llpos = ZOPFLI_NUM_LL * (lpos / ZOPFLI_NUM_LL);
+  size_t dpos = ZOPFLI_NUM_D * (lpos / ZOPFLI_NUM_D);
+  size_t i;
+  for (i = 0; i < ZOPFLI_NUM_LL; i++) {
+    ll_counts[i] = lz77->ll_counts[llpos + i];
+  }
+  for (i = lpos + 1; i < llpos + ZOPFLI_NUM_LL && i < lz77->size; i++) {
+    ll_counts[lz77->ll_symbol[i]]--;
+  }
+  for (i = 0; i < ZOPFLI_NUM_D; i++) {
+    d_counts[i] = lz77->d_counts[dpos + i];
+  }
+  for (i = lpos + 1; i < dpos + ZOPFLI_NUM_D && i < lz77->size; i++) {
+    if (lz77->dists[i] != 0) d_counts[lz77->d_symbol[i]]--;
+  }
+}
+
+void ZopfliLZ77GetHistogram(const ZopfliLZ77Store* lz77,
+                           size_t lstart, size_t lend,
+                           size_t* ll_counts, size_t* d_counts) {
+  size_t i;
+  if (lstart + ZOPFLI_NUM_LL * 3 > lend) {
+    memset(ll_counts, 0, sizeof(*ll_counts) * ZOPFLI_NUM_LL);
+    memset(d_counts, 0, sizeof(*d_counts) * ZOPFLI_NUM_D);
+    for (i = lstart; i < lend; i++) {
+      ll_counts[lz77->ll_symbol[i]]++;
+      if (lz77->dists[i] != 0) d_counts[lz77->d_symbol[i]]++;
+    }
+  } else {
+    /* Subtract the cumulative histograms at the end and the start to get the
+    histogram for this range. */
+    ZopfliLZ77GetHistogramAt(lz77, lend - 1, ll_counts, d_counts);
+    if (lstart > 0) {
+      size_t ll_counts2[ZOPFLI_NUM_LL];
+      size_t d_counts2[ZOPFLI_NUM_D];
+      ZopfliLZ77GetHistogramAt(lz77, lstart - 1, ll_counts2, d_counts2);
+
+      for (i = 0; i < ZOPFLI_NUM_LL; i++) {
+        ll_counts[i] -= ll_counts2[i];
+      }
+      for (i = 0; i < ZOPFLI_NUM_D; i++) {
+        d_counts[i] -= d_counts2[i];
+      }
+    }
+  }
+}
+
+void ZopfliInitBlockState(const ZopfliOptions* options,
+                          size_t blockstart, size_t blockend, int add_lmc,
+                          ZopfliBlockState* s) {
+  s->options = options;
+  s->blockstart = blockstart;
+  s->blockend = blockend;
+#ifdef ZOPFLI_LONGEST_MATCH_CACHE
+  if (add_lmc) {
+    s->lmc = (ZopfliLongestMatchCache*)malloc(sizeof(ZopfliLongestMatchCache));
+    ZopfliInitCache(blockend - blockstart, s->lmc);
+  } else {
+    s->lmc = 0;
+  }
+#endif
+}
+
+void ZopfliCleanBlockState(ZopfliBlockState* s) {
+#ifdef ZOPFLI_LONGEST_MATCH_CACHE
+  if (s->lmc) {
+    ZopfliCleanCache(s->lmc);
+    free(s->lmc);
+  }
+#endif
 }
 
 /*
@@ -365,7 +543,7 @@
 
 void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
                       size_t instart, size_t inend,
-                      ZopfliLZ77Store* store) {
+                      ZopfliLZ77Store* store, ZopfliHash* h) {
   size_t i = 0, j;
   unsigned short leng;
   unsigned short dist;
@@ -374,9 +552,6 @@
       ? instart - ZOPFLI_WINDOW_SIZE : 0;
   unsigned short dummysublen[259];
 
-  ZopfliHash hash;
-  ZopfliHash* h = &hash;
-
 #ifdef ZOPFLI_LAZY_MATCHING
   /* Lazy matching. */
   unsigned prev_length = 0;
@@ -387,7 +562,7 @@
 
   if (instart == inend) return;
 
-  ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
+  ZopfliResetHash(ZOPFLI_WINDOW_SIZE, h);
   ZopfliWarmupHash(in, windowstart, inend, h);
   for (i = windowstart; i < instart; i++) {
     ZopfliUpdateHash(in, i, inend, h);
@@ -406,7 +581,7 @@
     if (match_available) {
       match_available = 0;
       if (lengthscore > prevlengthscore + 1) {
-        ZopfliStoreLitLenDist(in[i - 1], 0, store);
+        ZopfliStoreLitLenDist(in[i - 1], 0, i - 1, store);
         if (lengthscore >= ZOPFLI_MIN_MATCH && leng < ZOPFLI_MAX_MATCH) {
           match_available = 1;
           prev_length = leng;
@@ -420,7 +595,7 @@
         lengthscore = prevlengthscore;
         /* Add to output. */
         ZopfliVerifyLenDist(in, inend, i - 1, dist, leng);
-        ZopfliStoreLitLenDist(leng, dist, store);
+        ZopfliStoreLitLenDist(leng, dist, i - 1, store);
         for (j = 2; j < leng; j++) {
           assert(i < inend);
           i++;
@@ -441,10 +616,10 @@
     /* Add to output. */
     if (lengthscore >= ZOPFLI_MIN_MATCH) {
       ZopfliVerifyLenDist(in, inend, i, dist, leng);
-      ZopfliStoreLitLenDist(leng, dist, store);
+      ZopfliStoreLitLenDist(leng, dist, i, store);
     } else {
       leng = 1;
-      ZopfliStoreLitLenDist(in[i], 0, store);
+      ZopfliStoreLitLenDist(in[i], 0, i, store);
     }
     for (j = 1; j < leng; j++) {
       assert(i < inend);
@@ -452,31 +627,4 @@
       ZopfliUpdateHash(in, i, inend, h);
     }
   }
-
-  ZopfliCleanHash(h);
-}
-
-void ZopfliLZ77Counts(const unsigned short* litlens,
-                      const unsigned short* dists,
-                      size_t start, size_t end,
-                      size_t* ll_count, size_t* d_count) {
-  size_t i;
-
-  for (i = 0; i < 288; i++) {
-    ll_count[i] = 0;
-  }
-  for (i = 0; i < 32; i++) {
-    d_count[i] = 0;
-  }
-
-  for (i = start; i < end; i++) {
-    if (dists[i] == 0) {
-      ll_count[litlens[i]]++;
-    } else {
-      ll_count[ZopfliGetLengthSymbol(litlens[i])]++;
-      d_count[ZopfliGetDistSymbol(dists[i])]++;
-    }
-  }
-
-  ll_count[256] = 1;  /* End symbol. */
 }
diff --git a/zopfli/src/zopfli/lz77.h b/zopfli/src/zopfli/lz77.h
index 55186a7..dc8597a 100644
--- a/zopfli/src/zopfli/lz77.h
+++ b/zopfli/src/zopfli/lz77.h
@@ -46,13 +46,37 @@
   unsigned short* dists;  /* If 0: indicates literal in corresponding litlens,
       if > 0: length in corresponding litlens, this is the distance. */
   size_t size;
+
+  const unsigned char* data;  /* original data */
+  size_t* pos;  /* position in data where this LZ77 command begins */
+
+  unsigned short* ll_symbol;
+  unsigned short* d_symbol;
+
+  /* Cumulative histograms wrapping around per chunk. Each chunk has the amount
+  of distinct symbols as length, so using 1 value per LZ77 symbol, we have a
+  precise histogram at every N symbols, and the rest can be calculated by
+  looping through the actual symbols of this chunk. */
+  size_t* ll_counts;
+  size_t* d_counts;
 } ZopfliLZ77Store;
 
-void ZopfliInitLZ77Store(ZopfliLZ77Store* store);
+void ZopfliInitLZ77Store(const unsigned char* data, ZopfliLZ77Store* store);
 void ZopfliCleanLZ77Store(ZopfliLZ77Store* store);
 void ZopfliCopyLZ77Store(const ZopfliLZ77Store* source, ZopfliLZ77Store* dest);
 void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist,
-                           ZopfliLZ77Store* store);
+                           size_t pos, ZopfliLZ77Store* store);
+void ZopfliAppendLZ77Store(const ZopfliLZ77Store* store,
+                           ZopfliLZ77Store* target);
+/* Gets the amount of raw bytes that this range of LZ77 symbols spans. */
+size_t ZopfliLZ77GetByteRange(const ZopfliLZ77Store* lz77,
+                              size_t lstart, size_t lend);
+/* Gets the histogram of lit/len and dist symbols in the given range, using the
+cumulative histograms, so faster than adding one by one for large range. Does
+not add the one end symbol of value 256. */
+void ZopfliLZ77GetHistogram(const ZopfliLZ77Store* lz77,
+                            size_t lstart, size_t lend,
+                            size_t* ll_counts, size_t* d_counts);
 
 /*
 Some state information for compressing a block.
@@ -72,6 +96,11 @@
   size_t blockend;
 } ZopfliBlockState;
 
+void ZopfliInitBlockState(const ZopfliOptions* options,
+                          size_t blockstart, size_t blockend, int add_lmc,
+                          ZopfliBlockState* s);
+void ZopfliCleanBlockState(ZopfliBlockState* s);
+
 /*
 Finds the longest match (length and corresponding distance) for LZ77
 compression.
@@ -100,22 +129,6 @@
                          unsigned short dist, unsigned short length);
 
 /*
-Counts the number of literal, length and distance symbols in the given lz77
-arrays.
-litlens: lz77 lit/lengths
-dists: ll77 distances
-start: where to begin counting in litlens and dists
-end: where to stop counting in litlens and dists (not inclusive)
-ll_count: count of each lit/len symbol, must have size 288 (see deflate
-    standard)
-d_count: count of each dist symbol, must have size 32 (see deflate standard)
-*/
-void ZopfliLZ77Counts(const unsigned short* litlens,
-                      const unsigned short* dists,
-                      size_t start, size_t end,
-                      size_t* ll_count, size_t* d_count);
-
-/*
 Does LZ77 using an algorithm similar to gzip, with lazy matching, rather than
 with the slow but better "squeeze" implementation.
 The result is placed in the ZopfliLZ77Store.
@@ -124,6 +137,6 @@
 */
 void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
                       size_t instart, size_t inend,
-                      ZopfliLZ77Store* store);
+                      ZopfliLZ77Store* store, ZopfliHash* h);
 
 #endif  /* ZOPFLI_LZ77_H_ */
diff --git a/zopfli/src/zopfli/squeeze.c b/zopfli/src/zopfli/squeeze.c
index 09e7e2e..a695c18 100644
--- a/zopfli/src/zopfli/squeeze.c
+++ b/zopfli/src/zopfli/squeeze.c
@@ -25,35 +25,40 @@
 
 #include "blocksplitter.h"
 #include "deflate.h"
+#include "symbols.h"
 #include "tree.h"
 #include "util.h"
 
 typedef struct SymbolStats {
   /* The literal and length symbols. */
-  size_t litlens[288];
+  size_t litlens[ZOPFLI_NUM_LL];
   /* The 32 unique dist symbols, not the 32768 possible dists. */
-  size_t dists[32];
+  size_t dists[ZOPFLI_NUM_D];
 
-  double ll_symbols[288];  /* Length of each lit/len symbol in bits. */
-  double d_symbols[32];  /* Length of each dist symbol in bits. */
+  /* Length of each lit/len symbol in bits. */
+  double ll_symbols[ZOPFLI_NUM_LL];
+  /* Length of each dist symbol in bits. */
+  double d_symbols[ZOPFLI_NUM_D];
 } SymbolStats;
 
 /* Sets everything to 0. */
 static void InitStats(SymbolStats* stats) {
-  memset(stats->litlens, 0, 288 * sizeof(stats->litlens[0]));
-  memset(stats->dists, 0, 32 * sizeof(stats->dists[0]));
+  memset(stats->litlens, 0, ZOPFLI_NUM_LL * sizeof(stats->litlens[0]));
+  memset(stats->dists, 0, ZOPFLI_NUM_D * sizeof(stats->dists[0]));
 
-  memset(stats->ll_symbols, 0, 288 * sizeof(stats->ll_symbols[0]));
-  memset(stats->d_symbols, 0, 32 * sizeof(stats->d_symbols[0]));
+  memset(stats->ll_symbols, 0, ZOPFLI_NUM_LL * sizeof(stats->ll_symbols[0]));
+  memset(stats->d_symbols, 0, ZOPFLI_NUM_D * sizeof(stats->d_symbols[0]));
 }
 
 static void CopyStats(SymbolStats* source, SymbolStats* dest) {
-  memcpy(dest->litlens, source->litlens, 288 * sizeof(dest->litlens[0]));
-  memcpy(dest->dists, source->dists, 32 * sizeof(dest->dists[0]));
+  memcpy(dest->litlens, source->litlens,
+         ZOPFLI_NUM_LL * sizeof(dest->litlens[0]));
+  memcpy(dest->dists, source->dists, ZOPFLI_NUM_D * sizeof(dest->dists[0]));
 
   memcpy(dest->ll_symbols, source->ll_symbols,
-         288 * sizeof(dest->ll_symbols[0]));
-  memcpy(dest->d_symbols, source->d_symbols, 32 * sizeof(dest->d_symbols[0]));
+         ZOPFLI_NUM_LL * sizeof(dest->ll_symbols[0]));
+  memcpy(dest->d_symbols, source->d_symbols,
+         ZOPFLI_NUM_D * sizeof(dest->d_symbols[0]));
 }
 
 /* Adds the bit lengths. */
@@ -61,11 +66,11 @@
                                 const SymbolStats* stats2, double w2,
                                 SymbolStats* result) {
   size_t i;
-  for (i = 0; i < 288; i++) {
+  for (i = 0; i < ZOPFLI_NUM_LL; i++) {
     result->litlens[i] =
         (size_t) (stats1->litlens[i] * w1 + stats2->litlens[i] * w2);
   }
-  for (i = 0; i < 32; i++) {
+  for (i = 0; i < ZOPFLI_NUM_D; i++) {
     result->dists[i] =
         (size_t) (stats1->dists[i] * w1 + stats2->dists[i] * w2);
   }
@@ -96,15 +101,15 @@
 }
 
 static void RandomizeStatFreqs(RanState* state, SymbolStats* stats) {
-  RandomizeFreqs(state, stats->litlens, 288);
-  RandomizeFreqs(state, stats->dists, 32);
+  RandomizeFreqs(state, stats->litlens, ZOPFLI_NUM_LL);
+  RandomizeFreqs(state, stats->dists, ZOPFLI_NUM_D);
   stats->litlens[256] = 1;  /* End symbol. */
 }
 
 static void ClearStatFreqs(SymbolStats* stats) {
   size_t i;
-  for (i = 0; i < 288; i++) stats->litlens[i] = 0;
-  for (i = 0; i < 32; i++) stats->dists[i] = 0;
+  for (i = 0; i < ZOPFLI_NUM_LL; i++) stats->litlens[i] = 0;
+  for (i = 0; i < ZOPFLI_NUM_D; i++) stats->dists[i] = 0;
 }
 
 /*
@@ -126,7 +131,7 @@
     int dbits = ZopfliGetDistExtraBits(dist);
     int lbits = ZopfliGetLengthExtraBits(litlen);
     int lsym = ZopfliGetLengthSymbol(litlen);
-    double cost = 0;
+    int cost = 0;
     if (lsym <= 279) cost += 7;
     else cost += 8;
     cost += 5;  /* Every dist symbol has length 5. */
@@ -147,7 +152,7 @@
     int lbits = ZopfliGetLengthExtraBits(litlen);
     int dsym = ZopfliGetDistSymbol(dist);
     int dbits = ZopfliGetDistExtraBits(dist);
-    return stats->ll_symbols[lsym] + lbits + stats->d_symbols[dsym] + dbits;
+    return lbits + dbits + stats->ll_symbols[lsym] + stats->d_symbols[dsym];
   }
 }
 
@@ -192,6 +197,10 @@
   return costmodel(bestlength, bestdist, costcontext);
 }
 
+static size_t zopfli_min(size_t a, size_t b) {
+  return a < b ? a : b;
+}
+
 /*
 Performs the forward pass for "squeeze". Gets the most optimal length to reach
 every byte from a previous byte, using cost calculations.
@@ -209,27 +218,23 @@
                              const unsigned char* in,
                              size_t instart, size_t inend,
                              CostModelFun* costmodel, void* costcontext,
-                             unsigned short* length_array) {
+                             unsigned short* length_array,
+                             ZopfliHash* h, float* costs) {
   /* Best cost to get here so far. */
   size_t blocksize = inend - instart;
-  float* costs;
-  size_t i = 0, k;
+  size_t i = 0, k, kend;
   unsigned short leng;
   unsigned short dist;
   unsigned short sublen[259];
   size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
       ? instart - ZOPFLI_WINDOW_SIZE : 0;
-  ZopfliHash hash;
-  ZopfliHash* h = &hash;
   double result;
   double mincost = GetCostModelMinCost(costmodel, costcontext);
+  double mincostaddcostj;
 
   if (instart == inend) return 0;
 
-  costs = (float*)malloc(sizeof(float) * (blocksize + 1));
-  if (!costs) exit(-1); /* Allocation failed. */
-
-  ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
+  ZopfliResetHash(ZOPFLI_WINDOW_SIZE, h);
   ZopfliWarmupHash(in, windowstart, inend, h);
   for (i = windowstart; i < instart; i++) {
     ZopfliUpdateHash(in, i, inend, h);
@@ -270,7 +275,7 @@
 
     /* Literal. */
     if (i + 1 <= inend) {
-      double newCost = costs[j] + costmodel(in[i], 0, costcontext);
+      double newCost = costmodel(in[i], 0, costcontext) + costs[j];
       assert(newCost >= 0);
       if (newCost < costs[j + 1]) {
         costs[j + 1] = newCost;
@@ -278,14 +283,16 @@
       }
     }
     /* Lengths. */
-    for (k = 3; k <= leng && i + k <= inend; k++) {
+    kend = zopfli_min(leng, inend-i);
+    mincostaddcostj = mincost + costs[j];
+    for (k = 3; k <= kend; k++) {
       double newCost;
 
       /* Calling the cost model is expensive, avoid this if we are already at
       the minimum possible cost that it can return. */
-     if (costs[j + k] - costs[j] <= mincost) continue;
+     if (costs[j + k] <= mincostaddcostj) continue;
 
-      newCost = costs[j] + costmodel(k, sublen[k], costcontext);
+      newCost = costmodel(k, sublen[k], costcontext) + costs[j];
       assert(newCost >= 0);
       if (newCost < costs[j + k]) {
         assert(k <= ZOPFLI_MAX_MATCH);
@@ -298,9 +305,6 @@
   assert(costs[blocksize] >= 0);
   result = costs[blocksize];
 
-  ZopfliCleanHash(h);
-  free(costs);
-
   return result;
 }
 
@@ -334,19 +338,16 @@
 static void FollowPath(ZopfliBlockState* s,
                        const unsigned char* in, size_t instart, size_t inend,
                        unsigned short* path, size_t pathsize,
-                       ZopfliLZ77Store* store) {
+                       ZopfliLZ77Store* store, ZopfliHash *h) {
   size_t i, j, pos = 0;
   size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
       ? instart - ZOPFLI_WINDOW_SIZE : 0;
 
   size_t total_length_test = 0;
 
-  ZopfliHash hash;
-  ZopfliHash* h = &hash;
-
   if (instart == inend) return;
 
-  ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
+  ZopfliResetHash(ZOPFLI_WINDOW_SIZE, h);
   ZopfliWarmupHash(in, windowstart, inend, h);
   for (i = windowstart; i < instart; i++) {
     ZopfliUpdateHash(in, i, inend, h);
@@ -369,11 +370,11 @@
                              &dist, &dummy_length);
       assert(!(dummy_length != length && length > 2 && dummy_length > 2));
       ZopfliVerifyLenDist(in, inend, pos, dist, length);
-      ZopfliStoreLitLenDist(length, dist, store);
+      ZopfliStoreLitLenDist(length, dist, pos, store);
       total_length_test += length;
     } else {
       length = 1;
-      ZopfliStoreLitLenDist(in[pos], 0, store);
+      ZopfliStoreLitLenDist(in[pos], 0, pos, store);
       total_length_test++;
     }
 
@@ -385,14 +386,12 @@
 
     pos += length;
   }
-
-  ZopfliCleanHash(h);
 }
 
 /* Calculates the entropy of the statistics */
 static void CalculateStatistics(SymbolStats* stats) {
-  ZopfliCalculateEntropy(stats->litlens, 288, stats->ll_symbols);
-  ZopfliCalculateEntropy(stats->dists, 32, stats->d_symbols);
+  ZopfliCalculateEntropy(stats->litlens, ZOPFLI_NUM_LL, stats->ll_symbols);
+  ZopfliCalculateEntropy(stats->dists, ZOPFLI_NUM_D, stats->d_symbols);
 }
 
 /* Appends the symbol statistics from the store. */
@@ -414,14 +413,13 @@
 /*
 Does a single run for ZopfliLZ77Optimal. For good compression, repeated runs
 with updated statistics should be performed.
-
 s: the block state
 in: the input data array
 instart: where to start
 inend: where to stop (not inclusive)
 path: pointer to dynamically allocated memory to store the path
 pathsize: pointer to the size of the dynamic path array
-length_array: array if size (inend - instart) used to store lengths
+length_array: array of size (inend - instart) used to store lengths
 costmodel: function to use as the cost model for this squeeze run
 costcontext: abstract context for the costmodel function
 store: place to output the LZ77 data
@@ -432,20 +430,22 @@
     const unsigned char* in, size_t instart, size_t inend,
     unsigned short** path, size_t* pathsize,
     unsigned short* length_array, CostModelFun* costmodel,
-    void* costcontext, ZopfliLZ77Store* store) {
-  double cost = GetBestLengths(
-      s, in, instart, inend, costmodel, costcontext, length_array);
+    void* costcontext, ZopfliLZ77Store* store,
+    ZopfliHash* h, float* costs) {
+  double cost = GetBestLengths(s, in, instart, inend, costmodel,
+                costcontext, length_array, h, costs);
   free(*path);
   *path = 0;
   *pathsize = 0;
   TraceBackwards(inend - instart, length_array, path, pathsize);
-  FollowPath(s, in, instart, inend, *path, *pathsize, store);
+  FollowPath(s, in, instart, inend, *path, *pathsize, store, h);
   assert(cost < ZOPFLI_LARGE_FLOAT);
   return cost;
 }
 
 void ZopfliLZ77Optimal(ZopfliBlockState *s,
                        const unsigned char* in, size_t instart, size_t inend,
+                       int numiterations,
                        ZopfliLZ77Store* store) {
   /* Dist to get to here with smallest cost. */
   size_t blocksize = inend - instart;
@@ -454,8 +454,11 @@
   unsigned short* path = 0;
   size_t pathsize = 0;
   ZopfliLZ77Store currentstore;
+  ZopfliHash hash;
+  ZopfliHash* h = &hash;
   SymbolStats stats, beststats, laststats;
   int i;
+  float* costs = (float*)malloc(sizeof(float) * (blocksize + 1));
   double cost;
   double bestcost = ZOPFLI_LARGE_FLOAT;
   double lastcost = 0;
@@ -463,29 +466,30 @@
   RanState ran_state;
   int lastrandomstep = -1;
 
+  if (!costs) exit(-1); /* Allocation failed. */
   if (!length_array) exit(-1); /* Allocation failed. */
 
   InitRanState(&ran_state);
   InitStats(&stats);
-  ZopfliInitLZ77Store(&currentstore);
+  ZopfliInitLZ77Store(in, &currentstore);
+  ZopfliAllocHash(ZOPFLI_WINDOW_SIZE, h);
 
   /* Do regular deflate, then loop multiple shortest path runs, each time using
   the statistics of the previous run. */
 
   /* Initial run. */
-  ZopfliLZ77Greedy(s, in, instart, inend, &currentstore);
+  ZopfliLZ77Greedy(s, in, instart, inend, &currentstore, h);
   GetStatistics(&currentstore, &stats);
 
   /* Repeat statistics with each time the cost model from the previous stat
   run. */
-  for (i = 0; i < s->options->numiterations; i++) {
+  for (i = 0; i < numiterations; i++) {
     ZopfliCleanLZ77Store(&currentstore);
-    ZopfliInitLZ77Store(&currentstore);
+    ZopfliInitLZ77Store(in, &currentstore);
     LZ77OptimalRun(s, in, instart, inend, &path, &pathsize,
                    length_array, GetCostStat, (void*)&stats,
-                   &currentstore);
-    cost = ZopfliCalculateBlockSize(currentstore.litlens, currentstore.dists,
-                                    0, currentstore.size, 2);
+                   &currentstore, h, costs);
+    cost = ZopfliCalculateBlockSize(&currentstore, 0, currentstore.size, 2);
     if (s->options->verbose_more || (s->options->verbose && cost < bestcost)) {
       fprintf(stderr, "Iteration %d: %d bit\n", i, (int) cost);
     }
@@ -516,7 +520,9 @@
 
   free(length_array);
   free(path);
+  free(costs);
   ZopfliCleanLZ77Store(&currentstore);
+  ZopfliCleanHash(h);
 }
 
 void ZopfliLZ77OptimalFixed(ZopfliBlockState *s,
@@ -530,17 +536,25 @@
       (unsigned short*)malloc(sizeof(unsigned short) * (blocksize + 1));
   unsigned short* path = 0;
   size_t pathsize = 0;
+  ZopfliHash hash;
+  ZopfliHash* h = &hash;
+  float* costs = (float*)malloc(sizeof(float) * (blocksize + 1));
 
+  if (!costs) exit(-1); /* Allocation failed. */
   if (!length_array) exit(-1); /* Allocation failed. */
 
+  ZopfliAllocHash(ZOPFLI_WINDOW_SIZE, h);
+
   s->blockstart = instart;
   s->blockend = inend;
 
   /* Shortest path for fixed tree This one should give the shortest possible
   result for fixed tree, no repeated runs are needed since the tree is known. */
   LZ77OptimalRun(s, in, instart, inend, &path, &pathsize,
-                 length_array, GetCostFixed, 0, store);
+                 length_array, GetCostFixed, 0, store, h, costs);
 
   free(length_array);
   free(path);
+  free(costs);
+  ZopfliCleanHash(h);
 }
diff --git a/zopfli/src/zopfli/squeeze.h b/zopfli/src/zopfli/squeeze.h
index e850aaa..48bb775 100644
--- a/zopfli/src/zopfli/squeeze.h
+++ b/zopfli/src/zopfli/squeeze.h
@@ -40,6 +40,7 @@
 */
 void ZopfliLZ77Optimal(ZopfliBlockState *s,
                        const unsigned char* in, size_t instart, size_t inend,
+                       int numiterations,
                        ZopfliLZ77Store* store);
 
 /*
diff --git a/zopfli/src/zopfli/symbols.h b/zopfli/src/zopfli/symbols.h
new file mode 100644
index 0000000..b49df06
--- /dev/null
+++ b/zopfli/src/zopfli/symbols.h
@@ -0,0 +1,239 @@
+/*
+Copyright 2016 Google Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Author: lode.vandevenne@gmail.com (Lode Vandevenne)
+Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
+*/
+
+/*
+Utilities for using the lz77 symbols of the deflate spec.
+*/
+
+#ifndef ZOPFLI_SYMBOLS_H_
+#define ZOPFLI_SYMBOLS_H_
+
+/* __has_builtin available in clang */
+#ifdef __has_builtin
+# if __has_builtin(__builtin_clz)
+#   define ZOPFLI_HAS_BUILTIN_CLZ
+# endif
+/* __builtin_clz available beginning with GCC 3.4 */
+#elif __GNUC__ * 100 + __GNUC_MINOR__ >= 304
+# define ZOPFLI_HAS_BUILTIN_CLZ
+#endif
+
+/* Gets the amount of extra bits for the given dist, cfr. the DEFLATE spec. */
+static int ZopfliGetDistExtraBits(int dist) {
+#ifdef ZOPFLI_HAS_BUILTIN_CLZ
+  if (dist < 5) return 0;
+  return (31 ^ __builtin_clz(dist - 1)) - 1; /* log2(dist - 1) - 1 */
+#else
+  if (dist < 5) return 0;
+  else if (dist < 9) return 1;
+  else if (dist < 17) return 2;
+  else if (dist < 33) return 3;
+  else if (dist < 65) return 4;
+  else if (dist < 129) return 5;
+  else if (dist < 257) return 6;
+  else if (dist < 513) return 7;
+  else if (dist < 1025) return 8;
+  else if (dist < 2049) return 9;
+  else if (dist < 4097) return 10;
+  else if (dist < 8193) return 11;
+  else if (dist < 16385) return 12;
+  else return 13;
+#endif
+}
+
+/* Gets value of the extra bits for the given dist, cfr. the DEFLATE spec. */
+static int ZopfliGetDistExtraBitsValue(int dist) {
+#ifdef ZOPFLI_HAS_BUILTIN_CLZ
+  if (dist < 5) {
+    return 0;
+  } else {
+    int l = 31 ^ __builtin_clz(dist - 1); /* log2(dist - 1) */
+    return (dist - (1 + (1 << l))) & ((1 << (l - 1)) - 1);
+  }
+#else
+  if (dist < 5) return 0;
+  else if (dist < 9) return (dist - 5) & 1;
+  else if (dist < 17) return (dist - 9) & 3;
+  else if (dist < 33) return (dist - 17) & 7;
+  else if (dist < 65) return (dist - 33) & 15;
+  else if (dist < 129) return (dist - 65) & 31;
+  else if (dist < 257) return (dist - 129) & 63;
+  else if (dist < 513) return (dist - 257) & 127;
+  else if (dist < 1025) return (dist - 513) & 255;
+  else if (dist < 2049) return (dist - 1025) & 511;
+  else if (dist < 4097) return (dist - 2049) & 1023;
+  else if (dist < 8193) return (dist - 4097) & 2047;
+  else if (dist < 16385) return (dist - 8193) & 4095;
+  else return (dist - 16385) & 8191;
+#endif
+}
+
+/* Gets the symbol for the given dist, cfr. the DEFLATE spec. */
+static int ZopfliGetDistSymbol(int dist) {
+#ifdef ZOPFLI_HAS_BUILTIN_CLZ
+  if (dist < 5) {
+    return dist - 1;
+  } else {
+    int l = (31 ^ __builtin_clz(dist - 1)); /* log2(dist - 1) */
+    int r = ((dist - 1) >> (l - 1)) & 1;
+    return l * 2 + r;
+  }
+#else
+  if (dist < 193) {
+    if (dist < 13) {  /* dist 0..13. */
+      if (dist < 5) return dist - 1;
+      else if (dist < 7) return 4;
+      else if (dist < 9) return 5;
+      else return 6;
+    } else {  /* dist 13..193. */
+      if (dist < 17) return 7;
+      else if (dist < 25) return 8;
+      else if (dist < 33) return 9;
+      else if (dist < 49) return 10;
+      else if (dist < 65) return 11;
+      else if (dist < 97) return 12;
+      else if (dist < 129) return 13;
+      else return 14;
+    }
+  } else {
+    if (dist < 2049) {  /* dist 193..2049. */
+      if (dist < 257) return 15;
+      else if (dist < 385) return 16;
+      else if (dist < 513) return 17;
+      else if (dist < 769) return 18;
+      else if (dist < 1025) return 19;
+      else if (dist < 1537) return 20;
+      else return 21;
+    } else {  /* dist 2049..32768. */
+      if (dist < 3073) return 22;
+      else if (dist < 4097) return 23;
+      else if (dist < 6145) return 24;
+      else if (dist < 8193) return 25;
+      else if (dist < 12289) return 26;
+      else if (dist < 16385) return 27;
+      else if (dist < 24577) return 28;
+      else return 29;
+    }
+  }
+#endif
+}
+
+/* Gets the amount of extra bits for the given length, cfr. the DEFLATE spec. */
+static int ZopfliGetLengthExtraBits(int l) {
+  static const int table[259] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0
+  };
+  return table[l];
+}
+
+/* Gets value of the extra bits for the given length, cfr. the DEFLATE spec. */
+static int ZopfliGetLengthExtraBitsValue(int l) {
+  static const int table[259] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 0,
+    1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
+    6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6,
+    7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+    13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2,
+    3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+    10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
+    29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+    18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6,
+    7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
+    27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0
+  };
+  return table[l];
+}
+
+/*
+Gets the symbol for the given length, cfr. the DEFLATE spec.
+Returns the symbol in the range [257-285] (inclusive)
+*/
+static int ZopfliGetLengthSymbol(int l) {
+  static const int table[259] = {
+    0, 0, 0, 257, 258, 259, 260, 261, 262, 263, 264,
+    265, 265, 266, 266, 267, 267, 268, 268,
+    269, 269, 269, 269, 270, 270, 270, 270,
+    271, 271, 271, 271, 272, 272, 272, 272,
+    273, 273, 273, 273, 273, 273, 273, 273,
+    274, 274, 274, 274, 274, 274, 274, 274,
+    275, 275, 275, 275, 275, 275, 275, 275,
+    276, 276, 276, 276, 276, 276, 276, 276,
+    277, 277, 277, 277, 277, 277, 277, 277,
+    277, 277, 277, 277, 277, 277, 277, 277,
+    278, 278, 278, 278, 278, 278, 278, 278,
+    278, 278, 278, 278, 278, 278, 278, 278,
+    279, 279, 279, 279, 279, 279, 279, 279,
+    279, 279, 279, 279, 279, 279, 279, 279,
+    280, 280, 280, 280, 280, 280, 280, 280,
+    280, 280, 280, 280, 280, 280, 280, 280,
+    281, 281, 281, 281, 281, 281, 281, 281,
+    281, 281, 281, 281, 281, 281, 281, 281,
+    281, 281, 281, 281, 281, 281, 281, 281,
+    281, 281, 281, 281, 281, 281, 281, 281,
+    282, 282, 282, 282, 282, 282, 282, 282,
+    282, 282, 282, 282, 282, 282, 282, 282,
+    282, 282, 282, 282, 282, 282, 282, 282,
+    282, 282, 282, 282, 282, 282, 282, 282,
+    283, 283, 283, 283, 283, 283, 283, 283,
+    283, 283, 283, 283, 283, 283, 283, 283,
+    283, 283, 283, 283, 283, 283, 283, 283,
+    283, 283, 283, 283, 283, 283, 283, 283,
+    284, 284, 284, 284, 284, 284, 284, 284,
+    284, 284, 284, 284, 284, 284, 284, 284,
+    284, 284, 284, 284, 284, 284, 284, 284,
+    284, 284, 284, 284, 284, 284, 284, 285
+  };
+  return table[l];
+}
+
+/* Gets the amount of extra bits for the given length symbol. */
+static int ZopfliGetLengthSymbolExtraBits(int s) {
+  static const int table[29] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
+    3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
+  };
+  return table[s - 257];
+}
+
+/* Gets the amount of extra bits for the given distance symbol. */
+static int ZopfliGetDistSymbolExtraBits(int s) {
+  static const int table[30] = {
+    0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
+    9, 9, 10, 10, 11, 11, 12, 12, 13, 13
+  };
+  return table[s];
+}
+
+#endif  /* ZOPFLI_SYMBOLS_H_ */
diff --git a/zopfli/src/zopfli/util.c b/zopfli/src/zopfli/util.c
index d207145..428961c 100644
--- a/zopfli/src/zopfli/util.c
+++ b/zopfli/src/zopfli/util.c
@@ -25,184 +25,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-int ZopfliGetDistExtraBits(int dist) {
-#ifdef __GNUC__
-  if (dist < 5) return 0;
-  return (31 ^ __builtin_clz(dist - 1)) - 1; /* log2(dist - 1) - 1 */
-#else
-  if (dist < 5) return 0;
-  else if (dist < 9) return 1;
-  else if (dist < 17) return 2;
-  else if (dist < 33) return 3;
-  else if (dist < 65) return 4;
-  else if (dist < 129) return 5;
-  else if (dist < 257) return 6;
-  else if (dist < 513) return 7;
-  else if (dist < 1025) return 8;
-  else if (dist < 2049) return 9;
-  else if (dist < 4097) return 10;
-  else if (dist < 8193) return 11;
-  else if (dist < 16385) return 12;
-  else return 13;
-#endif
-}
-
-int ZopfliGetDistExtraBitsValue(int dist) {
-#ifdef __GNUC__
-  if (dist < 5) {
-    return 0;
-  } else {
-    int l = 31 ^ __builtin_clz(dist - 1); /* log2(dist - 1) */
-    return (dist - (1 + (1 << l))) & ((1 << (l - 1)) - 1);
-  }
-#else
-  if (dist < 5) return 0;
-  else if (dist < 9) return (dist - 5) & 1;
-  else if (dist < 17) return (dist - 9) & 3;
-  else if (dist < 33) return (dist - 17) & 7;
-  else if (dist < 65) return (dist - 33) & 15;
-  else if (dist < 129) return (dist - 65) & 31;
-  else if (dist < 257) return (dist - 129) & 63;
-  else if (dist < 513) return (dist - 257) & 127;
-  else if (dist < 1025) return (dist - 513) & 255;
-  else if (dist < 2049) return (dist - 1025) & 511;
-  else if (dist < 4097) return (dist - 2049) & 1023;
-  else if (dist < 8193) return (dist - 4097) & 2047;
-  else if (dist < 16385) return (dist - 8193) & 4095;
-  else return (dist - 16385) & 8191;
-#endif
-}
-
-int ZopfliGetDistSymbol(int dist) {
-#ifdef __GNUC__
-  if (dist < 5) {
-    return dist - 1;
-  } else {
-    int l = (31 ^ __builtin_clz(dist - 1)); /* log2(dist - 1) */
-    int r = ((dist - 1) >> (l - 1)) & 1;
-    return l * 2 + r;
-  }
-#else
-  if (dist < 193) {
-    if (dist < 13) {  /* dist 0..13. */
-      if (dist < 5) return dist - 1;
-      else if (dist < 7) return 4;
-      else if (dist < 9) return 5;
-      else return 6;
-    } else {  /* dist 13..193. */
-      if (dist < 17) return 7;
-      else if (dist < 25) return 8;
-      else if (dist < 33) return 9;
-      else if (dist < 49) return 10;
-      else if (dist < 65) return 11;
-      else if (dist < 97) return 12;
-      else if (dist < 129) return 13;
-      else return 14;
-    }
-  } else {
-    if (dist < 2049) {  /* dist 193..2049. */
-      if (dist < 257) return 15;
-      else if (dist < 385) return 16;
-      else if (dist < 513) return 17;
-      else if (dist < 769) return 18;
-      else if (dist < 1025) return 19;
-      else if (dist < 1537) return 20;
-      else return 21;
-    } else {  /* dist 2049..32768. */
-      if (dist < 3073) return 22;
-      else if (dist < 4097) return 23;
-      else if (dist < 6145) return 24;
-      else if (dist < 8193) return 25;
-      else if (dist < 12289) return 26;
-      else if (dist < 16385) return 27;
-      else if (dist < 24577) return 28;
-      else return 29;
-    }
-  }
-#endif
-}
-
-int ZopfliGetLengthExtraBits(int l) {
-  static const int table[259] = {
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0
-  };
-  return table[l];
-}
-
-int ZopfliGetLengthExtraBitsValue(int l) {
-  static const int table[259] = {
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 0,
-    1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
-    6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6,
-    7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
-    13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2,
-    3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
-    10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
-    29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
-    18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6,
-    7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
-    27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
-    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0
-  };
-  return table[l];
-}
-
-/*
-Returns symbol in range [257-285] (inclusive).
-*/
-int ZopfliGetLengthSymbol(int l) {
-  static const int table[259] = {
-    0, 0, 0, 257, 258, 259, 260, 261, 262, 263, 264,
-    265, 265, 266, 266, 267, 267, 268, 268,
-    269, 269, 269, 269, 270, 270, 270, 270,
-    271, 271, 271, 271, 272, 272, 272, 272,
-    273, 273, 273, 273, 273, 273, 273, 273,
-    274, 274, 274, 274, 274, 274, 274, 274,
-    275, 275, 275, 275, 275, 275, 275, 275,
-    276, 276, 276, 276, 276, 276, 276, 276,
-    277, 277, 277, 277, 277, 277, 277, 277,
-    277, 277, 277, 277, 277, 277, 277, 277,
-    278, 278, 278, 278, 278, 278, 278, 278,
-    278, 278, 278, 278, 278, 278, 278, 278,
-    279, 279, 279, 279, 279, 279, 279, 279,
-    279, 279, 279, 279, 279, 279, 279, 279,
-    280, 280, 280, 280, 280, 280, 280, 280,
-    280, 280, 280, 280, 280, 280, 280, 280,
-    281, 281, 281, 281, 281, 281, 281, 281,
-    281, 281, 281, 281, 281, 281, 281, 281,
-    281, 281, 281, 281, 281, 281, 281, 281,
-    281, 281, 281, 281, 281, 281, 281, 281,
-    282, 282, 282, 282, 282, 282, 282, 282,
-    282, 282, 282, 282, 282, 282, 282, 282,
-    282, 282, 282, 282, 282, 282, 282, 282,
-    282, 282, 282, 282, 282, 282, 282, 282,
-    283, 283, 283, 283, 283, 283, 283, 283,
-    283, 283, 283, 283, 283, 283, 283, 283,
-    283, 283, 283, 283, 283, 283, 283, 283,
-    283, 283, 283, 283, 283, 283, 283, 283,
-    284, 284, 284, 284, 284, 284, 284, 284,
-    284, 284, 284, 284, 284, 284, 284, 284,
-    284, 284, 284, 284, 284, 284, 284, 284,
-    284, 284, 284, 284, 284, 284, 284, 285
-  };
-  return table[l];
-}
-
 void ZopfliInitOptions(ZopfliOptions* options) {
   options->verbose = 0;
   options->verbose_more = 0;
diff --git a/zopfli/src/zopfli/util.h b/zopfli/src/zopfli/util.h
index 4188f51..4b73504 100644
--- a/zopfli/src/zopfli/util.h
+++ b/zopfli/src/zopfli/util.h
@@ -32,6 +32,10 @@
 #define ZOPFLI_MAX_MATCH 258
 #define ZOPFLI_MIN_MATCH 3
 
+/* Number of distinct literal/length and distance symbols in DEFLATE */
+#define ZOPFLI_NUM_LL 288
+#define ZOPFLI_NUM_D 32
+
 /*
 The window size for deflate. Must be a power of two. This should be 32768, the
 maximum possible by the deflate spec. Anything less hurts compression more than
@@ -51,9 +55,9 @@
 The whole compression algorithm, including the smarter block splitting, will
 be executed independently on each huge block.
 Dividing into huge blocks hurts compression, but not much relative to the size.
-Set this to, for example, 20MB (20000000). Set it to 0 to disable master blocks.
+Set it to 0 to disable master blocks.
 */
-#define ZOPFLI_MASTER_BLOCK_SIZE 20000000
+#define ZOPFLI_MASTER_BLOCK_SIZE 1000000
 
 /*
 Used to initialize costs for example
@@ -117,27 +121,6 @@
 #define ZOPFLI_LAZY_MATCHING
 
 /*
-Gets the symbol for the given length, cfr. the DEFLATE spec.
-Returns the symbol in the range [257-285] (inclusive)
-*/
-int ZopfliGetLengthSymbol(int l);
-
-/* Gets the amount of extra bits for the given length, cfr. the DEFLATE spec. */
-int ZopfliGetLengthExtraBits(int l);
-
-/* Gets value of the extra bits for the given length, cfr. the DEFLATE spec. */
-int ZopfliGetLengthExtraBitsValue(int l);
-
-/* Gets the symbol for the given dist, cfr. the DEFLATE spec. */
-int ZopfliGetDistSymbol(int dist);
-
-/* Gets the amount of extra bits for the given dist, cfr. the DEFLATE spec. */
-int ZopfliGetDistExtraBits(int dist);
-
-/* Gets value of the extra bits for the given dist, cfr. the DEFLATE spec. */
-int ZopfliGetDistExtraBitsValue(int dist);
-
-/*
 Appends value to dynamically allocated memory, doubling its allocation size
 whenever needed.
 
diff --git a/zopfli/src/zopfli/zopfli.h b/zopfli/src/zopfli/zopfli.h
index 56512a2..c079662 100644
--- a/zopfli/src/zopfli/zopfli.h
+++ b/zopfli/src/zopfli/zopfli.h
@@ -52,10 +52,7 @@
   int blocksplitting;
 
   /*
-  If true, chooses the optimal block split points only after doing the iterative
-  LZ77 compression. If false, chooses the block split points first, then does
-  iterative LZ77 on each individual block. Depending on the file, either first
-  or last gives the best compression. Default: false (0).
+  No longer used, left for compatibility.
   */
   int blocksplittinglast;