Add crc32_combine_gen() and crc32_combine_op() for fast combines.

When the same len2 is used repeatedly, it is faster to use
crc32_combine_gen() to generate an operator that is then used to
combine CRCs with crc32_combine_op().
diff --git a/crc32.c b/crc32.c
index 5ccfe09..2d213b3 100644
--- a/crc32.c
+++ b/crc32.c
@@ -50,6 +50,7 @@
 #define GF2_DIM 32      /* dimension of GF(2) vectors (length of CRC) */
 local z_crc_t gf2_matrix_times OF((const z_crc_t *mat, z_crc_t vec));
 local uLong crc32_combine_ OF((uLong crc1, uLong crc2, z_off64_t len2));
+local void crc32_combine_gen_ OF((z_crc_t *op, z_off64_t len2));
 
 /* ========================================================================= */
 local z_crc_t gf2_matrix_times(mat, vec)
@@ -452,3 +453,77 @@
 {
     return crc32_combine_(crc1, crc2, len2);
 }
+
+/* ========================================================================= */
+local void crc32_combine_gen_(op, len2)
+    z_crc_t *op;                /* out: GF2_DIM operator rows are written here */
+    z_off64_t len2;             /* length for which the operator is generated */
+{
+    z_crc_t row;                /* single-bit row used to build the identity */
+    int j;                      /* row index into op, 0..GF2_DIM-1 */
+    unsigned i;                 /* crc_comb index for the current bit of len2 */
+
+#ifdef DYNAMIC_CRC_TABLE
+    if (crc_table_empty)
+        make_crc_table();       /* NOTE(review): presumably fills crc_comb -- confirm */
+#endif /* DYNAMIC_CRC_TABLE */
+
+    /* if len2 is zero or negative, return the identity matrix */
+    if (len2 <= 0) {
+        row = 1;
+        for (j = 0; j < GF2_DIM; j++) {
+            op[j] = row;        /* row j has only bit j set */
+            row <<= 1;
+        }
+        return;
+    }
+
+    /* at least one bit in len2 is set -- find it, and copy the operator
+       corresponding to that position into op */
+    i = 0;
+    for (;;) {                  /* terminates: len2 > 0 here, so a bit is set */
+        if (len2 & 1) {
+            for (j = 0; j < GF2_DIM; j++)
+                op[j] = crc_comb[i][j];
+            break;
+        }
+        len2 >>= 1;
+        i = (i + 1) % GF2_DIM;  /* index wraps after GF2_DIM bit positions */
+    }
+
+    /* for each remaining bit set in len2 (if any), multiply op by the operator
+       corresponding to that position */
+    for (;;) {
+        len2 >>= 1;             /* step past the bit just handled */
+        i = (i + 1) % GF2_DIM;
+        if (len2 == 0)          /* no set bits remain */
+            break;
+        if (len2 & 1)
+            for (j = 0; j < GF2_DIM; j++)   /* update every row of op */
+                op[j] = gf2_matrix_times(crc_comb[i], op[j]);
+    }
+}
+
+/* ========================================================================= */
+void ZEXPORT crc32_combine_gen(op, len2)
+    z_crc_t *op;
+    z_off_t len2;
+{
+    crc32_combine_gen_(op, len2);   /* len2 is converted to z_off64_t here */
+}
+
+void ZEXPORT crc32_combine_gen64(op, len2)
+    z_crc_t *op;
+    z_off64_t len2;
+{
+    crc32_combine_gen_(op, len2);   /* same helper as crc32_combine_gen() */
+}
+
+/* ========================================================================= */
+uLong ZEXPORT crc32_combine_op(crc1, crc2, op)
+    uLong crc1;
+    uLong crc2;
+    const z_crc_t *op;          /* operator made by crc32_combine_gen() */
+{
+    return gf2_matrix_times(op, crc1) ^ crc2;   /* apply op to crc1, xor crc2 */
+}
diff --git a/zlib.h b/zlib.h
index 0b74b25..14d87a2 100644
--- a/zlib.h
+++ b/zlib.h
@@ -1757,6 +1757,22 @@
    len2.
 */
 
+/*
+ZEXTERN void ZEXPORT crc32_combine_gen OF((z_crc_t op[32], z_off_t len2));
+
+     Generate the operator op corresponding to length len2, to be used with
+   crc32_combine_op(). op must have room for 32 z_crc_t values. (32 is the
+   number of bits in the CRC.)
+*/
+
+ZEXTERN uLong ZEXPORT crc32_combine_op OF((uLong crc1, uLong crc2,
+                                           const z_crc_t *op));
+/*
+     Give the same result as crc32_combine(), using op in place of len2. op is
+   generated from len2 by crc32_combine_gen(). This will be faster than
+   crc32_combine() if the generated op is used many times.
+*/
+
 
                         /* various hacks, don't look :) */
 
@@ -1844,6 +1860,7 @@
    ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile));
    ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off64_t));
    ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off64_t));
+   ZEXTERN void ZEXPORT crc32_combine_gen64 OF((z_crc_t *op, z_off64_t));
 #endif
 
 #if !defined(ZLIB_INTERNAL) && defined(Z_WANT64)
@@ -1854,6 +1871,7 @@
 #    define z_gzoffset z_gzoffset64
 #    define z_adler32_combine z_adler32_combine64
 #    define z_crc32_combine z_crc32_combine64
+#    define z_crc32_combine_gen z_crc32_combine_gen64
 #  else
 #    define gzopen gzopen64
 #    define gzseek gzseek64
@@ -1861,6 +1879,7 @@
 #    define gzoffset gzoffset64
 #    define adler32_combine adler32_combine64
 #    define crc32_combine crc32_combine64
+#    define crc32_combine_gen crc32_combine_gen64
 #  endif
 #  ifndef Z_LARGE64
      ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
@@ -1869,6 +1888,7 @@
      ZEXTERN z_off_t ZEXPORT gzoffset64 OF((gzFile));
      ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t));
      ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t));
+     ZEXTERN void ZEXPORT crc32_combine_gen64 OF((z_crc_t *op, z_off_t));
 #  endif
 #else
    ZEXTERN gzFile ZEXPORT gzopen OF((const char *, const char *));
@@ -1877,12 +1897,14 @@
    ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile));
    ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t));
    ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t));
+   ZEXTERN void ZEXPORT crc32_combine_gen OF((z_crc_t *op, z_off_t));
 #endif
 
 #else /* Z_SOLO */
 
    ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t));
    ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t));
+   ZEXTERN void ZEXPORT crc32_combine_gen OF((z_crc_t *op, z_off_t));
 
 #endif /* !Z_SOLO */