Merge pull request #2326 from daltonmaag/compact-gpos

Add GPOS compaction code in varLib.merger and otlLib.builder + new `fonttools otlLib.optimize` command
diff --git a/Lib/fontTools/misc/intTools.py b/Lib/fontTools/misc/intTools.py
index d31a75b..6ba03e1 100644
--- a/Lib/fontTools/misc/intTools.py
+++ b/Lib/fontTools/misc/intTools.py
@@ -1,14 +1,25 @@
-__all__ = ['popCount']
+__all__ = ["popCount"]
 
 
 try:
     bit_count = int.bit_count
 except AttributeError:
+
     def bit_count(v):
-        return bin(v).count('1')
+        return bin(v).count("1")
+
 
 """Return number of 1 bits (population count) of the absolute value of an integer.
 
 See https://docs.python.org/3.10/library/stdtypes.html#int.bit_count
 """
 popCount = bit_count
+
+
+def bit_indices(v):
+    """Return list of indices where bits are set, 0 being the index of the least significant bit.
+
+    >>> bit_indices(0b101)
+    [0, 2]
+    """
+    return [i for i, b in enumerate(bin(v)[::-1]) if b == "1"]
diff --git a/Lib/fontTools/otlLib/builder.py b/Lib/fontTools/otlLib/builder.py
index 182f7da..bfb9d41 100644
--- a/Lib/fontTools/otlLib/builder.py
+++ b/Lib/fontTools/otlLib/builder.py
@@ -1,4 +1,5 @@
 from collections import namedtuple, OrderedDict
+import os
 from fontTools.misc.fixedTools import fixedToFloat
 from fontTools import ttLib
 from fontTools.ttLib.tables import otTables as ot
@@ -10,6 +11,7 @@
 )
 from fontTools.ttLib.tables import otBase
 from fontTools.feaLib.ast import STATNameStatement
+from fontTools.otlLib.optimize.gpos import GPOS_COMPACT_MODE_DEFAULT, GPOS_COMPACT_MODE_ENV_KEY, compact_lookup
 from fontTools.otlLib.error import OpenTypeLibError
 from functools import reduce
 import logging
@@ -1373,7 +1375,17 @@
             subtables.extend(buildPairPosGlyphs(self.glyphPairs, self.glyphMap))
         for key in sorted(builders.keys()):
             subtables.extend(builders[key].subtables())
-        return self.buildLookup_(subtables)
+        lookup = self.buildLookup_(subtables)
+
+        # Compact the lookup
+        # This is a good moment to do it because the compaction should create
+        # smaller subtables, which may prevent overflows from happening.
+        mode = os.environ.get(GPOS_COMPACT_MODE_ENV_KEY, GPOS_COMPACT_MODE_DEFAULT)
+        if mode and mode != "0":
+            log.info("Compacting GPOS...")
+            compact_lookup(self.font, mode, lookup)
+
+        return lookup
 
 
 class SinglePosBuilder(LookupBuilder):
diff --git a/Lib/fontTools/otlLib/optimize/__init__.py b/Lib/fontTools/otlLib/optimize/__init__.py
new file mode 100644
index 0000000..5c007e8
--- /dev/null
+++ b/Lib/fontTools/otlLib/optimize/__init__.py
@@ -0,0 +1,68 @@
+from argparse import RawTextHelpFormatter
+from textwrap import dedent
+
+from fontTools.ttLib import TTFont
+from fontTools.otlLib.optimize.gpos import compact, GPOS_COMPACT_MODE_DEFAULT
+
+def main(args=None):
+    """Optimize the layout tables of an existing font."""
+    from argparse import ArgumentParser
+    from fontTools import configLogger
+
+    parser = ArgumentParser(prog="otlLib.optimize", description=main.__doc__, formatter_class=RawTextHelpFormatter)
+    parser.add_argument("font")
+    parser.add_argument(
+        "-o", metavar="OUTPUTFILE", dest="outfile", default=None, help="output file"
+    )
+    parser.add_argument(
+        "--gpos-compact-mode",
+        help=dedent(
+            f"""\
+            GPOS Lookup type 2 (PairPos) compaction mode:
+                0 = do not attempt to compact PairPos lookups;
+                1 to 8 = create at most 1 to 8 new subtables for each existing
+                    subtable, provided that it would yield a 50%% file size saving;
+                9 = create as many new subtables as needed to yield a file size saving.
+            Default: {GPOS_COMPACT_MODE_DEFAULT}.
+
+            This compaction aims to save file size, by splitting large class
+            kerning subtables (Format 2) that contain many zero values into
+            smaller and denser subtables. It's a trade-off between the overhead
+            of several subtables versus the sparseness of one big subtable.
+
+            See the pull request: https://github.com/fonttools/fonttools/pull/2326
+            """
+        ),
+        default=int(GPOS_COMPACT_MODE_DEFAULT),
+        choices=list(range(10)),
+        type=int,
+    )
+    logging_group = parser.add_mutually_exclusive_group(required=False)
+    logging_group.add_argument(
+        "-v", "--verbose", action="store_true", help="Run more verbosely."
+    )
+    logging_group.add_argument(
+        "-q", "--quiet", action="store_true", help="Turn verbosity off."
+    )
+    options = parser.parse_args(args)
+
+    configLogger(
+        level=("DEBUG" if options.verbose else "ERROR" if options.quiet else "INFO")
+    )
+
+    font = TTFont(options.font)
+    # TODO: switch everything to have type(mode) = int when using the Config class
+    compact(font, str(options.gpos_compact_mode))
+    font.save(options.outfile or options.font)
+
+
+
+if __name__ == "__main__":
+    import sys
+
+    if len(sys.argv) > 1:
+        sys.exit(main())
+    import doctest
+
+    sys.exit(doctest.testmod().failed)
+
diff --git a/Lib/fontTools/otlLib/optimize/__main__.py b/Lib/fontTools/otlLib/optimize/__main__.py
new file mode 100644
index 0000000..03027ec
--- /dev/null
+++ b/Lib/fontTools/otlLib/optimize/__main__.py
@@ -0,0 +1,6 @@
+import sys
+from fontTools.otlLib.optimize import main
+
+
+if __name__ == '__main__':
+	sys.exit(main())
diff --git a/Lib/fontTools/otlLib/optimize/gpos.py b/Lib/fontTools/otlLib/optimize/gpos.py
new file mode 100644
index 0000000..79873fa
--- /dev/null
+++ b/Lib/fontTools/otlLib/optimize/gpos.py
@@ -0,0 +1,439 @@
+import logging
+from collections import defaultdict, namedtuple
+from functools import reduce
+from itertools import chain
+from math import log2
+from typing import DefaultDict, Dict, Iterable, List, Sequence, Tuple
+
+from fontTools.misc.intTools import bit_count, bit_indices
+from fontTools.ttLib import TTFont
+from fontTools.ttLib.tables import otBase, otTables
+
+# NOTE: activating this optimization via the environment variable is
+# experimental and may not be supported once an alternative mechanism
+# is in place. See: https://github.com/fonttools/fonttools/issues/2349
+GPOS_COMPACT_MODE_ENV_KEY = "FONTTOOLS_GPOS_COMPACT_MODE"
+GPOS_COMPACT_MODE_DEFAULT = "0"
+
+log = logging.getLogger("fontTools.otlLib.optimize.gpos")
+
+
+def compact(font: TTFont, mode: str) -> TTFont:
+    # Ideal plan:
+    #  1. Find lookups of Lookup Type 2: Pair Adjustment Positioning Subtable
+    #     https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#lookup-type-2-pair-adjustment-positioning-subtable
+    #  2. Extract glyph-glyph kerning and class-kerning from all present subtables
+    #  3. Regroup into different subtable arrangements
+    #  4. Put back into the lookup
+    #
+    # Actual implementation:
+    #  2. Only class kerning is optimized currently
+    #  3. If the input kerning is already in several subtables, the subtables
+    #     are not grouped together first; instead each subtable is treated
+    #     independently, so currently this step is:
+    #     Split existing subtables into more smaller subtables
+    gpos = font["GPOS"]
+    for lookup in gpos.table.LookupList.Lookup:
+        if lookup.LookupType == 2:
+            compact_lookup(font, mode, lookup)
+        elif lookup.LookupType == 9 and lookup.SubTable[0].ExtensionLookupType == 2:
+            compact_ext_lookup(font, mode, lookup)
+    return font
+
+
+def compact_lookup(font: TTFont, mode: str, lookup: otTables.Lookup) -> None:
+    new_subtables = compact_pair_pos(font, mode, lookup.SubTable)
+    lookup.SubTable = new_subtables
+    lookup.SubTableCount = len(new_subtables)
+
+
+def compact_ext_lookup(font: TTFont, mode: str, lookup: otTables.Lookup) -> None:
+    new_subtables = compact_pair_pos(
+        font, mode, [ext_subtable.ExtSubTable for ext_subtable in lookup.SubTable]
+    )
+    new_ext_subtables = []
+    for subtable in new_subtables:
+        ext_subtable = otTables.ExtensionPos()
+        ext_subtable.Format = 1
+        ext_subtable.ExtSubTable = subtable
+        new_ext_subtables.append(ext_subtable)
+    lookup.SubTable = new_ext_subtables
+    lookup.SubTableCount = len(new_ext_subtables)
+
+
+def compact_pair_pos(
+    font: TTFont, mode: str, subtables: Sequence[otTables.PairPos]
+) -> Sequence[otTables.PairPos]:
+    new_subtables = []
+    for subtable in subtables:
+        if subtable.Format == 1:
+            # Not doing anything to Format 1 (yet?)
+            new_subtables.append(subtable)
+        elif subtable.Format == 2:
+            new_subtables.extend(compact_class_pairs(font, mode, subtable))
+    return new_subtables
+
+
+def compact_class_pairs(
+    font: TTFont, mode: str, subtable: otTables.PairPos
+) -> List[otTables.PairPos]:
+    from fontTools.otlLib.builder import buildPairPosClassesSubtable
+
+    subtables = []
+    classes1: DefaultDict[int, List[str]] = defaultdict(list)
+    for g in subtable.Coverage.glyphs:
+        classes1[subtable.ClassDef1.classDefs.get(g, 0)].append(g)
+    classes2: DefaultDict[int, List[str]] = defaultdict(list)
+    for g, i in subtable.ClassDef2.classDefs.items():
+        classes2[i].append(g)
+    all_pairs = {}
+    for i, class1 in enumerate(subtable.Class1Record):
+        for j, class2 in enumerate(class1.Class2Record):
+            if is_really_zero(class2):
+                continue
+            all_pairs[(tuple(sorted(classes1[i])), tuple(sorted(classes2[j])))] = (
+                getattr(class2, "Value1", None),
+                getattr(class2, "Value2", None),
+            )
+
+    if len(mode) == 1 and mode in "123456789":
+        grouped_pairs = cluster_pairs_by_class2_coverage_custom_cost(
+            font, all_pairs, int(mode)
+        )
+        for pairs in grouped_pairs:
+            subtables.append(
+                buildPairPosClassesSubtable(pairs, font.getReverseGlyphMap())
+            )
+    else:
+        raise ValueError(f"Bad {GPOS_COMPACT_MODE_ENV_KEY}={mode}")
+    return subtables
+
+
+def is_really_zero(class2: otTables.Class2Record) -> bool:
+    v1 = getattr(class2, "Value1", None)
+    v2 = getattr(class2, "Value2", None)
+    return (v1 is None or v1.getEffectiveFormat() == 0) and (
+        v2 is None or v2.getEffectiveFormat() == 0
+    )
+
+
+Pairs = Dict[
+    Tuple[Tuple[str, ...], Tuple[str, ...]],
+    Tuple[otBase.ValueRecord, otBase.ValueRecord],
+]
+
+# Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L935-L958
+def _getClassRanges(glyphIDs: Iterable[int]):
+    glyphIDs = sorted(glyphIDs)
+    last = glyphIDs[0]
+    ranges = [[last]]
+    for glyphID in glyphIDs[1:]:
+        if glyphID != last + 1:
+            ranges[-1].append(last)
+            ranges.append([glyphID])
+        last = glyphID
+    ranges[-1].append(last)
+    return ranges, glyphIDs[0], glyphIDs[-1]
+
+
+# Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L960-L989
+def _classDef_bytes(
+    class_data: List[Tuple[List[Tuple[int, int]], int, int]],
+    class_ids: List[int],
+    coverage=False,
+):
+    if not class_ids:
+        return 0
+    first_ranges, min_glyph_id, max_glyph_id = class_data[class_ids[0]]
+    range_count = len(first_ranges)
+    for i in class_ids[1:]:
+        data = class_data[i]
+        range_count += len(data[0])
+        min_glyph_id = min(min_glyph_id, data[1])
+        max_glyph_id = max(max_glyph_id, data[2])
+    glyphCount = max_glyph_id - min_glyph_id + 1
+    # https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-1
+    format1_bytes = 6 + glyphCount * 2
+    # https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-2
+    format2_bytes = 4 + range_count * 6
+    return min(format1_bytes, format2_bytes)
+
+
+ClusteringContext = namedtuple(
+    "ClusteringContext",
+    [
+        "lines",
+        "all_class1",
+        "all_class1_data",
+        "all_class2_data",
+        "valueFormat1_bytes",
+        "valueFormat2_bytes",
+    ],
+)
+
+
+class Cluster:
+    # TODO(Python 3.7): Turn this into a dataclass
+    # ctx: ClusteringContext
+    # indices: int
+    # Caches
+    # TODO(Python 3.8): use functools.cached_property instead of the
+    # manually cached properties, and remove the cache fields listed below.
+    # _indices: Optional[List[int]] = None
+    # _column_indices: Optional[List[int]] = None
+    # _cost: Optional[int] = None
+
+    __slots__ = "ctx", "indices_bitmask", "_indices", "_column_indices", "_cost"
+
+    def __init__(self, ctx: ClusteringContext, indices_bitmask: int):
+        self.ctx = ctx
+        self.indices_bitmask = indices_bitmask
+        self._indices = None
+        self._column_indices = None
+        self._cost = None
+
+    @property
+    def indices(self):
+        if self._indices is None:
+            self._indices = bit_indices(self.indices_bitmask)
+        return self._indices
+
+    @property
+    def column_indices(self):
+        if self._column_indices is None:
+            # Indices of columns that have a 1 in at least 1 line
+            #   => binary OR all the lines
+            bitmask = reduce(int.__or__, (self.ctx.lines[i] for i in self.indices))
+            self._column_indices = bit_indices(bitmask)
+        return self._column_indices
+
+    @property
+    def width(self):
+        # Add 1 because Class2=0 cannot be used but needs to be encoded.
+        return len(self.column_indices) + 1
+
+    @property
+    def cost(self):
+        if self._cost is None:
+            self._cost = (
+                # 2 bytes to store the offset to this subtable in the Lookup table above
+                2
+                # Contents of the subtable
+                # From: https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#pair-adjustment-positioning-format-2-class-pair-adjustment
+                # uint16	posFormat	Format identifier: format = 2
+                + 2
+                # Offset16	coverageOffset	Offset to Coverage table, from beginning of PairPos subtable.
+                + 2
+                + self.coverage_bytes
+                # uint16	valueFormat1	ValueRecord definition — for the first glyph of the pair (may be zero).
+                + 2
+                # uint16	valueFormat2	ValueRecord definition — for the second glyph of the pair (may be zero).
+                + 2
+                # Offset16	classDef1Offset	Offset to ClassDef table, from beginning of PairPos subtable — for the first glyph of the pair.
+                + 2
+                + self.classDef1_bytes
+                # Offset16	classDef2Offset	Offset to ClassDef table, from beginning of PairPos subtable — for the second glyph of the pair.
+                + 2
+                + self.classDef2_bytes
+                # uint16	class1Count	Number of classes in classDef1 table — includes Class 0.
+                + 2
+                # uint16	class2Count	Number of classes in classDef2 table — includes Class 0.
+                + 2
+                # Class1Record	class1Records[class1Count]	Array of Class1 records, ordered by classes in classDef1.
+                + (self.ctx.valueFormat1_bytes + self.ctx.valueFormat2_bytes)
+                * len(self.indices)
+                * self.width
+            )
+        return self._cost
+
+    @property
+    def coverage_bytes(self):
+        format1_bytes = (
+            # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-1
+            # uint16	coverageFormat	Format identifier — format = 1
+            # uint16	glyphCount	Number of glyphs in the glyph array
+            4
+            # uint16	glyphArray[glyphCount]	Array of glyph IDs — in numerical order
+            + sum(len(self.ctx.all_class1[i]) for i in self.indices) * 2
+        )
+        ranges = sorted(
+            chain.from_iterable(self.ctx.all_class1_data[i][0] for i in self.indices)
+        )
+        merged_range_count = 0
+        last = None
+        for (start, end) in ranges:
+            if last is not None and start != last + 1:
+                merged_range_count += 1
+            last = end
+        format2_bytes = (
+            # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-2
+            # uint16	coverageFormat	Format identifier — format = 2
+            # uint16	rangeCount	Number of RangeRecords
+            4
+            # RangeRecord	rangeRecords[rangeCount]	Array of glyph ranges — ordered by startGlyphID.
+            # uint16	startGlyphID	First glyph ID in the range
+            # uint16	endGlyphID	Last glyph ID in the range
+            # uint16	startCoverageIndex	Coverage Index of first glyph ID in range
+            + merged_range_count * 6
+        )
+        return min(format1_bytes, format2_bytes)
+
+    @property
+    def classDef1_bytes(self):
+        # We can skip encoding one of the Class1 definitions, and use
+        # Class1=0 to represent it instead, because Class1 is gated by the
+        # Coverage definition. Use Class1=0 for the highest byte savings.
+        # Going through all options takes too long, pick the biggest class
+        # = what happens in otlLib.builder.ClassDefBuilder.classes()
+        biggest_index = max(self.indices, key=lambda i: len(self.ctx.all_class1[i]))
+        return _classDef_bytes(
+            self.ctx.all_class1_data, [i for i in self.indices if i != biggest_index]
+        )
+
+    @property
+    def classDef2_bytes(self):
+        # All Class2 need to be encoded because we can't use Class2=0
+        return _classDef_bytes(self.ctx.all_class2_data, self.column_indices)
+
+
+def cluster_pairs_by_class2_coverage_custom_cost(
+    font: TTFont,
+    pairs: Pairs,
+    compression: int = 5,
+) -> List[Pairs]:
+    if not pairs:
+        # The subtable was actually empty?
+        return [pairs]
+
+    # Sorted for reproducibility/determinism
+    all_class1 = sorted(set(pair[0] for pair in pairs))
+    all_class2 = sorted(set(pair[1] for pair in pairs))
+
+    # Use Python's big ints for binary vectors representing each line
+    lines = [
+        sum(
+            1 << i if (class1, class2) in pairs else 0
+            for i, class2 in enumerate(all_class2)
+        )
+        for class1 in all_class1
+    ]
+
+    # Map glyph names to ids and work with ints throughout for ClassDef formats
+    name_to_id = font.getReverseGlyphMap()
+    # Each entry in the arrays below is (range_count, min_glyph_id, max_glyph_id)
+    all_class1_data = [
+        _getClassRanges(name_to_id[name] for name in cls) for cls in all_class1
+    ]
+    all_class2_data = [
+        _getClassRanges(name_to_id[name] for name in cls) for cls in all_class2
+    ]
+
+    format1 = 0
+    format2 = 0
+    for pair, value in pairs.items():
+        format1 |= value[0].getEffectiveFormat() if value[0] else 0
+        format2 |= value[1].getEffectiveFormat() if value[1] else 0
+    valueFormat1_bytes = bit_count(format1) * 2
+    valueFormat2_bytes = bit_count(format2) * 2
+
+    ctx = ClusteringContext(
+        lines,
+        all_class1,
+        all_class1_data,
+        all_class2_data,
+        valueFormat1_bytes,
+        valueFormat2_bytes,
+    )
+
+    cluster_cache: Dict[int, Cluster] = {}
+
+    def make_cluster(indices: int) -> Cluster:
+        cluster = cluster_cache.get(indices, None)
+        if cluster is not None:
+            return cluster
+        cluster = Cluster(ctx, indices)
+        cluster_cache[indices] = cluster
+        return cluster
+
+    def merge(cluster: Cluster, other: Cluster) -> Cluster:
+        return make_cluster(cluster.indices_bitmask | other.indices_bitmask)
+
+    # Agglomerative clustering by hand, checking the cost gain of the new
+    # cluster against the previously separate clusters
+    # Start with 1 cluster per line
+    # cluster = set of lines = new subtable
+    clusters = [make_cluster(1 << i) for i in range(len(lines))]
+
+    # Cost of 1 cluster with everything
+    # `(1 << len) - 1` gives a bitmask full of 1's of length `len`
+    cost_before_splitting = make_cluster((1 << len(lines)) - 1).cost
+    log.debug(f"        len(clusters) = {len(clusters)}")
+
+    while len(clusters) > 1:
+        lowest_cost_change = None
+        best_cluster_index = None
+        best_other_index = None
+        best_merged = None
+        for i, cluster in enumerate(clusters):
+            for j, other in enumerate(clusters[i + 1 :]):
+                merged = merge(cluster, other)
+                cost_change = merged.cost - cluster.cost - other.cost
+                if lowest_cost_change is None or cost_change < lowest_cost_change:
+                    lowest_cost_change = cost_change
+                    best_cluster_index = i
+                    best_other_index = i + 1 + j
+                    best_merged = merged
+        assert lowest_cost_change is not None
+        assert best_cluster_index is not None
+        assert best_other_index is not None
+        assert best_merged is not None
+
+        # If the best merge we found is still taking down the file size, then
+        # there's no question: we must do it, because it's beneficial in both
+        # ways (lower file size and lower number of subtables).  However, if the
+        # best merge we found is not reducing file size anymore, then we need to
+        # look at the other stop criteria = the compression factor.
+        if lowest_cost_change > 0:
+            # Stop critera: check whether we should keep merging.
+            # Compute size reduction brought by splitting
+            cost_after_splitting = sum(c.cost for c in clusters)
+            # size_reduction so that after = before * (1 - size_reduction)
+            # E.g. before = 1000, after = 800, 1 - 800/1000 = 0.2
+            size_reduction = 1 - cost_after_splitting / cost_before_splitting
+
+            # Force more merging by taking into account the compression number.
+            # Target behaviour: compression number = 1 to 9, default 5 like gzip
+            #   - 1 = accept to add 1 subtable to reduce size by 50%
+            #   - 5 = accept to add 5 subtables to reduce size by 50%
+            # See https://github.com/harfbuzz/packtab/blob/master/Lib/packTab/__init__.py#L690-L691
+            # Given the size reduction we have achieved so far, compute how many
+            # new subtables are acceptable.
+            max_new_subtables = -log2(1 - size_reduction) * compression
+            log.debug(
+                f"            len(clusters) = {len(clusters):3d}    size_reduction={size_reduction:5.2f}    max_new_subtables={max_new_subtables}",
+            )
+            if compression == 9:
+                # Override level 9 to mean: create any number of subtables
+                max_new_subtables = len(clusters)
+
+            # If we have managed to take the number of new subtables below the
+            # threshold, then we can stop.
+            if len(clusters) <= max_new_subtables + 1:
+                break
+
+        # No reason to stop yet, do the merge and move on to the next.
+        del clusters[best_other_index]
+        clusters[best_cluster_index] = best_merged
+
+    # All clusters are final; turn bitmasks back into the "Pairs" format
+    pairs_by_class1: Dict[Tuple[str, ...], Pairs] = defaultdict(dict)
+    for pair, values in pairs.items():
+        pairs_by_class1[pair[0]][pair] = values
+    pairs_groups: List[Pairs] = []
+    for cluster in clusters:
+        pairs_group: Pairs = dict()
+        for i in cluster.indices:
+            class1 = all_class1[i]
+            pairs_group.update(pairs_by_class1[class1])
+        pairs_groups.append(pairs_group)
+    return pairs_groups
diff --git a/Lib/fontTools/varLib/merger.py b/Lib/fontTools/varLib/merger.py
index 888b52c..aaf2a51 100644
--- a/Lib/fontTools/varLib/merger.py
+++ b/Lib/fontTools/varLib/merger.py
@@ -1,8 +1,10 @@
 """
 Merge OpenType Layout tables (GDEF / GPOS / GSUB).
 """
+import os
 import copy
 from operator import ior
+import logging
 from fontTools.misc import classifyTools
 from fontTools.misc.roundTools import otRound
 from fontTools.ttLib.tables import otTables as ot
@@ -13,6 +15,13 @@
 from fontTools.varLib.varStore import VarStoreInstancer
 from functools import reduce
 from fontTools.otlLib.builder import buildSinglePos
+from fontTools.otlLib.optimize.gpos import (
+    compact_pair_pos,
+    GPOS_COMPACT_MODE_DEFAULT,
+    GPOS_COMPACT_MODE_ENV_KEY,
+)
+
+log = logging.getLogger("fontTools.varLib.merger")
 
 from .errors import (
     ShouldBeConstant,
@@ -837,6 +846,15 @@
 			self.SubTable.pop(-1)
 			self.SubTableCount -= 1
 
+		# Compact the merged subtables
+		# This is a good moment to do it because the compaction should create
+		# smaller subtables, which may prevent overflows from happening.
+		mode = os.environ.get(GPOS_COMPACT_MODE_ENV_KEY, GPOS_COMPACT_MODE_DEFAULT)
+		if mode and mode != "0":
+			log.info("Compacting GPOS...")
+			self.SubTable = compact_pair_pos(merger.font, mode, self.SubTable)
+			self.SubTableCount = len(self.SubTable)
+
 	elif isSinglePos and flattened:
 		singlePosTable = self.SubTable[0]
 		glyphs = singlePosTable.Coverage.glyphs
@@ -851,7 +869,6 @@
 
 	del merger.lookup_subtables
 
-
 #
 # InstancerMerger
 #
diff --git a/NEWS.rst b/NEWS.rst
index 16dca63..9de1c6c 100644
--- a/NEWS.rst
+++ b/NEWS.rst
@@ -15,6 +15,11 @@
 - [post] Fixed parsing ``post`` table format 2.0 when it contains extra garbage
   at the end of the stringData array (#2314).
 - [subset] drop empty features unless 'size' with FeatureParams table (#2324).
+- [otlLib] Added ``otlLib.optimize`` module; added GPOS compaction algorithm.
+  The compaction can be run on existing fonts with ``fonttools otlLib.optimize``
+  or using the snippet ``compact_gpos.py``. There's experimental support for
+  compacting fonts at compilation time using an environment variable, but that
+  might be removed later (#2326).
 
 4.24.4 (released 2021-05-25)
 ----------------------------
@@ -498,7 +503,7 @@
   instance, correctly map the value forward.
 - [varLib] The avar table can now contain mapping output values that are greater than
   OR EQUAL to the preceeding value, as the avar specification allows this.
-- [varLib] The errors of the module are now ordered hierarchically below VarLibError. 
+- [varLib] The errors of the module are now ordered hierarchically below VarLibError.
   See #1821.
 
 4.3.0 (released 2020-02-03)
@@ -792,13 +797,13 @@
 - [mutator] Set ``OVERLAP_SIMPLE`` and ``OVERLAP_COMPOUND`` glyf flags by
   default in ``instantiateVariableFont``. Added ``--no-overlap`` cli option
   to disable this (#1518).
-- [subset] Fixed subsetting ``VVAR`` table (#1516, #1517).  
+- [subset] Fixed subsetting ``VVAR`` table (#1516, #1517).
   Fixed subsetting an ``HVAR`` table that has an ``AdvanceWidthMap`` when the
   option ``--retain-gids`` is used.
-- [feaLib] Added ``forceChained`` in MultipleSubstStatement (#1511).  
-  Fixed double indentation of ``subtable`` statement (#1512).  
+- [feaLib] Added ``forceChained`` in MultipleSubstStatement (#1511).
+  Fixed double indentation of ``subtable`` statement (#1512).
   Added support for ``subtable`` statement in more places than just PairPos
-  lookups (#1520).  
+  lookups (#1520).
   Handle lookupflag 0 and lookupflag without a value (#1540).
 - [varLib] In ``load_designspace``, provide a default English name for the
   ``ital`` axis tag.
diff --git a/Snippets/compact_gpos.py b/Snippets/compact_gpos.py
new file mode 100644
index 0000000..a5bd2f8
--- /dev/null
+++ b/Snippets/compact_gpos.py
@@ -0,0 +1,144 @@
+#! /usr/bin/env python3
+
+"""
+Sample script to use the otlLib.optimize.gpos functions to compact GPOS tables
+of existing fonts. This script takes one or more TTF files as arguments and
+will create compacted copies of the fonts using all available modes of the GPOS
+compaction algorithm. For each copy, it will measure the new size of the GPOS
+table and also the new size of the font in WOFF2 format. All results will be
+printed to stdout in CSV format, so the savings provided by the algorithm in
+each mode can be inspected.
+
+This was initially made to debug the algorithm but can also be used to choose
+a mode value for a specific font (trade-off between bytes saved in TTF format
+vs more bytes in WOFF2 format and more subtables).
+
+Run:
+
+python Snippets/compact_gpos.py MyFont.ttf > results.csv
+"""
+
+import argparse
+from collections import defaultdict
+import csv
+import time
+import sys
+from pathlib import Path
+from typing import Any, Iterable, List, Optional, Sequence, Tuple
+
+from fontTools.ttLib import TTFont
+from fontTools.otlLib.optimize import compact
+
+MODES = [str(c) for c in range(1, 10)]
+
+
+def main(args: Optional[List[str]] = None):
+    parser = argparse.ArgumentParser()
+    parser.add_argument("fonts", type=Path, nargs="+", help="Path to TTFs.")
+    parsed_args = parser.parse_args(args)
+
+    runtimes = defaultdict(list)
+    rows = []
+    font_path: Path
+    for font_path in parsed_args.fonts:
+        font = TTFont(font_path)
+        if "GPOS" not in font:
+            print(f"No GPOS in {font_path.name}, skipping.", file=sys.stderr)
+            continue
+        size_orig = len(font.getTableData("GPOS")) / 1024
+        print(f"Measuring {font_path.name}...", file=sys.stderr)
+
+        fonts = {}
+        font_paths = {}
+        sizes = {}
+        for mode in MODES:
+            print(f"    Running mode={mode}", file=sys.stderr)
+            fonts[mode] = TTFont(font_path)
+            before = time.perf_counter()
+            compact(fonts[mode], mode=str(mode))
+            runtimes[mode].append(time.perf_counter() - before)
+            font_paths[mode] = (
+                font_path.parent
+                / "compact"
+                / (font_path.stem + f"_{mode}" + font_path.suffix)
+            )
+            font_paths[mode].parent.mkdir(parents=True, exist_ok=True)
+            fonts[mode].save(font_paths[mode])
+            fonts[mode] = TTFont(font_paths[mode])
+            sizes[mode] = len(fonts[mode].getTableData("GPOS")) / 1024
+
+        print(f"    Runtimes:", file=sys.stderr)
+        for mode, times in runtimes.items():
+            print(
+                f"        {mode:10} {' '.join(f'{t:5.2f}' for t in times)}",
+                file=sys.stderr,
+            )
+
+        # Bonus: measure WOFF2 file sizes.
+        print(f"    Measuring WOFF2 sizes", file=sys.stderr)
+        size_woff_orig = woff_size(font, font_path) / 1024
+        sizes_woff = {
+            mode: woff_size(fonts[mode], font_paths[mode]) / 1024 for mode in MODES
+        }
+
+        rows.append(
+            (
+                font_path.name,
+                size_orig,
+                size_woff_orig,
+                *flatten(
+                    (
+                        sizes[mode],
+                        pct(sizes[mode], size_orig),
+                        sizes_woff[mode],
+                        pct(sizes_woff[mode], size_woff_orig),
+                    )
+                    for mode in MODES
+                ),
+            )
+        )
+
+    write_csv(rows)
+
+
+def woff_size(font: TTFont, path: Path) -> int:
+    font.flavor = "woff2"
+    woff_path = path.with_suffix(".woff2")
+    font.save(woff_path)
+    return woff_path.stat().st_size
+
+
+def write_csv(rows: List[Tuple[Any]]) -> None:
+    sys.stdout.reconfigure(encoding="utf-8")
+    sys.stdout.write("\uFEFF")
+    writer = csv.writer(sys.stdout, lineterminator="\n")
+    writer.writerow(
+        [
+            "File",
+            "Original GPOS Size",
+            "Original WOFF2 Size",
+            *flatten(
+                (
+                    f"mode={mode}",
+                    f"Change {mode}",
+                    f"mode={mode} WOFF2 Size",
+                    f"Change {mode} WOFF2 Size",
+                )
+                for mode in MODES
+            ),
+        ]
+    )
+    for row in rows:
+        writer.writerow(row)
+
+
+def pct(new: float, old: float) -> float:
+    return -(1 - (new / old))
+
+
+def flatten(seq_seq: Iterable[Iterable[Any]]) -> List[Any]:
+    return [thing for seq in seq_seq for thing in seq]
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Tests/otlLib/optimize_test.py b/Tests/otlLib/optimize_test.py
new file mode 100644
index 0000000..40cf389
--- /dev/null
+++ b/Tests/otlLib/optimize_test.py
@@ -0,0 +1,175 @@
+import logging
+from pathlib import Path
+from subprocess import run
+import contextlib
+import os
+from typing import List, Optional, Tuple
+from fontTools.ttLib import TTFont
+
+import pytest
+
+from fontTools.feaLib.builder import addOpenTypeFeaturesFromString
+from fontTools.fontBuilder import FontBuilder
+
+from fontTools.ttLib.tables.otBase import OTTableWriter, ValueRecord
+
+
+def test_main(tmpdir: Path):
+    """Check that calling the main function on an input TTF works."""
+    glyphs = ".notdef space A Aacute B D".split()
+    features = """
+    @A = [A Aacute];
+    @B = [B D];
+    feature kern {
+        pos @A @B -50;
+    } kern;
+    """
+    fb = FontBuilder(1000)
+    fb.setupGlyphOrder(glyphs)
+    addOpenTypeFeaturesFromString(fb.font, features)
+    input = tmpdir / "in.ttf"
+    fb.save(str(input))
+    output = tmpdir / "out.ttf"
+    run(
+        [
+            "fonttools",
+            "otlLib.optimize",
+            "--gpos-compact-mode",
+            "5",
+            str(input),
+            "-o",
+            str(output),
+        ],
+        check=True,
+    )
+    assert output.exists()
+
+
+# Copy-pasted from https://stackoverflow.com/questions/2059482/python-temporarily-modify-the-current-processs-environment
+# TODO: remove when moving to the Config class
+@contextlib.contextmanager
+def set_env(**environ):
+    """
+    Temporarily set the process environment variables.
+
+    >>> with set_env(PLUGINS_DIR=u'test/plugins'):
+    ...   "PLUGINS_DIR" in os.environ
+    True
+
+    >>> "PLUGINS_DIR" in os.environ
+    False
+
+    :type environ: dict[str, unicode]
+    :param environ: Environment variables to set
+    """
+    old_environ = dict(os.environ)
+    os.environ.update(environ)
+    try:
+        yield
+    finally:
+        os.environ.clear()
+        os.environ.update(old_environ)
+
+
+def count_pairpos_subtables(font: TTFont) -> int:
+    subtables = 0
+    for lookup in font["GPOS"].table.LookupList.Lookup:
+        if lookup.LookupType == 2:
+            subtables += len(lookup.SubTable)
+        elif lookup.LookupType == 9:
+            for subtable in lookup.SubTable:
+                if subtable.ExtensionLookupType == 2:
+                    subtables += 1
+    return subtables
+
+
+def count_pairpos_bytes(font: TTFont) -> int:
+    bytes = 0
+    gpos = font["GPOS"]
+    for lookup in font["GPOS"].table.LookupList.Lookup:
+        if lookup.LookupType == 2:
+            w = OTTableWriter(tableTag=gpos.tableTag)
+            lookup.compile(w, font)
+            bytes += len(w.getAllData())
+        elif lookup.LookupType == 9:
+            if any(subtable.ExtensionLookupType == 2 for subtable in lookup.SubTable):
+                w = OTTableWriter(tableTag=gpos.tableTag)
+                lookup.compile(w, font)
+                bytes += len(w.getAllData())
+    return bytes
+
+
+def get_kerning_by_blocks(blocks: List[Tuple[int, int]]) -> Tuple[List[str], str]:
+    """Generate a highly compressible font by generating a bunch of rectangular
+    blocks on the diagonal that can easily be sliced into subtables.
+
+    Returns the list of glyphs and feature code of the font.
+    """
+    value = 0
+    glyphs: List[str] = []
+    rules = []
+    # Each block is like a script in a multi-script font
+    for script, (width, height) in enumerate(blocks):
+        glyphs.extend(f"g_{script}_{i}" for i in range(max(width, height)))
+        for l in range(height):
+            for r in range(width):
+                value += 1
+                rules.append((f"g_{script}_{l}", f"g_{script}_{r}", value))
+    classes = "\n".join([f"@{g} = [{g}];" for g in glyphs])
+    statements = "\n".join([f"pos @{l} @{r} {v};" for (l, r, v) in rules])
+    features = f"""
+        {classes}
+        feature kern {{
+            {statements}
+        }} kern;
+    """
+    return glyphs, features
+
+
+@pytest.mark.parametrize(
+    ("blocks", "mode", "expected_subtables", "expected_bytes"),
+    [
+        # Mode = 0 = no optimization leads to 650 bytes of GPOS
+        ([(15, 3), (2, 10)], None, 1, 602),
+        # Optimization level 1 recognizes the 2 blocks and splits into 2
+        # subtables = adds 1 subtable leading to a size reduction of
+        # (602-298)/602 = 50%
+        ([(15, 3), (2, 10)], 1, 2, 298),
+        # On a bigger block configuration, we see that mode=5 doesn't create
+        # as many subtables as it could, because of the stop criteria
+        ([(4, 4) for _ in range(20)], 5, 14, 2042),
+        # while level=9 creates as many subtables as there were blocks on the
+        # diagonal and yields a better saving
+        ([(4, 4) for _ in range(20)], 9, 20, 1886),
+        # On a fully occupied kerning matrix, even the strategy 9 doesn't
+        # split anything.
+        ([(10, 10)], 9, 1, 304)
+    ],
+)
+def test_optimization_mode(
+    caplog,
+    blocks: List[Tuple[int, int]],
+    mode: Optional[int],
+    expected_subtables: int,
+    expected_bytes: int,
+):
+    """Check that the optimizations are off by default, and that increasing
+    the optimization level creates more subtables and a smaller byte size.
+    """
+    caplog.set_level(logging.DEBUG)
+
+    glyphs, features = get_kerning_by_blocks(blocks)
+    glyphs = [".notdef space"] + glyphs
+
+    env = {}
+    if mode is not None:
+        # NOTE: activating this optimization via the environment variable is
+        # experimental and may not be supported once an alternative mechanism
+        # is in place. See: https://github.com/fonttools/fonttools/issues/2349
+        env["FONTTOOLS_GPOS_COMPACT_MODE"] = str(mode)
+    with set_env(**env):
+        fb = FontBuilder(1000)
+        fb.setupGlyphOrder(glyphs)
+        addOpenTypeFeaturesFromString(fb.font, features)
+        assert expected_subtables == count_pairpos_subtables(fb.font)
+        assert expected_bytes == count_pairpos_bytes(fb.font)