add_glyphs.py - third_party/github.com/googlefonts/noto-emoji - Git at Google

 #!/usr/bin/env python3

 """Extend a ttx file with additional data.

 Takes a ttx file and one or more directories containing image files named
 after sequences of codepoints, extends the cmap, hmtx, GSUB, and GlyphOrder
 tables in the source ttx file based on these sequences, and writes out a new
 ttx file.

 This can also apply aliases from an alias file."""

 import argparse
 import collections
 import os
 from os import path
 import re
 import sys

 from fontTools import ttx
 from fontTools.ttLib.tables import otTables
 from fontTools.pens.ttGlyphPen import TTGlyphPen
 from fontTools.ttLib.tables._c_m_a_p import CmapSubtable
 from fontTools.ttLib import newTable

 import add_emoji_gsub
 import add_aliases

 sys.path.append(
     path.join(os.path.dirname(__file__), 'third_party', 'color_emoji'))
 from png import PNG


 def get_seq_to_file(image_dir, prefix, suffix):
   """Return a mapping from codepoint sequences to files in image_dir.

   Only files whose names start with prefix and end with suffix are
   considered.  The part of the name between prefix and suffix must consist of
   codepoints in hex separated by underscore.  'fe0f' (the codepoint of the
   emoji presentation variation selector) is stripped from the sequence.

   Args:
     image_dir: directory to list (not recursive).
     prefix: required file name prefix, e.g. 'emoji_u'.
     suffix: required file name suffix, e.g. '.png'; may be empty.

   Returns:
     A dict mapping tuples of int codepoints to the file path, formed by
     joining image_dir and the file name.

   Raises:
     ValueError: if a matching name cannot be parsed as hex codepoints,
       contains a value outside 0..0x10ffff, or yields the same sequence as
       another file in image_dir.
   """
   start = len(prefix)
   seq_to_file = {}
   for name in os.listdir(image_dir):
     if not (name.startswith(prefix) and name.endswith(suffix)):
       continue
     # Compute the end index explicitly: a slice bound of -len(suffix) is 0
     # for an empty suffix, which would select nothing instead of the tail.
     limit = len(name) - len(suffix)
     try:
       cps = [int(s, 16) for s in name[start:limit].split('_')]
     except ValueError as err:
       raise ValueError('could not parse "%s"' % name) from err
     if any(not (0 <= cp <= 0x10ffff) for cp in cps):
       raise ValueError('bad codepoint(s) in "%s"' % name)
     seq = tuple(cp for cp in cps if cp != 0xfe0f)
     if seq in seq_to_file:
       raise ValueError(
           'duplicate sequence for "%s" in %s' % (name, image_dir))
     seq_to_file[seq] = path.join(image_dir, name)
   return seq_to_file


 def collect_seq_to_file(image_dirs, prefix, suffix):
   """Merge the sequence-to-file mappings of several directories.

   Each directory in image_dirs is scanned with get_seq_to_file, in the order
   given.  If the same sequence occurs in more than one directory, the file
   from the directory listed last is the one kept.
   """
   merged = {}
   for directory in image_dirs:
     found = get_seq_to_file(directory, prefix, suffix)
     for seq, filename in found.items():
       merged[seq] = filename
   return merged


 def remap_values(seq_to_file, map_fn):
   """Return a new dict with the keys of seq_to_file, each value replaced by
   the result of calling map_fn on it."""
   remapped = {}
   for seq, value in seq_to_file.items():
     remapped[seq] = map_fn(value)
   return remapped


 def get_png_file_to_advance_mapper(lineheight):
   """Return a function that maps a png file name to an advance.

   The returned function reads the pair reported by PNG(filename).get_size()
   (unpacked as width, height) and returns lineheight * width / height,
   rounded to the nearest int.
   """
   def advance_for(filename):
     width, height = PNG(filename).get_size()
     scaled = float(lineheight) * width / height
     return int(round(scaled))
   return advance_for


 def cp_name(cp):
   """Return the glyph name for a single codepoint: 'uniXXXX' for codepoints
   up to 0xffff, 'uXXXXX(X)' for codepoints above that."""
   prefix = 'uni' if cp <= 0xffff else 'u'
   return prefix + '%04X' % cp


 def seq_name(seq):
   """Return the glyph name for a codepoint sequence.

   A sequence of length one is named by cp_name.  Any other sequence is named
   'u' followed by the codepoints as upper-case hex, each padded to at least
   four digits, joined by underscore.
   """
   if len(seq) != 1:
     hex_parts = ['%04X' % cp for cp in seq]
     return 'u' + '_'.join(hex_parts)
   return cp_name(seq[0])


 def collect_cps(seqs):
   """Return the set of all codepoints occurring in any sequence in seqs."""
   return {cp for seq in seqs for cp in seq}


 def get_glyphorder_cps_and_truncate(glyphOrder):
   """Split glyphOrder into single-codepoint names and everything else.

   Names using the 'u(ni)XXXXXX' syntax (4 to 6 hex digits) are removed from
   glyphOrder and their codepoints are returned as a set.  All other names
   stay in glyphOrder, in their original relative order.  The list is
   modified in place.
   """
   glyph_name_re = re.compile(r'^u(?:ni)?([0-9a-fA-F]{4,6})$')
   cps = set()
   kept = []
   for name in glyphOrder:
     match = glyph_name_re.match(name)
     if match is None:
       kept.append(name)
     else:
       cps.add(int(match.group(1), 16))
   # Slice-assign so the caller's list object itself is shortened.
   glyphOrder[:] = kept
   return cps


 def get_all_seqs(font, seq_to_advance):
   """Return the full set of sequences that need glyphs.

   The result contains every key of seq_to_advance, plus a single-codepoint
   sequence for each codepoint that occurs inside one of those keys or is
   named by a 'u(ni)XXXX' entry in the font's glyph order.  As a side effect
   those single-codepoint names are removed from the list returned by
   font.getGlyphOrder().
   """
   # Strip single-codepoint names from the glyph order, keeping their cps.
   glyph_order = font.getGlyphOrder()
   glyph_order_cps = get_glyphorder_cps_and_truncate(glyph_order)

   seqs = set(seq_to_advance)
   # collect_cps also picks up codepoints internal to longer sequences.
   cps = collect_cps(seqs) | glyph_order_cps
   return seqs | {(cp,) for cp in cps}


 def get_font_cmap(font):
   """Return the mapping of the font's first cmap subtable.

   No checks are made: the subtable is assumed to exist and to be a unicode
   cmap.
   """
   first_subtable = font['cmap'].tables[0]
   return first_subtable.cmap


 def add_glyph_data(font, seqs, seq_to_advance, vadvance, add_glyf):
   """Add hmtx, vmtx and GlyphOrder entries for every sequence in seqs.

   Each single-codepoint sequence also gets a cmap entry.  The horizontal
   advance comes from seq_to_advance, or is zero for sequences not in it; the
   vertical advance is vadvance for every glyph.  If add_glyf is true, new
   glyf and loca tables are created and every glyph is given an empty outline.
   """

   # The template cmap may omit mappings for single-codepoint glyphs that are
   # defined in the template's GlyphOrder table, and the hmtx table may omit
   # their advances.  Glyphs named 'uniXXXX' or 'uXXXXX(X)' in the GlyphOrder
   # table are assumed to correspond to the codepoint in the name.  Other
   # kinds of names are not handled here and must already occur in the cmap
   # and hmtx tables of the template.
   #
   # seq_to_advance maps sequences (including single codepoints) to advances.
   # All codepoints in these sequences are added to the cmap, including those
   # that have no single-codepoint sequence of their own.  Such codepoints
   # have no advance information and so get a zero advance.

   cmap = get_font_cmap(font)
   hmtx = font['hmtx'].metrics
   vmtx = font['vmtx'].metrics

   # Empty glyphs in a glyf table ensure compatibility with systems that
   # require a glyf table, like Windows 10.
   if add_glyf:
     empty_glyph = TTGlyphPen(None).glyph()
     font['loca'] = newTable("loca")
     glyf_table = newTable("glyf")
     font['glyf'] = glyf_table
     glyf_table.glyphOrder = font.getGlyphOrder()
     glyf_table.glyphs = dict.fromkeys(glyf_table.glyphOrder, empty_glyph)

   # Sequence names are not expected in the glyph order: the single-cp names
   # were removed from it earlier and it should not contain names for
   # multiple-cp sequences.  The reverse glyph map is consulted anyway so a
   # name is never appended twice.
   known_names = font.getReverseGlyphMap()
   appended = False

   def singles_first(seq):
     # False sorts before True, so all single-codepoint sequences come first
     # and each group is then ordered by sequence.  Keeping the single-cp
     # glyphs together lets the coverage tables of some substitutions be more
     # compact.
     return (len(seq) != 1, seq)

   for seq in sorted(seqs, key=singles_first):
     name = seq_name(seq)
     if len(seq) == 1:
       cmap[seq[0]] = name
     hmtx[name] = [seq_to_advance.get(seq, 0), 0]
     vmtx[name] = [vadvance, 0]
     if name not in known_names:
       font.glyphOrder.append(name)
       appended = True
     if add_glyf:
       glyf_table[name] = empty_glyph

   if appended:
     # Drop the cached reverse map; it no longer matches the glyph order.
     del font._reverseGlyphOrderDict

 def add_aliases_to_cmap(font, aliases):
   """Map single-codepoint alias sources directly in the cmap.

   An alias whose source is a single codepoint is pointed, in the font's
   cmap, at the glyph named for the alias target.  Aliases with longer
   sources are not handled here (they map via GSUB).  Does nothing if aliases
   is empty or None.
   """
   if not aliases:
     return

   single_cp_sources = [src for src in aliases if len(src) == 1]
   if single_cp_sources:
     cmap = get_font_cmap(font)
     for src in single_cp_sources:
       cmap[src[0]] = seq_name(aliases[src])


 def get_rtl_seq(seq):
   """Return the rtl variant of the sequence, if it has one, else the empty
   sequence.

   Only sequences containing ZWJ (0x200d) have an rtl variant.  The variant
   is the reversed sequence, except that each Fitzpatrick modifier
   (0x1f3fb..0x1f3ff) is moved back so that it again follows the codepoint it
   followed in the original sequence.

   Args:
     seq: a tuple (or other sequence) of int codepoints.

   Returns:
     A tuple of codepoints, or () if seq contains no ZWJ.
   """
   # Used to check for TAG_END 0xe007f as well but Android fontchain_lint
   # dislikes the resulting mangling of flags for England, Scotland, Wales.

   ZWJ = 0x200d
   def is_fitzpatrick(cp):
     return 0x1f3fb <= cp <= 0x1f3ff

   if ZWJ not in seq:
     return ()

   rev_seq = list(reversed(seq))
   i = 1
   while i < len(rev_seq):
     if is_fitzpatrick(rev_seq[i-1]):
       # After reversal the modifier precedes its base; swap the pair back
       # into logical order.
       rev_seq[i-1], rev_seq[i] = rev_seq[i], rev_seq[i-1]
       # Step past the modifier, which now sits at index i.  Examining it
       # again would swap it a second time and drag it along the sequence.
       i += 2
     else:
       i += 1
   return tuple(rev_seq)


 def get_gsub_ligature_lookup(font):
   """Return the font's first GSUB lookup, set up for ligature substitution.

   If the font has a GSUB table, its first lookup is returned after filling
   in a missing LookupType (4) on the lookup and a missing 'ligatures' dict
   on the lookup's first subtable.  Otherwise a GSUB table holding one new
   ligature substitution lookup is created, stored in the font, and that
   lookup is returned.
   """
   if 'GSUB' in font:
     # The template might include more lookups after lookup 0.
     lookup = font['GSUB'].table.LookupList.Lookup[0]
     assert lookup.LookupFlag == 0

     # importXML doesn't fully init GSUB structures, so help it out
     subtable = lookup.SubTable[0]
     if not hasattr(lookup, 'LookupType'):
       assert subtable.LookupType == 4
       lookup.LookupType = 4
     if not hasattr(subtable, 'ligatures'):
       subtable.ligatures = {}
     return lookup

   ligature_subst = otTables.LigatureSubst()
   ligature_subst.ligatures = {}

   lookup = otTables.Lookup()
   lookup.LookupType = 4
   lookup.LookupFlag = 0
   lookup.SubTableCount = 1
   lookup.SubTable = [ligature_subst]

   font['GSUB'] = add_emoji_gsub.create_simple_gsub([lookup])
   return lookup


 def add_ligature_sequences(font, seqs, aliases):
   """Add a GSUB ligature for every multi-codepoint sequence and alias.

   Each sequence in seqs with more than one codepoint maps to its own glyph.
   Each alias source with more than one codepoint maps to the glyph of the
   alias target.  For every such sequence that has an rtl variant, the
   variant is mapped to the same glyph.  Does nothing if there are no
   multi-codepoint sequences.
   """
   targets = {}
   for seq in seqs:
     if len(seq) > 1:
       targets[seq] = seq_name(seq)
   if aliases:
     for seq in aliases:
       if len(seq) > 1:
         targets[seq] = seq_name(aliases[seq])
   if not targets:
     return

   rtl_targets = {get_rtl_seq(seq): name for seq, name in targets.items()}
   targets.update(rtl_targets)
   # Sequences without an rtl variant all map to the empty sequence; drop it.
   targets.pop((), None)

   # Group the (sequence, glyph name) pairs by first codepoint.
   by_first_cp = collections.defaultdict(list)
   for seq, name in targets.items():
     by_first_cp[seq[0]].append((seq, name))

   lookup = get_gsub_ligature_lookup(font)
   cmap = get_font_cmap(font)
   ligatures = lookup.SubTable[0].ligatures

   def longest_first(pair):
     # Longer sequences sharing a prefix must be handled before shorter
     # ones; ties are broken by plain ordering of the codepoints.
     return (-len(pair[0]), pair[0])

   for first_cp in sorted(by_first_cp):
     for seq, name in sorted(by_first_cp[first_cp], key=longest_first):
       # The sequences consist of codepoints, but the ligature table holds
       # glyph names.  Aliasing can give single codepoints names based on
       # sequences (e.g. 'guardsman' with 'male guardsman'), so the
       # codepoints are mapped through the cmap to get the glyph names.
       glyph_names = [cmap[cp] for cp in seq]

       lig = otTables.Ligature()
       lig.CompCount = len(seq)
       lig.Component = glyph_names[1:]
       lig.LigGlyph = name
       ligatures.setdefault(glyph_names[0], []).append(lig)

 def add_cmap_format_4(font):
   """Append a format 4 cmap subtable for Windows support.

   The new subtable (platformID 3, platEncID 1, language 0) is built from
   the font's first cmap subtable, keeping only codepoints up to 0xFFFF
   since format 4 cannot hold anything larger.
   """
   source_cmap = get_font_cmap(font)

   subtable = CmapSubtable.newSubtable(4)
   subtable.platformID = 3
   subtable.platEncID = 1
   subtable.language = 0

   bmp_only = {}
   for cp, name in source_cmap.items():
     if cp <= 0xFFFF:
       bmp_only[cp] = name
   subtable.cmap = bmp_only

   font['cmap'].tables.append(subtable)

 def update_font_data(font, seq_to_advance, vadvance, aliases, add_cmap4, add_glyf):
   """Run every font update step, in order, on font.

   Glyph data is added for all sequences, single-codepoint aliases are put
   in the cmap, ligatures are added for multi-codepoint sequences, and, if
   add_cmap4 is true, a format 4 cmap subtable is appended.
   """
   seqs = get_all_seqs(font, seq_to_advance)
   add_glyph_data(font, seqs, seq_to_advance, vadvance, add_glyf)
   add_aliases_to_cmap(font, aliases)
   add_ligature_sequences(font, seqs, aliases)
   if not add_cmap4:
     return
   add_cmap_format_4(font)

 def apply_aliases(seq_dict, aliases):
   """Return the aliases whose target is a key of seq_dict.

   aliases maps a source sequence to a replacement sequence.  An alias is
   usable only if its replacement is a key of seq_dict.  For each usable
   alias, the source sequence is also removed from seq_dict if present, so
   seq_dict is modified in place.
   """
   usable_aliases = {}
   for source, target in aliases.items():
     if target not in seq_dict:
       continue
     usable_aliases[source] = target
     seq_dict.pop(source, None)
   return usable_aliases


 def update_ttx(in_file, out_file, image_dirs, prefix, ext, aliases_file, add_cmap4, add_glyf):
   """Read the ttx file in_file, extend it from the images, write out_file.

   Image files are collected from image_dirs by prefix and ext.  Aliases are
   read from aliases_file if one is given.  Each glyph's horizontal advance
   is derived from its image's size and the font's hhea line height.

   Raises:
     Exception: if ext is not '.png'.
     ValueError: if no matching image files are found.
   """
   if ext != '.png':
     raise Exception('extension "%s" not supported' % ext)

   seq_to_file = collect_seq_to_file(image_dirs, prefix, ext)
   if not seq_to_file:
     dir_list = ', '.join(image_dirs)
     raise ValueError(
         'no sequences with prefix "%s" and extension "%s" in %s'
         % (prefix, ext, dir_list))

   if aliases_file:
     all_aliases = add_aliases.read_emoji_aliases(aliases_file)
     # This also drops alias sources from seq_to_file.
     aliases = apply_aliases(seq_to_file, all_aliases)
   else:
     aliases = None

   font = ttx.TTFont()
   font.importXML(in_file)

   hhea = font['hhea']
   lineheight = hhea.ascent - hhea.descent
   to_advance = get_png_file_to_advance_mapper(lineheight)
   seq_to_advance = remap_values(seq_to_file, to_advance)

   # Fall back to the line height when the font has no vhea table.
   if 'vhea' in font:
     vadvance = font['vhea'].advanceHeightMax
   else:
     vadvance = lineheight

   update_font_data(font, seq_to_advance, vadvance, aliases, add_cmap4, add_glyf)

   font.saveXML(out_file)


 def main():
   """Parse the command line and run update_ttx with the parsed options."""
   parser = argparse.ArgumentParser()
   parser.add_argument(
       '-f', '--in_file', help='ttx input file', metavar='file', required=True)
   parser.add_argument(
       '-o', '--out_file', help='ttx output file', metavar='file', required=True)
   parser.add_argument(
       '-d', '--image_dirs', help='directories containing image files',
       nargs='+', metavar='dir', required=True)
   parser.add_argument(
       '-p', '--prefix', help='file prefix (default "emoji_u")',
       metavar='pfx', default='emoji_u')
   # The help text previously lacked its closing parenthesis.
   parser.add_argument(
       '-e', '--ext', help='file extension (default ".png", currently only '
       '".png" is supported)',  metavar='ext', default='.png')
   # With no value, -a uses 'emoji_aliases.txt'; when absent, aliases is None.
   parser.add_argument(
       '-a', '--aliases', help='process alias table', const='emoji_aliases.txt',
       nargs='?', metavar='file')
   parser.add_argument(
       '--add_cmap4', help='add cmap format 4 table', dest='add_cmap4', action='store_true')
   parser.add_argument(
       '--add_glyf', help='add glyf and loca tables', dest='add_glyf', action='store_true')
   args = parser.parse_args()

   update_ttx(
       args.in_file, args.out_file, args.image_dirs, args.prefix, args.ext,
       args.aliases, args.add_cmap4, args.add_glyf)


 if __name__ == '__main__':
   main()
	#!/usr/bin/env python3

	"""Extend a ttx file with additional data.

	Takes a ttx file and one or more directories containing image files named
	after sequences of codepoints, extends the cmap, hmtx, GSUB, and GlyphOrder
	tables in the source ttx file based on these sequences, and writes out a new
	ttx file.

	This can also apply aliases from an alias file."""

	import argparse
	import collections
	import os
	from os import path
	import re
	import sys

	from fontTools import ttx
	from fontTools.ttLib.tables import otTables
	from fontTools.pens.ttGlyphPen import TTGlyphPen
	from fontTools.ttLib.tables._c_m_a_p import CmapSubtable
	from fontTools.ttLib import newTable

	import add_emoji_gsub
	import add_aliases

	sys.path.append(
	path.join(os.path.dirname(__file__), 'third_party', 'color_emoji'))
	from png import PNG


	def get_seq_to_file(image_dir, prefix, suffix):
	  """Return a mapping from codepoint sequences to files in the given directory,
	  for files that match the prefix and suffix.  File names with this prefix and
	  suffix should consist of codepoints in hex separated by underscore.  'fe0f'
	  (the codepoint of the emoji presentation variation selector) is stripped from
	  the sequence.

	  Raises ValueError if a matching name cannot be parsed, contains a value
	  outside 0..0x10ffff, or duplicates the sequence of another file in the
	  directory.
	  """
	  start = len(prefix)
	  seq_to_file = {}
	  for name in os.listdir(image_dir):
	    if not (name.startswith(prefix) and name.endswith(suffix)):
	      continue
	    # An end index of -len(suffix) would be 0 for an empty suffix and select
	    # nothing, so the end of the codepoint part is computed from the length.
	    limit = len(name) - len(suffix)
	    try:
	      cps = [int(s, 16) for s in name[start:limit].split('_')]
	    except ValueError as err:
	      # Only parse failures are translated; a bare except would also catch
	      # KeyboardInterrupt and SystemExit.
	      raise ValueError('could not parse "%s"' % name) from err
	    seq = tuple(cp for cp in cps if cp != 0xfe0f)
	    for cp in cps:
	      if not (0 <= cp <= 0x10ffff):
	        raise ValueError('bad codepoint(s) in "%s"' % name)
	    if seq in seq_to_file:
	      raise ValueError('duplicate sequence for "%s" in %s' % (name, image_dir))
	    seq_to_file[seq] = path.join(image_dir, name)
	  return seq_to_file


	def collect_seq_to_file(image_dirs, prefix, suffix):
	  """Return a sequence to file mapping by calling get_seq_to_file on a list
	  of directories.  When sequences for files in later directories match those
	  from earlier directories, the later file replaces the earlier one.
	  """
	  seq_to_file = {}
	  for image_dir in image_dirs:
	    seq_to_file.update(get_seq_to_file(image_dir, prefix, suffix))
	  return seq_to_file


	def remap_values(seq_to_file, map_fn):
	  """Return a new dict mapping each key of seq_to_file to map_fn applied to
	  its value."""
	  return {k: map_fn(v) for k, v in seq_to_file.items()}


	def get_png_file_to_advance_mapper(lineheight):
	  """Return a function that maps a png file name to an int advance, computed
	  as lineheight times the first over the second value of
	  PNG(filename).get_size() (unpacked as width and height), rounded."""
	  def map_fn(filename):
	    wid, ht = PNG(filename).get_size()
	    return int(round(float(lineheight) * wid / ht))
	  return map_fn


	def cp_name(cp):
	  """Return uniXXXX or uXXXXX(X) as a name for the glyph mapped to this cp."""
	  return '%s%04X' % ('u' if cp > 0xffff else 'uni', cp)


	def seq_name(seq):
	  """Sequences of length one get the cp_name.  Others start with 'u' followed by
	  two or more 4-to-6-digit hex strings separated by underscore."""
	  if len(seq) == 1:
	    return cp_name(seq[0])
	  return 'u' + '_'.join('%04X' % cp for cp in seq)


	def collect_cps(seqs):
	  """Return the set of all codepoints that occur in any of the sequences."""
	  cps = set()
	  for seq in seqs:
	    cps.update(seq)
	  return cps


	def get_glyphorder_cps_and_truncate(glyphOrder):
	  """This scans glyphOrder for names that correspond to a single codepoint
	  using the 'u(ni)XXXXXX' syntax.  All names that don't match are moved
	  to the front of the glyphOrder list in their original order, and the
	  list is truncated.  The ones that do match are returned as a set of
	  codepoints."""
	  glyph_name_re = re.compile(r'^u(?:ni)?([0-9a-fA-F]{4,6})$')
	  cps = set()
	  # Next slot for a name that is kept; never ahead of the read position.
	  write_ix = 0
	  for name in glyphOrder:
	    m = glyph_name_re.match(name)
	    if m:
	      cps.add(int(m.group(1), 16))
	    else:
	      glyphOrder[write_ix] = name
	      write_ix += 1
	  del glyphOrder[write_ix:]
	  return cps


	def get_all_seqs(font, seq_to_advance):
	  """Copies the sequences from seq_to_advance and extends it with single-
	  codepoint sequences from the GlyphOrder table as well as those internal
	  to sequences in seq_to_advance.  Reduces the GlyphOrder table. """

	  all_seqs = set(seq_to_advance.keys())
	  # using collect_cps includes cps internal to a seq
	  cps = collect_cps(all_seqs)
	  glyphOrder = font.getGlyphOrder()
	  # extract cps in glyphOrder and reduce glyphOrder to only those that remain
	  glyphOrder_cps = get_glyphorder_cps_and_truncate(glyphOrder)
	  cps.update(glyphOrder_cps)
	  # add new single codepoint sequences from glyphOrder and sequences
	  all_seqs.update((cp,) for cp in cps)
	  return all_seqs


	def get_font_cmap(font):
	  """Return the first cmap in the font, we assume it exists and is a unicode
	  cmap."""
	  return font['cmap'].tables[0].cmap


	def add_glyph_data(font, seqs, seq_to_advance, vadvance, add_glyf):
	  """Add hmtx and GlyphOrder data for all sequences in seqs, and ensures there's
	  a cmap entry for each single-codepoint sequence.  Seqs not in seq_to_advance
	  will get a zero advance.  Every glyph also gets a vmtx entry with vadvance.
	  If add_glyf is true, glyf and loca tables are created with empty glyphs."""

	  # We allow the template cmap to omit mappings for single-codepoint glyphs
	  # defined in the template's GlyphOrder table.  Similarly, the hmtx table can
	  # omit advances.  We assume glyphs named 'uniXXXX' or 'uXXXXX(X)' in the
	  # GlyphOrder table correspond to codepoints based on the name; we don't
	  # attempt to handle other types of names and these must occur in the cmap and
	  # hmtx tables in the template.
	  #
	  # seq_to_advance maps sequences (including single codepoints) to advances.
	  # All codepoints in these sequences will be added to the cmap.  Some cps
	  # in these sequences have no corresponding single-codepoint sequence, they
	  # will also get added.
	  #
	  # The added codepoints have no advance information, so will get a zero
	  # advance.

	  cmap = get_font_cmap(font)
	  hmtx = font['hmtx'].metrics
	  vmtx = font['vmtx'].metrics

	  # Add glyf table so empty glyphs will be added to ensure compatibility
	  # with systems requiring a glyf table, like Windows 10.
	  if add_glyf:
	    pen = TTGlyphPen(None)
	    empty_glyph = pen.glyph()
	    font['loca'] = newTable("loca")
	    font['glyf'] = glyf_table = newTable("glyf")
	    glyf_table.glyphOrder = font.getGlyphOrder()
	    glyf_table.glyphs = {g: empty_glyph for g in glyf_table.glyphOrder}

	  # We don't expect sequences to be in the glyphOrder, since we removed all the
	  # single-cp sequences from it and don't expect it to already contain names
	  # corresponding to multiple-cp sequences.  But just in case, we use
	  # reverseGlyphMap to avoid duplicating names accidentally.

	  updatedGlyphOrder = False
	  reverseGlyphMap = font.getReverseGlyphMap()

	  # Order the glyphs by grouping all the single-codepoint sequences first,
	  # then order by sequence so that related sequences are together.  We group
	  # by single-codepoint sequence first in order to keep these glyphs together--
	  # they're used in the coverage tables for some of the substitutions, and
	  # those tables can be more compact this way.
	  for seq in sorted(seqs, key=lambda s: (0 if len(s) == 1 else 1, s)):
	    name = seq_name(seq)
	    if len(seq) == 1:
	      cmap[seq[0]] = name
	    advance = seq_to_advance.get(seq, 0)
	    hmtx[name] = [advance, 0]
	    vmtx[name] = [vadvance, 0]
	    if name not in reverseGlyphMap:
	      font.glyphOrder.append(name)
	      updatedGlyphOrder = True
	    if add_glyf:
	      glyf_table[name] = empty_glyph

	  if updatedGlyphOrder:
	    # Discard the cached reverse map, which no longer matches glyphOrder.
	    delattr(font, '_reverseGlyphOrderDict')

	def add_aliases_to_cmap(font, aliases):
	  """Some aliases might map a single codepoint to some other sequence.  These
	  should map directly to the glyph for that sequence in the cmap.  (Others will
	  map via GSUB).
	  """
	  if not aliases:
	    return

	  cp_aliases = [seq for seq in aliases if len(seq) == 1]
	  if not cp_aliases:
	    return

	  cmap = get_font_cmap(font)
	  for src_seq in cp_aliases:
	    cp = src_seq[0]
	    name = seq_name(aliases[src_seq])
	    cmap[cp] = name


	def get_rtl_seq(seq):
	  """Return the rtl variant of the sequence, if it has one, else the empty
	  sequence.
	  """
	  # Sequences with ZWJ in them will reflect.  Fitzpatrick modifiers
	  # however do not, so if we reflect we make a pass to swap them back into their
	  # logical order.
	  # Used to check for TAG_END 0xe007f as well but Android fontchain_lint
	  # dislikes the resulting mangling of flags for England, Scotland, Wales.

	  ZWJ = 0x200d
	  def is_fitzpatrick(cp):
	    return 0x1f3fb <= cp <= 0x1f3ff

	  if ZWJ not in seq:
	    return ()

	  rev_seq = list(seq)
	  rev_seq.reverse()
	  i = 1
	  while i < len(rev_seq):
	    if is_fitzpatrick(rev_seq[i-1]):
	      rev_seq[i-1], rev_seq[i] = rev_seq[i], rev_seq[i-1]
	      # The modifier now sits at index i.  Skip it, otherwise the next step
	      # would find it at i-1 and swap it again, moving it past its base.
	      i += 2
	    else:
	      i += 1
	  return tuple(rev_seq)


	def get_gsub_ligature_lookup(font):
	  """If the font does not have a GSUB table, create one with a ligature
	  substitution lookup.  If it does, ensure the first lookup is a properly
	  initialized ligature substitution lookup.  Return the lookup."""

	  # The template might include more lookups after lookup 0, if it has a
	  # GSUB table.
	  if 'GSUB' not in font:
	    ligature_subst = otTables.LigatureSubst()
	    ligature_subst.ligatures = {}

	    lookup = otTables.Lookup()
	    lookup.LookupType = 4
	    lookup.LookupFlag = 0
	    lookup.SubTableCount = 1
	    lookup.SubTable = [ligature_subst]

	    font['GSUB'] = add_emoji_gsub.create_simple_gsub([lookup])
	  else:
	    lookup = font['GSUB'].table.LookupList.Lookup[0]
	    assert lookup.LookupFlag == 0

	    # importXML doesn't fully init GSUB structures, so help it out
	    st = lookup.SubTable[0]
	    if not hasattr(lookup, 'LookupType'):
	      assert st.LookupType == 4
	      setattr(lookup, 'LookupType', 4)

	    if not hasattr(st, 'ligatures'):
	      setattr(st, 'ligatures', {})

	  return lookup


	def add_ligature_sequences(font, seqs, aliases):
	  """Add ligature sequences.

	  Every sequence in seqs with more than one codepoint maps to its own glyph,
	  every alias source with more than one codepoint maps to the glyph of its
	  target, and the rtl variant of each of these (if any) maps to the same
	  glyph.  Does nothing if there are no such sequences."""

	  seq_to_target_name = {
	      seq: seq_name(seq) for seq in seqs if len(seq) > 1}
	  if aliases:
	    seq_to_target_name.update({
	        seq: seq_name(aliases[seq]) for seq in aliases if len(seq) > 1})
	  if not seq_to_target_name:
	    return

	  rtl_seq_to_target_name = {
	      get_rtl_seq(seq): name for seq, name in seq_to_target_name.items()}
	  seq_to_target_name.update(rtl_seq_to_target_name)
	  # sequences that don't have rtl variants get mapped to the empty sequence,
	  # delete it.
	  if () in seq_to_target_name:
	    del seq_to_target_name[()]

	  # organize by first codepoint in sequence
	  keyed_ligatures = collections.defaultdict(list)
	  for t in seq_to_target_name.items():
	    first_cp = t[0][0]
	    keyed_ligatures[first_cp].append(t)

	  def add_ligature(lookup, cmap, seq, name):
	    # The sequences consist of codepoints, but the entries in the ligature table
	    # are glyph names.  Aliasing can give single codepoints names based on
	    # sequences (e.g. 'guardsman' with 'male guardsman') so we map the
	    # codepoints through the cmap to get the glyph names.
	    glyph_names = [cmap[cp] for cp in seq]

	    lig = otTables.Ligature()
	    lig.CompCount = len(seq)
	    lig.Component = glyph_names[1:]
	    lig.LigGlyph = name

	    ligatures = lookup.SubTable[0].ligatures
	    first_name = glyph_names[0]
	    try:
	      ligatures[first_name].append(lig)
	    except KeyError:
	      ligatures[first_name] = [lig]

	  lookup = get_gsub_ligature_lookup(font)
	  cmap = get_font_cmap(font)
	  for first_cp in sorted(keyed_ligatures):
	    pairs = keyed_ligatures[first_cp]

	    # Sort longest first, this ensures longer sequences with common prefixes
	    # are handled before shorter ones.  The secondary sort is a standard
	    # sort on the codepoints in the sequence.
	    pairs.sort(key=lambda pair: (-len(pair[0]), pair[0]))
	    for seq, name in pairs:
	      add_ligature(lookup, cmap, seq, name)

	def add_cmap_format_4(font):
	  """Add cmap format 4 table for Windows support, based on the
	  format 12 cmap."""

	  cmap = get_font_cmap(font)

	  newtable = CmapSubtable.newSubtable(4)
	  newtable.platformID = 3
	  newtable.platEncID = 1
	  newtable.language = 0

	  # Format 4 only has unicode values 0x0000 to 0xFFFF
	  newtable.cmap = {cp: name for cp, name in cmap.items() if cp <= 0xFFFF}

	  font['cmap'].tables.append(newtable)

	def update_font_data(font, seq_to_advance, vadvance, aliases, add_cmap4, add_glyf):
	  """Update the font's cmap, hmtx, GSUB, and GlyphOrder tables."""
	  seqs = get_all_seqs(font, seq_to_advance)
	  add_glyph_data(font, seqs, seq_to_advance, vadvance, add_glyf)
	  add_aliases_to_cmap(font, aliases)
	  add_ligature_sequences(font, seqs, aliases)
	  if add_cmap4:
	    add_cmap_format_4(font)

	def apply_aliases(seq_dict, aliases):
	  """Aliases is a mapping from sequence to replacement sequence.  We can use
	  an alias if the target is a key in the dictionary.  Furthermore, if the
	  source is a key in the dictionary, we can delete it.  This updates the
	  dictionary and returns the usable aliases."""
	  usable_aliases = {}
	  for k, v in aliases.items():
	    if v in seq_dict:
	      usable_aliases[k] = v
	      if k in seq_dict:
	        del seq_dict[k]
	  return usable_aliases


	def update_ttx(in_file, out_file, image_dirs, prefix, ext, aliases_file, add_cmap4, add_glyf):
	  """Read the ttx file in_file, extend it using the image files found in
	  image_dirs, and write the result to out_file.

	  Raises Exception if ext is not '.png', and ValueError if no image files
	  match prefix and ext."""
	  if ext != '.png':
	    raise Exception('extension "%s" not supported' % ext)

	  seq_to_file = collect_seq_to_file(image_dirs, prefix, ext)
	  if not seq_to_file:
	    raise ValueError(
	        'no sequences with prefix "%s" and extension "%s" in %s' % (
	            prefix, ext, ', '.join(image_dirs)))

	  aliases = None
	  if aliases_file:
	    aliases = add_aliases.read_emoji_aliases(aliases_file)
	    aliases = apply_aliases(seq_to_file, aliases)

	  font = ttx.TTFont()
	  font.importXML(in_file)

	  lineheight = font['hhea'].ascent - font['hhea'].descent
	  map_fn = get_png_file_to_advance_mapper(lineheight)
	  seq_to_advance = remap_values(seq_to_file, map_fn)

	  # Without a vhea table the line height is used as the vertical advance.
	  vadvance = font['vhea'].advanceHeightMax if 'vhea' in font else lineheight

	  update_font_data(font, seq_to_advance, vadvance, aliases, add_cmap4, add_glyf)

	  font.saveXML(out_file)


	def main():
	  """Parse the command line and run update_ttx with the parsed options."""
	  parser = argparse.ArgumentParser()
	  parser.add_argument(
	      '-f', '--in_file', help='ttx input file', metavar='file', required=True)
	  parser.add_argument(
	      '-o', '--out_file', help='ttx output file', metavar='file', required=True)
	  parser.add_argument(
	      '-d', '--image_dirs', help='directories containing image files',
	      nargs='+', metavar='dir', required=True)
	  parser.add_argument(
	      '-p', '--prefix', help='file prefix (default "emoji_u")',
	      metavar='pfx', default='emoji_u')
	  # The help text previously lacked its closing parenthesis.
	  parser.add_argument(
	      '-e', '--ext', help='file extension (default ".png", currently only '
	      '".png" is supported)', metavar='ext', default='.png')
	  parser.add_argument(
	      '-a', '--aliases', help='process alias table', const='emoji_aliases.txt',
	      nargs='?', metavar='file')
	  parser.add_argument(
	      '--add_cmap4', help='add cmap format 4 table', dest='add_cmap4', action='store_true')
	  parser.add_argument(
	      '--add_glyf', help='add glyf and loca tables', dest='add_glyf', action='store_true')
	  args = parser.parse_args()

	  update_ttx(
	      args.in_file, args.out_file, args.image_dirs, args.prefix, args.ext,
	      args.aliases, args.add_cmap4, args.add_glyf)


	if __name__ == '__main__':
	  main()