| from __future__ import print_function, division, absolute_import |
| from __future__ import unicode_literals |
| from fontTools.feaLib.error import FeatureLibError |
| from fontTools.feaLib.lexer import Lexer, IncludingLexer, NonIncludingLexer |
| from fontTools.misc.encodingTools import getEncoding |
| from fontTools.misc.py23 import * |
| import fontTools.feaLib.ast as ast |
| import logging |
| import os |
| import re |
| |
| |
| log = logging.getLogger(__name__) |
| |
| |
| class Parser(object): |
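| """Parses an OpenType feature file into a fontTools.feaLib ast tree. |
| |
| Minimal usage sketch (the path and glyph list below are illustrative |
| only, not part of this module): |
| |
| from fontTools.feaLib.parser import Parser |
| doc = Parser("features.fea", glyphNames=["a", "b", "a.sc"]).parse() |
| print(doc.asFea())  # doc is an ast.FeatureFile |
| """ |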
| extensions = {} |
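| # Hook for custom statement types: maps a NAME token to a callable that |
| # receives the parser and returns an ast statement (see parse() and |
| # parse_block_()). Illustrative only: a subclass might register e.g. |
| # extensions = {"myKeyword": lambda parser: parser.parse_my_keyword_()} |
| # where parse_my_keyword_ is a hypothetical method of that subclass. |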
| ast = ast |
| SS_FEATURE_TAGS = {"ss%02d" % i for i in range(1, 20+1)} |
| CV_FEATURE_TAGS = {"cv%02d" % i for i in range(1, 99+1)} |
| |
| def __init__(self, featurefile, glyphNames=(), followIncludes=True, |
| **kwargs): |
| if "glyphMap" in kwargs: |
| from fontTools.misc.loggingTools import deprecateArgument |
| deprecateArgument("glyphMap", "use 'glyphNames' (iterable) instead") |
| if glyphNames: |
| raise TypeError("'glyphNames' and (deprecated) 'glyphMap' are " |
| "mutually exclusive") |
| glyphNames = kwargs.pop("glyphMap") |
| if kwargs: |
| raise TypeError("unsupported keyword argument%s: %s" |
| % ("" if len(kwargs) == 1 else "s", |
| ", ".join(repr(k) for k in kwargs))) |
| |
| self.glyphNames_ = set(glyphNames) |
| self.doc_ = self.ast.FeatureFile() |
| self.anchors_ = SymbolTable() |
| self.glyphclasses_ = SymbolTable() |
| self.lookups_ = SymbolTable() |
| self.valuerecords_ = SymbolTable() |
| self.symbol_tables_ = { |
| self.anchors_, self.valuerecords_ |
| } |
| self.next_token_type_, self.next_token_ = (None, None) |
| self.cur_comments_ = [] |
| self.next_token_location_ = None |
| lexerClass = IncludingLexer if followIncludes else NonIncludingLexer |
| self.lexer_ = lexerClass(featurefile) |
| self.advance_lexer_(comments=True) |
| |
| def parse(self): |
| statements = self.doc_.statements |
| while self.next_token_type_ is not None or self.cur_comments_: |
| self.advance_lexer_(comments=True) |
| if self.cur_token_type_ is Lexer.COMMENT: |
| statements.append( |
| self.ast.Comment(self.cur_token_, |
| location=self.cur_token_location_)) |
| elif self.is_cur_keyword_("include"): |
| statements.append(self.parse_include_()) |
| elif self.cur_token_type_ is Lexer.GLYPHCLASS: |
| statements.append(self.parse_glyphclass_definition_()) |
| elif self.is_cur_keyword_(("anon", "anonymous")): |
| statements.append(self.parse_anonymous_()) |
| elif self.is_cur_keyword_("anchorDef"): |
| statements.append(self.parse_anchordef_()) |
| elif self.is_cur_keyword_("languagesystem"): |
| statements.append(self.parse_languagesystem_()) |
| elif self.is_cur_keyword_("lookup"): |
| statements.append(self.parse_lookup_(vertical=False)) |
| elif self.is_cur_keyword_("markClass"): |
| statements.append(self.parse_markClass_()) |
| elif self.is_cur_keyword_("feature"): |
| statements.append(self.parse_feature_block_()) |
| elif self.is_cur_keyword_("table"): |
| statements.append(self.parse_table_()) |
| elif self.is_cur_keyword_("valueRecordDef"): |
| statements.append( |
| self.parse_valuerecord_definition_(vertical=False)) |
| elif self.cur_token_type_ is Lexer.NAME and self.cur_token_ in self.extensions: |
| statements.append(self.extensions[self.cur_token_](self)) |
| elif self.cur_token_type_ is Lexer.SYMBOL and self.cur_token_ == ";": |
| continue |
| else: |
| raise FeatureLibError( |
| "Expected feature, languagesystem, lookup, markClass, " |
| "table, or glyph class definition, got {} \"{}\"".format(self.cur_token_type_, self.cur_token_), |
| self.cur_token_location_) |
| return self.doc_ |
| |
| def parse_anchor_(self): |
| self.expect_symbol_("<") |
| self.expect_keyword_("anchor") |
| location = self.cur_token_location_ |
| |
| if self.next_token_ == "NULL": |
| self.expect_keyword_("NULL") |
| self.expect_symbol_(">") |
| return None |
| |
| if self.next_token_type_ == Lexer.NAME: |
| name = self.expect_name_() |
| anchordef = self.anchors_.resolve(name) |
| if anchordef is None: |
| raise FeatureLibError( |
| 'Unknown anchor "%s"' % name, |
| self.cur_token_location_) |
| self.expect_symbol_(">") |
| return self.ast.Anchor(anchordef.x, anchordef.y, |
| name=name, |
| contourpoint=anchordef.contourpoint, |
| xDeviceTable=None, yDeviceTable=None, |
| location=location) |
| |
| x, y = self.expect_number_(), self.expect_number_() |
| |
| contourpoint = None |
| if self.next_token_ == "contourpoint": |
| self.expect_keyword_("contourpoint") |
| contourpoint = self.expect_number_() |
| |
| if self.next_token_ == "<": |
| xDeviceTable = self.parse_device_() |
| yDeviceTable = self.parse_device_() |
| else: |
| xDeviceTable, yDeviceTable = None, None |
| |
| self.expect_symbol_(">") |
| return self.ast.Anchor(x, y, name=None, |
| contourpoint=contourpoint, |
| xDeviceTable=xDeviceTable, |
| yDeviceTable=yDeviceTable, |
| location=location) |
| |
| def parse_anchor_marks_(self): |
| """Parses a sequence of [<anchor> mark @MARKCLASS]*.""" |
| anchorMarks = [] # [(self.ast.Anchor, markClassName)*] |
| while self.next_token_ == "<": |
| anchor = self.parse_anchor_() |
| if anchor is None and self.next_token_ != "mark": |
| continue # <anchor NULL> without mark, e.g. in GPOS type 5 |
| self.expect_keyword_("mark") |
| markClass = self.expect_markClass_reference_() |
| anchorMarks.append((anchor, markClass)) |
| return anchorMarks |
| |
| def parse_anchordef_(self): |
| assert self.is_cur_keyword_("anchorDef") |
| location = self.cur_token_location_ |
| x, y = self.expect_number_(), self.expect_number_() |
| contourpoint = None |
| if self.next_token_ == "contourpoint": |
| self.expect_keyword_("contourpoint") |
| contourpoint = self.expect_number_() |
| name = self.expect_name_() |
| self.expect_symbol_(";") |
| anchordef = self.ast.AnchorDefinition(name, x, y, |
| contourpoint=contourpoint, |
| location=location) |
| self.anchors_.define(name, anchordef) |
| return anchordef |
| |
| def parse_anonymous_(self): |
| assert self.is_cur_keyword_(("anon", "anonymous")) |
| tag = self.expect_tag_() |
| _, content, location = self.lexer_.scan_anonymous_block(tag) |
| self.advance_lexer_() |
| self.expect_symbol_('}') |
| end_tag = self.expect_tag_() |
| assert tag == end_tag, "bad splitting in Lexer.scan_anonymous_block()" |
| self.expect_symbol_(';') |
| return self.ast.AnonymousBlock(tag, content, location=location) |
| |
| def parse_attach_(self): |
| assert self.is_cur_keyword_("Attach") |
| location = self.cur_token_location_ |
| glyphs = self.parse_glyphclass_(accept_glyphname=True) |
| contourPoints = {self.expect_number_()} |
| while self.next_token_ != ";": |
| contourPoints.add(self.expect_number_()) |
| self.expect_symbol_(";") |
| return self.ast.AttachStatement(glyphs, contourPoints, |
| location=location) |
| |
| def parse_enumerate_(self, vertical): |
| assert self.cur_token_ in {"enumerate", "enum"} |
| self.advance_lexer_() |
| return self.parse_position_(enumerated=True, vertical=vertical) |
| |
| def parse_GlyphClassDef_(self): |
| """Parses 'GlyphClassDef @BASE, @LIGATURES, @MARKS, @COMPONENTS;'""" |
| assert self.is_cur_keyword_("GlyphClassDef") |
| location = self.cur_token_location_ |
| if self.next_token_ != ",": |
| baseGlyphs = self.parse_glyphclass_(accept_glyphname=False) |
| else: |
| baseGlyphs = None |
| self.expect_symbol_(",") |
| if self.next_token_ != ",": |
| ligatureGlyphs = self.parse_glyphclass_(accept_glyphname=False) |
| else: |
| ligatureGlyphs = None |
| self.expect_symbol_(",") |
| if self.next_token_ != ",": |
| markGlyphs = self.parse_glyphclass_(accept_glyphname=False) |
| else: |
| markGlyphs = None |
| self.expect_symbol_(",") |
| if self.next_token_ != ";": |
| componentGlyphs = self.parse_glyphclass_(accept_glyphname=False) |
| else: |
| componentGlyphs = None |
| self.expect_symbol_(";") |
| return self.ast.GlyphClassDefStatement(baseGlyphs, markGlyphs, |
| ligatureGlyphs, componentGlyphs, |
| location=location) |
| |
| def parse_glyphclass_definition_(self): |
| """Parses glyph class definitions such as '@UPPERCASE = [A-Z];'""" |
| location, name = self.cur_token_location_, self.cur_token_ |
| self.expect_symbol_("=") |
| glyphs = self.parse_glyphclass_(accept_glyphname=False) |
| self.expect_symbol_(";") |
| glyphclass = self.ast.GlyphClassDefinition(name, glyphs, |
| location=location) |
| self.glyphclasses_.define(name, glyphclass) |
| return glyphclass |
| |
| def split_glyph_range_(self, name, location): |
| # Since v1.20, the OpenType Feature File specification allows |
| # for dashes in glyph names. A sequence like "a-b-c-d" could |
| # therefore mean a single glyph whose name happens to be |
| # "a-b-c-d", or it could mean a range from glyph "a" to glyph |
| # "b-c-d", or a range from glyph "a-b" to glyph "c-d", or a |
| # range from glyph "a-b-c" to glyph "d".Technically, this |
| # example could be resolved because the (pretty complex) |
| # definition of glyph ranges renders most of these splits |
| # invalid. But the specification does not say that a compiler |
| # should try to apply such fancy heuristics. To encourage |
| # unambiguous feature files, we therefore try all possible |
| # splits and reject the feature file if there are multiple |
| # splits possible. It is intentional that we don't just emit a |
| # warning; warnings tend to get ignored. To fix the problem, |
| # font designers can trivially add spaces around the intended |
| # split point, and we emit a compiler error that suggests |
| # how exactly the source should be rewritten to make things |
| # unambiguous. |
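| # For example, if the font contains glyphs "a" and "b-c-d" as well |
| # as "a-b" and "c-d", the name "a-b-c-d" admits two possible splits |
| # and is rejected as ambiguous; if only one of those pairs exists, |
| # the single valid split determines the range. |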
| parts = name.split("-") |
| solutions = [] |
| for i in range(len(parts)): |
| start, limit = "-".join(parts[0:i]), "-".join(parts[i:]) |
| if start in self.glyphNames_ and limit in self.glyphNames_: |
| solutions.append((start, limit)) |
| if len(solutions) == 1: |
| start, limit = solutions[0] |
| return start, limit |
| elif len(solutions) == 0: |
| raise FeatureLibError( |
| "\"%s\" is not a glyph in the font, and it can not be split " |
| "into a range of known glyphs" % name, location) |
| else: |
| ranges = " or ".join(["\"%s - %s\"" % (s, l) for s, l in solutions]) |
| raise FeatureLibError( |
| "Ambiguous glyph range \"%s\"; " |
| "please use %s to clarify what you mean" % (name, ranges), |
| location) |
| |
| def parse_glyphclass_(self, accept_glyphname): |
| if (accept_glyphname and |
| self.next_token_type_ in (Lexer.NAME, Lexer.CID)): |
| glyph = self.expect_glyph_() |
| return self.ast.GlyphName(glyph, location=self.cur_token_location_) |
| if self.next_token_type_ is Lexer.GLYPHCLASS: |
| self.advance_lexer_() |
| gc = self.glyphclasses_.resolve(self.cur_token_) |
| if gc is None: |
| raise FeatureLibError( |
| "Unknown glyph class @%s" % self.cur_token_, |
| self.cur_token_location_) |
| if isinstance(gc, self.ast.MarkClass): |
| return self.ast.MarkClassName( |
| gc, location=self.cur_token_location_) |
| else: |
| return self.ast.GlyphClassName( |
| gc, location=self.cur_token_location_) |
| |
| self.expect_symbol_("[") |
| location = self.cur_token_location_ |
| glyphs = self.ast.GlyphClass(location=location) |
| while self.next_token_ != "]": |
| if self.next_token_type_ is Lexer.NAME: |
| glyph = self.expect_glyph_() |
| location = self.cur_token_location_ |
| if '-' in glyph and glyph not in self.glyphNames_: |
| start, limit = self.split_glyph_range_(glyph, location) |
| glyphs.add_range( |
| start, limit, |
| self.make_glyph_range_(location, start, limit)) |
| elif self.next_token_ == "-": |
| start = glyph |
| self.expect_symbol_("-") |
| limit = self.expect_glyph_() |
| glyphs.add_range( |
| start, limit, |
| self.make_glyph_range_(location, start, limit)) |
| else: |
| glyphs.append(glyph) |
| elif self.next_token_type_ is Lexer.CID: |
| glyph = self.expect_glyph_() |
| if self.next_token_ == "-": |
| range_location = self.cur_token_location_ |
| range_start = self.cur_token_ |
| self.expect_symbol_("-") |
| range_end = self.expect_cid_() |
| glyphs.add_cid_range(range_start, range_end, |
| self.make_cid_range_(range_location, |
| range_start, range_end)) |
| else: |
| glyphs.append("cid%05d" % self.cur_token_) |
| elif self.next_token_type_ is Lexer.GLYPHCLASS: |
| self.advance_lexer_() |
| gc = self.glyphclasses_.resolve(self.cur_token_) |
| if gc is None: |
| raise FeatureLibError( |
| "Unknown glyph class @%s" % self.cur_token_, |
| self.cur_token_location_) |
| if isinstance(gc, self.ast.MarkClass): |
| gc = self.ast.MarkClassName( |
| gc, location=self.cur_token_location_) |
| else: |
| gc = self.ast.GlyphClassName( |
| gc, location=self.cur_token_location_) |
| glyphs.add_class(gc) |
| else: |
| raise FeatureLibError( |
| "Expected glyph name, glyph range, " |
| "or glyph class reference", |
| self.next_token_location_) |
| self.expect_symbol_("]") |
| return glyphs |
| |
| def parse_class_name_(self): |
| name = self.expect_class_name_() |
| gc = self.glyphclasses_.resolve(name) |
| if gc is None: |
| raise FeatureLibError( |
| "Unknown glyph class @%s" % name, |
| self.cur_token_location_) |
| if isinstance(gc, self.ast.MarkClass): |
| return self.ast.MarkClassName( |
| gc, location=self.cur_token_location_) |
| else: |
| return self.ast.GlyphClassName( |
| gc, location=self.cur_token_location_) |
| |
| def parse_glyph_pattern_(self, vertical): |
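| # Splits a pattern such as "a b' c' lookup L d" into prefix=[a], |
| # glyphs=[b, c] (the single marked run), suffix=[d], together with |
| # any per-position value records and the lookups attached to marked |
| # glyphs. |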
| prefix, glyphs, lookups, values, suffix = ([], [], [], [], []) |
| hasMarks = False |
| while self.next_token_ not in {"by", "from", ";", ","}: |
| gc = self.parse_glyphclass_(accept_glyphname=True) |
| marked = False |
| if self.next_token_ == "'": |
| self.expect_symbol_("'") |
| hasMarks = marked = True |
| if marked: |
| if suffix: |
| # makeotf also reports this as an error, while FontForge |
| # silently inserts ' in all the intervening glyphs. |
| # https://github.com/fonttools/fonttools/pull/1096 |
| raise FeatureLibError( |
| "Unsupported contextual target sequence: at most " |
| "one run of marked (') glyph/class names allowed", |
| self.cur_token_location_) |
| glyphs.append(gc) |
| elif glyphs: |
| suffix.append(gc) |
| else: |
| prefix.append(gc) |
| |
| if self.is_next_value_(): |
| values.append(self.parse_valuerecord_(vertical)) |
| else: |
| values.append(None) |
| |
| lookup = None |
| if self.next_token_ == "lookup": |
| self.expect_keyword_("lookup") |
| if not marked: |
| raise FeatureLibError( |
| "Lookups can only follow marked glyphs", |
| self.cur_token_location_) |
| lookup_name = self.expect_name_() |
| lookup = self.lookups_.resolve(lookup_name) |
| if lookup is None: |
| raise FeatureLibError( |
| 'Unknown lookup "%s"' % lookup_name, |
| self.cur_token_location_) |
| if marked: |
| lookups.append(lookup) |
| |
| if not glyphs and not suffix: # e.g., "sub f f i by" |
| assert lookups == [] |
| return ([], prefix, [None] * len(prefix), values, [], hasMarks) |
| else: |
| assert not any(values[:len(prefix)]), values |
| format1 = values[len(prefix):][:len(glyphs)] |
| format2 = values[(len(prefix) + len(glyphs)):][:len(suffix)] |
| values = format2 if format2 and isinstance(format2[0], self.ast.ValueRecord) else format1 |
| return (prefix, glyphs, lookups, values, suffix, hasMarks) |
| |
| def parse_chain_context_(self): |
| location = self.cur_token_location_ |
| prefix, glyphs, lookups, values, suffix, hasMarks = \ |
| self.parse_glyph_pattern_(vertical=False) |
| chainContext = [(prefix, glyphs, suffix)] |
| hasLookups = any(lookups) |
| while self.next_token_ == ",": |
| self.expect_symbol_(",") |
| prefix, glyphs, lookups, values, suffix, hasMarks = \ |
| self.parse_glyph_pattern_(vertical=False) |
| chainContext.append((prefix, glyphs, suffix)) |
| hasLookups = hasLookups or any(lookups) |
| self.expect_symbol_(";") |
| return chainContext, hasLookups |
| |
| def parse_ignore_(self): |
| assert self.is_cur_keyword_("ignore") |
| location = self.cur_token_location_ |
| self.advance_lexer_() |
| if self.cur_token_ in ["substitute", "sub"]: |
| chainContext, hasLookups = self.parse_chain_context_() |
| if hasLookups: |
| raise FeatureLibError( |
| "No lookups can be specified for \"ignore sub\"", |
| location) |
| return self.ast.IgnoreSubstStatement(chainContext, |
| location=location) |
| if self.cur_token_ in ["position", "pos"]: |
| chainContext, hasLookups = self.parse_chain_context_() |
| if hasLookups: |
| raise FeatureLibError( |
| "No lookups can be specified for \"ignore pos\"", |
| location) |
| return self.ast.IgnorePosStatement(chainContext, |
| location=location) |
| raise FeatureLibError( |
| "Expected \"substitute\" or \"position\"", |
| self.cur_token_location_) |
| |
| def parse_include_(self): |
| assert self.cur_token_ == "include" |
| location = self.cur_token_location_ |
| filename = self.expect_filename_() |
| # self.expect_symbol_(";") |
| return ast.IncludeStatement(filename, location=location) |
| |
| def parse_language_(self): |
| assert self.is_cur_keyword_("language") |
| location = self.cur_token_location_ |
| language = self.expect_language_tag_() |
| include_default, required = (True, False) |
| if self.next_token_ in {"exclude_dflt", "include_dflt"}: |
| include_default = (self.expect_name_() == "include_dflt") |
| if self.next_token_ == "required": |
| self.expect_keyword_("required") |
| required = True |
| self.expect_symbol_(";") |
| return self.ast.LanguageStatement(language, |
| include_default, required, |
| location=location) |
| |
| def parse_ligatureCaretByIndex_(self): |
| assert self.is_cur_keyword_("LigatureCaretByIndex") |
| location = self.cur_token_location_ |
| glyphs = self.parse_glyphclass_(accept_glyphname=True) |
| carets = [self.expect_number_()] |
| while self.next_token_ != ";": |
| carets.append(self.expect_number_()) |
| self.expect_symbol_(";") |
| return self.ast.LigatureCaretByIndexStatement(glyphs, carets, |
| location=location) |
| |
| def parse_ligatureCaretByPos_(self): |
| assert self.is_cur_keyword_("LigatureCaretByPos") |
| location = self.cur_token_location_ |
| glyphs = self.parse_glyphclass_(accept_glyphname=True) |
| carets = [self.expect_number_()] |
| while self.next_token_ != ";": |
| carets.append(self.expect_number_()) |
| self.expect_symbol_(";") |
| return self.ast.LigatureCaretByPosStatement(glyphs, carets, |
| location=location) |
| |
| def parse_lookup_(self, vertical): |
| assert self.is_cur_keyword_("lookup") |
| location, name = self.cur_token_location_, self.expect_name_() |
| |
| if self.next_token_ == ";": |
| lookup = self.lookups_.resolve(name) |
| if lookup is None: |
| raise FeatureLibError("Unknown lookup \"%s\"" % name, |
| self.cur_token_location_) |
| self.expect_symbol_(";") |
| return self.ast.LookupReferenceStatement(lookup, |
| location=location) |
| |
| use_extension = False |
| if self.next_token_ == "useExtension": |
| self.expect_keyword_("useExtension") |
| use_extension = True |
| |
| block = self.ast.LookupBlock(name, use_extension, location=location) |
| self.parse_block_(block, vertical) |
| self.lookups_.define(name, block) |
| return block |
| |
| def parse_lookupflag_(self): |
| assert self.is_cur_keyword_("lookupflag") |
| location = self.cur_token_location_ |
| |
| # format B: "lookupflag 6;" |
| if self.next_token_type_ == Lexer.NUMBER: |
| value = self.expect_number_() |
| self.expect_symbol_(";") |
| return self.ast.LookupFlagStatement(value, location=location) |
| |
| # format A: "lookupflag RightToLeft MarkAttachmentType @M;" |
| value_seen = False |
| value, markAttachment, markFilteringSet = 0, None, None |
| flags = { |
| "RightToLeft": 1, "IgnoreBaseGlyphs": 2, |
| "IgnoreLigatures": 4, "IgnoreMarks": 8 |
| } |
| seen = set() |
| while self.next_token_ != ";": |
| if self.next_token_ in seen: |
| raise FeatureLibError( |
| "%s can be specified only once" % self.next_token_, |
| self.next_token_location_) |
| seen.add(self.next_token_) |
| if self.next_token_ == "MarkAttachmentType": |
| self.expect_keyword_("MarkAttachmentType") |
| markAttachment = self.parse_class_name_() |
| elif self.next_token_ == "UseMarkFilteringSet": |
| self.expect_keyword_("UseMarkFilteringSet") |
| markFilteringSet = self.parse_class_name_() |
| elif self.next_token_ in flags: |
| value_seen = True |
| value = value | flags[self.expect_name_()] |
| else: |
| raise FeatureLibError( |
| '"%s" is not a recognized lookupflag' % self.next_token_, |
| self.next_token_location_) |
| self.expect_symbol_(";") |
| |
| if not any([value_seen, markAttachment, markFilteringSet]): |
| raise FeatureLibError( |
| 'lookupflag must have a value', self.next_token_location_) |
| |
| return self.ast.LookupFlagStatement(value, |
| markAttachment=markAttachment, |
| markFilteringSet=markFilteringSet, |
| location=location) |
| |
| def parse_markClass_(self): |
| assert self.is_cur_keyword_("markClass") |
| location = self.cur_token_location_ |
| glyphs = self.parse_glyphclass_(accept_glyphname=True) |
| anchor = self.parse_anchor_() |
| name = self.expect_class_name_() |
| self.expect_symbol_(";") |
| markClass = self.doc_.markClasses.get(name) |
| if markClass is None: |
| markClass = self.ast.MarkClass(name) |
| self.doc_.markClasses[name] = markClass |
| self.glyphclasses_.define(name, markClass) |
| mcdef = self.ast.MarkClassDefinition(markClass, anchor, glyphs, |
| location=location) |
| markClass.addDefinition(mcdef) |
| return mcdef |
| |
| def parse_position_(self, enumerated, vertical): |
| assert self.cur_token_ in {"position", "pos"} |
| if self.next_token_ == "cursive": # GPOS type 3 |
| return self.parse_position_cursive_(enumerated, vertical) |
| elif self.next_token_ == "base": # GPOS type 4 |
| return self.parse_position_base_(enumerated, vertical) |
| elif self.next_token_ == "ligature": # GPOS type 5 |
| return self.parse_position_ligature_(enumerated, vertical) |
| elif self.next_token_ == "mark": # GPOS type 6 |
| return self.parse_position_mark_(enumerated, vertical) |
| |
| location = self.cur_token_location_ |
| prefix, glyphs, lookups, values, suffix, hasMarks = \ |
| self.parse_glyph_pattern_(vertical) |
| self.expect_symbol_(";") |
| |
| if any(lookups): |
| # GPOS type 8: Chaining contextual positioning; explicit lookups |
| if any(values): |
| raise FeatureLibError( |
| "If \"lookup\" is present, no values must be specified", |
| location) |
| return self.ast.ChainContextPosStatement( |
| prefix, glyphs, suffix, lookups, location=location) |
| |
| # Pair positioning, format A: "pos V 10 A -10;" |
| # Pair positioning, format B: "pos V A -20;" |
| if not prefix and not suffix and len(glyphs) == 2 and not hasMarks: |
| if values[0] is None: # Format B: "pos V A -20;" |
| values.reverse() |
| return self.ast.PairPosStatement( |
| glyphs[0], values[0], glyphs[1], values[1], |
| enumerated=enumerated, |
| location=location) |
| |
| if enumerated: |
| raise FeatureLibError( |
| '"enumerate" is only allowed with pair positionings', location) |
| return self.ast.SinglePosStatement(list(zip(glyphs, values)), |
| prefix, suffix, forceChain=hasMarks, |
| location=location) |
| |
| def parse_position_cursive_(self, enumerated, vertical): |
| location = self.cur_token_location_ |
| self.expect_keyword_("cursive") |
| if enumerated: |
| raise FeatureLibError( |
| '"enumerate" is not allowed with ' |
| 'cursive attachment positioning', |
| location) |
| glyphclass = self.parse_glyphclass_(accept_glyphname=True) |
| entryAnchor = self.parse_anchor_() |
| exitAnchor = self.parse_anchor_() |
| self.expect_symbol_(";") |
| return self.ast.CursivePosStatement( |
| glyphclass, entryAnchor, exitAnchor, location=location) |
| |
| def parse_position_base_(self, enumerated, vertical): |
| location = self.cur_token_location_ |
| self.expect_keyword_("base") |
| if enumerated: |
| raise FeatureLibError( |
| '"enumerate" is not allowed with ' |
| 'mark-to-base attachment positioning', |
| location) |
| base = self.parse_glyphclass_(accept_glyphname=True) |
| marks = self.parse_anchor_marks_() |
| self.expect_symbol_(";") |
| return self.ast.MarkBasePosStatement(base, marks, location=location) |
| |
| def parse_position_ligature_(self, enumerated, vertical): |
| location = self.cur_token_location_ |
| self.expect_keyword_("ligature") |
| if enumerated: |
| raise FeatureLibError( |
| '"enumerate" is not allowed with ' |
| 'mark-to-ligature attachment positioning', |
| location) |
| ligatures = self.parse_glyphclass_(accept_glyphname=True) |
| marks = [self.parse_anchor_marks_()] |
| while self.next_token_ == "ligComponent": |
| self.expect_keyword_("ligComponent") |
| marks.append(self.parse_anchor_marks_()) |
| self.expect_symbol_(";") |
| return self.ast.MarkLigPosStatement(ligatures, marks, location=location) |
| |
| def parse_position_mark_(self, enumerated, vertical): |
| location = self.cur_token_location_ |
| self.expect_keyword_("mark") |
| if enumerated: |
| raise FeatureLibError( |
| '"enumerate" is not allowed with ' |
| 'mark-to-mark attachment positioning', |
| location) |
| baseMarks = self.parse_glyphclass_(accept_glyphname=True) |
| marks = self.parse_anchor_marks_() |
| self.expect_symbol_(";") |
| return self.ast.MarkMarkPosStatement(baseMarks, marks, |
| location=location) |
| |
| def parse_script_(self): |
| assert self.is_cur_keyword_("script") |
| location, script = self.cur_token_location_, self.expect_script_tag_() |
| self.expect_symbol_(";") |
| return self.ast.ScriptStatement(script, location=location) |
| |
| def parse_substitute_(self): |
| assert self.cur_token_ in {"substitute", "sub", "reversesub", "rsub"} |
| location = self.cur_token_location_ |
| reverse = self.cur_token_ in {"reversesub", "rsub"} |
| old_prefix, old, lookups, values, old_suffix, hasMarks = \ |
| self.parse_glyph_pattern_(vertical=False) |
| if any(values): |
| raise FeatureLibError( |
| "Substitution statements cannot contain values", location) |
| new = [] |
| if self.next_token_ == "by": |
| keyword = self.expect_keyword_("by") |
| while self.next_token_ != ";": |
| gc = self.parse_glyphclass_(accept_glyphname=True) |
| new.append(gc) |
| elif self.next_token_ == "from": |
| keyword = self.expect_keyword_("from") |
| new = [self.parse_glyphclass_(accept_glyphname=False)] |
| else: |
| keyword = None |
| self.expect_symbol_(";") |
| if len(new) == 0 and not any(lookups): |
| raise FeatureLibError( |
| 'Expected "by", "from" or explicit lookup references', |
| self.cur_token_location_) |
| |
| # GSUB lookup type 3: Alternate substitution. |
| # Format: "substitute a from [a.1 a.2 a.3];" |
| if keyword == "from": |
| if reverse: |
| raise FeatureLibError( |
| 'Reverse chaining substitutions do not support "from"', |
| location) |
| if len(old) != 1 or len(old[0].glyphSet()) != 1: |
| raise FeatureLibError( |
| 'Expected a single glyph before "from"', |
| location) |
| if len(new) != 1: |
| raise FeatureLibError( |
| 'Expected a single glyphclass after "from"', |
| location) |
| return self.ast.AlternateSubstStatement( |
| old_prefix, old[0], old_suffix, new[0], location=location) |
| |
| num_lookups = len([l for l in lookups if l is not None]) |
| |
| # GSUB lookup type 1: Single substitution. |
| # Format A: "substitute a by a.sc;" |
| # Format B: "substitute [one.fitted one.oldstyle] by one;" |
| # Format C: "substitute [a-d] by [A.sc-D.sc];" |
| if (not reverse and len(old) == 1 and len(new) == 1 and |
| num_lookups == 0): |
| glyphs = list(old[0].glyphSet()) |
| replacements = list(new[0].glyphSet()) |
| if len(replacements) == 1: |
| replacements = replacements * len(glyphs) |
| if len(glyphs) != len(replacements): |
| raise FeatureLibError( |
| 'Expected a glyph class with %d elements after "by", ' |
| 'but found a glyph class with %d elements' % |
| (len(glyphs), len(replacements)), location) |
| return self.ast.SingleSubstStatement( |
| old, new, |
| old_prefix, old_suffix, |
| forceChain=hasMarks, |
| location=location |
| ) |
| |
| # GSUB lookup type 2: Multiple substitution. |
| # Format: "substitute f_f_i by f f i;" |
| if (not reverse and |
| len(old) == 1 and len(old[0].glyphSet()) == 1 and |
| len(new) > 1 and max([len(n.glyphSet()) for n in new]) == 1 and |
| num_lookups == 0): |
| return self.ast.MultipleSubstStatement( |
| old_prefix, tuple(old[0].glyphSet())[0], old_suffix, |
| tuple([list(n.glyphSet())[0] for n in new]), |
| forceChain=hasMarks, location=location) |
| |
| # GSUB lookup type 4: Ligature substitution. |
| # Format: "substitute f f i by f_f_i;" |
| if (not reverse and |
| len(old) > 1 and len(new) == 1 and |
| len(new[0].glyphSet()) == 1 and |
| num_lookups == 0): |
| return self.ast.LigatureSubstStatement( |
| old_prefix, old, old_suffix, |
| list(new[0].glyphSet())[0], forceChain=hasMarks, |
| location=location) |
| |
| # GSUB lookup type 8: Reverse chaining substitution. |
| if reverse: |
| if len(old) != 1: |
| raise FeatureLibError( |
| "In reverse chaining single substitutions, " |
| "only a single glyph or glyph class can be replaced", |
| location) |
| if len(new) != 1: |
| raise FeatureLibError( |
| 'In reverse chaining single substitutions, ' |
| 'the replacement (after "by") must be a single glyph ' |
| 'or glyph class', location) |
| if num_lookups != 0: |
| raise FeatureLibError( |
| "Reverse chaining substitutions cannot call named lookups", |
| location) |
| glyphs = sorted(list(old[0].glyphSet())) |
| replacements = sorted(list(new[0].glyphSet())) |
| if len(replacements) == 1: |
| replacements = replacements * len(glyphs) |
| if len(glyphs) != len(replacements): |
| raise FeatureLibError( |
| 'Expected a glyph class with %d elements after "by", ' |
| 'but found a glyph class with %d elements' % |
| (len(glyphs), len(replacements)), location) |
| return self.ast.ReverseChainSingleSubstStatement( |
| old_prefix, old_suffix, old, new, location=location) |
| |
| if len(old) > 1 and len(new) > 1: |
| raise FeatureLibError( |
| 'Direct substitution of multiple glyphs by multiple glyphs ' |
| 'is not supported', |
| location) |
| |
| # GSUB lookup type 6: Chaining contextual substitution. |
| assert len(new) == 0, new |
| rule = self.ast.ChainContextSubstStatement( |
| old_prefix, old, old_suffix, lookups, location=location) |
| return rule |
| |
| def parse_subtable_(self): |
| assert self.is_cur_keyword_("subtable") |
| location = self.cur_token_location_ |
| self.expect_symbol_(";") |
| return self.ast.SubtableStatement(location=location) |
| |
| def parse_size_parameters_(self): |
| assert self.is_cur_keyword_("parameters") |
| location = self.cur_token_location_ |
| DesignSize = self.expect_decipoint_() |
| SubfamilyID = self.expect_number_() |
| RangeStart = 0 |
| RangeEnd = 0 |
| if self.next_token_type_ in (Lexer.NUMBER, Lexer.FLOAT) or \ |
| SubfamilyID != 0: |
| RangeStart = self.expect_decipoint_() |
| RangeEnd = self.expect_decipoint_() |
| |
| self.expect_symbol_(";") |
| return self.ast.SizeParameters(DesignSize, SubfamilyID, |
| RangeStart, RangeEnd, |
| location=location) |
| |
| def parse_size_menuname_(self): |
| assert self.is_cur_keyword_("sizemenuname") |
| location = self.cur_token_location_ |
| platformID, platEncID, langID, string = self.parse_name_() |
| return self.ast.FeatureNameStatement("size", platformID, |
| platEncID, langID, string, |
| location=location) |
| |
| def parse_table_(self): |
| assert self.is_cur_keyword_("table") |
| location, name = self.cur_token_location_, self.expect_tag_() |
| table = self.ast.TableBlock(name, location=location) |
| self.expect_symbol_("{") |
| handler = { |
| "GDEF": self.parse_table_GDEF_, |
| "head": self.parse_table_head_, |
| "hhea": self.parse_table_hhea_, |
| "vhea": self.parse_table_vhea_, |
| "name": self.parse_table_name_, |
| "BASE": self.parse_table_BASE_, |
| "OS/2": self.parse_table_OS_2_, |
| }.get(name) |
| if handler: |
| handler(table) |
| else: |
| raise FeatureLibError('"table %s" is not supported' % name.strip(), |
| location) |
| self.expect_symbol_("}") |
| end_tag = self.expect_tag_() |
| if end_tag != name: |
| raise FeatureLibError('Expected "%s"' % name.strip(), |
| self.cur_token_location_) |
| self.expect_symbol_(";") |
| return table |
| |
| def parse_table_GDEF_(self, table): |
| statements = table.statements |
| while self.next_token_ != "}" or self.cur_comments_: |
| self.advance_lexer_(comments=True) |
| if self.cur_token_type_ is Lexer.COMMENT: |
| statements.append(self.ast.Comment( |
| self.cur_token_, location=self.cur_token_location_)) |
| elif self.is_cur_keyword_("Attach"): |
| statements.append(self.parse_attach_()) |
| elif self.is_cur_keyword_("GlyphClassDef"): |
| statements.append(self.parse_GlyphClassDef_()) |
| elif self.is_cur_keyword_("LigatureCaretByIndex"): |
| statements.append(self.parse_ligatureCaretByIndex_()) |
| elif self.is_cur_keyword_("LigatureCaretByPos"): |
| statements.append(self.parse_ligatureCaretByPos_()) |
| elif self.cur_token_ == ";": |
| continue |
| else: |
| raise FeatureLibError( |
| "Expected Attach, LigatureCaretByIndex, " |
| "or LigatureCaretByPos", |
| self.cur_token_location_) |
| |
| def parse_table_head_(self, table): |
| statements = table.statements |
| while self.next_token_ != "}" or self.cur_comments_: |
| self.advance_lexer_(comments=True) |
| if self.cur_token_type_ is Lexer.COMMENT: |
| statements.append(self.ast.Comment( |
| self.cur_token_, location=self.cur_token_location_)) |
| elif self.is_cur_keyword_("FontRevision"): |
| statements.append(self.parse_FontRevision_()) |
| elif self.cur_token_ == ";": |
| continue |
| else: |
| raise FeatureLibError("Expected FontRevision", |
| self.cur_token_location_) |
| |
| def parse_table_hhea_(self, table): |
| statements = table.statements |
| fields = ("CaretOffset", "Ascender", "Descender", "LineGap") |
| while self.next_token_ != "}" or self.cur_comments_: |
| self.advance_lexer_(comments=True) |
| if self.cur_token_type_ is Lexer.COMMENT: |
| statements.append(self.ast.Comment( |
| self.cur_token_, location=self.cur_token_location_)) |
| elif self.cur_token_type_ is Lexer.NAME and self.cur_token_ in fields: |
| key = self.cur_token_.lower() |
| value = self.expect_number_() |
| statements.append( |
| self.ast.HheaField(key, value, |
| location=self.cur_token_location_)) |
| if self.next_token_ != ";": |
| raise FeatureLibError("Incomplete statement", self.next_token_location_) |
| elif self.cur_token_ == ";": |
| continue |
| else: |
| raise FeatureLibError("Expected CaretOffset, Ascender, " |
| "Descender or LineGap", |
| self.cur_token_location_) |
| |
| def parse_table_vhea_(self, table): |
| statements = table.statements |
| fields = ("VertTypoAscender", "VertTypoDescender", "VertTypoLineGap") |
| while self.next_token_ != "}" or self.cur_comments_: |
| self.advance_lexer_(comments=True) |
| if self.cur_token_type_ is Lexer.COMMENT: |
| statements.append(self.ast.Comment( |
| self.cur_token_, location=self.cur_token_location_)) |
| elif self.cur_token_type_ is Lexer.NAME and self.cur_token_ in fields: |
| key = self.cur_token_.lower() |
| value = self.expect_number_() |
| statements.append( |
| self.ast.VheaField(key, value, |
| location=self.cur_token_location_)) |
| if self.next_token_ != ";": |
| raise FeatureLibError("Incomplete statement", self.next_token_location_) |
| elif self.cur_token_ == ";": |
| continue |
| else: |
| raise FeatureLibError("Expected VertTypoAscender, " |
| "VertTypoDescender or VertTypoLineGap", |
| self.cur_token_location_) |
| |
| def parse_table_name_(self, table): |
| statements = table.statements |
| while self.next_token_ != "}" or self.cur_comments_: |
| self.advance_lexer_(comments=True) |
| if self.cur_token_type_ is Lexer.COMMENT: |
| statements.append(self.ast.Comment( |
| self.cur_token_, location=self.cur_token_location_)) |
| elif self.is_cur_keyword_("nameid"): |
| statement = self.parse_nameid_() |
| if statement: |
| statements.append(statement) |
| elif self.cur_token_ == ";": |
| continue |
| else: |
| raise FeatureLibError("Expected nameid", |
| self.cur_token_location_) |
| |
| def parse_name_(self): |
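| # Parses the tail of a name record: an optional platform id, itself |
| # optionally followed by encoding and language ids (e.g. "1 0 0" or |
| # "3 1 0x409"), then a quoted string and ";". Omitted fields default |
| # to Windows / Unicode / English values. |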
| platEncID = None |
| langID = None |
| if self.next_token_type_ == Lexer.NUMBER: |
| platformID = self.expect_number_() |
| location = self.cur_token_location_ |
| if platformID not in (1, 3): |
| raise FeatureLibError("Expected platform id 1 or 3", location) |
| if self.next_token_type_ == Lexer.NUMBER: |
| platEncID = self.expect_number_() |
| langID = self.expect_number_() |
| else: |
| platformID = 3 |
| location = self.cur_token_location_ |
| |
| if platformID == 1: # Macintosh |
| platEncID = platEncID or 0 # Roman |
| langID = langID or 0 # English |
| else: # 3, Windows |
| platEncID = platEncID or 1 # Unicode |
| langID = langID or 0x0409 # English |
| |
| string = self.expect_string_() |
| self.expect_symbol_(";") |
| |
| encoding = getEncoding(platformID, platEncID, langID) |
| if encoding is None: |
| raise FeatureLibError("Unsupported encoding", location) |
| unescaped = self.unescape_string_(string, encoding) |
| return platformID, platEncID, langID, unescaped |
| |
| def parse_nameid_(self): |
| assert self.cur_token_ == "nameid", self.cur_token_ |
| location, nameID = self.cur_token_location_, self.expect_number_() |
| if nameID > 32767: |
| raise FeatureLibError("Name id value cannot be greater than 32767", |
| self.cur_token_location_) |
| if 1 <= nameID <= 6: |
| log.warning("Name id %d cannot be set from the feature file. " |
| "Ignoring record" % nameID) |
| self.parse_name_() # skip to the next record |
| return None |
| |
| platformID, platEncID, langID, string = self.parse_name_() |
| return self.ast.NameRecord(nameID, platformID, platEncID, |
| langID, string, location=location) |
| |
| def unescape_string_(self, string, encoding): |
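| # Expands feature-file escape sequences: in UTF-16BE strings, a |
| # four-digit escape such as "\0041" becomes "A"; in single-byte |
| # encodings, a two-digit escape such as "\41" is decoded through |
| # the record's encoding. |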
| if encoding == "utf_16_be": |
| s = re.sub(r"\\[0-9a-fA-F]{4}", self.unescape_unichr_, string) |
| else: |
| unescape = lambda m: self.unescape_byte_(m, encoding) |
| s = re.sub(r"\\[0-9a-fA-F]{2}", unescape, string) |
| # We now have a Unicode string, but it might contain surrogate pairs. |
| # We convert surrogates to actual Unicode by round-tripping through |
| # Python's UTF-16 codec in a special mode. |
| utf16 = tobytes(s, "utf_16_be", "surrogatepass") |
| return tounicode(utf16, "utf_16_be") |
| |
| @staticmethod |
| def unescape_unichr_(match): |
| n = match.group(0)[1:] |
| return unichr(int(n, 16)) |
| |
| @staticmethod |
| def unescape_byte_(match, encoding): |
| n = match.group(0)[1:] |
| return bytechr(int(n, 16)).decode(encoding) |
| |
| def parse_table_BASE_(self, table): |
| statements = table.statements |
| horiz_bases = vert_bases = None  # set by the BaseTagList statements |
| while self.next_token_ != "}" or self.cur_comments_: |
| self.advance_lexer_(comments=True) |
| if self.cur_token_type_ is Lexer.COMMENT: |
| statements.append(self.ast.Comment( |
| self.cur_token_, location=self.cur_token_location_)) |
| elif self.is_cur_keyword_("HorizAxis.BaseTagList"): |
| horiz_bases = self.parse_base_tag_list_() |
| elif self.is_cur_keyword_("HorizAxis.BaseScriptList"): |
| horiz_scripts = self.parse_base_script_list_(len(horiz_bases)) |
| statements.append( |
| self.ast.BaseAxis(horiz_bases, |
| horiz_scripts, False, |
| location=self.cur_token_location_)) |
| elif self.is_cur_keyword_("VertAxis.BaseTagList"): |
| vert_bases = self.parse_base_tag_list_() |
| elif self.is_cur_keyword_("VertAxis.BaseScriptList"): |
| vert_scripts = self.parse_base_script_list_(len(vert_bases)) |
| statements.append( |
| self.ast.BaseAxis(vert_bases, |
| vert_scripts, True, |
| location=self.cur_token_location_)) |
| elif self.cur_token_ == ";": |
| continue |
| |
| def parse_table_OS_2_(self, table): |
| statements = table.statements |
| numbers = ("FSType", "TypoAscender", "TypoDescender", "TypoLineGap", |
| "winAscent", "winDescent", "XHeight", "CapHeight", |
| "WeightClass", "WidthClass", "LowerOpSize", "UpperOpSize") |
| ranges = ("UnicodeRange", "CodePageRange") |
| while self.next_token_ != "}" or self.cur_comments_: |
| self.advance_lexer_(comments=True) |
| if self.cur_token_type_ is Lexer.COMMENT: |
| statements.append(self.ast.Comment( |
| self.cur_token_, location=self.cur_token_location_)) |
| elif self.cur_token_type_ is Lexer.NAME: |
| key = self.cur_token_.lower() |
| value = None |
| if self.cur_token_ in numbers: |
| value = self.expect_number_() |
| elif self.is_cur_keyword_("Panose"): |
| value = [] |
| for i in range(10): |
| value.append(self.expect_number_()) |
| elif self.cur_token_ in ranges: |
| value = [] |
| while self.next_token_ != ";": |
| value.append(self.expect_number_()) |
| elif self.is_cur_keyword_("Vendor"): |
| value = self.expect_string_() |
| statements.append( |
| self.ast.OS2Field(key, value, |
| location=self.cur_token_location_)) |
| elif self.cur_token_ == ";": |
| continue |
| |
| def parse_base_tag_list_(self): |
| assert self.cur_token_ in ("HorizAxis.BaseTagList", |
| "VertAxis.BaseTagList"), self.cur_token_ |
| bases = [] |
| while self.next_token_ != ";": |
| bases.append(self.expect_script_tag_()) |
| self.expect_symbol_(";") |
| return bases |
| |
| def parse_base_script_list_(self, count): |
| assert self.cur_token_ in ("HorizAxis.BaseScriptList", |
| "VertAxis.BaseScriptList"), self.cur_token_ |
| scripts = [(self.parse_base_script_record_(count))] |
| while self.next_token_ == ",": |
| self.expect_symbol_(",") |
| scripts.append(self.parse_base_script_record_(count)) |
| self.expect_symbol_(";") |
| return scripts |
| |
| def parse_base_script_record_(self, count): |
| script_tag = self.expect_script_tag_() |
| base_tag = self.expect_script_tag_() |
| coords = [self.expect_number_() for i in range(count)] |
| return script_tag, base_tag, coords |
| |
| def parse_device_(self): |
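| # Parses a device table, e.g. "<device 11 111, 12 112>" into the |
| # tuple ((11, 111), (12, 112)); "<device NULL>" yields None. |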
| result = None |
| self.expect_symbol_("<") |
| self.expect_keyword_("device") |
| if self.next_token_ == "NULL": |
| self.expect_keyword_("NULL") |
| else: |
| result = [(self.expect_number_(), self.expect_number_())] |
| while self.next_token_ == ",": |
| self.expect_symbol_(",") |
| result.append((self.expect_number_(), self.expect_number_())) |
| result = tuple(result) # make it hashable |
| self.expect_symbol_(">") |
| return result |
| |
| def is_next_value_(self): |
| return self.next_token_type_ is Lexer.NUMBER or self.next_token_ == "<" |
| |
| def parse_valuerecord_(self, vertical): |
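| # Accepts a bare advance number ("-20"), "<NULL>", a reference to a |
| # named valueRecordDef ("<KERN_A>", an illustrative name), or a full |
| # record "<xPla yPla xAdv yAdv>", optionally followed by four device |
| # tables. |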
| if self.next_token_type_ is Lexer.NUMBER: |
| number, location = self.expect_number_(), self.cur_token_location_ |
| if vertical: |
| val = self.ast.ValueRecord(yAdvance=number, |
| vertical=vertical, |
| location=location) |
| else: |
| val = self.ast.ValueRecord(xAdvance=number, |
| vertical=vertical, |
| location=location) |
| return val |
| self.expect_symbol_("<") |
| location = self.cur_token_location_ |
| if self.next_token_type_ is Lexer.NAME: |
| name = self.expect_name_() |
| if name == "NULL": |
| self.expect_symbol_(">") |
| return self.ast.ValueRecord() |
| vrd = self.valuerecords_.resolve(name) |
| if vrd is None: |
| raise FeatureLibError("Unknown valueRecordDef \"%s\"" % name, |
| self.cur_token_location_) |
| value = vrd.value |
| xPlacement, yPlacement = (value.xPlacement, value.yPlacement) |
| xAdvance, yAdvance = (value.xAdvance, value.yAdvance) |
| else: |
| xPlacement, yPlacement, xAdvance, yAdvance = ( |
| self.expect_number_(), self.expect_number_(), |
| self.expect_number_(), self.expect_number_()) |
| |
| if self.next_token_ == "<": |
| xPlaDevice, yPlaDevice, xAdvDevice, yAdvDevice = ( |
| self.parse_device_(), self.parse_device_(), |
| self.parse_device_(), self.parse_device_()) |
| allDeltas = sorted([ |
| delta |
| for size, delta |
| in (xPlaDevice if xPlaDevice else ()) + |
| (yPlaDevice if yPlaDevice else ()) + |
| (xAdvDevice if xAdvDevice else ()) + |
| (yAdvDevice if yAdvDevice else ())]) |
| # allDeltas is empty if every device table was <device NULL>. |
| if allDeltas and (allDeltas[0] < -128 or allDeltas[-1] > 127): |
| raise FeatureLibError( |
| "Device value out of valid range (-128..127)", |
| self.cur_token_location_) |
| else: |
| xPlaDevice, yPlaDevice, xAdvDevice, yAdvDevice = ( |
| None, None, None, None) |
| |
| self.expect_symbol_(">") |
| return self.ast.ValueRecord( |
| xPlacement, yPlacement, xAdvance, yAdvance, |
| xPlaDevice, yPlaDevice, xAdvDevice, yAdvDevice, |
| vertical=vertical, location=location) |
| |
| def parse_valuerecord_definition_(self, vertical): |
| assert self.is_cur_keyword_("valueRecordDef") |
| location = self.cur_token_location_ |
| value = self.parse_valuerecord_(vertical) |
| name = self.expect_name_() |
| self.expect_symbol_(";") |
| vrd = self.ast.ValueRecordDefinition(name, value, location=location) |
| self.valuerecords_.define(name, vrd) |
| return vrd |
| |
| def parse_languagesystem_(self): |
| assert self.cur_token_ == "languagesystem" |
| location = self.cur_token_location_ |
| script = self.expect_script_tag_() |
| language = self.expect_language_tag_() |
| self.expect_symbol_(";") |
| return self.ast.LanguageSystemStatement(script, language, |
| location=location) |
| |
| def parse_feature_block_(self): |
| assert self.cur_token_ == "feature" |
| location = self.cur_token_location_ |
| tag = self.expect_tag_() |
| vertical = (tag in {"vkrn", "vpal", "vhal", "valt"}) |
| |
| stylisticset = None |
| cv_feature = None |
| size_feature = False |
| if tag in self.SS_FEATURE_TAGS: |
| stylisticset = tag |
| elif tag in self.CV_FEATURE_TAGS: |
| cv_feature = tag |
| elif tag == "size": |
| size_feature = True |
| |
| use_extension = False |
| if self.next_token_ == "useExtension": |
| self.expect_keyword_("useExtension") |
| use_extension = True |
| |
| block = self.ast.FeatureBlock(tag, use_extension=use_extension, |
| location=location) |
| self.parse_block_(block, vertical, stylisticset, size_feature, |
| cv_feature) |
| return block |
| |
| def parse_feature_reference_(self): |
| assert self.cur_token_ == "feature", self.cur_token_ |
| location = self.cur_token_location_ |
| featureName = self.expect_tag_() |
| self.expect_symbol_(";") |
| return self.ast.FeatureReferenceStatement(featureName, |
| location=location) |
| |
| def parse_featureNames_(self, tag): |
| assert self.cur_token_ == "featureNames", self.cur_token_ |
| block = self.ast.NestedBlock(tag, self.cur_token_, |
| location=self.cur_token_location_) |
| self.expect_symbol_("{") |
| for symtab in self.symbol_tables_: |
| symtab.enter_scope() |
| while self.next_token_ != "}" or self.cur_comments_: |
| self.advance_lexer_(comments=True) |
| if self.cur_token_type_ is Lexer.COMMENT: |
| block.statements.append(self.ast.Comment( |
| self.cur_token_, location=self.cur_token_location_)) |
| elif self.is_cur_keyword_("name"): |
| location = self.cur_token_location_ |
| platformID, platEncID, langID, string = self.parse_name_() |
| block.statements.append( |
| self.ast.FeatureNameStatement(tag, platformID, |
| platEncID, langID, string, |
| location=location)) |
| elif self.cur_token_ == ";": |
| continue |
| else: |
| raise FeatureLibError('Expected "name"', |
| self.cur_token_location_) |
| self.expect_symbol_("}") |
| for symtab in self.symbol_tables_: |
| symtab.exit_scope() |
| self.expect_symbol_(";") |
| return block |
| |
| def parse_cvParameters_(self, tag): |
| assert self.cur_token_ == "cvParameters", self.cur_token_ |
| block = self.ast.NestedBlock(tag, self.cur_token_, |
| location=self.cur_token_location_) |
| self.expect_symbol_("{") |
| for symtab in self.symbol_tables_: |
| symtab.enter_scope() |
| |
| statements = block.statements |
| while self.next_token_ != "}" or self.cur_comments_: |
| self.advance_lexer_(comments=True) |
| if self.cur_token_type_ is Lexer.COMMENT: |
| statements.append(self.ast.Comment( |
| self.cur_token_, location=self.cur_token_location_)) |
| elif self.is_cur_keyword_({"FeatUILabelNameID", |
| "FeatUITooltipTextNameID", |
| "SampleTextNameID", |
| "ParamUILabelNameID"}): |
| statements.append(self.parse_cvNameIDs_(tag, self.cur_token_)) |
| elif self.is_cur_keyword_("Character"): |
| statements.append(self.parse_cvCharacter_(tag)) |
| elif self.cur_token_ == ";": |
| continue |
| else: |
| raise FeatureLibError( |
| "Expected statement: got {} {}".format( |
| self.cur_token_type_, self.cur_token_), |
| self.cur_token_location_) |
| |
| self.expect_symbol_("}") |
| for symtab in self.symbol_tables_: |
| symtab.exit_scope() |
| self.expect_symbol_(";") |
| return block |
| |
| def parse_cvNameIDs_(self, tag, block_name): |
| assert self.cur_token_ == block_name, self.cur_token_ |
| block = self.ast.NestedBlock(tag, block_name, |
| location=self.cur_token_location_) |
| self.expect_symbol_("{") |
| for symtab in self.symbol_tables_: |
| symtab.enter_scope() |
| while self.next_token_ != "}" or self.cur_comments_: |
| self.advance_lexer_(comments=True) |
| if self.cur_token_type_ is Lexer.COMMENT: |
| block.statements.append(self.ast.Comment( |
| self.cur_token_, location=self.cur_token_location_)) |
| elif self.is_cur_keyword_("name"): |
| location = self.cur_token_location_ |
| platformID, platEncID, langID, string = self.parse_name_() |
| block.statements.append( |
| self.ast.CVParametersNameStatement( |
| tag, platformID, platEncID, langID, string, |
| block_name, location=location)) |
| elif self.cur_token_ == ";": |
| continue |
| else: |
| raise FeatureLibError('Expected "name"', |
| self.cur_token_location_) |
| self.expect_symbol_("}") |
| for symtab in self.symbol_tables_: |
| symtab.exit_scope() |
| self.expect_symbol_(";") |
| return block |
| |
| def parse_cvCharacter_(self, tag): |
| assert self.cur_token_ == "Character", self.cur_token_ |
| location, character = self.cur_token_location_, self.expect_decimal_or_hexadecimal_() |
| self.expect_symbol_(";") |
| if not (0xFFFFFF >= character >= 0): |
| raise FeatureLibError("Character value must be between " |
| "{:#x} and {:#x}".format(0, 0xFFFFFF), |
| location) |
| return self.ast.CharacterStatement(character, tag, location=location) |
| |
| def parse_FontRevision_(self): |
| assert self.cur_token_ == "FontRevision", self.cur_token_ |
| location, version = self.cur_token_location_, self.expect_float_() |
| self.expect_symbol_(";") |
| if version <= 0: |
| raise FeatureLibError("Font revision numbers must be positive", |
| location) |
| return self.ast.FontRevisionStatement(version, location=location) |
| |
| def parse_block_(self, block, vertical, stylisticset=None, |
| size_feature=False, cv_feature=None): |
| self.expect_symbol_("{") |
| for symtab in self.symbol_tables_: |
| symtab.enter_scope() |
| |
| statements = block.statements |
| while self.next_token_ != "}" or self.cur_comments_: |
| self.advance_lexer_(comments=True) |
| if self.cur_token_type_ is Lexer.COMMENT: |
| statements.append(self.ast.Comment( |
| self.cur_token_, location=self.cur_token_location_)) |
| elif self.cur_token_type_ is Lexer.GLYPHCLASS: |
| statements.append(self.parse_glyphclass_definition_()) |
| elif self.is_cur_keyword_("anchorDef"): |
| statements.append(self.parse_anchordef_()) |
| elif self.is_cur_keyword_({"enum", "enumerate"}): |
| statements.append(self.parse_enumerate_(vertical=vertical)) |
| elif self.is_cur_keyword_("feature"): |
| statements.append(self.parse_feature_reference_()) |
| elif self.is_cur_keyword_("ignore"): |
| statements.append(self.parse_ignore_()) |
| elif self.is_cur_keyword_("language"): |
| statements.append(self.parse_language_()) |
| elif self.is_cur_keyword_("lookup"): |
| statements.append(self.parse_lookup_(vertical)) |
| elif self.is_cur_keyword_("lookupflag"): |
| statements.append(self.parse_lookupflag_()) |
| elif self.is_cur_keyword_("markClass"): |
| statements.append(self.parse_markClass_()) |
| elif self.is_cur_keyword_({"pos", "position"}): |
| statements.append( |
| self.parse_position_(enumerated=False, vertical=vertical)) |
| elif self.is_cur_keyword_("script"): |
| statements.append(self.parse_script_()) |
| elif (self.is_cur_keyword_({"sub", "substitute", |
| "rsub", "reversesub"})): |
| statements.append(self.parse_substitute_()) |
| elif self.is_cur_keyword_("subtable"): |
| statements.append(self.parse_subtable_()) |
| elif self.is_cur_keyword_("valueRecordDef"): |
| statements.append(self.parse_valuerecord_definition_(vertical)) |
| elif stylisticset and self.is_cur_keyword_("featureNames"): |
| statements.append(self.parse_featureNames_(stylisticset)) |
| elif cv_feature and self.is_cur_keyword_("cvParameters"): |
| statements.append(self.parse_cvParameters_(cv_feature)) |
| elif size_feature and self.is_cur_keyword_("parameters"): |
| statements.append(self.parse_size_parameters_()) |
| elif size_feature and self.is_cur_keyword_("sizemenuname"): |
| statements.append(self.parse_size_menuname_()) |
| elif self.cur_token_type_ is Lexer.NAME and self.cur_token_ in self.extensions: |
| statements.append(self.extensions[self.cur_token_](self)) |
| elif self.cur_token_ == ";": |
| continue |
| else: |
| raise FeatureLibError( |
| "Expected glyph class definition or statement: got {} {}".format(self.cur_token_type_, self.cur_token_), |
| self.cur_token_location_) |
| |
| self.expect_symbol_("}") |
| for symtab in self.symbol_tables_: |
| symtab.exit_scope() |
| |
| name = self.expect_name_() |
| if name != block.name.strip(): |
| raise FeatureLibError("Expected \"%s\"" % block.name.strip(), |
| self.cur_token_location_) |
| self.expect_symbol_(";") |
| |
| # A multiple substitution may have a single destination, in which case |
| # it will look just like a single substitution. So if there are both |
| # multiple and single substitutions, upgrade all the single ones to |
| # multiple substitutions. |
| |
| # Check if we have a mix of non-contextual singles and multiples. |
| has_single = False |
| has_multiple = False |
| for s in statements: |
| if isinstance(s, self.ast.SingleSubstStatement): |
| has_single = has_single or not any([s.prefix, s.suffix, s.forceChain]) |
| elif isinstance(s, self.ast.MultipleSubstStatement): |
| has_multiple = has_multiple or not any([s.prefix, s.suffix, s.forceChain]) |
| |
| # Upgrade all single substitutions to multiple substitutions. |
| if has_single and has_multiple: |
| for i, s in enumerate(statements): |
| if isinstance(s, self.ast.SingleSubstStatement): |
| statements[i] = self.ast.MultipleSubstStatement( |
| s.prefix, s.glyphs[0].glyphSet()[0], s.suffix, |
| [r.glyphSet()[0] for r in s.replacements], |
| s.forceChain, location=s.location) |
| |
| def is_cur_keyword_(self, k): |
| if self.cur_token_type_ is Lexer.NAME: |
| if isinstance(k, type("")): # basestring is gone in Python3 |
| return self.cur_token_ == k |
| else: |
| return self.cur_token_ in k |
| return False |
| |
| def expect_class_name_(self): |
| self.advance_lexer_() |
| if self.cur_token_type_ is not Lexer.GLYPHCLASS: |
| raise FeatureLibError("Expected @NAME", self.cur_token_location_) |
| return self.cur_token_ |
| |
| def expect_cid_(self): |
| self.advance_lexer_() |
| if self.cur_token_type_ is Lexer.CID: |
| return self.cur_token_ |
| raise FeatureLibError("Expected a CID", self.cur_token_location_) |
| |
| def expect_filename_(self): |
| self.advance_lexer_() |
| if self.cur_token_type_ is not Lexer.FILENAME: |
| raise FeatureLibError("Expected file name", |
| self.cur_token_location_) |
| return self.cur_token_ |
| |
| def expect_glyph_(self): |
| self.advance_lexer_() |
| if self.cur_token_type_ is Lexer.NAME: |
| self.cur_token_ = self.cur_token_.lstrip("\\") |
| if len(self.cur_token_) > 63: |
| raise FeatureLibError( |
| "Glyph names must not be longer than 63 characters", |
| self.cur_token_location_) |
| return self.cur_token_ |
| elif self.cur_token_type_ is Lexer.CID: |
| return "cid%05d" % self.cur_token_ |
| raise FeatureLibError("Expected a glyph name or CID", |
| self.cur_token_location_) |
| |
| def expect_markClass_reference_(self): |
| name = self.expect_class_name_() |
| mc = self.glyphclasses_.resolve(name) |
| if mc is None: |
| raise FeatureLibError("Unknown markClass @%s" % name, |
| self.cur_token_location_) |
| if not isinstance(mc, self.ast.MarkClass): |
| raise FeatureLibError("@%s is not a markClass" % name, |
| self.cur_token_location_) |
| return mc |
| |
| def expect_tag_(self): |
| self.advance_lexer_() |
| if self.cur_token_type_ is not Lexer.NAME: |
| raise FeatureLibError("Expected a tag", self.cur_token_location_) |
| if len(self.cur_token_) > 4: |
| raise FeatureLibError("Tags can not be longer than 4 characters", |
| self.cur_token_location_) |
| return (self.cur_token_ + "    ")[:4]  # pad short tags with spaces |
| |
| def expect_script_tag_(self): |
| tag = self.expect_tag_() |
| if tag == "dflt": |
| raise FeatureLibError( |
| '"dflt" is not a valid script tag; use "DFLT" instead', |
| self.cur_token_location_) |
| return tag |
| |
| def expect_language_tag_(self): |
| tag = self.expect_tag_() |
| if tag == "DFLT": |
| raise FeatureLibError( |
| '"DFLT" is not a valid language tag; use "dflt" instead', |
| self.cur_token_location_) |
| return tag |
| |
| def expect_symbol_(self, symbol): |
| self.advance_lexer_() |
| if self.cur_token_type_ is Lexer.SYMBOL and self.cur_token_ == symbol: |
| return symbol |
| raise FeatureLibError("Expected '%s'" % symbol, |
| self.cur_token_location_) |
| |
| def expect_keyword_(self, keyword): |
| self.advance_lexer_() |
| if self.cur_token_type_ is Lexer.NAME and self.cur_token_ == keyword: |
| return self.cur_token_ |
| raise FeatureLibError("Expected \"%s\"" % keyword, |
| self.cur_token_location_) |
| |
| def expect_name_(self): |
| self.advance_lexer_() |
| if self.cur_token_type_ is Lexer.NAME: |
| return self.cur_token_ |
| raise FeatureLibError("Expected a name", self.cur_token_location_) |
| |
| # TODO: Don't allow this method to accept hexadecimal values |
| def expect_number_(self): |
| self.advance_lexer_() |
| if self.cur_token_type_ is Lexer.NUMBER: |
| return self.cur_token_ |
| raise FeatureLibError("Expected a number", self.cur_token_location_) |
| |
| def expect_float_(self): |
| self.advance_lexer_() |
| if self.cur_token_type_ is Lexer.FLOAT: |
| return self.cur_token_ |
| raise FeatureLibError("Expected a floating-point number", |
| self.cur_token_location_) |
| |
| # TODO: Don't allow this method to accept hexadecimal values |
| def expect_decipoint_(self): |
| if self.next_token_type_ == Lexer.FLOAT: |
| return self.expect_float_() |
| elif self.next_token_type_ is Lexer.NUMBER: |
| return self.expect_number_() / 10 |
| else: |
| raise FeatureLibError("Expected an integer or floating-point number", |
| self.cur_token_location_) |
| |
| def expect_decimal_or_hexadecimal_(self): |
| # The lexer returns the same token type 'NUMBER' for both decimal and |
| # hexadecimal integers, and converts both to `int`, so the two cannot |
| # be distinguished here. This method matches `expect_number_` but |
| # gives a more informative error message. |
| self.advance_lexer_() |
| if self.cur_token_type_ is Lexer.NUMBER: |
| return self.cur_token_ |
| raise FeatureLibError("Expected a decimal or hexadecimal number", |
| self.cur_token_location_) |
| |
| def expect_string_(self): |
| self.advance_lexer_() |
| if self.cur_token_type_ is Lexer.STRING: |
| return self.cur_token_ |
| raise FeatureLibError("Expected a string", self.cur_token_location_) |
| |
| def advance_lexer_(self, comments=False): |
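| # The parser keeps one token of lookahead: cur_token_* is the token |
| # currently acted on, next_token_* the upcoming one. Comment tokens |
| # are buffered in cur_comments_ and surfaced only when comments=True. |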
| if comments and self.cur_comments_: |
| self.cur_token_type_ = Lexer.COMMENT |
| self.cur_token_, self.cur_token_location_ = self.cur_comments_.pop(0) |
| return |
| else: |
| self.cur_token_type_, self.cur_token_, self.cur_token_location_ = ( |
| self.next_token_type_, self.next_token_, self.next_token_location_) |
| while True: |
| try: |
| (self.next_token_type_, self.next_token_, |
| self.next_token_location_) = next(self.lexer_) |
| except StopIteration: |
| self.next_token_type_, self.next_token_ = (None, None) |
| if self.next_token_type_ != Lexer.COMMENT: |
| break |
| self.cur_comments_.append((self.next_token_, self.next_token_location_)) |
| |
| @staticmethod |
| def reverse_string_(s): |
| """'abc' --> 'cba'""" |
| return ''.join(reversed(list(s))) |
| |
| def make_cid_range_(self, location, start, limit): |
| """(location, 999, 1001) --> ["cid00999", "cid01000", "cid01001"]""" |
| result = list() |
| if start > limit: |
| raise FeatureLibError( |
| "Bad range: start should be less than limit", location) |
| for cid in range(start, limit + 1): |
| result.append("cid%05d" % cid) |
| return result |
| |
| def make_glyph_range_(self, location, start, limit): |
| """(location, "a.sc", "d.sc") --> ["a.sc", "b.sc", "c.sc", "d.sc"]""" |
| result = list() |
| if len(start) != len(limit): |
| raise FeatureLibError( |
| "Bad range: \"%s\" and \"%s\" should have the same length" % |
| (start, limit), location) |
| |
| rev = self.reverse_string_ |
| prefix = os.path.commonprefix([start, limit]) |
| suffix = rev(os.path.commonprefix([rev(start), rev(limit)])) |
| if len(suffix) > 0: |
| start_range = start[len(prefix):-len(suffix)] |
| limit_range = limit[len(prefix):-len(suffix)] |
| else: |
| start_range = start[len(prefix):] |
| limit_range = limit[len(prefix):] |
| |
| if start_range >= limit_range: |
| raise FeatureLibError( |
| "Start of range must be smaller than its end", |
| location) |
| |
| uppercase = re.compile(r'^[A-Z]$') |
| if uppercase.match(start_range) and uppercase.match(limit_range): |
| for c in range(ord(start_range), ord(limit_range) + 1): |
| result.append("%s%c%s" % (prefix, c, suffix)) |
| return result |
| |
| lowercase = re.compile(r'^[a-z]$') |
| if lowercase.match(start_range) and lowercase.match(limit_range): |
| for c in range(ord(start_range), ord(limit_range) + 1): |
| result.append("%s%c%s" % (prefix, c, suffix)) |
| return result |
| |
| digits = re.compile(r'^[0-9]{1,3}$') |
| if digits.match(start_range) and digits.match(limit_range): |
| for i in range(int(start_range, 10), int(limit_range, 10) + 1): |
| number = ("000" + str(i))[-len(start_range):] |
| result.append("%s%s%s" % (prefix, number, suffix)) |
| return result |
| |
| raise FeatureLibError("Bad range: \"%s-%s\"" % (start, limit), |
| location) |
| |
| |
| class SymbolTable(object): |
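| """A stack of nested scopes in which inner definitions shadow outer |
| ones; exiting a scope discards its definitions. Sketch: |
| |
| table = SymbolTable() |
| table.define("size", 1) |
| table.enter_scope() |
| table.define("size", 2) |
| assert table.resolve("size") == 2  # innermost definition wins |
| table.exit_scope() |
| assert table.resolve("size") == 1  # outer definition restored |
| """ |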
| def __init__(self): |
| self.scopes_ = [{}] |
| |
| def enter_scope(self): |
| self.scopes_.append({}) |
| |
| def exit_scope(self): |
| self.scopes_.pop() |
| |
| def define(self, name, item): |
| self.scopes_[-1][name] = item |
| |
| def resolve(self, name): |
| for scope in reversed(self.scopes_): |
| item = scope.get(name) |
| if item: |
| return item |
| return None |