# blob: 095cb6684c9941b1f21691246d102cdb1fc94b00 [file] [log] [blame]
from __future__ import print_function, division, absolute_import
from __future__ import unicode_literals
from fontTools.misc.py23 import *
from fontTools.feaLib.error import FeatureLibError, IncludedFeaNotFound
import re
import os
class Lexer(object):
    """Tokenizer for feature-file source text.

    A ``Lexer`` is an iterator.  Each item is a ``(token_type, token,
    location)`` tuple, where ``token_type`` is one of the string constants
    below and ``location`` is ``(filename, line, column)`` with 1-based line
    and column numbers.  NEWLINE tokens are produced by ``next_`` but are
    filtered out by ``__next__``; COMMENT tokens are passed through.

    After the NAME token ``include`` the lexer switches to a file-name mode
    in which the next token must be a parenthesised file name.
    """

    # Token types (first element of every token tuple).
    NUMBER = "NUMBER"
    FLOAT = "FLOAT"
    STRING = "STRING"
    NAME = "NAME"
    FILENAME = "FILENAME"
    GLYPHCLASS = "GLYPHCLASS"
    CID = "CID"
    SYMBOL = "SYMBOL"
    COMMENT = "COMMENT"
    NEWLINE = "NEWLINE"
    ANONYMOUS_BLOCK = "ANONYMOUS_BLOCK"

    # Character classes used by the scanner.
    CHAR_WHITESPACE_ = " \t"
    CHAR_NEWLINE_ = "\r\n"
    CHAR_SYMBOL_ = ",;:-+'{}[]<>()="
    CHAR_DIGIT_ = "0123456789"
    CHAR_HEXDIGIT_ = "0123456789ABCDEFabcdef"
    CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    CHAR_NAME_START_ = CHAR_LETTER_ + "_+*:.^~!\\"
    CHAR_NAME_CONTINUATION_ = CHAR_LETTER_ + CHAR_DIGIT_ + "_.+*:^~!/-"

    # Stricter than CHAR_NAME_CONTINUATION_: what a glyph class name may hold.
    RE_GLYPHCLASS = re.compile(r"^[A-Za-z_0-9.\-]+$")

    # Scanner modes; MODE_FILENAME_ is active right after an "include" NAME.
    MODE_NORMAL_ = "NORMAL"
    MODE_FILENAME_ = "FILENAME"

    def __init__(self, text, filename):
        """Create a lexer over ``text``.

        ``filename`` is only used for reporting locations; when it is falsy,
        locations report ``"<features>"`` instead.
        """
        self.filename_ = filename
        self.line_ = 1
        self.pos_ = 0
        self.line_start_ = 0
        self.text_ = text
        self.text_length_ = len(text)
        self.mode_ = Lexer.MODE_NORMAL_

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        """Return the next token that is not a NEWLINE."""
        while True:
            token_type, token, location = self.next_()
            if token_type != Lexer.NEWLINE:
                return (token_type, token, location)

    def location_(self):
        """Return ``(filename, line, column)`` for the current position."""
        column = self.pos_ - self.line_start_ + 1
        return (self.filename_ or "<features>", self.line_, column)

    def next_(self):
        """Scan and return the next raw token, including NEWLINE tokens.

        Raises ``StopIteration`` at the end of the text and
        ``FeatureLibError`` on malformed input.  The order of the checks
        below matters: several characters (e.g. ``:`` and ``+``) belong to
        more than one character class and the first matching rule wins.
        """
        self.scan_over_(Lexer.CHAR_WHITESPACE_)
        location = self.location_()
        start = self.pos_
        text = self.text_
        limit = len(text)
        if start >= limit:
            raise StopIteration()
        cur_char = text[start]
        next_char = text[start + 1] if start + 1 < limit else None
        # ``None in "..."`` raises TypeError, so the look-ahead tests must
        # rule out the end-of-text case explicitly.
        next_is_digit = (next_char is not None
                         and next_char in Lexer.CHAR_DIGIT_)

        if cur_char == "\n":
            self.pos_ += 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "\r":
            # Treat "\r\n" as a single line break.
            self.pos_ += (2 if next_char == "\n" else 1)
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "#":
            self.scan_until_(Lexer.CHAR_NEWLINE_)
            return (Lexer.COMMENT, text[start:self.pos_], location)

        if self.mode_ == Lexer.MODE_FILENAME_:
            if cur_char != "(":
                raise FeatureLibError("Expected '(' before file name",
                                      location)
            self.scan_until_(")")
            cur_char = text[self.pos_] if self.pos_ < limit else None
            if cur_char != ")":
                raise FeatureLibError("Expected ')' after file name",
                                      location)
            self.pos_ += 1
            self.mode_ = Lexer.MODE_NORMAL_
            # Strip the surrounding parentheses from the token value.
            return (Lexer.FILENAME, text[start + 1:self.pos_ - 1], location)

        if cur_char == "\\" and next_is_digit:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.CID, int(text[start + 1:self.pos_], 10), location)
        if cur_char == "@":
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            glyphclass = text[start + 1:self.pos_]
            if len(glyphclass) < 1:
                raise FeatureLibError("Expected glyph class name", location)
            if len(glyphclass) > 63:
                raise FeatureLibError(
                    "Glyph class names must not be longer than 63 characters",
                    location)
            if not Lexer.RE_GLYPHCLASS.match(glyphclass):
                raise FeatureLibError(
                    "Glyph class names must consist of letters, digits, "
                    "underscore, period or hyphen", location)
            return (Lexer.GLYPHCLASS, glyphclass, location)
        if cur_char in Lexer.CHAR_NAME_START_:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            token = text[start:self.pos_]
            if token == "include":
                # The following token must be lexed as "(file name)".
                self.mode_ = Lexer.MODE_FILENAME_
            return (Lexer.NAME, token, location)
        if cur_char == "0" and next_char is not None and next_char in "xX":
            self.pos_ += 2
            self.scan_over_(Lexer.CHAR_HEXDIGIT_)
            return (Lexer.NUMBER, int(text[start:self.pos_], 16), location)
        if cur_char in Lexer.CHAR_DIGIT_ or (cur_char == "-"
                                             and next_is_digit):
            if cur_char == "-":
                self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            if self.pos_ >= limit or text[self.pos_] != ".":
                return (Lexer.NUMBER, int(text[start:self.pos_], 10),
                        location)
            self.scan_over_(".")
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.FLOAT, float(text[start:self.pos_]), location)
        if cur_char in Lexer.CHAR_SYMBOL_:
            self.pos_ += 1
            return (Lexer.SYMBOL, cur_char, location)
        if cur_char == '"':
            self.pos_ += 1
            self.scan_until_('"')
            if self.pos_ < limit and text[self.pos_] == '"':
                self.pos_ += 1
                # strip newlines embedded within a string
                string = re.sub("[\r\n]", "", text[start + 1:self.pos_ - 1])
                return (Lexer.STRING, string, location)
            raise FeatureLibError("Expected '\"' to terminate string",
                                  location)
        raise FeatureLibError("Unexpected character: %r" % cur_char,
                              location)

    def scan_over_(self, valid):
        """Advance ``pos_`` past every consecutive character in ``valid``."""
        p = self.pos_
        while p < self.text_length_ and self.text_[p] in valid:
            p += 1
        self.pos_ = p

    def scan_until_(self, stop_at):
        """Advance ``pos_`` to the next character in ``stop_at`` (or EOF)."""
        p = self.pos_
        while p < self.text_length_ and self.text_[p] not in stop_at:
            p += 1
        self.pos_ = p

    def scan_anonymous_block(self, tag):
        """Return the raw body of an anonymous block as a single token.

        Skips the rest of the current line, then collects everything up to
        (but not including) the terminating ``} <tag>;``.  The terminator is
        left in the text, so it is lexed as ordinary tokens afterwards.

        Raises ``FeatureLibError`` if no terminator is found.
        """
        location = self.location_()
        tag = tag.strip()
        self.scan_until_(Lexer.CHAR_NEWLINE_)
        self.scan_over_(Lexer.CHAR_NEWLINE_)
        # The tag is literal text, not a pattern: escape it so characters
        # such as "." or "+" cannot match unrelated text or break the regex.
        regexp = r'}\s*' + re.escape(tag) + r'\s*;'
        split = re.split(regexp, self.text_[self.pos_:], maxsplit=1)
        if len(split) != 2:
            raise FeatureLibError(
                "Expected '} %s;' to terminate anonymous block" % tag,
                location)
        # NOTE(review): only pos_ is advanced here; line_ and line_start_
        # are not updated for the newlines skipped inside the block.
        self.pos_ += len(split[0])
        return (Lexer.ANONYMOUS_BLOCK, split[0], location)
class IncludingLexer(object):
    """Token iterator that follows ``include`` statements.

    Keeps a stack of ``Lexer`` objects.  When the top lexer yields the NAME
    token ``include`` followed by a FILENAME token, a lexer for that file is
    pushed on the stack and its tokens are yielded in place of the statement;
    when a lexer is exhausted it is popped and lexing resumes in its parent.
    """

    def __init__(self, featurefile):
        """``featurefile`` is a path or an object with a ``read`` method."""
        self.lexers_ = [self.make_lexer_(featurefile)]
        # Path of the top-level file; None for streams without a ``name``.
        self.featurefilepath = self.lexers_[0].filename_

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        """Return the next token, transparently entering included files.

        Raises ``FeatureLibError`` for a malformed ``include`` or when
        includes nest too deeply, ``IncludedFeaNotFound`` when the included
        file does not exist, and ``StopIteration`` when all lexers are
        exhausted.
        """
        while self.lexers_:
            lexer = self.lexers_[-1]
            try:
                token_type, token, location = next(lexer)
            except StopIteration:
                self.lexers_.pop()
                continue
            if token_type != Lexer.NAME or token != "include":
                return (token_type, token, location)

            try:
                fname_type, fname_token, fname_location = next(lexer)
            except StopIteration:
                # "include" was the last token of this file.  Letting the
                # StopIteration escape would silently end the whole token
                # stream even though parent lexers may still have input.
                raise FeatureLibError("Expected file name", location)
            if fname_type != Lexer.FILENAME:
                raise FeatureLibError("Expected file name", fname_location)
            # The ';' after the file name is deliberately not consumed here;
            # it is returned as an ordinary token once the included file's
            # lexer has been exhausted and popped.
            if os.path.isabs(fname_token):
                path = fname_token
            else:
                if self.featurefilepath is not None:
                    curpath = os.path.dirname(self.featurefilepath)
                else:
                    # if the IncludingLexer was initialized from an in-memory
                    # file-like stream, it doesn't have a 'name' pointing to
                    # its filesystem path, therefore we fall back to using the
                    # current working directory to resolve relative includes
                    curpath = os.getcwd()
                path = os.path.join(curpath, fname_token)
            if len(self.lexers_) >= 5:
                raise FeatureLibError("Too many recursive includes",
                                      fname_location)
            try:
                self.lexers_.append(self.make_lexer_(path))
            except IOError as err:
                # FileNotFoundError does not exist on Python < 3.3
                import errno
                if err.errno == errno.ENOENT:
                    raise IncludedFeaNotFound(fname_token, fname_location)
                raise  # pragma: no cover
        raise StopIteration()

    @staticmethod
    def make_lexer_(file_or_path):
        """Build a ``Lexer`` from a path or from an object with ``read``.

        A path is opened as UTF-8 text and closed again before returning; a
        file-like object is read but left open for the caller to manage.
        The lexer's file name is the object's ``name`` attribute, if any.
        """
        if hasattr(file_or_path, "read"):
            data = file_or_path.read()
            filename = getattr(file_or_path, "name", None)
        else:
            # ``with`` guarantees the file is closed even if read() fails.
            with open(file_or_path, "r", encoding="utf-8") as fileobj:
                data = fileobj.read()
                filename = getattr(fileobj, "name", None)
        return Lexer(data, filename)

    def scan_anonymous_block(self, tag):
        """Delegate to ``scan_anonymous_block`` of the innermost lexer."""
        return self.lexers_[-1].scan_anonymous_block(tag)
class NonIncludingLexer(IncludingLexer):
    """Variant of IncludingLexer that leaves `include` statements alone.

    Tokens are taken straight from the first lexer on the stack, so an
    `include` statement is emitted as-is instead of being followed.
    """

    def __next__(self):  # Python 3
        root_lexer = self.lexers_[0]
        return next(root_lexer)