Lib/fontTools/feaLib/lexer.py - third_party/fonttools - Git at Google

 from __future__ import print_function, division, absolute_import
 from __future__ import unicode_literals
 from fontTools.misc.py23 import *
 from fontTools.feaLib.error import FeatureLibError, IncludedFeaNotFound
 import re
 import os


 class Lexer(object):
     """Tokenizer for OpenType feature file source text.

     Iterating over a ``Lexer`` yields ``(token_type, token, location)``
     tuples, where ``location`` is ``(filename, line, column)`` with 1-based
     line and column numbers.  Line breaks are consumed silently by the
     iterator protocol; comments are reported as ``COMMENT`` tokens.
     """

     # Token types.
     NUMBER = "NUMBER"
     FLOAT = "FLOAT"
     STRING = "STRING"
     NAME = "NAME"
     FILENAME = "FILENAME"
     GLYPHCLASS = "GLYPHCLASS"
     CID = "CID"
     SYMBOL = "SYMBOL"
     COMMENT = "COMMENT"
     NEWLINE = "NEWLINE"
     ANONYMOUS_BLOCK = "ANONYMOUS_BLOCK"

     # Character classes used by the scanner.
     CHAR_WHITESPACE_ = " \t"
     CHAR_NEWLINE_ = "\r\n"
     CHAR_SYMBOL_ = ",;:-+'{}[]<>()="
     CHAR_DIGIT_ = "0123456789"
     CHAR_HEXDIGIT_ = "0123456789ABCDEFabcdef"
     CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
     CHAR_NAME_START_ = CHAR_LETTER_ + "_+*:.^~!\\"
     CHAR_NAME_CONTINUATION_ = CHAR_LETTER_ + CHAR_DIGIT_ + "_.+*:^~!/-"

     RE_GLYPHCLASS = re.compile(r"^[A-Za-z_0-9.\-]+$")
     # A single line break; "\r\n" counts once, exactly as in next_().
     RE_NEWLINE_ = re.compile(r"\r\n|\r|\n")

     # After an "include" NAME the lexer switches to FILENAME mode so that
     # the following "(...)" is read verbatim as one FILENAME token.
     MODE_NORMAL_ = "NORMAL"
     MODE_FILENAME_ = "FILENAME"

     def __init__(self, text, filename):
         """Create a lexer over ``text``.

         Args:
             text: the complete feature source as a string.
             filename: name reported in token locations; when falsy,
                 ``"<features>"`` is reported instead.
         """
         self.filename_ = filename
         self.line_ = 1
         self.pos_ = 0
         self.line_start_ = 0
         self.text_ = text
         self.text_length_ = len(text)
         self.mode_ = Lexer.MODE_NORMAL_

     def __iter__(self):
         return self

     def next(self):  # Python 2
         return self.__next__()

     def __next__(self):  # Python 3
         """Return the next token, skipping NEWLINE tokens."""
         while True:
             token_type, token, location = self.next_()
             if token_type != Lexer.NEWLINE:
                 return (token_type, token, location)

     def location_(self):
         """Return ``(filename, line, column)`` for the current position."""
         column = self.pos_ - self.line_start_ + 1
         return (self.filename_ or "<features>", self.line_, column)

     def track_newlines_(self, start, end):
         """Account for the line breaks found in ``text_[start:end]``.

         Used after skipping over a multi-line span (string, file name,
         anonymous block) so that locations of later tokens stay correct.
         """
         for match in Lexer.RE_NEWLINE_.finditer(self.text_, start, end):
             self.line_ += 1
             self.line_start_ = match.end()

     def next_(self):
         """Scan and return the next raw token, NEWLINE tokens included.

         Returns:
             A ``(token_type, token, location)`` tuple.

         Raises:
             StopIteration: the end of the text has been reached.
             FeatureLibError: the input is malformed.
         """
         self.scan_over_(Lexer.CHAR_WHITESPACE_)
         location = self.location_()
         start = self.pos_
         text = self.text_
         limit = len(text)
         if start >= limit:
             raise StopIteration()
         cur_char = text[start]
         next_char = text[start + 1] if start + 1 < limit else None
         # next_char is None on the last character of the input, and
         # ``None in "..."`` raises TypeError, so every look-ahead test
         # has to check for None first.
         next_is_digit = (next_char is not None
                          and next_char in Lexer.CHAR_DIGIT_)

         if cur_char == "\n":
             self.pos_ += 1
             self.line_ += 1
             self.line_start_ = self.pos_
             return (Lexer.NEWLINE, None, location)
         if cur_char == "\r":
             self.pos_ += (2 if next_char == "\n" else 1)
             self.line_ += 1
             self.line_start_ = self.pos_
             return (Lexer.NEWLINE, None, location)
         if cur_char == "#":
             self.scan_until_(Lexer.CHAR_NEWLINE_)
             return (Lexer.COMMENT, text[start:self.pos_], location)

         if self.mode_ == Lexer.MODE_FILENAME_:
             if cur_char != "(":
                 raise FeatureLibError("Expected '(' before file name",
                                       location)
             self.scan_until_(")")
             cur_char = text[self.pos_] if self.pos_ < limit else None
             if cur_char != ")":
                 raise FeatureLibError("Expected ')' after file name",
                                       location)
             self.pos_ += 1
             self.mode_ = Lexer.MODE_NORMAL_
             self.track_newlines_(start, self.pos_)
             return (Lexer.FILENAME, text[start + 1:self.pos_ - 1], location)

         if cur_char == "\\" and next_is_digit:
             self.pos_ += 1
             self.scan_over_(Lexer.CHAR_DIGIT_)
             return (Lexer.CID, int(text[start + 1:self.pos_], 10), location)
         if cur_char == "@":
             self.pos_ += 1
             self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
             glyphclass = text[start + 1:self.pos_]
             if len(glyphclass) < 1:
                 raise FeatureLibError("Expected glyph class name", location)
             if len(glyphclass) > 63:
                 raise FeatureLibError(
                     "Glyph class names must not be longer than 63 characters",
                     location)
             if not Lexer.RE_GLYPHCLASS.match(glyphclass):
                 raise FeatureLibError(
                     "Glyph class names must consist of letters, digits, "
                     "underscore, period or hyphen", location)
             return (Lexer.GLYPHCLASS, glyphclass, location)
         if cur_char in Lexer.CHAR_NAME_START_:
             self.pos_ += 1
             self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
             token = text[start:self.pos_]
             if token == "include":
                 self.mode_ = Lexer.MODE_FILENAME_
             return (Lexer.NAME, token, location)
         if cur_char == "0" and next_char is not None and next_char in "xX":
             self.pos_ += 2
             self.scan_over_(Lexer.CHAR_HEXDIGIT_)
             if self.pos_ == start + 2:
                 # A bare "0x" would otherwise escape as a ValueError
                 # from int() instead of a located lexer error.
                 raise FeatureLibError(
                     "Expected hexadecimal digits after '%s'"
                     % text[start:self.pos_], location)
             return (Lexer.NUMBER, int(text[start:self.pos_], 16), location)
         if (cur_char in Lexer.CHAR_DIGIT_
                 or (cur_char == "-" and next_is_digit)):
             if cur_char == "-":
                 self.pos_ += 1
             self.scan_over_(Lexer.CHAR_DIGIT_)
             if self.pos_ >= limit or text[self.pos_] != ".":
                 return (Lexer.NUMBER, int(text[start:self.pos_], 10), location)
             # Consume exactly one decimal point; swallowing a run of dots
             # ("1..2") would make float() raise ValueError.
             self.pos_ += 1
             self.scan_over_(Lexer.CHAR_DIGIT_)
             return (Lexer.FLOAT, float(text[start:self.pos_]), location)
         if cur_char in Lexer.CHAR_SYMBOL_:
             self.pos_ += 1
             return (Lexer.SYMBOL, cur_char, location)
         if cur_char == '"':
             self.pos_ += 1
             self.scan_until_('"')
             if self.pos_ < self.text_length_ and self.text_[self.pos_] == '"':
                 self.pos_ += 1
                 self.track_newlines_(start, self.pos_)
                 # strip newlines embedded within a string
                 string = re.sub("[\r\n]", "", text[start + 1:self.pos_ - 1])
                 return (Lexer.STRING, string, location)
             else:
                 raise FeatureLibError("Expected '\"' to terminate string",
                                       location)
         raise FeatureLibError("Unexpected character: %r" % cur_char,
                               location)

     def scan_over_(self, valid):
         """Advance past every consecutive character that is in ``valid``."""
         p = self.pos_
         while p < self.text_length_ and self.text_[p] in valid:
             p += 1
         self.pos_ = p

     def scan_until_(self, stop_at):
         """Advance up to (not past) the next character in ``stop_at``."""
         p = self.pos_
         while p < self.text_length_ and self.text_[p] not in stop_at:
             p += 1
         self.pos_ = p

     def scan_anonymous_block(self, tag):
         """Read the raw body of an anonymous block up to ``} tag;``.

         The rest of the current line is skipped; the body is everything
         from the following line up to, but excluding, the closing
         ``} tag;``, which is left in the text for normal tokenization.

         Args:
             tag: the block's tag; surrounding whitespace is ignored.

         Returns:
             ``(Lexer.ANONYMOUS_BLOCK, body, location)``.

         Raises:
             FeatureLibError: no ``} tag;`` terminator was found.
         """
         location = self.location_()
         tag = tag.strip()
         scan_start = self.pos_
         self.scan_until_(Lexer.CHAR_NEWLINE_)
         self.scan_over_(Lexer.CHAR_NEWLINE_)
         # The tag is literal text: escape it so characters such as "+" or
         # "." that are legal in names are not read as regex operators.
         regexp = r'}\s*' + re.escape(tag) + r'\s*;'
         split = re.split(regexp, self.text_[self.pos_:], maxsplit=1)
         if len(split) != 2:
             raise FeatureLibError(
                 "Expected '} %s;' to terminate anonymous block" % tag,
                 location)
         self.pos_ += len(split[0])
         self.track_newlines_(scan_start, self.pos_)
         return (Lexer.ANONYMOUS_BLOCK, split[0], location)


 class IncludingLexer(object):
     """Token stream that transparently follows ``include`` statements.

     A stack of ``Lexer`` objects is kept in ``lexers_``; tokens always come
     from the top of the stack.  An ``include(...)`` pushes a lexer for the
     named file, and an exhausted lexer is popped.  Relative include paths
     are resolved against the directory of the top-level feature file, or
     the current working directory when that file has no name.
     """

     def __init__(self, featurefile):
         """Start lexing ``featurefile``, a path or an object with ``read``."""
         self.lexers_ = [self.make_lexer_(featurefile)]
         self.featurefilepath = self.lexers_[0].filename_

     def __iter__(self):
         return self

     def next(self):  # Python 2
         return self.__next__()

     def __next__(self):  # Python 3
         """Return the next token, descending into included files.

         Raises:
             StopIteration: every lexer on the stack is exhausted.
             FeatureLibError: malformed include or too many nested includes.
             IncludedFeaNotFound: the included file does not exist.
         """
         while self.lexers_:
             lexer = self.lexers_[-1]
             try:
                 token_type, token, location = next(lexer)
             except StopIteration:
                 self.lexers_.pop()
                 continue
             if token_type == Lexer.NAME and token == "include":
                 try:
                     fname_type, fname_token, fname_location = next(lexer)
                 except StopIteration:
                     # "include" was the last token of its file.  Letting
                     # StopIteration escape here would end the whole token
                     # stream silently instead of reporting the error.
                     raise FeatureLibError("Expected file name", location)
                 if fname_type is not Lexer.FILENAME:
                     raise FeatureLibError("Expected file name", fname_location)
                 # NOTE: a ';' following "include(...)" is neither required
                 # nor consumed here; it is returned as an ordinary token.
                 if os.path.isabs(fname_token):
                     path = fname_token
                 else:
                     if self.featurefilepath is not None:
                         curpath = os.path.dirname(self.featurefilepath)
                     else:
                         # if the IncludingLexer was initialized from an in-memory
                         # file-like stream, it doesn't have a 'name' pointing to
                         # its filesystem path, therefore we fall back to using the
                         # current working directory to resolve relative includes
                         curpath = os.getcwd()
                     path = os.path.join(curpath, fname_token)
                 if len(self.lexers_) >= 5:
                     raise FeatureLibError("Too many recursive includes",
                                           fname_location)
                 try:
                     self.lexers_.append(self.make_lexer_(path))
                 except IOError as err:
                     # FileNotFoundError does not exist on Python < 3.3
                     import errno
                     if err.errno == errno.ENOENT:
                         raise IncludedFeaNotFound(fname_token, fname_location)
                     raise  # pragma: no cover
             else:
                 return (token_type, token, location)
         raise StopIteration()

     @staticmethod
     def make_lexer_(file_or_path):
         """Build a ``Lexer`` from a path or from an object with ``read``.

         A file-like object is read but left open for its owner to close.
         A path is opened as UTF-8 text and closed again, even when reading
         fails.  The lexer's file name is the object's ``name`` attribute,
         or ``None`` when it has none.
         """
         if hasattr(file_or_path, "read"):
             data = file_or_path.read()
             filename = getattr(file_or_path, "name", None)
         else:
             with open(file_or_path, "r", encoding="utf-8") as fileobj:
                 data = fileobj.read()
                 filename = getattr(fileobj, "name", None)
         return Lexer(data, filename)

     def scan_anonymous_block(self, tag):
         """Delegate to the lexer currently on top of the include stack."""
         return self.lexers_[-1].scan_anonymous_block(tag)


 class NonIncludingLexer(IncludingLexer):
     """Variant that never opens included files.

     Tokens are taken straight from the first lexer on the stack, so
     ``include`` statements reach the caller as ordinary tokens.
     """

     def __next__(self):  # Python 3
         root_lexer = self.lexers_[0]
         return next(root_lexer)
	from __future__ import print_function, division, absolute_import
	from __future__ import unicode_literals
	from fontTools.misc.py23 import *
	from fontTools.feaLib.error import FeatureLibError, IncludedFeaNotFound
	import re
	import os


	class Lexer(object):
	    """Tokenizer for OpenType feature file source text.

	    Iterating over a ``Lexer`` yields ``(token_type, token, location)``
	    tuples, where ``location`` is ``(filename, line, column)`` with 1-based
	    line and column numbers.  Line breaks are consumed silently by the
	    iterator protocol; comments are reported as ``COMMENT`` tokens.
	    """

	    # Token types.
	    NUMBER = "NUMBER"
	    FLOAT = "FLOAT"
	    STRING = "STRING"
	    NAME = "NAME"
	    FILENAME = "FILENAME"
	    GLYPHCLASS = "GLYPHCLASS"
	    CID = "CID"
	    SYMBOL = "SYMBOL"
	    COMMENT = "COMMENT"
	    NEWLINE = "NEWLINE"
	    ANONYMOUS_BLOCK = "ANONYMOUS_BLOCK"

	    # Character classes used by the scanner.
	    CHAR_WHITESPACE_ = " \t"
	    CHAR_NEWLINE_ = "\r\n"
	    CHAR_SYMBOL_ = ",;:-+'{}[]<>()="
	    CHAR_DIGIT_ = "0123456789"
	    CHAR_HEXDIGIT_ = "0123456789ABCDEFabcdef"
	    CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
	    CHAR_NAME_START_ = CHAR_LETTER_ + "_+*:.^~!\\"
	    CHAR_NAME_CONTINUATION_ = CHAR_LETTER_ + CHAR_DIGIT_ + "_.+*:^~!/-"

	    RE_GLYPHCLASS = re.compile(r"^[A-Za-z_0-9.\-]+$")
	    # A single line break; "\r\n" counts once, exactly as in next_().
	    RE_NEWLINE_ = re.compile(r"\r\n|\r|\n")

	    # After an "include" NAME the lexer switches to FILENAME mode so that
	    # the following "(...)" is read verbatim as one FILENAME token.
	    MODE_NORMAL_ = "NORMAL"
	    MODE_FILENAME_ = "FILENAME"

	    def __init__(self, text, filename):
	        """Create a lexer over ``text``.

	        Args:
	            text: the complete feature source as a string.
	            filename: name reported in token locations; when falsy,
	                ``"<features>"`` is reported instead.
	        """
	        self.filename_ = filename
	        self.line_ = 1
	        self.pos_ = 0
	        self.line_start_ = 0
	        self.text_ = text
	        self.text_length_ = len(text)
	        self.mode_ = Lexer.MODE_NORMAL_

	    def __iter__(self):
	        return self

	    def next(self):  # Python 2
	        return self.__next__()

	    def __next__(self):  # Python 3
	        """Return the next token, skipping NEWLINE tokens."""
	        while True:
	            token_type, token, location = self.next_()
	            if token_type != Lexer.NEWLINE:
	                return (token_type, token, location)

	    def location_(self):
	        """Return ``(filename, line, column)`` for the current position."""
	        column = self.pos_ - self.line_start_ + 1
	        return (self.filename_ or "<features>", self.line_, column)

	    def track_newlines_(self, start, end):
	        """Account for the line breaks found in ``text_[start:end]``.

	        Used after skipping over a multi-line span (string, file name,
	        anonymous block) so that locations of later tokens stay correct.
	        """
	        for match in Lexer.RE_NEWLINE_.finditer(self.text_, start, end):
	            self.line_ += 1
	            self.line_start_ = match.end()

	    def next_(self):
	        """Scan and return the next raw token, NEWLINE tokens included.

	        Returns:
	            A ``(token_type, token, location)`` tuple.

	        Raises:
	            StopIteration: the end of the text has been reached.
	            FeatureLibError: the input is malformed.
	        """
	        self.scan_over_(Lexer.CHAR_WHITESPACE_)
	        location = self.location_()
	        start = self.pos_
	        text = self.text_
	        limit = len(text)
	        if start >= limit:
	            raise StopIteration()
	        cur_char = text[start]
	        next_char = text[start + 1] if start + 1 < limit else None
	        # next_char is None on the last character of the input, and
	        # ``None in "..."`` raises TypeError, so every look-ahead test
	        # has to check for None first.
	        next_is_digit = (next_char is not None
	                         and next_char in Lexer.CHAR_DIGIT_)

	        if cur_char == "\n":
	            self.pos_ += 1
	            self.line_ += 1
	            self.line_start_ = self.pos_
	            return (Lexer.NEWLINE, None, location)
	        if cur_char == "\r":
	            self.pos_ += (2 if next_char == "\n" else 1)
	            self.line_ += 1
	            self.line_start_ = self.pos_
	            return (Lexer.NEWLINE, None, location)
	        if cur_char == "#":
	            self.scan_until_(Lexer.CHAR_NEWLINE_)
	            return (Lexer.COMMENT, text[start:self.pos_], location)

	        if self.mode_ == Lexer.MODE_FILENAME_:
	            if cur_char != "(":
	                raise FeatureLibError("Expected '(' before file name",
	                                      location)
	            self.scan_until_(")")
	            cur_char = text[self.pos_] if self.pos_ < limit else None
	            if cur_char != ")":
	                raise FeatureLibError("Expected ')' after file name",
	                                      location)
	            self.pos_ += 1
	            self.mode_ = Lexer.MODE_NORMAL_
	            self.track_newlines_(start, self.pos_)
	            return (Lexer.FILENAME, text[start + 1:self.pos_ - 1], location)

	        if cur_char == "\\" and next_is_digit:
	            self.pos_ += 1
	            self.scan_over_(Lexer.CHAR_DIGIT_)
	            return (Lexer.CID, int(text[start + 1:self.pos_], 10), location)
	        if cur_char == "@":
	            self.pos_ += 1
	            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
	            glyphclass = text[start + 1:self.pos_]
	            if len(glyphclass) < 1:
	                raise FeatureLibError("Expected glyph class name", location)
	            if len(glyphclass) > 63:
	                raise FeatureLibError(
	                    "Glyph class names must not be longer than 63 characters",
	                    location)
	            if not Lexer.RE_GLYPHCLASS.match(glyphclass):
	                raise FeatureLibError(
	                    "Glyph class names must consist of letters, digits, "
	                    "underscore, period or hyphen", location)
	            return (Lexer.GLYPHCLASS, glyphclass, location)
	        if cur_char in Lexer.CHAR_NAME_START_:
	            self.pos_ += 1
	            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
	            token = text[start:self.pos_]
	            if token == "include":
	                self.mode_ = Lexer.MODE_FILENAME_
	            return (Lexer.NAME, token, location)
	        if cur_char == "0" and next_char is not None and next_char in "xX":
	            self.pos_ += 2
	            self.scan_over_(Lexer.CHAR_HEXDIGIT_)
	            if self.pos_ == start + 2:
	                # A bare "0x" would otherwise escape as a ValueError
	                # from int() instead of a located lexer error.
	                raise FeatureLibError(
	                    "Expected hexadecimal digits after '%s'"
	                    % text[start:self.pos_], location)
	            return (Lexer.NUMBER, int(text[start:self.pos_], 16), location)
	        if (cur_char in Lexer.CHAR_DIGIT_
	                or (cur_char == "-" and next_is_digit)):
	            if cur_char == "-":
	                self.pos_ += 1
	            self.scan_over_(Lexer.CHAR_DIGIT_)
	            if self.pos_ >= limit or text[self.pos_] != ".":
	                return (Lexer.NUMBER, int(text[start:self.pos_], 10), location)
	            # Consume exactly one decimal point; swallowing a run of dots
	            # ("1..2") would make float() raise ValueError.
	            self.pos_ += 1
	            self.scan_over_(Lexer.CHAR_DIGIT_)
	            return (Lexer.FLOAT, float(text[start:self.pos_]), location)
	        if cur_char in Lexer.CHAR_SYMBOL_:
	            self.pos_ += 1
	            return (Lexer.SYMBOL, cur_char, location)
	        if cur_char == '"':
	            self.pos_ += 1
	            self.scan_until_('"')
	            if self.pos_ < self.text_length_ and self.text_[self.pos_] == '"':
	                self.pos_ += 1
	                self.track_newlines_(start, self.pos_)
	                # strip newlines embedded within a string
	                string = re.sub("[\r\n]", "", text[start + 1:self.pos_ - 1])
	                return (Lexer.STRING, string, location)
	            else:
	                raise FeatureLibError("Expected '\"' to terminate string",
	                                      location)
	        raise FeatureLibError("Unexpected character: %r" % cur_char,
	                              location)

	    def scan_over_(self, valid):
	        """Advance past every consecutive character that is in ``valid``."""
	        p = self.pos_
	        while p < self.text_length_ and self.text_[p] in valid:
	            p += 1
	        self.pos_ = p

	    def scan_until_(self, stop_at):
	        """Advance up to (not past) the next character in ``stop_at``."""
	        p = self.pos_
	        while p < self.text_length_ and self.text_[p] not in stop_at:
	            p += 1
	        self.pos_ = p

	    def scan_anonymous_block(self, tag):
	        """Read the raw body of an anonymous block up to ``} tag;``.

	        The rest of the current line is skipped; the body is everything
	        from the following line up to, but excluding, the closing
	        ``} tag;``, which is left in the text for normal tokenization.

	        Args:
	            tag: the block's tag; surrounding whitespace is ignored.

	        Returns:
	            ``(Lexer.ANONYMOUS_BLOCK, body, location)``.

	        Raises:
	            FeatureLibError: no ``} tag;`` terminator was found.
	        """
	        location = self.location_()
	        tag = tag.strip()
	        scan_start = self.pos_
	        self.scan_until_(Lexer.CHAR_NEWLINE_)
	        self.scan_over_(Lexer.CHAR_NEWLINE_)
	        # Any amount of whitespace (including none) may separate "}",
	        # the tag and ";", hence the "*" quantifiers.  The tag is literal
	        # text: escape it so characters such as "+" or "." that are legal
	        # in names are not read as regex operators.
	        regexp = r'}\s*' + re.escape(tag) + r'\s*;'
	        split = re.split(regexp, self.text_[self.pos_:], maxsplit=1)
	        if len(split) != 2:
	            raise FeatureLibError(
	                "Expected '} %s;' to terminate anonymous block" % tag,
	                location)
	        self.pos_ += len(split[0])
	        self.track_newlines_(scan_start, self.pos_)
	        return (Lexer.ANONYMOUS_BLOCK, split[0], location)


	class IncludingLexer(object):
	    """Token stream that transparently follows ``include`` statements.

	    A stack of ``Lexer`` objects is kept in ``lexers_``; tokens always come
	    from the top of the stack.  An ``include(...)`` pushes a lexer for the
	    named file, and an exhausted lexer is popped.  Relative include paths
	    are resolved against the directory of the top-level feature file, or
	    the current working directory when that file has no name.
	    """

	    def __init__(self, featurefile):
	        """Start lexing ``featurefile``, a path or an object with ``read``."""
	        self.lexers_ = [self.make_lexer_(featurefile)]
	        self.featurefilepath = self.lexers_[0].filename_

	    def __iter__(self):
	        return self

	    def next(self):  # Python 2
	        return self.__next__()

	    def __next__(self):  # Python 3
	        """Return the next token, descending into included files.

	        Raises:
	            StopIteration: every lexer on the stack is exhausted.
	            FeatureLibError: malformed include or too many nested includes.
	            IncludedFeaNotFound: the included file does not exist.
	        """
	        while self.lexers_:
	            lexer = self.lexers_[-1]
	            try:
	                token_type, token, location = next(lexer)
	            except StopIteration:
	                self.lexers_.pop()
	                continue
	            if token_type == Lexer.NAME and token == "include":
	                try:
	                    fname_type, fname_token, fname_location = next(lexer)
	                except StopIteration:
	                    # "include" was the last token of its file.  Letting
	                    # StopIteration escape here would end the whole token
	                    # stream silently instead of reporting the error.
	                    raise FeatureLibError("Expected file name", location)
	                if fname_type is not Lexer.FILENAME:
	                    raise FeatureLibError("Expected file name", fname_location)
	                # NOTE: a ';' following "include(...)" is neither required
	                # nor consumed here; it is returned as an ordinary token.
	                if os.path.isabs(fname_token):
	                    path = fname_token
	                else:
	                    if self.featurefilepath is not None:
	                        curpath = os.path.dirname(self.featurefilepath)
	                    else:
	                        # if the IncludingLexer was initialized from an in-memory
	                        # file-like stream, it doesn't have a 'name' pointing to
	                        # its filesystem path, therefore we fall back to using the
	                        # current working directory to resolve relative includes
	                        curpath = os.getcwd()
	                    path = os.path.join(curpath, fname_token)
	                if len(self.lexers_) >= 5:
	                    raise FeatureLibError("Too many recursive includes",
	                                          fname_location)
	                try:
	                    self.lexers_.append(self.make_lexer_(path))
	                except IOError as err:
	                    # FileNotFoundError does not exist on Python < 3.3
	                    import errno
	                    if err.errno == errno.ENOENT:
	                        raise IncludedFeaNotFound(fname_token, fname_location)
	                    raise  # pragma: no cover
	            else:
	                return (token_type, token, location)
	        raise StopIteration()

	    @staticmethod
	    def make_lexer_(file_or_path):
	        """Build a ``Lexer`` from a path or from an object with ``read``.

	        A file-like object is read but left open for its owner to close.
	        A path is opened as UTF-8 text and closed again, even when reading
	        fails.  The lexer's file name is the object's ``name`` attribute,
	        or ``None`` when it has none.
	        """
	        if hasattr(file_or_path, "read"):
	            data = file_or_path.read()
	            filename = getattr(file_or_path, "name", None)
	        else:
	            with open(file_or_path, "r", encoding="utf-8") as fileobj:
	                data = fileobj.read()
	                filename = getattr(fileobj, "name", None)
	        return Lexer(data, filename)

	    def scan_anonymous_block(self, tag):
	        """Delegate to the lexer currently on top of the include stack."""
	        return self.lexers_[-1].scan_anonymous_block(tag)


	class NonIncludingLexer(IncludingLexer):
	    """Lexer that does not follow `include` statements, emits them as-is.

	    Tokens are taken straight from the first lexer on the stack, so no
	    included file is ever opened.
	    """

	    def __next__(self):  # Python 3
	        return next(self.lexers_[0])