| # mako/lexer.py |
| # Copyright 2006-2019 the Mako authors and contributors <see AUTHORS file> |
| # |
| # This module is part of Mako and is released under |
| # the MIT License: http://www.opensource.org/licenses/mit-license.php |
| |
| """provides the Lexer class for parsing template strings into parse trees.""" |
| |
| import codecs |
| import re |
| |
| from mako import compat |
| from mako import exceptions |
| from mako import parsetree |
| from mako.pygen import adjust_whitespace |
| |
# Compiled patterns shared by every Lexer instance, keyed by
# (pattern source, flags) exactly as passed to Lexer.match().
_regexp_cache = {}


class Lexer(object):
    """Scan template source text and build a tree of ``parsetree`` nodes.

    The lexer keeps a cursor (``match_position``) into ``text`` together
    with line bookkeeping.  Each ``match_*`` method tries to recognise one
    construct at the cursor, appends the resulting node(s) through
    :meth:`append_node` and reports whether it consumed anything.
    :meth:`parse` drives those methods until the end of the text and
    returns the ``parsetree.TemplateNode`` created in ``__init__``.
    """

    def __init__(
        self,
        text,
        filename=None,
        disable_unicode=False,
        input_encoding=None,
        preprocessor=None,
    ):
        """Set up lexer state for ``text``.

        :param text: template source (text or bytes; decoded in parse()).
        :param filename: stored on nodes and used in error reports.
        :param disable_unicode: when true, parse() leaves bytes undecoded;
         rejected when ``compat.py3k`` is true.
        :param input_encoding: encoding to fall back on when the source
         carries no magic encoding comment.
        :param preprocessor: a callable, or an iterable of callables, each
         applied to the text in order before lexing.
        :raises exceptions.UnsupportedError: if ``disable_unicode`` is
         requested while ``compat.py3k`` is true.
        """
        self.text = text
        self.filename = filename
        self.template = parsetree.TemplateNode(self.filename)
        # line / column at which the most recent successful match started
        self.matched_lineno = 1
        self.matched_charpos = 0
        # line number at the current cursor
        self.lineno = 1
        self.match_position = 0
        # stack of currently open Tag nodes
        self.tag = []
        # stack of currently open primary control lines, and - in
        # parallel - the ternary control lines seen inside each of them
        self.control_line = []
        self.ternary_stack = []
        self.disable_unicode = disable_unicode
        self.encoding = input_encoding

        if compat.py3k and disable_unicode:
            raise exceptions.UnsupportedError(
                "Mako for Python 3 does not support disabling Unicode"
            )

        # normalise ``preprocessor`` into an iterable of callables
        if preprocessor is None:
            self.preprocessor = []
        elif not hasattr(preprocessor, "__iter__"):
            self.preprocessor = [preprocessor]
        else:
            self.preprocessor = preprocessor

    @property
    def exception_kwargs(self):
        """Keyword arguments locating the last match, for exceptions."""
        return {
            "source": self.text,
            "lineno": self.matched_lineno,
            "pos": self.matched_charpos,
            "filename": self.filename,
        }

    def match(self, regexp, flags=None):
        """compile the given regexp, cache the reg, and call match_reg()."""

        cache_key = (regexp, flags)
        try:
            reg = _regexp_cache[cache_key]
        except KeyError:
            reg = re.compile(regexp, flags) if flags else re.compile(regexp)
            _regexp_cache[cache_key] = reg

        return self.match_reg(reg)

    def match_reg(self, reg):
        """match the given regular expression object to the current text
        position.

        if a match occurs, update the current text and line position.

        Returns the match object, or ``None`` when nothing matched (in
        which case no state is changed).
        """

        mp = self.match_position

        match = reg.match(self.text, mp)
        if match:
            (start, end) = match.span()
            # an empty match still advances the cursor by one so that the
            # caller's loop cannot stall on the same position
            self.match_position = end + 1 if end == start else end
            self.matched_lineno = self.lineno

            # column = distance from the newline preceding the match start
            # (or from index -1 when the match is on the first line)
            cp = mp - 1
            if 0 <= cp < self.textlength:
                cp = self.text.rfind("\n", 0, cp + 1)
            self.matched_charpos = mp - cp

            self.lineno += self.text.count("\n", mp, self.match_position)
        return match

    def parse_until_text(self, watch_nesting, *text):
        """Consume text up to the first occurrence of a terminator.

        :param watch_nesting: when true, a terminator that appears while
         unbalanced ``{``, ``(`` or ``[`` are open is not treated as the
         end.
        :param text: one or more regular expression fragments; they are
         joined with ``|`` to form the terminator pattern.
        :return: ``(consumed_text, terminator)`` where ``consumed_text``
         excludes the terminator itself.
        :raises exceptions.SyntaxException: if no terminator is found.

        ``#`` comments running to end of line and quoted string literals
        are skipped whole, so terminators inside them are ignored.
        """
        startpos = self.match_position
        text_re = r"|".join(text)
        brace_level = 0
        paren_level = 0
        bracket_level = 0
        while True:
            # skip a comment up to and including its newline
            match = self.match(r"#.*\n")
            if match:
                continue
            # skip a complete single- or triple-quoted string literal
            match = self.match(
                r"(\"\"\"|\'\'\'|\"|\')[^\\]*?(\\.[^\\]*?)*\1", re.S
            )
            if match:
                continue
            match = self.match(r"(%s)" % text_re)
            if match and not (
                watch_nesting
                and (brace_level > 0 or paren_level > 0 or bracket_level > 0)
            ):
                return (
                    self.text[
                        startpos : self.match_position - len(match.group(1))
                    ],
                    match.group(1),
                )
            elif not match:
                # no terminator here: take everything up to the next
                # quote, comment or terminator candidate
                match = self.match(r"(.*?)(?=\"|\'|#|%s)" % text_re, re.S)
            if match:
                # also reached for a terminator seen while nested, so that
                # e.g. a closing "}" lowers the brace level
                chunk = match.group(1)
                brace_level += chunk.count("{")
                brace_level -= chunk.count("}")
                paren_level += chunk.count("(")
                paren_level -= chunk.count(")")
                bracket_level += chunk.count("[")
                bracket_level -= chunk.count("]")
                continue
            raise exceptions.SyntaxException(
                "Expected: %s" % ",".join(text), **self.exception_kwargs
            )

    def append_node(self, nodecls, *args, **kwargs):
        """Create a ``nodecls`` node and attach it to the tree.

        ``source``, ``lineno`` and ``pos`` default to the position of the
        last match; ``filename`` is always set from the lexer.  The node is
        appended to the innermost open tag (or to the template root), and
        the tag / control-line stacks are updated according to its type.

        :raises exceptions.SyntaxException: for a non-primary control line
         whose keyword is not a legal ternary of the enclosing one.
        """
        kwargs.setdefault("source", self.text)
        kwargs.setdefault("lineno", self.matched_lineno)
        kwargs.setdefault("pos", self.matched_charpos)
        kwargs["filename"] = self.filename
        node = nodecls(*args, **kwargs)
        if len(self.tag):
            self.tag[-1].nodes.append(node)
        else:
            self.template.nodes.append(node)
        # build a set of child nodes for the control line
        # (used for loop variable detection)
        # also build a set of child nodes on ternary control lines
        # (used for determining if a pass needs to be auto-inserted)
        if self.control_line:
            control_frame = self.control_line[-1]
            control_frame.nodes.append(node)
            if not (
                isinstance(node, parsetree.ControlLine)
                and control_frame.is_ternary(node.keyword)
            ):
                if self.ternary_stack and self.ternary_stack[-1]:
                    self.ternary_stack[-1][-1].nodes.append(node)
        if isinstance(node, parsetree.Tag):
            if len(self.tag):
                node.parent = self.tag[-1]
            self.tag.append(node)
        elif isinstance(node, parsetree.ControlLine):
            if node.isend:
                self.control_line.pop()
                self.ternary_stack.pop()
            elif node.is_primary:
                self.control_line.append(node)
                self.ternary_stack.append([])
            elif self.control_line and self.control_line[-1].is_ternary(
                node.keyword
            ):
                self.ternary_stack[-1].append(node)
            elif self.control_line and not self.control_line[-1].is_ternary(
                node.keyword
            ):
                raise exceptions.SyntaxException(
                    "Keyword '%s' not a legal ternary for keyword '%s'"
                    % (node.keyword, self.control_line[-1].keyword),
                    **self.exception_kwargs
                )

    # a "magic encoding comment" line, e.g. ``# -*- coding: utf-8 -*-``
    _coding_re = re.compile(r"#.*coding[:=]\s*([-\w.]+).*\r?\n")

    def decode_raw_stream(self, text, decode_raw, known_encoding, filename):
        """given string/unicode or bytes/string, determine encoding
        from magic encoding comment, return body as unicode
        or raw if decode_raw=False

        :return: ``(encoding, text)``.  The encoding comes from the magic
         comment if present, else ``known_encoding``, else ``"ascii"``; a
         UTF-8 BOM on byte input forces ``"utf-8"`` and is stripped.
        :raises exceptions.CompileException: if a UTF-8 BOM conflicts with
         the magic comment, or if decoding fails.
        """
        if isinstance(text, compat.text_type):
            # already text: only report which encoding applies
            m = self._coding_re.match(text)
            encoding = m.group(1) if m else (known_encoding or "ascii")
            return encoding, text

        has_bom = text.startswith(codecs.BOM_UTF8)
        if has_bom:
            text = text[len(codecs.BOM_UTF8) :]

        # decode leniently just to be able to look for the magic comment
        m = self._coding_re.match(text.decode("utf-8", "ignore"))

        if has_bom:
            parsed_encoding = "utf-8"
            if m is not None and m.group(1) != "utf-8":
                raise exceptions.CompileException(
                    "Found utf-8 BOM in file, with conflicting "
                    "magic encoding comment of '%s'" % m.group(1),
                    text.decode("utf-8", "ignore"),
                    0,
                    0,
                    filename,
                )
        elif m:
            parsed_encoding = m.group(1)
        else:
            parsed_encoding = known_encoding or "ascii"

        if decode_raw:
            try:
                text = text.decode(parsed_encoding)
            except UnicodeDecodeError:
                raise exceptions.CompileException(
                    "Unicode decode operation of encoding '%s' failed"
                    % parsed_encoding,
                    text.decode("utf-8", "ignore"),
                    0,
                    0,
                    filename,
                )

        return parsed_encoding, text

    def parse(self):
        """Lex the whole text and return the populated template node.

        :raises exceptions.SyntaxException: for an unclosed tag or an
         unterminated control keyword at end of input (and for any syntax
         error reported by the individual ``match_*`` methods).
        :raises exceptions.CompileException: if no matcher makes progress
         at some position.
        """
        self.encoding, self.text = self.decode_raw_stream(
            self.text, not self.disable_unicode, self.encoding, self.filename
        )

        for preproc in self.preprocessor:
            self.text = preproc(self.text)

        # push the match marker past the
        # encoding comment.
        self.match_reg(self._coding_re)

        self.textlength = len(self.text)

        while True:
            if self.match_position > self.textlength:
                break

            if self.match_end():
                break
            # order matters: the first matcher that succeeds wins
            if self.match_expression():
                continue
            if self.match_control_line():
                continue
            if self.match_comment():
                continue
            if self.match_tag_start():
                continue
            if self.match_tag_end():
                continue
            if self.match_python_block():
                continue
            if self.match_text():
                continue

            if self.match_position > self.textlength:
                break
            # pass the source position like the other CompileException
            # call sites in this class do
            raise exceptions.CompileException(
                "assertion failed",
                self.text,
                self.matched_lineno,
                self.matched_charpos,
                self.filename,
            )

        if len(self.tag):
            raise exceptions.SyntaxException(
                "Unclosed tag: <%%%s>" % self.tag[-1].keyword,
                **self.exception_kwargs
            )
        if len(self.control_line):
            raise exceptions.SyntaxException(
                "Unterminated control keyword: '%s'"
                % self.control_line[-1].keyword,
                self.text,
                self.control_line[-1].lineno,
                self.control_line[-1].pos,
                self.filename,
            )
        return self.template

    def match_tag_start(self):
        """Match an opening ``<%keyword attr="value" ...>`` tag.

        Appends a ``parsetree.Tag`` node.  A self-closing tag (``/>``) is
        popped from the tag stack immediately.  For ``<%text>`` the body up
        to ``</%text>`` is appended verbatim as a ``Text`` node and the
        closing tag is consumed as well.

        :return: true if a tag was consumed, ``False`` otherwise.
        :raises exceptions.SyntaxException: for an unclosed ``<%text>``.
        """
        # Quoted values must not contain their own quote character.  With
        # a lazy ".*?" a single alternative could span several quoted
        # sections, which made the repeated group ambiguous and lexing
        # time exponential on input containing many quotes.  The explicit
        # comma alternative keeps accepting comma-separated attributes,
        # which previously only matched through that ambiguity.
        match = self.match(
            r"""
            \<%     # opening tag

            ([\w\.\:]+)   # keyword

            # attribute names, equals signs, quoted values and
            # (tolerated) commas between attributes
            ((?:\s+\w+|\s*=\s*|"[^"]*?"|'[^']*?'|\s*,\s*)*)

            \s*     # more whitespace

            (/)?>   # closing

        """,
            re.I | re.S | re.X,
        )

        if not match:
            return False

        keyword, attr, isend = match.groups()
        self.keyword = keyword
        attributes = {}
        if attr:
            for att in re.findall(
                r"\s*(\w+)\s*=\s*(?:'([^']*)'|\"([^\"]*)\")", attr
            ):
                key, val1, val2 = att
                text = val1 or val2
                text = text.replace("\r\n", "\n")
                attributes[key] = text
        self.append_node(parsetree.Tag, keyword, attributes)
        if isend:
            self.tag.pop()
        elif keyword == "text":
            match = self.match(r"(.*?)(?=\</%text>)", re.S)
            if not match:
                raise exceptions.SyntaxException(
                    "Unclosed tag: <%%%s>" % self.tag[-1].keyword,
                    **self.exception_kwargs
                )
            self.append_node(parsetree.Text, match.group(1))
            return self.match_tag_end()
        return True

    def match_tag_end(self):
        """Match a closing ``</%keyword>`` tag and pop the tag stack.

        :return: ``True`` if a closing tag was consumed, else ``False``.
        :raises exceptions.SyntaxException: if no tag is open, or the
         keyword differs from the innermost open tag.
        """
        match = self.match(r"\</%[\t ]*(.+?)[\t ]*>")
        if not match:
            return False

        if not len(self.tag):
            raise exceptions.SyntaxException(
                "Closing tag without opening tag: </%%%s>" % match.group(1),
                **self.exception_kwargs
            )
        elif self.tag[-1].keyword != match.group(1):
            raise exceptions.SyntaxException(
                "Closing tag </%%%s> does not match tag: <%%%s>"
                % (match.group(1), self.tag[-1].keyword),
                **self.exception_kwargs
            )
        self.tag.pop()
        return True

    def match_end(self):
        """Return ``True`` if the cursor is at the end of the text."""
        # "\Z" can only ever match the empty string, so the result is a
        # plain boolean
        return bool(self.match(r"\Z", re.S))

    def match_text(self):
        """Match plain template text up to the next special construct.

        Appends a ``parsetree.Text`` node when the consumed text is not
        empty.

        :return: ``True`` if the pattern matched, else ``False``.
        """
        match = self.match(
            r"""
                (.*?)         # anything, followed by:
                (
                 (?<=\n)(?=[ \t]*(?=%|\#\#)) # an eval or line-based
                                             # comment preceded by a
                                             # consumed newline and whitespace
                 |
                 (?=\${)      # an expression
                 |
                 (?=</?[%&])  # a substitution or block or call start or end
                              # - don't consume
                 |
                 (\\\r?\n)    # an escaped newline  - throw away
                 |
                 \Z           # end of string
                )""",
            re.X | re.S,
        )

        if not match:
            return False

        text = match.group(1)
        if text:
            self.append_node(parsetree.Text, text)
        return True

    def match_python_block(self):
        """Match a ``<% ... %>`` or ``<%! ... %>`` block of Python code.

        Appends a ``parsetree.Code`` node whose second argument tells
        whether the ``!`` form was used.

        :return: ``True`` if a block was consumed, else ``False``.
        """
        match = self.match(r"<%(!)?")
        if not match:
            return False

        # remember where the block started; parse_until_text() moves on
        line, pos = self.matched_lineno, self.matched_charpos
        text, _ = self.parse_until_text(False, r"%>")
        # the trailing newline helps
        # compiler.parse() not complain about indentation
        text = adjust_whitespace(text) + "\n"
        self.append_node(
            parsetree.Code,
            text,
            match.group(1) == "!",
            lineno=line,
            pos=pos,
        )
        return True

    def match_expression(self):
        """Match a ``${expression | escapes}`` substitution.

        Appends a ``parsetree.Expression`` node carrying the expression
        text and the (stripped) escapes text, which is empty when no ``|``
        section is present.

        :return: ``True`` if an expression was consumed, else ``False``.
        """
        match = self.match(r"\${")
        if not match:
            return False

        line, pos = self.matched_lineno, self.matched_charpos
        text, end = self.parse_until_text(True, r"\|", r"}")
        if end == "|":
            escapes, end = self.parse_until_text(True, r"}")
        else:
            escapes = ""
        text = text.replace("\r\n", "\n")
        self.append_node(
            parsetree.Expression,
            text,
            escapes.strip(),
            lineno=line,
            pos=pos,
        )
        return True

    def match_control_line(self):
        """Match a ``% keyword ...`` control line or a ``##`` comment line.

        Appends a ``parsetree.ControlLine`` for ``%`` lines and a
        ``parsetree.Comment`` for ``##`` lines.

        :return: ``True`` if a line was consumed, else ``False``.
        :raises exceptions.SyntaxException: if the control line is
         malformed, or an ``end<keyword>`` line has no matching opener.
        """
        # a backslash directly before the line break (LF or CRLF) continues
        # the control line, mirroring the escaped-newline rule used in
        # match_text()
        match = self.match(
            r"(?<=^)[\t ]*(%(?!%)|##)[\t ]*((?:(?:\\\r?\n)|[^\r\n])*)"
            r"(?:\r?\n|\Z)",
            re.M,
        )
        if not match:
            return False

        operator = match.group(1)
        text = match.group(2)
        if operator == "%":
            m2 = re.match(r"(end)?(\w+)\s*(.*)", text)
            if not m2:
                raise exceptions.SyntaxException(
                    "Invalid control line: '%s'" % text,
                    **self.exception_kwargs
                )
            isend, keyword = m2.group(1, 2)
            isend = isend is not None

            if isend:
                if not len(self.control_line):
                    raise exceptions.SyntaxException(
                        "No starting keyword '%s' for '%s'"
                        % (keyword, text),
                        **self.exception_kwargs
                    )
                elif self.control_line[-1].keyword != keyword:
                    raise exceptions.SyntaxException(
                        "Keyword '%s' doesn't match keyword '%s'"
                        % (text, self.control_line[-1].keyword),
                        **self.exception_kwargs
                    )
            self.append_node(parsetree.ControlLine, keyword, isend, text)
        else:
            self.append_node(parsetree.Comment, text)
        return True

    def match_comment(self):
        """matches the multiline version of a comment"""
        match = self.match(r"<%doc>(.*?)</%doc>", re.S)
        if not match:
            return False

        self.append_node(parsetree.Comment, match.group(1))
        return True