import re

from mako import compat
from mako import lexer
from mako import parsetree


class MessageExtractor(object):
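    """Extract translatable strings from a Mako template's parse tree.

    This is a base class: subclasses are expected to supply
    ``self.config`` (a mapping with at least ``'encoding'`` and
    ``'comment-tags'`` keys) and a ``process_python(code, code_lineno,
    translator_strings)`` method that yields the messages found in each
    extracted block of Python source.
    """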

    def process_file(self, fileobj):
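        """Parse the Mako template in ``fileobj`` and yield each message
        extracted from it."""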
        template_node = lexer.Lexer(
            fileobj.read(),
            input_encoding=self.config['encoding']).parse()
        for extracted in self.extract_nodes(template_node.get_children()):
            yield extracted

    def extract_nodes(self, nodes):
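        """Walk a list of parse tree nodes, yielding extracted messages
        and recursing into tags that have child nodes."""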
        translator_comments = []
        in_translator_comments = False
        input_encoding = self.config['encoding'] or 'ascii'
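        # 'comment-tags' is a whitespace-separated list of prefixes (such
        # as "TRANSLATORS:") that mark a template comment as a comment
        # addressed to translators.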
        comment_tags = list(
            filter(None, re.split(r'\s+', self.config['comment-tags'])))

        for node in nodes:
            child_nodes = None
            if in_translator_comments and \
                    isinstance(node, parsetree.Text) and \
                    not node.content.strip():
                # Ignore whitespace within translator comments
                continue

            if isinstance(node, parsetree.Comment):
                value = node.text.strip()
                if in_translator_comments:
                    translator_comments.extend(
                        self._split_comment(node.lineno, value))
                    continue
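                # A comment starting with one of the configured tags opens
                # a run of translator comments.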
                for comment_tag in comment_tags:
                    if value.startswith(comment_tag):
                        in_translator_comments = True
                        translator_comments.extend(
                            self._split_comment(node.lineno, value))
                continue
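
            # Pull the embedded Python source out of the node.  Each tag
            # type stores it in a different attribute; tags with bodies
            # also expose child nodes, which are scanned recursively below.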
            if isinstance(node, parsetree.DefTag):
                code = node.function_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.BlockTag):
                code = node.body_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.CallTag):
                code = node.code.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.PageTag):
                code = node.body_decl.code
            elif isinstance(node, parsetree.CallNamespaceTag):
                code = node.expression
                child_nodes = node.nodes
            elif isinstance(node, parsetree.ControlLine):
                if node.isend:
                    in_translator_comments = False
                    continue
                code = node.text
            elif isinstance(node, parsetree.Code):
                in_translator_comments = False
                code = node.code.code
            elif isinstance(node, parsetree.Expression):
                code = node.code.code
            else:
                continue

            # Comments don't apply unless they immediately precede the
            # message.
            if translator_comments and \
                    translator_comments[-1][0] < node.lineno - 1:
                translator_comments = []

            translator_strings = [
                comment[1] for comment in translator_comments]
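
            # process_python() is handed a byte stream below, so encode
            # unicode code fragments first; 'backslashreplace' avoids
            # hard failures on characters outside the template encoding.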
            if isinstance(code, compat.text_type):
                code = code.encode(input_encoding, 'backslashreplace')

            used_translator_comments = False
            # We add an extra newline to work around a pybabel bug
            # (see python-babel/babel#274: parse_encoding dies if the
            # first input line is non-ascii).
            # Also, because we added it, we have to subtract one from
            # node.lineno.
            code = compat.byte_buffer(compat.b('\n') + code)

            for message in self.process_python(
                    code, node.lineno - 1, translator_strings):
                yield message
                used_translator_comments = True
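
            # Translator comments only annotate the messages that follow
            # them directly, so drop them once they have been attached to
            # at least one extracted message.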
            if used_translator_comments:
                translator_comments = []
                in_translator_comments = False
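
            # Recurse into tags that carry bodies of their own (e.g. %def,
            # %block, %call) so nested messages are picked up too.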
            if child_nodes:
                for extracted in self.extract_nodes(child_nodes):
                    yield extracted

    @staticmethod
    def _split_comment(lineno, comment):
        """Return the multiline comment at lineno split into a list of
        comment line numbers and the accompanying comment line."""
        return [(lineno + index, line) for index, line in
                enumerate(comment.splitlines())]
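

# A minimal usage sketch (not part of this module), assuming the Babel
# toolchain is installed: a subclass supplies the ``config`` mapping and a
# ``process_python()`` method, here delegating the actual extraction to
# ``babel.messages.extract.extract_python``.  The class name and option
# handling below are illustrative only.
#
#     from babel.messages.extract import extract_python
#
#     class SketchMakoExtractor(MessageExtractor):
#
#         def __init__(self, keywords, comment_tags, options):
#             self.keywords = keywords
#             self.comment_tags = comment_tags
#             self.options = options
#             self.config = {
#                 'comment-tags': u' '.join(comment_tags),
#                 'encoding': options.get('input_encoding', None),
#             }
#
#         def process_python(self, code, code_lineno, translator_strings):
#             # ``code_lineno`` is node.lineno - 1, compensating for the
#             # newline prepended in extract_nodes(), so the reported line
#             # numbers land back on the template's own lines.
#             for lineno, funcname, messages, python_comments in \
#                     extract_python(code, self.keywords,
#                                    self.comment_tags, self.options):
#                 yield (code_lineno + (lineno - 1), funcname, messages,
#                        translator_strings + python_comments)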