import re

from mako import compat
from mako import lexer
from mako import parsetree


class MessageExtractor(object):
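    """Extract translatable strings from a Mako template's parse tree.

    This is a base class: subclasses are expected to supply
    ``self.config`` (a mapping with at least ``'encoding'`` and
    ``'comment-tags'`` keys) and a ``process_python(code, code_lineno,
    translator_strings)`` method that yields the messages found in each
    extracted block of Python source.
    """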

    def process_file(self, fileobj):
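        """Parse the Mako template in ``fileobj`` and yield each message
        extracted from it."""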
        template_node = lexer.Lexer(
            fileobj.read(),
            input_encoding=self.config['encoding']).parse()
        for extracted in self.extract_nodes(template_node.get_children()):
            yield extracted

    def extract_nodes(self, nodes):
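        """Walk a list of parse tree nodes, yielding extracted messages
        and recursing into tags that have child nodes."""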
        translator_comments = []
        in_translator_comments = False
        input_encoding = self.config['encoding'] or 'ascii'
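        # 'comment-tags' is a whitespace-separated list of prefixes (such
        # as "TRANSLATORS:") that mark a template comment as a comment
        # addressed to translators.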
        comment_tags = list(
            filter(None, re.split(r'\s+', self.config['comment-tags'])))

        for node in nodes:
            child_nodes = None
            if in_translator_comments and \
                    isinstance(node, parsetree.Text) and \
                    not node.content.strip():
                # Ignore whitespace within translator comments
                continue

            if isinstance(node, parsetree.Comment):
                value = node.text.strip()
                if in_translator_comments:
                    translator_comments.extend(
                        self._split_comment(node.lineno, value))
                    continue
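                # A comment starting with one of the configured tags opens
                # a run of translator comments.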
                for comment_tag in comment_tags:
                    if value.startswith(comment_tag):
                        in_translator_comments = True
                        translator_comments.extend(
                            self._split_comment(node.lineno, value))
                continue
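
            # Pull the embedded Python source out of the node.  Each tag
            # type stores it in a different attribute; tags with bodies
            # also expose child nodes, which are scanned recursively below.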
            if isinstance(node, parsetree.DefTag):
                code = node.function_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.BlockTag):
                code = node.body_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.CallTag):
                code = node.code.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.PageTag):
                code = node.body_decl.code
            elif isinstance(node, parsetree.CallNamespaceTag):
                code = node.expression
                child_nodes = node.nodes
            elif isinstance(node, parsetree.ControlLine):
                if node.isend:
                    in_translator_comments = False
                    continue
                code = node.text
            elif isinstance(node, parsetree.Code):
                in_translator_comments = False
                code = node.code.code
            elif isinstance(node, parsetree.Expression):
                code = node.code.code
            else:
                continue

            # Comments don't apply unless they immediately precede the
            # message.
            if translator_comments and \
                    translator_comments[-1][0] < node.lineno - 1:
                translator_comments = []

            translator_strings = [
                comment[1] for comment in translator_comments]
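
            # process_python() is handed a byte stream below, so encode
            # unicode code fragments first; 'backslashreplace' avoids
            # hard failures on characters outside the template encoding.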
            if isinstance(code, compat.text_type):
                code = code.encode(input_encoding, 'backslashreplace')

            used_translator_comments = False
            # We add an extra newline to work around a pybabel bug
            # (see python-babel/babel#274: parse_encoding dies if the
            # first input line is non-ascii).
            # Also, because we added it, we have to subtract one from
            # node.lineno.
            code = compat.byte_buffer(compat.b('\n') + code)

            for message in self.process_python(
                    code, node.lineno - 1, translator_strings):
                yield message
                used_translator_comments = True
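
            # Translator comments only annotate the messages that follow
            # them directly, so drop them once they have been attached to
            # at least one extracted message.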
            if used_translator_comments:
                translator_comments = []
                in_translator_comments = False
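
            # Recurse into tags that carry bodies of their own (e.g. %def,
            # %block, %call) so nested messages are picked up too.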
            if child_nodes:
                for extracted in self.extract_nodes(child_nodes):
                    yield extracted

    @staticmethod
    def _split_comment(lineno, comment):
        """Return the multiline comment at lineno split into a list of
        comment line numbers and the accompanying comment line."""
        return [(lineno + index, line) for index, line in
                enumerate(comment.splitlines())]
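

# A minimal usage sketch (not part of this module), assuming the Babel
# toolchain is installed: a subclass supplies the ``config`` mapping and a
# ``process_python()`` method, here delegating the actual extraction to
# ``babel.messages.extract.extract_python``.  The class name and option
# handling below are illustrative only.
#
#     from babel.messages.extract import extract_python
#
#     class SketchMakoExtractor(MessageExtractor):
#
#         def __init__(self, keywords, comment_tags, options):
#             self.keywords = keywords
#             self.comment_tags = comment_tags
#             self.options = options
#             self.config = {
#                 'comment-tags': u' '.join(comment_tags),
#                 'encoding': options.get('input_encoding', None),
#             }
#
#         def process_python(self, code, code_lineno, translator_strings):
#             # ``code_lineno`` is node.lineno - 1, compensating for the
#             # newline prepended in extract_nodes(), so the reported line
#             # numbers land back on the template's own lines.
#             for lineno, funcname, messages, python_comments in \
#                     extract_python(code, self.keywords,
#                                    self.comment_tags, self.options):
#                 yield (code_lineno + (lineno - 1), funcname, messages,
#                        translator_strings + python_comments)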