Merge branch 'lingua-extractor' of https://bitbucket.org/wichert/mako
diff --git a/.gitignore b/.gitignore
old mode 100755
new mode 100644
index c5771df..d518bf8
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,10 @@
 *.pyc
 *.orig
 *.egg-info
+*.sw[opq]
+
+/.Python
+/bin
+/include
+/lib
+/man
diff --git a/mako/ext/babelplugin.py b/mako/ext/babelplugin.py
index 2ff0781..699d8e0 100644
--- a/mako/ext/babelplugin.py
+++ b/mako/ext/babelplugin.py
@@ -6,9 +6,31 @@
 
 """gettext message extraction via Babel: http://babel.edgewall.org/"""
 from babel.messages.extract import extract_python
-from mako.compat import StringIO
-from mako import compat
-from mako import lexer, parsetree
+from mako.ext.extract import MessageExtractor
+
+
+class BabelMakoExtractor(MessageExtractor):
+    def __init__(self, keywords, comment_tags, options):
+        self.keywords = keywords
+        self.options = options
+        self.config = {
+                'comment-tags': u' '.join(comment_tags),
+                'encoding': options.get('input_encoding',
+                    options.get('encoding', None)),
+            }
+        super(BabelMakoExtractor, self).__init__()
+
+    def __call__(self, fileobj):
+        return self.process_file(fileobj)
+
+    def process_python(self, code, code_lineno, translator_strings):
+        comment_tags = self.config['comment-tags']
+        for lineno, funcname, messages, python_translator_comments \
+                in extract_python(code,
+                        self.keywords, comment_tags, self.options):
+            yield (code_lineno + (lineno - 1), funcname, messages,
+                   translator_strings + python_translator_comments)
+
 
 def extract(fileobj, keywords, comment_tags, options):
     """Extract messages from Mako templates.
@@ -22,105 +44,6 @@
     :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
     :rtype: ``iterator``
     """
-    encoding = options.get('input_encoding', options.get('encoding', None))
-
-    template_node = lexer.Lexer(fileobj.read(),
-                                input_encoding=encoding).parse()
-    for extracted in extract_nodes(template_node.get_children(),
-                                   keywords, comment_tags, options):
-        yield extracted
-
-def extract_nodes(nodes, keywords, comment_tags, options):
-    """Extract messages from Mako's lexer node objects
-
-    :param nodes: an iterable of Mako parsetree.Node objects to extract from
-    :param keywords: a list of keywords (i.e. function names) that should be
-                     recognized as translation functions
-    :param comment_tags: a list of translator tags to search for and include
-                         in the results
-    :param options: a dictionary of additional options (optional)
-    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
-    :rtype: ``iterator``
-    """
-    translator_comments = []
-    in_translator_comments = False
-
-    for node in nodes:
-        child_nodes = None
-        if in_translator_comments and isinstance(node, parsetree.Text) and \
-                not node.content.strip():
-            # Ignore whitespace within translator comments
-            continue
-
-        if isinstance(node, parsetree.Comment):
-            value = node.text.strip()
-            if in_translator_comments:
-                translator_comments.extend(_split_comment(node.lineno, value))
-                continue
-            for comment_tag in comment_tags:
-                if value.startswith(comment_tag):
-                    in_translator_comments = True
-                    translator_comments.extend(_split_comment(node.lineno,
-                                                              value))
-            continue
-
-        if isinstance(node, parsetree.DefTag):
-            code = node.function_decl.code
-            child_nodes = node.nodes
-        elif isinstance(node, parsetree.BlockTag):
-            code = node.body_decl.code
-            child_nodes = node.nodes
-        elif isinstance(node, parsetree.CallTag):
-            code = node.code.code
-            child_nodes = node.nodes
-        elif isinstance(node, parsetree.PageTag):
-            code = node.body_decl.code
-        elif isinstance(node, parsetree.CallNamespaceTag):
-            code = node.expression
-            child_nodes = node.nodes
-        elif isinstance(node, parsetree.ControlLine):
-            if node.isend:
-                in_translator_comments = False
-                continue
-            code = node.text
-        elif isinstance(node, parsetree.Code):
-            in_translator_comments = False
-            code = node.code.code
-        elif isinstance(node, parsetree.Expression):
-            code = node.code.code
-        else:
-            continue
-
-        # Comments don't apply unless they immediately preceed the message
-        if translator_comments and \
-                translator_comments[-1][0] < node.lineno - 1:
-            translator_comments = []
-
-        translator_strings = [comment[1] for comment in translator_comments]
-
-        if isinstance(code, compat.text_type):
-            code = code.encode('ascii', 'backslashreplace')
-
-        used_translator_comments = False
-        code = compat.byte_buffer(code)
-        for lineno, funcname, messages, python_translator_comments \
-                in extract_python(code, keywords, comment_tags, options):
-            yield (node.lineno + (lineno - 1), funcname, messages,
-                   translator_strings + python_translator_comments)
-            used_translator_comments = True
-
-        if used_translator_comments:
-            translator_comments = []
-        in_translator_comments = False
-
-        if child_nodes:
-            for extracted in extract_nodes(child_nodes, keywords, comment_tags,
-                                           options):
-                yield extracted
-
-
-def _split_comment(lineno, comment):
-    """Return the multiline comment at lineno split into a list of comment line
-    numbers and the accompanying comment line"""
-    return [(lineno + index, line) for index, line in
-            enumerate(comment.splitlines())]
+    extractor = BabelMakoExtractor(keywords, comment_tags, options)
+    for message in extractor(fileobj):
+        yield message
diff --git a/mako/ext/extract.py b/mako/ext/extract.py
new file mode 100644
index 0000000..0d9c60c
--- /dev/null
+++ b/mako/ext/extract.py
@@ -0,0 +1,94 @@
+import re
+from mako import compat
+from mako import lexer
+from mako import parsetree
+
+
+class MessageExtractor(object):
+    def process_file(self, fileobj):
+        template_node = lexer.Lexer(fileobj.read(),
+                                    input_encoding=self.config['encoding']).parse()
+        for extracted in self.extract_nodes(template_node.get_children()):
+            yield extracted
+
+    def extract_nodes(self, nodes):
+        translator_comments = []
+        in_translator_comments = False
+        comment_tags = filter(None, re.split(r'\s+', self.config['comment-tags']))
+
+        for node in nodes:
+            child_nodes = None
+            if in_translator_comments and isinstance(node, parsetree.Text) and \
+                    not node.content.strip():
+                # Ignore whitespace within translator comments
+                continue
+
+            if isinstance(node, parsetree.Comment):
+                value = node.text.strip()
+                if in_translator_comments:
+                    translator_comments.extend(self._split_comment(node.lineno, value))
+                    continue
+                for comment_tag in comment_tags:
+                    if value.startswith(comment_tag):
+                        in_translator_comments = True
+                        translator_comments.extend(self._split_comment(node.lineno,
+                                                                  value))
+                continue
+
+            if isinstance(node, parsetree.DefTag):
+                code = node.function_decl.code
+                child_nodes = node.nodes
+            elif isinstance(node, parsetree.BlockTag):
+                code = node.body_decl.code
+                child_nodes = node.nodes
+            elif isinstance(node, parsetree.CallTag):
+                code = node.code.code
+                child_nodes = node.nodes
+            elif isinstance(node, parsetree.PageTag):
+                code = node.body_decl.code
+            elif isinstance(node, parsetree.CallNamespaceTag):
+                code = node.expression
+                child_nodes = node.nodes
+            elif isinstance(node, parsetree.ControlLine):
+                if node.isend:
+                    in_translator_comments = False
+                    continue
+                code = node.text
+            elif isinstance(node, parsetree.Code):
+                in_translator_comments = False
+                code = node.code.code
+            elif isinstance(node, parsetree.Expression):
+                code = node.code.code
+            else:
+                continue
+
+            # Comments don't apply unless they immediately preceed the message
+            if translator_comments and \
+                    translator_comments[-1][0] < node.lineno - 1:
+                translator_comments = []
+
+            translator_strings = [comment[1] for comment in translator_comments]
+
+            if isinstance(code, compat.text_type):
+                code = code.encode('ascii', 'backslashreplace')
+
+            used_translator_comments = False
+            code = compat.byte_buffer(code)
+            for message in self.process_python(code, node.lineno, translator_strings):
+                yield message
+                used_translator_comments = True
+
+            if used_translator_comments:
+                translator_comments = []
+            in_translator_comments = False
+
+            if child_nodes:
+                for extracted in self.extract_nodes(child_nodes):
+                    yield extracted
+
+    @staticmethod
+    def _split_comment(lineno, comment):
+        """Return the multiline comment at lineno split into a list of comment line
+        numbers and the accompanying comment line"""
+        return [(lineno + index, line) for index, line in
+                enumerate(comment.splitlines())]
diff --git a/mako/ext/linguaplugin.py b/mako/ext/linguaplugin.py
new file mode 100644
index 0000000..2f970d9
--- /dev/null
+++ b/mako/ext/linguaplugin.py
@@ -0,0 +1,35 @@
+import io
+from lingua.extractors import Extractor
+from lingua.extractors import Message
+from lingua.extractors import get_extractor
+from mako.ext.extract import MessageExtractor
+
+
+class LinguaMakoExtractor(Extractor, MessageExtractor):
+    '''Mako templates'''
+    extensions = ['.mako']
+    default_config = {
+            'encoding': 'utf-8',
+            'comment-tags': '',
+            }
+
+    def __call__(self, filename, options, fileobj=None):
+        self.options = options
+        self.filename = filename
+        self.python_extractor = get_extractor('x.py')
+        if fileobj is None:
+            fileobj = open(filename, 'rb')
+        return self.process_file(fileobj)
+
+    def process_python(self, code, code_lineno, translator_strings):
+        source = code.getvalue().strip()
+        if source.endswith(':'):
+            source += ' pass'
+            code = io.BytesIO(source)
+        for msg in self.python_extractor(self.filename, self.options, code, code_lineno):
+            if translator_strings:
+                msg = Message(msg.msgctxt, msg.msgid, msg.msgid_plural,
+                              msg.flags,
+                              u' '.join(translator_strings + [msg.comment]),
+                              msg.tcomment, msg.location)
+            yield msg
diff --git a/setup.py b/setup.py
index 0094901..60ce08e 100644
--- a/setup.py
+++ b/setup.py
@@ -47,7 +47,7 @@
       url='http://www.makotemplates.org/',
       license='MIT',
       packages=find_packages('.', exclude=['examples*', 'test*']),
-      tests_require=['nose >= 0.11', 'mock'],
+      tests_require=['nose >= 0.11', 'mock', 'Babel', 'lingua >= 3.2'],
       test_suite="nose.collector",
       zip_safe=False,
       install_requires=install_requires,
@@ -66,6 +66,9 @@
       [babel.extractors]
       mako = mako.ext.babelplugin:extract
 
+      [lingua.extractors]
+      mako = mako.ext.linguaplugin:LinguaMakoExtractor
+
       [console_scripts]
       mako-render = mako.cmd:cmdline
       """
diff --git a/test/ext/__init__.py b/test/ext/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/ext/__init__.py
diff --git a/test/test_babelplugin.py b/test/ext/test_babelplugin.py
similarity index 63%
rename from test/test_babelplugin.py
rename to test/ext/test_babelplugin.py
index 023433d..4f17ad9 100644
--- a/test/test_babelplugin.py
+++ b/test/ext/test_babelplugin.py
@@ -1,20 +1,34 @@
-
-from test import TemplateTest, template_base, skip_if
-
-try:
-    import babel
-except:
-    babel = None
-
-if babel is not None:
-    from mako.ext.babelplugin import extract
-
+import io
 import os
+import unittest
+from mako.ext.babelplugin import extract
+from .. import TemplateTest, template_base
+
+
+class Test_extract(unittest.TestCase):
+    def test_parse_python_expression(self):
+        input = io.BytesIO(b'<p>${_("Message")}</p>')
+        messages = list(extract(input, ['_'], [], {}))
+        self.assertEqual(messages, [(1, '_', u'Message', [])])
+
+    def test_python_gettext_call(self):
+        input = io.BytesIO(b'<p>${_("Message")}</p>')
+        messages = list(extract(input, ['_'], [], {}))
+        self.assertEqual(messages, [(1, '_', u'Message', [])])
+
+    def test_translator_comment(self):
+        input = io.BytesIO(b'''
+        <p>
+          ## TRANSLATORS: This is a comment.
+          ${_("Message")}
+        </p>''')
+        messages = list(extract(input, ['_'], ['TRANSLATORS:'], {}))
+        self.assertEqual(
+                messages,
+                [(4, '_', u'Message', [u'TRANSLATORS: This is a comment.'])])
 
 
 class ExtractMakoTestCase(TemplateTest):
-
-    @skip_if(lambda: not babel, 'babel not installed: skipping babelplugin test')
     def test_extract(self):
         mako_tmpl = open(os.path.join(template_base, 'gettext.mako'))
         messages = list(extract(mako_tmpl, {'_': None, 'gettext': None,
@@ -46,4 +60,3 @@
              (99, '_', 'No action at a distance.', []),
              ]
         self.assertEqual(expected, messages)
-
diff --git a/test/ext/test_linguaplugin.py b/test/ext/test_linguaplugin.py
new file mode 100644
index 0000000..55b3ba1
--- /dev/null
+++ b/test/ext/test_linguaplugin.py
@@ -0,0 +1,43 @@
+import os
+from mako.ext.linguaplugin import LinguaMakoExtractor
+from lingua.extractors import register_extractors
+from .. import TemplateTest, template_base
+
+
+class MockOptions:
+    keywords = []
+    domain = None
+
+
+class ExtractMakoTestCase(TemplateTest):
+    def test_extract(self):
+        register_extractors()
+        plugin = LinguaMakoExtractor({'comment-tags': 'TRANSLATOR'})
+        messages = list(plugin(os.path.join(template_base, 'gettext.mako'), MockOptions()))
+        msgids = [(m.msgid, m.msgid_plural) for m in messages]
+        self.assertEqual(
+                msgids,
+                [
+                    ('Page arg 1', None),
+                    ('Page arg 2', None),
+                    ('Begin', None),
+                    ('Hi there!', None),
+                    ('Hello', None),
+                    ('Welcome', None),
+                    ('Yo', None),
+                    ('The', None),
+                    ('bunny', 'bunnies'),
+                    ('Goodbye', None),
+                    ('Babel', None),
+                    ('hella', 'hellas'),
+                    ('The', None),
+                    ('bunny', 'bunnies'),
+                    ('Goodbye, really!', None),
+                    ('P.S. byebye', None),
+                    ('Top', None),
+                    (u'foo', None),
+                    ('hoho', None),
+                    (u'bar', None),
+                    ('Inside a p tag', None),
+                    ('Later in a p tag', None),
+                    ('No action at a distance.', None)])