Fixes, Python 3 support.

commit: 963c6c849041c71783b3454f8aa06ae901e22d1d [log] [tgz]
author: Martin Vejnár <avakar@ratatanek.cz> Sun Mar 15 15:27:43 2015 +0100
committer: Martin Vejnár <avakar@ratatanek.cz> Sun Mar 15 15:27:43 2015 +0100
tree: 85a038bb00ada536219b937d05a8d1c58d03fd4c
parent: 14058d9948b08337113767ba5cf4e88e148df805 [diff]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c445023
--- /dev/null
+++ b/README.md

@@ -0,0 +1,30 @@
+# pytoml
+
+This project aims to be a spec-conforming, strict parser for [TOML][1] files.
+The parser currently supports [version 0.4.0][2] of the spec.
+
+The project supports Python 2.7 and 3.4+.
+
+Install:
+
+    easy_install pytoml
+
+The interface is the same as for the standard `json` package.
+
+    >>> import pytoml as toml
+    >>> toml.loads('a = 1')
+    {'a': 1}
+    >>> with open('file.toml', 'rb') as fin:
+    ...     toml.load(fin)
+    {'a': 1}
+
+The `loads` function accepts either a bytes object
+(which is decoded as UTF-8; a BOM is not allowed),
+or a unicode object.
+
+## Installation
+
+    easy_install pytoml
+
+  [1]: https://github.com/toml-lang/toml
+  [2]: https://github.com/toml-lang/toml/blob/master/versions/en/toml-v0.4.0.md

diff --git a/pytoml/__init__.py b/pytoml/__init__.py
new file mode 100644
index 0000000..e20c2dd
--- /dev/null
+++ b/pytoml/__init__.py

@@ -0,0 +1 @@
+from .parser import TomlError, load, loads

diff --git a/toml.py b/pytoml/parser.py
similarity index 86%
rename from toml.py
rename to pytoml/parser.py
index bae4b3e..005aede 100644
--- a/toml.py
+++ b/pytoml/parser.py

@@ -1,22 +1,27 @@
-import string, re
+import string, re, sys
 
 class TomlError(RuntimeError):
-    def __init__(self, kind, line, col):
-        self.kind = kind
+    def __init__(self, message, line, col, filename):
+        RuntimeError.__init__(self, message, line, col, filename)
+        self.message = message
         self.line = line
         self.col = col
-        RuntimeError.__init__(self, kind)
+        self.filename = filename
 
     def __str__(self):
-        return '%s(%d, %d)' % (self.kind, self.line, self.col)
+        return '{}({}, {}): {}'.format(self.filename, self.line, self.col, self.message)
+
+    def __repr__(self):
+        return 'TomlError({!r}, {!r}, {!r}, {!r})'.format(self.message, self.line, self.col, self.filename)
 
 class _CharSource:
-    def __init__(self, s):
+    def __init__(self, s, filename):
         self._s = s
         self._index = 0
         self._mark = 0
         self._line = 1
         self._col = 1
+        self._filename = filename
         self._update_cur()
 
     def __bool__(self):
@@ -56,8 +61,8 @@
             text = tok
         return type, text, pos
 
-    def error(self, kind):
-        raise TomlError(kind, self._line, self._col)
+    def error(self, message):
+        raise TomlError(message, self._line, self._col, self._filename)
 
     def _update_cur(self):
         self.tail = self._s[self._index:]
@@ -66,8 +71,13 @@
         else:
             self.cur = None
 
-def lex(s):
-    src = _CharSource(s.replace('\r\n', '\n'))
+if sys.version_info[0] == 2:
+    _chr = unichr
+else:
+    _chr = chr
+
+def _lex(s, filename):
+    src = _CharSource(s.replace('\r\n', '\n'), filename)
     def is_id(ch):
         return ch is not None and (ch.isalnum() or ch in '-_')
 
@@ -79,12 +89,12 @@
         if src.cur == 'u':
             if len(src) < 5 or any(ch not in string.hexdigits for ch in src[1:5]):
                 src.error('invalid_escape_sequence')
-            res = unichr(int(src[1:5], 16))
+            res = _chr(int(src[1:5], 16))
             src.next(5)
         elif src.cur == 'U':
             if len(src) < 9 or any(ch not in string.hexdigits for ch in src[1:9]):
                 src.error('invalid_escape_sequence')
-            res = unichr(int(src[1:9], 16))
+            res = _chr(int(src[1:9], 16))
             src.next(9)
         elif src.cur == '\n':
             while src and src.cur in ' \n\t':
@@ -222,8 +232,9 @@
     yield src.commit('eof', '')
 
 class _TokSource:
-    def __init__(self, s):
-        self._lex = iter(lex(s))
+    def __init__(self, s, filename):
+        self._filename = filename
+        self._lex = iter(_lex(s, filename))
         self.pos = None
         self.next()
 
@@ -246,8 +257,12 @@
         while self.consume('\n'):
             pass
 
-    def error(self, kind):
-        raise TomlError(kind, self.pos[0][0], self.pos[0][1])
+    def expect(self, kind, error_text):
+        if not self.consume(kind):
+            self.error(error_text)
+
+    def error(self, message):
+        raise TomlError(message, self.pos[0][0], self.pos[0][1], self._filename)
 
 def _translate_literal(type, text):
     if type == 'bool':
@@ -262,13 +277,15 @@
         return text
 
 def load(fin, translate_literal=_translate_literal, translate_array=id):
-    return loads(fin.read(), translate_literal=translate_literal, translate_array=translate_array)
+    return loads(fin.read(),
+        translate_literal=translate_literal, translate_array=translate_array,
+        filename=fin.name)
 
-def loads(s, translate_literal=_translate_literal, translate_array=id):
-    if isinstance(s, str):
+def loads(s, translate_literal=_translate_literal, translate_array=id, filename='<string>'):
+    if isinstance(s, bytes):
         s = s.decode('utf-8')
 
-    toks = _TokSource(s)
+    toks = _TokSource(s, filename)
 
     def read_value():
         while True:
@@ -302,8 +319,7 @@
                         res.append(val)
                         toks.consume_nls()
                     else:
-                        if not toks.consume(']'):
-                            toks.error('expected_right_brace')
+                        toks.expect(']', 'expected_right_brace')
                 return 'array', translate_array(res)
             elif toks.consume('{'):
                 res = {}
@@ -312,14 +328,12 @@
                     toks.next()
                     if k in res:
                         toks.error('duplicate_key')
-                    if not toks.consume('='):
-                        toks.error('expected_equals')
+                    toks.expect('=', 'expected_equals')
                     type, v = read_value()
                     res[k] = v
                     if not toks.consume(','):
                         break
-                if not toks.consume('}'):
-                    toks.error('expected_closing_brace')
+                toks.expect('}', 'expected_closing_brace')
                 return 'table', res
             else:
                 toks.error('unexpected_token')
@@ -332,12 +346,12 @@
         if toks.tok in ('id', 'str'):
             k = toks.text
             toks.next()
-            if not toks.consume('='):
-                toks.error('expected_equals')
+            toks.expect('=', 'expected_equals')
             type, v = read_value()
             if k in scope:
                 toks.error('duplicate_keys')
             scope[k] = v
+            toks.expect('\n', 'expected_eol')
         elif toks.consume('\n'):
             pass
         elif toks.consume('['):
@@ -355,8 +369,7 @@
                 toks.next()
             if not toks.consume(']') or (is_table_array and not toks.consume_adjacent(']')):
                 toks.error('malformed_table_name')
-            if not toks.consume('\n'):
-                toks.error('garbage_after_table_name')
+            toks.expect('\n', 'expected_eol')
 
             cur = tables
             for name in path[:-1]:
@@ -389,9 +402,10 @@
     def merge_tables(scope, tables):
         if scope is None:
             scope = {}
-        for k, v in tables.iteritems():
+        for k in tables:
             if k in scope:
                 toks.error('key_table_conflict')
+            v = tables[k]
             if isinstance(v, list):
                 scope[k] = [merge_tables(sc, tbl) for sc, tbl in v]
             else:
@@ -399,8 +413,3 @@
         return scope
 
     return merge_tables(root, tables)
-
-if __name__ == '__main__':
-    import sys, json
-    t = sys.stdin.read()
-    print json.dumps(loads(t), indent=4)

diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..30404df
--- /dev/null
+++ b/setup.py

@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from setuptools import setup
+
+setup(
+    name='pytoml',
+    version='0.1.0',
+
+    description='A parser for TOML-0.4.0',
+    author='Martin Vejnár',
+    author_email='avakar@ratatanek.cz',
+    url='https://github.com/avakar/pytoml',
+    license='MIT',
+
+    packages=['pytoml'],
+    )

diff --git a/test/invalid/text-after-table2.toml b/test/invalid/key-after-table.toml
similarity index 100%
rename from test/invalid/text-after-table2.toml
rename to test/invalid/key-after-table.toml


diff --git a/test/invalid/key-no-eol.toml b/test/invalid/key-no-eol.toml
new file mode 100644
index 0000000..3c58eee
--- /dev/null
+++ b/test/invalid/key-no-eol.toml

@@ -0,0 +1 @@
+a = 1 b = 2

diff --git a/test.py b/test/test.py
similarity index 76%
rename from test.py
rename to test/test.py
index 2b12baa..79f4756 100644
--- a/test.py
+++ b/test/test.py

@@ -1,4 +1,5 @@
-import toml, os, json, sys
+import os, json, sys, io
+import pytoml as toml
 
 def _testbench_literal(type, text):
     _type_table = {'str': 'string', 'int': 'integer'}
@@ -11,7 +12,7 @@
     succeeded = []
     failed = []
 
-    for top, dirnames, fnames in os.walk('test'):
+    for top, dirnames, fnames in os.walk('.'):
         for fname in fnames:
             if not fname.endswith('.toml'):
                 continue
@@ -23,7 +24,7 @@
                 parsed = None
 
             try:
-                with open(os.path.join(top, fname[:-5] + '.json'), 'rb') as fin:
+                with io.open(os.path.join(top, fname[:-5] + '.json'), 'rt', encoding='utf-8') as fin:
                     bench = json.load(fin)
             except IOError:
                 bench = None
@@ -34,8 +35,8 @@
                 succeeded.append(fname)
 
     for f in failed:
-        print 'failed: {f}'.format(f=f)
-    print 'succeeded: {succ}'.format(succ=len(succeeded))
+        print('failed: {}'.format(f))
+    print('succeeded: {}'.format(len(succeeded)))
     return 1 if failed else 0
 
 if __name__ == '__main__':
commit	963c6c849041c71783b3454f8aa06ae901e22d1d	[log] [tgz]
author	Martin Vejnár <avakar@ratatanek.cz>	Sun Mar 15 15:27:43 2015 +0100
committer	Martin Vejnár <avakar@ratatanek.cz>	Sun Mar 15 15:27:43 2015 +0100
tree	85a038bb00ada536219b937d05a8d1c58d03fd4c
parent	14058d9948b08337113767ba5cf4e88e148df805 [diff]