| """Implementation of JSONDecoder | |
| """ | |
| import re | |
| import sys | |
| import struct | |
| from json import scanner | |
| try: | |
| from _json import scanstring as c_scanstring | |
| except ImportError: | |
| c_scanstring = None | |
# Names exported by ``from <this module> import *``.
__all__ = ['JSONDecoder']

# Regex flags passed to the STRINGCHUNK and WHITESPACE patterns compiled
# further down in this module.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
def _floatconstants():
    """Return the IEEE 754 special values as a ``(nan, inf, -inf)`` tuple.

    The values are produced by ``float()`` parsing the strings ``'nan'``
    and ``'inf'`` rather than by unpacking a hex-decoded byte string: the
    ``str.decode('hex')`` codec only exists on Python 2 byte strings, and
    the manual byte-order swap it required is unnecessary here.
    """
    nan = float('nan')
    inf = float('inf')
    return nan, inf, -inf


# Module-level float constants used for the NaN / Infinity / -Infinity
# JSON extensions.
NaN, PosInf, NegInf = _floatconstants()
def linecol(doc, pos):
    """Translate a character offset into a 1-based ``(line, column)`` pair.

    ``doc`` is the full document text and ``pos`` an index into it.

    The line number is one more than the count of newlines before ``pos``.
    The column is the distance from the last newline before ``pos``; when
    there is none, ``rfind`` returns -1, which makes the first line 1-based
    too (previously it was 0-based, unlike every other line).
    """
    lineno = doc.count('\n', 0, pos) + 1
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
def errmsg(msg, doc, pos, end=None):
    """Build a decode-error message annotated with a document position.

    ``msg`` is the leading description, ``doc`` the document being decoded
    and ``pos`` the offending character offset. When ``end`` is given the
    message describes the range ``pos`` - ``end`` instead of a single
    point. Line and column numbers are whatever ``linecol`` reports.
    """
    # Note that this function is called from _json
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        return (
            '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
        ).format(msg, start_line, start_col, stop_line, stop_col, pos, end)
    return '{0}: line {1} column {2} (char {3})'.format(
        msg, start_line, start_col, pos)
# Maps the non-standard JSON literals to the module's float constants.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Matches one chunk of a JSON string body: group 1 is a non-greedy
# (possibly empty) run of characters, group 2 is the character that ended
# the run -- a double quote, a backslash, or a control character in the
# range \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Single-character escapes: the character following a backslash, mapped
# to the unicode character it stands for.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

# Encoding py_scanstring falls back to when it is called with
# encoding=None.
DEFAULT_ENCODING = "utf-8"
# Characters accepted inside a four-digit unicode escape.
_HEX_DIGITS = '0123456789abcdefABCDEF'


def _parse_hex4(s, start, msg, errpos):
    """Return the value of the four hex digits at ``s[start:start + 4]``.

    Raises ValueError built from ``msg`` and ``errpos`` when fewer than
    four characters remain or any of them is not a hexadecimal digit.
    ``int(x, 16)`` alone is too lenient: it also accepts a sign, a ``0x``
    prefix and surrounding whitespace.
    """
    digits = s[start:start + 4]
    if len(digits) != 4 or not all(d in _HEX_DIGITS for d in digits):
        raise ValueError(errmsg(msg, s, errpos))
    return int(digits, 16)


def py_scanstring(s, end, encoding=None, strict=True,
                  _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    ``encoding`` is used to decode non-unicode chunks and defaults to
    DEFAULT_ENCODING. ``_b`` and ``_m`` are pre-bound lookups of the
    escape table and the chunk matcher; callers are not expected to pass
    them.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used in "unterminated string" errors.
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character {0!r} at".format(terminator)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        else:
            # Unicode escape sequence: 'u' followed by exactly four hex digits
            uni = _parse_hex4(s, end + 1, "Invalid \\uXXXX escape", end)
            next_end = end + 5
            # Check for surrogate pair on UCS-4 systems
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if not s[end + 5:end + 7] == '\\u':
                    raise ValueError(errmsg(msg, s, end))
                uni2 = _parse_hex4(s, end + 7, msg, end)
                # The second half must be a low surrogate; otherwise the
                # arithmetic below would silently yield an unrelated
                # code point.
                if not 0xdc00 <= uni2 <= 0xdfff:
                    raise ValueError(errmsg(msg, s, end))
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = unichr(uni)
            end = next_end
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end
# Use speedup if available: c_scanstring is None when the _json import
# at the top of the module failed, in which case the pure-Python scanner
# is used.
scanstring = c_scanstring or py_scanstring

# Matches a (possibly empty) run of space, tab, newline and carriage
# return characters.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same four characters as a plain string, used for ``in`` membership
# tests by JSONObject and JSONArray.
WHITESPACE_STR = ' \t\n\r'
def JSONObject(s_and_end, encoding, strict, scan_once, object_hook,
               object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object and return ``(result, end)``.

    ``s_and_end`` is a ``(s, end)`` pair: the document and the index at
    which to start looking for the first property name or the closing
    ``}``. This function never looks for the opening brace itself.

    ``encoding`` and ``strict`` are forwarded to ``scanstring`` for keys;
    ``scan_once(s, idx)`` parses each value and signals "no value here"
    with StopIteration.

    If ``object_pairs_hook`` is not None it is called with the list of
    ``(key, value)`` pairs and its result is returned; otherwise the pairs
    are turned into a dict, which is passed through ``object_hook`` when
    that is not None.

    The returned ``end`` is the index just past the closing ``}``.
    Raises ValueError on a missing property name, ``:`` or ``,``
    delimiter, or value.
    """
    s, end = s_and_end
    pairs = []
    pairs_append = pairs.append
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            # ``end`` still points at the closing brace here, so every
            # exit from this branch must step past it (the pairs-hook
            # exit previously returned ``end`` unchanged).
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end))
    # Step past the opening quote of the first key.
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise ValueError(errmsg("Expecting : delimiter", s, end))
        end += 1

        # Skip whitespace after the colon; running off the end of the
        # input is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs_append((key, value))

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        # ``end`` now points one past the delimiter candidate.
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))

        # After a comma, skip whitespace and require the opening quote of
        # the next key.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))

    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array and return ``(values, end)``.

    ``s_and_end`` is a ``(s, end)`` pair: the document and the index at
    which to start looking for the first element or the closing ``]``.
    ``scan_once(s, idx)`` parses each element and signals "no value here"
    with StopIteration.

    ``values`` is a list of the decoded elements and ``end`` the index
    just past the closing ``]``. Raises ValueError when an element or a
    ``,`` delimiter is missing.
    """
    s, end = s_and_end
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        # ``end`` now points one past the delimiter candidate.
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # Report the offending character itself (end - 1), matching
            # the position JSONObject reports for the same error.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))

        # Skip whitespace after the comma; running off the end of the
        # input is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

    return values, end
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default JSON values are translated to Python values as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are also accepted and mapped
    to the corresponding ``float`` values, although they are not part of
    the JSON spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
                 parse_int=None, parse_constant=None, strict=True,
                 object_pairs_hook=None):
        """Configure how this decoder turns JSON text into Python objects.

        ``encoding`` is the encoding used to interpret ``str`` input
        (utf-8 by default); it has no effect on ``unicode`` input. Only
        encodings that are a superset of ASCII work; text in any other
        encoding should be passed in as ``unicode``.

        ``object_hook``, if given, is called with the ``dict`` built for
        every decoded JSON object, and its return value replaces that
        ``dict`` (useful for custom deserialization such as JSON-RPC
        class hinting).

        ``object_pairs_hook``, if given, is called with the ordered list
        of ``(key, value)`` pairs of every decoded JSON object, and its
        return value is used instead of the ``dict``. This supports
        decoders that depend on key order (collections.OrderedDict, for
        example). It takes priority over ``object_hook`` when both are
        given.

        ``parse_float``, if given, is called with the string of every
        JSON float; the default is equivalent to float(num_str). It can
        be used to substitute another type or parser, e.g.
        decimal.Decimal.

        ``parse_int``, if given, is called with the string of every JSON
        int; the default is equivalent to int(num_str). It can be used to
        substitute another type or parser, e.g. float.

        ``parse_constant``, if given, is called with one of the strings
        -Infinity, Infinity or NaN. It can be used to raise an exception
        when such invalid JSON numbers are encountered.

        ``strict`` is true by default; when false, control characters
        (character codes 0-31, which includes tab, newline, carriage
        return and NUL) are allowed inside strings.

        """
        # Fall back to the built-in parsers for any hook that is unset
        # (or otherwise falsy).
        if not parse_float:
            parse_float = float
        if not parse_int:
            parse_int = int
        if not parse_constant:
            parse_constant = _CONSTANTS.__getitem__

        self.encoding = encoding
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        self.parse_float = parse_float
        self.parse_int = parse_int
        self.parse_constant = parse_constant
        self.strict = strict
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        # Built last: the scanner is constructed from this instance after
        # all of the attributes above have been set.
        self.scan_once = scanner.make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Decode ``s``, a ``str`` or ``unicode`` instance holding exactly
        one JSON document, and return its Python representation.

        Whitespace before and after the document is skipped; anything
        else after it raises ValueError ("Extra data").

        """
        start = _w(s, 0).end()
        value, stop = self.raw_decode(s, idx=start)
        stop = _w(s, stop).end()
        if stop == len(s):
            return value
        raise ValueError(errmsg("Extra data", s, stop, len(s)))

    def raw_decode(self, s, idx=0):
        """Decode the JSON document that starts at ``s[idx]``, where ``s``
        is a ``str`` or ``unicode``, and return a 2-tuple of its Python
        representation and the index in ``s`` where the document ended.

        Unlike ``decode``, trailing data after the document is not an
        error, so this can be used on input with extraneous data at the
        end. Raises ValueError when no document can be decoded.

        """
        try:
            scanned = self.scan_once(s, idx)
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        value, stop = scanned
        return value, stop