| # Copyright 2015 Google Inc. All rights reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| import math |
| import re |
| import sys |
| import unicodedata |
| |
| from .parser import Parser |
| |
# Python 2/3 compatibility aliases used by the rest of this module:
#   - ``str_types``: the tuple of types treated as text when serializing.
#   - ``str`` (Python 2 only): rebound to ``unicode`` so that ``str(x)``
#     always produces a text string.
#   - ``long`` (Python 3 only): alias of ``int``, since Python 3 has a single
#     integer type.
if sys.version_info[0] >= 3:
    str_types = (str,)
    long = int  # pylint: disable=redefined-builtin, invalid-name
else:
    str_types = (str, unicode)
    str = unicode  # pylint: disable=redefined-builtin, invalid-name
| |
| |
def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
         parse_int=None, parse_constant=None, object_pairs_hook=None,
         allow_duplicate_keys=True):
    """Read a JSON5 document from ``fp`` and return it as a Python object.

    ``fp`` is any object with a ``.read()`` method; its entire contents are
    read and handed to ``loads()`` together with every other argument,
    unchanged.

    The arguments mirror ``json.load()``, with two differences:
      - ``cls`` is not supported (``loads()`` asserts that it is None).
      - ``allow_duplicate_keys`` (default True, matching ``json.load()``)
        controls duplicate-key checking: when False, an object containing
        the same key twice causes a ValueError to be raised.
    """
    text = fp.read()
    return loads(text,
                 encoding=encoding,
                 cls=cls,
                 object_hook=object_hook,
                 parse_float=parse_float,
                 parse_int=parse_int,
                 parse_constant=parse_constant,
                 object_pairs_hook=object_pairs_hook,
                 allow_duplicate_keys=allow_duplicate_keys)
| |
| |
def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
          parse_int=None, parse_constant=None, object_pairs_hook=None,
          allow_duplicate_keys=True):
    """Parse the JSON5 document in ``s`` and return it as a Python object.

    ``s`` may be a text string, or a byte string which is first decoded
    using ``encoding`` (``'utf-8'`` when no encoding is given).

    The arguments mirror ``json.loads()``, with two differences:
      - ``cls`` is not supported; passing anything other than None trips
        an assertion.
      - ``allow_duplicate_keys`` (default True, matching ``json.loads()``)
        controls duplicate-key checking: when False, an object containing
        the same key twice causes a ValueError to be raised.

    Hook precedence: ``object_pairs_hook`` wins over ``object_hook``; when
    neither is given, objects become plain ``dict`` instances.

    Raises ValueError if ``s`` is empty or the parser reports an error.
    """
    assert cls is None, 'Custom decoders are not supported'

    # ``type(b'')`` is ``bytes`` on Python 3 and ``str`` on Python 2, which
    # is exactly the type that needs decoding on each version.
    byte_string_type = type(b'')
    if isinstance(s, byte_string_type):
        s = s.decode(encoding or 'utf-8')

    if not s:
        raise ValueError('Empty strings are not legal JSON5')

    ast, err, _ = Parser(s, '<string>').parse()
    if err:
        raise ValueError(err)

    def _default_constant_parser(text):
        # float() understands 'inf'/'nan' (with an optional sign).
        return float(text.replace('Infinity', 'inf').replace('NaN', 'nan'))

    def _plain_dict(pairs):
        return dict(pairs)

    def _hooked_dict(pairs):
        return object_hook(dict(pairs))

    if object_pairs_hook:
        build_object = object_pairs_hook
    elif object_hook:
        build_object = _hooked_dict
    else:
        build_object = _plain_dict

    if allow_duplicate_keys:
        dictify = build_object
    else:
        def dictify(pairs):
            return _reject_duplicate_keys(pairs, build_object)

    return _walk_ast(ast,
                     dictify,
                     parse_float or float,
                     parse_int or int,
                     parse_constant or _default_constant_parser)
| |
| |
| def _reject_duplicate_keys(pairs, dictify): |
| keys = set() |
| for key, _ in pairs: |
| if key in keys: |
| raise ValueError('Duplicate key "%s" found in object', key) |
| keys.add(key) |
| return dictify(pairs) |
| |
| def _walk_ast(el, dictify, parse_float, parse_int, parse_constant): |
| if el == 'None': |
| return None |
| if el == 'True': |
| return True |
| if el == 'False': |
| return False |
| ty, v = el |
| if ty == 'number': |
| if v.startswith('0x') or v.startswith('0X'): |
| return parse_int(v, base=16) |
| elif '.' in v or 'e' in v or 'E' in v: |
| return parse_float(v) |
| elif 'Infinity' in v or 'NaN' in v: |
| return parse_constant(v) |
| else: |
| return parse_int(v) |
| if ty == 'string': |
| return v |
| if ty == 'object': |
| pairs = [] |
| for key, val_expr in v: |
| val = _walk_ast(val_expr, dictify, parse_float, parse_int, |
| parse_constant) |
| pairs.append((key, val)) |
| return dictify(pairs) |
| if ty == 'array': |
| return [_walk_ast(el, dictify, parse_float, parse_int, parse_constant) |
| for el in v] |
| raise Exception('unknown el: ' + el) # pragma: no cover |
| |
| |
def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
         allow_nan=True, cls=None, indent=None, separators=None,
         default=None, sort_keys=False,
         quote_keys=False, trailing_commas=True,
         allow_duplicate_keys=True,
         **kwargs):
    """Serialize ``obj`` as JSON5 text and write it to ``fp``.

    ``fp`` is any object with a ``.write()`` method. All named arguments are
    forwarded unchanged to ``dumps()``; any additional keyword arguments
    (for example ``encoding``) are accepted and ignored.

    The arguments mirror ``json.dump()``, with these differences:

    - Custom encoders (``cls``) are not supported.
    - Text is always written as Unicode; there is no ``encoding`` option.
    - ``quote_keys`` (default False): when false, string keys that are
      legal identifiers and not reserved words are written without quotes;
      all other keys are quoted. When true, every key is quoted, as in
      regular JSON.
    - ``trailing_commas`` (default True): when ``indent`` is given, a comma
      is written after the last item of each non-empty object and array.
      Pass False to omit it, as in regular JSON.
    - Numbers, booleans and ``None`` used as dict keys are converted to the
      corresponding quoted string, e.g. "1", "true" or "null". Mixing key
      types can therefore yield repeated keys; e.g.
      ``{1: "foo", "1": "bar"}`` produces '{"1": "foo", "1": "bar"}'.
    - ``allow_duplicate_keys`` (default True, matching the ``json``
      module): when true, such repeated keys are written as-is. When false,
      a ValueError is raised instead.

    Calling ``dump(obj, fp, quote_keys=True, trailing_commas=False,
    allow_duplicate_keys=True)`` is intended to produce the same output as
    ``json.dump(obj, fp)``.
    """
    text = dumps(obj=obj,
                 skipkeys=skipkeys,
                 ensure_ascii=ensure_ascii,
                 check_circular=check_circular,
                 allow_nan=allow_nan,
                 cls=cls,
                 indent=indent,
                 separators=separators,
                 default=default,
                 sort_keys=sort_keys,
                 quote_keys=quote_keys,
                 trailing_commas=trailing_commas,
                 allow_duplicate_keys=allow_duplicate_keys)
    fp.write(str(text))
| |
| |
def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
          allow_nan=True, cls=None, indent=None, separators=None,
          default=None, sort_keys=False,
          quote_keys=False, trailing_commas=True, allow_duplicate_keys=True,
          **kwargs):
    """Serialize ``obj`` to a JSON5-formatted string and return it.

    The arguments mirror ``json.dumps()``, with these differences:

    - Custom encoders (``cls``) are not supported.
    - Text is always produced as Unicode; there is no ``encoding`` option.
    - ``quote_keys`` (default False): when false, string keys that are
      legal identifiers and not reserved words are written without quotes;
      all other keys are quoted. When true, every key is quoted, as in
      regular JSON.
    - ``trailing_commas`` (default True): when ``indent`` is given, a comma
      is written after the last item of each non-empty object and array.
      Pass False to omit it, as in regular JSON.
    - Numbers, booleans and ``None`` used as dict keys are converted to the
      corresponding quoted string, e.g. "1", "true" or "null". Mixing key
      types can therefore yield repeated keys; e.g.
      ``{1: "foo", "1": "bar"}`` produces '{"1": "foo", "1": "bar"}'.
    - ``allow_duplicate_keys`` (default True, matching the ``json``
      module): when true, such repeated keys are written as-is. When false,
      a ValueError is raised instead.

    When ``separators`` is not given it defaults to ``(', ', ': ')`` without
    ``indent`` and ``(',', ': ')`` with it. When ``default`` is not given,
    objects that cannot be serialized raise TypeError.

    Calling ``dumps(obj, quote_keys=True, trailing_commas=False,
    allow_duplicate_keys=True)`` is intended to produce the same output as
    ``json.dumps(obj)``.
    """
    # NOTE(review): ``cls`` is a named parameter, so it can never show up in
    # ``kwargs`` and this check does not fire for ``dumps(obj, cls=...)``.
    assert kwargs.get('cls', None) is None, 'Custom encoders are not supported'

    if separators is None:
        separators = (u', ', u': ') if indent is None else (u',', u': ')

    fallback = default or _raise_type_error

    # Ids of the containers currently being serialized, or None when
    # circular-reference checking is turned off.
    seen = set() if check_circular else None

    _, text = _dumps(obj, skipkeys, ensure_ascii, check_circular,
                     allow_nan, indent, separators, fallback, sort_keys,
                     quote_keys, trailing_commas, allow_duplicate_keys,
                     seen, 1, False)
    return text
| |
| |
| def _dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, indent, |
| separators, default, sort_keys, |
| quote_keys, trailing_commas, allow_duplicate_keys, |
| seen, level, is_key): |
| if obj is True: |
| s = u'true' |
| elif obj is False: |
| s = u'false' |
| elif obj is None: |
| s = u'null' |
| elif obj == math.inf: |
| if allow_nan: |
| s = u'Infinity' |
| else: |
| raise ValueError() |
| elif obj == -math.inf: |
| if allow_nan: |
| s = u'-Infinity' |
| else: |
| raise ValueError() |
| elif isinstance(obj, float) and math.isnan(obj): |
| if allow_nan: |
| s = u'NaN' |
| else: |
| raise ValueError() |
| elif isinstance(obj, str_types): |
| if (is_key and _is_ident(obj) and not quote_keys |
| and not _is_reserved_word(obj)): |
| return True, obj |
| return True, _dump_str(obj, ensure_ascii) |
| elif isinstance(obj, int): |
| # Subclasses of `int` and `float` may have custom |
| # __repr__ or __str__ methods, but the `JSON` library |
| # ignores them in order to ensure that the representation |
| # are just bare numbers. In order to match JSON's behavior |
| # we call the methods of the `float` and `int` class directly. |
| s = int.__repr__(obj) |
| elif isinstance(obj, float): |
| # See comment above for int |
| s = float.__repr__(obj) |
| else: |
| s = None |
| |
| if is_key: |
| if s is not None: |
| return True, '"%s"' % s |
| if skipkeys: |
| return False, None |
| raise TypeError('invalid key %s' % repr(obj)) |
| |
| if s is not None: |
| return True, s |
| |
| if indent is not None: |
| end_str = '' |
| if trailing_commas: |
| end_str = ',' |
| if type(indent) == int: |
| if indent > 0: |
| indent_str = '\n' + ' ' * indent * level |
| end_str += '\n' + ' ' * indent * (level - 1) |
| else: |
| indent_str = '\n' |
| end_str += '\n' |
| else: |
| indent_str = '\n' + indent * level |
| end_str += '\n' + indent * (level - 1) |
| else: |
| indent_str = '' |
| end_str = '' |
| |
| item_sep, kv_sep = separators |
| item_sep += indent_str |
| |
| if seen is not None: |
| i = id(obj) |
| if i in seen: |
| raise ValueError('Circular reference detected.') |
| else: |
| seen.add(i) |
| |
| # In Python3, we'd check if this was an abc.Mapping or an abc.Sequence. |
| # For now, just check for the attrs we need to iterate over the object. |
| if hasattr(obj, 'keys') and hasattr(obj, '__getitem__'): |
| s = _dump_dict(obj, skipkeys, ensure_ascii, |
| check_circular, allow_nan, indent, |
| separators, default, sort_keys, |
| quote_keys, trailing_commas, |
| allow_duplicate_keys, seen, level + 1, |
| item_sep, kv_sep, indent_str, end_str) |
| elif hasattr(obj, '__getitem__') and hasattr(obj, '__iter__'): |
| s = _dump_array(obj, skipkeys, ensure_ascii, |
| check_circular, allow_nan, indent, |
| separators, default, sort_keys, |
| quote_keys, trailing_commas, |
| allow_duplicate_keys, seen, level + 1, |
| item_sep, indent_str, end_str) |
| else: |
| s = _dumps(default(obj), skipkeys, ensure_ascii, |
| check_circular, allow_nan, indent, |
| separators, default, sort_keys, |
| quote_keys, trailing_commas, |
| allow_duplicate_keys, seen, level, |
| is_key)[1] |
| |
| if seen is not None: |
| seen.remove(i) |
| return False, s |
| |
| |
def _dump_dict(obj, skipkeys, ensure_ascii, check_circular, allow_nan,
               indent, separators, default, sort_keys,
               quote_keys, trailing_commas, allow_duplicate_keys,
               seen, level, item_sep, kv_sep, indent_str, end_str):
    """Serialize the mapping ``obj`` as a JSON5 object and return the text.

    Keys and values are each rendered with ``_dumps``. ``item_sep`` goes
    between entries, ``kv_sep`` between a key and its value, ``indent_str``
    after the opening brace and ``end_str`` before the closing brace. An
    empty mapping is always written as ``{}``.

    Raises:
        ValueError: if ``allow_duplicate_keys`` is false and two keys
            serialize to the same text.
        TypeError: if a key cannot be serialized and ``skipkeys`` is false.
    """
    if not obj:
        return u'{}'

    keys = sorted(obj.keys()) if sort_keys else obj.keys()

    entries = []
    emitted_keys = set()
    for key in keys:
        valid_key, key_str = _dumps(key, skipkeys, ensure_ascii,
                                    check_circular, allow_nan, indent,
                                    separators, default, sort_keys,
                                    quote_keys, trailing_commas,
                                    allow_duplicate_keys,
                                    seen, level, is_key=True)
        if not valid_key:
            if not skipkeys:
                raise TypeError('invalid key %s' % repr(key))
            continue

        if not allow_duplicate_keys:
            # Compare the serialized text, so that e.g. 1 and "1" collide.
            if key_str in emitted_keys:
                raise ValueError('duplicate key %s' % repr(key))
            emitted_keys.add(key_str)

        value_str = _dumps(obj[key], skipkeys, ensure_ascii,
                           check_circular, allow_nan, indent,
                           separators, default, sort_keys,
                           quote_keys, trailing_commas,
                           allow_duplicate_keys,
                           seen, level, is_key=False)[1]
        entries.append(key_str + kv_sep + value_str)

    return u'{' + indent_str + item_sep.join(entries) + end_str + u'}'
| |
| |
def _dump_array(obj, skipkeys, ensure_ascii, check_circular, allow_nan,
                indent, separators, default, sort_keys,
                quote_keys, trailing_commas, allow_duplicate_keys,
                seen, level, item_sep, indent_str, end_str):
    """Serialize the sequence ``obj`` as a JSON5 array and return the text.

    Each element is rendered with ``_dumps``. ``item_sep`` goes between
    elements, ``indent_str`` after the opening bracket and ``end_str``
    before the closing bracket. An empty sequence is always written as
    ``[]``.
    """
    if not obj:
        return u'[]'

    rendered = []
    for el in obj:
        _, el_str = _dumps(el, skipkeys, ensure_ascii, check_circular,
                           allow_nan, indent, separators, default,
                           sort_keys, quote_keys, trailing_commas,
                           allow_duplicate_keys,
                           seen, level, False)
        rendered.append(el_str)

    return u'[' + indent_str + item_sep.join(rendered) + end_str + u']'
| |
| |
| def _dump_str(obj, ensure_ascii): |
| ret = ['"'] |
| for ch in obj: |
| if ch == '\\': |
| ret.append('\\\\') |
| elif ch == '"': |
| ret.append('\\"') |
| elif ch == u'\u2028': |
| ret.append('\\u2028') |
| elif ch == u'\u2029': |
| ret.append('\\u2029') |
| elif ch == '\n': |
| ret.append('\\n') |
| elif ch == '\r': |
| ret.append('\\r') |
| elif ch == '\b': |
| ret.append('\\b') |
| elif ch == '\f': |
| ret.append('\\f') |
| elif ch == '\t': |
| ret.append('\\t') |
| elif ch == '\v': |
| ret.append('\\v') |
| elif ch == '\0': |
| ret.append('\\0') |
| elif not ensure_ascii: |
| ret.append(ch) |
| else: |
| o = ord(ch) |
| if o >= 32 and o < 128: |
| ret.append(ch) |
| elif o < 65536: |
| ret.append('\\u' + '%04x' % o) |
| else: |
| val = o - 0x10000 |
| high = 0xd800 + (val >> 10) |
| low = 0xdc00 + (val & 0x3ff) |
| ret.append('\\u%04x\\u%04x' % (high, low)) |
| return u''.join(ret) + '"' |
| |
| |
def _is_ident(k):
    """Return True if ``k`` can be written as an unquoted identifier.

    The text of ``k`` must be non-empty, start with ``$``, ``_`` or an
    identifier-start character, and continue with only ``$``, ``_`` or
    identifier-continue characters.
    """
    text = str(k)
    if not text:
        return False

    always_allowed = (u'$', u'_')
    head, tail = text[0], text[1:]
    if head not in always_allowed and not _is_id_start(head):
        return False
    return all(ch in always_allowed or _is_id_continue(ch) for ch in tail)
| |
| |
| def _is_id_start(ch): |
| return unicodedata.category(ch) in ( |
| 'Lu', 'Ll', 'Li', 'Lt', 'Lm', 'Lo', 'Nl') |
| |
| |
| def _is_id_continue(ch): |
| return unicodedata.category(ch) in ( |
| 'Lu', 'Ll', 'Li', 'Lt', 'Lm', 'Lo', 'Nl', 'Nd', 'Mn', 'Mc', 'Pc') |
| |
| |
| _reserved_word_re = None |
| |
| def _is_reserved_word(k): |
| global _reserved_word_re |
| |
| if _reserved_word_re is None: |
| # List taken from section 7.6.1 of ECMA-262. |
| _reserved_word_re = re.compile('(' + '|'.join([ |
| 'break', |
| 'case', |
| 'catch', |
| 'class', |
| 'const', |
| 'continue', |
| 'debugger', |
| 'default', |
| 'delete', |
| 'do', |
| 'else', |
| 'enum', |
| 'export', |
| 'extends', |
| 'false', |
| 'finally', |
| 'for', |
| 'function', |
| 'if', |
| 'import', |
| 'in', |
| 'instanceof', |
| 'new', |
| 'null', |
| 'return', |
| 'super', |
| 'switch', |
| 'this', |
| 'throw', |
| 'true', |
| 'try', |
| 'typeof', |
| 'var', |
| 'void', |
| 'while', |
| 'with', |
| ]) + ')$') |
| return _reserved_word_re.match(k) is not None |
| |
| |
| def _raise_type_error(obj): |
| raise TypeError('%s is not JSON5 serializable' % repr(obj)) |