blob: f458593328d54d17f9dc3c3267b55b177b396f5e [file] [log] [blame]
# Copyright 2015 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import re
from typing import (
Any,
Callable,
IO,
Iterable,
Mapping,
Optional,
Set,
Tuple,
Union,
)
import unicodedata
from .parser import Parser
def load(
    fp: IO,
    *,
    encoding: Optional[str] = None,
    cls: None = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
) -> Any:
    """Deserialize ``fp`` (a ``.read()``-supporting file-like object
    containing a JSON document) to a Python object.

    Supports almost the same arguments as ``json.load()`` except that:
        - the `cls` keyword is ignored.
        - an extra `allow_duplicate_keys` parameter supports checking for
          duplicate keys in an object; by default, this is True for
          compatibility with ``json.load()``, but if set to False and
          the object contains duplicate keys, a ValueError will be raised.
    """
    # Slurp the whole document and let ``loads`` do all the real work.
    return loads(
        fp.read(),
        encoding=encoding,
        cls=cls,
        object_hook=object_hook,
        parse_float=parse_float,
        parse_int=parse_int,
        parse_constant=parse_constant,
        object_pairs_hook=object_pairs_hook,
        allow_duplicate_keys=allow_duplicate_keys,
    )
def loads(
    s: str,
    *,
    encoding: Optional[str] = None,
    cls: None = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
):
    """Deserialize ``s`` (a string containing a JSON5 document) to a Python
    object.

    Supports the same arguments as ``json.load()`` except that:
        - the `cls` keyword is ignored.
        - an extra `allow_duplicate_keys` parameter supports checking for
          duplicate keys in an object; by default, this is True for
          compatibility with ``json.load()``, but if set to False and
          the object contains duplicate keys, a ValueError will be raised.
    """
    assert cls is None, 'Custom decoders are not supported'

    if isinstance(s, bytes):
        s = s.decode(encoding or 'utf-8')
    if not s:
        raise ValueError('Empty strings are not legal JSON5')

    ast, err, _ = Parser(s, '<string>').parse()
    if err:
        raise ValueError(err)

    # Choose the callable used to materialize each parsed object:
    # object_pairs_hook wins over object_hook, matching ``json``.
    if object_pairs_hook:
        dictify = object_pairs_hook
    elif object_hook:

        def dictify(pairs):
            return object_hook(dict(pairs))
    else:
        dictify = dict

    if not allow_duplicate_keys:
        # Wrap the chosen callable with a duplicate-key check.
        _inner_dictify = dictify

        def dictify(pairs):  # pylint: disable=function-redefined
            return _reject_duplicate_keys(pairs, _inner_dictify)

    def _const(text):
        # Map JSON5's 'Infinity'/'NaN' spellings onto float()'s spellings.
        return float(text.replace('Infinity', 'inf').replace('NaN', 'nan'))

    return _walk_ast(
        ast,
        dictify,
        parse_float or float,
        parse_int or int,
        parse_constant or _const,
    )
def _reject_duplicate_keys(pairs, dictify):
keys = set()
for key, _ in pairs:
if key in keys:
raise ValueError(f'Duplicate key "{key}" found in object')
keys.add(key)
return dictify(pairs)
def _walk_ast(
el,
dictify: Callable[[Iterable[Tuple[str, Any]]], Any],
parse_float,
parse_int,
parse_constant,
):
if el == 'None':
return None
if el == 'True':
return True
if el == 'False':
return False
ty, v = el
if ty == 'number':
if v.startswith('0x') or v.startswith('0X'):
return parse_int(v, base=16)
if '.' in v or 'e' in v or 'E' in v:
return parse_float(v)
if 'Infinity' in v or 'NaN' in v:
return parse_constant(v)
return parse_int(v)
if ty == 'string':
return v
if ty == 'object':
pairs = []
for key, val_expr in v:
val = _walk_ast(
val_expr, dictify, parse_float, parse_int, parse_constant
)
pairs.append((key, val))
return dictify(pairs)
if ty == 'array':
return [
_walk_ast(el, dictify, parse_float, parse_int, parse_constant)
for el in v
]
raise ValueError('unknown el: ' + el) # pragma: no cover
def dump(
    obj: Any,
    fp: IO,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = True,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: None = None,
    indent: Optional[Union[int, str]] = None,
    separators: Optional[Tuple[str, str]] = None,
    default: Optional[Callable[[Any], Any]] = None,
    sort_keys: bool = False,
    quote_keys: bool = False,
    trailing_commas: bool = True,
    allow_duplicate_keys: bool = True,
    **kwargs,
):
    """Serialize ``obj`` as a JSON5-formatted stream written to ``fp``,
    a ``.write()``-supporting file-like object.

    Supports the same arguments as ``json.dump()``, except that:
        - The ``cls`` keyword is not supported.
        - The ``encoding`` keyword is ignored; Unicode strings are always
          written.
        - By default, object keys that are legal identifiers are not quoted;
          if you pass ``quote_keys=True``, they will be.
        - By default, if lists and objects span multiple lines of output
          (i.e., when ``indent`` >= 0), the last item will have a trailing
          comma after it. If you pass ``trailing_commas=False``, it will not.
        - If you use a number, a boolean, or ``None`` as a key value in a
          dict, it will be converted to the corresponding JSON string value,
          e.g. "1", "true", or "null". By default, ``dump()`` will match the
          `json` module's behavior and produce malformed JSON if you mix keys
          of different types that have the same converted value; e.g.,
          ``{1: "foo", "1": "bar"}`` produces '{"1": "foo", "1": "bar"}', an
          object with duplicated keys. If you pass
          ``allow_duplicate_keys=False``, an exception will be raised instead.
        - If `quote_keys` is true, then keys of objects will be enclosed in
          quotes, as in regular JSON. Otherwise, keys will not be enclosed in
          quotes unless they contain whitespace.
        - If `trailing_commas` is false, then commas will not be inserted
          after the final elements of objects and arrays, as in regular JSON.
          Otherwise, such commas will be inserted.
        - If `allow_duplicate_keys` is false, then only the last entry with a
          given key will be written. Otherwise, all entries with the same key
          will be written.

    Calling ``dump(obj, fp, quote_keys=True, trailing_commas=False, \
                   allow_duplicate_keys=True)``
    should produce exactly the same output as ``json.dump(obj, fp).``
    """
    del kwargs
    # Serialize to a string first, then write it out in one call.
    text = dumps(
        obj=obj,
        skipkeys=skipkeys,
        ensure_ascii=ensure_ascii,
        check_circular=check_circular,
        allow_nan=allow_nan,
        cls=cls,
        indent=indent,
        separators=separators,
        default=default,
        sort_keys=sort_keys,
        quote_keys=quote_keys,
        trailing_commas=trailing_commas,
        allow_duplicate_keys=allow_duplicate_keys,
    )
    fp.write(text)
def dumps(
    obj: Any,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = True,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: None = None,
    indent: Optional[Union[int, str]] = None,
    separators: Optional[Tuple[str, str]] = None,
    default: Optional[Callable[[Any], Any]] = None,
    sort_keys: bool = False,
    quote_keys: bool = False,
    trailing_commas: bool = True,
    allow_duplicate_keys: bool = True,
    **kwargs,
):
    """Serialize ``obj`` to a JSON5-formatted string.

    Supports the same arguments as ``json.dumps()``, except that:
        - The ``cls`` keyword is not supported.
        - The ``encoding`` keyword is ignored; Unicode strings are always
          written.
        - By default, object keys that are legal identifiers are not quoted;
          if you pass ``quote_keys=True``, they will be.
        - By default, if lists and objects span multiple lines of output
          (i.e., when ``indent`` >= 0), the last item will have a trailing
          comma after it. If you pass ``trailing_commas=False``, it will not.
        - If you use a number, a boolean, or ``None`` as a key value in a
          dict, it will be converted to the corresponding JSON string value,
          e.g. "1", "true", or "null". By default, ``dumps()`` will match the
          `json` module's behavior and produce malformed JSON if you mix keys
          of different types that have the same converted value; e.g.,
          ``{1: "foo", "1": "bar"}`` produces '{"1": "foo", "1": "bar"}', an
          object with duplicated keys. If you pass
          ``allow_duplicate_keys=False``, an exception will be raised instead.
        - If `quote_keys` is true, then keys of objects will be enclosed
          in quotes, as in regular JSON. Otherwise, keys will not be enclosed
          in quotes unless they contain whitespace.
        - If `trailing_commas` is false, then commas will not be inserted
          after the final elements of objects and arrays, as in regular JSON.
          Otherwise, such commas will be inserted.
        - If `allow_duplicate_keys` is false, then only the last entry with a
          given key will be written. Otherwise, all entries with the same key
          will be written.

    Calling ``dumps(obj, quote_keys=True, trailing_commas=False, \
                    allow_duplicate_keys=True)``
    should produce exactly the same output as ``json.dumps(obj).``
    """
    # Check the declared parameter itself (as ``loads`` does).  The old
    # ``kwargs.get('cls')`` check was dead code: ``cls`` is a named
    # parameter, so it can never appear in **kwargs.
    assert cls is None, 'Custom encoders are not supported'
    del kwargs

    if separators is None:
        if indent is None:
            # Compact single-line output: space after the comma.
            separators = (', ', ': ')
        else:
            # Indented output: the newline supplies the spacing.
            separators = (',', ': ')

    default = default or _raise_type_error

    # ``seen`` tracks ids of containers on the serialization stack so that
    # circular references can be detected; None disables the check.
    if check_circular:
        seen: Optional[Set[int]] = set()
    else:
        seen = None

    level = 1
    is_key = False
    # _dumps returns (valid_as_key, text); only the text matters here.
    _, v = _dumps(
        obj,
        skipkeys,
        ensure_ascii,
        check_circular,
        allow_nan,
        indent,
        separators,
        default,
        sort_keys,
        quote_keys,
        trailing_commas,
        allow_duplicate_keys,
        seen,
        level,
        is_key,
    )
    return v
def _dumps(
    obj,
    skipkeys,
    ensure_ascii,
    check_circular,
    allow_nan,
    indent,
    separators,
    default,
    sort_keys,
    quote_keys,
    trailing_commas,
    allow_duplicate_keys,
    seen: Optional[Set[int]],
    level: int,
    is_key: bool,
):
    """Recursively serialize ``obj``; returns a ``(valid, s)`` tuple.

    ``s`` is the serialized text (or None for a skipped invalid key).
    ``valid`` is True when the text may be used as an object key —
    strings, and scalars rendered while ``is_key`` is true; container
    results come back as ``(False, s)``.

    ``seen`` holds the ``id()``s of containers currently on the
    serialization stack (circular-reference detection), or None when the
    check is disabled.  ``level`` is the nesting depth used to compute
    indentation; the top-level caller passes 1.
    """
    # pylint: disable=too-many-statements
    # Scalars first.  True/False/None are tested by identity before the
    # numeric equality tests below, so e.g. True (== 1) can't be claimed
    # by a later branch.
    if obj is True:
        s = 'true'
    elif obj is False:
        s = 'false'
    elif obj is None:
        s = 'null'
    elif obj == float('inf'):
        if allow_nan:
            s = 'Infinity'
        else:
            # NOTE(review): message intentionally(?) empty — confirm.
            raise ValueError()
    elif obj == float('-inf'):
        if allow_nan:
            s = '-Infinity'
        else:
            raise ValueError()
    elif isinstance(obj, float) and math.isnan(obj):
        if allow_nan:
            s = 'NaN'
        else:
            raise ValueError()
    elif isinstance(obj, str):
        # Identifier-like keys may be written unquoted in JSON5 (unless
        # the caller asked for quoted keys, or the name is reserved).
        if (
            is_key
            and _is_ident(obj)
            and not quote_keys
            and not _is_reserved_word(obj)
        ):
            return True, obj
        return True, _dump_str(obj, ensure_ascii)
    elif isinstance(obj, int):
        # Subclasses of `int` and `float` may have custom
        # __repr__ or __str__ methods, but the `JSON` library
        # ignores them in order to ensure that the representation
        # are just bare numbers. In order to match JSON's behavior
        # we call the methods of the `float` and `int` class directly.
        s = int.__repr__(obj)
    elif isinstance(obj, float):
        # See comment above for int
        s = float.__repr__(obj)
    else:
        # Not a scalar; handled as a container (or via `default`) below.
        s = None
    if is_key:
        # Non-string scalar keys are quoted ("1", "true", "null"),
        # matching the stdlib json module's key coercion.
        if s is not None:
            return True, f'"{s}"'
        if skipkeys:
            return False, None
        raise TypeError(f'invalid key {repr(obj)}')
    if s is not None:
        return True, s
    # From here on obj is a container (or is routed through `default`).
    # Precompute the indentation strings shared by dicts and arrays:
    # `indent_str` follows the opening bracket, `end_str` precedes the
    # closing one (optionally after a trailing comma).
    if indent is not None:
        end_str = ''
        if trailing_commas:
            end_str = ','
        if isinstance(indent, int):
            if indent > 0:
                indent_str = '\n' + ' ' * indent * level
                end_str += '\n' + ' ' * indent * (level - 1)
            else:
                indent_str = '\n'
                end_str += '\n'
        else:
            # `indent` is a string (e.g. '\t') repeated once per level.
            indent_str = '\n' + indent * level
            end_str += '\n' + indent * (level - 1)
    else:
        indent_str = ''
        end_str = ''
    item_sep, kv_sep = separators
    item_sep += indent_str
    if seen is not None:
        # Push this container onto the in-progress set; a repeat visit
        # while it is still on the stack means a cycle.
        i = id(obj)
        if i in seen:
            raise ValueError('Circular reference detected.')
        seen.add(i)
    # Ideally we'd use collections.abc.Mapping and collections.abc.Sequence
    # here, but for backwards-compatibility with potential old callers,
    # we only check for the two attributes we need in each case.
    if hasattr(obj, 'keys') and hasattr(obj, '__getitem__'):
        s = _dump_dict(
            obj,
            skipkeys,
            ensure_ascii,
            check_circular,
            allow_nan,
            indent,
            separators,
            default,
            sort_keys,
            quote_keys,
            trailing_commas,
            allow_duplicate_keys,
            seen,
            level + 1,
            item_sep,
            kv_sep,
            indent_str,
            end_str,
        )
    elif hasattr(obj, '__getitem__') and hasattr(obj, '__iter__'):
        s = _dump_array(
            obj,
            skipkeys,
            ensure_ascii,
            check_circular,
            allow_nan,
            indent,
            separators,
            default,
            sort_keys,
            quote_keys,
            trailing_commas,
            allow_duplicate_keys,
            seen,
            level + 1,
            item_sep,
            indent_str,
            end_str,
        )
    else:
        # Unknown type: let the user-supplied `default` hook convert it
        # (the default hook raises TypeError), then serialize the result.
        s = _dumps(
            default(obj),
            skipkeys,
            ensure_ascii,
            check_circular,
            allow_nan,
            indent,
            separators,
            default,
            sort_keys,
            quote_keys,
            trailing_commas,
            allow_duplicate_keys,
            seen,
            level,
            is_key,
        )[1]
    if seen is not None:
        # Pop the container now that all of its children are serialized.
        seen.remove(i)
    return False, s
def _dump_dict(
    obj,
    skipkeys,
    ensure_ascii,
    check_circular,
    allow_nan,
    indent,
    separators,
    default,
    sort_keys,
    quote_keys,
    trailing_commas,
    allow_duplicate_keys,
    seen,
    level,
    item_sep,
    kv_sep,
    indent_str,
    end_str,
):
    """Serialize the mapping ``obj`` as a JSON5 object string.

    ``item_sep``/``kv_sep`` separate entries and key/value pairs;
    ``indent_str``/``end_str`` are the pre-computed strings placed after
    '{' and before '}'.  The remaining arguments are passed through
    unchanged to the recursive ``_dumps`` calls.

    Raises ValueError if ``allow_duplicate_keys`` is false and two keys
    serialize to the same string.
    """
    if not obj:
        return '{}'
    if sort_keys:
        keys = sorted(obj.keys())
    else:
        keys = obj.keys()
    s = '{' + indent_str
    num_items_added = 0
    # Serialized forms of the keys written so far; used to catch
    # duplicates such as 1 and "1", which both serialize to '"1"'.
    new_keys = set()
    for key in keys:
        # Render the key; valid_key is False when the key's type has no
        # key representation (with skipkeys set, the entry is dropped;
        # without it, _dumps raises TypeError before returning).
        valid_key, key_str = _dumps(
            key,
            skipkeys,
            ensure_ascii,
            check_circular,
            allow_nan,
            indent,
            separators,
            default,
            sort_keys,
            quote_keys,
            trailing_commas,
            allow_duplicate_keys,
            seen,
            level,
            is_key=True,
        )
        if skipkeys and not valid_key:
            continue
        if not allow_duplicate_keys:
            if key_str in new_keys:
                raise ValueError(f'duplicate key {repr(key)}')
            new_keys.add(key_str)
        # Separators go before every entry except the first.
        if num_items_added:
            s += item_sep
        s += (
            key_str
            + kv_sep
            + _dumps(
                obj[key],
                skipkeys,
                ensure_ascii,
                check_circular,
                allow_nan,
                indent,
                separators,
                default,
                sort_keys,
                quote_keys,
                trailing_commas,
                allow_duplicate_keys,
                seen,
                level,
                is_key=False,
            )[1]
        )
        num_items_added += 1
    s += end_str + '}'
    return s
def _dump_array(
    obj,
    skipkeys,
    ensure_ascii,
    check_circular,
    allow_nan,
    indent,
    separators,
    default,
    sort_keys,
    quote_keys,
    trailing_commas,
    allow_duplicate_keys,
    seen,
    level,
    item_sep,
    indent_str,
    end_str,
):
    """Serialize the sequence ``obj`` as a JSON5 array string.

    ``item_sep`` separates the rendered elements; ``indent_str`` and
    ``end_str`` are the pre-computed strings placed after '[' and before
    ']'.  All other arguments are forwarded to the recursive ``_dumps``
    calls unchanged.
    """
    if not obj:
        return '[]'
    # Render each element (never as a key), keeping only the text part of
    # the (valid, text) pairs that _dumps returns.
    rendered = []
    for element in obj:
        rendered.append(
            _dumps(
                element,
                skipkeys,
                ensure_ascii,
                check_circular,
                allow_nan,
                indent,
                separators,
                default,
                sort_keys,
                quote_keys,
                trailing_commas,
                allow_duplicate_keys,
                seen,
                level,
                False,
            )[1]
        )
    return '[' + indent_str + item_sep.join(rendered) + end_str + ']'
def _dump_str(obj, ensure_ascii):
ret = ['"']
for ch in obj:
if ch == '\\':
ret.append('\\\\')
elif ch == '"':
ret.append('\\"')
elif ch == '\u2028':
ret.append('\\u2028')
elif ch == '\u2029':
ret.append('\\u2029')
elif ch == '\n':
ret.append('\\n')
elif ch == '\r':
ret.append('\\r')
elif ch == '\b':
ret.append('\\b')
elif ch == '\f':
ret.append('\\f')
elif ch == '\t':
ret.append('\\t')
elif ch == '\v':
ret.append('\\v')
elif ch == '\0':
ret.append('\\0')
elif not ensure_ascii:
ret.append(ch)
else:
o = ord(ch)
if 32 <= o < 128:
ret.append(ch)
elif o < 65536:
ret.append(f'\\u{o:04x}')
else:
val = o - 0x10000
high = 0xD800 + (val >> 10)
low = 0xDC00 + (val & 0x3FF)
ret.append(f'\\u{high:04x}\\u{low:04x}')
return ''.join(ret) + '"'
def _is_ident(k):
    """Return True if ``k`` is a legal ES5 identifier name."""
    if not k:
        return False
    first = k[0]
    # The first character must be an identifier-start letter, '$', or '_'.
    if first not in ('$', '_') and not _is_id_start(first):
        return False
    # Every remaining character must be an identifier-continue char,
    # '$', or '_'.
    return all(
        ch in ('$', '_') or _is_id_continue(ch) for ch in k[1:]
    )
def _is_id_start(ch):
return unicodedata.category(ch) in (
'Lu',
'Ll',
'Li',
'Lt',
'Lm',
'Lo',
'Nl',
)
def _is_id_continue(ch):
return unicodedata.category(ch) in (
'Lu',
'Ll',
'Li',
'Lt',
'Lm',
'Lo',
'Nl',
'Nd',
'Mn',
'Mc',
'Pc',
)
_reserved_word_re = None
def _is_reserved_word(k):
global _reserved_word_re
if _reserved_word_re is None:
# List taken from section 7.6.1 of ECMA-262.
_reserved_word_re = re.compile(
'('
+ '|'.join(
[
'break',
'case',
'catch',
'class',
'const',
'continue',
'debugger',
'default',
'delete',
'do',
'else',
'enum',
'export',
'extends',
'false',
'finally',
'for',
'function',
'if',
'import',
'in',
'instanceof',
'new',
'null',
'return',
'super',
'switch',
'this',
'throw',
'true',
'try',
'typeof',
'var',
'void',
'while',
'with',
]
)
+ ')$'
)
return _reserved_word_re.match(k) is not None
def _raise_type_error(obj):
raise TypeError(f'{repr(obj)} is not JSON5 serializable')