Initial
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..635979d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,29 @@
+*.py[cdo]
+
+/env*/
+/build/
+/cython_debug/
+/dist/
+/*.egg-info/
+
+*.c
+*.cpp
+*.so
+*.o
+
+run.cgi
+
+*.swp*
+*.nfs*
+*~
+*.~*
+~*
+*.tmp
+*.old
+*.bak
+*.pid
+
+.*
+!.git*
+
+*.orig
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..f8a6e42
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,6 @@
+include pyjson5.pyx
+include pyjson5.cpp
+include Makefile
+include requires.txt
+recursive-include src **
+recursive-include docs **
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..c15adb7
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,27 @@
+all: sdist bdist_wheel docs
+
+.PHONY: all sdist bdist_wheel clean docs
+
+FILES := Makefile MANIFEST.in pyjson5.pyx README.rst setup.py \
+ src/native.hpp src/VERSION
+
+pyjson5.cpp: pyjson5.pyx $(wildcard src/*.pyx)
+ python -m cython -o $@ $<
+
+sdist: pyjson5.cpp ${FILES}
+ rm -f -- dist/pyjson5-*.tar.gz
+ python setup.py sdist
+
+bdist_wheel: pyjson5.cpp ${FILES} | sdist
+ rm -f -- dist/pyjson5-*.whl
+ python setup.py bdist_wheel
+
+docs: bdist_wheel $(wildcard docs/* docs/*/*)
+ pip install --force dist/pyjson5-*.whl
+ python -m sphinx -M html docs/ dist/
+
+clean:
+ [ ! -d build/ ] || rm -r -- build/
+ [ ! -d dist/ ] || rm -r -- dist/
+ [ ! -d pyjson5.egg-info/ ] || rm -r -- pyjson5.egg-info/
+	rm -f -- pyjson5.*.so pyjson5.cpp
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..6db15cd
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,44 @@
+PyJSON5
+==========
+
+A JSON5 serializer and parser library for Python 3 written in
+`Cython <http://cython.org/>`_.
+
+
+Serializer
+----------
+
+The serializer returns ASCII data that can safely be used in an HTML template.
+Apostrophes, ampersands, greater-than, and less-than signs are encoded as
+unicode escaped sequences. E.g. this snippet is safe for any and all input:
+
+.. code:: html
+
+ "<a onclick='alert(" + encode(data) + ")'>show message</a>"
+
+Unless the input contains infinite or NaN values, the result will be valid
+`JSON <https://tools.ietf.org/html/rfc8259>`_ data.
+
+
+Parser
+------
+
+All valid `JSON5 1.0.0 <https://spec.json5.org/>`_ and
+`JSON <https://tools.ietf.org/html/rfc8259>`_ data can be read,
+unless the nesting level is absurdly high.
+
+Functions
+---------
+
+You can find the full documentation online at https://pyjson5.readthedocs.io/en/latest/.
+Or simply call ``help(pyjson5)``. :-)
+
+The library supplies load(s) and dump(s) functions, so you can use it as a
+drop-in replacement for Python's builtin ``json`` module, but you *should*
+use the functions ``encode_*()`` and ``decode_*()`` instead.
+
+Compatibility
+-------------
+
+At least CPython 3.4 is needed.
+Other interpreters such as Pypy and older CPython versions are not supported and will not be supported.
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..a300ad9
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,238 @@
+# -*- coding: utf-8 -*-
+#
+# PyJSON5 documentation build configuration file, created by
+# sphinx-quickstart on Wed May 2 18:15:32 2018.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+sys.path.insert(0, os.path.abspath('..'))
+
+
+# -- General configuration ------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+ 'sphinx.ext.autodoc',
+ 'sphinx.ext.autosummary',
+ 'sphinx.ext.napoleon',
+ 'sphinx.ext.intersphinx',
+ 'sphinx.ext.inheritance_diagram',
+ 'sphinx_autodoc_typehints',
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+# source_suffix = ['.rst', '.md']
+source_suffix = '.rst'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'PyJSON5'
+copyright = u'2018, René Kijewski'
+author = u'René Kijewski'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = u'0.4.3'
+# The full version, including alpha/beta/rc tags.
+release = u'0.4.3'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This patterns also effect to html_static_path and html_extra_path
+exclude_patterns = []
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'sphinx_rtd_theme'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+
+html_theme_options = {
+ 'navigation_depth': -1,
+}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+#html_static_path = ['_static']
+
+# Custom sidebar templates, must be a dictionary that maps document names
+# to template names.
+#
+# This is required for the alabaster theme
+# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
+html_sidebars = {
+ '**': [
+ 'localtoc.html',
+ 'searchbox.html',
+ ]
+}
+
+
+# -- Options for HTMLHelp output ------------------------------------------
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'PyJSON5doc'
+
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+ # The paper size ('letterpaper' or 'a4paper').
+ #
+ # 'papersize': 'letterpaper',
+
+ # The font size ('10pt', '11pt' or '12pt').
+ #
+ # 'pointsize': '10pt',
+
+ # Additional stuff for the LaTeX preamble.
+ #
+ # 'preamble': '',
+
+ # Latex figure (float) alignment
+ #
+ # 'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+# author, documentclass [howto, manual, or own class]).
+latex_documents = [
+ (master_doc, 'PyJSON5.tex', u'PyJSON5 Documentation',
+ u'René Kijewski', 'manual'),
+]
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+ (master_doc, 'pyjson5', u'PyJSON5 Documentation',
+ [author], 1)
+]
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+# dir menu entry, description, category)
+texinfo_documents = [
+ (master_doc, 'PyJSON5', u'PyJSON5 Documentation',
+ author, 'PyJSON5', 'One line description of project.',
+ 'Miscellaneous'),
+]
+
+
+display_toc = True
+autodoc_default_flags = ['members']
+autosummary_generate = True
+
+intersphinx_mapping = {
+ 'python': ('https://docs.python.org/3', None),
+}
+
+inheritance_graph_attrs = {
+ 'size': '"6.0, 8.0"',
+ 'fontsize': 32,
+ 'bgcolor': 'transparent',
+}
+inheritance_node_attrs = {
+ 'color': 'black',
+ 'fillcolor': 'white',
+ 'style': '"filled,solid"',
+}
+inheritance_edge_attrs = {
+ 'penwidth': 1.2,
+ 'arrowsize': 0.8,
+}
+
+
+###################################################################################################
+
+
+from sphinx.ext.autosummary import Autosummary
+from sphinx.ext.autosummary import get_documenter
+from sphinx.util.inspect import safe_getattr
+
+
+class AutoAutoSummary(Autosummary):
+ option_spec = {}
+
+ required_arguments = 1
+
+ @staticmethod
+ def get_members(obj):
+ for name in dir(obj):
+ try:
+ documenter = get_documenter(safe_getattr(obj, name), obj)
+ except AttributeError:
+ continue
+
+ if documenter.objtype in ('function',):
+ yield name
+
+ def run(self):
+ module = str(self.arguments[0])
+ try:
+ m = __import__(module, globals(), locals(), [])
+ self.content = sorted(
+ ('~%s.%s' % (module, member) for member in self.get_members(m)),
+ key=str.lower,
+ )
+ finally:
+ return super().run()
+
+
+def setup(app):
+ app.add_directive('autoautosummary', AutoAutoSummary)
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..91472f0
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,21 @@
+.. currentmodule:: pyjson5
+
+.. automodule:: pyjson5
+ :members:
+ :inherited-members:
+
+
+ → :ref:`Glossary / Index <genindex>`
+
+ Function summary
+ ----------------
+
+ .. autoautosummary:: pyjson5
+
+ Class / exception summary
+ -------------------------
+
+ .. inheritance-diagram:: pyjson5
+
+ Full description
+ ----------------
diff --git a/pyjson5.pyx b/pyjson5.pyx
new file mode 100644
index 0000000..60407d5
--- /dev/null
+++ b/pyjson5.pyx
@@ -0,0 +1,30 @@
+# distutils: language = c++
+# cython: embedsignature = True
+
+include 'src/_imports.pyx'
+include 'src/_constants.pyx'
+
+include 'src/_exceptions.pyx'
+include 'src/_exceptions_decoder.pyx'
+include 'src/_exceptions_encoder.pyx'
+include 'src/_raise_decoder.pyx'
+include 'src/_raise_encoder.pyx'
+
+include 'src/_unicode.pyx'
+include 'src/_unicode_mc.pyx'
+include 'src/_unicode_mn.pyx'
+
+include 'src/_reader_ucs.pyx'
+include 'src/_reader_callback.pyx'
+include 'src/_readers.pyx'
+include 'src/_decoder.pyx'
+
+include 'src/_writers.pyx'
+include 'src/_writer_reallocatable.pyx'
+include 'src/_writer_callback.pyx'
+include 'src/_writer_noop.pyx'
+include 'src/_encoder_options.pyx'
+include 'src/_encoder.pyx'
+
+include 'src/_exports.pyx'
+include 'src/_legacy.pyx'
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..f665769
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+cython == 0.*, >= 0.28.2
+wheel == 0.*, >= 0.31.0
+sphinx_autodoc_typehints == 1.*, >= 1.3
+sphinx_rtd_theme == 0.*, >= 0.3.1
diff --git a/setup.py b/setup.py
new file mode 100755
index 0000000..ec9fe7e
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+
+from setuptools import setup, Extension
+from os.path import dirname, join, abspath
+
+
+def get_text(name):
+ root = abspath(dirname(__file__))
+ with open(join(root, 'src', name), 'rt') as f:
+ return eval(f.read().strip())
+
+
+extra_compile_args = [
+ '-std=c++14', '-O2', '-fPIC', '-ggdb1', '-pipe',
+ '-fomit-frame-pointer', '-fstack-protector-strong',
+]
+
+name = 'pyjson5'
+
+setup(
+ name=name,
+ version=get_text('VERSION'),
+ long_description=get_text('DESCRIPTION'),
+ description='JSON5 serializer and parser for Python 3 written in Cython.',
+ author='René Kijewski',
+ author_email='pypi.org@k6i.de',
+ maintainer='René Kijewski',
+ maintainer_email='pypi.org@k6i.de',
+ url='https://github.com/Kijewski/pyjson5',
+ python_requires='~= 3.4',
+ zip_safe=False,
+ ext_modules=[Extension(
+ name,
+ sources=[name + '.pyx'],
+ include_dirs=['src'],
+ extra_compile_args=extra_compile_args,
+ extra_link_args=extra_compile_args,
+ language='c++',
+ )],
+ platforms=['any'],
+ license='Apache 2.0',
+ classifiers=[
+ 'Development Status :: 4 - Beta',
+ 'Intended Audience :: Developers',
+ 'Intended Audience :: System Administrators',
+ 'License :: OSI Approved :: Apache Software License',
+ 'Operating System :: OS Independent',
+ 'Programming Language :: Cython',
+ 'Programming Language :: JavaScript',
+ 'Programming Language :: Python :: 3',
+ 'Programming Language :: Python :: 3.4',
+ 'Programming Language :: Python :: 3.5',
+ 'Programming Language :: Python :: 3.6',
+ 'Programming Language :: Python :: 3.7',
+ 'Programming Language :: Python :: 3 :: Only',
+ 'Programming Language :: Python :: Implementation :: CPython',
+ 'Topic :: Text Processing :: General',
+ ],
+)
diff --git a/src/DESCRIPTION b/src/DESCRIPTION
new file mode 100644
index 0000000..43f722d
--- /dev/null
+++ b/src/DESCRIPTION
@@ -0,0 +1,20 @@
+'''\
+PyJSON5
+-------
+
+A `JSON5 <https://spec.json5.org/>`_ serializer and parser library for Python 3 written in Cython.
+
+The serializer returns ASCII data that can safely be used in an HTML template.
+Apostrophes, ampersands, greater-than, and less-than signs are encoded as
+unicode escaped sequences. E.g. this snippet is safe for any and all input:
+
+.. code:: python
+
+ "<a onclick='alert(" + encode(data) + ")'>show message</a>"
+
+Unless the input contains infinite or NaN values, the result will be valid
+JSON data.
+
+All valid JSON5 1.0.0 and `JSON <https://tools.ietf.org/html/rfc8259>`_ data can be read,
+unless the nesting level is absurdly high.
+'''
diff --git a/src/VERSION b/src/VERSION
new file mode 100644
index 0000000..4273b3c
--- /dev/null
+++ b/src/VERSION
@@ -0,0 +1 @@
+"0.4.3"
diff --git a/src/_constants.pyx b/src/_constants.pyx
new file mode 100644
index 0000000..c1edb56
--- /dev/null
+++ b/src/_constants.pyx
@@ -0,0 +1,15 @@
+cdef object CONST_POS_NAN = float('+NaN')
+cdef object CONST_POS_INF = float('+Infinity')
+cdef object CONST_NEG_NAN = float('-NaN')
+cdef object CONST_NEG_INF = float('-Infinity')
+
+cdef object DATETIME_CLASSES = (date, time, datetime,)
+cdef object ORD_CLASSES = (unicode, bytes, bytearray,)
+
+cdef object UCS1_COMPATIBLE_CODECS = frozenset((
+ # ASCII
+ 'ascii', 646, '646', 'us-ascii',
+ # Latin-1
+ 'latin_1', 'latin-1', 'iso-8859-1', 'iso8859-1',
+ 8859, '8859', 'cp819', 'latin', 'latin1', 'l1',
+))
diff --git a/src/_decoder.pyx b/src/_decoder.pyx
new file mode 100644
index 0000000..9f6b8d1
--- /dev/null
+++ b/src/_decoder.pyx
@@ -0,0 +1,800 @@
+cdef enum:
+ NO_EXTRA_DATA = 0x0011_0000
+
+
+cdef void _skip_single_line(ReaderRef reader):
+ cdef uint32_t c0
+ while _reader_good(reader):
+ c0 = _reader_get(reader)
+ if _is_line_terminator(c0):
+ break
+
+
+cdef boolean _skip_multiline_comment(ReaderRef reader) except False:
+ cdef uint32_t c0
+ cdef boolean seen_asterisk = False
+ cdef Py_ssize_t comment_start = _reader_tell(reader)
+
+ comment_start = _reader_tell(reader)
+
+ seen_asterisk = False
+ while True:
+ if expect(not _reader_good(reader), False):
+ break
+
+ c0 = _reader_get(reader)
+ if c0 == b'*':
+ seen_asterisk = True
+ elif seen_asterisk:
+ if c0 == b'/':
+ return True
+ seen_asterisk = False
+
+ _raise_unclosed(b'comment', comment_start)
+ return False
+
+
+# data found
+# -1: exhausted
+# -2: exception
+cdef int32_t _skip_to_data_sub(ReaderRef reader, uint32_t c0) except -2:
+ cdef int32_t c1
+ cdef boolean seen_slash
+
+ seen_slash = False
+ while True:
+ if c0 == b'/':
+ if seen_slash:
+ _skip_single_line(reader)
+ seen_slash = False
+ else:
+ seen_slash = True
+ elif c0 == b'*':
+ if expect(not seen_slash, False):
+ _raise_stray_character('asterisk', _reader_tell(reader))
+
+ _skip_multiline_comment(reader)
+ seen_slash = False
+ elif not _is_ws_zs(c0):
+ c1 = cast_to_int32(c0)
+ break
+ elif expect(seen_slash, False):
+ _raise_stray_character('slash', _reader_tell(reader))
+
+ if not _reader_good(reader):
+ c1 = -1
+ break
+
+ c0 = _reader_get(reader)
+
+ if expect(seen_slash, False):
+ _raise_stray_character('slash', _reader_tell(reader))
+
+ return c1
+
+
+# data found
+# -1 exhausted
+# -2 exception
+cdef int32_t _skip_to_data(ReaderRef reader) except -2:
+ cdef uint32_t c0
+ cdef int32_t c1
+ if _reader_good(reader):
+ c0 = _reader_get(reader)
+ c1 = _skip_to_data_sub(reader, c0)
+ else:
+ c1 = -1
+ return c1
+
+
+cdef int32_t _get_hex_character(ReaderRef reader, Py_ssize_t length) except -1:
+ cdef Py_ssize_t start
+ cdef uint32_t c0
+ cdef uint32_t result
+ cdef Py_ssize_t index
+
+ start = _reader_tell(reader)
+ result = 0
+ for index in range(length):
+ result <<= 4
+ if expect(not _reader_good(reader), False):
+ _raise_unclosed(b'escape sequence', start)
+
+ c0 = _reader_get(reader)
+ if b'0' <= c0 <= b'9':
+ result |= c0 - <uint32_t> b'0'
+ elif b'a' <= c0 <= b'f':
+ result |= c0 - <uint32_t> b'a' + 10
+ elif b'A' <= c0 <= b'F':
+ result |= c0 - <uint32_t> b'A' + 10
+ else:
+ _raise_expected_s('hexadecimal character', start, c0)
+
+ if expect(result > 0x10ffff, False):
+ _raise_expected_s('Unicode code point', start, result)
+
+ return cast_to_int32(result)
+
+
+# >= 0: character to append
+# -1: skip
+# < -1: -(next character + 1)
+cdef int32_t _get_escape_sequence(ReaderRef reader,
+ Py_ssize_t start) except 0x7ffffff:
+ cdef uint32_t c0
+ cdef uint32_t c1
+
+ c0 = _reader_get(reader)
+ if expect(not _reader_good(reader), False):
+ _raise_unclosed(b'string', start)
+
+ if c0 == b'b':
+ return 0x0008
+ elif c0 == b'f':
+ return 0x000c
+ elif c0 == b'n':
+ return 0x000a
+ elif c0 == b'r':
+ return 0x000d
+ elif c0 == b't':
+ return 0x0009
+ elif c0 == b'v':
+ return 0x000b
+ elif c0 == b'0':
+ return 0x0000
+ elif c0 == b'x':
+ return _get_hex_character(reader, 2)
+ elif c0 == b'u':
+ c0 = cast_to_uint32(_get_hex_character(reader, 4))
+ if expect(Py_UNICODE_IS_LOW_SURROGATE(c0), False):
+ _raise_expected_s('high surrogate before low surrogate', start, c0)
+ elif not Py_UNICODE_IS_HIGH_SURROGATE(c0):
+ return c0
+
+ _accept_string(reader, b'\\u')
+
+ c1 = cast_to_uint32(_get_hex_character(reader, 4))
+ if expect(not Py_UNICODE_IS_LOW_SURROGATE(c1), False):
+ _raise_expected_s('low surrogate', start, c1)
+
+ return Py_UNICODE_JOIN_SURROGATES(c0, c1)
+ elif c0 == b'U':
+ return _get_hex_character(reader, 8)
+ elif expect(b'1' <= c0 <= b'9', False):
+ _raise_expected_s('escape sequence', start, c0)
+ return -2
+ elif _is_line_terminator(c0):
+ if c0 != 0x000D:
+ return -1
+
+ c0 = _reader_get(reader)
+ if c0 == 0x000A:
+ return -1
+
+ return -cast_to_int32(c0 + 1)
+ else:
+ return cast_to_int32(c0)
+
+
+cdef object _decode_string_sub(ReaderRef reader, uint32_t delim,
+ Py_ssize_t start, uint32_t c0):
+ cdef int32_t c1
+ cdef std_vector[uint32_t] buf
+
+ while True:
+ if c0 == delim:
+ break
+
+ if expect(not _reader_good(reader), False):
+ _raise_unclosed(b'string', start)
+
+ if c0 != b'\\':
+ buf.push_back(c0)
+ c0 = _reader_get(reader)
+ continue
+
+ c1 = _get_escape_sequence(reader, start)
+ if c1 >= -1:
+ if expect(not _reader_good(reader), False):
+ _raise_unclosed(b'string', start)
+
+ if c1 >= 0:
+ c0 = cast_to_uint32(c1)
+ buf.push_back(c0)
+
+ c0 = _reader_get(reader)
+ else:
+ c0 = cast_to_uint32(-(c1 + 1))
+
+ return PyUnicode_FromKindAndData(
+ PyUnicode_4BYTE_KIND, buf.data(), buf.size(),
+ )
+
+
+cdef object _decode_string(ReaderRef reader, int32_t *c_in_out):
+ cdef uint32_t delim
+ cdef uint32_t c0
+ cdef int32_t c1
+ cdef Py_ssize_t start
+ cdef object result
+
+ c1 = c_in_out[0]
+ delim = cast_to_uint32(c1)
+ start = _reader_tell(reader)
+
+ if expect(not _reader_good(reader), False):
+ _raise_unclosed(b'string', start)
+
+ c0 = _reader_get(reader)
+ result = _decode_string_sub(reader, delim, start, c0)
+
+ c_in_out[0] = NO_EXTRA_DATA
+ return result
+
+
+cdef object _decode_number_leading_zero(ReaderRef reader, std_vector[char] &buf,
+                                        int32_t *c_in_out):
+    # Handles a number that started with '0': hex ("0x…"), a fraction
+    # ("0.…"), an exponent ("0e…", always numerically 0.0), or a plain
+    # zero.  c_in_out receives the first unconsumed character (or -1 at
+    # EOF).  Underscores are skipped as digit separators throughout.
+    cdef uint32_t c0
+    cdef int32_t c1
+    cdef object pybuf
+
+    if not _reader_good(reader):
+        c_in_out[0] = -1
+        return 0
+
+    c0 = _reader_get(reader)
+    if _is_x(c0):
+        # Hexadecimal literal: collect hex digits into buf, parse base 16.
+        while True:
+            if not _reader_good(reader):
+                c1 = -1
+                break
+
+            c0 = _reader_get(reader)
+            if _is_hexadecimal(c0):
+                buf.push_back(<char> <unsigned char> c0)
+            elif c0 != b'_':
+                c1 = cast_to_int32(c0)
+                break
+
+        c_in_out[0] = c1
+        pybuf = PyBytes_FromStringAndSize(buf.data(), buf.size())
+        return int(pybuf, 16)
+    elif c0 == b'.':
+        # Fraction: keep consuming float-representation characters.
+        buf.push_back(b'.')
+
+        while True:
+            if not _reader_good(reader):
+                c1 = -1
+                break
+
+            c0 = _reader_get(reader)
+            if _is_in_float_representation(c0):
+                buf.push_back(<char> <unsigned char> c0)
+            elif c0 != b'_':
+                c1 = cast_to_int32(c0)
+                break
+
+        c_in_out[0] = c1
+        pybuf = PyBytes_FromStringAndSize(buf.data(), buf.size())
+        return float(pybuf)
+    elif _is_e(c0):
+        # "0e…": the mantissa is zero, so only the syntax is validated
+        # (characters are consumed but discarded) and 0.0 is returned.
+        while True:
+            if not _reader_good(reader):
+                c1 = -1
+                break
+
+            c0 = _reader_get(reader)
+            if _is_in_float_representation(c0):
+                pass
+            elif c0 == b'_':
+                pass
+            else:
+                c1 = cast_to_int32(c0)
+                break
+
+        c_in_out[0] = c1
+        return 0.0
+    else:
+        # A lone zero; hand the following character back to the caller.
+        c1 = cast_to_int32(c0)
+        c_in_out[0] = c1
+        return 0
+
+
+cdef object _decode_number_any(ReaderRef reader, std_vector[char] &buf,
+                               int32_t *c_in_out):
+    # Parses a decimal number (int or float) that did not start with '0'.
+    # Any character from the float representation ('.', 'e', '+', '-', …)
+    # switches the result type to float; '_' separators are dropped.
+    cdef uint32_t c0
+    cdef int32_t c1
+    cdef boolean is_float
+    cdef object pybuf
+
+    c1 = c_in_out[0]
+    c0 = cast_to_uint32(c1)
+
+    is_float = False
+    while True:
+        if _is_decimal(c0):
+            pass
+        elif _is_in_float_representation(c0):
+            is_float = True
+        elif c0 != b'_':
+            # First character that cannot belong to the number: stop and
+            # report it back to the caller.
+            c1 = cast_to_int32(c0)
+            break
+
+        if c0 != b'_':
+            buf.push_back(<char> <unsigned char> c0)
+
+        if not _reader_good(reader):
+            c1 = -1
+            break
+
+        c0 = _reader_get(reader)
+
+    c_in_out[0] = c1
+
+    pybuf = PyBytes_FromStringAndSize(buf.data(), buf.size())
+    if is_float:
+        return float(pybuf)
+    else:
+        return int(pybuf, 10)
+
+
+cdef object _decode_number(ReaderRef reader, int32_t *c_in_out):
+    # Parses any JSON5 number.  Handles an optional leading sign, the
+    # signed literals Infinity/NaN, and dispatches to the leading-zero
+    # (hex / fraction / exponent) or the generic decimal parser.
+    cdef uint32_t c0
+    cdef int32_t c1
+    cdef Py_ssize_t start
+    cdef std_vector[char] buf
+
+    c1 = c_in_out[0]
+    c0 = cast_to_uint32(c1)
+
+    if c0 == b'+':
+        start = _reader_tell(reader)
+        if expect(not _reader_good(reader), False):
+            _raise_unclosed(b'number', start)
+
+        c0 = _reader_get(reader)
+        # Consistency fix: compare against the byte literal b'I' like the
+        # b'N' branch below (previously a unicode 'I' that only worked via
+        # Cython's implicit Py_UCS4 coercion).
+        if c0 == b'I':
+            _accept_string(reader, b'nfinity')
+            c_in_out[0] = NO_EXTRA_DATA
+            return CONST_POS_INF
+        elif c0 == b'N':
+            _accept_string(reader, b'aN')
+            c_in_out[0] = NO_EXTRA_DATA
+            return CONST_POS_NAN
+
+        buf.reserve(16)
+    elif c0 == b'-':
+        start = _reader_tell(reader)
+        if expect(not _reader_good(reader), False):
+            _raise_unclosed(b'number', start)
+
+        c0 = _reader_get(reader)
+        if c0 == b'I':
+            _accept_string(reader, b'nfinity')
+            c_in_out[0] = NO_EXTRA_DATA
+            return CONST_NEG_INF
+        elif c0 == b'N':
+            _accept_string(reader, b'aN')
+            c_in_out[0] = NO_EXTRA_DATA
+            return CONST_NEG_NAN
+
+        buf.reserve(16)
+        buf.push_back(b'-')
+    else:
+        buf.reserve(16)
+
+    if c0 == b'0':
+        return _decode_number_leading_zero(reader, buf, c_in_out)
+    else:
+        c1 = cast_to_int32(c0)
+        c_in_out[0] = c1
+        return _decode_number_any(reader, buf, c_in_out)
+
+
+# 1: done
+# 0: data found
+# -1: exception (exhausted)
+cdef uint32_t _skip_comma(ReaderRef reader, Py_ssize_t start,
+                          uint32_t terminator, const char *what,
+                          int32_t *c_in_out) except -1:
+    # Skips whitespace/comments between container items, enforcing that
+    # exactly one comma separates two values and allowing a trailing
+    # comma before the terminator ('}' or ']').
+    cdef int32_t c0
+    cdef uint32_t c1
+    cdef boolean needs_comma
+    cdef uint32_t done
+
+    c0 = c_in_out[0]
+    c1 = cast_to_uint32(c0)
+
+    needs_comma = True
+    while True:
+        c0 = _skip_to_data_sub(reader, c1)
+        if c0 < 0:
+            break
+
+        c1 = cast_to_uint32(c0)
+        if c1 == terminator:
+            c_in_out[0] = NO_EXTRA_DATA
+            return 1
+
+        if c1 != b',':
+            # Data found; a comma must have been seen first.
+            if expect(needs_comma, False):
+                _raise_expected_sc(
+                    'comma', terminator, _reader_tell(reader), c1,
+                )
+            c_in_out[0] = c0
+            return 0
+
+        # A second comma in a row is a stray character.
+        if expect(not needs_comma, False):
+            _raise_stray_character('comma', _reader_tell(reader))
+
+        if expect(not _reader_good(reader), False):
+            break
+
+        c1 = _reader_get(reader)
+        needs_comma = False
+
+    # Input exhausted before the terminator was found.
+    _raise_unclosed(what, start)
+    return -1
+
+
+cdef unicode _decode_identifier_name(ReaderRef reader, int32_t *c_in_out):
+    # Reads an unquoted object key (ECMAScript IdentifierName).  The
+    # first character must satisfy _is_identifier_start; subsequent ones
+    # _is_identifier_part.  The first non-member character is handed
+    # back through c_in_out (or -1 at EOF).
+    cdef int32_t c0
+    cdef uint32_t c1
+    cdef Py_ssize_t start
+    cdef std_vector[uint32_t] buf
+
+    start = _reader_tell(reader)
+
+    c0 = c_in_out[0]
+    c1 = cast_to_uint32(c0)
+    if expect(not _is_identifier_start(c1), False):
+        _raise_expected_s('IdentifierStart', _reader_tell(reader), c1)
+
+    while True:
+        buf.push_back(c1)
+
+        if not _reader_good(reader):
+            c0 = -1
+            break
+
+        c1 = _reader_get(reader)
+        if not _is_identifier_part(c1):
+            c0 = cast_to_int32(c1)
+            break
+
+    c_in_out[0] = c0
+    return PyUnicode_FromKindAndData(
+        PyUnicode_4BYTE_KIND, buf.data(), buf.size(),
+    )
+
+
+cdef boolean _decode_object(ReaderRef reader, dict result) except False:
+    # Parses the body of a JSON5 object (the '{' was already consumed),
+    # filling ``result`` in place.  On error, partially decoded data is
+    # attached to the _DecoderException so callers can surface it.
+    cdef int32_t c0
+    cdef uint32_t c1
+    cdef Py_ssize_t start
+    cdef boolean done
+    cdef object key
+    cdef object value
+
+    start = _reader_tell(reader)
+
+    c0 = _skip_to_data(reader)
+    if expect(c0 >= 0, True):
+        c1 = cast_to_uint32(c0)
+        if c1 == b'}':
+            return True
+
+    # NOTE(review): if the _skip_to_data above returned a negative value,
+    # c1 is read below before being (re)assigned — verify against the
+    # intended control flow (the loop then fails with an "expected"
+    # error rather than an "unclosed" one).
+    while True:
+        # A key is either a quoted string or a bare identifier.
+        if c1 in b'"\'':
+            key = _decode_string(reader, &c0)
+        else:
+            key = _decode_identifier_name(reader, &c0)
+            if expect(c0 < 0, False):
+                break
+
+            c1 = cast_to_uint32(c0)
+            c0 = _skip_to_data_sub(reader, c1)
+            if expect(c0 < 0, False):
+                break
+
+        c1 = cast_to_uint32(c0)
+        if expect(c1 != b':', False):
+            _raise_expected_s('colon', _reader_tell(reader), c1)
+
+        if expect(not _reader_good(reader), False):
+            break
+
+        c0 = _skip_to_data(reader)
+        if expect(c0 < 0, False):
+            break
+
+        try:
+            value = _decode_recursive(reader, &c0)
+        except _DecoderException as ex:
+            # Preserve what was decoded so far for error reporting.
+            result[key] = (<_DecoderException> ex).result
+            raise
+
+        if expect(c0 < 0, False):
+            break
+
+        result[key] = value
+
+        done = _skip_comma(
+            reader, start, <unsigned char>b'}', b'object', &c0,
+        )
+        if done:
+            return True
+
+        c1 = cast_to_uint32(c0)
+
+    _raise_unclosed(b'object', start)
+    return False
+
+
+cdef boolean _decode_array(ReaderRef reader, list result) except False:
+    # Parses the body of a JSON5 array (the '[' was already consumed),
+    # appending values to ``result`` in place.
+    cdef int32_t c0
+    cdef uint32_t c1
+    cdef Py_ssize_t start
+    cdef boolean done
+    cdef object value
+
+    start = _reader_tell(reader)
+
+    c0 = _skip_to_data(reader)
+    if expect(c0 >= 0, True):
+        c1 = cast_to_uint32(c0)
+        if c1 == b']':
+            return True
+
+    # NOTE(review): when c0 < 0 here, the loop still runs once with the
+    # stale c0 — presumably this reports an "expected value" error
+    # instead of "unclosed array"; confirm this is intended.
+    while True:
+        try:
+            value = _decode_recursive(reader, &c0)
+        except _DecoderException as ex:
+            # Preserve what was decoded so far for error reporting.
+            result.append((<_DecoderException> ex).result)
+            raise
+
+        if expect(c0 < 0, False):
+            break
+
+        result.append(value)
+
+        done = _skip_comma(
+            reader, start, <unsigned char>b']', b'array', &c0,
+        )
+        if done:
+            return True
+
+    _raise_unclosed(b'array', start)
+
+
+cdef boolean _accept_string(ReaderRef reader, const char *string) except False:
+    # Consumes exactly the NUL-terminated ``string`` from the reader,
+    # raising on EOF or on the first mismatching character.  Used for
+    # literal tails such as b'rue', b'aN', b'nfinity'.
+    cdef uint32_t c0
+    cdef uint32_t c1
+    cdef Py_ssize_t start
+
+    start = _reader_tell(reader)
+    while True:
+        c0 = string[0]
+        string += 1
+        if not c0:
+            break
+
+        if expect(not _reader_good(reader), False):
+            _raise_unclosed(b'literal', start)
+
+        c1 = _reader_get(reader)
+        if expect(c0 != c1, False):
+            _raise_expected_c(c0, start, c1)
+
+    return True
+
+
+cdef object _decode_literal(ReaderRef reader, int32_t *c_in_out):
+    # Decodes one of the bare literals: null, true, false, Infinity, NaN.
+    # The first character was already consumed; only the tail is matched.
+    cdef const char *tail
+    cdef object result
+    cdef uint32_t c0
+    cdef int32_t c1
+
+    c0 = cast_to_uint32(c_in_out[0])
+    if c0 == b'n':
+        tail = b'ull'
+        result = None
+    elif c0 == b't':
+        tail = b'rue'
+        result = True
+    elif c0 == b'f':
+        tail = b'alse'
+        result = False
+    elif c0 == b'I':
+        tail = b'nfinity'
+        result = CONST_POS_INF
+    else: # elif c0 == b'N':
+        tail = b'aN'
+        result = CONST_POS_NAN
+
+    _accept_string(reader, tail)
+
+    c_in_out[0] = NO_EXTRA_DATA
+    return result
+
+
+cdef object _decode_recursive_enter(ReaderRef reader, int32_t *c_in_out):
+    # Enters a nested container ('{' or '['), guarding against exceeding
+    # the configured maximum nesting depth via _reader_enter/_leave.
+    cdef object result
+    cdef int32_t c0
+    cdef uint32_t c1
+
+    c0 = c_in_out[0]
+    c1 = cast_to_uint32(c0)
+
+    _reader_enter(reader)
+    try:
+        if c1 == b'{':
+            result = {}
+            _decode_object(reader, result)
+        else:
+            result = []
+            _decode_array(reader, result)
+    except RecursionError:
+        # Python's own recursion limit was hit before maxdepth.
+        _raise_nesting(_reader_tell(reader), result)
+    except _DecoderException as ex:
+        # Attach the partial container so outer frames can propagate it.
+        (<_DecoderException> ex).result = result
+        raise ex
+    finally:
+        _reader_leave(reader)
+
+    c_in_out[0] = NO_EXTRA_DATA
+    return result
+
+
+cdef object _decoder_unknown(ReaderRef reader, int32_t *c_in_out):
+    # Fallback decoder: the current character cannot start any JSON5
+    # value, so this always raises.
+    cdef int32_t c0
+    cdef uint32_t c1
+    cdef Py_ssize_t start
+
+    c0 = c_in_out[0]
+    c1 = cast_to_uint32(c0)
+    start = _reader_tell(reader)
+
+    _raise_expected_s('JSON5Value', start, c1)
+
+
+cdef object _decode_recursive(ReaderRef reader, int32_t *c_in_out):
+    # Dispatches on the first character of a value to the matching
+    # decoder: literal, string, number, or container.
+    cdef object (*decoder)(ReaderRef, int32_t*)
+    cdef int32_t c0
+    cdef uint32_t c1
+
+    c0 = c_in_out[0]
+    c1 = cast_to_uint32(c0)
+
+    decoder = _decoder_unknown
+    if c1 in b'ntfIN':
+        decoder = _decode_literal
+    elif c1 in b'\'"':
+        decoder = _decode_string
+    elif c1 in b'+-.0123456789':
+        decoder = _decode_number
+    elif c1 in b'{[':
+        decoder = _decode_recursive_enter
+
+    return decoder(reader, c_in_out)
+
+
+cdef object _decode_all_sub(ReaderRef reader, boolean some):
+    # Decodes one complete document.  With some=False any non-whitespace
+    # trailing data raises ExtraData; with some=True only data that is
+    # not separated by whitespace raises (UnframedData).
+    cdef Py_ssize_t start
+    cdef int32_t c0
+    cdef uint32_t c1
+    cdef object result
+
+    start = _reader_tell(reader)
+    c0 = _skip_to_data(reader)
+    if expect(c0 < 0, False):
+        _raise_no_data(start)
+
+    result = _decode_recursive(reader, &c0)
+    try:
+        if c0 < 0:
+            pass
+        elif not some:
+            start = _reader_tell(reader)
+            c1 = cast_to_uint32(c0)
+            c0 = _skip_to_data_sub(reader, c1)
+            if expect(c0 >= 0, False):
+                c1 = cast_to_uint32(c0)
+                _raise_extra_data(c1, start)
+        elif expect(not _is_ws_zs(c0), False):
+            start = _reader_tell(reader)
+            c1 = cast_to_uint32(c0)
+            _raise_unframed_data(c1, start)
+    except _DecoderException as ex:
+        # The document itself decoded fine; attach it to the error.
+        (<_DecoderException> ex).result = result
+        raise ex
+
+    return result
+
+
+cdef object _decode_all(ReaderRef reader, boolean some):
+    # Converts the internal _DecoderException into the public exception
+    # class stored on it (cls), carrying message, partial result, extra.
+    cdef Exception ex_result
+    cdef _DecoderException ex
+    try:
+        return _decode_all_sub(reader, some)
+    except _DecoderException as e:
+        ex = <_DecoderException> e
+        ex_result = ex.cls(ex.msg, ex.result, ex.extra)
+        raise ex_result
+
+
+cdef object _decode_ucs1(const void *string, Py_ssize_t length,
+                         Py_ssize_t maxdepth, boolean some):
+    # Decodes from a 1-byte-per-codepoint (Latin-1/ASCII) buffer.
+    cdef ReaderUCS1 reader = ReaderUCS1(
+        ReaderUCS(length, 0, maxdepth),
+        <const Py_UCS1*> string,
+    )
+    return _decode_all(reader, some)
+
+
+cdef object _decode_ucs2(const void *string, Py_ssize_t length,
+                         Py_ssize_t maxdepth, boolean some):
+    # Decodes from a 2-byte-per-codepoint (UCS-2) buffer.
+    cdef ReaderUCS2 reader = ReaderUCS2(
+        ReaderUCS(length, 0, maxdepth),
+        <const Py_UCS2*> string,
+    )
+    return _decode_all(reader, some)
+
+
+cdef object _decode_ucs4(const void *string, Py_ssize_t length,
+                         Py_ssize_t maxdepth, boolean some):
+    # Decodes from a 4-byte-per-codepoint (UCS-4) buffer.
+    cdef ReaderUCS4 reader = ReaderUCS4(
+        ReaderUCS(length, 0, maxdepth),
+        <const Py_UCS4*> string,
+    )
+    return _decode_all(reader, some)
+
+
+cdef object _decode_unicode(object data, Py_ssize_t maxdepth, boolean some):
+    # Dispatches a str to the decoder matching its internal (PEP 393)
+    # storage kind, avoiding any copy of the character data.
+    cdef Py_ssize_t length
+    cdef int kind
+
+    PyUnicode_READY(data)
+
+    length = PyUnicode_GET_LENGTH(data)
+    kind = PyUnicode_KIND(data)
+
+    if kind == PyUnicode_1BYTE_KIND:
+        return _decode_ucs1(PyUnicode_1BYTE_DATA(data), length, maxdepth, some)
+    elif kind == PyUnicode_2BYTE_KIND:
+        return _decode_ucs2(PyUnicode_2BYTE_DATA(data), length, maxdepth, some)
+    elif kind == PyUnicode_4BYTE_KIND:
+        return _decode_ucs4(PyUnicode_4BYTE_DATA(data), length, maxdepth, some)
+    else:
+        pass # impossible
+
+
+cdef object _decode_buffer(Py_buffer &view, int32_t wordlength,
+                           Py_ssize_t maxdepth, boolean some):
+    # Decodes from a raw buffer, interpreting it as 1/2/4-byte
+    # codepoints according to ``wordlength``.
+    cdef object (*decoder)(const void*, Py_ssize_t, Py_ssize_t, boolean)
+    cdef Py_ssize_t length
+
+    if wordlength == 1:
+        decoder = _decode_ucs1
+        length = view.len // 1
+    elif wordlength == 2:
+        decoder = _decode_ucs2
+        length = view.len // 2
+    elif wordlength == 4:
+        decoder = _decode_ucs4
+        length = view.len // 4
+    else:
+        _raise_illegal_wordlength(wordlength)
+
+    return decoder(view.buf, length, maxdepth, some)
+
+
+cdef object _decode_callback(object cb, object args, Py_ssize_t maxdepth,
+                             boolean some):
+    # Decodes by pulling characters from a user callback ``cb(*args)``;
+    # -1 marks "no character pushed back yet".
+    cdef ReaderCallback reader = ReaderCallback(
+        ReaderCallbackBase(0, maxdepth),
+        <PyObject*> cb,
+        <PyObject*> args,
+        -1,
+    )
+    return _decode_all(reader, some)
diff --git a/src/_encoder.pyx b/src/_encoder.pyx
new file mode 100644
index 0000000..f151597
--- /dev/null
+++ b/src/_encoder.pyx
@@ -0,0 +1,404 @@
+cdef enum EncType:
+    # Tag computed once per value in _encode() and passed to the
+    # specialized encoder function selected for it.
+    ENC_TYPE_EXCEPTION
+    ENC_TYPE_UNKNOWN
+    ENC_TYPE_NONE
+    ENC_TYPE_UNICODE
+    ENC_TYPE_BOOL
+    ENC_TYPE_BYTES
+    ENC_TYPE_LONG
+    ENC_TYPE_DECIMAL
+    ENC_TYPE_FLOAT
+    ENC_TYPE_DATETIME
+    ENC_TYPE_MAPPING
+    ENC_TYPE_SEQUENCE
+
+
+cdef boolean _encode_unicode_impl(WriterRef writer, UCSString data, Py_ssize_t length) except False:
+    # Writes ``data`` as a double-quoted JSON string.  Specialized per
+    # PEP 393 storage kind via the UCSString fused type: the UCS1 path
+    # copies whole unescaped runs at once; UCS2/UCS4 go char by char.
+    cdef char buf[16]
+    cdef uint32_t c
+    cdef uint32_t s1, s2
+    cdef Py_ssize_t index
+    cdef const char *escaped_string
+    cdef Py_ssize_t escaped_length
+    cdef size_t unescaped_length
+
+    if length > 0:
+        writer.reserve(writer, 2 + length)
+        writer.append_c(writer, <char> b'"')
+        if UCSString is UCS1String:
+            while True:
+                # Bulk-copy the longest prefix that needs no escaping.
+                unescaped_length = ESCAPE_DCT.find_unescaped_range(<const char*> data, length)
+                if unescaped_length > 0:
+                    writer.append_s(writer, <const char*> data, unescaped_length)
+
+                    data += unescaped_length
+                    length -= unescaped_length
+                    if length <= 0:
+                        break
+
+                c = data[0]
+                escaped_string = &ESCAPE_DCT.items[c][0]
+                escaped_length = ESCAPE_DCT.items[c][7]
+                writer.append_s(writer, escaped_string, escaped_length)
+
+                data += 1
+                length -= 1
+                if length <= 0:
+                    break
+        else:
+            for index in range(length):
+                c = data[index]
+                if UCSString is UCS2String:
+                    if not ESCAPE_DCT.is_escaped(c):
+                        writer.append_c(writer, <char> <unsigned char> c)
+                    else:
+                        escaped_string = &ESCAPE_DCT.items[c][0]
+                        escaped_length = ESCAPE_DCT.items[c][7]
+                        writer.append_s(writer, escaped_string, escaped_length)
+                elif UCSString is UCS4String:
+                    if not ESCAPE_DCT.is_escaped(c):
+                        writer.append_c(writer, <char> <unsigned char> c)
+                    elif c < 0x10000:
+                        escaped_string = &ESCAPE_DCT.items[c][0]
+                        escaped_length = ESCAPE_DCT.items[c][7]
+                        writer.append_s(writer, escaped_string, escaped_length)
+                    else:
+                        # surrogate pair
+                        c -= 0x10000
+                        s1 = 0xd800 | ((c >> 10) & 0x3ff)
+                        s2 = 0xdc00 | (c & 0x3ff)
+
+                        # Two \uXXXX escapes: exactly 12 characters.
+                        snprintf(buf, sizeof(buf), b'\\u%04x\\u%04x', s1, s2)
+                        writer.append_s(writer, buf, 2 * 6)
+        writer.append_c(writer, <char> b'"')
+    else:
+        writer.append_s(writer, b'""', 2)
+
+    return True
+
+
+cdef boolean _encode_unicode(WriterRef writer, object data, EncType enc_type) except False:
+    # Dispatches a str to the _encode_unicode_impl specialization that
+    # matches its internal (PEP 393) storage kind.
+    cdef Py_ssize_t length
+    cdef int kind
+
+    PyUnicode_READY(data)
+
+    length = PyUnicode_GET_LENGTH(data)
+    kind = PyUnicode_KIND(data)
+
+    if kind == PyUnicode_1BYTE_KIND:
+        _encode_unicode_impl(writer, PyUnicode_1BYTE_DATA(data), length)
+    elif kind == PyUnicode_2BYTE_KIND:
+        _encode_unicode_impl(writer, PyUnicode_2BYTE_DATA(data), length)
+    elif kind == PyUnicode_4BYTE_KIND:
+        _encode_unicode_impl(writer, PyUnicode_4BYTE_DATA(data), length)
+    else:
+        pass # impossible
+
+    return True
+
+
+cdef boolean _encode_nested_key(WriterRef writer, object data) except False:
+    # Encodes a non-str mapping key: serializes the value into a
+    # temporary in-memory writer, then emits that serialization as a
+    # quoted string (escaping backslashes and double quotes).
+    cdef const char *string
+    cdef char c
+    cdef Py_ssize_t index, length
+
+    cdef WriterReallocatable sub_writer = WriterReallocatable(
+        Writer(
+            _WriterReallocatable_reserve,
+            _WriterReallocatable_append_c,
+            _WriterReallocatable_append_s,
+            writer.options,
+        ),
+        0, 0, NULL,
+    )
+    try:
+        _encode(sub_writer.base, data)
+
+        length = sub_writer.position
+        string = <char*> sub_writer.obj
+
+        writer.reserve(writer, 2 + length)
+        writer.append_c(writer, <char> b'"')
+        for index in range(length):
+            c = string[index]
+            if c not in b'\\"':
+                writer.append_c(writer, c)
+            elif c == b'\\':
+                writer.append_s(writer, b'\\\\', 2)
+            else:
+                writer.append_s(writer, b'\\u0022', 6)
+        writer.append_c(writer, <char> b'"')
+    finally:
+        # The temporary writer owns a heap buffer; always release it.
+        if sub_writer.obj is not NULL:
+            ObjectFree(sub_writer.obj)
+
+    return True
+
+
+cdef boolean _append_ascii(WriterRef writer, object data) except False:
+    # Appends caller-supplied pre-serialized JSON (e.g. a to_json()
+    # result) verbatim, after verifying it is pure ASCII.
+    cdef Py_buffer view
+    cdef const char *buf
+
+    if PyUnicode_Check(data):
+        PyUnicode_READY(data)
+        if not PyUnicode_IS_ASCII(data):
+            raise TypeError('Expected ASCII data')
+        writer.append_s(writer, <const char*> PyUnicode_1BYTE_DATA(data), PyUnicode_GET_LENGTH(data))
+    else:
+        # Anything buffer-like: scan every byte for the high bit.
+        PyObject_GetBuffer(data, &view, PyBUF_CONTIG_RO)
+        try:
+            buf = <const char*> view.buf
+            for index in range(view.len):
+                c = buf[index]
+                if c & ~0x7f:
+                    raise TypeError('Expected ASCII data')
+
+            writer.append_s(writer, buf, view.len)
+        finally:
+            PyBuffer_Release(&view)
+
+    return True
+
+
+cdef boolean _encode_sequence(WriterRef writer, object data) except False:
+    # Encodes any iterable as a JSON array: [v0,v1,...].
+    cdef boolean first
+    cdef object value
+
+    writer.append_c(writer, <char> b'[')
+    first = True
+    for value in data:
+        if not first:
+            writer.append_c(writer, <char> b',')
+        else:
+            first = False
+        _encode(writer, value)
+    writer.append_c(writer, <char> b']')
+
+    return True
+
+
+cdef boolean _encode_mapping(WriterRef writer, object data) except False:
+    # Encodes a mapping as a JSON object.  Iterates the mapping's keys
+    # and looks each value up via data[key]; str keys are written
+    # directly, any other key type via _encode_nested_key.
+    cdef boolean first
+    cdef object key, value
+
+    writer.append_c(writer, <char> b'{')
+    first = True
+    for key in data:
+        if not first:
+            writer.append_c(writer, <char> b',')
+        else:
+            first = False
+        value = data[key]
+
+        if PyUnicode_Check(key):
+            _encode_unicode(writer, key, ENC_TYPE_UNICODE)
+        else:
+            _encode_nested_key(writer, key)
+
+        writer.append_c(writer, <char> b':')
+        _encode(writer, value)
+    writer.append_c(writer, <char> b'}')
+
+    return True
+
+
+cdef boolean _encode_constant(WriterRef writer, object data, EncType enc_type) except False:
+    # Encodes the singletons True/False/None as true/false/null.
+    cdef const char *string
+    cdef Py_ssize_t length
+
+    if data is True:
+        string = b'true'
+        length = 4
+    elif data is False:
+        string = b'false'
+        length = 5
+    else:
+        string = b'null'
+        length = 4
+
+    writer.append_s(writer, string, length)
+    return True
+
+
+cdef boolean _encode_bytes(WriterRef writer, object data, EncType enc_type) except False:
+    # Encodes bytes as a string; invalid UTF-8 is replaced (U+FFFD),
+    # never raised.
+    cdef unicode_data = data.decode('UTF-8', 'replace')
+    _encode_unicode(writer, unicode_data, ENC_TYPE_UNICODE)
+    return True
+
+
+cdef boolean _encode_datetime(WriterRef writer, object data, EncType enc_type) except False:
+    # Encodes date/time-like objects as a quoted ISO-8601 string via
+    # their isoformat() method.
+    cdef const char *string
+    cdef Py_ssize_t length
+    cdef object stringified
+
+    stringified = data.isoformat()
+    string = PyUnicode_AsUTF8AndSize(stringified, &length)
+
+    writer.reserve(writer, 2 + length)
+    writer.append_c(writer, <char> b'"')
+    writer.append_s(writer, string, length)
+    writer.append_c(writer, <char> b'"')
+
+    return True
+
+
+cdef boolean _encode_numeric(WriterRef writer, object data, EncType enc_type) except False:
+    # Encodes int/Decimal/float using the %-format strings configured in
+    # Options.  Zero and subnormal floats are written as '0'; NaN and
+    # infinities use their configured representations; a format of None
+    # (configured False) makes the value unstringifiable.
+    cdef object formatter_string
+    cdef const char *string
+    cdef Py_ssize_t length
+    cdef int classification
+
+    if enc_type == ENC_TYPE_LONG:
+        formatter_string = (<Options> writer.options).intformat
+    elif enc_type == ENC_TYPE_DECIMAL:
+        formatter_string = (<Options> writer.options).decimalformat
+    else:
+        value = PyFloat_AsDouble(data)
+        classification = fpclassify(value)
+        if classification == FP_NORMAL:
+            formatter_string = (<Options> writer.options).floatformat
+        elif classification in (FP_SUBNORMAL, FP_ZERO):
+            string = b'0'
+            length = 1
+
+            writer.append_s(writer, string, length)
+            return True
+        else:
+            # Non-finite: the representations are emitted verbatim (they
+            # are validated as ASCII str when Options is constructed).
+            if classification == FP_NAN:
+                formatter_string = (<Options> writer.options).nan
+            elif value > 0.0:
+                formatter_string = (<Options> writer.options).posinfinity
+            else:
+                formatter_string = (<Options> writer.options).neginfinity
+
+            if formatter_string is None:
+                _raise_unstringifiable(data)
+
+            string = <const char*> PyUnicode_1BYTE_DATA(formatter_string)
+            length = PyUnicode_GET_LENGTH(formatter_string)
+
+            writer.append_s(writer, string, length)
+            return True
+
+    if formatter_string is None:
+        _raise_unstringifiable(data)
+
+    formatter_string = (formatter_string % data)
+    string = PyUnicode_AsUTF8AndSize(formatter_string, &length)
+    writer.append_s(writer, string, length)
+    return True
+
+
+cdef boolean _encode_recursive(WriterRef writer, object data, EncType enc_type) except False:
+    # Encodes nested containers (and the tojson hook) under CPython's
+    # recursion guard, so deeply nested data raises RecursionError
+    # instead of crashing the interpreter.
+    cdef object to_json
+    cdef boolean (*encoder)(WriterRef writer, object data) except False
+
+    Py_EnterRecursiveCall(' while encoding nested JSON5 object')
+    try:
+        # If the object exposes the configured to-JSON member, use its
+        # (possibly called) result verbatim; it must be ASCII.
+        to_json = (<Options> writer.options).tojson
+        if to_json is not None:
+            to_json = getattr(data, to_json, None)
+            if to_json is not None:
+                if callable(to_json):
+                    to_json = to_json()
+                _append_ascii(writer, to_json)
+                return True
+
+        if enc_type == ENC_TYPE_SEQUENCE:
+            encoder = _encode_sequence
+        elif enc_type == ENC_TYPE_MAPPING:
+            encoder = _encode_mapping
+        else:
+            _raise_unstringifiable(data)
+            encoder = NULL
+
+        encoder(writer, data)
+        return True
+    finally:
+        Py_LeaveRecursiveCall()
+
+
+cdef boolean _encode(WriterRef writer, object data) except False:
+    # Classifies ``data`` and dispatches to the matching encoder.  The
+    # classification order matters: bool before int (bool subclasses
+    # int), and iterables before Decimal/datetime.
+    cdef boolean (*encoder)(WriterRef, object, EncType) except False
+    cdef EncType enc_type
+
+    if data is None:
+        enc_type = ENC_TYPE_NONE
+    elif PyUnicode_Check(data):
+        enc_type = ENC_TYPE_UNICODE
+    elif PyBool_Check(data):
+        enc_type = ENC_TYPE_BOOL
+    elif PyBytes_Check(data):
+        enc_type = ENC_TYPE_BYTES
+    elif PyLong_Check(data):
+        enc_type = ENC_TYPE_LONG
+    elif PyFloat_Check(data):
+        enc_type = ENC_TYPE_FLOAT
+    elif obj_has_iter(data):
+        if isinstance(data, (<Options> writer.options).mappingtypes):
+            enc_type = ENC_TYPE_MAPPING
+        else:
+            enc_type = ENC_TYPE_SEQUENCE
+    elif isinstance(data, Decimal):
+        enc_type = ENC_TYPE_DECIMAL
+    elif isinstance(data, DATETIME_CLASSES):
+        enc_type = ENC_TYPE_DATETIME
+    elif data == None:
+        # NOTE(review): deliberate ``==`` (not ``is``) — presumably to
+        # catch proxy objects that compare equal to None; confirm, since
+        # a misbehaving __eq__ could raise here.
+        enc_type = ENC_TYPE_NONE
+    else:
+        enc_type = ENC_TYPE_UNKNOWN
+
+    encoder = _encode_recursive
+    if enc_type in (ENC_TYPE_NONE, ENC_TYPE_BOOL):
+        encoder = _encode_constant
+    elif enc_type == ENC_TYPE_UNICODE:
+        encoder = _encode_unicode
+    elif enc_type == ENC_TYPE_BYTES:
+        encoder = _encode_bytes
+    elif enc_type in (ENC_TYPE_LONG, ENC_TYPE_DECIMAL, ENC_TYPE_FLOAT):
+        encoder = _encode_numeric
+    elif enc_type == ENC_TYPE_DATETIME:
+        encoder = _encode_datetime
+
+    encoder(writer, data, enc_type)
+
+    return True
+
+
+cdef boolean _encode_callback_bytes(object data, object cb, object options) except False:
+    # Serializes ``data`` to JSON5, pushing chunks of UTF-8 bytes into
+    # the callable ``cb``.  Raises TypeError if ``cb`` is not callable.
+    cdef WriterCallback writer = WriterCallback(
+        Writer(
+            _WriterNoop_reserve,
+            _WriterCbBytes_append_c,
+            _WriterCbBytes_append_s,
+            <PyObject*> options,
+        ),
+        <PyObject*> cb,
+    )
+
+    if not callable(cb):
+        # Bug fix: the message previously read "… is callable", the
+        # opposite of the condition being reported.
+        raise TypeError(f'type(cb)=={type(cb)!r} is not callable')
+
+    _encode(writer.base, data)
+
+    return True
+
+
+cdef boolean _encode_callback_str(object data, object cb, object options) except False:
+    # Serializes ``data`` to JSON5, pushing chunks of str into the
+    # callable ``cb``.  Raises TypeError if ``cb`` is not callable.
+    cdef WriterCallback writer = WriterCallback(
+        Writer(
+            _WriterNoop_reserve,
+            _WriterCbStr_append_c,
+            _WriterCbStr_append_s,
+            <PyObject*> options,
+        ),
+        <PyObject*> cb,
+    )
+
+    if not callable(cb):
+        # Bug fix: the message previously read "… is callable", the
+        # opposite of the condition being reported.
+        raise TypeError(f'type(cb)=={type(cb)!r} is not callable')
+
+    _encode(writer.base, data)
+
+    return True
diff --git a/src/_encoder_options.pyx b/src/_encoder_options.pyx
new file mode 100644
index 0000000..2c7da49
--- /dev/null
+++ b/src/_encoder_options.pyx
@@ -0,0 +1,221 @@
+# Defaults substituted by Options.__cinit__ when an argument is None.
+cdef object DEFAULT_TOJSON = False
+cdef object DEFAULT_POSINFINITY = 'Infinity'
+cdef object DEFAULT_NEGINFINITY = '-Infinity'
+cdef object DEFAULT_NAN = 'NaN'
+cdef object DEFAULT_INTFORMAT = '%d'
+cdef object DEFAULT_FLOATFORMAT = '%.6e'
+cdef object DEFAULT_DECIMALFORMAT = '%s'
+cdef object DEFAULT_MAPPINGTYPES = (Mapping,)
+
+
+cdef object _options_ascii(object datum, boolean expect_ascii=True):
+    # Normalizes an Options argument: False -> None (feature disabled),
+    # bytes -> str (UTF-8), str kept as-is; optionally enforces ASCII.
+    if datum is False:
+        return None
+    elif PyBytes_Check(datum):
+        datum = unicode(datum, 'UTF-8', 'strict')
+    elif not PyUnicode_Check(datum):
+        raise TypeError('Expected str instance or False')
+
+    PyUnicode_READY(datum)
+    if expect_ascii and not PyUnicode_IS_ASCII(datum):
+        raise ValueError('Expected ASCII data')
+
+    return datum
+
+
+cdef object _option_from_ascii(object name, object value, object default):
+    # Renders one option for repr(): None if it equals its default
+    # (omitted), 'name=False' for a disabled option, else 'name=value'.
+    if value == default:
+        return
+    elif value is None:
+        return f'{name}=False'
+    else:
+        return f'{name}={value!r}'
+
+
+cdef _options_from_ascii(Options self):
+    # Renders all non-default scalar options as 'name=value' pairs for
+    # Options.__repr__.
+    # Bug fix: floatformat/decimalformat were compared against each
+    # other's defaults, so a changed value was hidden and an unchanged
+    # one reported; each is now compared against its own default.
+    return ', '.join(filter(bool, (
+        _option_from_ascii('tojson', self.tojson, None),
+        _option_from_ascii('posinfinity', self.posinfinity, DEFAULT_POSINFINITY),
+        _option_from_ascii('neginfinity', self.neginfinity, DEFAULT_NEGINFINITY),
+        _option_from_ascii('intformat', self.intformat, DEFAULT_INTFORMAT),
+        _option_from_ascii('floatformat', self.floatformat, DEFAULT_FLOATFORMAT),
+        _option_from_ascii('decimalformat', self.decimalformat, DEFAULT_DECIMALFORMAT),
+        _option_from_ascii('nan', self.nan, DEFAULT_NAN),
+    )))
+
+
+@final
+@auto_pickle(False)
+cdef class Options:
+    '''
+    Customizations for the ``encoder_*(...)`` function family.
+
+    Immutable. Use ``Options.update(**kw)`` to create a **new** Options instance.
+
+    Parameters
+    ----------
+    tojson : str|False|None
+        * **str:** A special method to call on objects to return a custom JSON encoded string. Must return ASCII data!
+        * **False:** No such member exists. (Default.)
+        * **None:** Use default.
+    posinfinity : str|False|None
+        * **str:** String to represent positive infinity. Must be ASCII.
+        * **False:** Throw an exception if ``float('+inf')`` is encountered.
+        * **None:** Use default: ``"Infinity"``.
+    neginfinity : str|False|None
+        * **str:** String to represent negative infinity. Must be ASCII.
+        * **False:** Throw an exception if ``float('-inf')`` is encountered.
+        * **None:** Use default: ``"-Infinity"``.
+    nan : str|False|None
+        * **str:** String to represent not-a-number. Must be ASCII.
+        * **False:** Throw an exception if ``float('NaN')`` is encountered.
+        * **None:** Use default: ``"NaN"``.
+    intformat : str|False|None
+        * **str:** Format string to use with ``int``.
+        * **False:** Throw an exception if an ``int`` is encountered.
+        * **None:** Use default: ``"%d"``.
+    floatformat : str|False|None
+        * **str:** Format string to use with ``float``.
+        * **False:** Throw an exception if a ``float`` is encountered.
+        * **None:** Use default: ``"%.6e"``.
+    decimalformat : str|False|None
+        * **str:** Format string to use with ``Decimal``.
+        * **False:** Throw an exception if a ``Decimal`` is encountered.
+        * **None:** Use default: ``"%s"``.
+    mappingtypes : Iterable[type]|False|None
+        * **Iterable[type]:** Classes the should be encoded to objects. \
+          Must be iterable over their keys, and implement ``__getitem__``.
+        * **False:** There are no objects. Any object will be encoded as list of key-value tuples.
+        * **None:** Use default: ``[collections.abc.Mapping]``.
+    '''
+    cdef readonly unicode tojson
+    '''The creation argument ``tojson``.
+    ``None`` if ``False`` was specified.
+    '''
+    cdef readonly unicode posinfinity
+    '''The creation argument ``posinfinity``.
+    ``None`` if ``False`` was specified.
+    '''
+    cdef readonly unicode neginfinity
+    '''The creation argument ``neginfinity``.
+    ``None`` if ``False`` was specified.
+    '''
+    cdef readonly unicode nan
+    '''The creation argument ``nan``.
+    ``None`` if ``False`` was specified.
+    '''
+    cdef readonly unicode intformat
+    '''The creation argument ``intformat``.
+    ``None`` if ``False`` was specified.
+    '''
+    cdef readonly unicode floatformat
+    '''The creation argument ``floatformat``.
+    ``None`` if ``False`` was specified.
+    '''
+    cdef readonly unicode decimalformat
+    '''The creation argument ``decimalformat``.
+    ``None`` if ``False`` was specified.
+    '''
+    cdef readonly tuple mappingtypes
+    '''The creation argument ``mappingtypes``.
+    ``()`` if ``False`` was specified.
+    '''
+
+
+    def __reduce__(self):
+        '''
+        Pickling is not supported (yet).
+        '''
+        raise NotImplementedError
+
+    def __reduce_ex__(self, protocol):
+        '''
+        Pickling is not supported (yet).
+        '''
+        raise NotImplementedError
+
+    def __repr__(self):
+        repr_options = _options_from_ascii(self)
+        # Bug fix: show the *actual* non-default mappingtypes, labeled.
+        # Previously the default tuple was printed (unnamed) whenever the
+        # instance's mappingtypes differed from the default.
+        repr_cls = (
+            ''
+            if self.mappingtypes == DEFAULT_MAPPINGTYPES else
+            f'mappingtypes={self.mappingtypes!r}'
+        )
+        return (f'Options('
+                f'{repr_options}'
+                f'{repr_options and repr_cls and ", "}'
+                f'{repr_cls}'
+                ')')
+
+    def __str__(self):
+        return self.__repr__()
+
+    def __cinit__(self, *,
+                  tojson=None, posinfinity=None, neginfinity=None, nan=None,
+                  decimalformat=None, intformat=None, floatformat=None,
+                  mappingtypes=None):
+        cdef object cls
+
+        # None means "use the default"; False disables the feature and
+        # is normalized to None by _options_ascii below.
+        if tojson is None:
+            tojson = DEFAULT_TOJSON
+        if posinfinity is None:
+            posinfinity = DEFAULT_POSINFINITY
+        if neginfinity is None:
+            neginfinity = DEFAULT_NEGINFINITY
+        if nan is None:
+            nan = DEFAULT_NAN
+        if intformat is None:
+            intformat = DEFAULT_INTFORMAT
+        if floatformat is None:
+            floatformat = DEFAULT_FLOATFORMAT
+        if decimalformat is None:
+            decimalformat = DEFAULT_DECIMALFORMAT
+        if mappingtypes is None:
+            mappingtypes = DEFAULT_MAPPINGTYPES
+
+        self.tojson = _options_ascii(tojson, False)
+        self.posinfinity = _options_ascii(posinfinity)
+        self.neginfinity = _options_ascii(neginfinity)
+        self.intformat = _options_ascii(intformat)
+        self.floatformat = _options_ascii(floatformat)
+        self.decimalformat = _options_ascii(decimalformat)
+        self.nan = _options_ascii(nan)
+
+        if mappingtypes is False:
+            self.mappingtypes = ()
+        else:
+            self.mappingtypes = tuple(mappingtypes)
+            for cls in self.mappingtypes:
+                if not PyType_Check(cls):
+                    raise TypeError('mappingtypes must be a sequence of types '
+                                    'or False')
+
+    def update(self, **kw):
+        '''
+        Creates a new Options instance by modifying some members.
+        '''
+        return _to_options(self, kw)
+
+
+# Shared singleton returned by _to_options when nothing was customized.
+cdef Options DEFAULT_OPTIONS_OBJECT = Options()
+
+
+cdef object _to_options(Options arg, dict kw):
+    # Merges keyword overrides into an existing Options instance (or the
+    # shared default), returning the original object when nothing changes.
+    if arg is None:
+        if not kw:
+            return DEFAULT_OPTIONS_OBJECT
+        else:
+            return Options(**kw)
+    elif not kw:
+        return arg
+
+    # Fill in every member not overridden by kw from ``arg``.
+    PyDict_SetDefault(kw, 'tojson', (<Options> arg).tojson)
+    PyDict_SetDefault(kw, 'posinfinity', (<Options> arg).posinfinity)
+    PyDict_SetDefault(kw, 'neginfinity', (<Options> arg).neginfinity)
+    PyDict_SetDefault(kw, 'nan', (<Options> arg).nan)
+    PyDict_SetDefault(kw, 'intformat', (<Options> arg).intformat)
+    PyDict_SetDefault(kw, 'floatformat', (<Options> arg).floatformat)
+    PyDict_SetDefault(kw, 'decimalformat', (<Options> arg).decimalformat)
+    PyDict_SetDefault(kw, 'mappingtypes', (<Options> arg).mappingtypes)
+
+    return Options(**kw)
diff --git a/src/_exceptions.pyx b/src/_exceptions.pyx
new file mode 100644
index 0000000..286f409
--- /dev/null
+++ b/src/_exceptions.pyx
@@ -0,0 +1,12 @@
+@auto_pickle(False)
+cdef class Json5Exception(Exception):
+    '''
+    Base class of any exception thrown by PyJSON5.
+    '''
+    def __init__(self, message=None, *args):
+        super().__init__(message, *args)
+
+    @property
+    def message(self):
+        '''Human readable error description.'''
+        return self.args[0]
diff --git a/src/_exceptions_decoder.pyx b/src/_exceptions_decoder.pyx
new file mode 100644
index 0000000..03fc233
--- /dev/null
+++ b/src/_exceptions_decoder.pyx
@@ -0,0 +1,92 @@
+@auto_pickle(False)
+cdef class Json5DecoderException(Json5Exception):
+    '''
+    Base class of any exception thrown by the parser.
+    '''
+    def __init__(self, message=None, result=None, *args):
+        super().__init__(message, result, *args)
+
+    @property
+    def result(self):
+        '''Partially deserialized data up to the point of failure.'''
+        return self.args[1]
+
+
+@final
+@auto_pickle(False)
+cdef class Json5NestingTooDeep(Json5DecoderException):
+    '''
+    The maximum nesting level on the input data was exceeded.
+    '''
+
+
+@final
+@auto_pickle(False)
+cdef class Json5EOF(Json5DecoderException):
+    '''
+    The input ended prematurely, i.e. mid-value or mid-container.
+    '''
+
+
+@final
+@auto_pickle(False)
+cdef class Json5IllegalCharacter(Json5DecoderException):
+    '''
+    An unexpected character was encountered.
+    '''
+    def __init__(self, message=None, result=None, character=None, *args):
+        super().__init__(message, result, character, *args)
+
+    @property
+    def character(self):
+        '''Extraneous character.'''
+        return self.args[2]
+
+
+@final
+@auto_pickle(False)
+cdef class Json5ExtraData(Json5DecoderException):
+    '''
+    The input contained extraneous data after the first document.
+    '''
+    def __init__(self, message=None, result=None, character=None, *args):
+        super().__init__(message, result, character, *args)
+
+    @property
+    def character(self):
+        '''
+        Extraneous character.
+        '''
+        return self.args[2]
+
+
+@final
+@auto_pickle(False)
+cdef class Json5IllegalType(Json5DecoderException):
+    '''
+    The user supplied callback function returned illegal data.
+    '''
+    def __init__(self, message=None, result=None, value=None, *args):
+        super().__init__(message, result, value, *args)
+
+    @property
+    def value(self):
+        '''
+        Value that caused the problem.
+        '''
+        return self.args[2]
+
+
+@final
+@auto_pickle(False)
+cdef class _DecoderException(Exception):
+    # Internal carrier exception: collects the public exception class,
+    # message, extra payload and partial result while unwinding the
+    # parser; _decode_all converts it into ``cls(msg, result, extra)``.
+    cdef object cls
+    cdef object msg
+    cdef object extra
+    cdef object result
+
+    def __cinit__(self, cls, msg, extra, result):
+        self.cls = cls
+        self.msg = msg
+        self.extra = extra
+        self.result = result
diff --git a/src/_exceptions_encoder.pyx b/src/_exceptions_encoder.pyx
new file mode 100644
index 0000000..78665c1
--- /dev/null
+++ b/src/_exceptions_encoder.pyx
@@ -0,0 +1,21 @@
+@auto_pickle(False)
+cdef class Json5EncoderException(Json5Exception):
+ '''
+ Base class of any exception thrown by the serializer.
+ '''
+
+
+@auto_pickle(False)
+cdef class Json5UnstringifiableType(Json5EncoderException):
+ '''
+ The encoder was not able to stringify the input, or it was told not to by the supplied ``Options``.
+ '''
+ def __init__(self, message=None, unstringifiable=None):
+ super().__init__(message, unstringifiable)
+
+ @property
+ def unstringifiable(self):
+ '''
+ The value that caused the problem.
+ '''
+ return self.args[1]
diff --git a/src/_exports.pyx b/src/_exports.pyx
new file mode 100644
index 0000000..61fe156
--- /dev/null
+++ b/src/_exports.pyx
@@ -0,0 +1,605 @@
+DEFAULT_MAX_NESTING_LEVEL = 32
+'''
+Maximum nesting level of data to decode if no ``maxdepth`` argument is specified.
+'''
+
+__version__ = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, VERSION, VERSION_LENGTH)
+'''
+Current library version.
+'''
+
+
+def decode(object data, object maxdepth=None, object some=False):
+ '''
+ Decodes JSON5 serialized data from an ``str`` object.
+
+ .. code:: python
+
+ decode('["Hello", "world!"]') == ['Hello', 'world!']
+
+ Parameters
+ ----------
+ data : unicode
+ JSON5 serialized data
+ maxdepth : Optional[int]
+        Maximum nesting level before the parsing is aborted.
+
+ * If ``None`` is supplied, then the value of the global variable \
+ ``DEFAULT_MAX_NESTING_LEVEL`` is used instead.
+ * If the value is ``0``, then only literals are accepted, e.g. ``false``, \
+ ``47.11``, or ``"string"``.
+        * If the value is negative, then any nesting level is allowed until \
+ Python's recursion limit is hit.
+ some : bool
+ Allow trailing junk.
+
+ Raises
+ ------
+ Json5DecoderException
+        An exception occurred while decoding.
+ TypeError
+ An argument had a wrong type.
+
+ Returns
+ -------
+ object
+ Deserialized data.
+ '''
+ if maxdepth is None:
+ maxdepth = DEFAULT_MAX_NESTING_LEVEL
+
+ if isinstance(data, unicode):
+ return _decode_unicode(data, maxdepth, bool(some))
+ else:
+ raise TypeError(f'type(data) == {type(data)!r} not supported')
+
+
+def decode_latin1(object data, object maxdepth=None, object some=False):
+ '''
+ Decodes JSON5 serialized data from a ``bytes`` object.
+
+ .. code:: python
+
+        decode_latin1(b'["Hello", "world!"]') == ['Hello', 'world!']
+
+ Parameters
+ ----------
+ data : bytes
+ JSON5 serialized data, encoded as Latin-1 or ASCII.
+ maxdepth : Optional[int]
+ see `decode(...) <pyjson5.decode_>`_
+ some : bool
+ see `decode(...) <pyjson5.decode_>`_
+
+ Raises
+ ------
+ Json5DecoderException
+        An exception occurred while decoding.
+ TypeError
+ An argument had a wrong type.
+
+ Returns
+ -------
+ object
+ see `decode(...) <pyjson5.decode_>`_
+ '''
+ return decode_buffer(data, maxdepth, bool(some), 1)
+
+
+def decode_buffer(object obj, object maxdepth=None, object some=False,
+ object wordlength=None):
+ '''
+ Decodes JSON5 serialized data from an object that supports the buffer
+ protocol, e.g. bytearray.
+
+ .. code:: python
+
+ obj = memoryview(b'["Hello", "world!"]')
+
+ decode_buffer(obj) == ['Hello', 'world!']
+
+ Parameters
+ ----------
+ data : object
+ JSON5 serialized data.
+ The argument must support Python's buffer protocol, i.e.
+        ``memoryview(...)`` must work. The buffer must be contiguous.
+ maxdepth : Optional[int]
+ see `decode(...) <pyjson5.decode_>`_
+ some : bool
+ see `decode(...) <pyjson5.decode_>`_
+ wordlength : Optional[int]
+        Must be 1, 2, 4 to denote UCS1, UCS2 or UCS4 data.
+ Surrogates are not supported. Decode the data to an ``str`` if need be.
+ If ``None`` is supplied, then the buffer's ``itemsize`` is used.
+
+ Raises
+ ------
+ Json5DecoderException
+        An exception occurred while decoding.
+ TypeError
+ An argument had a wrong type.
+ ValueError
+ The value of ``wordlength`` was invalid.
+
+ Returns
+ -------
+ object
+ see `decode(...) <pyjson5.decode_>`_
+ '''
+ cdef Py_buffer view
+
+ if maxdepth is None:
+ maxdepth = DEFAULT_MAX_NESTING_LEVEL
+
+ PyObject_GetBuffer(obj, &view, PyBUF_CONTIG_RO)
+ try:
+ if wordlength is None:
+ wordlength = view.itemsize
+ return _decode_buffer(view, wordlength, maxdepth, bool(some))
+ finally:
+ PyBuffer_Release(&view)
+
+
+def decode_callback(object cb, object maxdepth=None, object some=False,
+ object args=None):
+ '''
+ Decodes JSON5 serialized data by invoking a callback.
+
+ .. code:: python
+
+ cb = iter('["Hello","world!"]').__next__
+
+ decode_callback(cb) == ['Hello', 'world!']
+
+ Parameters
+ ----------
+ cb : Callable[Any, Union[str|bytes|bytearray|int|None]]
+ A function to get values from.
+        The function is called like ``cb(*args)``, and it returns:
+
+ * **str, bytes, bytearray:** \
+ ``len(...) == 0`` denotes exhausted input. \
+ ``len(...) == 1`` is the next character.
+ * **int:** \
+ ``< 0`` denotes exhausted input. \
+ ``>= 0`` is the ordinal value of the next character.
+ * **None:** \
+ input exhausted
+ maxdepth : Optional[int]
+ see `decode(...) <pyjson5.decode_>`_
+ some : bool
+ see `decode(...) <pyjson5.decode_>`_
+ args : Optional[Iterable[Any]]
+ Arguments to call ``cb`` with.
+
+ Raises
+ ------
+ Json5DecoderException
+        An exception occurred while decoding.
+ TypeError
+ An argument had a wrong type.
+
+ Returns
+ -------
+ object
+ see ``decode(...)``
+ '''
+ if not callable(cb):
+ raise TypeError(f'type(cb)=={type(cb)!r} is not callable')
+
+ if maxdepth is None:
+ maxdepth = DEFAULT_MAX_NESTING_LEVEL
+
+ if args:
+ args = tuple(args)
+ else:
+ args = ()
+
+ return _decode_callback(cb, args, maxdepth, bool(some))
+
+
+def decode_io(object fp, object maxdepth=None, object some=True):
+ '''
+ Decodes JSON5 serialized data from a file-like object.
+
+ .. code:: python
+
+ fp = io.StringIO("""
+ ['Hello', /* TODO look into specs whom to greet */]
+ 'Wolrd' // FIXME: look for typos
+ """)
+
+ decode_io(fp) == ['Hello']
+ decode_io(fp) == 'Wolrd'
+
+ fp.seek(0)
+
+ decode_io(fp, some=False)
+ # raises Json5ExtraData('Extra data U+0027 near 56', ['Hello'], "'")
+
+ Parameters
+ ----------
+ fp : IOBase
+ A file-like object to parse from.
+ maxdepth : Optional[int] = None
+ see `decode(...) <pyjson5.decode_>`_
+ some : bool
+ see `decode(...) <pyjson5.decode_>`_
+
+ Raises
+ ------
+ Json5DecoderException
+        An exception occurred while decoding.
+ TypeError
+ An argument had a wrong type.
+
+ Returns
+ -------
+ object
+ see ``decode(...)``
+ '''
+ if not isinstance(fp, IOBase):
+ raise TypeError(f'type(fp)=={type(fp)!r} is not IOBase compatible')
+ elif not fp.readable():
+ raise TypeError(f'fp is not readable')
+ elif fp.closed:
+ raise TypeError(f'fp is closed')
+
+ if maxdepth is None:
+ maxdepth = DEFAULT_MAX_NESTING_LEVEL
+
+ return _decode_callback(fp.read, (1,), maxdepth, bool(some))
+
+
+def encode(object data, *, options=None, **options_kw):
+ '''
+ Serializes a Python object to a JSON5 compatible unicode string.
+
+ .. code:: python
+
+ encode(['Hello', 'world!']) == '["Hello","world!"]'
+
+ Parameters
+ ----------
+ data : object
+ Python object to serialize.
+ options : Optional[Options]
+ Extra options for the encoder.
+ If ``options`` **and** ``options_kw`` are specified, then ``options.update(**options_kw)`` is used.
+ options_kw
+ See Option's arguments.
+
+ Raises
+ ------
+ Json5EncoderException
+        An exception occurred while encoding.
+ TypeError
+ An argument had a wrong type.
+
+ Returns
+ -------
+ str
+ Unless ``float('inf')`` or ``float('nan')`` is encountered, the result
+ will be valid JSON data (as of RFC8259).
+
+ The result is always ASCII. All characters outside of the ASCII range
+ are encoded.
+
+        The result is safe to use in an HTML template, e.g.
+ ``<a onclick='alert({{ encode(url) }})'>show message</a>``.
+ Apostrophes ``"'"`` are encoded as ``"\\u0027"``, less-than,
+ greater-than, and ampersand likewise.
+ '''
+ cdef void *temp = NULL
+ cdef object result
+ cdef Py_ssize_t start = (
+ <Py_ssize_t> <void*> &(<AsciiObject*> NULL).data[0]
+ )
+ cdef Py_ssize_t length
+ cdef object opts = _to_options(options, options_kw)
+ cdef WriterReallocatable writer = WriterReallocatable(
+ Writer(
+ _WriterReallocatable_reserve,
+ _WriterReallocatable_append_c,
+ _WriterReallocatable_append_s,
+ <PyObject*> opts,
+ ),
+ start, 0, NULL,
+ )
+
+ try:
+ _encode(writer.base, data)
+
+ length = writer.position - start
+ if length <= 0:
+ # impossible
+ return u''
+
+ temp = ObjectRealloc(writer.obj, writer.position + 1)
+ if temp is not NULL:
+ writer.obj = temp
+ (<char*> writer.obj)[writer.position] = 0
+
+ result = ObjectInit(<PyObject*> writer.obj, unicode)
+ writer.obj = NULL
+
+ (<PyASCIIObject*> result).length = length
+ (<PyASCIIObject*> result).hash = -1
+ (<PyASCIIObject*> result).wstr = NULL
+ (<PyASCIIObject*> result).state.interned = SSTATE_NOT_INTERNED
+ (<PyASCIIObject*> result).state.kind = PyUnicode_1BYTE_KIND
+ (<PyASCIIObject*> result).state.compact = True
+ (<PyASCIIObject*> result).state.ready = True
+ (<PyASCIIObject*> result).state.ascii = True
+
+ return result
+ finally:
+ if writer.obj is not NULL:
+ ObjectFree(writer.obj)
+
+
+def encode_bytes(object data, *, options=None, **options_kw):
+ '''
+ Serializes a Python object to a JSON5 compatible bytes string.
+
+ .. code:: python
+
+ encode_bytes(['Hello', 'world!']) == b'["Hello","world!"]'
+
+ Parameters
+ ----------
+ data : object
+ see `encode(...) <pyjson5.encode_>`_
+ options : Optional[Options]
+ see `encode(...) <pyjson5.encode_>`_
+ options_kw
+ see `encode(...) <pyjson5.encode_>`_
+
+ Raises
+ ------
+ Json5EncoderException
+        An exception occurred while encoding.
+ TypeError
+ An argument had a wrong type.
+
+ Returns
+ -------
+ bytes
+ see `encode(...) <pyjson5.encode_>`_
+ '''
+ cdef void *temp = NULL
+ cdef object result
+ cdef Py_ssize_t start = (
+ <Py_ssize_t> <void*> &(<PyBytesObject*> NULL).ob_sval[0]
+ )
+ cdef Py_ssize_t length
+ cdef object opts = _to_options(options, options_kw)
+ cdef WriterReallocatable writer = WriterReallocatable(
+ Writer(
+ _WriterReallocatable_reserve,
+ _WriterReallocatable_append_c,
+ _WriterReallocatable_append_s,
+ <PyObject*> opts,
+ ),
+ start, 0, NULL,
+ )
+
+ try:
+ _encode(writer.base, data)
+
+ length = writer.position - start
+ if length <= 0:
+ # impossible
+ return b''
+
+ temp = ObjectRealloc(writer.obj, writer.position + 1)
+ if temp is not NULL:
+ writer.obj = temp
+ (<char*> writer.obj)[writer.position] = 0
+
+ result = <object> <PyObject*> ObjectInitVar(
+ (<PyVarObject*> writer.obj), bytes, length,
+ )
+ writer.obj = NULL
+
+ (<PyBytesObject*> result).ob_shash = -1
+
+ return result
+ finally:
+ if writer.obj is not NULL:
+ ObjectFree(writer.obj)
+
+
+def encode_callback(object data, object cb, object supply_bytes=False, *,
+ options=None, **options_kw):
+ '''
+ Serializes a Python object into a callback function.
+
+ The callback function ``cb`` gets called with single characters and strings
+ until the input ``data`` is fully serialized.
+
+ .. code:: python
+
+ encode_callback(['Hello', 'world!'], print)
+ #prints:
+ # [
+ # "
+ # Hello
+ # "
+ # ,
+ # "
+ # world!
+ # "
+        # ]
+
+ Parameters
+ ----------
+ data : object
+ see `encode(...) <pyjson5.encode_>`_
+ cb : Callable[[Union[bytes|str]], None]
+ A callback function.
+ Depending on the truthyness of ``supply_bytes`` either ``bytes`` or
+ ``str`` is supplied.
+ supply_bytes : bool
+ Call ``cb(...)`` with a ``bytes`` argument if true,
+ otherwise ``str``.
+ options : Optional[Options]
+ see `encode(...) <pyjson5.encode_>`_
+ options_kw
+ see `encode(...) <pyjson5.encode_>`_
+
+ Raises
+ ------
+ Json5EncoderException
+        An exception occurred while encoding.
+ TypeError
+ An argument had a wrong type.
+
+ Returns
+ -------
+ Callable[[Union[bytes|str]], None]
+ The supplied argument ``cb``.
+ '''
+ cdef boolean (*encoder)(object obj, object cb, object options) except False
+ cdef Options opts = _to_options(options, options_kw)
+
+ if supply_bytes:
+ encoder = _encode_callback_bytes
+ else:
+ encoder = _encode_callback_str
+
+ encoder(data, cb, options=opts)
+
+ return cb
+
+
+def encode_io(object data, object fp, object supply_bytes=True, *,
+ options=None, **options_kw):
+ '''
+ Serializes a Python object into a file-object.
+
+ The return value of ``fp.write(...)`` is not checked.
+ If ``fp`` is unbuffered, then the result will be garbage!
+
+ Parameters
+ ----------
+ data : object
+ see `encode(...) <pyjson5.encode_>`_
+ fp : IOBase
+ A file-like object to serialize into.
+ supply_bytes : bool
+ Call ``fp.write(...)`` with a ``bytes`` argument if true,
+ otherwise ``str``.
+ options : Optional[Options]
+ see `encode(...) <pyjson5.encode_>`_
+ options_kw
+ see `encode(...) <pyjson5.encode_>`_
+
+ Raises
+ ------
+ Json5EncoderException
+        An exception occurred while encoding.
+ TypeError
+ An argument had a wrong type.
+
+ Returns
+ -------
+ IOBase
+ The supplied argument ``fp``.
+ '''
+ cdef boolean (*encoder)(object obj, object cb, object options) except False
+ cdef object opts = _to_options(options, options_kw)
+
+ if not isinstance(fp, IOBase):
+ raise TypeError(f'type(fp)=={type(fp)!r} is not IOBase compatible')
+ elif not fp.writable():
+ raise TypeError(f'fp is not writable')
+ elif fp.closed:
+ raise TypeError(f'fp is closed')
+
+ if supply_bytes:
+ encoder = _encode_callback_bytes
+ else:
+ encoder = _encode_callback_str
+
+ encoder(data, fp.write, options=opts)
+
+ return fp
+
+
+def encode_noop(object data, *, options=None, **options_kw):
+ '''
+ Test if the input is serializable.
+
+ Most likely you want to serialize ``data`` directly, and catch exceptions
+ instead of using this function!
+
+ .. code:: python
+
+ encode_noop({47: 11}) == True
+ encode_noop({47: object()}) == False
+
+ Parameters
+ ----------
+ data : object
+ see `encode(...) <pyjson5.encode_>`_
+ options : Optional[Options]
+ see `encode(...) <pyjson5.encode_>`_
+ options_kw
+ see `encode(...) <pyjson5.encode_>`_
+
+ Returns
+ -------
+ bool
+ ``True`` iff ``data`` is serializable.
+ '''
+ cdef object opts = _to_options(options, options_kw)
+ cdef Writer writer = Writer(
+ _WriterNoop_reserve,
+ _WriterNoop_append_c,
+ _WriterNoop_append_s,
+ <PyObject*> opts,
+ )
+
+ try:
+ _encode(writer, data)
+ except Exception:
+ return False
+
+ return True
+
+
+__all__ = (
+ # DECODE
+ 'decode', 'decode_latin1', 'decode_buffer', 'decode_callback', 'decode_io',
+ # ENCODE
+ 'encode', 'encode_bytes', 'encode_callback', 'encode_io', 'encode_noop', 'Options',
+ # LEGACY
+ 'loads', 'load', 'dumps', 'dump',
+ # EXCEPTIONS
+ 'Json5Exception',
+ 'Json5EncoderException', 'Json5UnstringifiableType',
+ 'Json5DecoderException', 'Json5NestingTooDeep', 'Json5EOF', 'Json5IllegalCharacter', 'Json5ExtraData', 'Json5IllegalType',
+)
+
+__doc__ = '''\
+PyJSON5
+-------
+
+A JSON5 serializer and parser library for Python 3 written in Cython.
+
+The serializer returns ASCII data that can safely be used in an HTML template.
+Apostrophes, ampersands, greater-than, and less-than signs are encoded as
+unicode escaped sequences. E.g. this snippet is safe for any and all input:
+
+.. code:: python
+
+ "<a onclick='alert(" + encode(data) + ")'>show message</a>"
+
+Unless the input contains infinite or NaN values, the result will be valid
+JSON data.
+
+All valid JSON5 1.0.0 and JSON data can be read, unless the nesting level is
+absurdly high.
+'''
diff --git a/src/_imports.pyx b/src/_imports.pyx
new file mode 100644
index 0000000..c139f71
--- /dev/null
+++ b/src/_imports.pyx
@@ -0,0 +1,171 @@
+from cython import final, no_gc, auto_pickle
+from cpython cimport dict, int, list, long, tuple, type
+from cpython.bool cimport PyBool_Check
+from cpython.buffer cimport (
+ PyObject_GetBuffer, PyObject_GetBuffer, PyBUF_CONTIG_RO, PyBuffer_Release,
+)
+from cpython.bytes cimport (
+ PyBytes_AsStringAndSize, PyBytes_FromStringAndSize, PyBytes_Check,
+)
+from cpython.datetime cimport datetime, date, time
+from cpython.float cimport PyFloat_Check, PyFloat_AsDouble
+from cpython.int cimport PyInt_Check
+from cpython.long cimport PyLong_FromString, PyLong_Check
+from cpython.object cimport PyObject
+from cpython.type cimport PyType_Check
+from cpython.unicode cimport PyUnicode_Check
+from libcpp cimport bool as boolean
+from libcpp.vector cimport vector as std_vector
+
+
+cdef extern from '<cstddef>' namespace 'std' nogil:
+ ctypedef unsigned long size_t
+
+
+cdef extern from '<cstdint>' namespace 'std' nogil:
+ ctypedef unsigned char uint8_t
+ ctypedef unsigned short uint16_t
+ ctypedef unsigned long uint32_t
+ ctypedef unsigned long long uint64_t
+
+ ctypedef signed char int8_t
+ ctypedef signed short int16_t
+ ctypedef signed long int32_t
+ ctypedef signed long long int64_t
+
+
+cdef extern from '<cstdio>' namespace 'std' nogil:
+ int snprintf(char *buffer, size_t buf_size, const char *format, ...)
+ size_t strlen(const char *s)
+
+
+cdef extern from '<cstring>' namespace 'std' nogil:
+ void memcpy(void *dest, const void *std, size_t count)
+ void memset(void *dest, char value, size_t count)
+ size_t strlen(const char *s)
+
+
+cdef extern from '<cmath>' nogil:
+ enum:
+ FP_INFINITE, FP_NAN, FP_NORMAL, FP_SUBNORMAL, FP_ZERO
+
+cdef extern from '<cmath>' namespace 'std' nogil:
+ int fpclassify(...)
+
+
+cdef extern from '<utility>' namespace 'std' nogil:
+ void swap[T](T&, T&)
+
+
+cdef extern from 'src/native.hpp' namespace 'JSON5EncoderCpp' nogil:
+ int32_t cast_to_int32(...)
+ uint32_t cast_to_uint32(...)
+
+ ctypedef boolean AlwaysTrue
+ boolean obj_has_iter(object obj)
+
+ ctypedef char EscapeDctItem[8]
+ struct EscapeDct:
+ EscapeDctItem items[0x10000]
+ boolean is_escaped(uint32_t c)
+ Py_ssize_t find_unescaped_range(const char *start, Py_ssize_t length)
+ EscapeDct ESCAPE_DCT
+
+ enum:
+ VERSION_LENGTH
+ const char VERSION[]
+
+ enum:
+ LONGDESCRIPTION_LENGTH
+ const char LONGDESCRIPTION[]
+
+
+cdef extern from 'Python.h':
+ ctypedef signed char Py_UCS1
+ ctypedef signed short Py_UCS2
+ ctypedef signed long Py_UCS4
+
+ enum:
+ PyUnicode_WCHAR_KIND
+ PyUnicode_1BYTE_KIND
+ PyUnicode_2BYTE_KIND
+ PyUnicode_4BYTE_KIND
+
+ int PyUnicode_READY(object o) except -1
+ Py_ssize_t PyUnicode_GET_LENGTH(object o) nogil
+ int PyUnicode_KIND(object o) nogil
+ boolean PyUnicode_IS_ASCII(object) nogil
+ Py_UCS1 *PyUnicode_1BYTE_DATA(object o) nogil
+ Py_UCS2 *PyUnicode_2BYTE_DATA(object o) nogil
+ Py_UCS4 *PyUnicode_4BYTE_DATA(object o) nogil
+
+ boolean Py_EnterRecursiveCall(const char *where) except True
+ void Py_LeaveRecursiveCall()
+
+ bint Py_UNICODE_ISALPHA(Py_UCS4 ch) nogil
+ bint Py_UNICODE_ISDIGIT(Py_UCS4 ch) nogil
+ bint Py_UNICODE_IS_SURROGATE(Py_UCS4 ch) nogil
+ bint Py_UNICODE_IS_HIGH_SURROGATE(Py_UCS4 ch) nogil
+ bint Py_UNICODE_IS_LOW_SURROGATE(Py_UCS4 ch) nogil
+ Py_UCS4 Py_UNICODE_JOIN_SURROGATES(Py_UCS4 high, Py_UCS4 low) nogil
+
+ object PyUnicode_FromKindAndData(int kind, const void *buf, Py_ssize_t size)
+ char *PyUnicode_AsUTF8AndSize(object o, Py_ssize_t *size) except NULL
+
+ object PyDict_SetDefault(object p, object key, object value)
+
+ object CallFunction 'PyObject_CallFunction'(PyObject *cb, const char *format, ...)
+ object CallObject 'PyObject_CallObject'(PyObject *cb, PyObject *args)
+
+ ctypedef signed long Py_hash
+ ctypedef signed short wchar_t
+
+ enum:
+ SSTATE_NOT_INTERNED
+ SSTATE_INTERNED_MORTAL
+ SSTATE_INTERNED_IMMORTAL
+
+ ctypedef struct __ascii_object_state:
+ uint8_t interned
+ uint8_t kind
+ boolean compact
+ boolean ascii
+ boolean ready
+
+ ctypedef struct PyASCIIObject:
+ Py_ssize_t length
+ Py_hash hash
+ wchar_t *wstr
+ __ascii_object_state state
+
+ ctypedef struct PyVarObject:
+ pass
+
+ ctypedef struct PyBytesObject:
+ PyVarObject ob_base
+ Py_hash ob_shash
+ char ob_sval[1]
+
+ AlwaysTrue ErrNoMemory 'PyErr_NoMemory'() except True
+ void *ObjectRealloc 'PyObject_Realloc'(void *p, size_t n)
+ void ObjectFree 'PyObject_Free'(void *p)
+ object ObjectInit 'PyObject_INIT'(PyObject *obj, type cls)
+ PyVarObject *ObjectInitVar 'PyObject_InitVar'(PyVarObject *obj, type cls, Py_ssize_t size)
+
+
+ctypedef struct AsciiObject:
+ PyASCIIObject base
+ char data[1]
+
+
+cdef extern from * nogil:
+ boolean expect '__builtin_expect'(boolean actual, boolean expected)
+
+
+cdef type Decimal, Mapping, IOBase
+cdef object saferepr
+
+from collections.abc import Mapping
+from decimal import Decimal
+from io import IOBase
+from pprint import saferepr
diff --git a/src/_legacy.pyx b/src/_legacy.pyx
new file mode 100644
index 0000000..218ff83
--- /dev/null
+++ b/src/_legacy.pyx
@@ -0,0 +1,100 @@
+def loads(s, *, encoding='UTF-8', **kw):
+ '''
+ Decodes JSON5 serialized data from a string.
+
+ Use `decode(...) <pyjson5.decode_>`_ instead!
+
+ .. code:: python
+
+ loads(s) == decode(s)
+
+ Parameters
+ ----------
+ s : object
+ Unless the argument is an ``str``, it gets decoded according to the
+ parameter ``encoding``.
+ encoding : str
+ Codec to use if ``s`` is not an ``str``.
+ kw
+ Silently ignored.
+
+ Returns
+ -------
+ object
+ see ``decode(...)``
+ '''
+ if not isinstance(s, unicode):
+ s = unicode(s, encoding, 'strict')
+ return decode(s)
+
+
+def load(fp, **kw):
+ '''
+ Decodes JSON5 serialized data from a file-like object.
+
+ Use `decode_io(...) <pyjson5.decode_io_>`_ instead!
+
+ .. code:: python
+
+ load(fp) == decode_io(fp, None, False)
+
+ Parameters
+ ----------
+ fp : IOBase
+ A file-like object to parse from.
+ kw
+ Silently ignored.
+
+ Returns
+ -------
+ object
+ see ``decode(...)``
+ '''
+ return decode_io(fp, None, False)
+
+
+def dumps(obj, **kw):
+ '''
+ Serializes a Python object to a JSON5 compatible unicode string.
+
+ Use `encode(...) <pyjson5.encode_>`_ instead!
+
+ .. code:: python
+
+ dumps(obj) == encode(obj)
+
+ Parameters
+ ----------
+ obj : object
+ Python object to serialize.
+ kw
+ Silently ignored.
+
+ Returns
+ -------
+ unicode
+ see ``encode(data)``
+ '''
+ return encode(obj)
+
+
+def dump(object obj, object fp, **kw):
+ '''
+    Serializes a Python object into a file-like object.
+
+ Use `encode_io(...) <pyjson5.encode_io_>`_ instead!
+
+ .. code:: python
+
+ dump(obj, fp) == encode_io(obj, fp)
+
+ Parameters
+ ----------
+ obj : object
+ Python object to serialize.
+ fp : IOBase
+ A file-like object to serialize into.
+ kw
+ Silently ignored.
+ '''
+ encode_io(obj, fp)
diff --git a/src/_raise_decoder.pyx b/src/_raise_decoder.pyx
new file mode 100644
index 0000000..24cf494
--- /dev/null
+++ b/src/_raise_decoder.pyx
@@ -0,0 +1,81 @@
+cdef AlwaysTrue _raise_decoder(cls, msg, extra=None, result=None) except True:
+ raise _DecoderException(cls, msg, extra, result)
+
+
+cdef AlwaysTrue _raise_unclosed(const char *what, Py_ssize_t start) except True:
+ return _raise_decoder(
+ Json5EOF,
+ f'Unclosed {what} starting near {start}',
+ )
+
+
+cdef AlwaysTrue _raise_no_data(Py_ssize_t where) except True:
+ return _raise_decoder(
+ Json5EOF,
+ f'No JSON data found near {where}',
+ )
+
+
+cdef AlwaysTrue _raise_stray_character(const char *what, Py_ssize_t where) except True:
+ return _raise_decoder(
+ Json5IllegalCharacter,
+ f'Stray {what} near {where}',
+ what,
+ )
+
+
+cdef AlwaysTrue _raise_expected_sc(const char *char_a, uint32_t char_b, Py_ssize_t near, uint32_t found) except True:
+ return _raise_decoder(
+ Json5IllegalCharacter,
+ f'Expected {char_a} or U+{char_b:04x} near {near}, found U+{found:04x}',
+ f'{found:c}',
+ )
+
+
+cdef AlwaysTrue _raise_expected_s(const char *char_a, Py_ssize_t near, uint32_t found) except True:
+ return _raise_decoder(
+ Json5IllegalCharacter,
+ f'Expected {char_a} near {near}, found U+{found:04x}',
+ f'{found:c}',
+ )
+
+
+cdef AlwaysTrue _raise_expected_c(uint32_t char_a, Py_ssize_t near, uint32_t found) except True:
+ return _raise_decoder(
+ Json5IllegalCharacter,
+ f'Expected U+{char_a:04x} near {near}, found U+{found:04x}',
+ f'{found:c}',
+ )
+
+
+cdef AlwaysTrue _raise_extra_data(uint32_t found, Py_ssize_t where) except True:
+ return _raise_decoder(
+ Json5ExtraData,
+ f'Extra data U+{found:04X} near {where}',
+ f'{found:c}',
+ )
+
+
+cdef AlwaysTrue _raise_unframed_data(uint32_t found, Py_ssize_t where) except True:
+ return _raise_decoder(
+ Json5ExtraData,
+ f'Lost unframed data near {where}',
+ f'{found:c}',
+ )
+
+
+cdef AlwaysTrue _raise_nesting(Py_ssize_t where, object result=None) except True:
+ return _raise_decoder(
+ Json5NestingTooDeep,
+ f'Maximum nesting level exceeded near {where}',
+ None,
+ result,
+ )
+
+
+cdef AlwaysTrue _raise_not_ord(object value, Py_ssize_t where) except True:
+ return _raise_decoder(
+ Json5IllegalType,
+ f'type(value)=={type(value)!r} not in (int, str, bytes) near {where} or the value is not valid.',
+ value,
+ )
diff --git a/src/_raise_encoder.pyx b/src/_raise_encoder.pyx
new file mode 100644
index 0000000..5e9076d
--- /dev/null
+++ b/src/_raise_encoder.pyx
@@ -0,0 +1,6 @@
+cdef AlwaysTrue _raise_unstringifiable(object data) except True:
+ raise Json5UnstringifiableType(f'Unstringifiable type(data)={type(data)!r}', data)
+
+
+cdef AlwaysTrue _raise_illegal_wordlength(int32_t wordlength) except True:
+ raise ValueError(f'wordlength must be 1, 2 or 4, not {wordlength!r}')
diff --git a/src/_reader_callback.pyx b/src/_reader_callback.pyx
new file mode 100644
index 0000000..f99249c
--- /dev/null
+++ b/src/_reader_callback.pyx
@@ -0,0 +1,50 @@
+cdef struct ReaderCallbackBase:
+ Py_ssize_t position
+ Py_ssize_t maxdepth
+
+
+cdef struct ReaderCallback:
+ ReaderCallbackBase base
+ PyObject *callback
+ PyObject *args
+ int32_t lookahead
+
+ctypedef ReaderCallback &ReaderCallbackRef
+
+
+cdef inline uint32_t _reader_Callback_get(ReaderCallbackRef self):
+ cdef int32_t c = self.lookahead
+
+ self.lookahead = -1
+ self.base.position += 1
+
+ return cast_to_uint32(c)
+
+
+cdef int32_t _reader_Callback_good(ReaderCallbackRef self) except -1:
+ cdef Py_ssize_t c = -1
+
+ if self.lookahead >= 0:
+ return True
+
+ cdef object value = CallObject(self.callback, self.args)
+ if (value is None) or (value is False):
+ return False
+
+ if isinstance(value, int):
+ c = value
+ elif isinstance(value, ORD_CLASSES):
+ if not value:
+ return False
+ c = ord(value)
+ else:
+ _raise_not_ord(value, self.base.position)
+
+ if c < 0:
+ return False
+ elif c > 0x10ffff:
+ _raise_not_ord(value, self.base.position)
+
+ self.lookahead = c
+
+ return True
diff --git a/src/_reader_ucs.pyx b/src/_reader_ucs.pyx
new file mode 100644
index 0000000..f94a5f2
--- /dev/null
+++ b/src/_reader_ucs.pyx
@@ -0,0 +1,52 @@
+cdef struct ReaderUCS:
+ Py_ssize_t remaining
+ Py_ssize_t position
+ Py_ssize_t maxdepth
+
+
+cdef struct ReaderUCS1:
+ ReaderUCS base
+ const Py_UCS1 *string
+
+
+cdef struct ReaderUCS2:
+ ReaderUCS base
+ const Py_UCS2 *string
+
+
+cdef struct ReaderUCS4:
+ ReaderUCS base
+ const Py_UCS4 *string
+
+
+ctypedef ReaderUCS1 &ReaderUCS1Ref
+ctypedef ReaderUCS2 &ReaderUCS2Ref
+ctypedef ReaderUCS4 &ReaderUCS4Ref
+
+ctypedef Py_UCS1 *UCS1String
+ctypedef Py_UCS2 *UCS2String
+ctypedef Py_UCS4 *UCS4String
+
+ctypedef fused ReaderUCSRef:
+ ReaderUCS1Ref
+ ReaderUCS2Ref
+ ReaderUCS4Ref
+
+ctypedef fused UCSString:
+ UCS1String
+ UCS2String
+ UCS4String
+
+
+cdef inline int32_t _reader_ucs_good(ReaderUCSRef self):
+ return self.base.remaining > 0
+
+
+cdef inline uint32_t _reader_ucs_get(ReaderUCSRef self):
+ cdef int32_t c = self.string[0]
+
+ self.string += 1
+ self.base.remaining -= 1
+ self.base.position += 1
+
+ return cast_to_uint32(c)
diff --git a/src/_readers.pyx b/src/_readers.pyx
new file mode 100644
index 0000000..fff1ba0
--- /dev/null
+++ b/src/_readers.pyx
@@ -0,0 +1,37 @@
+ctypedef fused ReaderRef:
+ ReaderUCSRef
+ ReaderCallbackRef
+
+
+cdef boolean _reader_enter(ReaderRef self) except False:
+ if self.base.maxdepth == 0:
+ _raise_nesting(_reader_tell(self))
+
+ Py_EnterRecursiveCall(' while decoding nested JSON5 object')
+
+ self.base.maxdepth -= 1
+
+ return True
+
+
+cdef void _reader_leave(ReaderRef self):
+ Py_LeaveRecursiveCall()
+ self.base.maxdepth += 1
+
+
+cdef inline Py_ssize_t _reader_tell(ReaderRef self):
+ return self.base.position
+
+
+cdef inline uint32_t _reader_get(ReaderRef self):
+ if ReaderRef in ReaderUCSRef:
+ return _reader_ucs_get(self)
+ elif ReaderRef is ReaderCallbackRef:
+ return _reader_Callback_get(self)
+
+
+cdef int32_t _reader_good(ReaderRef self) except -1:
+ if ReaderRef in ReaderUCSRef:
+ return _reader_ucs_good(self)
+ elif ReaderRef is ReaderCallbackRef:
+ return _reader_Callback_good(self)
diff --git a/src/_unicode.pyx b/src/_unicode.pyx
new file mode 100644
index 0000000..3801b20
--- /dev/null
+++ b/src/_unicode.pyx
@@ -0,0 +1,131 @@
+cdef boolean _is_line_terminator(uint32_t c) nogil:
+ # https://www.ecma-international.org/ecma-262/5.1/#sec-7.3
+ if expect(c <= 0x00FF, True):
+ return c in (
+ 0x000A, # Line Feed <LF>
+ 0x000D, # Carriage Return <CR>
+ )
+ elif expect(c <= 0xFFFF, True):
+ return c in (
+ 0x2028, # Line separator <LS>
+ 0x2029, # Paragraph separator <PS>
+ )
+ else:
+ return False
+
+
+cdef boolean _is_ws_zs(uint32_t c) nogil:
+ # https://spec.json5.org/#white-space
+ # https://www.fileformat.info/info/unicode/category/Zs/list.htm
+ if expect(c <= 0x00FF, True):
+ return c in (
+ 0x0009, # Horizontal tab
+ 0x000A, # Line feed
+ 0x000B, # Vertical tab
+ 0x000C, # Form feed
+ 0x000D, # Carriage return
+ 0x0020, # Space
+ 0x0020, # SPACE
+ 0x00A0, # NO-BREAK SPACE
+ 0x00A0, # Non-breaking space
+ )
+ elif expect(c <= 0xFFFF, True):
+ return c in (
+ 0x1680, # OGHAM SPACE MARK
+ 0x2000, # EN QUAD
+ 0x2001, # EM QUAD
+ 0x2002, # EN SPACE
+ 0x2003, # EM SPACE
+ 0x2004, # THREE-PER-EM SPACE
+ 0x2005, # FOUR-PER-EM SPACE
+ 0x2006, # SIX-PER-EM SPACE
+ 0x2007, # FIGURE SPACE
+ 0x2008, # PUNCTUATION SPACE
+ 0x2009, # THIN SPACE
+ 0x200A, # HAIR SPACE
+ 0x2028, # Line separator
+ 0x2029, # Paragraph separator
+ 0x202F, # NARROW NO-BREAK SPACE
+ 0x205F, # MEDIUM MATHEMATICAL SPACE
+ 0x3000, # IDEOGRAPHIC SPACE
+ 0xFEFF, # Byte order mark
+ )
+ else:
+ return c in (
+ NO_EXTRA_DATA,
+ )
+
+
+cdef boolean _is_pc(uint32_t c) nogil:
+ # http://www.fileformat.info/info/unicode/category/Pc/list.htm
+ if expect(c <= 0x00FF, True):
+ return c in (
+ 0x005F, # LOW LINE
+ )
+ elif expect(c <= 0xFFFF, True):
+ return c in (
+ 0x203F, # UNDERTIE
+ 0x2040, # CHARACTER TIE
+ 0x2054, # INVERTED UNDERTIE
+ 0xFE33, # PRESENTATION FORM FOR VERTICAL LOW LINE
+ 0xFE34, # PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
+ 0xFE4D, # DASHED LOW LINE
+ 0xFE4E, # CENTRELINE LOW LINE
+ 0xFE4F, # WAVY LOW LINE
+ 0xFF3F, # FULLWIDTH LOW LINE
+ )
+ else:
+ return False
+
+
+cdef boolean _is_identifier_start(uint32_t c) nogil:
+ return (
+ (b'A' <= c <= b'Z') or
+ (b'a' <= c <= b'z') or
+ (c in b'$_') or
+ Py_UNICODE_ISALPHA(c) or
+ False
+ )
+
+
+cdef boolean _is_identifier_part(uint32_t c) nogil:
+ return (
+ # IdentifierStart
+ _is_identifier_start(c) or
+ # UnicodeCombiningMark
+ _is_mn(c) or
+ _is_mc(c) or
+ # UnicodeDigit
+ Py_UNICODE_ISDIGIT(c) or
+ # UnicodeConnectorPunctuation
+ _is_pc(c) or
+ # ZWNJ and ZWJ
+ (c in (0x200C, 0x200D)) or
+ False
+ )
+
+
+cdef inline boolean _is_x(uint32_t c) nogil:
+ return (c | 0x20) == b'x'
+
+cdef inline boolean _is_e(uint32_t c) nogil:
+ return (c | 0x20) == b'e'
+
+cdef inline boolean _is_decimal(uint32_t c) nogil:
+ return b'0' <= c <= b'9'
+
+cdef inline boolean _is_hex(uint32_t c) nogil:
+ return b'a' <= (c | 0x20) <= b'f'
+
+cdef inline boolean _is_hexadecimal(uint32_t c) nogil:
+ return _is_decimal(c) or _is_hex(c)
+
+cdef boolean _is_in_float_representation(uint32_t c) nogil:
+ if _is_decimal(c):
+ return True
+ if _is_e(c):
+ return True
+ elif c in b'.+-':
+ return True
+ else:
+ return False
diff --git a/src/_unicode_mc.pyx b/src/_unicode_mc.pyx
new file mode 100644
index 0000000..af5b9d6
--- /dev/null
+++ b/src/_unicode_mc.pyx
@@ -0,0 +1,172 @@
+# http://www.fileformat.info/info/unicode/category/Mc/list.htm
+# True if `c` is in Unicode general category Mc (Spacing Combining Mark).
+# Generated lookup table; used by _is_identifier_part (UnicodeCombiningMark).
+cdef boolean _is_mc(uint32_t c) nogil:
+    if expect(c <= 0x00FF, True):
+        # Fast path: no Mc characters exist below U+0100.
+        return False
+    elif expect(c <= 0x0FFF, True):
+        # BUGFIX: this bound was `c <= 0xFFFF`, which routed every BMP
+        # character into this branch even though this table only covers
+        # U+0903..U+0F7F.  Mc characters such as U+102B (MYANMAR VOWEL SIGN
+        # TALL AA) were then misclassified as False, and the whole BMP part
+        # of the second table below was unreachable.  The generated data is
+        # split at U+1000, so the branch bound must be 0x0FFF.
+        return (
+            (c == 0x00000903) or
+            (c == 0x0000093b) or
+            (0x0000093e <= c and c <= 0x00000940) or
+            (0x00000949 <= c and c <= 0x0000094c) or
+            (0x0000094e <= c and c <= 0x0000094f) or
+            (0x00000982 <= c and c <= 0x00000983) or
+            (0x000009be <= c and c <= 0x000009c0) or
+            (0x000009c7 <= c and c <= 0x000009c8) or
+            (0x000009cb <= c and c <= 0x000009cc) or
+            (c == 0x000009d7) or
+            (c == 0x00000a03) or
+            (0x00000a3e <= c and c <= 0x00000a40) or
+            (c == 0x00000a83) or
+            (0x00000abe <= c and c <= 0x00000ac0) or
+            (c == 0x00000ac9) or
+            (0x00000acb <= c and c <= 0x00000acc) or
+            (0x00000b02 <= c and c <= 0x00000b03) or
+            (c == 0x00000b3e) or
+            (c == 0x00000b40) or
+            (0x00000b47 <= c and c <= 0x00000b48) or
+            (0x00000b4b <= c and c <= 0x00000b4c) or
+            (c == 0x00000b57) or
+            (0x00000bbe <= c and c <= 0x00000bbf) or
+            (0x00000bc1 <= c and c <= 0x00000bc2) or
+            (0x00000bc6 <= c and c <= 0x00000bc8) or
+            (0x00000bca <= c and c <= 0x00000bcc) or
+            (c == 0x00000bd7) or
+            (0x00000c01 <= c and c <= 0x00000c03) or
+            (0x00000c41 <= c and c <= 0x00000c44) or
+            (0x00000c82 <= c and c <= 0x00000c83) or
+            (c == 0x00000cbe) or
+            (0x00000cc0 <= c and c <= 0x00000cc4) or
+            (0x00000cc7 <= c and c <= 0x00000cc8) or
+            (0x00000cca <= c and c <= 0x00000ccb) or
+            (0x00000cd5 <= c and c <= 0x00000cd6) or
+            (0x00000d02 <= c and c <= 0x00000d03) or
+            (0x00000d3e <= c and c <= 0x00000d40) or
+            (0x00000d46 <= c and c <= 0x00000d48) or
+            (0x00000d4a <= c and c <= 0x00000d4c) or
+            (c == 0x00000d57) or
+            (0x00000d82 <= c and c <= 0x00000d83) or
+            (0x00000dcf <= c and c <= 0x00000dd1) or
+            (0x00000dd8 <= c and c <= 0x00000ddf) or
+            (0x00000df2 <= c and c <= 0x00000df3) or
+            (0x00000f3e <= c and c <= 0x00000f3f) or
+            (c == 0x00000f7f) or
+            False
+        )
+    else:
+        # Everything from U+102B upwards (rest of the BMP and astral planes).
+        return (
+            (0x0000102b <= c and c <= 0x0000102c) or
+            (c == 0x00001031) or
+            (c == 0x00001038) or
+            (0x0000103b <= c and c <= 0x0000103c) or
+            (0x00001056 <= c and c <= 0x00001057) or
+            (0x00001062 <= c and c <= 0x00001064) or
+            (0x00001067 <= c and c <= 0x0000106d) or
+            (0x00001083 <= c and c <= 0x00001084) or
+            (0x00001087 <= c and c <= 0x0000108c) or
+            (c == 0x0000108f) or
+            (0x0000109a <= c and c <= 0x0000109c) or
+            (c == 0x000017b6) or
+            (0x000017be <= c and c <= 0x000017c5) or
+            (0x000017c7 <= c and c <= 0x000017c8) or
+            (0x00001923 <= c and c <= 0x00001926) or
+            (0x00001929 <= c and c <= 0x0000192b) or
+            (0x00001930 <= c and c <= 0x00001931) or
+            (0x00001933 <= c and c <= 0x00001938) or
+            (0x00001a19 <= c and c <= 0x00001a1a) or
+            (c == 0x00001a55) or
+            (c == 0x00001a57) or
+            (c == 0x00001a61) or
+            (0x00001a63 <= c and c <= 0x00001a64) or
+            (0x00001a6d <= c and c <= 0x00001a72) or
+            (c == 0x00001b04) or
+            (c == 0x00001b35) or
+            (c == 0x00001b3b) or
+            (0x00001b3d <= c and c <= 0x00001b41) or
+            (0x00001b43 <= c and c <= 0x00001b44) or
+            (c == 0x00001b82) or
+            (c == 0x00001ba1) or
+            (0x00001ba6 <= c and c <= 0x00001ba7) or
+            (c == 0x00001baa) or
+            (c == 0x00001be7) or
+            (0x00001bea <= c and c <= 0x00001bec) or
+            (c == 0x00001bee) or
+            (0x00001bf2 <= c and c <= 0x00001bf3) or
+            (0x00001c24 <= c and c <= 0x00001c2b) or
+            (0x00001c34 <= c and c <= 0x00001c35) or
+            (c == 0x00001ce1) or
+            (0x00001cf2 <= c and c <= 0x00001cf3) or
+            (c == 0x00001cf7) or
+            (0x0000302e <= c and c <= 0x0000302f) or
+            (0x0000a823 <= c and c <= 0x0000a824) or
+            (c == 0x0000a827) or
+            (0x0000a880 <= c and c <= 0x0000a881) or
+            (0x0000a8b4 <= c and c <= 0x0000a8c3) or
+            (0x0000a952 <= c and c <= 0x0000a953) or
+            (c == 0x0000a983) or
+            (0x0000a9b4 <= c and c <= 0x0000a9b5) or
+            (0x0000a9ba <= c and c <= 0x0000a9bb) or
+            (0x0000a9bd <= c and c <= 0x0000a9c0) or
+            (0x0000aa2f <= c and c <= 0x0000aa30) or
+            (0x0000aa33 <= c and c <= 0x0000aa34) or
+            (c == 0x0000aa4d) or
+            (c == 0x0000aa7b) or
+            (c == 0x0000aa7d) or
+            (c == 0x0000aaeb) or
+            (0x0000aaee <= c and c <= 0x0000aaef) or
+            (c == 0x0000aaf5) or
+            (0x0000abe3 <= c and c <= 0x0000abe4) or
+            (0x0000abe6 <= c and c <= 0x0000abe7) or
+            (0x0000abe9 <= c and c <= 0x0000abea) or
+            (c == 0x0000abec) or
+            (c == 0x00011000) or
+            (c == 0x00011002) or
+            (c == 0x00011082) or
+            (0x000110b0 <= c and c <= 0x000110b2) or
+            (0x000110b7 <= c and c <= 0x000110b8) or
+            (c == 0x0001112c) or
+            (c == 0x00011182) or
+            (0x000111b3 <= c and c <= 0x000111b5) or
+            (0x000111bf <= c and c <= 0x000111c0) or
+            (0x0001122c <= c and c <= 0x0001122e) or
+            (0x00011232 <= c and c <= 0x00011233) or
+            (c == 0x00011235) or
+            (0x000112e0 <= c and c <= 0x000112e2) or
+            (0x00011302 <= c and c <= 0x00011303) or
+            (0x0001133e <= c and c <= 0x0001133f) or
+            (0x00011341 <= c and c <= 0x00011344) or
+            (0x00011347 <= c and c <= 0x00011348) or
+            (0x0001134b <= c and c <= 0x0001134d) or
+            (c == 0x00011357) or
+            (0x00011362 <= c and c <= 0x00011363) or
+            (0x00011435 <= c and c <= 0x00011437) or
+            (0x00011440 <= c and c <= 0x00011441) or
+            (c == 0x00011445) or
+            (0x000114b0 <= c and c <= 0x000114b2) or
+            (c == 0x000114b9) or
+            (0x000114bb <= c and c <= 0x000114be) or
+            (c == 0x000114c1) or
+            (0x000115af <= c and c <= 0x000115b1) or
+            (0x000115b8 <= c and c <= 0x000115bb) or
+            (c == 0x000115be) or
+            (0x00011630 <= c and c <= 0x00011632) or
+            (0x0001163b <= c and c <= 0x0001163c) or
+            (c == 0x0001163e) or
+            (c == 0x000116ac) or
+            (0x000116ae <= c and c <= 0x000116af) or
+            (c == 0x000116b6) or
+            (0x00011720 <= c and c <= 0x00011721) or
+            (c == 0x00011726) or
+            (0x00011a07 <= c and c <= 0x00011a08) or
+            (c == 0x00011a39) or
+            (0x00011a57 <= c and c <= 0x00011a58) or
+            (c == 0x00011a97) or
+            (c == 0x00011c2f) or
+            (c == 0x00011c3e) or
+            (c == 0x00011ca9) or
+            (c == 0x00011cb1) or
+            (c == 0x00011cb4) or
+            (0x00016f51 <= c and c <= 0x00016f7e) or
+            (0x0001d165 <= c and c <= 0x0001d166) or
+            (0x0001d16d <= c and c <= 0x0001d172) or
+            False
+        )
diff --git a/src/_unicode_mn.pyx b/src/_unicode_mn.pyx
new file mode 100644
index 0000000..c25a354
--- /dev/null
+++ b/src/_unicode_mn.pyx
@@ -0,0 +1,330 @@
+# ranges = []
+# last = -2
+# for x in xx:
+# if x == last + 1:
+# ranges[-1][1] = x
+# else:
+# ranges.append([x, x])
+# last = x
+#
+# p = ' (\n'
+# for start, end in ranges:
+# if start < end:
+# p += f' (0x{start:08x} <= c and c <= 0x{end:08x}) or\n'
+# else:
+# p += f' (c == 0x{start:08x}) or\n'
+# p += ' False\n )\n'
+#
+
+# http://www.fileformat.info/info/unicode/category/Mn/list.htm
+# True if `c` is in Unicode general category Mn (Non-Spacing Mark).
+# Generated lookup table; used by _is_identifier_part (UnicodeCombiningMark).
+cdef boolean _is_mn(uint32_t c) nogil:
+    if expect(c <= 0x00FF, True):
+        # Fast path: no Mn characters exist below U+0100 (Mn starts at U+0300).
+        return False
+    elif expect(c <= 0x0FFF, True):
+        # BUGFIX: this bound was `c <= 0xFFFF`, which routed every BMP
+        # character into this branch even though this table only covers
+        # U+0300..U+0FC6.  Mn characters such as U+102D were then
+        # misclassified as False, and the BMP part of the second table below
+        # was unreachable.  The generated data is split at U+1000, so the
+        # branch bound must be 0x0FFF.
+        return (
+            (0x00000300 <= c and c <= 0x0000036f) or
+            (0x00000483 <= c and c <= 0x00000487) or
+            (0x00000591 <= c and c <= 0x000005bd) or
+            (c == 0x000005bf) or
+            (0x000005c1 <= c and c <= 0x000005c2) or
+            (0x000005c4 <= c and c <= 0x000005c5) or
+            (c == 0x000005c7) or
+            (0x00000610 <= c and c <= 0x0000061a) or
+            (0x0000064b <= c and c <= 0x0000065f) or
+            (c == 0x00000670) or
+            (0x000006d6 <= c and c <= 0x000006dc) or
+            (0x000006df <= c and c <= 0x000006e4) or
+            (0x000006e7 <= c and c <= 0x000006e8) or
+            (0x000006ea <= c and c <= 0x000006ed) or
+            (c == 0x00000711) or
+            (0x00000730 <= c and c <= 0x0000074a) or
+            (0x000007a6 <= c and c <= 0x000007b0) or
+            (0x000007eb <= c and c <= 0x000007f3) or
+            (0x00000816 <= c and c <= 0x00000819) or
+            (0x0000081b <= c and c <= 0x00000823) or
+            (0x00000825 <= c and c <= 0x00000827) or
+            (0x00000829 <= c and c <= 0x0000082d) or
+            (0x00000859 <= c and c <= 0x0000085b) or
+            (0x000008d4 <= c and c <= 0x000008e1) or
+            (0x000008e3 <= c and c <= 0x00000902) or
+            (c == 0x0000093a) or
+            (c == 0x0000093c) or
+            (0x00000941 <= c and c <= 0x00000948) or
+            (c == 0x0000094d) or
+            (0x00000951 <= c and c <= 0x00000957) or
+            (0x00000962 <= c and c <= 0x00000963) or
+            (c == 0x00000981) or
+            (c == 0x000009bc) or
+            (0x000009c1 <= c and c <= 0x000009c4) or
+            (c == 0x000009cd) or
+            (0x000009e2 <= c and c <= 0x000009e3) or
+            (0x00000a01 <= c and c <= 0x00000a02) or
+            (c == 0x00000a3c) or
+            (0x00000a41 <= c and c <= 0x00000a42) or
+            (0x00000a47 <= c and c <= 0x00000a48) or
+            (0x00000a4b <= c and c <= 0x00000a4d) or
+            (c == 0x00000a51) or
+            (0x00000a70 <= c and c <= 0x00000a71) or
+            (0x00000a81 <= c and c <= 0x00000a82) or
+            (c == 0x00000abc) or
+            (0x00000ac1 <= c and c <= 0x00000ac5) or
+            (0x00000ac7 <= c and c <= 0x00000ac8) or
+            (c == 0x00000acd) or
+            (0x00000ae2 <= c and c <= 0x00000ae3) or
+            (0x00000afa <= c and c <= 0x00000aff) or
+            (c == 0x00000b01) or
+            (c == 0x00000b3c) or
+            (c == 0x00000b3f) or
+            (0x00000b41 <= c and c <= 0x00000b44) or
+            (c == 0x00000b4d) or
+            (c == 0x00000b56) or
+            (0x00000b62 <= c and c <= 0x00000b63) or
+            (c == 0x00000b82) or
+            (c == 0x00000bc0) or
+            (c == 0x00000bcd) or
+            (c == 0x00000c00) or
+            (0x00000c3e <= c and c <= 0x00000c40) or
+            (0x00000c46 <= c and c <= 0x00000c48) or
+            (0x00000c4a <= c and c <= 0x00000c4d) or
+            (0x00000c55 <= c and c <= 0x00000c56) or
+            (0x00000c62 <= c and c <= 0x00000c63) or
+            (c == 0x00000c81) or
+            (c == 0x00000cbc) or
+            (c == 0x00000cbf) or
+            (c == 0x00000cc6) or
+            (0x00000ccc <= c and c <= 0x00000ccd) or
+            (0x00000ce2 <= c and c <= 0x00000ce3) or
+            (0x00000d00 <= c and c <= 0x00000d01) or
+            (0x00000d3b <= c and c <= 0x00000d3c) or
+            (0x00000d41 <= c and c <= 0x00000d44) or
+            (c == 0x00000d4d) or
+            (0x00000d62 <= c and c <= 0x00000d63) or
+            (c == 0x00000dca) or
+            (0x00000dd2 <= c and c <= 0x00000dd4) or
+            (c == 0x00000dd6) or
+            (c == 0x00000e31) or
+            (0x00000e34 <= c and c <= 0x00000e3a) or
+            (0x00000e47 <= c and c <= 0x00000e4e) or
+            (c == 0x00000eb1) or
+            (0x00000eb4 <= c and c <= 0x00000eb9) or
+            (0x00000ebb <= c and c <= 0x00000ebc) or
+            (0x00000ec8 <= c and c <= 0x00000ecd) or
+            (0x00000f18 <= c and c <= 0x00000f19) or
+            (c == 0x00000f35) or
+            (c == 0x00000f37) or
+            (c == 0x00000f39) or
+            (0x00000f71 <= c and c <= 0x00000f7e) or
+            (0x00000f80 <= c and c <= 0x00000f84) or
+            (0x00000f86 <= c and c <= 0x00000f87) or
+            (0x00000f8d <= c and c <= 0x00000f97) or
+            (0x00000f99 <= c and c <= 0x00000fbc) or
+            (c == 0x00000fc6) or
+            False
+        )
+    else:
+        # Everything from U+102D upwards (rest of the BMP and astral planes).
+        return (
+            (0x0000102d <= c and c <= 0x00001030) or
+            (0x00001032 <= c and c <= 0x00001037) or
+            (0x00001039 <= c and c <= 0x0000103a) or
+            (0x0000103d <= c and c <= 0x0000103e) or
+            (0x00001058 <= c and c <= 0x00001059) or
+            (0x0000105e <= c and c <= 0x00001060) or
+            (0x00001071 <= c and c <= 0x00001074) or
+            (c == 0x00001082) or
+            (0x00001085 <= c and c <= 0x00001086) or
+            (c == 0x0000108d) or
+            (c == 0x0000109d) or
+            (0x0000135d <= c and c <= 0x0000135f) or
+            (0x00001712 <= c and c <= 0x00001714) or
+            (0x00001732 <= c and c <= 0x00001734) or
+            (0x00001752 <= c and c <= 0x00001753) or
+            (0x00001772 <= c and c <= 0x00001773) or
+            (0x000017b4 <= c and c <= 0x000017b5) or
+            (0x000017b7 <= c and c <= 0x000017bd) or
+            (c == 0x000017c6) or
+            (0x000017c9 <= c and c <= 0x000017d3) or
+            (c == 0x000017dd) or
+            (0x0000180b <= c and c <= 0x0000180d) or
+            (0x00001885 <= c and c <= 0x00001886) or
+            (c == 0x000018a9) or
+            (0x00001920 <= c and c <= 0x00001922) or
+            (0x00001927 <= c and c <= 0x00001928) or
+            (c == 0x00001932) or
+            (0x00001939 <= c and c <= 0x0000193b) or
+            (0x00001a17 <= c and c <= 0x00001a18) or
+            (c == 0x00001a1b) or
+            (c == 0x00001a56) or
+            (0x00001a58 <= c and c <= 0x00001a5e) or
+            (c == 0x00001a60) or
+            (c == 0x00001a62) or
+            (0x00001a65 <= c and c <= 0x00001a6c) or
+            (0x00001a73 <= c and c <= 0x00001a7c) or
+            (c == 0x00001a7f) or
+            (0x00001ab0 <= c and c <= 0x00001abd) or
+            (0x00001b00 <= c and c <= 0x00001b03) or
+            (c == 0x00001b34) or
+            (0x00001b36 <= c and c <= 0x00001b3a) or
+            (c == 0x00001b3c) or
+            (c == 0x00001b42) or
+            (0x00001b6b <= c and c <= 0x00001b73) or
+            (0x00001b80 <= c and c <= 0x00001b81) or
+            (0x00001ba2 <= c and c <= 0x00001ba5) or
+            (0x00001ba8 <= c and c <= 0x00001ba9) or
+            (0x00001bab <= c and c <= 0x00001bad) or
+            (c == 0x00001be6) or
+            (0x00001be8 <= c and c <= 0x00001be9) or
+            (c == 0x00001bed) or
+            (0x00001bef <= c and c <= 0x00001bf1) or
+            (0x00001c2c <= c and c <= 0x00001c33) or
+            (0x00001c36 <= c and c <= 0x00001c37) or
+            (0x00001cd0 <= c and c <= 0x00001cd2) or
+            (0x00001cd4 <= c and c <= 0x00001ce0) or
+            (0x00001ce2 <= c and c <= 0x00001ce8) or
+            (c == 0x00001ced) or
+            (c == 0x00001cf4) or
+            (0x00001cf8 <= c and c <= 0x00001cf9) or
+            (0x00001dc0 <= c and c <= 0x00001df9) or
+            (0x00001dfb <= c and c <= 0x00001dff) or
+            (0x000020d0 <= c and c <= 0x000020dc) or
+            (c == 0x000020e1) or
+            (0x000020e5 <= c and c <= 0x000020f0) or
+            (0x00002cef <= c and c <= 0x00002cf1) or
+            (c == 0x00002d7f) or
+            (0x00002de0 <= c and c <= 0x00002dff) or
+            (0x0000302a <= c and c <= 0x0000302d) or
+            (0x00003099 <= c and c <= 0x0000309a) or
+            (c == 0x0000a66f) or
+            (0x0000a674 <= c and c <= 0x0000a67d) or
+            (0x0000a69e <= c and c <= 0x0000a69f) or
+            (0x0000a6f0 <= c and c <= 0x0000a6f1) or
+            (c == 0x0000a802) or
+            (c == 0x0000a806) or
+            (c == 0x0000a80b) or
+            (0x0000a825 <= c and c <= 0x0000a826) or
+            (0x0000a8c4 <= c and c <= 0x0000a8c5) or
+            (0x0000a8e0 <= c and c <= 0x0000a8f1) or
+            (0x0000a926 <= c and c <= 0x0000a92d) or
+            (0x0000a947 <= c and c <= 0x0000a951) or
+            (0x0000a980 <= c and c <= 0x0000a982) or
+            (c == 0x0000a9b3) or
+            (0x0000a9b6 <= c and c <= 0x0000a9b9) or
+            (c == 0x0000a9bc) or
+            (c == 0x0000a9e5) or
+            (0x0000aa29 <= c and c <= 0x0000aa2e) or
+            (0x0000aa31 <= c and c <= 0x0000aa32) or
+            (0x0000aa35 <= c and c <= 0x0000aa36) or
+            (c == 0x0000aa43) or
+            (c == 0x0000aa4c) or
+            (c == 0x0000aa7c) or
+            (c == 0x0000aab0) or
+            (0x0000aab2 <= c and c <= 0x0000aab4) or
+            (0x0000aab7 <= c and c <= 0x0000aab8) or
+            (0x0000aabe <= c and c <= 0x0000aabf) or
+            (c == 0x0000aac1) or
+            (0x0000aaec <= c and c <= 0x0000aaed) or
+            (c == 0x0000aaf6) or
+            (c == 0x0000abe5) or
+            (c == 0x0000abe8) or
+            (c == 0x0000abed) or
+            (c == 0x0000fb1e) or
+            (0x0000fe00 <= c and c <= 0x0000fe0f) or
+            (0x0000fe20 <= c and c <= 0x0000fe2f) or
+            (c == 0x000101fd) or
+            (c == 0x000102e0) or
+            (0x00010376 <= c and c <= 0x0001037a) or
+            (0x00010a01 <= c and c <= 0x00010a03) or
+            (0x00010a05 <= c and c <= 0x00010a06) or
+            (0x00010a0c <= c and c <= 0x00010a0f) or
+            (0x00010a38 <= c and c <= 0x00010a3a) or
+            (c == 0x00010a3f) or
+            (0x00010ae5 <= c and c <= 0x00010ae6) or
+            (c == 0x00011001) or
+            (0x00011038 <= c and c <= 0x00011046) or
+            (0x0001107f <= c and c <= 0x00011081) or
+            (0x000110b3 <= c and c <= 0x000110b6) or
+            (0x000110b9 <= c and c <= 0x000110ba) or
+            (0x00011100 <= c and c <= 0x00011102) or
+            (0x00011127 <= c and c <= 0x0001112b) or
+            (0x0001112d <= c and c <= 0x00011134) or
+            (c == 0x00011173) or
+            (0x00011180 <= c and c <= 0x00011181) or
+            (0x000111b6 <= c and c <= 0x000111be) or
+            (0x000111ca <= c and c <= 0x000111cc) or
+            (0x0001122f <= c and c <= 0x00011231) or
+            (c == 0x00011234) or
+            (0x00011236 <= c and c <= 0x00011237) or
+            (c == 0x0001123e) or
+            (c == 0x000112df) or
+            (0x000112e3 <= c and c <= 0x000112ea) or
+            (0x00011300 <= c and c <= 0x00011301) or
+            (c == 0x0001133c) or
+            (c == 0x00011340) or
+            (0x00011366 <= c and c <= 0x0001136c) or
+            (0x00011370 <= c and c <= 0x00011374) or
+            (0x00011438 <= c and c <= 0x0001143f) or
+            (0x00011442 <= c and c <= 0x00011444) or
+            (c == 0x00011446) or
+            (0x000114b3 <= c and c <= 0x000114b8) or
+            (c == 0x000114ba) or
+            (0x000114bf <= c and c <= 0x000114c0) or
+            (0x000114c2 <= c and c <= 0x000114c3) or
+            (0x000115b2 <= c and c <= 0x000115b5) or
+            (0x000115bc <= c and c <= 0x000115bd) or
+            (0x000115bf <= c and c <= 0x000115c0) or
+            (0x000115dc <= c and c <= 0x000115dd) or
+            (0x00011633 <= c and c <= 0x0001163a) or
+            (c == 0x0001163d) or
+            (0x0001163f <= c and c <= 0x00011640) or
+            (c == 0x000116ab) or
+            (c == 0x000116ad) or
+            (0x000116b0 <= c and c <= 0x000116b5) or
+            (c == 0x000116b7) or
+            (0x0001171d <= c and c <= 0x0001171f) or
+            (0x00011722 <= c and c <= 0x00011725) or
+            (0x00011727 <= c and c <= 0x0001172b) or
+            (0x00011a01 <= c and c <= 0x00011a06) or
+            (0x00011a09 <= c and c <= 0x00011a0a) or
+            (0x00011a33 <= c and c <= 0x00011a38) or
+            (0x00011a3b <= c and c <= 0x00011a3e) or
+            (c == 0x00011a47) or
+            (0x00011a51 <= c and c <= 0x00011a56) or
+            (0x00011a59 <= c and c <= 0x00011a5b) or
+            (0x00011a8a <= c and c <= 0x00011a96) or
+            (0x00011a98 <= c and c <= 0x00011a99) or
+            (0x00011c30 <= c and c <= 0x00011c36) or
+            (0x00011c38 <= c and c <= 0x00011c3d) or
+            (c == 0x00011c3f) or
+            (0x00011c92 <= c and c <= 0x00011ca7) or
+            (0x00011caa <= c and c <= 0x00011cb0) or
+            (0x00011cb2 <= c and c <= 0x00011cb3) or
+            (0x00011cb5 <= c and c <= 0x00011cb6) or
+            (0x00011d31 <= c and c <= 0x00011d36) or
+            (c == 0x00011d3a) or
+            (0x00011d3c <= c and c <= 0x00011d3d) or
+            (0x00011d3f <= c and c <= 0x00011d45) or
+            (c == 0x00011d47) or
+            (0x00016af0 <= c and c <= 0x00016af4) or
+            (0x00016b30 <= c and c <= 0x00016b36) or
+            (0x00016f8f <= c and c <= 0x00016f92) or
+            (0x0001bc9d <= c and c <= 0x0001bc9e) or
+            (0x0001d167 <= c and c <= 0x0001d169) or
+            (0x0001d17b <= c and c <= 0x0001d182) or
+            (0x0001d185 <= c and c <= 0x0001d18b) or
+            (0x0001d1aa <= c and c <= 0x0001d1ad) or
+            (0x0001d242 <= c and c <= 0x0001d244) or
+            (0x0001da00 <= c and c <= 0x0001da36) or
+            (0x0001da3b <= c and c <= 0x0001da6c) or
+            (c == 0x0001da75) or
+            (c == 0x0001da84) or
+            (0x0001da9b <= c and c <= 0x0001da9f) or
+            (0x0001daa1 <= c and c <= 0x0001daaf) or
+            (0x0001e000 <= c and c <= 0x0001e006) or
+            (0x0001e008 <= c and c <= 0x0001e018) or
+            (0x0001e01b <= c and c <= 0x0001e021) or
+            (0x0001e023 <= c and c <= 0x0001e024) or
+            (0x0001e026 <= c and c <= 0x0001e02a) or
+            (0x0001e8d0 <= c and c <= 0x0001e8d6) or
+            (0x0001e944 <= c and c <= 0x0001e94a) or
+            (0x000e0100 <= c and c <= 0x000e01ef) or
+            False
+        )
diff --git a/src/_writer_callback.pyx b/src/_writer_callback.pyx
new file mode 100644
index 0000000..ebe5de6
--- /dev/null
+++ b/src/_writer_callback.pyx
@@ -0,0 +1,41 @@
+cdef struct WriterCallback:
+    # Writer that forwards every produced chunk to a Python callable.
+    Writer base          # must come first: Writer* pointers are cast to WriterCallback*
+    PyObject *callback   # the Python callable invoked via CallFunction
+
+
+cdef boolean _WriterCbBytes_append_c(Writer &writer_, char datum) except False:
+    # Emit a single byte: calls the callback with a length-1 bytes object
+    # (Py_BuildValue-style 'c' format).  Errors propagate via `except False`.
+    cdef WriterCallback *writer = <WriterCallback*> &writer_
+
+    CallFunction(writer.callback, b'c', datum)
+
+    return True
+
+
+cdef boolean _WriterCbBytes_append_s(Writer &writer_, const char *s, Py_ssize_t length) except False:
+    # Emit a buffer: calls the callback with a bytes object ('y#' format).
+    cdef WriterCallback *writer = <WriterCallback*> &writer_
+
+    # Nothing to emit for empty (or non-positive) input.
+    if expect(length <= 0, False):
+        return True
+
+    # NOTE(review): `length` is narrowed to int for the '#' format — buffers
+    # larger than INT_MAX would be truncated; confirm callers cannot produce those.
+    CallFunction(writer.callback, b'y#', s, <int> length)
+
+    return True
+
+
+cdef boolean _WriterCbStr_append_c(Writer &writer_, char datum) except False:
+    # Emit a single character: calls the callback with a length-1 str object
+    # ('C' format), the text counterpart of _WriterCbBytes_append_c.
+    cdef WriterCallback *writer = <WriterCallback*> &writer_
+
+    CallFunction(writer.callback, b'C', datum)
+
+    return True
+
+
+cdef boolean _WriterCbStr_append_s(Writer &writer_, const char *s, Py_ssize_t length) except False:
+    # Emit a buffer as text: calls the callback with a str object ('U#' format).
+    cdef WriterCallback *writer = <WriterCallback*> &writer_
+
+    # Nothing to emit for empty (or non-positive) input.
+    if expect(length <= 0, False):
+        return True
+
+    # NOTE(review): same int narrowing caveat as _WriterCbBytes_append_s.
+    CallFunction(writer.callback, b'U#', s, <int> length)
+
+    return True
diff --git a/src/_writer_noop.pyx b/src/_writer_noop.pyx
new file mode 100644
index 0000000..3f4528b
--- /dev/null
+++ b/src/_writer_noop.pyx
@@ -0,0 +1,15 @@
+cdef struct WriterNoop:
+    # Writer whose append/reserve operations discard all output.
+    Writer base
+
+
+cdef boolean _WriterNoop_reserve(WriterRef writer_, size_t amount) except False:
+    # No-op: nothing to reserve when output is discarded.
+    return True
+
+
+cdef boolean _WriterNoop_append_c(Writer &writer_, char datum) except False:
+    # No-op: the byte is discarded.
+    return True
+
+
+cdef boolean _WriterNoop_append_s(Writer &writer_, const char *s,
+                                  Py_ssize_t length) except False:
+    # No-op: the buffer is discarded.
+    return True
diff --git a/src/_writer_reallocatable.pyx b/src/_writer_reallocatable.pyx
new file mode 100644
index 0000000..15bd3cd
--- /dev/null
+++ b/src/_writer_reallocatable.pyx
@@ -0,0 +1,61 @@
+cdef struct WriterReallocatable:
+    # Writer that appends into a growable heap buffer (via ObjectRealloc).
+    Writer base      # must come first: Writer* pointers are cast to WriterReallocatable*
+    size_t position  # number of bytes written so far
+    size_t length    # allocated capacity (one extra byte beyond this is kept for a NUL)
+    void *obj        # the buffer itself
+
+
+cdef boolean _WriterReallocatable_reserve(WriterRef writer_, size_t amount) except False:
+    # Ensure the buffer can hold `amount` more bytes past `position`.
+    # Grows geometrically (~1.25x plus a 32-byte pad) and always allocates one
+    # spare byte for a trailing NUL.  Raises MemoryError on overflow or if the
+    # reallocation fails.
+    cdef size_t current_size
+    cdef size_t needed_size
+    cdef size_t new_size
+    cdef void *temp
+    cdef WriterReallocatable *writer = <WriterReallocatable*> &writer_
+
+    if expect(amount <= 0, False):
+        return True
+
+    needed_size = writer.position + amount
+    # BUGFIX: guard against size_t wrap-around in `position + amount`;
+    # previously a huge `amount` could wrap and skip the reallocation.
+    if expect(needed_size < writer.position, False):
+        ErrNoMemory()
+
+    current_size = writer.length
+    if expect(needed_size < current_size, True):
+        return True
+
+    new_size = current_size
+    while new_size <= needed_size:
+        # Geometric growth; the +32 avoids many tiny reallocations early on.
+        new_size = (new_size + 32) + (new_size // 4)
+        if expect(new_size < current_size, False):
+            # Growth arithmetic wrapped around: request is impossibly large.
+            ErrNoMemory()
+
+    # +1 keeps room for a terminating NUL byte.
+    temp = ObjectRealloc(writer.obj, new_size + 1)
+    if temp is NULL:
+        ErrNoMemory()
+
+    writer.obj = temp
+    writer.length = new_size
+
+    return True
+
+
+cdef boolean _WriterReallocatable_append_c(Writer &writer_, char datum) except False:
+    # Append one byte, growing the buffer first if necessary.
+    cdef WriterReallocatable *writer = <WriterReallocatable*> &writer_
+
+    _WriterReallocatable_reserve(writer.base, 1)
+    (<char*> writer.obj)[writer.position] = datum
+    writer.position += 1
+
+    return True
+
+
+cdef boolean _WriterReallocatable_append_s(Writer &writer_, const char *s, Py_ssize_t length) except False:
+    # Append a buffer of `length` bytes, growing the buffer first if necessary.
+    cdef WriterReallocatable *writer = <WriterReallocatable*> &writer_
+
+    # Nothing to copy for empty (or non-positive) input.
+    if expect(length <= 0, False):
+        return True
+
+    _WriterReallocatable_reserve(writer.base, length)
+    memcpy(&(<char*> writer.obj)[writer.position], s, length)
+    writer.position += length
+
+    return True
+
+
diff --git a/src/_writers.pyx b/src/_writers.pyx
new file mode 100644
index 0000000..a64354b
--- /dev/null
+++ b/src/_writers.pyx
@@ -0,0 +1,8 @@
+cdef struct Writer:
+    # Poor-man's vtable: each concrete writer embeds this struct first and
+    # fills in the three function pointers; callers dispatch through them.
+    boolean (*reserve)(Writer &writer, size_t amount) except False
+    boolean (*append_c)(Writer &writer, char datum) except False
+    boolean (*append_s)(Writer &writer, const char *s, Py_ssize_t length) except False
+    # Options object passed through to the writer (PyObject*; ownership not
+    # visible here — presumably a borrowed reference, confirm at call sites).
+    PyObject *options
+
+
+# Cython spelling of a C++ `Writer&` reference type.
+ctypedef Writer &WriterRef
diff --git a/src/native.hpp b/src/native.hpp
new file mode 100644
index 0000000..b608265
--- /dev/null
+++ b/src/native.hpp
@@ -0,0 +1,147 @@
+#pragma once
+
+#include <array>
+#include <cstdint>
+#include <type_traits>
+
+namespace JSON5EncoderCpp {
+inline namespace {
+
+// Converts an unsigned integral value to uint32_t (truncating modulo 2**32).
+// This overload participates only for unsigned `From` types.
+template <class From>
+constexpr std::uint32_t cast_to_uint32(
+    const From &unsigned_from,
+    typename std::enable_if<
+        !std::is_signed<From>::value
+    >::type* = nullptr
+) {
+    return static_cast<std::uint32_t>(unsigned_from);
+}
+
+// Signed overload: reinterprets the value through its unsigned counterpart
+// (well-defined wrap-around) and then delegates to the unsigned overload.
+template <class From>
+constexpr std::uint32_t cast_to_uint32(
+    const From &from,
+    typename std::enable_if<
+        std::is_signed<From>::value
+    >::type* = nullptr
+) {
+    using UnsignedFrom = typename std::make_unsigned<From>::type;
+    UnsignedFrom unsigned_from = static_cast<UnsignedFrom>(from);
+    return cast_to_uint32(unsigned_from);
+}
+
+// Converts any integral value to int32_t via its uint32_t bit pattern.
+// NOTE(review): the final uint32 -> int32 static_cast is implementation-
+// defined for out-of-range values before C++20 (two's-complement wrap on all
+// mainstream compilers) — confirm the project's minimum language level.
+template <class From>
+constexpr std::int32_t cast_to_int32(const From &from) {
+    std::uint32_t unsigned_from = cast_to_uint32(from);
+    return static_cast<std::int32_t>(unsigned_from);
+}
+
+// Value type that is constructible from anything, compares equal to anything,
+// and converts to `true` — a sink/placeholder result type (presumably used
+// where Cython expects a testable return value; confirm at call sites).
+struct AlwaysTrue {
+    constexpr inline AlwaysTrue() = default;
+    inline ~AlwaysTrue() = default;
+
+    constexpr inline AlwaysTrue(const AlwaysTrue&) = default;
+    constexpr inline AlwaysTrue(AlwaysTrue&&) = default;
+    constexpr inline AlwaysTrue &operator =(const AlwaysTrue&) = default;
+    constexpr inline AlwaysTrue &operator =(AlwaysTrue&&) = default;
+
+    // Implicitly constructible from any value (the argument is ignored).
+    template <class T>
+    constexpr inline AlwaysTrue(T&&) : AlwaysTrue() {}
+
+    // Compares equal to any value.
+    template <class T>
+    constexpr inline bool operator ==(T&&) const { return true; }
+
+    // Always truthy.
+    constexpr inline operator bool () const { return true; }
+};
+
+// Returns true if `obj`'s type has a usable tp_iter slot, i.e. iter(obj)
+// would not fail for lack of __iter__.
+// NOTE(review): _PyObject_NextNotImplemented is a private CPython symbol —
+// verify it remains available on every supported CPython version.
+bool obj_has_iter(const PyObject *obj) {
+    auto *i = Py_TYPE(obj)->tp_iter;
+    return (i != nullptr) && (i != &_PyObject_NextNotImplemented);
+}
+
+// Compile-time escape table for string output, covering every BMP code unit.
+struct EscapeDct {
+    using Item = std::array<char, 8>;  // 7 are needed, 1 length
+    static constexpr std::size_t length = 0x10000;
+
+    // items[c] holds the bytes to emit for code unit c; the count of
+    // meaningful bytes is stored in the last slot, items[c][7].
+    Item items[length];
+    // Bit b set => ASCII byte b must be escaped (used by find_unescaped_range).
+    unsigned __int128 is_escaped_array;
+
+    // Builds the 6-byte "\uXXXX" escape for `index`.
+    static constexpr Item unicode_item(size_t index) {
+        constexpr char HEX[] = "0123456789abcdef";
+        return {{
+            '\\',
+            'u',
+            HEX[(index / 16 / 16 / 16 % 16)],
+            HEX[(index / 16 / 16 % 16)],
+            HEX[(index / 16 % 16)],
+            HEX[(index % 16)],
+            0,
+            6,
+        }};
+    }
+
+    // Builds a 2-byte short escape such as "\n".
+    static constexpr Item escaped_item(char chr) {
+        return {{ '\\', chr, 0, 0, 0, 0, 0, 2 }};
+    }
+
+    // Builds a single verbatim byte (no escaping needed).
+    static constexpr Item verbatim_item(size_t chr) {
+        return {{ (char) (unsigned char) chr, 0, 0, 0, 0, 0, 0, 1 }};
+    }
+
+    // True if `c` cannot be copied verbatim: everything >= U+0080 is escaped,
+    // ASCII is answered from the 128-bit mask.
+    inline bool is_escaped(uint32_t c) const {
+        return (c >= 0x0080) || (is_escaped_array & (
+            static_cast<unsigned __int128>(1) <<
+            static_cast<std::uint8_t>(c)
+        ));
+    }
+
+    // Length of the longest prefix of start[0..length) needing no escaping.
+    // NOTE(review): start[index] is a possibly-signed char promoted to
+    // uint32_t; bytes >= 0x80 count as escaped under either signedness.
+    inline std::size_t find_unescaped_range(const char *start, Py_ssize_t length) const {
+        Py_ssize_t index = 0;
+        while ((index < length) && !is_escaped(start[index])) {
+            ++index;
+        }
+        return index;
+    }
+
+    // Default every code unit to "\uXXXX", then mark printable ASCII as
+    // verbatim — except quoting characters and &<> (presumably kept escaped
+    // so output can be embedded in HTML/XML; confirm) — and finally install
+    // the short escapes for backslash and common control characters.
+    constexpr EscapeDct() :
+        items(),
+        is_escaped_array(static_cast<unsigned __int128>(0) - 1)  // start: all escaped
+    {
+        for (std::size_t i = 0; i < length; ++i) {
+            items[i] = unicode_item(i);
+        }
+        for (std::size_t i = 0x20; i < 0x7f; ++i) {
+            switch (i) {
+            case '"': case '\'': case '&': case '<': case '>': case '\\':
+                break;  // stays escaped
+            default:
+                items[i] = verbatim_item(i);
+
+                is_escaped_array &= ~(
+                    static_cast<unsigned __int128>(1) <<
+                    static_cast<std::uint8_t>(i)
+                );
+            }
+        }
+        // '\\' keeps its escaped bit (the switch above skipped it) but gets
+        // the short 2-byte form instead of "\u005c".
+        items[(uint8_t) '\\'] = escaped_item('\\');
+        items[(uint8_t) '\b'] = escaped_item('b');
+        items[(uint8_t) '\f'] = escaped_item('f');
+        items[(uint8_t) '\n'] = escaped_item('n');
+        items[(uint8_t) '\r'] = escaped_item('r');
+        items[(uint8_t) '\t'] = escaped_item('t');
+    }
+};
+
+// Singleton escape table, built once by the constexpr constructor.
+const EscapeDct ESCAPE_DCT;
+
+// Version string literal, spliced in from the adjacent VERSION file.
+const char VERSION[] =
+# include "./VERSION"
+;
+static constexpr std::size_t VERSION_LENGTH = sizeof(VERSION) - 1;
+
+// NOTE(review): this includes "./VERSION" again, so LONGDESCRIPTION is byte-
+// identical to VERSION — this looks like a copy-paste error; presumably a
+// separate long-description file (e.g. "./DESCRIPTION") was intended.
+// TODO: confirm the intended file name and fix the include.
+const char LONGDESCRIPTION[] =
+# include "./VERSION"
+;
+static constexpr std::size_t LONGDESCRIPTION_LENGTH = sizeof(LONGDESCRIPTION) - 1;
+
+}
+}