Faster floating-point decoding
diff --git a/.github/workflows/build_linux_wheels.yml b/.github/workflows/build_linux_wheels.yml
new file mode 100644
index 0000000..be08ac7
--- /dev/null
+++ b/.github/workflows/build_linux_wheels.yml
@@ -0,0 +1,60 @@
+name: Build wheels  # manually triggered: builds wheels with cibuildwheel and uploads them as an artifact
+
+on:
+  workflow_dispatch:
+    inputs:
+      os:
+        description: OS
+        required: true
+        default: ubuntu-20.04
+      python:
+        description: Python
+        required: true
+        default: '3.9'  # quoted on purpose: an unquoted 3.10 would be read as the float 3.1
+      manylinux:
+        description: Manylinux
+        required: true
+        default: manylinux2014
+
+jobs:
+  build_wheels:
+    name: Build compat:${{ github.event.inputs.manylinux }}, python:${{ github.event.inputs.python }}, os:${{ github.event.inputs.os }}
+    runs-on: ${{ github.event.inputs.os }}
+
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          submodules: true  # the third-party sources are tracked as git submodules
+
+      - name: Cache pip
+        uses: actions/cache@v2
+        with:
+          key: cache--${{ github.event.inputs.os }}--${{ github.event.inputs.python }}--${{ hashFiles('./requirements.txt') }}
+          path: ~/.cache/pip
+
+      - name: Setup python
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ github.event.inputs.python }}
+
+      - name: Update pip
+        run: python -m pip install -U pip wheel setuptools
+
+      - name: Install requirements
+        run: python -m pip install -Ur requirements.txt
+
+      - name: Prepare for compilation
+        run: make prepare
+
+      - name: Build wheels
+        run: python -m cibuildwheel --output-dir wheelhouse
+        env:
+          CIBW_SKIP: "cp27-* pp27-*"  # skip Python 2.7 wheels
+          CIBW_MANYLINUX_X86_64_IMAGE: ${{ github.event.inputs.manylinux }}  # same image choice for 64-bit ...
+          CIBW_MANYLINUX_I686_IMAGE: ${{ github.event.inputs.manylinux }}  # ... and 32-bit builds
+
+      - name: Store artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: Wheelhouse
+          path: ./wheelhouse/*.whl
diff --git a/.github/workflows/compile_test.yml b/.github/workflows/compile_test.yml
index bd989b3..9928efc 100644
--- a/.github/workflows/compile_test.yml
+++ b/.github/workflows/compile_test.yml
@@ -12,12 +12,13 @@
- macos-latest
- windows-latest
python: [
- '3.5', '3.6', '3.7', '3.8', '3.9',
- 'pypy3',
+ '3.5',
+ # '3.6', '3.7', '3.8', # it takes too much GitHub action time to run tests on all versions in between
+ '3.9',
]
- exclude:
- - os: macos-latest
- python: pypy3
+ include:
+ - os: ubuntu-latest
+ python: pypy-3.6
name: Python ${{ matrix.python }} on ${{ matrix.os }}
@@ -26,8 +27,14 @@
with:
submodules: true
+ - name: Cache pip
+ uses: actions/cache@v2
+ with:
+ key: cache--${{ matrix.os }}--${{ matrix.python }}--${{ hashFiles('./requirements.txt') }}
+ path: ~/.cache/pip
+
- name: Setup python
- uses: actions/setup-python@v2.1.4 # https://github.com/actions/setup-python/issues/171
+ uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python }}
diff --git a/.gitmodules b/.gitmodules
index 158669b..eb19f3c 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,3 +4,6 @@
[submodule "third-party/JSONTestSuite"]
path = third-party/JSONTestSuite
url = https://github.com/nst/JSONTestSuite.git
+[submodule "third-party/fast_double_parser"]
+ path = third-party/fast_double_parser
+ url = https://github.com/lemire/fast_double_parser.git
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f14452a..1a97231 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+### 1.4.9
+
+* Faster floating-point number decoding using [fast_double_parser](https://github.com/lemire/fast_double_parser) by Daniel Lemire
+
### 1.4.8
* Update to Unicode 13.0.0
diff --git a/README.rst b/README.rst
index 6db15cd..a2b3cc9 100644
--- a/README.rst
+++ b/README.rst
@@ -40,5 +40,4 @@
Compatibility
-------------
-At least CPython 3.4 is needed.
-Other interpreters such as Pypy and older CPython versions are not supported and will not be supported.
+At least CPython 3.5 or a recent PyPy3 version is needed.
diff --git a/docs/performance.rst b/docs/performance.rst
index 7fc3af2..5756899 100644
--- a/docs/performance.rst
+++ b/docs/performance.rst
@@ -7,14 +7,13 @@
Decoder Performance
-------------------
-The library is a bit slower than the shipped ``json`` module for *pure* JSON data.
-If you know that your input does not use JSON5 extension, then this library is probably not what you need.
+The library has about the same speed as the shipped ``json`` module for *pure* JSON data.
* Dataset: https://github.com/zemirco/sf-city-lots-json
-* Version: 3.9.1 (default, Dec 8 2020, 07:51:42)
+* Version: Python 3.9.1+ (default, Feb 5 2021, 13:46:56)
* CPU: AMD Ryzen 7 2700 @ 3.7GHz
-* :func:`pyjson5.decode`: **3.18** s ± 9.79 ms per loop *(lower is better)*
-* :func:`json.loads`: **2.66** s ± 9.78 ms per loop
+* :func:`pyjson5.decode`: **2.08 s** ± 7.49 ms per loop *(lower is better)*
+* :func:`json.loads`: **2.71 s** ± 12.1 ms per loop
* The decoder works correctly: ``json.loads(content) == pyjson5.loads(content)``
diff --git a/src/VERSION b/src/VERSION
index 75e5c76..f9dde59 100644
--- a/src/VERSION
+++ b/src/VERSION
@@ -1 +1 @@
-"1.4.8"
+"1.4.9"
diff --git a/src/_decoder.pyx b/src/_decoder.pyx
index cd3c668..6038863 100644
--- a/src/_decoder.pyx
+++ b/src/_decoder.pyx
@@ -241,6 +241,18 @@
return result
+# Convert the NUL-terminated number text collected in `buf` into a Python float.
+#
+# buf:   character buffer holding the literal; the callers in this file append (or overwrite the
+#        last character with) b'\0' before calling.
+# start: forwarded unchanged to `_raise_unclosed` when the text is rejected
+#        (presumably the position where the literal began -- confirm against the callers).
+#
+# Returns the result of `PyFloat_FromDouble` on success; otherwise control goes to `_raise_unclosed`.
+cdef object _decode_double(StackHeapString[char] &buf, Py_ssize_t start):
+    cdef double d0
+    cdef const char *end_of_double
+
+    d0 = 0.0 # silence warning
+    end_of_double = parse_number(buf.data(), &d0)
+    # Accept only if the parser did not return NULL and stopped exactly on the terminating NUL,
+    # i.e. the whole buffer was consumed and no trailing characters were left over.
+    if end_of_double != NULL and end_of_double[0] == b'\0':
+        return PyFloat_FromDouble(d0)
+
+    # Parse failure or leftover characters: report the literal as a malformed 'NumericLiteral'.
+    # NOTE(review): `_raise_unclosed` is defined outside this hunk; it is assumed to always raise.
+    _raise_unclosed('NumericLiteral', start)
+
+
cdef object _decode_number_leading_zero(ReaderRef reader, StackHeapString[char] &buf,
int32_t *c_in_out, Py_ssize_t start):
cdef uint32_t c0
@@ -289,11 +301,12 @@
c_in_out[0] = c1
- buf.push_back(b'\0')
- try:
- return PyOS_string_to_double(buf.data(), NULL, NULL)
- except Exception:
- _raise_unclosed('NumericLiteral', start)
+ if buf.data()[buf.size() - 1] == b'.':
+ (<char*> buf.data())[buf.size() - 1] = b'\0'
+ else:
+ buf.push_back(b'\0')
+
+ return _decode_double(buf, start)
elif _is_e(c0):
while True:
if not _reader_good(reader):
@@ -321,12 +334,18 @@
int32_t *c_in_out, Py_ssize_t start):
cdef uint32_t c0
cdef int32_t c1
- cdef boolean is_float
+ cdef boolean is_float = False
+ cdef boolean was_point = False
+ cdef boolean leading_point = False
c1 = c_in_out[0]
c0 = cast_to_uint32(c1)
- is_float = False
+ if c0 == b'.':
+ buf.push_back(b'0')
+ is_float = True
+ leading_point = True
+
while True:
if _is_decimal(c0):
pass
@@ -336,8 +355,18 @@
c1 = cast_to_int32(c0)
break
- if c0 != b'_':
+ if c0 == b'_':
+ pass
+ elif c0 != b'.':
+ if was_point:
+ was_point = False
+ if not _is_e(c0):
+ buf.push_back(b'.')
buf.push_back(<char> <unsigned char> c0)
+ elif not was_point:
+ was_point = True
+ else:
+ _raise_unclosed('NumericLiteral', start)
if not _reader_good(reader):
c1 = -1
@@ -347,15 +376,20 @@
c_in_out[0] = c1
- buf.push_back(b'\0')
- try:
- if is_float:
- return PyOS_string_to_double(buf.data(), NULL, NULL)
- else:
- return PyLong_FromString(buf.data(), NULL, 10)
- except Exception:
+ if leading_point and buf.size() == 1: # single '.'
_raise_unclosed('NumericLiteral', start)
+ buf.push_back(b'\0')
+
+ if not is_float:
+ try:
+ return PyLong_FromString(buf.data(), NULL, 10)
+ except Exception:
+ pass
+ _raise_unclosed('NumericLiteral', start)
+ else:
+ return _decode_double(buf, start)
+
cdef object _decode_number(ReaderRef reader, int32_t *c_in_out):
cdef uint32_t c0
diff --git a/src/_imports.pyx b/src/_imports.pyx
index 34e8ca3..75d9926 100644
--- a/src/_imports.pyx
+++ b/src/_imports.pyx
@@ -8,7 +8,7 @@
PyBytes_AsStringAndSize, PyBytes_FromStringAndSize, PyBytes_Check,
)
from cpython.dict cimport PyDict_SetItem
-from cpython.float cimport PyFloat_Check, PyFloat_AsDouble
+from cpython.float cimport PyFloat_Check, PyFloat_AsDouble, PyFloat_FromDouble
from cpython.int cimport PyInt_Check
from cpython.list cimport PyList_Append
from cpython.long cimport PyLong_FromString, PyLong_Check
@@ -97,10 +97,15 @@
AlwaysTrue exception_thrown() except True
void unreachable()
+
cdef extern from 'src/native.hpp' namespace 'JSON5EncoderCpp':
int iter_next(object iterator, PyObject **value) except -1
+cdef extern from 'src/native.hpp' nogil:
+ boolean expect 'JSON5EncoderCpp_expect'(boolean actual, boolean expected)
+
+
cdef extern from 'src/_unicode_cat_of.hpp' namespace 'JSON5EncoderCpp' nogil:
unsigned unicode_cat_of(uint32_t codepoint)
@@ -121,6 +126,10 @@
DrsKind drs_lookup[128]
+# Number parser from the bundled fast_double_parser submodule, declared `nogil`.
+# As used by `_decode_double` in src/_decoder.pyx: `p` is a NUL-terminated buffer, the parsed value
+# is written to `outDouble`, a NULL return is treated as a parse failure, and a non-NULL return is
+# checked to point at the terminating NUL.
+cdef extern from 'third-party/fast_double_parser/include/fast_double_parser.h' namespace 'fast_double_parser' nogil:
+    const char *parse_number(const char *p, double *outDouble)
+
+
cdef extern from 'Python.h':
enum:
PyUnicode_WCHAR_KIND
@@ -185,7 +194,6 @@
object ObjectInit 'PyObject_INIT'(PyObject *obj, type cls)
PyVarObject *ObjectInitVar 'PyObject_InitVar'(PyVarObject *obj, type cls, Py_ssize_t size)
- double PyOS_string_to_double(const char *s, char **endp, PyObject *overflow_exception) except? -1.0
object PyLong_FromString(const char *str, char **pend, int base)
@@ -194,10 +202,6 @@
char data[1]
-cdef extern from 'src/native.hpp' nogil:
- boolean expect 'JSON5EncoderCpp_expect'(boolean actual, boolean expected)
-
-
cdef extern from * nogil:
enum:
CYTHON_COMPILING_IN_PYPY
diff --git a/third-party/fast_double_parser b/third-party/fast_double_parser
new file mode 160000
index 0000000..6a47c2a
--- /dev/null
+++ b/third-party/fast_double_parser
@@ -0,0 +1 @@
+Subproject commit 6a47c2a5a1ab350f75bb167e983015d2a2be2647