Faster floating-point decoding
diff --git a/.github/workflows/build_linux_wheels.yml b/.github/workflows/build_linux_wheels.yml
new file mode 100644
index 0000000..be08ac7
--- /dev/null
+++ b/.github/workflows/build_linux_wheels.yml
@@ -0,0 +1,60 @@
+name: Build wheels
+
+on:
+  workflow_dispatch:  # manual trigger only; the three inputs below select the build target
+    inputs:
+      os:
+        description: OS
+        required: true
+        default: ubuntu-20.04
+      python:
+        description: Python
+        required: true
+        default: '3.9'  # quoted so YAML keeps it a string; a bare 3.10 would load as the float 3.1
+      manylinux:
+        description: Manylinux
+        required: true
+        default: manylinux2014
+
+jobs:
+  build_wheels:  # single job: build the wheels with cibuildwheel and upload them as an artifact
+    name: Build compat:${{ github.event.inputs.manylinux }}, python:${{ github.event.inputs.python }}, os:${{ github.event.inputs.os }}
+    runs-on: ${{ github.event.inputs.os }}  # runner label comes from the "os" dispatch input
+
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          submodules: true  # also fetch the git submodules (the third-party/ sources)
+
+      - name: Cache pip
+        uses: actions/cache@v2
+        with:
+          key: cache--${{ github.event.inputs.os }}--${{ github.event.inputs.python }}--${{ hashFiles('./requirements.txt') }}  # key changes with os, python or requirements.txt
+          path: ~/.cache/pip
+
+      - name: Setup python
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ github.event.inputs.python }}
+
+      - name: Update pip
+        run: python -m pip install -U pip wheel setuptools
+
+      - name: Install requirements
+        run: python -m pip install -Ur requirements.txt  # -U: upgrade; -r: read requirements.txt (presumably lists cibuildwheel - confirm)
+
+      - name: Prepare for compilation
+        run: make prepare  # Makefile target defined elsewhere in the repository (not shown here)
+
+      - name: Build wheels
+        run: python -m cibuildwheel --output-dir wheelhouse  # wheels are written to ./wheelhouse
+        env:
+          CIBW_SKIP: "cp27-* pp27-*"  # skip Python 2.7 wheels
+          CIBW_MANYLINUX_X86_64_IMAGE: ${{ github.event.inputs.manylinux }}  # the "manylinux" input picks the x86_64 image ...
+          CIBW_MANYLINUX_I686_IMAGE: ${{ github.event.inputs.manylinux }}  # ... and the same value is used for i686
+
+      - name: Store artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: Wheelhouse
+          path: ./wheelhouse/*.whl  # every .whl found in the cibuildwheel output directory
diff --git a/.github/workflows/compile_test.yml b/.github/workflows/compile_test.yml
index bd989b3..9928efc 100644
--- a/.github/workflows/compile_test.yml
+++ b/.github/workflows/compile_test.yml
@@ -12,12 +12,13 @@
           - macos-latest
           - windows-latest
         python: [
-          '3.5', '3.6', '3.7', '3.8', '3.9',
-          'pypy3',
+          '3.5',
+          # '3.6', '3.7', '3.8',  # it takes too much GitHub action time to run tests on all versions in between
+          '3.9',
         ]
-        exclude:
-          - os: macos-latest
-            python: pypy3
+        include:
+          - os: ubuntu-latest
+            python: pypy-3.6
         
     name: Python ${{ matrix.python }} on ${{ matrix.os }}
 
@@ -26,8 +27,14 @@
         with:
           submodules: true
 
+      - name: Cache pip
+        uses: actions/cache@v2
+        with:
+          key: cache--${{ matrix.os }}--${{ matrix.python }}--${{ hashFiles('./requirements.txt') }}
+          path: ~/.cache/pip
+
       - name: Setup python
-        uses: actions/setup-python@v2.1.4  # https://github.com/actions/setup-python/issues/171
+        uses: actions/setup-python@v2
         with:
           python-version: ${{ matrix.python }}
 
diff --git a/.gitmodules b/.gitmodules
index 158669b..eb19f3c 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,3 +4,6 @@
 [submodule "third-party/JSONTestSuite"]
 	path = third-party/JSONTestSuite
 	url = https://github.com/nst/JSONTestSuite.git
+[submodule "third-party/fast_double_parser"]
+	path = third-party/fast_double_parser
+	url = https://github.com/lemire/fast_double_parser.git
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f14452a..1a97231 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+### 1.4.9
+
+* Faster floating-point number decoding using [fast_double_parser](https://github.com/lemire/fast_double_parser) by Daniel Lemire
+
 ### 1.4.8
 
 * Update up Unicode 13.0.0
diff --git a/README.rst b/README.rst
index 6db15cd..a2b3cc9 100644
--- a/README.rst
+++ b/README.rst
@@ -40,5 +40,4 @@
 Compatibility
 -------------
 
-At least CPython 3.4 is needed.
-Other interpreters such as Pypy and older CPython versions are not supported and will not be supported.
+At least CPython 3.5 or a recent PyPy3 version is needed.
diff --git a/docs/performance.rst b/docs/performance.rst
index 7fc3af2..5756899 100644
--- a/docs/performance.rst
+++ b/docs/performance.rst
@@ -7,14 +7,13 @@
 Decoder Performance
 -------------------
 
-The library is a bit slower than the shipped ``json`` module for *pure* JSON data.
-If you know that your input does not use JSON5 extension, then this library is probably not what you need.
+The library has about the same speed as the shipped ``json`` module for *pure* JSON data.
 
 * Dataset: https://github.com/zemirco/sf-city-lots-json
-* Version: 3.9.1 (default, Dec  8 2020, 07:51:42)
+* Version: Python 3.9.1+ (default, Feb  5 2021, 13:46:56)
 * CPU: AMD Ryzen 7 2700 @ 3.7GHz
-* :func:`pyjson5.decode`: **3.18** s ± 9.79 ms per loop *(lower is better)*
-* :func:`json.loads`: **2.66** s ± 9.78 ms per loop
+* :func:`pyjson5.decode`: **2.08 s** ± 7.49 ms per loop *(lower is better)*
+* :func:`json.loads`: **2.71 s** ± 12.1 ms per loop
 * The decoder works correcty: ``json.loads(content) == pyjson5.loads(content)``
 
 
diff --git a/src/VERSION b/src/VERSION
index 75e5c76..f9dde59 100644
--- a/src/VERSION
+++ b/src/VERSION
@@ -1 +1 @@
-"1.4.8"
+"1.4.9"
diff --git a/src/_decoder.pyx b/src/_decoder.pyx
index cd3c668..6038863 100644
--- a/src/_decoder.pyx
+++ b/src/_decoder.pyx
@@ -241,6 +241,18 @@
     return result
 
 
+cdef object _decode_double(StackHeapString[char] &buf, Py_ssize_t start):  # turn the text in buf into a Python float
+    cdef double d0
+    cdef const char *end_of_double
+
+    d0 = 0.0  # silence warning
+    end_of_double = parse_number(buf.data(), &d0)  # fast_double_parser; callers NUL-terminate buf before calling
+    if end_of_double != NULL and end_of_double[0] == b'\0':  # accept only a non-NULL result that stops exactly at the NUL
+        return PyFloat_FromDouble(d0)
+
+    _raise_unclosed('NumericLiteral', start)  # presumably raises (helper not shown here); start is forwarded to it
+
+
 cdef object _decode_number_leading_zero(ReaderRef reader, StackHeapString[char] &buf,
                                         int32_t *c_in_out, Py_ssize_t start):
     cdef uint32_t c0
@@ -289,11 +301,12 @@
 
         c_in_out[0] = c1
 
-        buf.push_back(b'\0')
-        try:
-            return PyOS_string_to_double(buf.data(), NULL, NULL)
-        except Exception:
-            _raise_unclosed('NumericLiteral', start)
+        if buf.data()[buf.size() - 1] == b'.':
+            (<char*> buf.data())[buf.size() - 1] = b'\0'
+        else:
+            buf.push_back(b'\0')
+
+        return _decode_double(buf, start)
     elif _is_e(c0):
         while True:
             if not _reader_good(reader):
@@ -321,12 +334,18 @@
                                int32_t *c_in_out, Py_ssize_t start):
     cdef uint32_t c0
     cdef int32_t c1
-    cdef boolean is_float
+    cdef boolean is_float = False
+    cdef boolean was_point = False
+    cdef boolean leading_point = False
 
     c1 = c_in_out[0]
     c0 = cast_to_uint32(c1)
 
-    is_float = False
+    if c0 == b'.':
+        buf.push_back(b'0')
+        is_float = True
+        leading_point = True
+
     while True:
         if _is_decimal(c0):
             pass
@@ -336,8 +355,18 @@
             c1 = cast_to_int32(c0)
             break
 
-        if c0 != b'_':
+        if c0 == b'_':
+            pass
+        elif c0 != b'.':
+            if was_point:
+                was_point = False
+                if not _is_e(c0):
+                    buf.push_back(b'.')
             buf.push_back(<char> <unsigned char> c0)
+        elif not was_point:
+            was_point = True
+        else:
+            _raise_unclosed('NumericLiteral', start)
 
         if not _reader_good(reader):
             c1 = -1
@@ -347,15 +376,20 @@
 
     c_in_out[0] = c1
 
-    buf.push_back(b'\0')
-    try:
-        if is_float:
-            return PyOS_string_to_double(buf.data(), NULL, NULL)
-        else:
-            return PyLong_FromString(buf.data(), NULL, 10)
-    except Exception:
+    if leading_point and buf.size() == 1:  # single '.'
         _raise_unclosed('NumericLiteral', start)
 
+    buf.push_back(b'\0')
+
+    if not is_float:
+        try:
+            return PyLong_FromString(buf.data(), NULL, 10)
+        except Exception:
+            pass
+        _raise_unclosed('NumericLiteral', start)
+    else:
+        return _decode_double(buf, start)
+
 
 cdef object _decode_number(ReaderRef reader, int32_t *c_in_out):
     cdef uint32_t c0
diff --git a/src/_imports.pyx b/src/_imports.pyx
index 34e8ca3..75d9926 100644
--- a/src/_imports.pyx
+++ b/src/_imports.pyx
@@ -8,7 +8,7 @@
     PyBytes_AsStringAndSize, PyBytes_FromStringAndSize, PyBytes_Check,
 )
 from cpython.dict cimport PyDict_SetItem
-from cpython.float cimport PyFloat_Check, PyFloat_AsDouble
+from cpython.float cimport PyFloat_Check, PyFloat_AsDouble, PyFloat_FromDouble
 from cpython.int cimport PyInt_Check
 from cpython.list cimport PyList_Append
 from cpython.long cimport PyLong_FromString, PyLong_Check
@@ -97,10 +97,15 @@
     AlwaysTrue exception_thrown() except True
     void unreachable()
 
+
 cdef extern from 'src/native.hpp' namespace 'JSON5EncoderCpp':
     int iter_next(object iterator, PyObject **value) except -1
 
 
+cdef extern from 'src/native.hpp' nogil:
+    boolean expect 'JSON5EncoderCpp_expect'(boolean actual, boolean expected)
+
+
 cdef extern from 'src/_unicode_cat_of.hpp' namespace 'JSON5EncoderCpp' nogil:
     unsigned unicode_cat_of(uint32_t codepoint)
 
@@ -121,6 +126,10 @@
     DrsKind drs_lookup[128]
 
 
+cdef extern from 'third-party/fast_double_parser/include/fast_double_parser.h' namespace 'fast_double_parser' nogil:
+    const char *parse_number(const char *p, double *outDouble)
+
+
 cdef extern from 'Python.h':
     enum:
         PyUnicode_WCHAR_KIND
@@ -185,7 +194,6 @@
     object ObjectInit 'PyObject_INIT'(PyObject *obj, type cls)
     PyVarObject *ObjectInitVar 'PyObject_InitVar'(PyVarObject *obj, type cls, Py_ssize_t size)
 
-    double PyOS_string_to_double(const char *s, char **endp, PyObject *overflow_exception) except? -1.0
     object PyLong_FromString(const char *str, char **pend, int base)
 
 
@@ -194,10 +202,6 @@
     char data[1]
 
 
-cdef extern from 'src/native.hpp' nogil:
-    boolean expect 'JSON5EncoderCpp_expect'(boolean actual, boolean expected)
-
-
 cdef extern from * nogil:
     enum:
         CYTHON_COMPILING_IN_PYPY
diff --git a/third-party/fast_double_parser b/third-party/fast_double_parser
new file mode 160000
index 0000000..6a47c2a
--- /dev/null
+++ b/third-party/fast_double_parser
@@ -0,0 +1 @@
+Subproject commit 6a47c2a5a1ab350f75bb167e983015d2a2be2647