Make encoder a tiny bit faster and doc performance
diff --git a/docs/index.rst b/docs/index.rst
index 6f09e82..fd6374e 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -38,6 +38,7 @@
     encoder.rst
     decoder.rst
     exceptions.rst
+    performance.rst
 
 
 Quick Summary
diff --git a/docs/performance.rst b/docs/performance.rst
new file mode 100644
index 0000000..4cf000a
--- /dev/null
+++ b/docs/performance.rst
@@ -0,0 +1,34 @@
+Performance
+===========
+
+This library is written in Cython for better performance than a pure-Python implementation could give you.
+
+
+Decoder Performance
+-------------------
+
+The library is a bit slower than the shipped ``json`` module for *pure* JSON data.
+If you know that your input does not use any JSON5 extensions, then this library is probably not what you need.
+
+* Dataset: https://github.com/zemirco/sf-city-lots-json
+* CPU: Core i7-3770 @ 3.40GHz
+* :func:`pyjson5.decode`: **4.58** s ± 68.6 ms per loop *(lower is better)*
+* :func:`json.loads`: **3.27** s ± 27.7 ms per loop
+* The decoder works correctly: ``json.loads(content) == pyjson5.loads(content)``
+
+
+Encoder Performance
+-------------------
+
+The encoder generates pure JSON data if there are no infinite or NaN values in the input, which are invalid in JSON.
+The serialized data is XML-safe, i.e. there are no chevrons ``<>``, ampersands ``&``, apostrophes ``'`` or control characters in the output.
+The output is always ASCII, regardless of whether you call :func:`pyjson5.encode` or :func:`pyjson5.encode_bytes`.
+
+* Dataset: https://github.com/zemirco/sf-city-lots-json
+* CPU: Core i7-3770 @ 3.40GHz
+* :func:`pyjson5.encode`: **8.54** s ± 29.3 ms per loop *(lower is better)*
+* :func:`json.dumps`: **4.68** s ± 20.4 ms per loop
+* :func:`json.dumps` + :func:`xml.sax.saxutils.escape`: **5.02** s ± 141 ms per loop
+* The encoder works correctly: ``obj == json.loads(pyjson5.encode(obj, floatformat='%.16e'))``
+
+Unless you need the advanced settings in :class:`pyjson5.Options`, you most likely won't benefit from using this library as an encoder.
diff --git a/src/_decoder.pyx b/src/_decoder.pyx
index 9f6b8d1..3dc963d 100644
--- a/src/_decoder.pyx
+++ b/src/_decoder.pyx
@@ -511,13 +511,13 @@
             try:
                 value = _decode_recursive(reader, &c0)
             except _DecoderException as ex:
-                result[key] = (<_DecoderException> ex).result
+                PyDict_SetItem(result, key, (<_DecoderException> ex).result)
                 raise
 
             if expect(c0 < 0, False):
                 break
 
-            result[key] = value
+            PyDict_SetItem(result, key, value)
 
             done = _skip_comma(
                 reader, start, <unsigned char>b'}', b'object', &c0,
@@ -550,13 +550,13 @@
             try:
                 value = _decode_recursive(reader, &c0)
             except _DecoderException as ex:
-                result.append((<_DecoderException> ex).result)
+                PyList_Append(result, (<_DecoderException> ex).result)
                 raise
 
             if expect(c0 < 0, False):
                 break
 
-            result.append(value)
+            PyList_Append(result, value)
 
             done = _skip_comma(
                 reader, start, <unsigned char>b']', b'array', &c0,
diff --git a/src/_encoder.pyx b/src/_encoder.pyx
index f151597..1fafc51 100644
--- a/src/_encoder.pyx
+++ b/src/_encoder.pyx
@@ -14,62 +14,83 @@
 
 
 cdef boolean _encode_unicode_impl(WriterRef writer, UCSString data, Py_ssize_t length) except False:
-    cdef char buf[16]
+    cdef char buf[32]
     cdef uint32_t c
     cdef uint32_t s1, s2
-    cdef Py_ssize_t index
     cdef const char *escaped_string
     cdef Py_ssize_t escaped_length
-    cdef size_t unescaped_length
+    cdef size_t unescaped_length, index
+    cdef Py_ssize_t sublength
 
     if length > 0:
         writer.reserve(writer, 2 + length)
         writer.append_c(writer, <char> b'"')
-        if UCSString is UCS1String:
-            while True:
-                unescaped_length = ESCAPE_DCT.find_unescaped_range(<const char*> data, length)
-                if unescaped_length > 0:
+        while True:
+            if UCSString is UCS1String:
+                sublength = length
+            else:
+                sublength = min(length, <Py_ssize_t> sizeof(buf))
+
+            unescaped_length = ESCAPE_DCT.find_unescaped_range(data, sublength)
+            if unescaped_length > 0:
+                if UCSString is UCS1String:
                     writer.append_s(writer, <const char*> data, unescaped_length)
+                else:
+                    for index in range(unescaped_length):
+                        buf[index] = <const char> data[index]
+                    writer.append_s(writer, buf, unescaped_length)
 
-                    data += unescaped_length
-                    length -= unescaped_length
-                    if length <= 0:
-                        break
+                data += unescaped_length
+                length -= unescaped_length
+                if length <= 0:
+                    break
 
-                c = data[0]
+                if UCSString is not UCS1String:
+                    continue
+
+            c = data[0]
+            if (UCSString is UCS1String) or (c < 0x100):
                 escaped_string = &ESCAPE_DCT.items[c][0]
                 escaped_length = ESCAPE_DCT.items[c][7]
                 writer.append_s(writer, escaped_string, escaped_length)
+            elif (UCSString is UCS2String) or (c <= 0xffff):
+                buf[0] = '\\';
+                buf[1] = 'u';
+                buf[2] = HEX[(c >> (4*3)) & 0xf];
+                buf[3] = HEX[(c >> (4*2)) & 0xf];
+                buf[4] = HEX[(c >> (4*1)) & 0xf];
+                buf[5] = HEX[(c >> (4*0)) & 0xf];
+                buf[6] = 0;
 
-                data += 1
-                length -= 1
-                if length <= 0:
-                    break
-        else:
-            for index in range(length):
-                c = data[index]
-                if UCSString is UCS2String:
-                    if not ESCAPE_DCT.is_escaped(c):
-                        writer.append_c(writer, <char> <unsigned char> c)
-                    else:
-                        escaped_string = &ESCAPE_DCT.items[c][0]
-                        escaped_length = ESCAPE_DCT.items[c][7]
-                        writer.append_s(writer, escaped_string, escaped_length)
-                elif UCSString is UCS4String:
-                    if not ESCAPE_DCT.is_escaped(c):
-                        writer.append_c(writer, <char> <unsigned char> c)
-                    elif c < 0x10000:
-                        escaped_string = &ESCAPE_DCT.items[c][0]
-                        escaped_length = ESCAPE_DCT.items[c][7]
-                        writer.append_s(writer, escaped_string, escaped_length)
-                    else:
-                        # surrogate pair
-                        c -= 0x10000
-                        s1 = 0xd800 | ((c >> 10) & 0x3ff)
-                        s2 = 0xdc00 | (c & 0x3ff)
+                writer.append_s(writer, buf, 6);
+            else:
+                # surrogate pair
+                c -= 0x10000
+                s1 = 0xd800 | ((c >> 10) & 0x3ff)
+                s2 = 0xdc00 | (c & 0x3ff)
 
-                        snprintf(buf, sizeof(buf), b'\\u%04x\\u%04x', s1, s2)
-                        writer.append_s(writer, buf, 2 * 6)
+                buf[0x0] = '\\';
+                buf[0x1] = 'u';
+                buf[0x2] = HEX[(s1 >> (4*3)) & 0xf];
+                buf[0x3] = HEX[(s1 >> (4*2)) & 0xf];
+                buf[0x4] = HEX[(s1 >> (4*1)) & 0xf];
+                buf[0x5] = HEX[(s1 >> (4*0)) & 0xf];
+
+                buf[0x6] = '\\';
+                buf[0x7] = 'u';
+                buf[0x8] = HEX[(s2 >> (4*3)) & 0xf];
+                buf[0x9] = HEX[(s2 >> (4*2)) & 0xf];
+                buf[0xa] = HEX[(s2 >> (4*1)) & 0xf];
+                buf[0xb] = HEX[(s2 >> (4*0)) & 0xf];
+
+                buf[0xc] = 0;
+
+                writer.append_s(writer, buf, 12);
+
+            data += 1
+            length -= 1
+            if length <= 0:
+                break
         writer.append_c(writer, <char> b'"')
     else:
         writer.append_s(writer, b'""', 2)
diff --git a/src/_imports.pyx b/src/_imports.pyx
index c139f71..c4d47a8 100644
--- a/src/_imports.pyx
+++ b/src/_imports.pyx
@@ -8,8 +8,10 @@
     PyBytes_AsStringAndSize, PyBytes_FromStringAndSize, PyBytes_Check,
 )
 from cpython.datetime cimport datetime, date, time
+from cpython.dict cimport PyDict_SetItem
 from cpython.float cimport PyFloat_Check, PyFloat_AsDouble
 from cpython.int cimport PyInt_Check
+from cpython.list cimport PyList_Append
 from cpython.long cimport PyLong_FromString, PyLong_Check
 from cpython.object cimport PyObject
 from cpython.type cimport PyType_Check
@@ -57,6 +59,12 @@
     void swap[T](T&, T&)
 
 
+cdef extern from 'Python.h':
+    ctypedef signed char Py_UCS1
+    ctypedef signed short Py_UCS2
+    ctypedef signed long Py_UCS4
+
+
 cdef extern from 'src/native.hpp' namespace 'JSON5EncoderCpp' nogil:
     int32_t cast_to_int32(...)
     uint32_t cast_to_uint32(...)
@@ -65,10 +73,12 @@
     boolean obj_has_iter(object obj)
 
     ctypedef char EscapeDctItem[8]
-    struct EscapeDct:
-        EscapeDctItem items[0x10000]
+    cppclass EscapeDct:
+        EscapeDctItem items[0x100]
         boolean is_escaped(uint32_t c)
-        Py_ssize_t find_unescaped_range(const char *start, Py_ssize_t length)
+        Py_ssize_t find_unescaped_range(const Py_UCS1 *start, Py_ssize_t length)
+        Py_ssize_t find_unescaped_range(const Py_UCS2 *start, Py_ssize_t length)
+        Py_ssize_t find_unescaped_range(const Py_UCS4 *start, Py_ssize_t length)
     EscapeDct ESCAPE_DCT
 
     enum:
@@ -79,12 +89,10 @@
         LONGDESCRIPTION_LENGTH
     const char LONGDESCRIPTION[]
 
+    const char HEX[]
+
 
 cdef extern from 'Python.h':
-    ctypedef signed char Py_UCS1
-    ctypedef signed short Py_UCS2
-    ctypedef signed long Py_UCS4
-
     enum:
         PyUnicode_WCHAR_KIND
         PyUnicode_1BYTE_KIND
diff --git a/src/native.hpp b/src/native.hpp
index b608265..0be19ef 100644
--- a/src/native.hpp
+++ b/src/native.hpp
@@ -58,15 +58,16 @@
     return (i != nullptr) && (i != &_PyObject_NextNotImplemented);
 }
 
+constexpr char HEX[] = "0123456789abcdef";
+
 struct EscapeDct {
     using Item = std::array<char, 8>;  // 7 are needed, 1 length
-    static constexpr std::size_t length = 0x10000;
+    static constexpr std::size_t length = 0x100;
 
     Item items[length];
     unsigned __int128 is_escaped_array;
 
     static constexpr Item unicode_item(size_t index) {
-        constexpr char HEX[] = "0123456789abcdef";
         return {{
             '\\',
             'u',
@@ -87,14 +88,30 @@
         return {{ (char) (unsigned char) chr, 0, 0, 0, 0, 0, 0, 1 }};
     }
 
-    inline bool is_escaped(uint32_t c) const {
+    inline bool is_escaped(std::uint32_t c) const {
         return (c >= 0x0080) || (is_escaped_array & (
             static_cast<unsigned __int128>(1) <<
             static_cast<std::uint8_t>(c)
         ));
     }
 
-    inline std::size_t find_unescaped_range(const char *start, Py_ssize_t length) const {
+    inline std::size_t find_unescaped_range(const Py_UCS1 *start, Py_ssize_t length) const {
+        Py_ssize_t index = 0;
+        while ((index < length) && !is_escaped(start[index])) {
+            ++index;
+        }
+        return index;
+    }
+
+    inline std::size_t find_unescaped_range(const Py_UCS2 *start, Py_ssize_t length) const {
+        Py_ssize_t index = 0;
+        while ((index < length) && !is_escaped(start[index])) {
+            ++index;
+        }
+        return index;
+    }
+
+    inline std::size_t find_unescaped_range(const Py_UCS4 *start, Py_ssize_t length) const {
         Py_ssize_t index = 0;
         while ((index < length) && !is_escaped(start[index])) {
             ++index;
@@ -122,12 +139,12 @@
                     );
             }
         }
-        items[(uint8_t) '\\'] = escaped_item('\\');
-        items[(uint8_t) '\b'] = escaped_item('b');
-        items[(uint8_t) '\f'] = escaped_item('f');
-        items[(uint8_t) '\n'] = escaped_item('n');
-        items[(uint8_t) '\r'] = escaped_item('r');
-        items[(uint8_t) '\t'] = escaped_item('t');
+        items[(std::uint8_t) '\\'] = escaped_item('\\');
+        items[(std::uint8_t) '\b'] = escaped_item('b');
+        items[(std::uint8_t) '\f'] = escaped_item('f');
+        items[(std::uint8_t) '\n'] = escaped_item('n');
+        items[(std::uint8_t) '\r'] = escaped_item('r');
+        items[(std::uint8_t) '\t'] = escaped_item('t');
     }
 };