Add OSX and Windows compatibility
diff --git a/.github/workflows/compile_test.yml b/.github/workflows/compile_test.yml
index a3fee7c..bd989b3 100644
--- a/.github/workflows/compile_test.yml
+++ b/.github/workflows/compile_test.yml
@@ -7,27 +7,35 @@
runs-on: ${{ matrix.os }}
strategy:
matrix:
- os: [ubuntu-latest, macos-latest, windows-latest]
- python-version: [
+ os:
+ - ubuntu-latest
+ - macos-latest
+ - windows-latest
+ python: [
'3.5', '3.6', '3.7', '3.8', '3.9',
- 'pypy-3.5', 'pypy-3.6', 'pypy-3.7',
+ 'pypy3',
]
+ exclude:
+ - os: macos-latest
+ python: pypy3
- name: Python ${{ matrix.python-version }} on ${{ matrix.os }}
+ name: Python ${{ matrix.python }} on ${{ matrix.os }}
steps:
- - uses: actions/checkout@v1
-
- - name: Update submodules
- run: |
- git submodule init
- git submodule update
+ - uses: actions/checkout@v2
+ with:
+ submodules: true
- name: Setup python
- uses: actions/setup-python@v1
+ uses: actions/setup-python@v2.1.4 # https://github.com/actions/setup-python/issues/171
with:
- python-version: ${{ matrix.python-version }}
- architecture: x64
+ python-version: ${{ matrix.python }}
+
+ - name: Display Python version
+ run: python -c 'import sys; print(sys.version)'
+
+ - name: Update pip
+ run: python -m pip install -U pip wheel setuptools
- name: Install requirements
run: python -m pip install -Ur requirements.txt
diff --git a/MANIFEST.in b/MANIFEST.in
index 8162d85..af1f1b6 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,6 @@
include LICENSE
include make_*.py
+include sha512sum.py
include Makefile
include pyjson5.cpp
include pyjson5.pyx
diff --git a/Makefile b/Makefile
index 744400a..b092944 100644
--- a/Makefile
+++ b/Makefile
@@ -4,6 +4,9 @@
.PHONY: all sdist bdist_wheel clean docs
+export PYTHONUTF8 := 1
+export PYTHONIOENCODING := UTF-8
+
INCLUDES := \
src/VERSION src/DESCRIPTION \
src/_decoder_recursive_select.hpp src/_unicode_cat_of.hpp \
@@ -11,9 +14,9 @@
FILES := Makefile MANIFEST.in pyjson5.pyx README.rst setup.py ${INCLUDES}
-DerivedGeneralCategory.txt:
- wget -O $@ https://www.unicode.org/Public/13.0.0/ucd/extracted/DerivedGeneralCategory.txt
- sha512sum -c $@.sha
+DerivedGeneralCategory.txt: DerivedGeneralCategory.txt.sha
+ curl -s -o $@ https://www.unicode.org/Public/13.0.0/ucd/extracted/DerivedGeneralCategory.txt
+ python sha512sum.py -c $@.sha
src/_unicode_cat_of.hpp: DerivedGeneralCategory.txt make_unicode_categories.py
python make_unicode_categories.py $< $@
diff --git a/make_unicode_categories.py b/make_unicode_categories.py
index bffd073..fe8eba2 100755
--- a/make_unicode_categories.py
+++ b/make_unicode_categories.py
@@ -32,7 +32,7 @@
'nd': IdentifierPart,
}
- planes = defaultdict(lambda: [0] * 0x1_0000)
+ planes = defaultdict(lambda: [0] * 0x10000)
for input_line in input_file:
m = match(r'^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+([A-Z][a-z])', input_line)
@@ -45,7 +45,7 @@
end = int(end or start, 16)
start = int(start, 16)
for i in range(start, end + 1):
- planes[i // 0x1_0000][i % 0x1_0000] = idx
+ planes[i // 0x10000][i % 0x10000] = idx
# per: https://spec.json5.org/#white-space
for i in (0x9, 0xa, 0xb, 0xc, 0xd, 0x20, 0xa0, 0x2028, 0x2028, 0x2029, 0xfeff):
@@ -99,7 +99,7 @@
print(file=output_file)
print(' std::uint16_t plane_idx = std::uint16_t(codepoint / 0x10000);', file=output_file)
- print(' if (__builtin_expect(plane_idx > 16, false)) return 1;', file=output_file)
+ print(' if (JSON5EncoderCpp_expect(plane_idx > 16, false)) return 1;', file=output_file)
print(' std::uint16_t datum_idx = std::uint16_t(codepoint & 0xffff);', file=output_file)
print(' const std::uint8_t *plane = planes[plane_idx];', file=output_file)
print(' return (plane[datum_idx / 4] >> (2 * (datum_idx % 4))) % 4;', file=output_file)
@@ -117,6 +117,6 @@
if __name__ == '__main__':
args = argparser.parse_args()
- with open(args.input, 'rt') as input_file, \
- open(args.output, 'wt') as output_file:
+ with open(str(args.input.resolve()), 'rt') as input_file, \
+ open(str(args.output.resolve()), 'wt') as output_file:
raise SystemExit(main(input_file, output_file))
diff --git a/run-minefield-test.py b/run-minefield-test.py
index 23a34d2..749ee05 100755
--- a/run-minefield-test.py
+++ b/run-minefield-test.py
@@ -2,8 +2,10 @@
from argparse import ArgumentParser
from logging import basicConfig, INFO, getLogger
+from os import name
from pathlib import Path
from subprocess import Popen
+from sys import executable
from colorama import init, Fore
from pyjson5 import decode_io
@@ -26,6 +28,17 @@
good = bad = severe = 0
+ if name != 'nt':
+ code_severe = Fore.RED + '😱'
+ code_good = Fore.CYAN + '😄'
+ code_bad = Fore.YELLOW + '😠'
+ code_ignored = Fore.BLUE + '🙅'
+ else:
+ code_severe = Fore.RED + 'SEVERE'
+ code_good = Fore.CYAN + 'GOOD'
+ code_bad = Fore.YELLOW + 'BAD'
+ code_ignored = Fore.BLUE + 'IGNORED'
+
args = argparser.parse_args()
index = 0
for path in sorted(args.tests.glob('?_?*.json')):
@@ -46,7 +59,7 @@
index += 1
try:
- p = Popen(('/usr/bin/env', 'python', 'transcode-to-json.py', str(path)))
+ p = Popen((executable, 'transcode-to-json.py', str(path)))
outcome = p.wait(5)
except Exception:
logger.error('Error while testing: %s', path, exc_info=True)
@@ -54,20 +67,20 @@
continue
if outcome not in (0, 1):
- code = Fore.RED + '😱'
+ code = code_severe
severe += 1
elif category == 'y':
if outcome == 0:
- code = Fore.CYAN + '😄'
+ code = code_good
good += 1
else:
- code = Fore.YELLOW + '😠'
+ code = code_bad
bad += 1
else:
- code = Fore.BLUE + '🙅'
+ code = code_ignored
print(
- '#', index, ' ', code, ' '
+ '#', index, ' ', code, ' | '
'Category <', category, '> | '
'Test <', name, '> | '
'Actual <', 'pass' if outcome == 0 else 'FAIL', '>',
@@ -77,17 +90,13 @@
is_severe = severe > 0
is_good = bad == 0
- code = (
- Fore.RED + '😱' if is_severe else
- Fore.CYAN + '😄' if is_good else
- Fore.YELLOW + '😠'
- )
+ code = code_severe if is_severe else code_good if is_good else code_bad
print()
print(
- code, ' ',
- good, ' × correct outcome | ',
- bad, ' × wrong outcome | ',
- severe, ' × severe errors',
+ code, ' | ',
+ good, ' correct outcomes | ',
+ bad, ' wrong outcomes | ',
+ severe, ' severe errors',
Fore.RESET,
sep=''
)
diff --git a/run-tests.py b/run-tests.py
index 32cf988..c3dddc0 100755
--- a/run-tests.py
+++ b/run-tests.py
@@ -2,8 +2,10 @@
from argparse import ArgumentParser
from logging import basicConfig, INFO, getLogger
+from os import name
from pathlib import Path
from subprocess import Popen
+from sys import executable
from colorama import init, Fore
from pyjson5 import decode_io
@@ -24,6 +26,15 @@
init()
+ if name != 'nt':
+ code_severe = Fore.RED + '😱'
+ code_good = Fore.CYAN + '😄'
+ code_bad = Fore.YELLOW + '😠'
+ else:
+ code_severe = Fore.RED + 'SEVERE'
+ code_good = Fore.CYAN + 'GOOD'
+ code_bad = Fore.YELLOW + 'BAD'
+
good = 0
bad = 0
severe = 0
@@ -40,23 +51,18 @@
category = path.parent.name
name = path.stem
try:
- p = Popen(('/usr/bin/env', 'python', 'transcode-to-json.py', str(path)))
+ p = Popen((executable, 'transcode-to-json.py', str(path)))
outcome = p.wait(5)
except Exception:
logger.error('Error while testing: %s', path, exc_info=True)
- errors += 1
+ severe += 1
continue
is_success = outcome == 0
is_failure = outcome == 1
is_severe = outcome not in (0, 1)
is_good = is_success if expect_success else is_failure
-
- code = (
- Fore.RED + '😱' if is_severe else
- Fore.CYAN + '😄' if is_good else
- Fore.YELLOW + '😠'
- )
+ code = code_severe if is_severe else code_good if is_good else code_bad
print(
'#', index, ' ', code, ' '
'Category <', category, '> | '
@@ -76,11 +82,7 @@
is_severe = severe > 0
is_good = bad == 0
- code = (
- Fore.RED + '😱' if is_severe else
- Fore.CYAN + '😄' if is_good else
- Fore.YELLOW + '😠'
- )
+ code = code_severe if is_severe else code_good if is_good else code_bad
print()
print(
code, ' ',
diff --git a/setup.py b/setup.py
index 37dc532..86d4d1e 100755
--- a/setup.py
+++ b/setup.py
@@ -51,6 +51,8 @@
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
+ 'Programming Language :: Python :: 3.8',
+ 'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3 :: Only',
'Programming Language :: Python :: Implementation :: CPython',
'Topic :: Text Processing :: General',
diff --git a/sha512sum.py b/sha512sum.py
new file mode 100755
index 0000000..a6aad70
--- /dev/null
+++ b/sha512sum.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+
+from argparse import ArgumentParser
+from hashlib import sha512
+from logging import basicConfig, DEBUG
+from pathlib import Path
+from sys import argv, exit
+
+
+argparser = ArgumentParser(description='sha512sum replacement if coreutils isn\'t installed')
+argparser.add_argument('-c', '--check', type=Path, required=True)
+
+if __name__ == '__main__':
+ basicConfig(level=DEBUG)
+ args = argparser.parse_args()
+ errors = 0
+ with open(str(args.check.resolve()), 'rt') as f:
+ for line in f:
+ expected_hash, filename = line.rstrip('\r\n').split(' ', 1)
+ with open(str(Path(filename).resolve()), 'rb') as f:
+ actual_hash = sha512(f.read()).hexdigest()
+
+ if expected_hash == actual_hash:
+ print(filename + ': OK')
+ else:
+ errors += 1
+ print(filename + ': FAILED')
+
+ if errors:
+ print('%s: WARNING: %s computed checksum did NOT match' % (argv[0], errors))
+ exit(1)
+ else:
+ exit(0)
diff --git a/src/_decoder.pyx b/src/_decoder.pyx
index da2dcd9..ce2a9a6 100644
--- a/src/_decoder.pyx
+++ b/src/_decoder.pyx
@@ -728,7 +728,7 @@
elif kind == DRS_recursive:
decoder = _decode_recursive_enter
else:
- __builtin_unreachable()
+ unreachable()
decoder = _decoder_unknown
return decoder(reader, c_in_out)
@@ -823,7 +823,7 @@
elif kind == PyUnicode_4BYTE_KIND:
return _decode_ucs4(PyUnicode_4BYTE_DATA(data), length, maxdepth, some)
else:
- __builtin_unreachable()
+ unreachable()
cdef object _decode_buffer(Py_buffer &view, int32_t wordlength,
@@ -842,7 +842,7 @@
length = view.len // 4
else:
_raise_illegal_wordlength(wordlength)
- __builtin_unreachable()
+ unreachable()
length = 0
decoder = NULL
diff --git a/src/_imports.pyx b/src/_imports.pyx
index c3ddd45..139b356 100644
--- a/src/_imports.pyx
+++ b/src/_imports.pyx
@@ -95,6 +95,7 @@
void reset_hash[T](T *obj)
AlwaysTrue exception_thrown() except True
+ void unreachable()
cdef extern from 'src/native.hpp' namespace 'JSON5EncoderCpp':
int iter_next(object iterator, PyObject **value) except -1
@@ -193,9 +194,8 @@
char data[1]
-cdef extern from * nogil:
- boolean expect '__builtin_expect'(boolean actual, boolean expected)
- void __builtin_unreachable()
+cdef extern from 'src/native.hpp' nogil:
+ boolean expect 'JSON5EncoderCpp_expect'(boolean actual, boolean expected)
cdef type datetime, date, time, Decimal, Mapping, IOBase
diff --git a/src/_stack_heap_string.hpp b/src/_stack_heap_string.hpp
index 52bd4af..6d020a0 100644
--- a/src/_stack_heap_string.hpp
+++ b/src/_stack_heap_string.hpp
@@ -31,7 +31,7 @@
}
const T *data() const& {
- if (__builtin_expect(m_heap == nullptr, true)) {
+ if (JSON5EncoderCpp_expect(m_heap == nullptr, true)) {
return m_stack;
} else {
return m_heap;
@@ -43,7 +43,7 @@
}
bool push_back(T c) {
- if (__builtin_expect(m_left == 0, false)) {
+ if (JSON5EncoderCpp_expect(m_left == 0, false)) {
if (m_heap == nullptr) {
void *new_ptr = PyMem_RawMalloc(sizeof(T) * StackHeapStringHeapSize);
if (new_ptr == nullptr) {
@@ -66,7 +66,7 @@
}
}
- if (__builtin_expect(m_heap == nullptr, true)) {
+ if (JSON5EncoderCpp_expect(m_heap == nullptr, true)) {
m_stack[m_size] = c;
} else {
m_heap[m_size] = c;
diff --git a/src/_unicode_cat_of.hpp b/src/_unicode_cat_of.hpp
index 56ce6d6..47895fb 100644
--- a/src/_unicode_cat_of.hpp
+++ b/src/_unicode_cat_of.hpp
@@ -5154,7 +5154,7 @@
};
std::uint16_t plane_idx = std::uint16_t(codepoint / 0x10000);
- if (__builtin_expect(plane_idx > 16, false)) return 1;
+ if (JSON5EncoderCpp_expect(plane_idx > 16, false)) return 1;
std::uint16_t datum_idx = std::uint16_t(codepoint & 0xffff);
const std::uint8_t *plane = planes[plane_idx];
return (plane[datum_idx / 4] >> (2 * (datum_idx % 4))) % 4;
diff --git a/src/native.hpp b/src/native.hpp
index 418a2b7..eb9597b 100644
--- a/src/native.hpp
+++ b/src/native.hpp
@@ -68,9 +68,9 @@
static inline bool is_escaped(std::uint32_t c) {
if (c < 0x40) {
- return is_escaped_lo & (static_cast<std::uint64_t>(1) << c);
+ return (is_escaped_lo & (static_cast<std::uint64_t>(1) << c)) != 0;
} else if (c < 0x80) {
- return is_escaped_hi & (static_cast<std::uint64_t>(1) << (c - 0x40));
+ return (is_escaped_hi & (static_cast<std::uint64_t>(1) << (c - 0x40))) != 0;
} else {
return true;
}
@@ -158,6 +158,16 @@
return true;
}
+// https://stackoverflow.com/a/65258501/416224
+#ifdef __GNUC__ // GCC 4.8+, Clang, Intel and other compilers compatible with GCC (-std=c++0x or above)
+ [[noreturn]] inline __attribute__((always_inline)) void unreachable() { __builtin_unreachable(); }
+#elif defined(_MSC_VER) // MSVC
+ [[noreturn]] __forceinline void unreachable() { __assume(false); }
+#else // ???
+ inline void unreachable() {}
+#endif
+
+
#include "./_escape_dct.hpp"
const EscapeDct ESCAPE_DCT;
@@ -172,6 +182,12 @@
;
static constexpr std::size_t LONGDESCRIPTION_LENGTH = sizeof(LONGDESCRIPTION) - 1;
+#ifdef __GNUC__
+# define JSON5EncoderCpp_expect(cond, likely) __builtin_expect(!!(cond), !!(likely))
+#else
+# define JSON5EncoderCpp_expect(cond, likely) !!(cond)
+#endif
+
}
#endif
diff --git a/transcode-to-json.py b/transcode-to-json.py
index 2515578..110f10a 100755
--- a/transcode-to-json.py
+++ b/transcode-to-json.py
@@ -2,6 +2,7 @@
from argparse import ArgumentParser
from collections.abc import Mapping, Sequence
+from codecs import open as codecs_open
from itertools import zip_longest
from json import loads
from logging import basicConfig, DEBUG, getLogger
@@ -57,7 +58,7 @@
args = argparser.parse_args()
try:
# open() does not work with Paths in Python 3.5
- with open(str(args.input.resolve()), 'rt') as f:
+ with codecs_open(str(args.input.resolve()), 'r', 'UTF-8') as f:
data = f.read()
except Exception:
logger.error('Could not even read file: %s', args.input, exc_info=True)
@@ -87,7 +88,7 @@
if args.output is not None:
try:
# open() does not work with Paths in Python 3.5
- with open(str(args.output.resolve()), 'wt') as f:
+ with codecs_open(str(args.output.resolve()), 'w', 'UTF-8') as f:
f.write(data)
except Exception:
logger.error('Could open output file: %s', args.output, exc_info=True)