| /* |
| * sfparse |
| * |
| * Copyright (c) 2023 sfparse contributors |
| * Copyright (c) 2019 nghttp3 contributors |
| * Copyright (c) 2015 nghttp2 contributors |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining |
| * a copy of this software and associated documentation files (the |
| * "Software"), to deal in the Software without restriction, including |
| * without limitation the rights to use, copy, modify, merge, publish, |
| * distribute, sublicense, and/or sell copies of the Software, and to |
| * permit persons to whom the Software is furnished to do so, subject to |
| * the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be |
| * included in all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
| * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
| * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
| * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| */ |
| #include "sfparse.h" |
| |
| #include <string.h> |
| #include <assert.h> |
| #include <stdlib.h> |
| |
| #ifdef __AVX2__ |
| # include <immintrin.h> |
| #endif /* __AVX2__ */ |
| |
/* Parser state encoding.  A state value is the bitwise OR of three
   fields: the top-level container kind (bits 3-4), the inner-list flag
   (bit 2) and the operation phase (bits 0-1). */

/* Top-level container kind. */
#define SFPARSE_STATE_DICT 0x08u
#define SFPARSE_STATE_LIST 0x10u
#define SFPARSE_STATE_ITEM 0x18u

/* Set after the '(' of an inner list has been consumed; cleared again
   when the matching ')' is consumed. */
#define SFPARSE_STATE_INNER_LIST 0x04u

/* Operation phase:
   BEFORE        - nothing read yet (or positioned before an inner list
                   member),
   BEFORE_PARAMS - a value was read; its parameters are still unread,
   PARAMS        - parameters are being iterated,
   AFTER         - no further parameter follows. */
#define SFPARSE_STATE_BEFORE 0x00u
#define SFPARSE_STATE_BEFORE_PARAMS 0x01u
#define SFPARSE_STATE_PARAMS 0x02u
#define SFPARSE_STATE_AFTER 0x03u

/* Mask selecting the operation phase bits. */
#define SFPARSE_STATE_OP_MASK 0x03u

/* Helpers that compose a container kind (DICT, LIST or ITEM) with a
   phase. */
#define SFPARSE_SET_STATE_AFTER(NAME)                                          \
  (SFPARSE_STATE_##NAME | SFPARSE_STATE_AFTER)
#define SFPARSE_SET_STATE_BEFORE_PARAMS(NAME)                                  \
  (SFPARSE_STATE_##NAME | SFPARSE_STATE_BEFORE_PARAMS)
#define SFPARSE_SET_STATE_INNER_LIST_BEFORE(NAME)                              \
  (SFPARSE_STATE_##NAME | SFPARSE_STATE_INNER_LIST | SFPARSE_STATE_BEFORE)

/* Composed states for dictionaries. */
#define SFPARSE_STATE_DICT_AFTER SFPARSE_SET_STATE_AFTER(DICT)
#define SFPARSE_STATE_DICT_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(DICT)
#define SFPARSE_STATE_DICT_INNER_LIST_BEFORE                                   \
  SFPARSE_SET_STATE_INNER_LIST_BEFORE(DICT)

/* Composed states for lists. */
#define SFPARSE_STATE_LIST_AFTER SFPARSE_SET_STATE_AFTER(LIST)
#define SFPARSE_STATE_LIST_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(LIST)
#define SFPARSE_STATE_LIST_INNER_LIST_BEFORE                                   \
  SFPARSE_SET_STATE_INNER_LIST_BEFORE(LIST)

/* Composed states for items. */
#define SFPARSE_STATE_ITEM_AFTER SFPARSE_SET_STATE_AFTER(ITEM)
#define SFPARSE_STATE_ITEM_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(ITEM)
#define SFPARSE_STATE_ITEM_INNER_LIST_BEFORE                                   \
  SFPARSE_SET_STATE_INNER_LIST_BEFORE(ITEM)

/* State of a parser that has not consumed anything yet. */
#define SFPARSE_STATE_INITIAL 0x00u
| |
/* Case-label lists.  Each macro expands to a run of `case` labels whose
   final label carries no colon; the colon is written at the use site,
   e.g. `DIGIT_CASES:`. */

/* '0'..'9' */
#define DIGIT_CASES                                                            \
  case '0': case '1': case '2': case '3': case '4':                            \
  case '5': case '6': case '7': case '8': case '9'

/* 'a'..'z' */
#define LCALPHA_CASES                                                          \
  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':        \
  case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':        \
  case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':        \
  case 'v': case 'w': case 'x': case 'y': case 'z'

/* 'A'..'Z' */
#define UCALPHA_CASES                                                          \
  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':        \
  case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':        \
  case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':        \
  case 'V': case 'W': case 'X': case 'Y': case 'Z'

/* Upper- and lower-case letters. */
#define ALPHA_CASES                                                            \
  UCALPHA_CASES:                                                               \
  LCALPHA_CASES

/* Bytes accepted after the first byte of a token. */
#define TOKEN_CASES                                                            \
  case '!': case '#': case '$': case '%': case '&': case '\'':                 \
  case '*': case '+': case '-': case '.': case '/':                            \
  DIGIT_CASES:                                                                 \
  case ':':                                                                    \
  UCALPHA_CASES:                                                               \
  case '^': case '_': case '`':                                                \
  LCALPHA_CASES:                                                               \
  case '|': case '~'

/* Lower-case hexadecimal letters 'a'..'f'. */
#define LCHEXALPHA_CASES                                                       \
  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f'
| |
/* Byte-range case-label lists.  As with the other *_CASES macros, the
   last label has no colon; it is supplied at the use site. */

/* 0x00..0x1f */
#define X00_1F_CASES                                                           \
  case 0x00: case 0x01: case 0x02: case 0x03:                                  \
  case 0x04: case 0x05: case 0x06: case 0x07:                                  \
  case 0x08: case 0x09: case 0x0a: case 0x0b:                                  \
  case 0x0c: case 0x0d: case 0x0e: case 0x0f:                                  \
  case 0x10: case 0x11: case 0x12: case 0x13:                                  \
  case 0x14: case 0x15: case 0x16: case 0x17:                                  \
  case 0x18: case 0x19: case 0x1a: case 0x1b:                                  \
  case 0x1c: case 0x1d: case 0x1e: case 0x1f

/* 0x20..0x21 */
#define X20_21_CASES                                                           \
  case ' ': case '!'

/* 0x23..0x5b, i.e. '#' through '[' */
#define X23_5B_CASES                                                           \
  case '#': case '$': case '%': case '&': case '\'': case '(': case ')':       \
  case '*': case '+': case ',': case '-': case '.': case '/':                  \
  DIGIT_CASES:                                                                 \
  case ':': case ';': case '<': case '=': case '>': case '?': case '@':        \
  UCALPHA_CASES:                                                               \
  case '['

/* 0x5d..0x7e, i.e. ']' through '~' */
#define X5D_7E_CASES                                                           \
  case ']': case '^': case '_': case '`':                                      \
  LCALPHA_CASES:                                                               \
  case '{': case '|': case '}': case '~'

/* 0x7f..0xff */
#define X7F_FF_CASES                                                           \
  case 0x7f:                                                                   \
  case 0x80: case 0x81: case 0x82: case 0x83:                                  \
  case 0x84: case 0x85: case 0x86: case 0x87:                                  \
  case 0x88: case 0x89: case 0x8a: case 0x8b:                                  \
  case 0x8c: case 0x8d: case 0x8e: case 0x8f:                                  \
  case 0x90: case 0x91: case 0x92: case 0x93:                                  \
  case 0x94: case 0x95: case 0x96: case 0x97:                                  \
  case 0x98: case 0x99: case 0x9a: case 0x9b:                                  \
  case 0x9c: case 0x9d: case 0x9e: case 0x9f:                                  \
  case 0xa0: case 0xa1: case 0xa2: case 0xa3:                                  \
  case 0xa4: case 0xa5: case 0xa6: case 0xa7:                                  \
  case 0xa8: case 0xa9: case 0xaa: case 0xab:                                  \
  case 0xac: case 0xad: case 0xae: case 0xaf:                                  \
  case 0xb0: case 0xb1: case 0xb2: case 0xb3:                                  \
  case 0xb4: case 0xb5: case 0xb6: case 0xb7:                                  \
  case 0xb8: case 0xb9: case 0xba: case 0xbb:                                  \
  case 0xbc: case 0xbd: case 0xbe: case 0xbf:                                  \
  case 0xc0: case 0xc1: case 0xc2: case 0xc3:                                  \
  case 0xc4: case 0xc5: case 0xc6: case 0xc7:                                  \
  case 0xc8: case 0xc9: case 0xca: case 0xcb:                                  \
  case 0xcc: case 0xcd: case 0xce: case 0xcf:                                  \
  case 0xd0: case 0xd1: case 0xd2: case 0xd3:                                  \
  case 0xd4: case 0xd5: case 0xd6: case 0xd7:                                  \
  case 0xd8: case 0xd9: case 0xda: case 0xdb:                                  \
  case 0xdc: case 0xdd: case 0xde: case 0xdf:                                  \
  case 0xe0: case 0xe1: case 0xe2: case 0xe3:                                  \
  case 0xe4: case 0xe5: case 0xe6: case 0xe7:                                  \
  case 0xe8: case 0xe9: case 0xea: case 0xeb:                                  \
  case 0xec: case 0xed: case 0xee: case 0xef:                                  \
  case 0xf0: case 0xf1: case 0xf2: case 0xf3:                                  \
  case 0xf4: case 0xf5: case 0xf6: case 0xf7:                                  \
  case 0xf8: case 0xf9: case 0xfa: case 0xfb:                                  \
  case 0xfc: case 0xfd: case 0xfe: case 0xff
| |
/* Returns 1 if |c| is a space or a horizontal tab, 0 otherwise. */
static int is_ws(uint8_t c) {
  if (c == ' ' || c == '\t') {
    return 1;
  }

  return 0;
}
| |
#ifdef __AVX2__
#  ifdef _MSC_VER
#    include <intrin.h>

/* Returns the index of the least significant set bit of |v|.  The
   caller must pass a nonzero |v|. */
static int ctz(unsigned int v) {
  unsigned long bit_index;

  _BitScanForward(&bit_index, v);

  return (int)bit_index;
}
#  else /* !_MSC_VER */
/* Non-MSVC compilers: map ctz to the compiler builtin. */
#    define ctz __builtin_ctz
#  endif /* !_MSC_VER */
#endif /* __AVX2__ */
| |
| static int parser_eof(sfparse_parser *sfp) { return sfp->pos == sfp->end; } |
| |
| static void parser_discard_ows(sfparse_parser *sfp) { |
| for (; !parser_eof(sfp) && is_ws(*sfp->pos); ++sfp->pos) |
| ; |
| } |
| |
| static void parser_discard_sp(sfparse_parser *sfp) { |
| for (; !parser_eof(sfp) && *sfp->pos == ' '; ++sfp->pos) |
| ; |
| } |
| |
| static void parser_set_op_state(sfparse_parser *sfp, uint32_t op) { |
| sfp->state &= ~SFPARSE_STATE_OP_MASK; |
| sfp->state |= op; |
| } |
| |
| static void parser_unset_inner_list_state(sfparse_parser *sfp) { |
| sfp->state &= ~SFPARSE_STATE_INNER_LIST; |
| } |
| |
#ifdef __AVX2__
/* Scans [first, last) 32 bytes at a time and returns a pointer to the
   first byte that is not one of '_', '-', '.', '*', '0'..'9' or
   'a'..'z'.  Returns |last| if every byte is allowed.  The loop steps
   by 32 and stops only when it hits |last| exactly, so last - first
   must be a multiple of 32. */
static const uint8_t *find_char_key(const uint8_t *first, const uint8_t *last) {
  const __m256i underscore = _mm256_set1_epi8('_');
  const __m256i hyphen = _mm256_set1_epi8('-');
  const __m256i period = _mm256_set1_epi8('.');
  const __m256i asterisk = _mm256_set1_epi8('*');
  const __m256i digit_lo = _mm256_set1_epi8('0' - 1);
  const __m256i digit_hi = _mm256_set1_epi8('9' + 1);
  const __m256i lower_lo = _mm256_set1_epi8('a' - 1);
  const __m256i lower_hi = _mm256_set1_epi8('z' + 1);
  __m256i chunk, punct, digits, lowers, allowed;
  uint32_t rejected;

  while (first != last) {
    chunk = _mm256_loadu_si256((void *)first);

    punct = _mm256_or_si256(
      _mm256_or_si256(_mm256_cmpeq_epi8(chunk, underscore),
                      _mm256_cmpeq_epi8(chunk, hyphen)),
      _mm256_or_si256(_mm256_cmpeq_epi8(chunk, period),
                      _mm256_cmpeq_epi8(chunk, asterisk)));
    /* The range tests use signed comparisons; bytes >= 0x80 are
       negative and therefore fall outside both ranges. */
    digits = _mm256_and_si256(_mm256_cmpgt_epi8(chunk, digit_lo),
                              _mm256_cmpgt_epi8(digit_hi, chunk));
    lowers = _mm256_and_si256(_mm256_cmpgt_epi8(chunk, lower_lo),
                              _mm256_cmpgt_epi8(lower_hi, chunk));
    allowed = _mm256_or_si256(punct, _mm256_or_si256(digits, lowers));

    /* One bit per byte; a set bit marks a byte that is not allowed. */
    rejected = ~(uint32_t)_mm256_movemask_epi8(allowed);
    if (rejected != 0) {
      return first + ctz(rejected);
    }

    first += 32;
  }

  return last;
}
#endif /* __AVX2__ */
| |
| static int parser_key(sfparse_parser *sfp, sfparse_vec *dest) { |
| const uint8_t *base; |
| #ifdef __AVX2__ |
| const uint8_t *last; |
| #endif /* __AVX2__ */ |
| |
| switch (*sfp->pos) { |
| case '*': |
| LCALPHA_CASES: |
| break; |
| default: |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| base = sfp->pos++; |
| |
| #ifdef __AVX2__ |
| if (sfp->end - sfp->pos >= 32) { |
| last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu); |
| |
| sfp->pos = find_char_key(sfp->pos, last); |
| if (sfp->pos != last) { |
| goto fin; |
| } |
| } |
| #endif /* __AVX2__ */ |
| |
| for (; !parser_eof(sfp); ++sfp->pos) { |
| switch (*sfp->pos) { |
| case '_': |
| case '-': |
| case '.': |
| case '*': |
| DIGIT_CASES: |
| LCALPHA_CASES: |
| continue; |
| } |
| |
| break; |
| } |
| |
| #ifdef __AVX2__ |
| fin: |
| #endif /* __AVX2__ */ |
| if (dest) { |
| dest->base = (uint8_t *)base; |
| dest->len = (size_t)(sfp->pos - dest->base); |
| } |
| |
| return 0; |
| } |
| |
| static int parser_number(sfparse_parser *sfp, sfparse_value *dest) { |
| int sign = 1; |
| int64_t value = 0; |
| size_t len = 0; |
| size_t fpos = 0; |
| |
| if (*sfp->pos == '-') { |
| ++sfp->pos; |
| if (parser_eof(sfp)) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| sign = -1; |
| } |
| |
| assert(!parser_eof(sfp)); |
| |
| for (; !parser_eof(sfp); ++sfp->pos) { |
| switch (*sfp->pos) { |
| DIGIT_CASES: |
| if (++len > 15) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| value *= 10; |
| value += *sfp->pos - '0'; |
| |
| continue; |
| } |
| |
| break; |
| } |
| |
| if (len == 0) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| if (parser_eof(sfp) || *sfp->pos != '.') { |
| if (dest) { |
| dest->type = SFPARSE_TYPE_INTEGER; |
| dest->flags = SFPARSE_VALUE_FLAG_NONE; |
| dest->integer = value * sign; |
| } |
| |
| return 0; |
| } |
| |
| /* decimal */ |
| |
| if (len > 12) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| fpos = len; |
| |
| ++sfp->pos; |
| |
| for (; !parser_eof(sfp); ++sfp->pos) { |
| switch (*sfp->pos) { |
| DIGIT_CASES: |
| if (++len > 15) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| value *= 10; |
| value += *sfp->pos - '0'; |
| |
| continue; |
| } |
| |
| break; |
| } |
| |
| if (fpos == len || len - fpos > 3) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| if (dest) { |
| dest->type = SFPARSE_TYPE_DECIMAL; |
| dest->flags = SFPARSE_VALUE_FLAG_NONE; |
| dest->decimal.numer = value * sign; |
| |
| switch (len - fpos) { |
| case 1: |
| dest->decimal.denom = 10; |
| |
| break; |
| case 2: |
| dest->decimal.denom = 100; |
| |
| break; |
| case 3: |
| dest->decimal.denom = 1000; |
| |
| break; |
| } |
| } |
| |
| return 0; |
| } |
| |
| static int parser_date(sfparse_parser *sfp, sfparse_value *dest) { |
| int rv; |
| sfparse_value val; |
| |
| /* The first byte has already been validated by the caller. */ |
| assert('@' == *sfp->pos); |
| |
| ++sfp->pos; |
| |
| if (parser_eof(sfp)) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| rv = parser_number(sfp, &val); |
| if (rv != 0) { |
| return rv; |
| } |
| |
| if (val.type != SFPARSE_TYPE_INTEGER) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| if (dest) { |
| *dest = val; |
| dest->type = SFPARSE_TYPE_DATE; |
| } |
| |
| return 0; |
| } |
| |
#ifdef __AVX2__
/* Scans [first, last) 32 bytes at a time and returns a pointer to the
   first byte that needs individual handling inside a quoted string: a
   backslash, a double quote, 0x7f, or any byte that compares below ' '
   as a signed value (0x00..0x1f and 0x80..0xff).  Returns |last| if no
   such byte exists.  last - first must be a multiple of 32. */
static const uint8_t *find_char_string(const uint8_t *first,
                                       const uint8_t *last) {
  const __m256i backslash = _mm256_set1_epi8('\\');
  const __m256i dquote = _mm256_set1_epi8('"');
  const __m256i del = _mm256_set1_epi8(0x7f);
  const __m256i space = _mm256_set1_epi8(' ');
  __m256i chunk, below_space, delimiters, special;
  uint32_t hits;

  while (first != last) {
    chunk = _mm256_loadu_si256((void *)first);

    below_space = _mm256_cmpgt_epi8(space, chunk);
    delimiters = _mm256_or_si256(_mm256_cmpeq_epi8(chunk, backslash),
                                 _mm256_cmpeq_epi8(chunk, dquote));
    special = _mm256_or_si256(
      _mm256_or_si256(below_space, delimiters), _mm256_cmpeq_epi8(chunk, del));

    /* One bit per byte; a set bit marks a special byte. */
    hits = (uint32_t)_mm256_movemask_epi8(special);
    if (hits != 0) {
      return first + ctz(hits);
    }

    first += 32;
  }

  return last;
}
#endif /* __AVX2__ */
| |
| static int parser_string(sfparse_parser *sfp, sfparse_value *dest) { |
| const uint8_t *base; |
| #ifdef __AVX2__ |
| const uint8_t *last; |
| #endif /* __AVX2__ */ |
| uint32_t flags = SFPARSE_VALUE_FLAG_NONE; |
| |
| /* The first byte has already been validated by the caller. */ |
| assert('"' == *sfp->pos); |
| |
| base = ++sfp->pos; |
| |
| #ifdef __AVX2__ |
| for (; sfp->end - sfp->pos >= 32; ++sfp->pos) { |
| last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu); |
| |
| sfp->pos = find_char_string(sfp->pos, last); |
| if (sfp->pos == last) { |
| break; |
| } |
| |
| switch (*sfp->pos) { |
| case '\\': |
| ++sfp->pos; |
| if (parser_eof(sfp)) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| switch (*sfp->pos) { |
| case '"': |
| case '\\': |
| flags = SFPARSE_VALUE_FLAG_ESCAPED_STRING; |
| |
| break; |
| default: |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| break; |
| case '"': |
| goto fin; |
| default: |
| return SFPARSE_ERR_PARSE; |
| } |
| } |
| #endif /* __AVX2__ */ |
| |
| for (; !parser_eof(sfp); ++sfp->pos) { |
| switch (*sfp->pos) { |
| X20_21_CASES: |
| X23_5B_CASES: |
| X5D_7E_CASES: |
| break; |
| case '\\': |
| ++sfp->pos; |
| if (parser_eof(sfp)) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| switch (*sfp->pos) { |
| case '"': |
| case '\\': |
| flags = SFPARSE_VALUE_FLAG_ESCAPED_STRING; |
| |
| break; |
| default: |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| break; |
| case '"': |
| goto fin; |
| default: |
| return SFPARSE_ERR_PARSE; |
| } |
| } |
| |
| return SFPARSE_ERR_PARSE; |
| |
| fin: |
| if (dest) { |
| dest->type = SFPARSE_TYPE_STRING; |
| dest->flags = flags; |
| dest->vec.len = (size_t)(sfp->pos - base); |
| dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base; |
| } |
| |
| ++sfp->pos; |
| |
| return 0; |
| } |
| |
#ifdef __AVX2__
/* Scans [first, last) 32 bytes at a time and returns a pointer to the
   first byte that may not appear inside a token.  Allowed bytes are:
     '!'..':' except '"', '(', ')' and ',',
     'A'..'Z',
     '^'..'~' except '{' and '}'.
   Returns |last| if every byte is allowed.  last - first must be a
   multiple of 32. */
static const uint8_t *find_char_token(const uint8_t *first,
                                      const uint8_t *last) {
  const __m256i sym_lo = _mm256_set1_epi8('!' - 1);
  const __m256i sym_hi = _mm256_set1_epi8(':' + 1);
  const __m256i dquote = _mm256_set1_epi8('"');
  const __m256i lparen = _mm256_set1_epi8('(');
  const __m256i rparen = _mm256_set1_epi8(')');
  const __m256i comma = _mm256_set1_epi8(',');
  const __m256i upper_lo = _mm256_set1_epi8('A' - 1);
  const __m256i upper_hi = _mm256_set1_epi8('Z' + 1);
  const __m256i tail_lo = _mm256_set1_epi8('^' - 1);
  const __m256i tail_hi = _mm256_set1_epi8('~' + 1);
  const __m256i lbrace = _mm256_set1_epi8('{');
  const __m256i rbrace = _mm256_set1_epi8('}');
  __m256i chunk, in_range, excluded, allowed;
  uint32_t rejected;

  while (first != last) {
    chunk = _mm256_loadu_si256((void *)first);

    /* '!'..':' minus the four excluded delimiters. */
    in_range = _mm256_and_si256(_mm256_cmpgt_epi8(chunk, sym_lo),
                                _mm256_cmpgt_epi8(sym_hi, chunk));
    excluded = _mm256_or_si256(
      _mm256_or_si256(_mm256_cmpeq_epi8(chunk, dquote),
                      _mm256_cmpeq_epi8(chunk, lparen)),
      _mm256_or_si256(_mm256_cmpeq_epi8(chunk, rparen),
                      _mm256_cmpeq_epi8(chunk, comma)));
    allowed = _mm256_andnot_si256(excluded, in_range);

    /* 'A'..'Z'. */
    in_range = _mm256_and_si256(_mm256_cmpgt_epi8(chunk, upper_lo),
                                _mm256_cmpgt_epi8(upper_hi, chunk));
    allowed = _mm256_or_si256(in_range, allowed);

    /* '^'..'~' minus the braces. */
    in_range = _mm256_and_si256(_mm256_cmpgt_epi8(chunk, tail_lo),
                                _mm256_cmpgt_epi8(tail_hi, chunk));
    excluded = _mm256_or_si256(_mm256_cmpeq_epi8(chunk, lbrace),
                               _mm256_cmpeq_epi8(chunk, rbrace));
    allowed = _mm256_or_si256(_mm256_andnot_si256(excluded, in_range), allowed);

    /* One bit per byte; a set bit marks a byte that is not allowed. */
    rejected = ~(uint32_t)_mm256_movemask_epi8(allowed);
    if (rejected != 0) {
      return first + ctz(rejected);
    }

    first += 32;
  }

  return last;
}
#endif /* __AVX2__ */
| |
| static int parser_token(sfparse_parser *sfp, sfparse_value *dest) { |
| const uint8_t *base; |
| #ifdef __AVX2__ |
| const uint8_t *last; |
| #endif /* __AVX2__ */ |
| |
| /* The first byte has already been validated by the caller. */ |
| base = sfp->pos++; |
| |
| #ifdef __AVX2__ |
| if (sfp->end - sfp->pos >= 32) { |
| last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu); |
| |
| sfp->pos = find_char_token(sfp->pos, last); |
| if (sfp->pos != last) { |
| goto fin; |
| } |
| } |
| #endif /* __AVX2__ */ |
| |
| for (; !parser_eof(sfp); ++sfp->pos) { |
| switch (*sfp->pos) { |
| TOKEN_CASES: |
| continue; |
| } |
| |
| break; |
| } |
| |
| #ifdef __AVX2__ |
| fin: |
| #endif /* __AVX2__ */ |
| if (dest) { |
| dest->type = SFPARSE_TYPE_TOKEN; |
| dest->flags = SFPARSE_VALUE_FLAG_NONE; |
| dest->vec.base = (uint8_t *)base; |
| dest->vec.len = (size_t)(sfp->pos - base); |
| } |
| |
| return 0; |
| } |
| |
#ifdef __AVX2__
/* Scans [first, last) 32 bytes at a time and returns a pointer to the
   first byte that is not '+', '/', '0'..'9', 'A'..'Z' or 'a'..'z'.
   Returns |last| if every byte is in that alphabet.  last - first must
   be a multiple of 32. */
static const uint8_t *find_char_byteseq(const uint8_t *first,
                                        const uint8_t *last) {
  const __m256i plus = _mm256_set1_epi8('+');
  const __m256i slash = _mm256_set1_epi8('/');
  const __m256i digit_lo = _mm256_set1_epi8('0' - 1);
  const __m256i digit_hi = _mm256_set1_epi8('9' + 1);
  const __m256i upper_lo = _mm256_set1_epi8('A' - 1);
  const __m256i upper_hi = _mm256_set1_epi8('Z' + 1);
  const __m256i lower_lo = _mm256_set1_epi8('a' - 1);
  const __m256i lower_hi = _mm256_set1_epi8('z' + 1);
  __m256i chunk, symbols, digits, uppers, lowers, allowed;
  uint32_t rejected;

  while (first != last) {
    chunk = _mm256_loadu_si256((void *)first);

    symbols = _mm256_or_si256(_mm256_cmpeq_epi8(chunk, plus),
                              _mm256_cmpeq_epi8(chunk, slash));
    digits = _mm256_and_si256(_mm256_cmpgt_epi8(chunk, digit_lo),
                              _mm256_cmpgt_epi8(digit_hi, chunk));
    uppers = _mm256_and_si256(_mm256_cmpgt_epi8(chunk, upper_lo),
                              _mm256_cmpgt_epi8(upper_hi, chunk));
    lowers = _mm256_and_si256(_mm256_cmpgt_epi8(chunk, lower_lo),
                              _mm256_cmpgt_epi8(lower_hi, chunk));
    allowed = _mm256_or_si256(_mm256_or_si256(symbols, digits),
                              _mm256_or_si256(uppers, lowers));

    /* One bit per byte; a set bit marks a byte that is not allowed. */
    rejected = ~(uint32_t)_mm256_movemask_epi8(allowed);
    if (rejected != 0) {
      return first + ctz(rejected);
    }

    first += 32;
  }

  return last;
}
#endif /* __AVX2__ */
| |
| static int parser_byteseq(sfparse_parser *sfp, sfparse_value *dest) { |
| const uint8_t *base; |
| #ifdef __AVX2__ |
| const uint8_t *last; |
| #endif /* __AVX2__ */ |
| |
| /* The first byte has already been validated by the caller. */ |
| assert(':' == *sfp->pos); |
| |
| base = ++sfp->pos; |
| |
| #ifdef __AVX2__ |
| if (sfp->end - sfp->pos >= 32) { |
| last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu); |
| sfp->pos = find_char_byteseq(sfp->pos, last); |
| } |
| #endif /* __AVX2__ */ |
| |
| for (; !parser_eof(sfp); ++sfp->pos) { |
| switch (*sfp->pos) { |
| case '+': |
| case '/': |
| DIGIT_CASES: |
| ALPHA_CASES: |
| continue; |
| case '=': |
| switch ((sfp->pos - base) & 0x3) { |
| case 0: |
| case 1: |
| return SFPARSE_ERR_PARSE; |
| case 2: |
| ++sfp->pos; |
| |
| if (parser_eof(sfp)) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| if (*sfp->pos == '=') { |
| ++sfp->pos; |
| } |
| |
| break; |
| case 3: |
| ++sfp->pos; |
| |
| break; |
| } |
| |
| if (parser_eof(sfp) || *sfp->pos != ':') { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| goto fin; |
| case ':': |
| if (((sfp->pos - base) & 0x3) == 1) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| goto fin; |
| default: |
| return SFPARSE_ERR_PARSE; |
| } |
| } |
| |
| return SFPARSE_ERR_PARSE; |
| |
| fin: |
| if (dest) { |
| dest->type = SFPARSE_TYPE_BYTESEQ; |
| dest->flags = SFPARSE_VALUE_FLAG_NONE; |
| dest->vec.len = (size_t)(sfp->pos - base); |
| dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base; |
| } |
| |
| ++sfp->pos; |
| |
| return 0; |
| } |
| |
| static int parser_boolean(sfparse_parser *sfp, sfparse_value *dest) { |
| int b; |
| |
| /* The first byte has already been validated by the caller. */ |
| assert('?' == *sfp->pos); |
| |
| ++sfp->pos; |
| |
| if (parser_eof(sfp)) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| switch (*sfp->pos) { |
| case '0': |
| b = 0; |
| |
| break; |
| case '1': |
| b = 1; |
| |
| break; |
| default: |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| ++sfp->pos; |
| |
| if (dest) { |
| dest->type = SFPARSE_TYPE_BOOLEAN; |
| dest->flags = SFPARSE_VALUE_FLAG_NONE; |
| dest->boolean = b; |
| } |
| |
| return 0; |
| } |
| |
/* Returns the value (0..15) of the hexadecimal digit |b|, or -1 if |b|
   is not a digit or a lower-case letter 'a'..'f'. */
static int pct_hexval(uint8_t b) {
  switch (b) {
  DIGIT_CASES:
    return b - '0';
  LCHEXALPHA_CASES:
    return b - 'a' + 10;
  default:
    return -1;
  }
}

/* Decodes the two lower-case hexadecimal digits at *ppos into *pc.  The
   caller must ensure that two bytes are readable at *ppos.  On success
   *ppos is advanced by two and 0 is returned.  Returns -1 if either
   byte is not a valid digit: *pc is then left untouched, and *ppos is
   unchanged when the first digit is bad or advanced by one when the
   second digit is bad. */
static int pctdecode(uint8_t *pc, const uint8_t **ppos) {
  int hi, lo;

  hi = pct_hexval(**ppos);
  if (hi < 0) {
    return -1;
  }

  ++*ppos;

  lo = pct_hexval(**ppos);
  if (lo < 0) {
    return -1;
  }

  *pc = (uint8_t)((hi << 4) | lo);
  ++*ppos;

  return 0;
}
| |
| /* Start of utf8 dfa */ |
| /* Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de> |
| * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. |
| * |
| * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de> |
| * |
| * Permission is hereby granted, free of charge, to any person |
| * obtaining a copy of this software and associated documentation |
| * files (the "Software"), to deal in the Software without |
| * restriction, including without limitation the rights to use, copy, |
| * modify, merge, publish, distribute, sublicense, and/or sell copies |
| * of the Software, and to permit persons to whom the Software is |
| * furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be |
| * included in all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
| * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
| * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
/* DFA states: UTF8_ACCEPT means a complete sequence has been consumed,
   UTF8_REJECT means the input is not valid UTF-8. */
#define UTF8_ACCEPT 0
#define UTF8_REJECT 12

/* clang-format off */
static const uint8_t utf8d[] = {
  /*
   * First part (256 entries): maps each byte to a character class.
   * Working on classes instead of raw bytes keeps the transition table
   * small.
   */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 00..1f */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 20..3f */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 40..5f */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 60..7f */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, /* 80..9f */
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* a0..bf */
  8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* c0..df */
  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, /* e0..ff */

  /*
   * Second part: transition table.  Each row holds 12 entries, one per
   * character class; the entry at (state + class) is the next state.
   */
   0,12,24,36,60,96,84,12,12,12,48,72, /* state  0 */
  12,12,12,12,12,12,12,12,12,12,12,12, /* state 12 */
  12, 0,12,12,12,12,12, 0,12, 0,12,12, /* state 24 */
  12,24,12,12,12,12,12,24,12,24,12,12, /* state 36 */
  12,12,12,12,12,12,12,24,12,12,12,12, /* state 48 */
  12,24,12,12,12,12,12,12,12,24,12,12, /* state 60 */
  12,12,12,12,12,12,12,36,12,36,12,12, /* state 72 */
  12,36,12,12,12,12,12,36,12,36,12,12, /* state 84 */
  12,36,12,12,12,12,12,12,12,12,12,12, /* state 96 */
};
/* clang-format on */

/* Feeds |byte| to the UTF-8 DFA and stores the resulting state in
   *state.  *state must hold UTF8_ACCEPT before the first byte of the
   input is fed. */
static void utf8_decode(uint32_t *state, uint8_t byte) {
  const uint8_t *transitions = utf8d + 256;
  const uint8_t char_class = utf8d[byte];

  *state = transitions[*state + char_class];
}
| |
| /* End of utf8 dfa */ |
| |
| static int parser_dispstring(sfparse_parser *sfp, sfparse_value *dest) { |
| const uint8_t *base; |
| uint8_t c; |
| uint32_t utf8state = UTF8_ACCEPT; |
| |
| assert('%' == *sfp->pos); |
| |
| ++sfp->pos; |
| |
| if (parser_eof(sfp) || *sfp->pos != '"') { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| base = ++sfp->pos; |
| |
| for (; !parser_eof(sfp);) { |
| switch (*sfp->pos) { |
| X00_1F_CASES: |
| X7F_FF_CASES: |
| return SFPARSE_ERR_PARSE; |
| case '%': |
| ++sfp->pos; |
| |
| if (sfp->pos + 2 > sfp->end) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| if (pctdecode(&c, &sfp->pos) != 0) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| utf8_decode(&utf8state, c); |
| if (utf8state == UTF8_REJECT) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| break; |
| case '"': |
| if (utf8state != UTF8_ACCEPT) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| if (dest) { |
| dest->type = SFPARSE_TYPE_DISPSTRING; |
| dest->flags = SFPARSE_VALUE_FLAG_NONE; |
| dest->vec.len = (size_t)(sfp->pos - base); |
| dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base; |
| } |
| |
| ++sfp->pos; |
| |
| return 0; |
| default: |
| if (utf8state != UTF8_ACCEPT) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| ++sfp->pos; |
| } |
| } |
| |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| static int parser_bare_item(sfparse_parser *sfp, sfparse_value *dest) { |
| switch (*sfp->pos) { |
| case '"': |
| return parser_string(sfp, dest); |
| case '-': |
| DIGIT_CASES: |
| return parser_number(sfp, dest); |
| case '@': |
| return parser_date(sfp, dest); |
| case ':': |
| return parser_byteseq(sfp, dest); |
| case '?': |
| return parser_boolean(sfp, dest); |
| case '*': |
| ALPHA_CASES: |
| return parser_token(sfp, dest); |
| case '%': |
| return parser_dispstring(sfp, dest); |
| default: |
| return SFPARSE_ERR_PARSE; |
| } |
| } |
| |
| static int parser_skip_inner_list(sfparse_parser *sfp); |
| |
| int sfparse_parser_param(sfparse_parser *sfp, sfparse_vec *dest_key, |
| sfparse_value *dest_value) { |
| int rv; |
| |
| switch (sfp->state & SFPARSE_STATE_OP_MASK) { |
| case SFPARSE_STATE_BEFORE: |
| rv = parser_skip_inner_list(sfp); |
| if (rv != 0) { |
| return rv; |
| } |
| |
| /* fall through */ |
| case SFPARSE_STATE_BEFORE_PARAMS: |
| parser_set_op_state(sfp, SFPARSE_STATE_PARAMS); |
| |
| break; |
| case SFPARSE_STATE_PARAMS: |
| break; |
| default: |
| assert(0); |
| abort(); |
| } |
| |
| if (parser_eof(sfp) || *sfp->pos != ';') { |
| parser_set_op_state(sfp, SFPARSE_STATE_AFTER); |
| |
| return SFPARSE_ERR_EOF; |
| } |
| |
| ++sfp->pos; |
| |
| parser_discard_sp(sfp); |
| if (parser_eof(sfp)) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| rv = parser_key(sfp, dest_key); |
| if (rv != 0) { |
| return rv; |
| } |
| |
| if (parser_eof(sfp) || *sfp->pos != '=') { |
| if (dest_value) { |
| dest_value->type = SFPARSE_TYPE_BOOLEAN; |
| dest_value->flags = SFPARSE_VALUE_FLAG_NONE; |
| dest_value->boolean = 1; |
| } |
| |
| return 0; |
| } |
| |
| ++sfp->pos; |
| |
| if (parser_eof(sfp)) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| return parser_bare_item(sfp, dest_value); |
| } |
| |
| static int parser_skip_params(sfparse_parser *sfp) { |
| int rv; |
| |
| for (;;) { |
| rv = sfparse_parser_param(sfp, NULL, NULL); |
| switch (rv) { |
| case 0: |
| break; |
| case SFPARSE_ERR_EOF: |
| return 0; |
| case SFPARSE_ERR_PARSE: |
| return rv; |
| default: |
| assert(0); |
| abort(); |
| } |
| } |
| } |
| |
| int sfparse_parser_inner_list(sfparse_parser *sfp, sfparse_value *dest) { |
| int rv; |
| |
| switch (sfp->state & SFPARSE_STATE_OP_MASK) { |
| case SFPARSE_STATE_BEFORE: |
| parser_discard_sp(sfp); |
| if (parser_eof(sfp)) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| break; |
| case SFPARSE_STATE_BEFORE_PARAMS: |
| rv = parser_skip_params(sfp); |
| if (rv != 0) { |
| return rv; |
| } |
| |
| /* Technically, we are entering SFPARSE_STATE_AFTER, but we will set |
| another state without reading the state. */ |
| /* parser_set_op_state(sfp, SFPARSE_STATE_AFTER); */ |
| |
| /* fall through */ |
| case SFPARSE_STATE_AFTER: |
| if (parser_eof(sfp)) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| switch (*sfp->pos) { |
| case ' ': |
| parser_discard_sp(sfp); |
| if (parser_eof(sfp)) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| break; |
| case ')': |
| break; |
| default: |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| break; |
| default: |
| assert(0); |
| abort(); |
| } |
| |
| if (*sfp->pos == ')') { |
| ++sfp->pos; |
| |
| parser_unset_inner_list_state(sfp); |
| parser_set_op_state(sfp, SFPARSE_STATE_BEFORE_PARAMS); |
| |
| return SFPARSE_ERR_EOF; |
| } |
| |
| rv = parser_bare_item(sfp, dest); |
| if (rv != 0) { |
| return rv; |
| } |
| |
| parser_set_op_state(sfp, SFPARSE_STATE_BEFORE_PARAMS); |
| |
| return 0; |
| } |
| |
| static int parser_skip_inner_list(sfparse_parser *sfp) { |
| int rv; |
| |
| for (;;) { |
| rv = sfparse_parser_inner_list(sfp, NULL); |
| switch (rv) { |
| case 0: |
| break; |
| case SFPARSE_ERR_EOF: |
| return 0; |
| case SFPARSE_ERR_PARSE: |
| return rv; |
| default: |
| assert(0); |
| abort(); |
| } |
| } |
| } |
| |
| static int parser_next_key_or_item(sfparse_parser *sfp) { |
| parser_discard_ows(sfp); |
| |
| if (parser_eof(sfp)) { |
| return SFPARSE_ERR_EOF; |
| } |
| |
| if (*sfp->pos != ',') { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| ++sfp->pos; |
| |
| parser_discard_ows(sfp); |
| if (parser_eof(sfp)) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| return 0; |
| } |
| |
| static int parser_dict_value(sfparse_parser *sfp, sfparse_value *dest) { |
| int rv; |
| |
| if (parser_eof(sfp) || *(sfp->pos) != '=') { |
| /* Boolean true */ |
| if (dest) { |
| dest->type = SFPARSE_TYPE_BOOLEAN; |
| dest->flags = SFPARSE_VALUE_FLAG_NONE; |
| dest->boolean = 1; |
| } |
| |
| sfp->state = SFPARSE_STATE_DICT_BEFORE_PARAMS; |
| |
| return 0; |
| } |
| |
| ++sfp->pos; |
| |
| if (parser_eof(sfp)) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| if (*sfp->pos == '(') { |
| if (dest) { |
| dest->type = SFPARSE_TYPE_INNER_LIST; |
| dest->flags = SFPARSE_VALUE_FLAG_NONE; |
| } |
| |
| ++sfp->pos; |
| |
| sfp->state = SFPARSE_STATE_DICT_INNER_LIST_BEFORE; |
| |
| return 0; |
| } |
| |
| rv = parser_bare_item(sfp, dest); |
| if (rv != 0) { |
| return rv; |
| } |
| |
| sfp->state = SFPARSE_STATE_DICT_BEFORE_PARAMS; |
| |
| return 0; |
| } |
| |
| int sfparse_parser_dict(sfparse_parser *sfp, sfparse_vec *dest_key, |
| sfparse_value *dest_value) { |
| int rv; |
| |
| switch (sfp->state) { |
| case SFPARSE_STATE_DICT_INNER_LIST_BEFORE: |
| rv = parser_skip_inner_list(sfp); |
| if (rv != 0) { |
| return rv; |
| } |
| |
| /* fall through */ |
| case SFPARSE_STATE_DICT_BEFORE_PARAMS: |
| rv = parser_skip_params(sfp); |
| if (rv != 0) { |
| return rv; |
| } |
| |
| /* fall through */ |
| case SFPARSE_STATE_DICT_AFTER: |
| rv = parser_next_key_or_item(sfp); |
| if (rv != 0) { |
| return rv; |
| } |
| |
| break; |
| case SFPARSE_STATE_INITIAL: |
| parser_discard_sp(sfp); |
| |
| if (parser_eof(sfp)) { |
| return SFPARSE_ERR_EOF; |
| } |
| |
| break; |
| default: |
| assert(0); |
| abort(); |
| } |
| |
| rv = parser_key(sfp, dest_key); |
| if (rv != 0) { |
| return rv; |
| } |
| |
| return parser_dict_value(sfp, dest_value); |
| } |
| |
| int sfparse_parser_list(sfparse_parser *sfp, sfparse_value *dest) { |
| int rv; |
| |
| switch (sfp->state) { |
| case SFPARSE_STATE_LIST_INNER_LIST_BEFORE: |
| rv = parser_skip_inner_list(sfp); |
| if (rv != 0) { |
| return rv; |
| } |
| |
| /* fall through */ |
| case SFPARSE_STATE_LIST_BEFORE_PARAMS: |
| rv = parser_skip_params(sfp); |
| if (rv != 0) { |
| return rv; |
| } |
| |
| /* fall through */ |
| case SFPARSE_STATE_LIST_AFTER: |
| rv = parser_next_key_or_item(sfp); |
| if (rv != 0) { |
| return rv; |
| } |
| |
| break; |
| case SFPARSE_STATE_INITIAL: |
| parser_discard_sp(sfp); |
| |
| if (parser_eof(sfp)) { |
| return SFPARSE_ERR_EOF; |
| } |
| |
| break; |
| default: |
| assert(0); |
| abort(); |
| } |
| |
| if (*sfp->pos == '(') { |
| if (dest) { |
| dest->type = SFPARSE_TYPE_INNER_LIST; |
| dest->flags = SFPARSE_VALUE_FLAG_NONE; |
| } |
| |
| ++sfp->pos; |
| |
| sfp->state = SFPARSE_STATE_LIST_INNER_LIST_BEFORE; |
| |
| return 0; |
| } |
| |
| rv = parser_bare_item(sfp, dest); |
| if (rv != 0) { |
| return rv; |
| } |
| |
| sfp->state = SFPARSE_STATE_LIST_BEFORE_PARAMS; |
| |
| return 0; |
| } |
| |
| int sfparse_parser_item(sfparse_parser *sfp, sfparse_value *dest) { |
| int rv; |
| |
| switch (sfp->state) { |
| case SFPARSE_STATE_INITIAL: |
| parser_discard_sp(sfp); |
| |
| if (parser_eof(sfp)) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| break; |
| case SFPARSE_STATE_ITEM_INNER_LIST_BEFORE: |
| rv = parser_skip_inner_list(sfp); |
| if (rv != 0) { |
| return rv; |
| } |
| |
| /* fall through */ |
| case SFPARSE_STATE_ITEM_BEFORE_PARAMS: |
| rv = parser_skip_params(sfp); |
| if (rv != 0) { |
| return rv; |
| } |
| |
| /* fall through */ |
| case SFPARSE_STATE_ITEM_AFTER: |
| parser_discard_sp(sfp); |
| |
| if (!parser_eof(sfp)) { |
| return SFPARSE_ERR_PARSE; |
| } |
| |
| return SFPARSE_ERR_EOF; |
| default: |
| assert(0); |
| abort(); |
| } |
| |
| if (*sfp->pos == '(') { |
| if (dest) { |
| dest->type = SFPARSE_TYPE_INNER_LIST; |
| dest->flags = SFPARSE_VALUE_FLAG_NONE; |
| } |
| |
| ++sfp->pos; |
| |
| sfp->state = SFPARSE_STATE_ITEM_INNER_LIST_BEFORE; |
| |
| return 0; |
| } |
| |
| rv = parser_bare_item(sfp, dest); |
| if (rv != 0) { |
| return rv; |
| } |
| |
| sfp->state = SFPARSE_STATE_ITEM_BEFORE_PARAMS; |
| |
| return 0; |
| } |
| |
| void sfparse_parser_init(sfparse_parser *sfp, const uint8_t *data, |
| size_t datalen) { |
| if (datalen == 0) { |
| sfp->pos = sfp->end = NULL; |
| } else { |
| sfp->pos = data; |
| sfp->end = data + datalen; |
| } |
| |
| sfp->state = SFPARSE_STATE_INITIAL; |
| } |
| |
| void sfparse_unescape(sfparse_vec *dest, const sfparse_vec *src) { |
| const uint8_t *p, *q; |
| uint8_t *o; |
| size_t len, slen; |
| |
| if (src->len == 0) { |
| dest->len = 0; |
| |
| return; |
| } |
| |
| o = dest->base; |
| p = src->base; |
| len = src->len; |
| |
| for (;;) { |
| q = memchr(p, '\\', len); |
| if (q == NULL) { |
| memcpy(o, p, len); |
| o += len; |
| |
| dest->len = (size_t)(o - dest->base); |
| |
| return; |
| } |
| |
| slen = (size_t)(q - p); |
| memcpy(o, p, slen); |
| o += slen; |
| |
| p = q + 1; |
| *o++ = *p++; |
| len -= slen + 2; |
| } |
| } |
| |
| void sfparse_base64decode(sfparse_vec *dest, const sfparse_vec *src) { |
| static const int index_tbl[] = { |
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, |
| 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, |
| 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, |
| -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, |
| 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| -1, -1, -1, -1, -1, -1, -1, -1, -1}; |
| uint8_t *o; |
| const uint8_t *p, *end; |
| uint32_t n; |
| size_t i, left; |
| int idx; |
| |
| if (src->len == 0) { |
| dest->len = 0; |
| |
| return; |
| } |
| |
| o = dest->base; |
| p = src->base; |
| left = src->len & 0x3; |
| if (left == 0 && src->base[src->len - 1] == '=') { |
| left = 4; |
| } |
| end = src->base + src->len - left; |
| |
| for (; p != end;) { |
| n = 0; |
| |
| for (i = 1; i <= 4; ++i, ++p) { |
| idx = index_tbl[*p]; |
| |
| assert(idx != -1); |
| |
| n += (uint32_t)(idx << (24 - i * 6)); |
| } |
| |
| *o++ = (uint8_t)(n >> 16); |
| *o++ = (n >> 8) & 0xffu; |
| *o++ = n & 0xffu; |
| } |
| |
| switch (left) { |
| case 0: |
| goto fin; |
| case 1: |
| assert(0); |
| abort(); |
| case 3: |
| if (src->base[src->len - 1] == '=') { |
| left = 2; |
| } |
| |
| break; |
| case 4: |
| assert('=' == src->base[src->len - 1]); |
| |
| if (src->base[src->len - 2] == '=') { |
| left = 2; |
| } else { |
| left = 3; |
| } |
| |
| break; |
| } |
| |
| switch (left) { |
| case 2: |
| *o = (uint8_t)(index_tbl[*p++] << 2); |
| *o++ |= (uint8_t)(index_tbl[*p++] >> 4); |
| |
| break; |
| case 3: |
| n = (uint32_t)(index_tbl[*p++] << 10); |
| n += (uint32_t)(index_tbl[*p++] << 4); |
| n += (uint32_t)(index_tbl[*p++] >> 2); |
| *o++ = (n >> 8) & 0xffu; |
| *o++ = n & 0xffu; |
| |
| break; |
| } |
| |
| fin: |
| dest->len = (size_t)(o - dest->base); |
| } |
| |
| void sfparse_pctdecode(sfparse_vec *dest, const sfparse_vec *src) { |
| const uint8_t *p, *q; |
| uint8_t *o; |
| size_t len, slen; |
| |
| if (src->len == 0) { |
| dest->len = 0; |
| |
| return; |
| } |
| |
| o = dest->base; |
| p = src->base; |
| len = src->len; |
| |
| for (;;) { |
| q = memchr(p, '%', len); |
| if (q == NULL) { |
| memcpy(o, p, len); |
| o += len; |
| |
| dest->len = (size_t)(o - dest->base); |
| |
| return; |
| } |
| |
| slen = (size_t)(q - p); |
| memcpy(o, p, slen); |
| o += slen; |
| |
| p = q + 1; |
| |
| pctdecode(o++, &p); |
| |
| len -= slen + 3; |
| } |
| } |