| /************************************************* |
| * Perl-Compatible Regular Expressions * |
| *************************************************/ |
| |
| /* PCRE is a library of functions to support regular expressions whose syntax |
| and semantics are as close as possible to those of the Perl 5 language. |
| |
| Written by Philip Hazel |
| Copyright (c) 1997-2012 University of Cambridge |
| |
| The machine code generator part (this module) was written by Zoltan Herczeg |
| Copyright (c) 2010-2012 |
| |
| ----------------------------------------------------------------------------- |
| Redistribution and use in source and binary forms, with or without |
| modification, are permitted provided that the following conditions are met: |
| |
| * Redistributions of source code must retain the above copyright notice, |
| this list of conditions and the following disclaimer. |
| |
| * Redistributions in binary form must reproduce the above copyright |
| notice, this list of conditions and the following disclaimer in the |
| documentation and/or other materials provided with the distribution. |
| |
| * Neither the name of the University of Cambridge nor the names of its |
| contributors may be used to endorse or promote products derived from |
| this software without specific prior written permission. |
| |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| POSSIBILITY OF SUCH DAMAGE. |
| ----------------------------------------------------------------------------- |
| */ |
| |
| #include "config.h" |
| |
| #include "pcre_internal.h" |
| |
| #ifdef SUPPORT_JIT |
| |
| /* All-in-one: Since we use the JIT compiler only from here, |
| we just include it. This way we don't need to touch the build |
| system files. */ |
| |
| #define SLJIT_MALLOC(size) (PUBL(malloc))(size) |
| #define SLJIT_FREE(ptr) (PUBL(free))(ptr) |
| #define SLJIT_CONFIG_AUTO 1 |
| #define SLJIT_CONFIG_STATIC 1 |
| #define SLJIT_VERBOSE 0 |
| #define SLJIT_DEBUG 0 |
| |
| #include "sljit/sljitLir.c" |
| |
| #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED |
| #error Unsupported architecture |
| #endif |
| |
| /* Allocate memory on the stack. Fast, but limited size. |
| NOTE(review): the unit of this limit (presumably bytes) is not visible in |
| this part of the file - confirm at the point of use. */ |
| #define LOCAL_SPACE_SIZE 32768 |
| |
| #define STACK_GROWTH_RATE 8192 /* NOTE(review): not referenced in this part of |
| the file; presumably the step by which the backtrack stack is enlarged - confirm. */ |
| |
| /* Enable to check that the allocation could destroy temporaries. |
| DESTROY_REGISTERS is defined only when SLJIT_DEBUG is defined and nonzero; |
| this file sets SLJIT_DEBUG to 0 above, so the check is off by default. */ |
| #if defined SLJIT_DEBUG && SLJIT_DEBUG |
| #define DESTROY_REGISTERS 1 |
| #endif |
| |
| /* |
| Short summary about the backtracking mechanism employed by the jit code generator: |
| |
| The code generator follows the recursive nature of the PERL compatible regular |
| expressions. The basic blocks of regular expressions are condition checkers |
| which execute different commands depending on the result of the condition check. |
| The relationship between the operators can be horizontal (concatenation) and |
| vertical (sub-expression) (See struct backtrack_common for more details). |
| |
| 'ab' - 'a' and 'b' regexps are concatenated |
| 'a+' - 'a' is the sub-expression of the '+' operator |
| |
| The condition checkers are boolean (true/false) checkers. Machine code is generated |
| for the checker itself and for the actions depending on the result of the checker. |
| The 'true' case is called the try path (expected path), and the other is called |
| the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken |
| branches on the try path. |
| |
| Greedy star operator (*) : |
| Try path: match happens. |
| Backtrack path: match failed. |
| Non-greedy star operator (*?) : |
| Try path: no need to perform a match. |
| Backtrack path: match is required. |
| |
| The following example shows the code generated for a capturing bracket |
| with two alternatives. Let A, B, C and D be arbitrary regular expressions, and |
| consider the following regular expression: |
| |
| A(B|C)D |
| |
| The generated code will be the following: |
| |
| A try path |
| '(' try path (pushing arguments to the stack) |
| B try path |
| ')' try path (pushing arguments to the stack) |
| D try path |
| return with successful match |
| |
| D backtrack path |
| ')' backtrack path (If we arrived from "C" jump to the backtrack of "C") |
| B backtrack path |
| C expected path |
| jump to D try path |
| C backtrack path |
| A backtrack path |
| |
| Notice that the order of the backtrack code paths is the opposite of the order |
| of the fast (try) code paths. In this way the topmost value on the stack always |
| belongs to the current backtrack code path. The backtrack path must check |
| whether there is a next alternative. If so, it needs to jump back to |
| the try path eventually. Otherwise it needs to clear out its own stack |
| frame and continue the execution on the backtrack code paths. |
| */ |
| |
| /* |
| Saved stack frames: |
| |
| Atomic blocks and asserts require reloading the values of local variables |
| when the backtrack mechanism is performed. Because of OP_RECURSE, the locals |
| are not necessarily known at compile time, thus we need a dynamic restore |
| mechanism. |
| |
| The stack frames are stored in a chain list, and have the following format: |
| ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ] |
| |
| Thus we can restore the locals to a particular point in the stack. |
| */ |
| |
| typedef struct jit_arguments { /* Argument block handed by pointer to functions |
| of type jit_function (see the jit_function typedef in this file). */ |
| /* Pointers first. */ |
| struct sljit_stack *stack; |
| const pcre_uchar *str; /* NOTE(review): str / begin / end presumably are the |
| match start position and the two bounds of the subject - confirm at the caller. */ |
| const pcre_uchar *begin; |
| const pcre_uchar *end; |
| int *offsets; /* NOTE(review): presumably the caller's output vector, with |
| offsetcount entries available - confirm at the caller. */ |
| pcre_uchar *uchar_ptr; |
| pcre_uchar *mark_ptr; |
| /* Everything else after. */ |
| int offsetcount; |
| int calllimit; |
| pcre_uint8 notbol; |
| pcre_uint8 noteol; |
| pcre_uint8 notempty; |
| pcre_uint8 notempty_atstart; |
| } jit_arguments; |
| |
| typedef struct executable_functions { /* Holds one executable_funcs pointer and |
| one executable_sizes entry for each of the JIT_NUMBER_OF_COMPILE_MODES slots, |
| plus a callback and its user data. NOTE(review): how the callback and userdata |
| are used is not visible in this part of the file. */ |
| void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES]; |
| PUBL(jit_callback) callback; |
| void *userdata; |
| sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES]; |
| } executable_functions; |
| |
| typedef struct jump_list { /* Node of a singly linked list of jumps. add_jump |
| pushes a new node at the head of a list; set_jumps binds every jump of a |
| list to one label. */ |
| struct sljit_jump *jump; |
| struct jump_list *next; /* Next node; the walk in set_jumps stops at NULL. */ |
| } jump_list; |
| |
| enum stub_types { stack_alloc }; /* Kind of a stub_list entry; only one kind exists. */ |
| |
| typedef struct stub_list { /* Entry of the list reachable from |
| compiler_common.stubs. add_stub fills the members from its arguments and |
| points next at the current value of common->stubs. */ |
| enum stub_types type; |
| int data; |
| struct sljit_jump *start; |
| struct sljit_label *leave; /* Label emitted by add_stub when the entry is created. */ |
| struct stub_list *next; |
| } stub_list; |
| |
| typedef int (SLJIT_CALL *jit_function)(jit_arguments *args); /* Pointer to a |
| function that takes a jit_arguments block and returns int. NOTE(review): |
| SLJIT_CALL presumably selects the calling convention - confirm in sljit. */ |
| |
| /* The following structure is the key data type for the recursive |
| code generator. It is allocated by compile_trypath, and contains |
| the arguments for compile_backtrackpath. Must be the first member |
| of its descendants: every *_backtrack structure in this file embeds it |
| as its first member, named 'common'. */ |
| typedef struct backtrack_common { |
| /* Concatenation stack (the 'horizontal' relationship described in the |
| summary at the top of this file). */ |
| struct backtrack_common *prev; |
| jump_list *nextbacktracks; |
| /* Internal stack for component operators (the 'vertical', sub-expression |
| relationship described in the summary at the top of this file). */ |
| struct backtrack_common *top; |
| jump_list *topbacktracks; |
| /* Opcode pointer. */ |
| pcre_uchar *cc; |
| } backtrack_common; |
| |
| typedef struct assert_backtrack { /* Backtrack record with the extra members |
| listed below. NOTE(review): the name suggests it serves the assertion |
| opcodes; its users are not in this part of the file. */ |
| backtrack_common common; /* Must stay the first member (see backtrack_common). */ |
| jump_list *condfailed; |
| /* Less than 0 (-1) if a frame is not needed. */ |
| int framesize; |
| /* Points to our private memory word on the stack. */ |
| int localptr; |
| /* For iterators. */ |
| struct sljit_label *trypath; |
| } assert_backtrack; |
| |
| typedef struct bracket_backtrack { /* Backtrack record with the extra members |
| listed below. NOTE(review): the name suggests it serves the bracket opcodes; |
| its users are not in this part of the file. */ |
| backtrack_common common; /* Must stay the first member (see backtrack_common). */ |
| /* Where to continue if an alternative is successfully matched. */ |
| struct sljit_label *alttrypath; |
| /* For rmin and rmax iterators. */ |
| struct sljit_label *recursivetrypath; |
| /* For greedy ? operator. */ |
| struct sljit_label *zerotrypath; |
| /* Contains the branches of a failed condition. The members of this union |
| share storage, so only the one matching the opcode is meaningful. */ |
| union { |
| /* Both for OP_COND, OP_SCOND. */ |
| jump_list *condfailed; |
| assert_backtrack *assert; |
| /* For OP_ONCE. -1 if not needed. */ |
| int framesize; |
| } u; |
| /* Points to our private memory word on the stack. */ |
| int localptr; |
| } bracket_backtrack; |
| |
| typedef struct bracketpos_backtrack { /* Backtrack record with the extra members |
| listed below. NOTE(review): the name suggests it serves the possessive |
| bracket opcodes (OP_*BRAPOS); its users are not in this part of the file. */ |
| backtrack_common common; /* Must stay the first member (see backtrack_common). */ |
| /* Points to our private memory word on the stack. */ |
| int localptr; |
| /* Reverting stack is needed. NOTE(review): presumably -1 when no frame is |
| needed, like the other framesize members in this file - confirm. */ |
| int framesize; |
| /* Allocated stack size. */ |
| int stacksize; |
| } bracketpos_backtrack; |
| |
| typedef struct braminzero_backtrack { /* Backtrack record that adds one label. |
| NOTE(review): the name suggests it serves OP_BRAMINZERO - confirm at its users. */ |
| backtrack_common common; /* Must stay the first member (see backtrack_common). */ |
| struct sljit_label *trypath; |
| } braminzero_backtrack; |
| |
| typedef struct iterator_backtrack { /* Backtrack record that adds the label of |
| the next iteration. */ |
| backtrack_common common; /* Must stay the first member (see backtrack_common). */ |
| /* Next iteration. */ |
| struct sljit_label *trypath; |
| } iterator_backtrack; |
| |
| typedef struct recurse_entry { /* Node of a singly linked list (see next). |
| compiler_common has two pointers to this type: entries and currententry. */ |
| struct recurse_entry *next; |
| /* Contains the function entry. */ |
| struct sljit_label *entry; |
| /* Collects the calls made while the function has not been created yet. */ |
| jump_list *calls; |
| /* Points to the starting opcode. NOTE(review): this is an int, so it is |
| presumably an offset rather than a pointer - confirm at its users. */ |
| int start; |
| } recurse_entry; |
| |
| typedef struct recurse_backtrack { /* Backtrack record that carries nothing |
| beyond the common part. */ |
| backtrack_common common; |
| } recurse_backtrack; |
| |
| typedef struct compiler_common { /* State shared by the code generator |
| functions of this file, which receive it through a parameter named common. */ |
| struct sljit_compiler *compiler; /* Copied into a local by DEFINE_COMPILER. */ |
| pcre_uchar *start; /* Base address for indexing localptrs (see PRIV_DATA). */ |
| |
| /* Opcode local area direct map: localptrs[cc - start] is the offset that |
| set_localptrs assigned to the opcode at cc. */ |
| int *localptrs; |
| int cbraptr; /* Base offset used by OVECTOR_PRIV. */ |
| /* OVector starting point. Must be divisible by 2. get_localspace moves it |
| forward by one word each time it reserves recursive_head or mark_ptr. */ |
| int ovector_start; |
| /* Last known position of the requested byte. */ |
| int req_char_ptr; |
| /* Head of the last recursion. 0 until get_localspace meets the first |
| OP_RECURSE and reserves a local for it. */ |
| int recursive_head; |
| /* First inspected character for partial matching. */ |
| int start_used_ptr; |
| /* Starting pointer for partial soft matches. */ |
| int hit_start; |
| /* End pointer of the first line. */ |
| int first_line_end; |
| /* Points to the marked string. 0 until get_localspace meets the first |
| OP_MARK and reserves a local for it. */ |
| int mark_ptr; |
| |
| /* Other */ |
| const pcre_uint8 *fcc; |
| sljit_w lcc; |
| int mode; |
| int nltype; |
| int newline; |
| int bsr_nltype; |
| int endonly; |
| BOOL has_set_som; /* Set to TRUE by get_localspace when it meets OP_SET_SOM. */ |
| sljit_w ctypes; |
| sljit_uw name_table; |
| sljit_w name_count; |
| sljit_w name_entry_size; |
| |
| /* Labels and jump lists. */ |
| struct sljit_label *partialmatchlabel; |
| struct sljit_label *leavelabel; |
| struct sljit_label *acceptlabel; |
| stub_list *stubs; /* add_stub links each new entry to this list. */ |
| recurse_entry *entries; |
| recurse_entry *currententry; |
| jump_list *partialmatch; |
| jump_list *leave; |
| jump_list *accept; |
| jump_list *calllimit; |
| jump_list *stackalloc; |
| jump_list *revertframes; |
| jump_list *wordboundary; |
| jump_list *anynewline; |
| jump_list *hspace; |
| jump_list *vspace; |
| jump_list *casefulcmp; |
| jump_list *caselesscmp; |
| BOOL jscript_compat; |
| #ifdef SUPPORT_UTF |
| BOOL utf; /* When set, next_opcode also skips the extra units of a multi-unit |
| character and returns NULL for OP_ANYBYTE. */ |
| #ifdef SUPPORT_UCP |
| BOOL use_ucp; |
| #endif |
| jump_list *utfreadchar; |
| #ifdef COMPILE_PCRE8 |
| jump_list *utfreadtype8; |
| #endif |
| #endif /* SUPPORT_UTF */ |
| #ifdef SUPPORT_UCP |
| jump_list *getunichartype; |
| jump_list *getunichartype_2; |
| jump_list *getunicharscript; |
| #endif |
| } compiler_common; |
| |
| /* For byte_sequence_compare (not in this part of the file). length and |
| sourcereg are always present. When SLJIT_UNALIGNED is defined and nonzero |
| the structure also carries ucharptr and two unions, c and oc, each of which |
| overlays an sljit_i, an sljit_uh and an array of character units. |
| NOTE(review): c and oc presumably hold the characters to compare and their |
| other-case forms - confirm in byte_sequence_compare. */ |
| |
| typedef struct compare_context { |
| int length; |
| int sourcereg; |
| #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED |
| int ucharptr; |
| union { |
| sljit_i asint; |
| sljit_uh asushort; |
| #ifdef COMPILE_PCRE8 |
| sljit_ub asbyte; |
| sljit_ub asuchars[4]; |
| #else |
| #ifdef COMPILE_PCRE16 |
| sljit_uh asuchars[2]; |
| #endif |
| #endif |
| } c; |
| union { |
| sljit_i asint; |
| sljit_uh asushort; |
| #ifdef COMPILE_PCRE8 |
| sljit_ub asbyte; |
| sljit_ub asuchars[4]; |
| #else |
| #ifdef COMPILE_PCRE16 |
| sljit_uh asuchars[2]; |
| #endif |
| #endif |
| } oc; |
| #endif |
| } compare_context; |
| |
| enum { /* Tags that init_frame writes into a saved stack frame. */ |
| frame_end = 0, /* Written after the last entry of the frame. */ |
| frame_setstrbegin = -1, /* Followed by the saved value of OVECTOR(0). */ |
| frame_setmark = -2 /* Followed by the saved value of the mark_ptr local. */ |
| }; |
| |
| /* Undefine sljit macros. CMP is defined again further down as a shortcut |
| for sljit_emit_cmp. */ |
| #undef CMP |
| |
| /* Used for accessing the elements of the stack. STACK(i) is the byte offset |
| of element i: STACK(0) is -sizeof(sljit_w) and every further element lies one |
| word lower. init_frame and copy_locals use these offsets with STACK_TOP as |
| the base register. */ |
| #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w)) |
| |
| #define TMP1 SLJIT_TEMPORARY_REG1 /* Names for the sljit registers used by the |
| code generator. TMP1 and TMP2 serve as scratch registers in init_frame and |
| copy_locals. */ |
| #define TMP2 SLJIT_TEMPORARY_REG3 |
| #define TMP3 SLJIT_TEMPORARY_EREG2 |
| #define STR_PTR SLJIT_SAVED_REG1 |
| #define STR_END SLJIT_SAVED_REG2 |
| #define STACK_TOP SLJIT_TEMPORARY_REG2 |
| #define STACK_LIMIT SLJIT_SAVED_REG3 |
| #define ARGUMENTS SLJIT_SAVED_EREG1 |
| #define CALL_COUNT SLJIT_SAVED_EREG2 |
| #define RETURN_ADDR SLJIT_TEMPORARY_EREG1 |
| |
| /* Locals layout. Every macro below except PRIV_DATA is a byte offset; |
| init_frame and copy_locals use such offsets with |
| SLJIT_MEM1(SLJIT_LOCALS_REG). */ |
| /* These two locals can be used by the current opcode. */ |
| #define LOCALS0 (0 * sizeof(sljit_w)) |
| #define LOCALS1 (1 * sizeof(sljit_w)) |
| /* Two local variables for possessive quantifiers (char1 cannot use them). */ |
| #define POSSESSIVE0 (2 * sizeof(sljit_w)) |
| #define POSSESSIVE1 (3 * sizeof(sljit_w)) |
| /* Max limit of recursions. */ |
| #define CALL_LIMIT (4 * sizeof(sljit_w)) |
| /* The output vector is stored on the stack, and contains pointers |
| to characters. The vector data is divided into two groups: the first |
| group contains the start / end character pointers, and the second is |
| the start pointers when the end of the capturing group has not yet been |
| reached. The four macros below expand to expressions that use a variable |
| named common, so they only work where one is in scope. PRIV_DATA(cc) reads |
| the localptrs entry of the opcode at cc. */ |
| #define OVECTOR_START (common->ovector_start) |
| #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w)) |
| #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w)) |
| #define PRIV_DATA(cc) (common->localptrs[(cc) - common->start]) |
| |
| #ifdef COMPILE_PCRE8 /* Select the sljit move opcodes behind MOV_UCHAR and |
| MOVU_UCHAR: the _UB forms when COMPILE_PCRE8 is defined, the _UH forms when |
| COMPILE_PCRE16 is defined; any other configuration stops the build. */ |
| #define MOV_UCHAR SLJIT_MOV_UB |
| #define MOVU_UCHAR SLJIT_MOVU_UB |
| #else |
| #ifdef COMPILE_PCRE16 |
| #define MOV_UCHAR SLJIT_MOV_UH |
| #define MOVU_UCHAR SLJIT_MOVU_UH |
| #else |
| #error Unsupported compiling mode |
| #endif |
| #endif |
| |
| /* Shortcuts for the sljit emitter calls. DEFINE_COMPILER declares a local |
| named compiler from common->compiler; every other macro below passes that |
| local as the first argument, so it must be in scope where they are used. |
| JUMPTO and CMPTO bind the newly emitted jump to an existing label, JUMPHERE |
| binds an existing jump to a label emitted at the current position. */ |
| #define DEFINE_COMPILER \ |
| struct sljit_compiler *compiler = common->compiler |
| #define OP1(op, dst, dstw, src, srcw) \ |
| sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw)) |
| #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \ |
| sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w)) |
| #define LABEL() \ |
| sljit_emit_label(compiler) |
| #define JUMP(type) \ |
| sljit_emit_jump(compiler, (type)) |
| #define JUMPTO(type, label) \ |
| sljit_set_label(sljit_emit_jump(compiler, (type)), (label)) |
| #define JUMPHERE(jump) \ |
| sljit_set_label((jump), sljit_emit_label(compiler)) |
| #define CMP(type, src1, src1w, src2, src2w) \ |
| sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)) |
| #define CMPTO(type, src1, src1w, src2, src2w, label) \ |
| sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label)) |
| #define COND_VALUE(op, dst, dstw, type) \ |
| sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type)) |
| #define GET_LOCAL_BASE(dst, dstw, offset) \ |
| sljit_get_local_base(compiler, (dst), (dstw), (offset)) |
| |
| /* Returns the address just past a whole bracketed group. cc must point to the |
| opening opcode of an assertion (OP_ASSERT .. OP_ASSERTBACK_NOT) or of a bracket |
| (OP_ONCE .. OP_SCOND). The link stored at cc + 1 is followed across every |
| OP_ALT until the closing OP_KET* opcode, which is then stepped over. */ |
| static pcre_uchar* bracketend(pcre_uchar* cc) |
| { |
|   SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); |
| |
|   /* Jump to the end of the first alternative, then over each further one. */ |
|   cc += GET(cc, 1); |
|   while (*cc == OP_ALT) |
|     cc += GET(cc, 1); |
| |
|   SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS); |
|   return cc + 1 + LINK_SIZE; |
| } |
| |
| /* Functions which might need modification for every newly supported opcode: |
| next_opcode |
| get_localspace |
| set_localptrs |
| get_framesize |
| init_frame |
| get_localsize |
| copy_locals |
| compile_trypath |
| compile_backtrackpath |
| */ |
| |
| /* Steps over the single opcode at cc and returns the address that follows it. |
| Returns NULL when the opcode is not handled here: any opcode missing from the |
| switch, and OP_ANYBYTE when common->utf is set. For bracket-like opcodes only |
| the opcode and its fixed operands are skipped, not the bracket body. |
| common is read only when SUPPORT_UTF is defined. */ |
| static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc) |
| { |
|   pcre_uchar *next; |
| |
|   SLJIT_UNUSED_ARG(common); |
|   switch(*cc) |
|   { |
|     /* The opcode unit alone. */ |
|     case OP_SOD: case OP_SOM: case OP_SET_SOM: |
|     case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY: |
|     case OP_NOT_DIGIT: case OP_DIGIT: |
|     case OP_NOT_WHITESPACE: case OP_WHITESPACE: |
|     case OP_NOT_WORDCHAR: case OP_WORDCHAR: |
|     case OP_ANY: case OP_ALLANY: case OP_ANYNL: |
|     case OP_NOT_HSPACE: case OP_HSPACE: |
|     case OP_NOT_VSPACE: case OP_VSPACE: |
|     case OP_EXTUNI: |
|     case OP_EODN: case OP_EOD: |
|     case OP_CIRC: case OP_CIRCM: |
|     case OP_DOLL: case OP_DOLLM: |
|     case OP_TYPESTAR: case OP_TYPEMINSTAR: |
|     case OP_TYPEPLUS: case OP_TYPEMINPLUS: |
|     case OP_TYPEQUERY: case OP_TYPEMINQUERY: |
|     case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: |
|     case OP_CRSTAR: case OP_CRMINSTAR: |
|     case OP_CRPLUS: case OP_CRMINPLUS: |
|     case OP_CRQUERY: case OP_CRMINQUERY: |
|     case OP_DEF: |
|     case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO: |
|     case OP_COMMIT: case OP_FAIL: |
|     case OP_ACCEPT: case OP_ASSERT_ACCEPT: |
|     case OP_SKIPZERO: |
|       return cc + 1; |
| |
|     /* Rejected in UTF mode, otherwise the opcode unit alone. */ |
|     case OP_ANYBYTE: |
| #ifdef SUPPORT_UTF |
|       if (common->utf) |
|         return NULL; |
| #endif |
|       return cc + 1; |
| |
|     /* Opcode followed by one character. */ |
|     case OP_CHAR: case OP_CHARI: |
|     case OP_NOT: case OP_NOTI: |
|     case OP_STAR: case OP_MINSTAR: |
|     case OP_PLUS: case OP_MINPLUS: |
|     case OP_QUERY: case OP_MINQUERY: |
|     case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: |
|     case OP_STARI: case OP_MINSTARI: |
|     case OP_PLUSI: case OP_MINPLUSI: |
|     case OP_QUERYI: case OP_MINQUERYI: |
|     case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: |
|     case OP_NOTSTAR: case OP_NOTMINSTAR: |
|     case OP_NOTPLUS: case OP_NOTMINPLUS: |
|     case OP_NOTQUERY: case OP_NOTMINQUERY: |
|     case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: |
|     case OP_NOTSTARI: case OP_NOTMINSTARI: |
|     case OP_NOTPLUSI: case OP_NOTMINPLUSI: |
|     case OP_NOTQUERYI: case OP_NOTMINQUERYI: |
|     case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: |
|       next = cc + 2; |
| #ifdef SUPPORT_UTF |
|       /* next[-1] is the first unit of the character; add its extra units. */ |
|       if (common->utf && HAS_EXTRALEN(next[-1])) |
|         next += GET_EXTRALEN(next[-1]); |
| #endif |
|       return next; |
| |
|     /* Opcode followed by a 2-unit-encoded count and one character. */ |
|     case OP_UPTO: case OP_MINUPTO: case OP_EXACT: case OP_POSUPTO: |
|     case OP_UPTOI: case OP_MINUPTOI: case OP_EXACTI: case OP_POSUPTOI: |
|     case OP_NOTUPTO: case OP_NOTMINUPTO: case OP_NOTEXACT: case OP_NOTPOSUPTO: |
|     case OP_NOTUPTOI: case OP_NOTMINUPTOI: case OP_NOTEXACTI: case OP_NOTPOSUPTOI: |
|       next = cc + 2 + IMM2_SIZE; |
| #ifdef SUPPORT_UTF |
|       if (common->utf && HAS_EXTRALEN(next[-1])) |
|         next += GET_EXTRALEN(next[-1]); |
| #endif |
|       return next; |
| |
|     /* Opcode followed by two units. */ |
|     case OP_NOTPROP: case OP_PROP: |
|       return cc + 1 + 2; |
| |
|     /* Opcode followed by one IMM2 value. */ |
|     case OP_TYPEUPTO: case OP_TYPEMINUPTO: |
|     case OP_TYPEEXACT: case OP_TYPEPOSUPTO: |
|     case OP_REF: case OP_REFI: |
|     case OP_CREF: case OP_NCREF: |
|     case OP_RREF: case OP_NRREF: |
|     case OP_CLOSE: |
|       return cc + 1 + IMM2_SIZE; |
| |
|     /* Opcode followed by two IMM2 values. */ |
|     case OP_CRRANGE: case OP_CRMINRANGE: |
|       return cc + 1 + 2 * IMM2_SIZE; |
| |
|     /* Opcode followed by a 32-byte table. */ |
|     case OP_CLASS: case OP_NCLASS: |
|       return cc + 1 + 32 / sizeof(pcre_uchar); |
| |
| #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
|     /* The total length is stored right after the opcode. */ |
|     case OP_XCLASS: |
|       return cc + GET(cc, 1); |
| #endif |
| |
|     /* Opcode followed by a link. */ |
|     case OP_RECURSE: |
|     case OP_ASSERT: case OP_ASSERT_NOT: |
|     case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: |
|     case OP_REVERSE: |
|     case OP_ONCE: case OP_ONCE_NC: |
|     case OP_BRA: case OP_BRAPOS: |
|     case OP_COND: |
|     case OP_SBRA: case OP_SBRAPOS: |
|     case OP_SCOND: |
|     case OP_ALT: |
|     case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS: |
|       return cc + 1 + LINK_SIZE; |
| |
|     /* Opcode followed by a link and one IMM2 value. */ |
|     case OP_CBRA: case OP_CBRAPOS: |
|     case OP_SCBRA: case OP_SCBRAPOS: |
|       return cc + 1 + LINK_SIZE + IMM2_SIZE; |
| |
|     /* cc[1] is added to the fixed part of 1 + 2 units. */ |
|     case OP_MARK: |
|       return cc + 1 + 2 + cc[1]; |
| |
|     default: |
|       return NULL; |
|   } |
| } |
| |
| /* Scans the opcodes in [cc, ccend) and returns the number of bytes of local |
| storage needed by the opcodes that own a private word, or -1 as soon as |
| next_opcode rejects an opcode. |
| Side effects on common: has_set_som is set when OP_SET_SOM is met; the first |
| OP_RECURSE and the first OP_MARK each reserve one word at ovector_start (for |
| recursive_head and mark_ptr respectively) and move ovector_start forward. |
| These side effects are kept even when -1 is returned later. */ |
| static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend) |
| { |
|   const int word = (int)sizeof(sljit_w); |
|   int total = 0; |
|   pcre_uchar *next; |
|   pcre_uchar *alt_end; |
|   BOOL owns_word; |
| |
|   while (cc < ccend) |
|   { |
|     owns_word = FALSE; |
|     switch(*cc) |
|     { |
|       case OP_SET_SOM: |
|         common->has_set_som = TRUE; |
|         next = cc + 1; |
|         break; |
| |
|       case OP_ASSERT: case OP_ASSERT_NOT: |
|       case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: |
|       case OP_ONCE: case OP_ONCE_NC: |
|       case OP_BRAPOS: |
|       case OP_SBRA: case OP_SBRAPOS: |
|       case OP_SCOND: |
|         owns_word = TRUE; |
|         next = cc + 1 + LINK_SIZE; |
|         break; |
| |
|       case OP_CBRAPOS: case OP_SCBRAPOS: |
|         owns_word = TRUE; |
|         next = cc + 1 + LINK_SIZE + IMM2_SIZE; |
|         break; |
| |
|       case OP_COND: |
|         /* Might be a hidden SCOND: a repeating KET at the end of the first |
|         alternative makes it need a word as well. */ |
|         alt_end = cc + GET(cc, 1); |
|         owns_word = (*alt_end == OP_KETRMAX || *alt_end == OP_KETRMIN); |
|         next = cc + 1 + LINK_SIZE; |
|         break; |
| |
|       case OP_RECURSE: |
|         /* Reserve the slot only once. */ |
|         if (common->recursive_head == 0) |
|         { |
|           common->recursive_head = common->ovector_start; |
|           common->ovector_start += word; |
|         } |
|         next = cc + 1 + LINK_SIZE; |
|         break; |
| |
|       case OP_MARK: |
|         /* Reserve the slot only once. */ |
|         if (common->mark_ptr == 0) |
|         { |
|           common->mark_ptr = common->ovector_start; |
|           common->ovector_start += word; |
|         } |
|         next = cc + 1 + 2 + cc[1]; |
|         break; |
| |
|       default: |
|         next = next_opcode(common, cc); |
|         if (next == NULL) |
|           return -1; |
|         break; |
|     } |
| |
|     if (owns_word) |
|       total += word; |
|     cc = next; |
|   } |
|   return total; |
| } |
| |
| /* Walks the opcodes from common->start up to ccend and gives every opcode |
| that owns a private word its offset: the offset is stored in |
| common->localptrs at the index of the opcode, and localptr then moves forward |
| by one word. The set of opcodes that own a word is the same as in |
| get_localspace. */ |
| static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend) |
| { |
|   pcre_uchar *cc = common->start; |
|   pcre_uchar *next; |
|   pcre_uchar *alt_end; |
|   BOOL owns_word; |
| |
|   while (cc < ccend) |
|   { |
|     owns_word = FALSE; |
|     switch(*cc) |
|     { |
|       case OP_ASSERT: case OP_ASSERT_NOT: |
|       case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: |
|       case OP_ONCE: case OP_ONCE_NC: |
|       case OP_BRAPOS: |
|       case OP_SBRA: case OP_SBRAPOS: |
|       case OP_SCOND: |
|         owns_word = TRUE; |
|         next = cc + 1 + LINK_SIZE; |
|         break; |
| |
|       case OP_CBRAPOS: case OP_SCBRAPOS: |
|         owns_word = TRUE; |
|         next = cc + 1 + LINK_SIZE + IMM2_SIZE; |
|         break; |
| |
|       case OP_COND: |
|         /* Might be a hidden SCOND. */ |
|         alt_end = cc + GET(cc, 1); |
|         owns_word = (*alt_end == OP_KETRMAX || *alt_end == OP_KETRMIN); |
|         next = cc + 1 + LINK_SIZE; |
|         break; |
| |
|       default: |
|         next = next_opcode(common, cc); |
|         SLJIT_ASSERT(next != NULL); |
|         break; |
|     } |
| |
|     if (owns_word) |
|     { |
|       common->localptrs[cc - common->start] = localptr; |
|       localptr += sizeof(sljit_w); |
|     } |
|     cc = next; |
|   } |
| } |
| |
| /* Computes the number of stack words of the frame that init_frame writes for |
| the bracket starting at cc. Returns with -1 if no need for frame. |
| Counting rules: 3 words for every capturing bracket inside the group, and 2 |
| words, counted once each, for the start-of-match value and for the mark. |
| When recursive is TRUE those two are treated as already counted and an |
| opening OP_CBRAPOS / OP_SCBRAPOS gets no special treatment. A non-empty frame |
| is one word longer than the sum of its entries. */ |
| static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive) |
| { |
|   pcre_uchar *ccend = bracketend(cc); |
|   int length = 0; |
|   BOOL possessive = FALSE; |
|   BOOL som_counted = recursive; |
|   BOOL mark_counted = recursive; |
|   BOOL touches_som; |
|   BOOL touches_mark; |
| |
|   if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)) |
|   { |
|     /* The possessive capturing bracket itself contributes one entry. */ |
|     length = 3; |
|     possessive = TRUE; |
|   } |
| |
|   cc = next_opcode(common, cc); |
|   SLJIT_ASSERT(cc != NULL); |
|   while (cc < ccend) |
|   { |
|     touches_som = FALSE; |
|     touches_mark = FALSE; |
|     switch(*cc) |
|     { |
|       case OP_SET_SOM: |
|         SLJIT_ASSERT(common->has_set_som); |
|         touches_som = TRUE; |
|         cc += 1; |
|         break; |
| |
|       case OP_MARK: |
|         SLJIT_ASSERT(common->mark_ptr != 0); |
|         touches_mark = TRUE; |
|         cc += 1 + 2 + cc[1]; |
|         break; |
| |
|       case OP_RECURSE: |
|         /* A recursion counts for whichever of the two the pattern uses. */ |
|         touches_som = common->has_set_som; |
|         touches_mark = (common->mark_ptr != 0); |
|         cc += 1 + LINK_SIZE; |
|         break; |
| |
|       case OP_CBRA: case OP_CBRAPOS: |
|       case OP_SCBRA: case OP_SCBRAPOS: |
|         length += 3; |
|         cc += 1 + LINK_SIZE + IMM2_SIZE; |
|         break; |
| |
|       default: |
|         cc = next_opcode(common, cc); |
|         SLJIT_ASSERT(cc != NULL); |
|         break; |
|     } |
| |
|     if (touches_som && !som_counted) |
|     { |
|       length += 2; |
|       som_counted = TRUE; |
|     } |
|     if (touches_mark && !mark_counted) |
|     { |
|       length += 2; |
|       mark_counted = TRUE; |
|     } |
|   } |
| |
|   /* Possessive quantifiers can use a special case: nothing was added to the |
|   entry of the opening bracket itself. */ |
|   if (SLJIT_UNLIKELY(possessive) && length == 3) |
|     return -1; |
| |
|   return (length > 0) ? length + 1 : -1; |
| } |
| |
| /* Emits the code that writes a saved stack frame for the bracket starting at |
| cc. The frame is stored through STACK_TOP, beginning at STACK(stackpos), and |
| is made of these entries, closed by a frame_end word: |
|   capturing bracket: [ OVECTOR offset ][ start value ][ end value ] |
|   start of match:    [ frame_setstrbegin ][ value of OVECTOR(0) ] |
|   mark:              [ frame_setmark ][ value of the mark_ptr local ] |
| The last two are written at most once each, and never when recursive is |
| TRUE. stacktop is used only by the assertions: the frame_end word must land |
| on STACK(stacktop). TMP1 and TMP2 are overwritten by the emitted code. */ |
| static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive) |
| { |
|   DEFINE_COMPILER; |
|   pcre_uchar *ccend = bracketend(cc); |
|   BOOL som_saved = recursive; |
|   BOOL mark_saved = recursive; |
|   BOOL touches_som; |
|   BOOL touches_mark; |
|   int ovec_index; |
| |
|   /* >= 1 + shortest item size (2) */ |
|   SLJIT_UNUSED_ARG(stacktop); |
|   SLJIT_ASSERT(stackpos >= stacktop + 2); |
| |
|   stackpos = STACK(stackpos); |
|   /* A non-recursive frame that opens with a possessive capturing bracket |
|   keeps cc on that bracket, so the loop below records it as well. */ |
|   if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)) |
|     cc = next_opcode(common, cc); |
|   SLJIT_ASSERT(cc != NULL); |
| |
|   while (cc < ccend) |
|   { |
|     touches_som = FALSE; |
|     touches_mark = FALSE; |
|     switch(*cc) |
|     { |
|       case OP_SET_SOM: |
|         SLJIT_ASSERT(common->has_set_som); |
|         touches_som = TRUE; |
|         cc += 1; |
|         break; |
| |
|       case OP_MARK: |
|         SLJIT_ASSERT(common->mark_ptr != 0); |
|         touches_mark = TRUE; |
|         cc += 1 + 2 + cc[1]; |
|         break; |
| |
|       case OP_RECURSE: |
|         touches_som = common->has_set_som; |
|         touches_mark = (common->mark_ptr != 0); |
|         cc += 1 + LINK_SIZE; |
|         break; |
| |
|       case OP_CBRA: case OP_CBRAPOS: |
|       case OP_SCBRA: case OP_SCBRAPOS: |
|         ovec_index = (GET2(cc, 1 + LINK_SIZE)) << 1; |
|         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(ovec_index)); |
|         stackpos += (int)sizeof(sljit_w); |
|         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(ovec_index)); |
|         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(ovec_index + 1)); |
|         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); |
|         stackpos += (int)sizeof(sljit_w); |
|         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0); |
|         stackpos += (int)sizeof(sljit_w); |
|         cc += 1 + LINK_SIZE + IMM2_SIZE; |
|         break; |
| |
|       default: |
|         cc = next_opcode(common, cc); |
|         SLJIT_ASSERT(cc != NULL); |
|         break; |
|     } |
| |
|     /* The start-of-match entry goes before the mark entry when one opcode |
|     (OP_RECURSE) asks for both. */ |
|     if (touches_som && !som_saved) |
|     { |
|       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); |
|       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin); |
|       stackpos += (int)sizeof(sljit_w); |
|       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); |
|       stackpos += (int)sizeof(sljit_w); |
|       som_saved = TRUE; |
|     } |
|     if (touches_mark && !mark_saved) |
|     { |
|       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); |
|       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark); |
|       stackpos += (int)sizeof(sljit_w); |
|       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); |
|       stackpos += (int)sizeof(sljit_w); |
|       mark_saved = TRUE; |
|     } |
|   } |
| |
|   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end); |
|   SLJIT_ASSERT(stackpos == STACK(stacktop)); |
| } |
| |
| /* Returns the number of words covered by the opcodes in [cc, ccend): a fixed |
| base of 2, plus one word for every opcode that owns a private word or is an |
| OP_CBRA / OP_SCBRA, plus two words for every OP_CBRAPOS / OP_SCBRAPOS. |
| NOTE(review): the per-opcode counts match the slots copy_locals moves; what |
| the base of 2 stands for is not stated in this part of the file - confirm at |
| the caller. */ |
| static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend) |
| { |
|   int words = 2; |
|   int extra; |
|   pcre_uchar *next; |
|   pcre_uchar *alt_end; |
| |
|   while (cc < ccend) |
|   { |
|     extra = 0; |
|     switch(*cc) |
|     { |
|       case OP_ASSERT: case OP_ASSERT_NOT: |
|       case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: |
|       case OP_ONCE: case OP_ONCE_NC: |
|       case OP_BRAPOS: |
|       case OP_SBRA: case OP_SBRAPOS: |
|       case OP_SCOND: |
|         extra = 1; |
|         next = cc + 1 + LINK_SIZE; |
|         break; |
| |
|       case OP_CBRA: case OP_SCBRA: |
|         extra = 1; |
|         next = cc + 1 + LINK_SIZE + IMM2_SIZE; |
|         break; |
| |
|       case OP_CBRAPOS: case OP_SCBRAPOS: |
|         extra = 2; |
|         next = cc + 1 + LINK_SIZE + IMM2_SIZE; |
|         break; |
| |
|       case OP_COND: |
|         /* Might be a hidden SCOND. */ |
|         alt_end = cc + GET(cc, 1); |
|         if (*alt_end == OP_KETRMAX || *alt_end == OP_KETRMIN) |
|           extra = 1; |
|         next = cc + 1 + LINK_SIZE; |
|         break; |
| |
|       default: |
|         next = next_opcode(common, cc); |
|         SLJIT_ASSERT(next != NULL); |
|         break; |
|     } |
|     words += extra; |
|     cc = next; |
|   } |
|   SLJIT_ASSERT(cc == ccend); |
|   return words; |
| } |
| |
| /* Emits the code that copies the locals belonging to the opcodes in |
| [cc, ccend) between the locals area (SLJIT_LOCALS_REG) and the stack |
| (STACK_TOP). |
| save != FALSE: locals are written to the stack; the local at |
| common->recursive_head goes first, then the opcodes in order. |
| save == FALSE: the same slots are read back from the stack, without |
| recursive_head. |
| The words are moved through TMP1 and TMP2 in turns, so that each load is |
| emitted ahead of the matching store. stackptr and stacktop are stack element |
| indexes; they are turned into byte offsets with STACK() first. */ |
| static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, |
|   BOOL save, int stackptr, int stacktop) |
| { |
|   DEFINE_COMPILER; |
|   /* reg[cur] is the register used for the next word; reg_empty[k] tells |
|   whether reg[k] currently holds no word that still has to be written. */ |
|   const int reg[2] = { TMP1, TMP2 }; |
|   BOOL reg_empty[2] = { TRUE, TRUE }; |
|   int cur = 0; |
|   int slots[2]; |
|   int pending; |
|   int i; |
|   BOOL head_pending = save; |
|   pcre_uchar *alt_end; |
| |
|   stackptr = STACK(stackptr - 2); |
|   stacktop = STACK(stacktop - 1); |
| |
|   if (!save) |
|   { |
|     /* Restoring: skip one word, then preload up to two saved words. The |
|     first local is always taken from TMP1, hence cur stays 0. */ |
|     stackptr += sizeof(sljit_w); |
|     for (i = 0; i < 2; i++) |
|     { |
|       if (stackptr < stacktop) |
|       { |
|         OP1(SLJIT_MOV, reg[i], 0, SLJIT_MEM1(STACK_TOP), stackptr); |
|         stackptr += sizeof(sljit_w); |
|         reg_empty[i] = FALSE; |
|       } |
|     } |
|   } |
| |
|   for (;;) |
|   { |
|     /* Step 1: collect the local offsets of the next item into slots[]. */ |
|     pending = 0; |
|     if (head_pending) |
|     { |
|       SLJIT_ASSERT(save && common->recursive_head != 0); |
|       pending = 1; |
|       slots[0] = common->recursive_head; |
|       head_pending = FALSE; |
|     } |
|     else if (cc >= ccend) |
|       break; |
|     else |
|     { |
|       switch(*cc) |
|       { |
|         case OP_ASSERT: case OP_ASSERT_NOT: |
|         case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: |
|         case OP_ONCE: case OP_ONCE_NC: |
|         case OP_BRAPOS: |
|         case OP_SBRA: case OP_SBRAPOS: |
|         case OP_SCOND: |
|           pending = 1; |
|           slots[0] = PRIV_DATA(cc); |
|           SLJIT_ASSERT(slots[0] != 0); |
|           cc += 1 + LINK_SIZE; |
|           break; |
| |
|         case OP_CBRA: case OP_SCBRA: |
|           pending = 1; |
|           slots[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); |
|           cc += 1 + LINK_SIZE + IMM2_SIZE; |
|           break; |
| |
|         case OP_CBRAPOS: case OP_SCBRAPOS: |
|           /* slots[] is consumed from the highest index down, so the |
|           OVECTOR_PRIV word is moved before the private word. */ |
|           pending = 2; |
|           slots[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); |
|           slots[0] = PRIV_DATA(cc); |
|           SLJIT_ASSERT(slots[0] != 0); |
|           cc += 1 + LINK_SIZE + IMM2_SIZE; |
|           break; |
| |
|         case OP_COND: |
|           /* Might be a hidden SCOND. */ |
|           alt_end = cc + GET(cc, 1); |
|           if (*alt_end == OP_KETRMAX || *alt_end == OP_KETRMIN) |
|           { |
|             pending = 1; |
|             slots[0] = PRIV_DATA(cc); |
|             SLJIT_ASSERT(slots[0] != 0); |
|           } |
|           cc += 1 + LINK_SIZE; |
|           break; |
| |
|         default: |
|           cc = next_opcode(common, cc); |
|           SLJIT_ASSERT(cc != NULL); |
|           break; |
|       } |
|     } |
| |
|     /* Step 2: move the collected words, alternating the two registers. */ |
|     while (pending > 0) |
|     { |
|       pending--; |
|       if (save) |
|       { |
|         /* Write out what the register still holds, then load the local. */ |
|         if (!reg_empty[cur]) |
|         { |
|           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, reg[cur], 0); |
|           stackptr += sizeof(sljit_w); |
|         } |
|         OP1(SLJIT_MOV, reg[cur], 0, SLJIT_MEM1(SLJIT_LOCALS_REG), slots[pending]); |
|         reg_empty[cur] = FALSE; |
|       } |
|       else |
|       { |
|         /* Store the preloaded word into the local, then preload again. */ |
|         SLJIT_ASSERT(!reg_empty[cur]); |
|         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), slots[pending], reg[cur], 0); |
|         reg_empty[cur] = stackptr >= stacktop; |
|         if (!reg_empty[cur]) |
|         { |
|           OP1(SLJIT_MOV, reg[cur], 0, SLJIT_MEM1(STACK_TOP), stackptr); |
|           stackptr += sizeof(sljit_w); |
|         } |
|       } |
|       cur ^= 1; |
|     } |
|   } |
| |
|   if (save) |
|   { |
|     /* Flush the registers, oldest word first: reg[cur] was loaded before |
|     the other one. */ |
|     for (i = 0; i < 2; i++) |
|     { |
|       if (!reg_empty[cur]) |
|       { |
|         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, reg[cur], 0); |
|         stackptr += sizeof(sljit_w); |
|       } |
|       cur ^= 1; |
|     } |
|   } |
|   SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (reg_empty[0] && reg_empty[1]))); |
| } |
| |
| static SLJIT_INLINE BOOL ispowerof2(unsigned int value) |
| { |
| return (value & (value - 1)) == 0; |
| } |
| |
| static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label) |
| { |
| while (list) |
| { |
| /* sljit_set_label is clever enough to do nothing |
| if either the jump or the label is NULL */ |
| sljit_set_label(list->jump, label); |
| list = list->next; |
| } |
| } |
| |
| static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump) |
| { |
| jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list)); |
| if (list_item) |
| { |
| list_item->next = *list; |
| list_item->jump = jump; |
| *list = list_item; |
| } |
| } |
| |
| static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start) |
| { |
| DEFINE_COMPILER; |
| stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list)); |
| |
| if (list_item) |
| { |
| list_item->type = type; |
| list_item->data = data; |
| list_item->start = start; |
| list_item->leave = LABEL(); |
| list_item->next = common->stubs; |
| common->stubs = list_item; |
| } |
| } |
| |
| static void flush_stubs(compiler_common *common) |
| { |
| DEFINE_COMPILER; |
| stub_list* list_item = common->stubs; |
| |
| while (list_item) |
| { |
| JUMPHERE(list_item->start); |
| switch(list_item->type) |
| { |
| case stack_alloc: |
| add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL)); |
| break; |
| } |
| JUMPTO(SLJIT_JUMP, list_item->leave); |
| list_item = list_item->next; |
| } |
| common->stubs = NULL; |
| } |
| |
| static SLJIT_INLINE void decrease_call_count(compiler_common *common) |
| { /* Emits code that subtracts 1 from CALL_COUNT and, when the result is |
| zero, takes a jump that is collected on the common->calllimit list. */ |
| DEFINE_COMPILER; |
| |
| OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1); /* SLJIT_SET_E: the zero test below uses the flags of this subtraction. */ |
| add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO)); |
| } |
| |
| static SLJIT_INLINE void allocate_stack(compiler_common *common, int size) |
| { |
| /* Emits code that advances STACK_TOP by size * sizeof(sljit_w) and then |
| compares it with STACK_LIMIT. When STACK_TOP is greater, control is |
| diverted to a stack_alloc stub registered through add_stub. |
| May destroy all locals and registers except TMP2. */ |
| DEFINE_COMPILER; |
| |
| OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w)); /* With DESTROY_REGISTERS defined, the stores below overwrite TMP1, TMP3, RETURN_ADDR, LOCALS0 and LOCALS1 with 12345. */ |
| #ifdef DESTROY_REGISTERS |
| OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345); /* NOTE(review): presumably a debugging aid that exposes code relying on values this function may destroy - confirm. */ |
| OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); |
| OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0); |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0); |
| #endif |
| add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0)); /* The stub's leave label is emitted right after this compare. */ |
| } |
| |
| static SLJIT_INLINE void free_stack(compiler_common *common, int size) |
| { /* Emits code that lowers STACK_TOP by size * sizeof(sljit_w), the same |
| amount allocate_stack adds for an equal size argument. */ |
| DEFINE_COMPILER; |
| OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w)); |
| } |
| |
| static SLJIT_INLINE void reset_ovector(compiler_common *common, int length) |
| { |
| DEFINE_COMPILER; |
| struct sljit_label *loop; |
| int i; |
| /* At this point we can freely use all temporary registers. */ |
| /* TMP1 returns with begin - 1. */ |
| OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1)); |
| if (length < 8) |
| { |
| for (i = 0; i < length; i++) |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0); |
| } |
| else |
| { |
| GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, OVECTOR_START - sizeof(sljit_w)); |
| OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length); |
| loop = LABEL(); |
| OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0); |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1); |
| JUMPTO(SLJIT_C_NOT_ZERO, loop); |
| } |
| } |
| |
| static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket) |
| { |
| DEFINE_COMPILER; |
| struct sljit_label *loop; |
| struct sljit_jump *earlyexit; |
| |
| /* Emits code that writes the local OVECTOR values into the array at |
| jit_arguments.offsets, each one as a distance from jit_arguments.begin, and |
| then leaves the return value in SLJIT_RETURN_REG. |
| At this point we can freely use all registers. */ |
| OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); /* Old OVECTOR(1); it is the comparison value of the scan at the end. */ |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0); |
| |
| OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0); |
| if (common->mark_ptr != 0) |
| OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); |
| OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount)); /* Loop counter of the copy loop. */ |
| if (common->mark_ptr != 0) |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0); /* Hand the local mark value over to jit_arguments.mark_ptr. */ |
| OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int)); /* One int before offsets; every store below addresses base + sizeof(int). */ |
| OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin)); /* From here on REG1 holds begin, no longer the arguments pointer. */ |
| GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START); |
| /* Unlikely, but possible: offsetcount is zero, so the copy loop is skipped. */ |
| earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0); |
| loop = LABEL(); |
| OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0); |
| OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w)); |
| /* Copy the integer value to the output buffer. In the 16 bit library the |
| difference is shifted right by one first. NOTE(review): the loop relies on |
| SLJIT_MOVU_SI advancing SLJIT_TEMPORARY_REG3, since nothing else moves the |
| output pointer - confirm against the sljit documentation. */ |
| #ifdef COMPILE_PCRE16 |
| OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1); |
| #endif |
| OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0); |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1); |
| JUMPTO(SLJIT_C_NOT_ZERO, loop); |
| JUMPHERE(earlyexit); |
| |
| /* Calculate the return value, which is the maximum ovector value. |
| With topbracket > 1 the generated loop walks the pairs downwards from |
| topbracket and stops at the first pair whose first slot differs from |
| SLJIT_SAVED_REG3; otherwise the return value is the constant 1. */ |
| if (topbracket > 1) |
| { |
| GET_LOCAL_BASE(SLJIT_TEMPORARY_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_w)); |
| OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1); |
| |
| /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. |
| NOTE(review): SLJIT_SAVED_REG3 is the value OVECTOR(1) had on entry, |
| presumably the filler stored by reset_ovector - confirm. */ |
| loop = LABEL(); |
| OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w))); /* Step back one pair (two words) and load its first slot. */ |
| OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1); |
| CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop); |
| OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0); |
| } |
| else |
| OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1); |
| } |
| |
| static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *leave) |
| { /* Emits the exit sequence of a partial match: SLJIT_RETURN_REG receives |
| PCRE_ERROR_PARTIAL and, when jit_arguments.offsetcount is at least 2, the |
| first two entries of jit_arguments.offsets are filled in before control |
| jumps to leave. Both entries are distances from jit_arguments.begin. */ |
| DEFINE_COMPILER; |
| |
| SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2); |
| SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0)); |
| |
| OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0); |
| OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL); |
| OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount)); |
| CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, leave); /* Fewer than two output slots: leave without storing anything. */ |
| |
| /* Store match begin and end. The start comes from start_used_ptr in |
| JIT_PARTIAL_HARD_COMPILE mode and from hit_start otherwise. */ |
| OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin)); |
| OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets)); /* REG2 now points to the offsets array. */ |
| OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start); |
| OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0); /* STR_END is SLJIT_SAVED_REG2 (asserted above), so this overwrites STR_END. */ |
| #ifdef COMPILE_PCRE16 |
| OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1); |
| #endif |
| OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0); /* offsets[1] = STR_END - begin. */ |
| |
| OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0); |
| #ifdef COMPILE_PCRE16 |
| OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1); |
| #endif |
| OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0); /* offsets[0] = start - begin. */ |
| |
| JUMPTO(SLJIT_JUMP, leave); |
| } |
| |
| static SLJIT_INLINE void check_start_used_ptr(compiler_common *common) |
| { |
| /* May destroy TMP1. */ |
| DEFINE_COMPILER; |
| struct sljit_jump *jump; |
| |
| if (common->mode == JIT_PARTIAL_SOFT_COMPILE) |
| { |
| /* The value of -1 must be kept for start_used_ptr! */ |
| OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1); |
| /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting |
| is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */ |
| jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0); |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); |
| JUMPHERE(jump); |
| } |
| else if (common->mode == JIT_PARTIAL_HARD_COMPILE) |
| { |
| jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); |
| JUMPHERE(jump); |
| } |
| } |
| |
| static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc) |
| { |
| /* Detects if the character has an othercase. */ |
| unsigned int c; |
| |
| #ifdef SUPPORT_UTF |
| if (common->utf) |
| { |
| GETCHAR(c, cc); |
| if (c > 127) |
| { |
| #ifdef SUPPORT_UCP |
| return c != UCD_OTHERCASE(c); |
| #else |
| return FALSE; |
| #endif |
| } |
| #ifndef COMPILE_PCRE8 |
| return common->fcc[c] != c; |
| #endif |
| } |
| else |
| #endif |
| c = *cc; |
| return MAX_255(c) ? common->fcc[c] != c : FALSE; |
| } |
| |
| static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c) |
| { |
| /* Returns with the othercase. */ |
| #ifdef SUPPORT_UTF |
| if (common->utf && c > 127) |
| { |
| #ifdef SUPPORT_UCP |
| return UCD_OTHERCASE(c); |
| #else |
| return c; |
| #endif |
| } |
| #endif |
| return TABLE_GET(c, common->fcc, c); |
| } |
| |
| static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc) |
| { |
| /* Detects if the character and its othercase has only 1 bit difference. */ |
| unsigned int c, oc, bit; |
| #if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
| int n; |
| #endif |
| |
| #ifdef SUPPORT_UTF |
| if (common->utf) |
| { |
| GETCHAR(c, cc); |
| if (c <= 127) |
| oc = common->fcc[c]; |
| else |
| { |
| #ifdef SUPPORT_UCP |
| oc = UCD_OTHERCASE(c); |
| #else |
| oc = c; |
| #endif |
| } |
| } |
| else |
| { |
| c = *cc; |
| oc = TABLE_GET(c, common->fcc, c); |
| } |
| #else |
| c = *cc; |
| oc = TABLE_GET(c, common->fcc, c); |
| #endif |
| |
| SLJIT_ASSERT(c != oc); |
| |
| bit = c ^ oc; |
| /* Optimized for English alphabet. */ |
| if (c <= 127 && bit == 0x20) |
| return (0 << 8) | 0x20; |
| |
| /* Since c != oc, they must have at least 1 bit difference. */ |
| if (!ispowerof2(bit)) |
| return 0; |
| |
| #ifdef COMPILE_PCRE8 |
| |
| #ifdef SUPPORT_UTF |
| if (common->utf && c > 127) |
| { |
| n = GET_EXTRALEN(*cc); |
| while ((bit & 0x3f) == 0) |
| { |
| n--; |
| bit >>= 6; |
| } |
| return (n << 8) | bit; |
| } |
| #endif /* SUPPORT_UTF */ |
| return (0 << 8) | bit; |
| |
| #else /* COMPILE_PCRE8 */ |
| |
| #ifdef COMPILE_PCRE16 |
| #ifdef SUPPORT_UTF |
| if (common->utf && c > 65535) |
| { |
| if (bit >= (1 << 10)) |
| bit >>= 10; |
| else |
| return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8)); |
| } |
| #endif /* SUPPORT_UTF */ |
| return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8)); |
| #endif /* COMPILE_PCRE16 */ |
| |
| #endif /* COMPILE_PCRE8 */ |
| } |
| |
| static void check_partial(compiler_common *common, BOOL force) |
| { |
| /* Checks whether a partial matching is occured. Does not modify registers. */ |
| DEFINE_COMPILER; |
| struct sljit_jump *jump = NULL; |
| |
| SLJIT_ASSERT(!force || common->mode != JIT_COMPILE); |
| |
| if (common->mode == JIT_COMPILE) |
| return; |
| |
| if (!force) |
| jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); |
| else if (common->mode == JIT_PARTIAL_SOFT_COMPILE) |
| jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1); |
| |
| if (common->mode == JIT_PARTIAL_SOFT_COMPILE) |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1); |
| else |
| { |
| if (common->partialmatchlabel != NULL) |
| JUMPTO(SLJIT_JUMP, common->partialmatchlabel); |
| else |
| add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP)); |
| } |
| |
| if (jump != NULL) |
| JUMPHERE(jump); |
| } |
| |
| static struct sljit_jump *check_str_end(compiler_common *common) |
| { |
| /* Does not affect registers. Usually used in a tight spot. */ |
| DEFINE_COMPILER; |
| struct sljit_jump *jump; |
| struct sljit_jump *nohit; |
| struct sljit_jump *return_value; |
| |
| if (common->mode == JIT_COMPILE) |
| return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
| |
| jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0); |
| if (common->mode == JIT_PARTIAL_SOFT_COMPILE) |
| { |
| nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1); |
| JUMPHERE(nohit); |
| return_value = JUMP(SLJIT_JUMP); |
| } |
| else |
| { |
| return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); |
| if (common->partialmatchlabel != NULL) |
| JUMPTO(SLJIT_JUMP, common->partialmatchlabel); |
| else |
| add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP)); |
| } |
| JUMPHERE(jump); |
| return return_value; |
| } |
| |
| static void detect_partial_match(compiler_common *common, jump_list **backtracks) |
| { |
| DEFINE_COMPILER; |
| struct sljit_jump *jump; |
| |
| if (common->mode == JIT_COMPILE) |
| { |
| add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
| return; |
| } |
| |
| /* Partial matching mode. */ |
| jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0); |
| add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0)); |
| if (common->mode == JIT_PARTIAL_SOFT_COMPILE) |
| { |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1); |
| add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); |
| } |
| else |
| { |
| if (common->partialmatchlabel != NULL) |
| JUMPTO(SLJIT_JUMP, common->partialmatchlabel); |
| else |
| add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP)); |
| } |
| JUMPHERE(jump); |
| } |
| |
| static void read_char(compiler_common *common) |
| { |
| /* Reads the character into TMP1, updates STR_PTR. |
| Does not check STR_END. TMP2 Destroyed. */ |
| DEFINE_COMPILER; |
| #ifdef SUPPORT_UTF |
| struct sljit_jump *jump; |
| #endif |
| |
| OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); |
| #ifdef SUPPORT_UTF |
| if (common->utf) |
| { |
| #ifdef COMPILE_PCRE8 |
| jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); |
| #else |
| #ifdef COMPILE_PCRE16 |
| jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); |
| #endif |
| #endif /* COMPILE_PCRE8 */ |
| add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); |
| JUMPHERE(jump); |
| } |
| #endif |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| } |
| |
| static void peek_char(compiler_common *common) |
| { |
| /* Reads the character into TMP1, keeps STR_PTR. |
| Does not check STR_END. TMP2 Destroyed. */ |
| DEFINE_COMPILER; |
| #ifdef SUPPORT_UTF |
| struct sljit_jump *jump; |
| #endif |
| |
| OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); |
| #ifdef SUPPORT_UTF |
| if (common->utf) |
| { |
| #ifdef COMPILE_PCRE8 |
| jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); |
| #else |
| #ifdef COMPILE_PCRE16 |
| jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); |
| #endif |
| #endif /* COMPILE_PCRE8 */ |
| add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); |
| OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
| JUMPHERE(jump); |
| } |
| #endif |
| } |
| |
| static void read_char8_type(compiler_common *common) |
| { |
| /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */ |
| DEFINE_COMPILER; |
| #if defined SUPPORT_UTF || defined COMPILE_PCRE16 |
| struct sljit_jump *jump; |
| #endif |
| |
| #ifdef SUPPORT_UTF |
| if (common->utf) |
| { |
| OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| #ifdef COMPILE_PCRE8 |
| /* This can be an extra read in some situations, but hopefully |
| it is needed in most cases. */ |
| OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); |
| jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0); |
| add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL)); |
| JUMPHERE(jump); |
| #else |
| #ifdef COMPILE_PCRE16 |
| OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); |
| jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); |
| OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); |
| JUMPHERE(jump); |
| /* Skip low surrogate if necessary. */ |
| OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00); |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800); |
| COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL); |
| OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
| #endif |
| #endif /* COMPILE_PCRE8 */ |
| return; |
| } |
| #endif |
| OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| #ifdef COMPILE_PCRE16 |
| /* The ctypes array contains only 256 values. */ |
| OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); |
| jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); |
| #endif |
| OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); |
| #ifdef COMPILE_PCRE16 |
| JUMPHERE(jump); |
| #endif |
| } |
| |
| static void skip_char_back(compiler_common *common) |
| { |
| /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */ |
| DEFINE_COMPILER; |
| #if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
| struct sljit_label *label; |
| |
| if (common->utf) |
| { |
| label = LABEL(); |
| OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1)); |
| OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); |
| CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label); |
| return; |
| } |
| #endif |
| #if defined SUPPORT_UTF && defined COMPILE_PCRE16 |
| if (common->utf) |
| { |
| OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1)); |
| OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| /* Skip low surrogate if necessary. */ |
| OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00); |
| COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL); |
| OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); |
| OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
| return; |
| } |
| #endif |
| OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| } |
| |
| static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue) |
| { |
| /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */ |
| DEFINE_COMPILER; |
| |
| if (nltype == NLTYPE_ANY) |
| { |
| add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); |
| add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); |
| } |
| else if (nltype == NLTYPE_ANYCRLF) |
| { |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR); |
| COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL); |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL); |
| COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); |
| add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); |
| } |
| else |
| { |
| SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256); |
| add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); |
| } |
| } |
| |
| #ifdef SUPPORT_UTF |
| |
| #ifdef COMPILE_PCRE8 |
| static void do_utfreadchar(compiler_common *common) |
| { |
| /* Fast decoding a UTF-8 character. TMP1 contains the first byte |
| of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. |
| The helper is entered with sljit_emit_fast_enter and every path leaves |
| through sljit_emit_fast_return. Bits 0x20 and 0x10 of the first byte select |
| the two, three or four byte decoder. STR_PTR is advanced by length - 1 only; |
| callers such as read_char step over the first byte themselves. The bytes |
| are not validated here. */ |
| DEFINE_COMPILER; |
| struct sljit_jump *jump; |
| |
| sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); |
| /* Searching for the first zero. */ |
| OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20); |
| jump = JUMP(SLJIT_C_NOT_ZERO); |
| /* Two byte sequence: 5 bits from the first byte, 6 from the second. */ |
| OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f); |
| OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); |
| OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
| OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
| OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); /* length - 1 */ |
| sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
| JUMPHERE(jump); |
| |
| OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10); |
| jump = JUMP(SLJIT_C_NOT_ZERO); |
| /* Three byte sequence: 4 bits from the first byte, 6 from each of the |
| following two. */ |
| OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); |
| OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f); |
| OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12); |
| OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
| OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); |
| OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
| OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); |
| OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
| OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
| OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2)); /* length - 1 */ |
| sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
| JUMPHERE(jump); |
| |
| /* Four byte sequence: 3 bits from the first byte, 6 from each of the |
| following three. Every first byte with both 0x20 and 0x10 set ends up here. */ |
| OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); |
| OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07); |
| OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18); |
| OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
| OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12); |
| OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
| OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); |
| OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
| OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); |
| OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
| OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3)); |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); |
| OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
| OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
| OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3)); /* length - 1 */ |
| sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
| } |
| |
| static void do_utfreadtype8(compiler_common *common) |
| { |
| /* Fast decoding a UTF-8 character type. TMP2 contains the first byte |
| of the character (>= 0xc0). Return value in TMP1. |
| Only two byte sequences can produce a character below 256, so only those |
| are decoded; longer sequences are skipped and get type 0. The caller visible |
| in this file (read_char8_type) has already stepped over the first byte, |
| which is why the second byte is read at offset 0. */ |
| DEFINE_COMPILER; |
| struct sljit_jump *jump; |
| struct sljit_jump *compare; |
| |
| sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); |
| |
| OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20); |
| jump = JUMP(SLJIT_C_NOT_ZERO); |
| /* Two byte sequence. */ |
| OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f); |
| OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); |
| OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); |
| OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0); |
| compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); /* Decoded value above 255: no table entry, type 0 below. */ |
| OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); |
| sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
| |
| JUMPHERE(compare); |
| OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); |
| sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
| JUMPHERE(jump); |
| |
| /* We only have types for characters less than 256. |
| The byte loaded from utf8_table4 (indexed by first byte - 0xc0) is added |
| to STR_PTR; presumably it is the number of remaining bytes - confirm. */ |
| OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0); |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
| OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); |
| sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
| } |
| |
| #else /* COMPILE_PCRE8 */ |
| |
| #ifdef COMPILE_PCRE16 |
| static void do_utfreadchar(compiler_common *common) |
| { |
| /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char |
| of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */ |
| DEFINE_COMPILER; |
| struct sljit_jump *jump; |
| |
| sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); |
| jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00); |
| /* Do nothing, only return. */ |
| sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
| |
| JUMPHERE(jump); |
| /* Combine two 16 bit characters. */ |
| OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff); |
| OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10); |
| OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff); |
| OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
| OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); |
| OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000); |
| sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
| } |
| #endif /* COMPILE_PCRE16 */ |
| |
| #endif /* COMPILE_PCRE8 */ |
| |
| #endif /* SUPPORT_UTF */ |
| |
| #ifdef SUPPORT_UCP |
| |
| static sljit_w SLJIT_CALL getunichartype(sljit_w c) |
| { |
| return (sljit_w)(unsigned int)UCD_CHARTYPE((unsigned int)c); |
| } |
| |
| static sljit_w SLJIT_CALL getunicharscript(sljit_w c) |
| { |
| return (sljit_w)(unsigned int)UCD_SCRIPT((unsigned int)c); |
| } |
| |
| static void do_getunichartype(compiler_common *common) |
| { |
| /* Character comes in TMP1. Returns chartype in TMP1. |
| Emits a fast-call helper that invokes the C function getunichartype |
| through SLJIT_CALL1. */ |
| DEFINE_COMPILER; |
| |
| sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); |
| /* Save registers: STACK_TOP is parked in LOCALS0 across the call. */ |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0); |
| sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(getunichartype)); |
| /* Restore registers */ |
| OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); |
| sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
| } |
| |
| static void do_getunichartype_2(compiler_common *common) |
| { |
| /* Character comes in TMP1. Returns chartype in TMP1. |
| Same as do_getunichartype, except that STACK_TOP is parked in POSSESSIVE0 |
| instead of LOCALS0. NOTE(review): presumably meant for call sites where |
| LOCALS0 holds a live value - confirm against the callers. */ |
| DEFINE_COMPILER; |
| |
| sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); |
| /* Save registers */ |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STACK_TOP, 0); |
| sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(getunichartype)); |
| /* Restore registers */ |
| OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); |
| sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
| } |
| |
| static void do_getunicharscript(compiler_common *common) |
| { |
| /* Character comes in TMP1. Returns the script of the character in TMP1. |
| Emits a fast-call helper that invokes the C function getunicharscript |
| through SLJIT_CALL1. */ |
| DEFINE_COMPILER; |
| |
| sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); |
| /* Save registers: STACK_TOP is parked in LOCALS0 across the call. */ |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0); |
| sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(getunicharscript)); |
| /* Restore registers */ |
| OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); |
| sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
| } |
| #endif |
| |
| static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline) |
| { |
| DEFINE_COMPILER; |
| struct sljit_label *mainloop; |
| struct sljit_label *newlinelabel = NULL; |
| struct sljit_jump *start; |
| struct sljit_jump *end = NULL; |
| struct sljit_jump *nl = NULL; |
| #ifdef SUPPORT_UTF |
| struct sljit_jump *singlechar; |
| #endif |
| jump_list *newline = NULL; |
| BOOL newlinecheck = FALSE; |
| BOOL readuchar = FALSE; |
| |
| if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY || |
| common->nltype == NLTYPE_ANYCRLF || common->newline > 255)) |
| newlinecheck = TRUE; |
| |
| if (firstline) |
| { |
| /* Search for the end of the first line. */ |
| SLJIT_ASSERT(common->first_line_end != 0); |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0); |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_END, 0); |
| |
| if (common->nltype == NLTYPE_FIXED && common->newline > 255) |
| { |
| mainloop = LABEL(); |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
| OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); |
| OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
| CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop); |
| CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop); |
| OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| } |
| else |
| { |
| end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
| mainloop = LABEL(); |
| /* Continual stores does not cause data dependency. */ |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0); |
| read_char(common); |
| check_newlinechar(common, common->nltype, &newline, TRUE); |
| CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop); |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0); |
| set_jumps(newline, LABEL()); |
| } |
| |
| JUMPHERE(end); |
| OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); |
| } |
| |
| start = JUMP(SLJIT_JUMP); |
| |
| if (newlinecheck) |
| { |
| newlinelabel = LABEL(); |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
| OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff); |
| COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL); |
| #ifdef COMPILE_PCRE16 |
| OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); |
| #endif |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
| nl = JUMP(SLJIT_JUMP); |
| } |
| |
| mainloop = LABEL(); |
| |
| /* Increasing the STR_PTR here requires one less jump in the most common case. */ |
| #ifdef SUPPORT_UTF |
| if (common->utf) readuchar = TRUE; |
| #endif |
| if (newlinecheck) readuchar = TRUE; |
| |
| if (readuchar) |
| OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); |
| |
| if (newlinecheck) |
| CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel); |
| |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| #if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
| if (common->utf) |
| { |
| singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); |
| OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0); |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
| JUMPHERE(singlechar); |
| } |
| #endif |
| #if defined SUPPORT_UTF && defined COMPILE_PCRE16 |
| if (common->utf) |
| { |
| singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); |
| OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); |
| COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL); |
| OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
| JUMPHERE(singlechar); |
| } |
| #endif |
| JUMPHERE(start); |
| |
| if (newlinecheck) |
| { |
| JUMPHERE(end); |
| JUMPHERE(nl); |
| } |
| |
| return mainloop; |
| } |
| |
| static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline) |
| { |
| /* Generates a scan loop that advances STR_PTR until the character under it |
| equals first_char (or its other-case form when caseless is set), or until |
| STR_END is reached. When firstline is set, STR_END is replaced by the value |
| stored at common->first_line_end while the loop runs and is restored from the |
| POSSESSIVE0 slot afterwards. The generated code overwrites TMP1, and TMP2 in |
| the caseless variants. */ |
| DEFINE_COMPILER; |
| struct sljit_label *scan_top; |
| struct sljit_jump *hit_end; |
| struct sljit_jump *matched; |
| pcre_uchar othercase = first_char; |
| pcre_uchar casebit; |
| |
| /* The other-case form depends only on compile-time data, so it is resolved |
| before any code is emitted. */ |
| if (caseless) |
| { |
| othercase = TABLE_GET(first_char, common->fcc, first_char); |
| #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) |
| if (first_char > 127 && common->utf) |
| othercase = UCD_OTHERCASE(first_char); |
| #endif |
| } |
| |
| if (firstline) |
| { |
| /* Keep the real end pointer aside and limit the scan to the first line. */ |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0); |
| OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end); |
| } |
| |
| scan_top = LABEL(); |
| hit_end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
| OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); |
| |
| if (othercase != first_char) |
| { |
| casebit = first_char ^ othercase; |
| if (!ispowerof2(casebit)) |
| { |
| /* The two forms differ in more than one bit: test each and merge the results. */ |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char); |
| COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL); |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, othercase); |
| COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); |
| matched = JUMP(SLJIT_C_NOT_ZERO); |
| } |
| else |
| { |
| /* The two forms differ in a single bit: force it on and compare once. */ |
| OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, casebit); |
| matched = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | casebit); |
| } |
| } |
| else |
| matched = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char); |
| |
| /* No match at this position: step over the character and try again. */ |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| #if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
| if (common->utf) |
| { |
| /* Units from 0xc0 upwards get an extra step read from utf8_table4. */ |
| CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, scan_top); |
| OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0); |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
| } |
| #endif |
| #if defined SUPPORT_UTF && defined COMPILE_PCRE16 |
| if (common->utf) |
| { |
| /* A unit whose top six bits equal 0xd800 is followed by one more unit to skip. */ |
| CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, scan_top); |
| OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); |
| COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL); |
| OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
| } |
| #endif |
| JUMPTO(SLJIT_JUMP, scan_top); |
| JUMPHERE(matched); |
| JUMPHERE(hit_end); |
| |
| if (firstline) |
| OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); |
| } |
| |
| static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline) |
| { |
| DEFINE_COMPILER; |
| struct sljit_label *loop; |
| struct sljit_jump *lastchar; |
| struct sljit_jump *firstchar; |
| struct sljit_jump *leave; |
| struct sljit_jump *foundcr = NULL; |
| struct sljit_jump *notfoundnl; |
| jump_list *newline = NULL; |
| /* Emits code that scans forward from STR_PTR and stops right after the next |
| newline sequence, or once STR_END is reached. The scan is skipped entirely |
| when STR_PTR is not beyond the 'str' field of jit_arguments. With firstline |
| set, STR_END is replaced by the value stored at common->first_line_end for |
| the duration of the scan and restored from the POSSESSIVE0 slot afterwards. |
| TMP1 and TMP2 are overwritten by the generated code. */ |
| if (firstline) |
| { |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0); |
| OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end); |
| } |
| /* Fixed newline whose value does not fit in one byte: it is handled as two |
| units, (newline >> 8) & 0xff followed by newline & 0xff. */ |
| if (common->nltype == NLTYPE_FIXED && common->newline > 255) |
| { |
| lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
| OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); |
| OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); |
| OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); |
| firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0); |
| /* TMP2 = (STR_PTR >= begin + 2 units), taken here to be 0 or 1 (it is scaled |
| to bytes in the 16-bit build). Stepping back by that amount makes the first |
| loop iteration test the pair that ends just before the original STR_PTR. */ |
| OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2)); |
| OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0); |
| COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL); |
| #ifdef COMPILE_PCRE16 |
| OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); |
| #endif |
| OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
| /* Advance one unit per iteration and compare the two units just behind STR_PTR. */ |
| loop = LABEL(); |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
| OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); |
| OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); |
| CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop); |
| CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop); |
| |
| JUMPHERE(leave); |
| JUMPHERE(firstchar); |
| JUMPHERE(lastchar); |
| |
| if (firstline) |
| OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); |
| return; |
| } |
| /* All other newline types: characters are examined one at a time, starting |
| one character back (skip_char_back) unless STR_PTR is not beyond 'str'. */ |
| OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); |
| OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); |
| firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0); |
| skip_char_back(common); |
| |
| loop = LABEL(); |
| read_char(common); |
| lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
| if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) |
| foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); |
| check_newlinechar(common, common->nltype, &newline, FALSE); |
| set_jumps(newline, loop); /* jumps collected by check_newlinechar (presumably "not a newline") continue the loop */ |
| /* For the ANY and ANYCRLF types a CR may be followed by a NL: when the next |
| unit is CHAR_NL and lies before STR_END, STR_PTR is moved over it as well. */ |
| if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) |
| { |
| leave = JUMP(SLJIT_JUMP); |
| JUMPHERE(foundcr); |
| notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
| OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL); |
| COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL); |
| #ifdef COMPILE_PCRE16 |
| OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); |
| #endif |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
| JUMPHERE(notfoundnl); |
| JUMPHERE(leave); |
| } |
| JUMPHERE(lastchar); |
| JUMPHERE(firstchar); |
| |
| if (firstline) |
| OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); |
| } |
| |
| static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline) |
| { |
| DEFINE_COMPILER; |
| struct sljit_label *start; |
| struct sljit_jump *leave; |
| struct sljit_jump *found; |
| #ifndef COMPILE_PCRE8 |
| struct sljit_jump *jump; |
| #endif |
| /* Emits a scan loop that advances STR_PTR until the unit under it is marked |
| in the bitmap located at address start_bits (bit (c & 7) of byte (c >> 3)), |
| or until STR_END is reached. In builds without COMPILE_PCRE8, units that are |
| not below 255 are looked up as 255. With firstline set, STR_END is replaced |
| by the value stored at common->first_line_end during the scan and restored |
| from the POSSESSIVE0 slot afterwards. TMP1 and TMP2 are overwritten, and TMP3 |
| as well when common->utf is set. */ |
| if (firstline) |
| { |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0); |
| OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end); |
| } |
| |
| start = LABEL(); |
| leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
| OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); |
| #ifdef SUPPORT_UTF |
| if (common->utf) |
| OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); /* keep the unit: TMP1 is overwritten by the bitmap lookup */ |
| #endif |
| #ifndef COMPILE_PCRE8 |
| jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255); |
| OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255); /* every unit not below 255 shares bitmap entry 255 */ |
| JUMPHERE(jump); |
| #endif |
| OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); /* bit index inside the byte */ |
| OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); /* byte index inside the bitmap */ |
| OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits); |
| OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); /* TMP2 = 1 << bit index */ |
| OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); |
| found = JUMP(SLJIT_C_NOT_ZERO); |
| /* Not a possible start: step over the whole character and retry. In UTF mode |
| TMP1 is first reloaded from TMP3 so the character length can be derived. */ |
| #ifdef SUPPORT_UTF |
| if (common->utf) |
| OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); |
| #endif |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| #if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
| if (common->utf) |
| { |
| CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start); /* below 0xc0: no extra step */ |
| OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0); /* extra step read from utf8_table4[unit - 0xc0] */ |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
| } |
| #endif |
| #if defined SUPPORT_UTF && defined COMPILE_PCRE16 |
| if (common->utf) |
| { |
| CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start); /* below 0xd800: no extra step */ |
| OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); |
| COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL); /* set when (unit & 0xfc00) == 0xd800: one more unit follows */ |
| OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
| } |
| #endif |
| JUMPTO(SLJIT_JUMP, start); |
| JUMPHERE(found); |
| JUMPHERE(leave); |
| |
| if (firstline) |
| OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); |
| } |
| |
| static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar) |
| { |
| /* Generates a forward search for req_char (or its other-case form when |
| caseless is set) between STR_PTR and STR_END. The search is bypassed when |
| STR_PTR + REQ_BYTE_MAX is still below STR_END, or when STR_PTR is below the |
| position already recorded in the local slot common->req_char_ptr. With |
| has_firstchar set the search starts one unit after STR_PTR. On success the |
| position found is stored back into that slot. The returned jump is the one |
| taken when STR_END is reached without a hit; the caller has to bind it. |
| TMP1 and TMP2 are overwritten by the generated code. */ |
| DEFINE_COMPILER; |
| struct sljit_label *scan_top; |
| struct sljit_jump *subject_too_long; |
| struct sljit_jump *known_position; |
| struct sljit_jump *hit; |
| struct sljit_jump *hit_othercase = NULL; |
| struct sljit_jump *miss; |
| pcre_uchar othercase = req_char; |
| pcre_uchar casebit; |
| |
| SLJIT_ASSERT(common->req_char_ptr != 0); |
| |
| /* The other-case form depends only on compile-time data. */ |
| if (caseless) |
| { |
| othercase = TABLE_GET(req_char, common->fcc, req_char); |
| #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) |
| if (req_char > 127 && common->utf) |
| othercase = UCD_OTHERCASE(req_char); |
| #endif |
| } |
| |
| OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr); |
| OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX); |
| subject_too_long = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0); |
| known_position = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0); |
| |
| /* TMP1 is the search cursor. */ |
| if (!has_firstchar) |
| OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0); |
| else |
| OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| |
| scan_top = LABEL(); |
| miss = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0); |
| |
| OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0); |
| if (othercase != req_char) |
| { |
| casebit = req_char ^ othercase; |
| if (!ispowerof2(casebit)) |
| { |
| /* The two forms differ in more than one bit: compare against each of them. */ |
| hit = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char); |
| hit_othercase = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, othercase); |
| } |
| else |
| { |
| /* The two forms differ in a single bit: force it on and compare once. */ |
| OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, casebit); |
| hit = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | casebit); |
| } |
| } |
| else |
| hit = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char); |
| |
| OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); |
| JUMPTO(SLJIT_JUMP, scan_top); |
| |
| JUMPHERE(hit); |
| if (hit_othercase) |
| JUMPHERE(hit_othercase); |
| /* Record where the character was seen. */ |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0); |
| JUMPHERE(known_position); |
| JUMPHERE(subject_too_long); |
| return miss; |
| } |
| |
| static void do_revertframes(compiler_common *common) |
| { |
| DEFINE_COMPILER; |
| struct sljit_jump *jump; |
| struct sljit_label *mainloop; |
| /* Emits a shared routine (entered with sljit_emit_fast_enter, return address |
| in RETURN_ADDR) that walks the records stored from STACK_TOP upwards and |
| undoes them. TMP1 is the read cursor, so STACK_TOP itself is not changed here. |
| The first word of a record selects its kind: |
| - greater than frame_end (signed): an offset that is added to the base put in |
| TMP3 by GET_LOCAL_BASE; the two words at that address are restored from the |
| next two stack words (three words consumed); |
| - frame_end: the routine returns; |
| - frame_setstrbegin: OVECTOR(0) is restored from the next word (two words); |
| - frame_setmark, handled only when common->mark_ptr != 0: the local at |
| common->mark_ptr is restored from the next word (two words); |
| - anything else: skipped as a two-word record. |
| TMP1, TMP2 and TMP3 are overwritten. */ |
| sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); |
| OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0); |
| GET_LOCAL_BASE(TMP3, 0, 0); |
| |
| /* Walk the records, starting at STACK_TOP, until frame_end is met. */ |
| mainloop = LABEL(); |
| OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0); |
| jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end); /* values up to frame_end are commands, handled below */ |
| OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0); |
| OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w)); |
| OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w)); |
| OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w)); |
| JUMPTO(SLJIT_JUMP, mainloop); |
| |
| JUMPHERE(jump); |
| jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end); |
| /* End of dropping frames. */ |
| sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
| |
| JUMPHERE(jump); |
| jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin); |
| /* Set string begin. */ |
| OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w)); |
| OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w)); |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0); |
| JUMPTO(SLJIT_JUMP, mainloop); |
| |
| JUMPHERE(jump); |
| if (common->mark_ptr != 0) |
| { |
| jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark); /* restore the saved mark value */ |
| OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w)); |
| OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w)); |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0); |
| JUMPTO(SLJIT_JUMP, mainloop); |
| |
| JUMPHERE(jump); |
| } |
| |
| /* Unknown command: skip it as a two-word record. */ |
| OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w)); |
| JUMPTO(SLJIT_JUMP, mainloop); |
| } |
| |
| static void check_wordboundary(compiler_common *common) |
| { |
| DEFINE_COMPILER; |
| struct sljit_jump *skipread; |
| #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF |
| struct sljit_jump *jump; |
| #endif |
| /* Emits a shared routine (return address kept in the LOCALS0 slot) that |
| compares the "word character" status of the character before STR_PTR with |
| that of the character at STR_PTR. The status of the previous character is |
| stored in LOCALS1 (it stays 0 when STR_PTR is not beyond the 'begin' field of |
| jit_arguments), the status of the current character is built in TMP2 (it |
| stays 0 when the check_str_end jump is taken). The closing XOR only sets the |
| flags (SLJIT_SET_E, no destination); how callers test them is outside this |
| function. With common->use_ucp a character counts as a word character when it |
| is CHAR_UNDERSCORE or when the type delivered by getunichartype_2 lies in |
| ucp_Ll..ucp_Lu or ucp_Nd..ucp_No (this relies on the ordering of the ucp_* |
| codes, defined elsewhere). Otherwise bit 4 (ctype_word) of common->ctypes is |
| used and characters above 255 are treated as non-word. TMP1 and TMP2 are |
| overwritten. */ |
| SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16); |
| |
| sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); |
| /* Get type of the previous char, and put it to LOCALS1. */ |
| OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); |
| OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0); |
| skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0); /* no previous character: LOCALS1 stays 0 */ |
| skip_char_back(common); |
| check_start_used_ptr(common); |
| read_char(common); |
| |
| /* Testing char type. */ |
| #ifdef SUPPORT_UCP |
| if (common->use_ucp) |
| { |
| OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); |
| jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); /* underscore: TMP2 is already 1 */ |
| add_jump(compiler, &common->getunichartype_2, JUMP(SLJIT_FAST_CALL)); |
| OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll); |
| OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); /* unsigned range test: ucp_Ll..ucp_Lu */ |
| COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL); |
| OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll); |
| OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd); /* unsigned range test: ucp_Nd..ucp_No */ |
| COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL); |
| JUMPHERE(jump); |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0); |
| } |
| else |
| #endif |
| { |
| #ifndef COMPILE_PCRE8 |
| jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); |
| #elif defined SUPPORT_UTF |
| /* Here LOCALS1 has already been zeroed. */ |
| jump = NULL; |
| if (common->utf) |
| jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); |
| #endif /* COMPILE_PCRE8 */ |
| OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes); |
| OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */); |
| OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0); |
| #ifndef COMPILE_PCRE8 |
| JUMPHERE(jump); |
| #elif defined SUPPORT_UTF |
| if (jump != NULL) |
| JUMPHERE(jump); |
| #endif /* COMPILE_PCRE8 */ |
| } |
| JUMPHERE(skipread); |
| /* Now classify the character at STR_PTR into TMP2; it is preset to 0 for the |
| case where the check_str_end jump skips the read. */ |
| OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); |
| skipread = check_str_end(common); |
| peek_char(common); |
| |
| /* Testing char type. This is a code duplication. */ |
| #ifdef SUPPORT_UCP |
| if (common->use_ucp) |
| { |
| OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); |
| jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); |
| add_jump(compiler, &common->getunichartype_2, JUMP(SLJIT_FAST_CALL)); |
| OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll); |
| OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); |
| COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL); |
| OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll); |
| OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd); |
| COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL); |
| JUMPHERE(jump); |
| } |
| else |
| #endif |
| { |
| #ifndef COMPILE_PCRE8 |
| /* TMP2 may be destroyed by peek_char. */ |
| OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); |
| jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); |
| #elif defined SUPPORT_UTF |
| OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); |
| jump = NULL; |
| if (common->utf) |
| jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); |
| #endif |
| OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes); |
| OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */); |
| OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); |
| #ifndef COMPILE_PCRE8 |
| JUMPHERE(jump); |
| #elif defined SUPPORT_UTF |
| if (jump != NULL) |
| JUMPHERE(jump); |
| #endif /* COMPILE_PCRE8 */ |
| } |
| JUMPHERE(skipread); |
| /* Flags only: TMP2 (current) XOR LOCALS1 (previous); nothing is stored. */ |
| OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1); |
| sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); |
| } |
| |
| static void check_anynewline(compiler_common *common) |
| { |
| /* Emits a shared routine that checks whether TMP1 contains a newline |
| character: 0x0a to 0x0d, 0x85 and, when the wide-character part below is |
| generated, 0x2028 or 0x2029. That part is compiled in when SUPPORT_UTF or |
| COMPILE_PCRE16 is defined; in COMPILE_PCRE8 builds it is emitted only if |
| common->utf is set. The result is accumulated in TMP2 and the final |
| COND_VALUE also updates the flags (SLJIT_SET_E). TMP1 is changed along the way |
| (0x0a is subtracted and bit 0 may be set). TMP2 destroyed. */ |
| DEFINE_COMPILER; |
| |
| sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); |
| |
| OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); |
| OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); /* unsigned range test: 0x0a..0x0d */ |
| COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL); |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); /* 0x85 */ |
| #if defined SUPPORT_UTF || defined COMPILE_PCRE16 |
| #ifdef COMPILE_PCRE8 |
| if (common->utf) |
| { |
| #endif |
| COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); |
| OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); /* maps 0x2028 onto 0x2029 so one compare covers both */ |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); |
| #ifdef COMPILE_PCRE8 |
| } |
| #endif |
| #endif /* SUPPORT_UTF || COMPILE_PCRE16 */ |
| COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); /* merges the last comparison and sets the flags for the caller */ |
| sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
| } |
| |
| static void check_hspace(compiler_common *common) |
| { |
| /* Emits a shared routine that checks whether TMP1 contains a horizontal |
| space character: 0x09, 0x20, 0xa0 and, when the wide-character part below is |
| generated, 0x1680, 0x180e, 0x2000 to 0x200A, 0x202f, 0x205f or 0x3000. That |
| part is compiled in when SUPPORT_UTF or COMPILE_PCRE16 is defined; in |
| COMPILE_PCRE8 builds it is emitted only if common->utf is set. The result is |
| accumulated in TMP2 and the final COND_VALUE also updates the flags |
| (SLJIT_SET_E). In the wide-character part 0x2000 is subtracted from TMP1. |
| TMP2 destroyed. */ |
| DEFINE_COMPILER; |
| |
| sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); |
| |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09); |
| COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL); |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20); |
| COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0); |
| #if defined SUPPORT_UTF || defined COMPILE_PCRE16 |
| #ifdef COMPILE_PCRE8 |
| if (common->utf) |
| { |
| #endif |
| COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680); |
| COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e); |
| COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); |
| OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000); /* the remaining tests are relative to 0x2000 */ |
| OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000); /* unsigned range test: 0x2000..0x200A */ |
| COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL); |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000); |
| COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000); |
| COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000); |
| #ifdef COMPILE_PCRE8 |
| } |
| #endif |
| #endif /* SUPPORT_UTF || COMPILE_PCRE16 */ |
| COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); /* merges the last comparison and sets the flags for the caller */ |
| |
| sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
| } |
| |
| static void check_vspace(compiler_common *common) |
| { |
| /* Emits a shared routine that checks whether TMP1 contains a vertical space |
| character: 0x0a to 0x0d, 0x85 and, when the wide-character part below is |
| generated, 0x2028 or 0x2029. That part is compiled in when SUPPORT_UTF or |
| COMPILE_PCRE16 is defined; in COMPILE_PCRE8 builds it is emitted only if |
| common->utf is set. The result is accumulated in TMP2 and the final |
| COND_VALUE also updates the flags (SLJIT_SET_E). TMP1 is changed along the way |
| (0x0a is subtracted and bit 0 may be set). TMP2 destroyed. */ |
| DEFINE_COMPILER; |
| |
| sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); |
| |
| OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); |
| OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); /* unsigned range test: 0x0a..0x0d */ |
| COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL); |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); /* 0x85 */ |
| #if defined SUPPORT_UTF || defined COMPILE_PCRE16 |
| #ifdef COMPILE_PCRE8 |
| if (common->utf) |
| { |
| #endif |
| COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); /* NOTE(review): SLJIT_SET_E looks redundant here, the flags are set again two lines below; check_anynewline omits it */ |
| OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); /* maps 0x2028 onto 0x2029 so one compare covers both */ |
| OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); |
| #ifdef COMPILE_PCRE8 |
| } |
| #endif |
| #endif /* SUPPORT_UTF || COMPILE_PCRE16 */ |
| COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); /* merges the last comparison and sets the flags for the caller */ |
| |
| sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
| } |
| |
| #define CHAR1 STR_END |
| #define CHAR2 STACK_TOP |
| /* CHAR1 and CHAR2 reuse the STR_END and STACK_TOP registers as scratch space; |
| the routines using them save the original contents first and restore them |
| before returning. */ |
| static void do_casefulcmp(compiler_common *common) |
| { |
| DEFINE_COMPILER; |
| struct sljit_jump *jump; |
| struct sljit_label *label; |
| /* Emits a shared routine that compares two unit sequences for exact equality. |
| As used below: TMP1 points at the first sequence, TMP2 holds the length that |
| is counted down by IN_UCHARS(1) per unit, and STR_PTR is first moved back by |
| TMP2 to reach the start of the second sequence. Both pointers are then |
| lowered by one unit because the loads use MOVU_UCHAR with an IN_UCHARS(1) |
| offset (presumably a load that also updates the base register - confirm in |
| the macro definition). The loop stops at the first difference or when TMP2 |
| reaches zero; STR_PTR is then moved one unit forward again. What the caller |
| tests on return is outside this function. CHAR1 is preserved in TMP3 and |
| CHAR2 in the LOCALS0 slot. */ |
| sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); |
| OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
| OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0); |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0); |
| OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); |
| OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| |
| label = LABEL(); |
| OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1)); |
| OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); |
| jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0); /* first difference ends the loop */ |
| OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); |
| JUMPTO(SLJIT_C_NOT_ZERO, label); |
| |
| JUMPHERE(jump); |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0); |
| OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); |
| sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
| } |
| |
| #define LCC_TABLE STACK_LIMIT |
| /* LCC_TABLE reuses the STACK_LIMIT register to hold the address common->lcc; |
| the original content is kept in TMP3 and put back before returning. */ |
| static void do_caselesscmp(compiler_common *common) |
| { |
| DEFINE_COMPILER; |
| struct sljit_jump *jump; |
| struct sljit_label *label; |
| /* Emits a shared routine that compares two unit sequences after mapping each |
| unit through the table at common->lcc. As used below: TMP1 points at the |
| first sequence, TMP2 holds the length that is counted down by IN_UCHARS(1) |
| per unit, and STR_PTR is first moved back by TMP2 to reach the start of the |
| second sequence. In builds without COMPILE_PCRE8, units above 255 are compared |
| unmapped. The loop stops at the first difference or when TMP2 reaches zero; |
| STR_PTR is then moved one unit forward again. What the caller tests on return |
| is outside this function. CHAR1 and CHAR2 are preserved in the LOCALS0 and |
| LOCALS1 slots. */ |
| sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); |
| OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
| |
| OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0); |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0); |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0); |
| OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc); |
| OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); /* both pointers start one unit early: the MOVU_UCHAR loads below use an IN_UCHARS(1) offset */ |
| OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| |
| label = LABEL(); |
| OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1)); |
| OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); |
| #ifndef COMPILE_PCRE8 |
| jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255); |
| #endif |
| OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0); |
| #ifndef COMPILE_PCRE8 |
| JUMPHERE(jump); |
| jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255); |
| #endif |
| OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0); |
| #ifndef COMPILE_PCRE8 |
| JUMPHERE(jump); |
| #endif |
| jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0); /* first difference ends the loop */ |
| OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); |
| JUMPTO(SLJIT_C_NOT_ZERO, label); |
| |
| JUMPHERE(jump); |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0); |
| OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); |
| OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1); |
| sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
| } |
| |
| #undef LCC_TABLE |
| #undef CHAR1 |
| #undef CHAR2 |
| |
| #if defined SUPPORT_UTF && defined SUPPORT_UCP |
| |
| static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1) |
| { |
| /* Written in C because doing this at JIT level would be inefficient. |
| Compares the characters in [src1, end1) one by one with the characters |
| starting at args->uchar_ptr, accepting either an identical character or one |
| equal to UCD_OTHERCASE of the second string's character. Returns the position |
| reached in the second string when the whole first range matched, NULL on a |
| mismatch, and (pcre_uchar*)1 when args->end is reached before the first range |
| is exhausted. */ |
| const pcre_uchar *subject = args->uchar_ptr; |
| const pcre_uchar *subject_end = args->end; |
| int ref_char, subj_char; |
| |
| for (;;) |
| { |
| if (src1 >= end1) |
| return subject; |
| if (subject >= subject_end) |
| return (pcre_uchar*)1; |
| GETCHARINC(ref_char, src1); |
| GETCHARINC(subj_char, subject); |
| if (ref_char == subj_char) |
| continue; |
| if (ref_char != UCD_OTHERCASE(subj_char)) |
| return NULL; |
| } |
| } |
| |
| #endif /* SUPPORT_UTF && SUPPORT_UCP */ |
| |
| static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc, |
| compare_context* context, jump_list **backtracks) |
| { |
| DEFINE_COMPILER; |
| unsigned int othercasebit = 0; |
| pcre_uchar *othercasechar = NULL; |
| #ifdef SUPPORT_UTF |
| int utflength; |
| #endif |
| |
| if (caseless && char_has_othercase(common, cc)) |
| { |
| othercasebit = char_get_othercase_bit(common, cc); |
| SLJIT_ASSERT(othercasebit); |
| /* Extracting bit difference info. */ |
| #ifdef COMPILE_PCRE8 |
| othercasechar = cc + (othercasebit >> 8); |
| othercasebit &= 0xff; |
| #else |
| #ifdef COMPILE_PCRE16 |
| othercasechar = cc + (othercasebit >> 9); |
| if ((othercasebit & 0x100) != 0) |
| othercasebit = (othercasebit & 0xff) << 8; |
| else |
| othercasebit &= 0xff; |
| #endif |
| #endif |
| } |
| |
| if (context->sourcereg == -1) |
| { |
| #ifdef COMPILE_PCRE8 |
| #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED |
| if (context->length >= 4) |
| OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); |
| else if (context->length >= 2) |
| OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); |
| else |
| #endif |
| OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); |
| #else |
| #ifdef COMPILE_PCRE16 |
| #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED |
| if (context->length >= 4) |
| OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); |
| else |
| #endif |
| OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); |
| #endif |
| #endif /* COMPILE_PCRE8 */ |
| context->sourcereg = TMP2; |
| } |
| |
| #ifdef SUPPORT_UTF |
| utflength = 1; |
| if (common->utf && HAS_EXTRALEN(*cc)) |
| utflength += GET_EXTRALEN(*cc); |
| |
| do |
| { |
| #endif |
| |
| context->length -= IN_UCHARS(1); |
| #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED |
| |
| /* Unaligned read is supported. */ |
| if (othercasebit != 0 && othercasechar == cc) |
| { |
| context->c.asuchars[context->ucharptr] = *cc | othercasebit; |
| context->oc.asuchars[context->ucharptr] = othercasebit; |
| } |
| else |
| { |
| context->c.asuchars[context->ucharptr] = *cc; |
|