| /* vi:set ts=8 sts=4 sw=4: |
| * |
| * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE |
| * |
| * This is NOT the original regular expression code as written by Henry |
| * Spencer. This code has been modified specifically for use with Vim, and |
| * should not be used apart from compiling Vim. If you want a good regular |
| * expression library, get the original code. |
| * |
| * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE |
| */ |
| |
| #ifndef _REGEXP_H |
| #define _REGEXP_H |
| |
| /* |
| * The number of sub-matches is limited to 10. |
| * The first one (index 0) is the whole match, referenced with "\0". |
| * The second one (index 1) is the first sub-match, referenced with "\1". |
| * This goes up to the tenth (index 9), referenced with "\9". |
| * NSUBEXP is the array length of startp[]/endp[] in regmatch_T, of |
| * startpos[]/endpos[] in regmmatch_T and of matches[] in reg_extmatch_T. |
| */ |
| #define NSUBEXP 10 |
| |
| /* |
| * In the NFA engine: the maximum number of braces that are allowed. |
| * TODO(RE): Use dynamic memory allocation instead of the static limit that |
| * is used here. |
| */ |
| #define NFA_MAX_BRACES 20 |
| |
| /* |
| * In the NFA engine: how many states are allowed. |
| * NFA_TOO_EXPENSIVE is not explained in this header; presumably it is the |
| * result reported when the NFA engine gives up -- TODO confirm at the use |
| * sites. |
| * NOTE(review): NFA_TOO_EXPENSIVE expands to a bare -1. Writing it as (-1) |
| * would keep the expansion intact when it is used inside a larger |
| * expression. |
| */ |
| #define NFA_MAX_STATES 100000 |
| #define NFA_TOO_EXPENSIVE -1 |
| |
| /* Which regexp engine to use? Needed for vim_regcomp(). |
| * These are the values for the "re_engine" member of regprog_T |
| * (automatic, backtracking or nfa engine). |
| * The numbers must stay in sync with 'regexpengine'. */ |
| #define AUTOMATIC_ENGINE 0 |
| #define BACKTRACKING_ENGINE 1 |
| #define NFA_ENGINE 2 |
| |
| typedef struct regengine regengine_T; /* forward declaration; struct regengine itself is defined further down */ |
| |
| /* |
| * Structure returned by vim_regcomp() to pass on to vim_regexec(). |
| * This is the general structure. For the actual matcher, two specific |
| * structures are used: bt_regprog_T and nfa_regprog_T, see below. Both of |
| * them start with the same four members as this structure, with the same |
| * types and in the same order. |
| */ |
| typedef struct regprog |
| { |
| regengine_T *engine; /* the struct regengine with the function pointers */ |
| unsigned regflags; |
| unsigned re_engine; /* automatic, backtracking or nfa engine */ |
| unsigned re_flags; /* second argument for vim_regcomp() */ |
| } regprog_T; |
| |
| /* |
| * Structure used by the backtracking matcher. |
| * It starts with the four members of regprog_T. The members after those |
| * are only to be used in regexp.c! |
| * See regexp.c for an explanation of what they mean. |
| */ |
| typedef struct |
| { |
| /* These four members implement regprog_T: same types, same order. */ |
| regengine_T *engine; |
| unsigned regflags; |
| unsigned re_engine; |
| unsigned re_flags; /* second argument for vim_regcomp() */ |
| |
| int regstart; |
| char_u reganch; |
| char_u *regmust; |
| int regmlen; |
| #ifdef FEAT_SYN_HL |
| char_u reghasz; /* only present when FEAT_SYN_HL is defined */ |
| #endif |
| char_u program[1]; /* actually longer.. */ |
| } bt_regprog_T; |
| |
| /* |
| * Structure representing an NFA state. |
| * An NFA state may have no outgoing edge, when it is a NFA_MATCH state. |
| * "out" and "out1" point to other states; presumably these are the |
| * outgoing edges -- TODO confirm in the NFA engine code. |
| * The typedef comes before the struct body so that the members can use |
| * the nfa_state_T name. |
| */ |
| typedef struct nfa_state nfa_state_T; |
| struct nfa_state |
| { |
| int c; |
| nfa_state_T *out; |
| nfa_state_T *out1; |
| int id; |
| int lastlist[2]; /* 0: normal, 1: recursive */ |
| int val; |
| }; |
| |
| /* |
| * Structure used by the NFA matcher. |
| * It starts with the four members of regprog_T and ends with the array of |
| * states, which is declared with one element but is actually longer. |
| */ |
| typedef struct |
| { |
| /* These four members implement regprog_T: same types, same order. */ |
| regengine_T *engine; |
| unsigned regflags; |
| unsigned re_engine; |
| unsigned re_flags; /* second argument for vim_regcomp() */ |
| |
| nfa_state_T *start; /* points into state[] */ |
| |
| int reganch; /* pattern starts with ^ */ |
| int regstart; /* char at start of pattern */ |
| char_u *match_text; /* plain text to match with */ |
| |
| int has_zend; /* pattern contains \ze */ |
| int has_backref; /* pattern contains \1 .. \9 */ |
| #ifdef FEAT_SYN_HL |
| int reghasz; /* only present when FEAT_SYN_HL is defined */ |
| #endif |
| char_u *pattern; |
| int nsubexp; /* number of () */ |
| int nstate; /* presumably the number of entries in state[] -- TODO confirm */ |
| nfa_state_T state[1]; /* actually longer.. */ |
| } nfa_regprog_T; |
| |
| /* |
| * Structure to be used for single-line matching. |
| * Sub-match "no" starts at "startp[no]" and ends just before "endp[no]". |
| * When there is no match, the pointer is NULL. |
| * Both arrays have NSUBEXP entries; index 0 is the whole match. |
| */ |
| typedef struct |
| { |
| regprog_T *regprog; |
| char_u *startp[NSUBEXP]; |
| char_u *endp[NSUBEXP]; |
| int rm_ic; /* presumably the "ignore case" setting -- TODO confirm */ |
| } regmatch_T; |
| |
| /* |
| * Structure to be used for multi-line matching. |
| * Sub-match "no" starts in line "startpos[no].lnum" column "startpos[no].col" |
| * and ends in line "endpos[no].lnum" just before column "endpos[no].col". |
| * The line numbers are relative to the first line, thus startpos[0].lnum is |
| * always 0. |
| * When there is no match, the line number is -1. |
| * Both arrays have NSUBEXP entries; index 0 is the whole match. |
| */ |
| typedef struct |
| { |
| regprog_T *regprog; |
| lpos_T startpos[NSUBEXP]; |
| lpos_T endpos[NSUBEXP]; |
| int rmm_ic; /* presumably the "ignore case" setting -- TODO confirm */ |
| colnr_T rmm_maxcol; /* when not zero: maximum column */ |
| } regmmatch_T; |
| |
| /* |
| * Structure used to store external references: "\z\(\)" to "\z\1". |
| * Use a reference count to avoid the need to copy this around. When the |
| * reference count goes from 1 to zero the matches need to be freed. |
| */ |
| typedef struct |
| { |
| short refcnt; /* the reference count described above */ |
| char_u *matches[NSUBEXP]; |
| } reg_extmatch_T; |
| |
| struct regengine /* table of function pointers; the "engine" member of regprog_T points to one of these */ |
| { |
| regprog_T *(*regcomp)(char_u*, int); /* returns a regprog_T pointer */ |
| void (*regfree)(regprog_T *); |
| int (*regexec_nl)(regmatch_T*, char_u*, colnr_T, int); /* takes the single-line match structure */ |
| long (*regexec_multi)(regmmatch_T*, win_T*, buf_T*, linenr_T, colnr_T, proftime_T*); /* takes the multi-line match structure */ |
| char_u *expr; /* NOTE(review): purpose is not evident from this header */ |
| }; |
| |
| #endif /* _REGEXP_H */ |