blob: 79c1e7d82176adebfa09bdcbf69d2e7b4d5761f3 [file] [log] [blame]
/* vi:set ts=8 sts=4 sw=4 noet:
*
* NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
*
* This is NOT the original regular expression code as written by Henry
* Spencer. This code has been modified specifically for use with Vim, and
* should not be used apart from compiling Vim. If you want a good regular
* expression library, get the original code.
*
* NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
*/
#ifndef _REGEXP_H
#define _REGEXP_H
/*
 * The number of sub-matches is limited to 10.
 * The first one (index 0) is the whole match, referenced with "\0".
 * The second one (index 1) is the first sub-match, referenced with "\1".
 * This goes up to the tenth (index 9), referenced with "\9".
 *
 * NSUBEXP is the array length of the per-sub-match members in regmatch_T
 * (startp/endp), regmmatch_T (startpos/endpos) and reg_extmatch_T (matches).
 */
#define NSUBEXP 10
/*
 * In the NFA engine: how many braces are allowed.
 * This is a fixed, compile-time limit.
 * TODO(RE): Use dynamic memory allocation instead of a static limit like
 * the one here.
 */
#define NFA_MAX_BRACES 20
/*
 * In the NFA engine: how many states are allowed.
 */
#define NFA_MAX_STATES 100000
/*
 * Negative sentinel value.
 * NOTE(review): judging by the name this is what the NFA engine reports when
 * it considers a pattern too expensive to handle -- confirm at the use sites.
 * The replacement list is parenthesized so that the unary minus can never
 * combine with whatever precedes the macro in an expression.
 */
#define NFA_TOO_EXPENSIVE (-1)
/*
 * Which regexp engine to use?  Needed for vim_regcomp().
 * The numeric values must match the values accepted by the 'regexpengine'
 * option, so do not renumber them.
 * The re_engine member of regprog_T holds one of these: automatic,
 * backtracking or NFA engine.
 */
#define AUTOMATIC_ENGINE 0
#define BACKTRACKING_ENGINE 1
#define NFA_ENGINE 2
/* Forward declaration; struct regengine itself is defined further down in
 * this file, after the match structures its members refer to. */
typedef struct regengine regengine_T;

/*
 * Compiled pattern: what vim_regcomp() returns and what is then passed on to
 * vim_regexec().
 * This is only the general form.  The actual matchers each use their own,
 * more specific structure that starts with these same members; see
 * bt_regprog_T and nfa_regprog_T below.
 */
typedef struct regprog
{
    regengine_T *engine;
    unsigned    regflags;

    /* automatic, backtracking or nfa engine */
    unsigned    re_engine;

    /* second argument for vim_regcomp() */
    unsigned    re_flags;
} regprog_T;
/*
 * Compiled program for the back track matcher.
 * The members are meant to be used in regexp.c only; that file also holds
 * the explanation of what they contain.
 */
typedef struct
{
    /* Common header: these four members implement regprog_T and must stay
     * in the same order as there. */
    regengine_T *engine;
    unsigned    regflags;
    unsigned    re_engine;
    unsigned    re_flags;       /* second argument for vim_regcomp() */

    /* Matcher-specific part, see regexp.c. */
    int         regstart;
    char_u      reganch;
    char_u      *regmust;
    int         regmlen;
#ifdef FEAT_SYN_HL
    char_u      reghasz;        /* only present with FEAT_SYN_HL */
#endif

    /* Declared with a single element, but actually longer. */
    char_u      program[1];
} bt_regprog_T;
/*
 * One state of the NFA.
 * A state does not need to have an outgoing edge: a NFA_MATCH state has
 * none.
 */
typedef struct nfa_state
{
    int                 c;

    /* Links to other states (presumably the outgoing edges mentioned
     * above -- confirm in the NFA code). */
    struct nfa_state    *out;
    struct nfa_state    *out1;

    int                 id;

    /* index 0: normal, index 1: recursive */
    int                 lastlist[2];

    int                 val;
} nfa_state_T;
/*
 * Compiled program for the NFA matcher.
 */
typedef struct
{
    /* Common header: these four members implement regprog_T and must stay
     * in the same order as there. */
    regengine_T *engine;
    unsigned    regflags;
    unsigned    re_engine;
    unsigned    re_flags;       /* second argument for vim_regcomp() */

    /* NFA-specific part. */
    nfa_state_T *start;         /* points into state[] */

    int         reganch;        /* pattern starts with ^ */
    int         regstart;       /* char at start of pattern */
    char_u      *match_text;    /* plain text to match with */

    int         has_zend;       /* pattern contains \ze */
    int         has_backref;    /* pattern contains \1 .. \9 */
#ifdef FEAT_SYN_HL
    int         reghasz;        /* only present with FEAT_SYN_HL */
#endif
    char_u      *pattern;
    int         nsubexp;        /* number of () */
    int         nstate;

    /* Declared with a single element, but actually longer. */
    nfa_state_T state[1];
} nfa_regprog_T;
/*
 * Match information for single-line matching.
 * For sub-match "no" the matched text begins at "startp[no]" and stops just
 * before "endp[no]".  Both pointers are NULL when there is no match.
 */
typedef struct
{
    regprog_T   *regprog;

    /* One start/end pair per sub-match; index 0 is the whole match. */
    char_u      *startp[NSUBEXP];
    char_u      *endp[NSUBEXP];

    /* NOTE(review): presumably the ignore-case flag -- confirm at the use
     * sites. */
    int         rm_ic;
} regmatch_T;
/*
 * Match information for multi-line matching.
 * For sub-match "no" the match begins in line "startpos[no].lnum" at column
 * "startpos[no].col"; it ends in line "endpos[no].lnum", just before column
 * "endpos[no].col".
 * Line numbers are counted relative to the first line, which means that
 * startpos[0].lnum is always 0.
 * A line number of -1 means there is no match.
 */
typedef struct
{
    regprog_T   *regprog;

    /* One start/end position per sub-match; index 0 is the whole match. */
    lpos_T      startpos[NSUBEXP];
    lpos_T      endpos[NSUBEXP];

    /* NOTE(review): presumably the ignore-case flag -- confirm at the use
     * sites. */
    int         rmm_ic;

    /* when not zero: maximum column */
    colnr_T     rmm_maxcol;
} regmmatch_T;
/*
 * Storage for external references: "\z\(\)" to "\z\1".
 * The structure is reference counted, which avoids having to copy it
 * around.  Once the count drops from 1 to zero the matches need to be
 * freed.
 */
typedef struct
{
    /* number of references to this structure */
    short       refcnt;

    /* one entry per sub-match */
    char_u      *matches[NSUBEXP];
} reg_extmatch_T;
/*
 * Set of function pointers plus one string member.  This is the type behind
 * regengine_T, which regprog_T and the matcher-specific program structures
 * point to through their "engine" member.
 */
struct regengine
{
    /* Produces a regprog_T from a string and an int. */
    regprog_T   *(*regcomp)(char_u *, int);

    /* Takes the regprog_T to dispose of. */
    void        (*regfree)(regprog_T *);

    /* Works on a regmatch_T, the single-line match structure. */
    int         (*regexec_nl)(regmatch_T *, char_u *, colnr_T, int);

    /* Works on a regmmatch_T, the multi-line match structure. */
    long        (*regexec_multi)(regmmatch_T *, win_T *, buf_T *,
                                 linenr_T, colnr_T, proftime_T *);

    char_u      *expr;
};
#endif /* _REGEXP_H */