MultiSource/Benchmarks/MallocBench/perl/regexec.c - third_party/llvm-test-suite - Git at Google

 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
  * confused with the original package (see point 3 below).  Thanks, Henry!
  */

 /* Additional note: this code is very heavily munged from Henry's version
  * in places.  In some spots I've traded clarity for efficiency, so don't
  * blame Henry for some of the lack of readability.
  */

 /* $RCSfile$$Revision$$Date$
  *
  * $Log$
  * Revision 1.1  2004/02/17 22:21:16  criswell
  * Initial commit of the perl Malloc Benchmark.  I've cheated a little by
  * generating the yacc output files and committing them directly, but it was
  * easier than disabling the Bison Voodoo that gets executed by default.
  *
  * Revision 4.0.1.1  91/04/12  09:07:39  lwall
  * patch1: regexec only allocated space for 9 subexpresssions
  *
  * Revision 4.0  91/03/20  01:39:16  lwall
  * 4.0 baseline.
  *
  */

 /*
  * regcomp and regexec -- regsub and regerror are not used in perl
  *
  *	Copyright (c) 1986 by University of Toronto.
  *	Written by Henry Spencer.  Not derived from licensed software.
  *
  *	Permission is granted to anyone to use this software for any
  *	purpose on any computer system, and to redistribute it freely,
  *	subject to the following restrictions:
  *
  *	1. The author is not responsible for the consequences of use of
  *		this software, no matter how awful, even if they arise
  *		from defects in it.
  *
  *	2. The origin of this software must not be misrepresented, either
  *		by explicit claim or by omission.
  *
  *	3. Altered versions must be plainly marked as such, and must not
  *		be misrepresented as being the original software.
  *
  ****    Alterations to Henry's code are...
  ****
  ****    Copyright (c) 1989, Larry Wall
  ****
  ****    You may distribute under the terms of the GNU General Public License
  ****    as specified in the README file that comes with the perl 3.0 kit.
  *
  * Beware that some of this code is subtly aware of the way operator
  * precedence is structured in regular expressions.  Serious changes in
  * regular-expression syntax might require a total rethink.
  */
 #include "EXTERN.h"
 #include "perl.h"
 #include "regcomp.h"

 #ifndef STATIC
 #define	STATIC	static
 #endif

 #ifdef DEBUGGING
 int regnarrate = 0;
 #endif

 #define isALNUM(c) (isascii(c) && (isalpha(c) || isdigit(c) || c == '_'))
 #define isSPACE(c) (isascii(c) && isspace(c))
 #define isDIGIT(c) (isascii(c) && isdigit(c))
 #define isUPPER(c) (isascii(c) && isupper(c))

 /*
  * regexec and friends
  */

 /*
  * Global work variables for regexec().
  */
 static char *regprecomp;
 static char *reginput;		/* String-input pointer. */
 static char regprev;		/* char before regbol, \n if none */
 static char *regbol;		/* Beginning of input, for ^ check. */
 static char *regeol;		/* End of input, for $ check. */
 static char **regstartp;	/* Pointer to startp array. */
 static char **regendp;		/* Ditto for endp. */
 static char *reglastparen;	/* Similarly for lastparen. */
 static char *regtill;

 static int regmyp_size = 0;
 static char **regmystartp = Null(char**);
 static char **regmyendp   = Null(char**);

 /*
  * Forwards.
  */
 STATIC int regtry();
 STATIC int regmatch();
 STATIC int regrepeat();

 extern int multiline;

 /*
  - regexec - match a regexp against a string
  */
 int
 regexec(prog, stringarg, strend, strbeg, minend, screamer, safebase)
 register regexp *prog;
 char *stringarg;
 register char *strend;	/* pointer to null at end of string */
 char *strbeg;	/* real beginning of string */
 int minend;	/* end of match must be at least minend after stringarg */
 STR *screamer;
 int safebase;	/* no need to remember string in subbase */
 {
 	register char *s;
 	register int i;
 	register char *c;
 	register char *string = stringarg;
 	register int tmp;
 	int minlen = 0;		/* must match at least this many chars */
 	int dontbother = 0;	/* how many characters not to try at end */

 	/* Be paranoid... */
 	if (prog == NULL || string == NULL) {
 		fatal("NULL regexp parameter");
 		return(0);
 	}

 	if (string == strbeg)	/* is ^ valid at stringarg? */
 	    regprev = '\n';
 	else {
 	    regprev = stringarg[-1];
 	    if (!multiline && regprev == '\n')
 		regprev = '\0';		/* force ^ to NOT match */
 	}
 	regprecomp = prog->precomp;
 	/* Check validity of program. */
 	if (UCHARAT(prog->program) != MAGIC) {
 		FAIL("corrupted regexp program");
 	}

 	if (prog->do_folding) {
 		safebase = FALSE;
 		i = strend - string;
 		New(1101,c,i+1,char);
 		(void)bcopy(string, c, i+1);
 		string = c;
 		strend = string + i;
 		for (s = string; s < strend; s++)
 			if (isUPPER(*s))
 				*s = tolower(*s);
 	}

 	/* If there is a "must appear" string, look for it. */
 	s = string;
 	if (prog->regmust != Nullstr &&
 	    (!(prog->reganch & 1) || (multiline && prog->regback >= 0)) ) {
 		if (stringarg == strbeg && screamer) {
 			if (screamfirst[prog->regmust->str_rare] >= 0)
 				s = screaminstr(screamer,prog->regmust);
 			else
 				s = Nullch;
 		}
 #ifndef lint
 		else
 			s = fbminstr((unsigned char*)s, (unsigned char*)strend,
 			    prog->regmust);
 #endif
 		if (!s) {
 			++prog->regmust->str_u.str_useful;	/* hooray */
 			goto phooey;	/* not present */
 		}
 		else if (prog->regback >= 0) {
 			s -= prog->regback;
 			if (s < string)
 			    s = string;
 			minlen = prog->regback + prog->regmust->str_cur;
 		}
 		else if (--prog->regmust->str_u.str_useful < 0) { /* boo */
 			str_free(prog->regmust);
 			prog->regmust = Nullstr;	/* disable regmust */
 			s = string;
 		}
 		else {
 			s = string;
 			minlen = prog->regmust->str_cur;
 		}
 	}

 	/* Mark beginning of line for ^ . */
 	regbol = string;

 	/* Mark end of line for $ (and such) */
 	regeol = strend;

 	/* see how far we have to get to not match where we matched before */
 	regtill = string+minend;

 	/* Allocate our backreference arrays */
 	if ( regmyp_size < prog->nparens + 1 ) {
 	    /* Allocate or enlarge the arrays */
 	    regmyp_size = prog->nparens + 1;
 	    if ( regmyp_size < 10 ) regmyp_size = 10;	/* minimum */
 	    if ( regmystartp ) {
 		/* reallocate larger */
 		Renew(regmystartp,regmyp_size,char*);
 		Renew(regmyendp,  regmyp_size,char*);
 	    }
 	    else {
 		/* Initial allocation */
 		New(1102,regmystartp,regmyp_size,char*);
 		New(1102,regmyendp,  regmyp_size,char*);
 	    }

 	}

 	/* Simplest case:  anchored match need be tried only once. */
 	/*  [unless multiline is set] */
 	if (prog->reganch & 1) {
 		if (regtry(prog, string))
 			goto got_it;
 		else if (multiline) {
 			if (minlen)
 			    dontbother = minlen - 1;
 			strend -= dontbother;
 			/* for multiline we only have to try after newlines */
 			if (s > string)
 			    s--;
 			while (s < strend) {
 			    if (*s++ == '\n') {
 				if (s < strend && regtry(prog, s))
 				    goto got_it;
 			    }
 			}
 		}
 		goto phooey;
 	}

 	/* Messy cases:  unanchored match. */
 	if (prog->regstart) {
 		if (prog->reganch & 2) {	/* we have /x+whatever/ */
 		    /* it must be a one character string */
 		    i = prog->regstart->str_ptr[0];
 		    while (s < strend) {
 			    if (*s == i) {
 				    if (regtry(prog, s))
 					    goto got_it;
 				    s++;
 				    while (s < strend && *s == i)
 					s++;
 			    }
 			    s++;
 		    }
 		}
 		else if (prog->regstart->str_pok == 3) {
 		    /* We know what string it must start with. */
 #ifndef lint
 		    while ((s = fbminstr((unsigned char*)s,
 		      (unsigned char*)strend, prog->regstart)) != NULL)
 #else
 		    while (s = Nullch)
 #endif
 		    {
 			    if (regtry(prog, s))
 				    goto got_it;
 			    s++;
 		    }
 		}
 		else {
 		    c = prog->regstart->str_ptr;
 		    while ((s = ninstr(s, strend,
 		      c, c + prog->regstart->str_cur )) != NULL) {
 			    if (regtry(prog, s))
 				    goto got_it;
 			    s++;
 		    }
 		}
 		goto phooey;
 	}
 	if (c = prog->regstclass) {
 		int doevery = (prog->reganch & 2) == 0;

 		if (minlen)
 		    dontbother = minlen - 1;
 		strend -= dontbother;	/* don't bother with what can't match */
 		tmp = 1;
 		/* We know what class it must start with. */
 		switch (OP(c)) {
 		case ANYOF:
 		    c = OPERAND(c);
 		    while (s < strend) {
 			    i = UCHARAT(s);
 			    if (!(c[i >> 3] & (1 << (i&7)))) {
 				    if (tmp && regtry(prog, s))
 					    goto got_it;
 				    else
 					    tmp = doevery;
 			    }
 			    else
 				    tmp = 1;
 			    s++;
 		    }
 		    break;
 		case BOUND:
 		    if (minlen)
 			dontbother++,strend--;
 		    if (s != string) {
 			i = s[-1];
 			tmp = isALNUM(i);
 		    }
 		    else
 			tmp = isALNUM(regprev);	/* assume not alphanumeric */
 		    while (s < strend) {
 			    i = *s;
 			    if (tmp != isALNUM(i)) {
 				    tmp = !tmp;
 				    if (regtry(prog, s))
 					    goto got_it;
 			    }
 			    s++;
 		    }
 		    if ((minlen || tmp) && regtry(prog,s))
 			    goto got_it;
 		    break;
 		case NBOUND:
 		    if (minlen)
 			dontbother++,strend--;
 		    if (s != string) {
 			i = s[-1];
 			tmp = isALNUM(i);
 		    }
 		    else
 			tmp = isALNUM(regprev);	/* assume not alphanumeric */
 		    while (s < strend) {
 			    i = *s;
 			    if (tmp != isALNUM(i))
 				    tmp = !tmp;
 			    else if (regtry(prog, s))
 				    goto got_it;
 			    s++;
 		    }
 		    if ((minlen || !tmp) && regtry(prog,s))
 			    goto got_it;
 		    break;
 		case ALNUM:
 		    while (s < strend) {
 			    i = *s;
 			    if (isALNUM(i)) {
 				    if (tmp && regtry(prog, s))
 					    goto got_it;
 				    else
 					    tmp = doevery;
 			    }
 			    else
 				    tmp = 1;
 			    s++;
 		    }
 		    break;
 		case NALNUM:
 		    while (s < strend) {
 			    i = *s;
 			    if (!isALNUM(i)) {
 				    if (tmp && regtry(prog, s))
 					    goto got_it;
 				    else
 					    tmp = doevery;
 			    }
 			    else
 				    tmp = 1;
 			    s++;
 		    }
 		    break;
 		case SPACE:
 		    while (s < strend) {
 			    if (isSPACE(*s)) {
 				    if (tmp && regtry(prog, s))
 					    goto got_it;
 				    else
 					    tmp = doevery;
 			    }
 			    else
 				    tmp = 1;
 			    s++;
 		    }
 		    break;
 		case NSPACE:
 		    while (s < strend) {
 			    if (!isSPACE(*s)) {
 				    if (tmp && regtry(prog, s))
 					    goto got_it;
 				    else
 					    tmp = doevery;
 			    }
 			    else
 				    tmp = 1;
 			    s++;
 		    }
 		    break;
 		case DIGIT:
 		    while (s < strend) {
 			    if (isDIGIT(*s)) {
 				    if (tmp && regtry(prog, s))
 					    goto got_it;
 				    else
 					    tmp = doevery;
 			    }
 			    else
 				    tmp = 1;
 			    s++;
 		    }
 		    break;
 		case NDIGIT:
 		    while (s < strend) {
 			    if (!isDIGIT(*s)) {
 				    if (tmp && regtry(prog, s))
 					    goto got_it;
 				    else
 					    tmp = doevery;
 			    }
 			    else
 				    tmp = 1;
 			    s++;
 		    }
 		    break;
 		}
 	}
 	else {
 		if (minlen)
 		    dontbother = minlen - 1;
 		strend -= dontbother;
 		/* We don't know much -- general case. */
 		do {
 			if (regtry(prog, s))
 				goto got_it;
 		} while (s++ < strend);
 	}

 	/* Failure. */
 	goto phooey;

     got_it:
 	if ((!safebase && (prog->nparens || sawampersand)) || prog->do_folding){
 		strend += dontbother;	/* uncheat */
 		if (safebase)			/* no need for $digit later */
 		    s = strbeg;
 		else if (strbeg != prog->subbase) {
 		    i = strend - string + (stringarg - strbeg);
 		    s = nsavestr(strbeg,i);	/* so $digit will work later */
 		    if (prog->subbase)
 			    Safefree(prog->subbase);
 		    prog->subbase = s;
 		    prog->subend = s+i;
 		}
 		else
 		    s = prog->subbase;
 		s += (stringarg - strbeg);
 		for (i = 0; i <= prog->nparens; i++) {
 			if (prog->endp[i]) {
 			    prog->startp[i] = s + (prog->startp[i] - string);
 			    prog->endp[i] = s + (prog->endp[i] - string);
 			}
 		}
 		if (prog->do_folding)
 			Safefree(string);
 	}
 	return(1);

     phooey:
 	if (prog->do_folding)
 		Safefree(string);
 	return(0);
 }

 /*
  - regtry - try match at specific point
  */
 static int			/* 0 failure, 1 success */
 regtry(prog, string)
 regexp *prog;
 char *string;
 {
 	register int i;
 	register char **sp;
 	register char **ep;

 	reginput = string;
 	regstartp = prog->startp;
 	regendp = prog->endp;
 	reglastparen = &prog->lastparen;
 	prog->lastparen = 0;

 	sp = prog->startp;
 	ep = prog->endp;
 	if (prog->nparens) {
 		for (i = prog->nparens; i >= 0; i--) {
 			*sp++ = NULL;
 			*ep++ = NULL;
 		}
 	}
 	if (regmatch(prog->program + 1) && reginput >= regtill) {
 		prog->startp[0] = string;
 		prog->endp[0] = reginput;
 		return(1);
 	} else
 		return(0);
 }

 /*
  - regmatch - main matching routine
  *
  * Conceptually the strategy is simple:  check to see whether the current
  * node matches, call self recursively to see whether the rest matches,
  * and then act accordingly.  In practice we make some effort to avoid
  * recursion, in particular by going through "ordinary" nodes (that don't
  * need to know whether the rest of the match failed) by a loop instead of
  * by recursion.
  */
 /* [lwall] I've hoisted the register declarations to the outer block in order to
  * maybe save a little bit of pushing and popping on the stack.  It also takes
  * advantage of machines that use a register save mask on subroutine entry.
  */
 static int			/* 0 failure, 1 success */
 regmatch(prog)
 char *prog;
 {
 	register char *scan;	/* Current node. */
 	char *next;		/* Next node. */
 	register int nextchar;
 	register int n;		/* no or next */
 	register int ln;        /* len or last */
 	register char *s;	/* operand or save */
 	register char *locinput = reginput;

 	nextchar = *locinput;
 	scan = prog;
 #ifdef DEBUGGING
 	if (scan != NULL && regnarrate)
 		fprintf(stderr, "%s(\n", regprop(scan));
 #endif
 	while (scan != NULL) {
 #ifdef DEBUGGING
 		if (regnarrate)
 			fprintf(stderr, "%s...\n", regprop(scan));
 #endif

 #ifdef REGALIGN
 		next = scan + NEXT(scan);
 		if (next == scan)
 		    next = NULL;
 #else
 		next = regnext(scan);
 #endif

 		switch (OP(scan)) {
 		case BOL:
 			if (locinput == regbol ? regprev == '\n' :
 			    ((nextchar || locinput < regeol) &&
 			      locinput[-1] == '\n') )
 			{
 				/* regtill = regbol; */
 				break;
 			}
 			return(0);
 		case EOL:
 			if ((nextchar || locinput < regeol) && nextchar != '\n')
 				return(0);
 			if (!multiline && regeol - locinput > 1)
 				return 0;
 			/* regtill = regbol; */
 			break;
 		case ANY:
 			if ((nextchar == '\0' && locinput >= regeol) ||
 			  nextchar == '\n')
 				return(0);
 			nextchar = *++locinput;
 			break;
 		case EXACTLY:
 			s = OPERAND(scan);
 			ln = *s++;
 			/* Inline the first character, for speed. */
 			if (*s != nextchar)
 				return(0);
 			if (regeol - locinput < ln)
 				return 0;
 			if (ln > 1 && bcmp(s, locinput, ln) != 0)
 				return(0);
 			locinput += ln;
 			nextchar = *locinput;
 			break;
 		case ANYOF:
 			s = OPERAND(scan);
 			if (nextchar < 0)
 				nextchar = UCHARAT(locinput);
 			if (s[nextchar >> 3] & (1 << (nextchar&7)))
 				return(0);
 			if (!nextchar && locinput >= regeol)
 				return 0;
 			nextchar = *++locinput;
 			break;
 		case ALNUM:
 			if (!nextchar)
 				return(0);
 			if (!isALNUM(nextchar))
 				return(0);
 			nextchar = *++locinput;
 			break;
 		case NALNUM:
 			if (!nextchar && locinput >= regeol)
 				return(0);
 			if (isALNUM(nextchar))
 				return(0);
 			nextchar = *++locinput;
 			break;
 		case NBOUND:
 		case BOUND:
 			if (locinput == regbol)	/* was last char in word? */
 				ln = isALNUM(regprev);
 			else
 				ln = isALNUM(locinput[-1]);
 			n = isALNUM(nextchar); /* is next char in word? */
 			if ((ln == n) == (OP(scan) == BOUND))
 				return(0);
 			break;
 		case SPACE:
 			if (!nextchar && locinput >= regeol)
 				return(0);
 			if (!isSPACE(nextchar))
 				return(0);
 			nextchar = *++locinput;
 			break;
 		case NSPACE:
 			if (!nextchar)
 				return(0);
 			if (isSPACE(nextchar))
 				return(0);
 			nextchar = *++locinput;
 			break;
 		case DIGIT:
 			if (!isDIGIT(nextchar))
 				return(0);
 			nextchar = *++locinput;
 			break;
 		case NDIGIT:
 			if (!nextchar && locinput >= regeol)
 				return(0);
 			if (isDIGIT(nextchar))
 				return(0);
 			nextchar = *++locinput;
 			break;
 		case REF:
 			n = ARG1(scan);  /* which paren pair */
 			s = regmystartp[n];
 			if (!s)
 			    return(0);
 			if (!regmyendp[n])
 			    return(0);
 			if (s == regmyendp[n])
 			    break;
 			/* Inline the first character, for speed. */
 			if (*s != nextchar)
 				return(0);
 			ln = regmyendp[n] - s;
 			if (locinput + ln > regeol)
 				return 0;
 			if (ln > 1 && bcmp(s, locinput, ln) != 0)
 				return(0);
 			locinput += ln;
 			nextchar = *locinput;
 			break;

 		case NOTHING:
 			break;
 		case BACK:
 			break;
 		case OPEN:
 			n = ARG1(scan);  /* which paren pair */
 			reginput = locinput;

 			regmystartp[n] = locinput;	/* for REF */
 			if (regmatch(next)) {
 				/*
 				 * Don't set startp if some later
 				 * invocation of the same parentheses
 				 * already has.
 				 */
 				if (regstartp[n] == NULL)
 					regstartp[n] = locinput;
 				return(1);
 			} else
 				return(0);
 			/* NOTREACHED */
 		case CLOSE: {
 				n = ARG1(scan);  /* which paren pair */
 				reginput = locinput;

 				regmyendp[n] = locinput;	/* for REF */
 				if (regmatch(next)) {
 					/*
 					 * Don't set endp if some later
 					 * invocation of the same parentheses
 					 * already has.
 					 */
 					if (regendp[n] == NULL) {
 						regendp[n] = locinput;
 						if (n > *reglastparen)
 						    *reglastparen = n;
 					}
 					return(1);
 				} else
 					return(0);
 			}
 			/*NOTREACHED*/
 		case BRANCH: {
 				if (OP(next) != BRANCH)		/* No choice. */
 					next = NEXTOPER(scan);	/* Avoid recursion. */
 				else {
 					do {
 						reginput = locinput;
 						if (regmatch(NEXTOPER(scan)))
 							return(1);
 #ifdef REGALIGN
 						if (n = NEXT(scan))
 						    scan += n;
 						else
 						    scan = NULL;
 #else
 						scan = regnext(scan);
 #endif
 					} while (scan != NULL && OP(scan) == BRANCH);
 					return(0);
 					/* NOTREACHED */
 				}
 			}
 			break;
 		case CURLY:
 			ln = ARG1(scan);  /* min to match */
 			n  = ARG2(scan);  /* max to match */
 			scan = NEXTOPER(scan) + 4;
 			goto repeat;
 		case STAR:
 			ln = 0;
 			n = 0;
 			scan = NEXTOPER(scan);
 			goto repeat;
 		case PLUS:
 			/*
 			 * Lookahead to avoid useless match attempts
 			 * when we know what character comes next.
 			 */
 			ln = 1;
 			n = 0;
 			scan = NEXTOPER(scan);
 		    repeat:
 			if (OP(next) == EXACTLY)
 				nextchar = *(OPERAND(next)+1);
 			else
 				nextchar = -1000;
 			reginput = locinput;
 			n = regrepeat(scan, n);
 			if (!multiline && OP(next) == EOL && ln < n)
 			    ln = n;			/* why back off? */
 			while (n >= ln) {
 				/* If it could work, try it. */
 				if (nextchar == -1000 || *reginput == nextchar)
 					if (regmatch(next))
 						return(1);
 				/* Couldn't or didn't -- back up. */
 				n--;
 				reginput = locinput + n;
 			}
 			return(0);
 		case END:
 			reginput = locinput; /* put where regtry can find it */
 			return(1);	/* Success! */
 		default:
 			printf("%x %d\n",scan,scan[1]);
 			FAIL("regexp memory corruption");
 		}

 		scan = next;
 	}

 	/*
 	 * We get here only if there's trouble -- normally "case END" is
 	 * the terminating point.
 	 */
 	FAIL("corrupted regexp pointers");
 	/*NOTREACHED*/
 #ifdef lint
 	return 0;
 #endif
 }

 /*
  - regrepeat - repeatedly match something simple, report how many
  */
 /*
  * [This routine now assumes that it will only match on things of length 1.
  * That was true before, but now we assume scan - reginput is the count,
  * rather than incrementing count on every character.]
  */
 static int
 regrepeat(p, max)
 char *p;
 int max;
 {
 	register char *scan;
 	register char *opnd;
 	register int c;
 	register char *loceol = regeol;

 	scan = reginput;
 	if (max && max < loceol - scan)
 	    loceol = scan + max;
 	opnd = OPERAND(p);
 	switch (OP(p)) {
 	case ANY:
 		while (scan < loceol && *scan != '\n')
 			scan++;
 		break;
 	case EXACTLY:		/* length of string is 1 */
 		opnd++;
 		while (scan < loceol && *opnd == *scan)
 			scan++;
 		break;
 	case ANYOF:
 		c = UCHARAT(scan);
 		while (scan < loceol && !(opnd[c >> 3] & (1 << (c & 7)))) {
 			scan++;
 			c = UCHARAT(scan);
 		}
 		break;
 	case ALNUM:
 		while (scan < loceol && isALNUM(*scan))
 			scan++;
 		break;
 	case NALNUM:
 		while (scan < loceol && !isALNUM(*scan))
 			scan++;
 		break;
 	case SPACE:
 		while (scan < loceol && isSPACE(*scan))
 			scan++;
 		break;
 	case NSPACE:
 		while (scan < loceol && !isSPACE(*scan))
 			scan++;
 		break;
 	case DIGIT:
 		while (scan < loceol && isDIGIT(*scan))
 			scan++;
 		break;
 	case NDIGIT:
 		while (scan < loceol && !isDIGIT(*scan))
 			scan++;
 		break;
 	default:		/* Oh dear.  Called inappropriately. */
 		FAIL("internal regexp foulup");
 		/* NOTREACHED */
 	}

 	c = scan - reginput;
 	reginput = scan;

 	return(c);
 }

 /*
  - regnext - dig the "next" pointer out of a node
  *
  * [Note, when REGALIGN is defined there are two places in regmatch()
  * that bypass this code for speed.]
  */
 char *
 regnext(p)
 register char *p;
 {
 	register int offset;

 	if (p == &regdummy)
 		return(NULL);

 	offset = NEXT(p);
 	if (offset == 0)
 		return(NULL);

 #ifdef REGALIGN
 	return(p+offset);
 #else
 	if (OP(p) == BACK)
 		return(p-offset);
 	else
 		return(p+offset);
 #endif
 }
	/* NOTE: this is derived from Henry Spencer's regexp code, and should not
	* confused with the original package (see point 3 below). Thanks, Henry!
	*/

	/* Additional note: this code is very heavily munged from Henry's version
	* in places. In some spots I've traded clarity for efficiency, so don't
	* blame Henry for some of the lack of readability.
	*/

	/* $RCSfile$$Revision$$Date$
	*
	* $Log$
	* Revision 1.1 2004/02/17 22:21:16 criswell
	* Initial commit of the perl Malloc Benchmark. I've cheated a little by
	* generating the yacc output files and committing them directly, but it was
	* easier than disabling the Bison Voodoo that gets executed by default.
	*
	* Revision 4.0.1.1 91/04/12 09:07:39 lwall
	* patch1: regexec only allocated space for 9 subexpresssions
	*
	* Revision 4.0 91/03/20 01:39:16 lwall
	* 4.0 baseline.
	*
	*/

	/*
	* regcomp and regexec -- regsub and regerror are not used in perl
	*
	* Copyright (c) 1986 by University of Toronto.
	* Written by Henry Spencer. Not derived from licensed software.
	*
	* Permission is granted to anyone to use this software for any
	* purpose on any computer system, and to redistribute it freely,
	* subject to the following restrictions:
	*
	* 1. The author is not responsible for the consequences of use of
	* this software, no matter how awful, even if they arise
	* from defects in it.
	*
	* 2. The origin of this software must not be misrepresented, either
	* by explicit claim or by omission.
	*
	* 3. Altered versions must be plainly marked as such, and must not
	* be misrepresented as being the original software.
	*
	**** Alterations to Henry's code are...
	****
	**** Copyright (c) 1989, Larry Wall
	****
	**** You may distribute under the terms of the GNU General Public License
	**** as specified in the README file that comes with the perl 3.0 kit.
	*
	* Beware that some of this code is subtly aware of the way operator
	* precedence is structured in regular expressions. Serious changes in
	* regular-expression syntax might require a total rethink.
	*/
	#include "EXTERN.h"
	#include "perl.h"
	#include "regcomp.h"

	#ifndef STATIC
	#define STATIC static
	#endif

	#ifdef DEBUGGING
	int regnarrate = 0;
	#endif

	#define isALNUM(c) (isascii(c) && (isalpha(c) \|\| isdigit(c) \|\| c == '_'))
	#define isSPACE(c) (isascii(c) && isspace(c))
	#define isDIGIT(c) (isascii(c) && isdigit(c))
	#define isUPPER(c) (isascii(c) && isupper(c))

	/*
	* regexec and friends
	*/

	/*
	* Global work variables for regexec().
	*/
	static char *regprecomp;
	static char reginput; / String-input pointer. */
	static char regprev; /* char before regbol, \n if none */
	static char regbol; / Beginning of input, for ^ check. */
	static char regeol; / End of input, for $ check. */
	static char *regstartp; / Pointer to startp array. */
	static char *regendp; / Ditto for endp. */
	static char reglastparen; / Similarly for lastparen. */
	static char *regtill;

	static int regmyp_size = 0;
	static char regmystartp = Null(char);
	static char regmyendp = Null(char);

	/*
	* Forwards.
	*/
	STATIC int regtry();
	STATIC int regmatch();
	STATIC int regrepeat();

	extern int multiline;

	/*
	- regexec - match a regexp against a string
	*/
	int
	regexec(prog, stringarg, strend, strbeg, minend, screamer, safebase)
	register regexp *prog;
	char *stringarg;
	register char strend; / pointer to null at end of string */
	char strbeg; / real beginning of string */
	int minend; /* end of match must be at least minend after stringarg */
	STR *screamer;
	int safebase; /* no need to remember string in subbase */
	{
	register char *s;
	register int i;
	register char *c;
	register char *string = stringarg;
	register int tmp;
	int minlen = 0; /* must match at least this many chars */
	int dontbother = 0; /* how many characters not to try at end */

	/* Be paranoid... */
	if (prog == NULL \|\| string == NULL) {
	fatal("NULL regexp parameter");
	return(0);
	}

	if (string == strbeg) /* is ^ valid at stringarg? */
	regprev = '\n';
	else {
	regprev = stringarg[-1];
	if (!multiline && regprev == '\n')
	regprev = '\0'; /* force ^ to NOT match */
	}
	regprecomp = prog->precomp;
	/* Check validity of program. */
	if (UCHARAT(prog->program) != MAGIC) {
	FAIL("corrupted regexp program");
	}

	if (prog->do_folding) {
	safebase = FALSE;
	i = strend - string;
	New(1101,c,i+1,char);
	(void)bcopy(string, c, i+1);
	string = c;
	strend = string + i;
	for (s = string; s < strend; s++)
	if (isUPPER(*s))
	s = tolower(s);
	}

	/* If there is a "must appear" string, look for it. */
	s = string;
	if (prog->regmust != Nullstr &&
	(!(prog->reganch & 1) \|\| (multiline && prog->regback >= 0)) ) {
	if (stringarg == strbeg && screamer) {
	if (screamfirst[prog->regmust->str_rare] >= 0)
	s = screaminstr(screamer,prog->regmust);
	else
	s = Nullch;
	}
	#ifndef lint
	else
	s = fbminstr((unsigned char)s, (unsigned char)strend,
	prog->regmust);
	#endif
	if (!s) {
	++prog->regmust->str_u.str_useful; /* hooray */
	goto phooey; /* not present */
	}
	else if (prog->regback >= 0) {
	s -= prog->regback;
	if (s < string)
	s = string;
	minlen = prog->regback + prog->regmust->str_cur;
	}
	else if (--prog->regmust->str_u.str_useful < 0) { /* boo */
	str_free(prog->regmust);
	prog->regmust = Nullstr; /* disable regmust */
	s = string;
	}
	else {
	s = string;
	minlen = prog->regmust->str_cur;
	}
	}

	/* Mark beginning of line for ^ . */
	regbol = string;

	/* Mark end of line for $ (and such) */
	regeol = strend;

	/* see how far we have to get to not match where we matched before */
	regtill = string+minend;

	/* Allocate our backreference arrays */
	if ( regmyp_size < prog->nparens + 1 ) {
	/* Allocate or enlarge the arrays */
	regmyp_size = prog->nparens + 1;
	if ( regmyp_size < 10 ) regmyp_size = 10; /* minimum */
	if ( regmystartp ) {
	/* reallocate larger */
	Renew(regmystartp,regmyp_size,char*);
	Renew(regmyendp, regmyp_size,char*);
	}
	else {
	/* Initial allocation */
	New(1102,regmystartp,regmyp_size,char*);
	New(1102,regmyendp, regmyp_size,char*);
	}

	}

	/* Simplest case: anchored match need be tried only once. */
	/* [unless multiline is set] */
	if (prog->reganch & 1) {
	if (regtry(prog, string))
	goto got_it;
	else if (multiline) {
	if (minlen)
	dontbother = minlen - 1;
	strend -= dontbother;
	/* for multiline we only have to try after newlines */
	if (s > string)
	s--;
	while (s < strend) {
	if (*s++ == '\n') {
	if (s < strend && regtry(prog, s))
	goto got_it;
	}
	}
	}
	goto phooey;
	}

	/* Messy cases: unanchored match. */
	if (prog->regstart) {
	if (prog->reganch & 2) { /* we have /x+whatever/ */
	/* it must be a one character string */
	i = prog->regstart->str_ptr[0];
	while (s < strend) {
	if (*s == i) {
	if (regtry(prog, s))
	goto got_it;
	s++;
	while (s < strend && *s == i)
	s++;
	}
	s++;
	}
	}
	else if (prog->regstart->str_pok == 3) {
	/* We know what string it must start with. */
	#ifndef lint
	while ((s = fbminstr((unsigned char*)s,
	(unsigned char*)strend, prog->regstart)) != NULL)
	#else
	while (s = Nullch)
	#endif
	{
	if (regtry(prog, s))
	goto got_it;
	s++;
	}
	}
	else {
	c = prog->regstart->str_ptr;
	while ((s = ninstr(s, strend,
	c, c + prog->regstart->str_cur )) != NULL) {
	if (regtry(prog, s))
	goto got_it;
	s++;
	}
	}
	goto phooey;
	}
	if (c = prog->regstclass) {
	int doevery = (prog->reganch & 2) == 0;

	if (minlen)
	dontbother = minlen - 1;
	strend -= dontbother; /* don't bother with what can't match */
	tmp = 1;
	/* We know what class it must start with. */
	switch (OP(c)) {
	case ANYOF:
	c = OPERAND(c);
	while (s < strend) {
	i = UCHARAT(s);
	if (!(c[i >> 3] & (1 << (i&7)))) {
	if (tmp && regtry(prog, s))
	goto got_it;
	else
	tmp = doevery;
	}
	else
	tmp = 1;
	s++;
	}
	break;
	case BOUND:
	if (minlen)
	dontbother++,strend--;
	if (s != string) {
	i = s[-1];
	tmp = isALNUM(i);
	}
	else
	tmp = isALNUM(regprev); /* assume not alphanumeric */
	while (s < strend) {
	i = *s;
	if (tmp != isALNUM(i)) {
	tmp = !tmp;
	if (regtry(prog, s))
	goto got_it;
	}
	s++;
	}
	if ((minlen \|\| tmp) && regtry(prog,s))
	goto got_it;
	break;
	case NBOUND:
	if (minlen)
	dontbother++,strend--;
	if (s != string) {
	i = s[-1];
	tmp = isALNUM(i);
	}
	else
	tmp = isALNUM(regprev); /* assume not alphanumeric */
	while (s < strend) {
	i = *s;
	if (tmp != isALNUM(i))
	tmp = !tmp;
	else if (regtry(prog, s))
	goto got_it;
	s++;
	}
	if ((minlen \|\| !tmp) && regtry(prog,s))
	goto got_it;
	break;
	case ALNUM:
	while (s < strend) {
	i = *s;
	if (isALNUM(i)) {
	if (tmp && regtry(prog, s))
	goto got_it;
	else
	tmp = doevery;
	}
	else
	tmp = 1;
	s++;
	}
	break;
	case NALNUM:
	while (s < strend) {
	i = *s;
	if (!isALNUM(i)) {
	if (tmp && regtry(prog, s))
	goto got_it;
	else
	tmp = doevery;
	}
	else
	tmp = 1;
	s++;
	}
	break;
	case SPACE:
	while (s < strend) {
	if (isSPACE(*s)) {
	if (tmp && regtry(prog, s))
	goto got_it;
	else
	tmp = doevery;
	}
	else
	tmp = 1;
	s++;
	}
	break;
	case NSPACE:
	while (s < strend) {
	if (!isSPACE(*s)) {
	if (tmp && regtry(prog, s))
	goto got_it;
	else
	tmp = doevery;
	}
	else
	tmp = 1;
	s++;
	}
	break;
	case DIGIT:
	while (s < strend) {
	if (isDIGIT(*s)) {
	if (tmp && regtry(prog, s))
	goto got_it;
	else
	tmp = doevery;
	}
	else
	tmp = 1;
	s++;
	}
	break;
	case NDIGIT:
	while (s < strend) {
	if (!isDIGIT(*s)) {
	if (tmp && regtry(prog, s))
	goto got_it;
	else
	tmp = doevery;
	}
	else
	tmp = 1;
	s++;
	}
	break;
	}
	}
	else {
	if (minlen)
	dontbother = minlen - 1;
	strend -= dontbother;
	/* We don't know much -- general case. */
	do {
	if (regtry(prog, s))
	goto got_it;
	} while (s++ < strend);
	}

	/* Failure. */
	goto phooey;

	got_it:
	if ((!safebase && (prog->nparens \|\| sawampersand)) \|\| prog->do_folding){
	strend += dontbother; /* uncheat */
	if (safebase) /* no need for $digit later */
	s = strbeg;
	else if (strbeg != prog->subbase) {
	i = strend - string + (stringarg - strbeg);
	s = nsavestr(strbeg,i); /* so $digit will work later */
	if (prog->subbase)
	Safefree(prog->subbase);
	prog->subbase = s;
	prog->subend = s+i;
	}
	else
	s = prog->subbase;
	s += (stringarg - strbeg);
	for (i = 0; i <= prog->nparens; i++) {
	if (prog->endp[i]) {
	prog->startp[i] = s + (prog->startp[i] - string);
	prog->endp[i] = s + (prog->endp[i] - string);
	}
	}
	if (prog->do_folding)
	Safefree(string);
	}
	return(1);

	phooey:
	if (prog->do_folding)
	Safefree(string);
	return(0);
	}

	/*
	- regtry - try match at specific point
	*/
	static int /* 0 failure, 1 success */
	regtry(prog, string)
	regexp *prog;
	char *string;
	{
	register int i;
	register char **sp;
	register char **ep;

	reginput = string;
	regstartp = prog->startp;
	regendp = prog->endp;
	reglastparen = &prog->lastparen;
	prog->lastparen = 0;

	sp = prog->startp;
	ep = prog->endp;
	if (prog->nparens) {
	for (i = prog->nparens; i >= 0; i--) {
	*sp++ = NULL;
	*ep++ = NULL;
	}
	}
	if (regmatch(prog->program + 1) && reginput >= regtill) {
	prog->startp[0] = string;
	prog->endp[0] = reginput;
	return(1);
	} else
	return(0);
	}

	/*
	- regmatch - main matching routine
	*
	* Conceptually the strategy is simple: check to see whether the current
	* node matches, call self recursively to see whether the rest matches,
	* and then act accordingly. In practice we make some effort to avoid
	* recursion, in particular by going through "ordinary" nodes (that don't
	* need to know whether the rest of the match failed) by a loop instead of
	* by recursion.
	*/
	/* [lwall] I've hoisted the register declarations to the outer block in order to
	* maybe save a little bit of pushing and popping on the stack. It also takes
	* advantage of machines that use a register save mask on subroutine entry.
	*/
	static int /* 0 failure, 1 success */
	regmatch(prog)
	char *prog;
	{
	register char scan; / Current node. */
	char next; / Next node. */
	register int nextchar;
	register int n; /* no or next */
	register int ln; /* len or last */
	register char s; / operand or save */
	register char *locinput = reginput;

	nextchar = *locinput;
	scan = prog;
	#ifdef DEBUGGING
	if (scan != NULL && regnarrate)
	fprintf(stderr, "%s(\n", regprop(scan));
	#endif
	while (scan != NULL) {
	#ifdef DEBUGGING
	if (regnarrate)
	fprintf(stderr, "%s...\n", regprop(scan));
	#endif

	#ifdef REGALIGN
	next = scan + NEXT(scan);
	if (next == scan)
	next = NULL;
	#else
	next = regnext(scan);
	#endif

	switch (OP(scan)) {
	case BOL:
	if (locinput == regbol ? regprev == '\n' :
	((nextchar \|\| locinput < regeol) &&
	locinput[-1] == '\n') )
	{
	/* regtill = regbol; */
	break;
	}
	return(0);
	case EOL:
	if ((nextchar \|\| locinput < regeol) && nextchar != '\n')
	return(0);
	if (!multiline && regeol - locinput > 1)
	return 0;
	/* regtill = regbol; */
	break;
	case ANY:
	if ((nextchar == '\0' && locinput >= regeol) \|\|
	nextchar == '\n')
	return(0);
	nextchar = *++locinput;
	break;
	case EXACTLY:
	s = OPERAND(scan);
	ln = *s++;
	/* Inline the first character, for speed. */
	if (*s != nextchar)
	return(0);
	if (regeol - locinput < ln)
	return 0;
	if (ln > 1 && bcmp(s, locinput, ln) != 0)
	return(0);
	locinput += ln;
	nextchar = *locinput;
	break;
	case ANYOF:
	s = OPERAND(scan);
	if (nextchar < 0)
	nextchar = UCHARAT(locinput);
	if (s[nextchar >> 3] & (1 << (nextchar&7)))
	return(0);
	if (!nextchar && locinput >= regeol)
	return 0;
	nextchar = *++locinput;
	break;
	case ALNUM:
	if (!nextchar)
	return(0);
	if (!isALNUM(nextchar))
	return(0);
	nextchar = *++locinput;
	break;
	case NALNUM:
	if (!nextchar && locinput >= regeol)
	return(0);
	if (isALNUM(nextchar))
	return(0);
	nextchar = *++locinput;
	break;
	case NBOUND:
	case BOUND:
	if (locinput == regbol) /* was last char in word? */
	ln = isALNUM(regprev);
	else
	ln = isALNUM(locinput[-1]);
	n = isALNUM(nextchar); /* is next char in word? */
	if ((ln == n) == (OP(scan) == BOUND))
	return(0);
	break;
	case SPACE:
	if (!nextchar && locinput >= regeol)
	return(0);
	if (!isSPACE(nextchar))
	return(0);
	nextchar = *++locinput;
	break;
	case NSPACE:
	if (!nextchar)
	return(0);
	if (isSPACE(nextchar))
	return(0);
	nextchar = *++locinput;
	break;
	case DIGIT:
	if (!isDIGIT(nextchar))
	return(0);
	nextchar = *++locinput;
	break;
	case NDIGIT:
	if (!nextchar && locinput >= regeol)
	return(0);
	if (isDIGIT(nextchar))
	return(0);
	nextchar = *++locinput;
	break;
	case REF:
	n = ARG1(scan); /* which paren pair */
	s = regmystartp[n];
	if (!s)
	return(0);
	if (!regmyendp[n])
	return(0);
	if (s == regmyendp[n])
	break;
	/* Inline the first character, for speed. */
	if (*s != nextchar)
	return(0);
	ln = regmyendp[n] - s;
	if (locinput + ln > regeol)
	return 0;
	if (ln > 1 && bcmp(s, locinput, ln) != 0)
	return(0);
	locinput += ln;
	nextchar = *locinput;
	break;

	case NOTHING:
	break;
	case BACK:
	break;
	case OPEN:
	n = ARG1(scan); /* which paren pair */
	reginput = locinput;

	regmystartp[n] = locinput; /* for REF */
	if (regmatch(next)) {
	/*
	* Don't set startp if some later
	* invocation of the same parentheses
	* already has.
	*/
	if (regstartp[n] == NULL)
	regstartp[n] = locinput;
	return(1);
	} else
	return(0);
	/* NOTREACHED */
	case CLOSE: {
	n = ARG1(scan); /* which paren pair */
	reginput = locinput;

	regmyendp[n] = locinput; /* for REF */
	if (regmatch(next)) {
	/*
	* Don't set endp if some later
	* invocation of the same parentheses
	* already has.
	*/
	if (regendp[n] == NULL) {
	regendp[n] = locinput;
	if (n > *reglastparen)
	*reglastparen = n;
	}
	return(1);
	} else
	return(0);
	}
	/NOTREACHED/
	case BRANCH: {
	if (OP(next) != BRANCH) /* No choice. */
	next = NEXTOPER(scan); /* Avoid recursion. */
	else {
	do {
	reginput = locinput;
	if (regmatch(NEXTOPER(scan)))
	return(1);
	#ifdef REGALIGN
	if (n = NEXT(scan))
	scan += n;
	else
	scan = NULL;
	#else
	scan = regnext(scan);
	#endif
	} while (scan != NULL && OP(scan) == BRANCH);
	return(0);
	/* NOTREACHED */
	}
	}
	break;
	case CURLY:
	ln = ARG1(scan); /* min to match */
	n = ARG2(scan); /* max to match */
	scan = NEXTOPER(scan) + 4;
	goto repeat;
	case STAR:
	ln = 0;
	n = 0;
	scan = NEXTOPER(scan);
	goto repeat;
	case PLUS:
	/*
	* Lookahead to avoid useless match attempts
	* when we know what character comes next.
	*/
	ln = 1;
	n = 0;
	scan = NEXTOPER(scan);
	repeat:
	if (OP(next) == EXACTLY)
	nextchar = *(OPERAND(next)+1);
	else
	nextchar = -1000;
	reginput = locinput;
	n = regrepeat(scan, n);
	if (!multiline && OP(next) == EOL && ln < n)
	ln = n; /* why back off? */
	while (n >= ln) {
	/* If it could work, try it. */
	if (nextchar == -1000 \|\| *reginput == nextchar)
	if (regmatch(next))
	return(1);
	/* Couldn't or didn't -- back up. */
	n--;
	reginput = locinput + n;
	}
	return(0);
	case END:
	reginput = locinput; /* put where regtry can find it */
	return(1); /* Success! */
	default:
	printf("%x %d\n",scan,scan[1]);
	FAIL("regexp memory corruption");
	}

	scan = next;
	}

	/*
	* We get here only if there's trouble -- normally "case END" is
	* the terminating point.
	*/
	FAIL("corrupted regexp pointers");
	/NOTREACHED/
	#ifdef lint
	return 0;
	#endif
	}

	/*
	- regrepeat - repeatedly match something simple, report how many
	*/
	/*
	* [This routine now assumes that it will only match on things of length 1.
	* That was true before, but now we assume scan - reginput is the count,
	* rather than incrementing count on every character.]
	*/
	static int
	regrepeat(p, max)
	char *p;
	int max;
	{
	register char *scan;
	register char *opnd;
	register int c;
	register char *loceol = regeol;

	scan = reginput;
	if (max && max < loceol - scan)
	loceol = scan + max;
	opnd = OPERAND(p);
	switch (OP(p)) {
	case ANY:
	while (scan < loceol && *scan != '\n')
	scan++;
	break;
	case EXACTLY: /* length of string is 1 */
	opnd++;
	while (scan < loceol && opnd == scan)
	scan++;
	break;
	case ANYOF:
	c = UCHARAT(scan);
	while (scan < loceol && !(opnd[c >> 3] & (1 << (c & 7)))) {
	scan++;
	c = UCHARAT(scan);
	}
	break;
	case ALNUM:
	while (scan < loceol && isALNUM(*scan))
	scan++;
	break;
	case NALNUM:
	while (scan < loceol && !isALNUM(*scan))
	scan++;
	break;
	case SPACE:
	while (scan < loceol && isSPACE(*scan))
	scan++;
	break;
	case NSPACE:
	while (scan < loceol && !isSPACE(*scan))
	scan++;
	break;
	case DIGIT:
	while (scan < loceol && isDIGIT(*scan))
	scan++;
	break;
	case NDIGIT:
	while (scan < loceol && !isDIGIT(*scan))
	scan++;
	break;
	default: /* Oh dear. Called inappropriately. */
	FAIL("internal regexp foulup");
	/* NOTREACHED */
	}

	c = scan - reginput;
	reginput = scan;

	return(c);
	}

	/*
	- regnext - dig the "next" pointer out of a node
	*
	* [Note, when REGALIGN is defined there are two places in regmatch()
	* that bypass this code for speed.]
	*/
	char *
	regnext(p)
	register char *p;
	{
	register int offset;

	if (p == &regdummy)
	return(NULL);

	offset = NEXT(p);
	if (offset == 0)
	return(NULL);

	#ifdef REGALIGN
	return(p+offset);
	#else
	if (OP(p) == BACK)
	return(p-offset);
	else
	return(p+offset);
	#endif
	}