blob: 5f464d42b2d5aeb084f4ffe42f1b4a9da4601b18 [file] [log] [blame]
/* Copyright (c) 1991 Sun Wu and Udi Manber. All Rights Reserved. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "agrep.h"
#include "checkfile.h"
#define CHARTYPE unsigned char
unsigned Mask[MAXSYM];
unsigned Init1, NO_ERR_MASK, Init[MaxError];
unsigned Bit[WORD+1];
CHAR buffer[BlockSize+Maxline+1];
unsigned Next[MaxNext], Next1[MaxNext];
unsigned wildmask, endposition, D_endpos;
int REGEX, RE_ERR, FNAME, WHOLELINE, SIMPLEPATTERN;
int COUNT, HEAD, TAIL, LINENUM, INVERSE, I, S, DD, AND, SGREP, JUMP;
int Num_Pat, PSIZE, num_of_matched, SILENT, NOPROMPT, BESTMATCH, NOUPPER;
int NOMATCH, TRUNCATE, FIRST_IN_RE, FIRSTOUTPUT;
int WORDBOUND, DELIMITER, D_length;
int EATFIRST, OUTTAIL;
int FILEOUT;
int DNA = 0;
int APPROX = 0;
int PAT_FILE = 0;
int CONSTANT = 0;
int total_line = 0; /* used in mgrep */
CHAR **Textfiles; /* array of filenames to be searched */
CHAR old_D_pat[MaxDelimit] = "\n"; /* to hold original D_pattern */
CHAR CurrentFileName[MAXNAME];
CHAR Progname[MAXNAME];
CHAR D_pattern[MaxDelimit] = "\n; "; /* string which delimits records --
defaults to newline */
int NOFILENAME = 0, /* Boolean flag, set for -h option */
FILENAMEONLY = 0,/* Boolean flag, set for -l option */
Numfiles = 0; /* indicates how many files in Textfiles */
extern int init(char *s, int table[32][32]);
extern void bitap(char old_D_pat[], char *Pattern, int fd, int M, int D);
extern void prepf(int fp);
extern void compat(void);
extern void mgrep(int fd);
extern void sgrep(CHARTYPE *pat, int m, int fd, int D);
extern void preprocess(CHAR *D_pattern, CHAR *Pattern);
extern int maskgen(unsigned char *Pattern, int D);
extern int check_file(char *fname);
int table[WORD][WORD];
void initial_value(void);
void compute_next(int M, unsigned *Next, unsigned *Next1);
int exponen(int m);
void re1(int Text, int M, int D);
void re(int Text, int M, int D);
void r_output(CHAR *buffer, int i, int end, int j);
void file_out(char *fname);
void usage(void);
void checksg(CHAR *Pattern, int D);
void output(register CHAR *buffer, int i1, int i2, int j);
int main(int argc, char *argv[])
{
int M, D=0, fp, fd, i;
char c;
int filetype;
unsigned char Pattern[MAXPAT], OldPattern[MAXPAT];
initial_value();
strcpy(Progname, "agrep");
if (argc < 2) usage();
Pattern[0] = '\0';
while(--argc > 0 && (*++argv)[0] == '-') {
c = *(argv[0]+1);
switch(c) {
case 'c' : COUNT = ON; /* output the # of matched */
break;
case 's' : SILENT = ON; /* silent mode */
break;
case 'p' : I = 0; /* insertion cost is 0 */
break;
case 'x' : WHOLELINE = ON; /* match the whole line */
if(WORDBOUND) {
fprintf(stderr, "%s: illegal option combination\n", Progname);
exit(2);
}
break;
case 'L' : break;
case 'd' : DELIMITER = ON; /* user defines delimiter */
if(argc <= 1) usage();
if (argv[0][2] == '\0') {/* space after -d option */
argv++;
if ((D_length = strlen(argv[0])) > MaxDelimit) {
fprintf(stderr, "%s: delimiter pattern too long\n", Progname);
exit(2);
}
D_pattern[0] = '<';
strcpy(D_pattern+1, argv[0]);
argc--;
} else {
if ((D_length = strlen(argv[0]+2)) > MaxDelimit) {
fprintf(stderr, "%s: delimiter pattern too long\n", Progname);
exit(2);
}
D_pattern[0] = '<';
strcpy(D_pattern+1, argv[0]+2);
} /* else */
strcat(D_pattern, ">; ");
D_length++; /* to count ';' as one */
break;
case 'e' : argc--;
if(argc == 0) {
fprintf(stderr, "%s: the pattern should immediately follow the -e option\n", Progname);
usage();
}
if((++argv)[0][0] == '-') {
Pattern[0] = '\\';
strcat(Pattern, (argv)[0]);
}
else strcat(Pattern, argv[0]);
break;
case 'f' : PAT_FILE = ON;
argv++;
argc--;
if((fp = open(argv[0], 0)) < 0) {
fprintf(stderr, "%s: Can't open pattern file %s\n", Progname, argv[0]);
exit(2);
}
break;
case 'h' : NOFILENAME = ON;
break;
case 'i' : NOUPPER = ON;
break;
case 'k' : argc--;
if(argc == 0) {
fprintf(stderr, "%s: the pattern should immediately follow the -k option\n", Progname);
usage();
}
CONSTANT = ON;
argv++;
strcat(Pattern, argv[0]);
if(argc > 1 && argv[1][0] == '-') {
fprintf(stderr, "%s: -k should be the last option in the command\n", Progname);
exit(2);
}
break;
case 'l' : FILENAMEONLY = ON;
break;
case 'n' : LINENUM = ON; /* output prefixed by line no*/
break;
case 'v' : INVERSE = ON; /* output no-matched lines */
break;
case 't' : OUTTAIL = ON; /* output from tail of delimiter */
break;
case 'B' : BESTMATCH = ON;
break;
case 'w' : WORDBOUND = ON;/* match to words */
if(WHOLELINE) {
fprintf(stderr, "%s: illegal option combination\n", Progname);
exit(2);
}
break;
case 'y' : NOPROMPT = ON;
break;
case 'I' : I = atoi(argv[0]+2); /* Insertion Cost */
JUMP = ON;
break;
case 'S' : S = atoi(argv[0]+2); /* Substitution Cost */
JUMP = ON;
break;
case 'D' : DD = atoi(argv[0]+2); /* Deletion Cost */
JUMP = ON;
break;
case 'G' : FILEOUT = ON;
COUNT = ON;
break;
default : if (isdigit(c)) {
APPROX = ON;
D = atoi(argv[0]+1);
if (D > MaxError) {
fprintf(stderr,"%s: the maximum number of errors is %d \n", Progname, MaxError);
exit(2);
}
} else {
fprintf(stderr, "%s: illegal option -%c\n",Progname, c);
usage();
}
} /* switch(c) */
} /* while (--argc > 0 && (*++argv)[0] == '-') */
if (FILENAMEONLY && NOFILENAME) {
fprintf(stderr, "%s: -h and -l options are mutually exclusive\n",Progname);
}
if (COUNT && (FILENAMEONLY || NOFILENAME)) {
FILENAMEONLY = OFF;
if(!FILEOUT) NOFILENAME = OFF;
}
if (!(PAT_FILE) && Pattern[0] == '\0') { /* Pattern not set with -e option */
if (argc == 0) usage();
strcpy(Pattern, *argv);
argc--;
argv++;
}
Numfiles = 0;
fd = 3; /* make sure it's not 0 */
if (argc == 0) /* check Pattern against stdin */
fd = 0;
else {
if (!(Textfiles = (CHAR **)malloc(argc * sizeof(CHAR *) ))) {
fprintf(stderr, "%s: malloc failure (you probably don't have enough memory)\n", Progname);
exit(2);
}
while (argc--) { /* one or more filenames on command line -- put
the valid filenames in a array of strings */
/*
if ((filetype = check_file(*argv)) != ISASCIIFILE) {
if(filetype == NOSUCHFILE) fprintf(stderr,"%s: %s: no such file or directory\n",Progname,*argv);
argv++;
*/
if ((filetype = check_file(*argv)) == NOSUCHFILE) {
if(filetype == NOSUCHFILE) fprintf(stderr,"%s: %s: no such file or directory\n",Progname,*argv);
argv++;
} else { /* file is ascii*/
if (!(Textfiles[Numfiles] = (CHAR *)malloc((strlen(*argv)+1)))) {
fprintf(stderr, "%s: malloc failure (you probably don't have enough memory)\n", Progname);
exit(2);
}
strcpy(Textfiles[Numfiles++], *argv++);
} /* else */
} /* while (argc--) */
} /* else */
checksg(Pattern, D); /* check if the pattern is simple */
strcpy(OldPattern, Pattern);
if (SGREP == 0) {
preprocess(D_pattern, Pattern);
strcpy(old_D_pat, D_pattern);
M = maskgen(Pattern, D);
}
else M = strlen(OldPattern);
if (PAT_FILE) prepf(fp);
if (Numfiles > 1) FNAME = ON;
if (NOFILENAME) FNAME = 0;
num_of_matched = 0;
compat(); /* check compatibility between options */
if (fd == 0) {
if(FILENAMEONLY) {
fprintf(stderr, "%s: -l option is not compatible with standard input\n", Progname);
exit(2);
}
if(PAT_FILE) mgrep(fd);
else {
if(SGREP) sgrep(OldPattern, strlen(OldPattern), fd, D);
else bitap(old_D_pat, Pattern, fd, M, D);
}
if (COUNT) {
if(INVERSE && PAT_FILE) printf("%d\n", total_line-num_of_matched);
else printf("%d\n", num_of_matched);
}
}
else {
for (i = 0; i < Numfiles; i++, close(fd), num_of_matched = 0) {
strcpy(CurrentFileName, Textfiles[i]);
if ((fd = open(Textfiles[i], 0)) <= 0) {
fprintf(stderr, "%s: can't open file %s\n",Progname, Textfiles[i]);
}
else {
if(PAT_FILE) mgrep(fd);
else {
if(SGREP) sgrep(OldPattern, strlen(OldPattern), fd, D);
else bitap(old_D_pat, Pattern, fd, M, D);
}
if (num_of_matched) NOMATCH = OFF;
if (COUNT && !FILEOUT) {
if(INVERSE && PAT_FILE) {
if(FNAME) printf("%s: %d\n", CurrentFileName, total_line - num_of_matched);
else printf("%d\n", total_line - num_of_matched);
}
else {
if(FNAME) printf("%s: %d\n", CurrentFileName, num_of_matched);
else printf("%d\n", num_of_matched);
}
} /* if COUNT */
if(FILEOUT && num_of_matched) {
file_out(CurrentFileName);
}
} /* else */
} /* for i < Numfiles */
if(NOMATCH && BESTMATCH) {
if(WORDBOUND || WHOLELINE || LINENUM || INVERSE) {
SGREP = 0;
preprocess(D_pattern, Pattern);
strcpy(old_D_pat, D_pattern);
M = maskgen(Pattern, D);
}
COUNT=ON; D=1;
while(D<M && D<=MaxError && num_of_matched == 0) {
for (i = 0; i < Numfiles; i++, close(fd)) {
strcpy(CurrentFileName, Textfiles[i]);
if ((fd = open(Textfiles[i], 0)) > 0) {
if(PAT_FILE) mgrep(fd);
else {
if(SGREP) sgrep(OldPattern,strlen(OldPattern),fd,D);
else bitap(old_D_pat,Pattern,fd,M,D);
}
}
} /* for i < Numfiles */
D++;
} /* while */
if(num_of_matched > 0) {
D--; COUNT = 0;
if(NOPROMPT) goto GO_AHEAD;
if(D==1) fprintf(stderr, "best match has 1 error, ");
else fprintf(stderr, "best match has %d errors, ", D);
fflush(stderr);
if(num_of_matched == 1) fprintf(stderr,"there is 1 match, output it? (y/n)");
else fprintf(stderr,"there are %d matches, output them? (y/n)", num_of_matched);
scanf("%c",&c);
if(c != 'y') goto CONT;
GO_AHEAD:
for (i = 0; i < Numfiles; i++, close(fd)) {
strcpy(CurrentFileName, Textfiles[i]);
if ((fd = open(Textfiles[i], 0)) > 0) {
if(PAT_FILE) mgrep(fd);
else {
if(SGREP) sgrep(OldPattern,strlen(OldPattern),fd,D);
else bitap(old_D_pat,Pattern,fd,M,D);
}
}
} /* for i < Numfiles */
NOMATCH = 0;
}
}
}
CONT:
if(EATFIRST) {
printf("\n");
EATFIRST = OFF;
}
if(num_of_matched) NOMATCH = OFF;
if(NOMATCH) exit(1);
exit(0);
} /* end of main() */
void initial_value(void)
{
int i;
JUMP = REGEX = FNAME = BESTMATCH = NOPROMPT = NOUPPER = 0;
COUNT = LINENUM = WHOLELINE = SGREP = 0;
EATFIRST = INVERSE = AND = TRUNCATE = OUTTAIL = 0;
FIRST_IN_RE = NOMATCH = FIRSTOUTPUT = ON;
I = DD = S = 1;
HEAD = TAIL = ON;
D_length = 2;
SILENT = Num_Pat = PSIZE = SIMPLEPATTERN = num_of_matched = 0 ;
WORDBOUND = DELIMITER = RE_ERR = 0;
Bit[WORD] = 1;
for (i = WORD - 1; i > 0 ; i--) Bit[i] = Bit[i+1] << 1;
for (i=0; i< MAXSYM; i++) Mask[i] = 0;
}
void compute_next(int M, unsigned *Next, unsigned *Next1)
{
int i, j=0, n, k, temp;
int mid, pp;
int MM, base;
unsigned V[WORD];
base = WORD - M;
temp = Bit[base]; Bit[base] = 0;
for (i=0; i<WORD; i++) V[i] = 0;
for (i=1; i<M; i++)
{
j=0;
while (table[i][j] > 0 && j < 10) {
V[i] = V[i] | Bit[base + table[i][j++]];
}
}
Bit[base]=temp;
if(M <= SHORTREG)
{
k = exponen(M);
pp = 2*k;
for(i=k; i<pp ; i++)
{ n = i;
Next[i]= (k>>1);
for(j=M; j>=1; j--)
{
if(n & Bit[WORD]) Next[i] = Next[i] | V[j];
n = (n>>1);
}
}
return;
}
if(M > MAXREG) fprintf(stderr, "%s: regular expression too long\n", Progname);
MM = M;
if(M & 1) M=M+1;
k = exponen(M/2);
pp = 2*k;
mid = MM/2;
for(i=k; i<pp ; i++)
{ n = i;
Next[i]= (Bit[base]>>1);
for(j=MM; j>mid ; j--)
{
if(n & Bit[WORD]) Next[i] = Next[i] | V[j-mid];
n = (n>>1);
}
n=i-k;
Next1[i-k] = 0;
for(j = 0; j<mid; j++)
{
if(n & Bit[WORD]) Next1[i-k] = Next1[i-k] | V[MM-j];
n = (n>>1);
}
}
return;
}
int exponen(int m)
{ int i, ex;
ex= 1;
for (i=0; i<m; i++) ex= ex*2;
return(ex);
}
void re1(int Text, int M, int D)
{
register unsigned i, c, r0, r1, r2, r3, CMask, Newline, Init0, r_NO_ERR;
register unsigned end;
register unsigned hh, LL=0, k; /* Lower part */
int FIRST_TIME=ON, num_read , j=0, base;
unsigned A[MaxRerror+1], B[MaxRerror+1];
unsigned Next[MaxNext], Next1[MaxNext];
CHAR buffer[BlockSize+Maxline+1];
int FIRST_LOOP = 1;
r_NO_ERR = NO_ERR_MASK;
if(M > 30) {
fprintf(stderr, "%s: regular expression too long\n", Progname);
exit(2);
}
base = WORD - M;
hh = M/2;
for(i=WORD, j=0; j < hh ; i--, j++) LL = LL | Bit[i];
if(FIRST_IN_RE) compute_next(M, Next, Next1);
/*SUN: try: change to memory allocation */
FIRST_IN_RE = 0;
Newline = '\n';
Init[0] = Bit[base];
if(HEAD) Init[0] = Init[0] | Bit[base+1];
for(i=1; i<= D; i++) Init[i] = Init[i-1] | Next[Init[i-1]>>hh] | Next1[Init[i-1]&LL];
Init1 = Init[0] | 1;
Init0 = Init[0];
r2 = r3 = Init[0];
for(k=0; k<= D; k++) { A[k] = B[k] = Init[k]; }
if ( D == 0 )
{
while ((num_read = read(Text, buffer + Maxline, BlockSize)) > 0)
{
i=Maxline; end = num_read + Maxline;
if((num_read < BlockSize) && buffer[end-1] != '\n') buffer[end] = '\n';
if(FIRST_LOOP) { /* if first time in the loop add a newline */
buffer[i-1] = '\n'; /* in front the text. */
i--;
FIRST_LOOP = 0;
}
while ( i < end )
{
c = buffer[i++];
CMask = Mask[c];
if(c != Newline)
{ if(CMask != 0) {
r1 = Init1 & r3;
r2 = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | r1;
}
else {
r2 = r3 & Init1;
}
}
else { j++;
r1 = Init1 & r3; /* match against endofline */
r2 = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | r1;
if(TAIL) r2 = (Next[r2>>hh] | Next1[r2&LL]) | r2; /* epsilon move */
if(( r2 & 1 ) ^ INVERSE) {
if(FILENAMEONLY) {
num_of_matched++;
printf("%s\n", CurrentFileName);
return;
}
r_output(buffer, i-1, end, j);
}
r3 = Init0;
r2 = (Next[r3>>hh] | Next1[r3&LL]) & CMask | Init0;
/* match begin of line */
}
c = buffer[i++];
CMask = Mask[c];
if(c != Newline)
{ if(CMask != 0) {
r1 = Init1 & r2;
r3 = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1;
}
else r3 = r2 & Init1;
} /* if(NOT Newline) */
else { j++;
r1 = Init1 & r2; /* match against endofline */
r3 = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1;
if(TAIL) r3 = ( Next[r3>>hh] | Next1[r3&LL] ) | r3;
/* epsilon move */
if(( r3 & 1 ) ^ INVERSE) {
if(FILENAMEONLY) {
num_of_matched++;
printf("%s\n", CurrentFileName);
return;
}
r_output(buffer, i-1, end, j);
}
r2 = Init0;
r3 = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | Init0;
/* match begin of line */
}
} /* while i < end ... */
strncpy(buffer, buffer+num_read, Maxline);
} /* end while read()... */
return;
} /* end if (D == 0) */
while ((num_read = read(Text, buffer + Maxline, BlockSize)) > 0)
{
i=Maxline; end = Maxline + num_read;
if((num_read < BlockSize) && buffer[end-1] != '\n') buffer[end] = '\n';
if(FIRST_TIME) { /* if first time in the loop add a newline */
buffer[i-1] = '\n'; /* in front the text. */
i--;
FIRST_TIME = 0;
}
while (i < end )
{
c = buffer[i];
CMask = Mask[c];
if(c != Newline)
{
if(CMask != 0) {
r2 = B[0];
r1 = Init1 & r2;
A[0] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1;
r3 = B[1];
r1 = Init1 & r3;
r0 = r2 | A[0]; /* A[0] | B[0] */
A[1] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | (( r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
if(D == 1) goto Nextchar;
r2 = B[2];
r1 = Init1 & r2;
r0 = r3 | A[1];
A[2] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
if(D == 2) goto Nextchar;
r3 = B[3];
r1 = Init1 & r3;
r0 = r2 | A[2];
A[3] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
if(D == 3) goto Nextchar;
r2 = B[4];
r1 = Init1 & r2;
r0 = r3 | A[3];
A[4] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
if(D == 4) goto Nextchar;
} /* if(CMask) */
else {
r2 = B[0];
A[0] = r2 & Init1;
r3 = B[1];
r1 = Init1 & r3;
r0 = r2 | A[0];
A[1] = ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
if(D == 1) goto Nextchar;
r2 = B[2];
r1 = Init1 & r2;
r0 = r3 | A[1];
A[2] = ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
if(D == 2) goto Nextchar;
r3 = B[3];
r1 = Init1 & r3;
r0 = r2 | A[2];
A[3] = ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
if(D == 3) goto Nextchar;
r2 = B[4];
r1 = Init1 & r2;
r0 = r3 | A[3];
A[4] = ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
if(D == 4) goto Nextchar;
}
}
else { j++;
r1 = Init1 & B[D]; /* match against endofline */
A[D] = ((Next[B[D]>>hh] | Next1[B[D]&LL]) & CMask) | r1;
if(TAIL) A[D] = ( Next[A[D]>>hh] | Next1[A[D]&LL] ) | A[D];
/* epsilon move */
if(( A[D] & 1 ) ^ INVERSE) {
if(FILENAMEONLY) {
num_of_matched++;
printf("%s\n", CurrentFileName);
return;
}
r_output(buffer, i, end, j);
}
for(k=0; k<=D; k++) B[k] = Init[0];
r1 = Init1 & B[0];
A[0] = (( Next[B[0]>>hh] | Next1[B[0]&LL]) & CMask) | r1;
for(k=1; k<=D; k++) {
r3 = B[k];
r1 = Init1 & r3;
r2 = A[k-1] | B[k-1];
A[k] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | ((B[k-1] | Next[r2>>hh] | Next1[r2&LL]) & r_NO_ERR) | r1;
}
}
Nextchar: i=i+1;
c = buffer[i];
CMask = Mask[c];
if(c != Newline)
{
if(CMask != 0) {
r2 = A[0];
r1 = Init1 & r2;
B[0] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1;
r3 = A[1];
r1 = Init1 & r3;
r0 = B[0] | r2;
B[1] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | ((r2 | Next[r0>>hh] | Next1[r0&LL]) & r_NO_ERR) | r1 ;
if(D == 1) goto Nextchar1;
r2 = A[2];
r1 = Init1 & r2;
r0 = B[1] | r3;
B[2] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
if(D == 2) goto Nextchar1;
r3 = A[3];
r1 = Init1 & r3;
r0 = B[2] | r2;
B[3] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
if(D == 3) goto Nextchar1;
r2 = A[4];
r1 = Init1 & r2;
r0 = B[3] | r3;
B[4] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
if(D == 4) goto Nextchar1;
} /* if(CMask) */
else {
r2 = A[0];
B[0] = r2 & Init1;
r3 = A[1];
r1 = Init1 & r3;
r0 = B[0] | r2;
B[1] = ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
if(D == 1) goto Nextchar1;
r2 = A[2];
r1 = Init1 & r2;
r0 = B[1] | r3;
B[2] = ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
if(D == 2) goto Nextchar1;
r3 = A[3];
r1 = Init1 & r3;
r0 = B[2] | r2;
B[3] = ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
if(D == 3) goto Nextchar1;
r2 = A[4];
r1 = Init1 & r2;
r0 = B[3] | r3;
B[4] = ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
if(D == 4) goto Nextchar1;
}
} /* if(NOT Newline) */
else { j++;
r1 = Init1 & A[D]; /* match against endofline */
B[D] = ((Next[A[D]>>hh] | Next1[A[D]&LL]) & CMask) | r1;
if(TAIL) B[D] = ( Next[B[D]>>hh] | Next1[B[D]&LL] ) | B[D];
/* epsilon move */
if(( B[D] & 1 ) ^ INVERSE) {
if(FILENAMEONLY) {
num_of_matched++;
printf("%s\n", CurrentFileName);
return;
}
r_output(buffer, i, end, j);
}
for(k=0; k<=D; k++) A[k] = Init0;
r1 = Init1 & A[0];
B[0] = ((Next[A[0]>>hh] | Next1[A[0]&LL]) & CMask) | r1;
for(k=1; k<=D; k++) {
r3 = A[k];
r1 = Init1 & r3;
r2 = A[k-1] | B[k-1];
B[k] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | ((A[k-1] | Next[r2>>hh] | Next1[r2&LL]) & r_NO_ERR) | r1;
}
}
Nextchar1: i=i+1;
} /* while */
strncpy(buffer, buffer+num_read, Maxline);
} /* while */
return;
} /* re1 */
void re(int Text, int M, int D)
{
register unsigned i, c, r1, r2, r3, CMask, k, Newline, Init0, Init1, end;
register unsigned r_even, r_odd, r_NO_ERR ;
unsigned RMask[MAXSYM];
unsigned A[MaxRerror+1], B[MaxRerror+1];
int num_read, j=0, lasti, base, ResidueSize;
int FIRST_TIME; /* Flag */
base = WORD - M;
k = 2*exponen(M);
if(FIRST_IN_RE) {
compute_next(M, Next, Next1);
FIRST_IN_RE = 0; }
for(i=0; i< MAXSYM; i++) RMask[i] = Mask[i];
r_NO_ERR = NO_ERR_MASK;
Newline = '\n';
lasti = Maxline;
Init0 = Init[0] = Bit[base];
if(HEAD) Init0 = Init[0] = Init0 | Bit[base+1] ;
for(i=1; i<= D; i++) Init[i] = Init[i-1] | Next[Init[i-1]]; /* can be out? */
Init1 = Init0 | 1;
r2 = r3 = Init0;
for(k=0; k<= D; k++) { A[k] = B[k] = Init[0]; } /* can be out? */
FIRST_TIME = ON;
if ( D == 0 )
{
while ((num_read = read(Text, buffer + Maxline, BlockSize)) > 0)
{
i=Maxline; end = Maxline + num_read ;
if((num_read < BlockSize)&&buffer[end-1] != '\n') buffer[end] = '\n';
if(FIRST_TIME) {
buffer[i-1] = '\n';
i--;
FIRST_TIME = 0;
}
while (i < end)
{
c = buffer[i++];
CMask = RMask[c];
if(c != Newline)
{
r1 = Init1 & r3;
r2 = (Next[r3] & CMask) | r1;
}
else {
r1 = Init1 & r3; /* match against '\n' */
r2 = Next[r3] & CMask | r1;
j++;
if(TAIL) r2 = Next[r2] | r2 ; /* epsilon move */
if(( r2 & 1) ^ INVERSE) {
if(FILENAMEONLY) {
num_of_matched++;
printf("%s\n", CurrentFileName);
return;
}
r_output(buffer, i-1, end, j);
}
lasti = i - 1;
r3 = Init0;
r2 = (Next[r3] & CMask) | Init0;
}
c = buffer[i++];
CMask = RMask[c];
if(c != Newline)
{
r1 = Init1 & r2;
r3 = (Next[r2] & CMask) | r1;
}
else { j++;
r1 = Init1 & r2; /* match against endofline */
r3 = Next[r2] & CMask | r1;
if(TAIL) r3 = Next[r3] | r3;
if(( r3 & 1) ^ INVERSE) {
if(FILENAMEONLY) {
num_of_matched++;
printf("%s\n", CurrentFileName);
return;
}
r_output(buffer, i-1, end, j);
}
lasti = i - 1;
r2 = Init0;
r3 = (Next[r2] & CMask) | Init0; /* match the newline */
}
} /* while */
ResidueSize = Maxline + num_read - lasti;
if(ResidueSize > Maxline) {
ResidueSize = Maxline; }
strncpy(buffer+Maxline-ResidueSize, buffer+lasti, ResidueSize);
lasti = Maxline - ResidueSize;
} /* while */
return;
} /* end if(D==0) */
while ((num_read = read(Text, buffer + Maxline, BlockSize)) > 0)
{
i=Maxline; end = Maxline+num_read;
if((num_read < BlockSize) && buffer[end-1] != '\n') buffer[end] = '\n';
if(FIRST_TIME) {
buffer[i-1] = '\n';
i--;
FIRST_TIME = 0;
}
while (i < end)
{ c = buffer[i++];
CMask = RMask[c];
if (c != Newline)
{
r_even = B[0];
r1 = Init1 & r_even;
A[0] = (Next[r_even] & CMask) | r1;
r_odd = B[1];
r1 = Init1 & r_odd;
r2 = (r_even | Next[r_even|A[0]]) &r_NO_ERR;
A[1] = (Next[r_odd] & CMask) | r2 | r1 ;
if(D == 1) goto Nextchar;
r_even = B[2];
r1 = Init1 & r_even;
r2 = (r_odd | Next[r_odd|A[1]]) &r_NO_ERR;
A[2] = (Next[r_even] & CMask) | r2 | r1 ;
if(D == 2) goto Nextchar;
r_odd = B[3];
r1 = Init1 & r_odd;
r2 = (r_even | Next[r_even|A[2]]) &r_NO_ERR;
A[3] = (Next[r_odd] & CMask) | r2 | r1 ;
if(D == 3) goto Nextchar;
r_even = B[4];
r1 = Init1 & r_even;
r2 = (r_odd | Next[r_odd|A[3]]) &r_NO_ERR;
A[4] = (Next[r_even] & CMask) | r2 | r1 ;
goto Nextchar;
} /* if NOT Newline */
else { j++;
r1 = Init1 & B[D]; /* match endofline */
A[D] = (Next[B[D]] & CMask) | r1;
if(TAIL) A[D] = Next[A[D]] | A[D];
if((A[D] & 1) ^ INVERSE ) {
if(FILENAMEONLY) {
num_of_matched++;
printf("%s\n", CurrentFileName);
return;
}
r_output(buffer, i-1, end, j);
}
for(k=0; k<= D; k++) { A[k] = B[k] = Init[k]; }
r1 = Init1 & B[0];
A[0] = (Next[B[0]] & CMask) | r1;
for(k=1; k<= D; k++) {
r1 = Init1 & B[k];
r2 = (B[k-1] | Next[A[k-1]|B[k-1]]) &r_NO_ERR;
A[k] = (Next[B[k]] & CMask) | r1 | r2;
}
}
Nextchar:
c = buffer[i];
CMask = RMask[c];
if(c != Newline)
{
r1 = Init1 & A[0];
B[0] = (Next[A[0]] & CMask) | r1;
r1 = Init1 & A[1];
B[1] = (Next[A[1]] & CMask) | ((A[0] | Next[A[0] | B[0]]) & r_NO_ERR) | r1 ;
if(D == 1) goto Nextchar1;
r1 = Init1 & A[2];
B[2] = (Next[A[2]] & CMask) | ((A[1] | Next[A[1] | B[1]]) &r_NO_ERR) | r1 ;
if(D == 2) goto Nextchar1;
r1 = Init1 & A[3];
B[3] = (Next[A[3]] & CMask) | ((A[2] | Next[A[2] | B[2]])&r_NO_ERR) | r1 ;
if(D == 3) goto Nextchar1;
r1 = Init1 & A[4];
B[4] = (Next[A[4]] & CMask) | ((A[3] | Next[A[3] | B[3]])&r_NO_ERR) | r1 ;
goto Nextchar1;
} /* if(NOT Newline) */
else { j++;
r1 = Init1 & A[D]; /* match endofline */
B[D] = (Next[A[D]] & CMask) | r1;
if(TAIL) B[D] = Next[B[D]] | B[D];
if((B[D] & 1) ^ INVERSE ) {
if(FILENAMEONLY) {
num_of_matched++;
printf("%s\n", CurrentFileName);
return;
}
r_output(buffer, i, end, j);
}
for(k=0; k<= D; k++) { A[k] = B[k] = Init[k]; }
r1 = Init1 & A[0];
B[0] = (Next[A[0]] & CMask) | r1;
for(k=1; k<= D; k++) {
r1 = Init1 & A[k];
r2 = (A[k-1] | Next[A[k-1]|B[k-1]])&r_NO_ERR;
B[k] = (Next[A[k]] & CMask) | r1 | r2;
}
}
Nextchar1: i++;
} /* while i < end */
strncpy(buffer, buffer+num_read, Maxline);
} /* while read() */
return;
} /* re */
void r_output(CHAR *buffer, int i, int end, int j)
{
int bp;
if(i >= end) return;
num_of_matched++;
if(COUNT) return;
if(FNAME) printf("%s: ", CurrentFileName);
bp = i-1;
while ((buffer[bp] != '\n') && (bp > 0)) bp--;
if(LINENUM) printf("%d: ", j);
if(buffer[bp] != '\n') bp = Maxline-1;
bp++;
while (bp <= i ) putchar(buffer[bp++]);
}
void file_out(char *fname)
{
int num_read;
int fd;
int i, len;
CHAR buf[4097];
if(FNAME) {
len = strlen(fname);
putchar('\n');
for(i=0; i< len; i++) putchar(':');
putchar('\n');
printf("%s\n", CurrentFileName);
len = strlen(fname);
for(i=0; i< len; i++) putchar(':');
putchar('\n');
fflush(stdout);
}
fd = open(fname, 0);
while((num_read = read(fd, buf, 4096)) > 0)
write(1, buf, num_read);
}
void usage(void)
{
fprintf(stderr, "usage: %s [-#cdehiklnpstvwxBDGIS] [-f patternfile] pattern [files]\n", Progname);
printf("\n");
fprintf(stderr, "summary of frequently used options:\n");
fprintf(stderr, "-#: find matches with at most # errors\n");
fprintf(stderr, "-c: output the number of matched records\n");
fprintf(stderr, "-d: define record delimiter\n");
fprintf(stderr, "-h: do not output file names\n");
fprintf(stderr, "-i: case-insensitive search, e.g., 'a' = 'A'\n");
fprintf(stderr, "-l: output the names of files that contain a match\n");
fprintf(stderr, "-n: output record prefixed by record number\n");
fprintf(stderr, "-v: output those records containing no matches\n");
fprintf(stderr, "-w: pattern has to match as a word, e.g., 'win' will not match 'wind'\n");
fprintf(stderr, "-B: best match mode. find the closest matches to the pattern\n");
fprintf(stderr, "-G: output the files that contain a match\n");
printf("\n");
exit(2);
}
void checksg(CHAR *Pattern, int D)
{
char c;
int i, m;
m = strlen(Pattern);
if(!(PAT_FILE) && m <= D) {
fprintf(stderr, "%s: size of pattern must be greater than number of errors\n", Progname);
exit(2);
}
SIMPLEPATTERN = ON;
for (i=0; i < m; i++)
{
switch(Pattern[i])
{
case ';' : SIMPLEPATTERN = OFF; break;
case ',' : SIMPLEPATTERN = OFF; break;
case '.' : SIMPLEPATTERN = OFF; break;
case '*' : SIMPLEPATTERN = OFF; break;
case '-' : SIMPLEPATTERN = OFF; break;
case '[' : SIMPLEPATTERN = OFF; break;
case ']' : SIMPLEPATTERN = OFF; break;
case '(' : SIMPLEPATTERN = OFF; break;
case ')' : SIMPLEPATTERN = OFF; break;
case '<' : SIMPLEPATTERN = OFF; break;
case '>' : SIMPLEPATTERN = OFF; break;
case '^' : if(D > 0) SIMPLEPATTERN = OFF;
break;
case '$' : if(D > 0) SIMPLEPATTERN = OFF;
break;
case '|' : SIMPLEPATTERN = OFF; break;
case '#' : SIMPLEPATTERN = OFF; break;
case '\\' : SIMPLEPATTERN = OFF; break;
default : break;
}
}
if (CONSTANT) SIMPLEPATTERN = ON;
if (SIMPLEPATTERN == OFF) return;
if (NOUPPER && D) return;
if (JUMP == ON) return;
if (I == 0) return;
if (LINENUM) return;
if (DELIMITER) return;
if (INVERSE) return;
if (WORDBOUND && D > 0) return;
if (WHOLELINE && D > 0) return;
if (SILENT) return; /* REMINDER: to be removed */
SGREP = ON;
if(m >= 16) DNA = ON;
for(i=0; i<m; i++) {
c = Pattern[i];
if(c == 'a' || c == 'c' || c == 't' || c == 'g' ) ;
else DNA = OFF;
}
return;
}
void output(register CHAR *buffer, int i1, int i2, int j)
{
register CHAR *bp, *outend;
if(i1 > i2) return;
num_of_matched++;
if(COUNT) return;
if(SILENT) return;
if(OUTTAIL) {
i1 = i1 + D_length;
i2 = i2 + D_length;
}
if(DELIMITER) j = j+1;
if(FIRSTOUTPUT) {
if (buffer[i1] == '\n') {
i1++;
EATFIRST = ON;
}
FIRSTOUTPUT = 0;
}
if(TRUNCATE) {
fprintf(stderr, "WARNING!!! some lines have been truncated in output record #%d\n", num_of_matched-1);
}
while(buffer[i1] == '\n' && i1 <= i2) {
printf("\n");
i1++;
}
if(FNAME == ON) printf("%s: ", CurrentFileName);
if(LINENUM) printf("%d: ", j-1);
bp = buffer + i1;
outend = buffer + i2;
while(bp <= outend) putchar(*bp++);
}
/* end of main.c */