blob: 5dde3a15e9c369899d4f900a45a6efd29638690b [file] [log] [blame]
/* Copyright (c) 1991 Sun Wu and Udi Manber. All Rights Reserved. */
/* multipattern matcher */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <ctype.h>
#define MAXPAT 256
#define MAXLINE 1024
#define MAXSYM 256
#define MAXMEMBER1 4096
#define MAXPATFILE 260000
#define BLOCKSIZE 8192
#define MAXHASH 8192
#define mm 8191
#define max_num 30000
#define W_DELIM 128
#define L_DELIM 10
extern COUNT, FNAME, SILENT, FILENAMEONLY, num_of_matched;
extern INVERSE;
extern WORDBOUND, WHOLELINE, NOUPPER;
extern unsigned char CurrentFileName[], Progname[];
extern total_line;
int LONG = 0;
int SHORT = 0;
int p_size=0;
unsigned char SHIFT1[MAXMEMBER1];
unsigned char tr[MAXSYM];
unsigned char tr1[MAXSYM];
struct pat_list {
int index;
struct pat_list *next;
} *HASH[MAXHASH];
struct pat_list *pt, *qt;
unsigned char buf[MAXPATFILE+BLOCKSIZE];
unsigned char pat_spool[MAXPATFILE+2*max_num+MAXPAT];
unsigned char *patt[max_num];
unsigned char pat_len[max_num];
void countline(unsigned char *text, int len)
{
int i;
for (i=0; i<len; i++) if(text[i] == '\n') total_line++;
}
void m_short( register unsigned char *text, int start, int end )
{
register unsigned char *textend;
register int j;
register struct pat_list *p;
register int pat_index;
int MATCHED=0;
int OUT=0;
unsigned char *lastout;
unsigned char *qx;
textend = text + end;
lastout = text + start + 1;
text = text + start - 1 ;
while (++text <= textend) {
p = HASH[*text];
while(p != 0) {
pat_index = p->index;
p = p->next;
qx = text;
j = 0;
while(tr[patt[pat_index][j]] == tr[*(qx++)]) j++;
if(pat_len[pat_index] <= j) {
if(text >= textend) return;
num_of_matched++;
if(FILENAMEONLY || SILENT) return;
if(COUNT) {
while (*text != '\n') text++;
}
else {
if(FNAME) printf("%s: ",CurrentFileName);
if(!INVERSE) {
while(*(--text) != '\n');
while(*(++text) != '\n') putchar(*text);
printf("\n");
MATCHED = 1;
}
else {
while(*(--text) != '\n');
if(lastout < text) OUT=1;
while(lastout < text) putchar(*lastout++);
if(OUT) {
putchar('\n');
OUT=0;
}
while(*(++text) != '\n');
lastout=text+1;
MATCHED = 1;
}
}
}
if(MATCHED) break;
}
MATCHED = 0;
} /* while */
if(INVERSE && !COUNT) while(lastout <= textend) putchar(*lastout++);
}
void f_prep(int pat_index, unsigned char *Pattern)
{
int i, m;
register unsigned hash, Mask=15;
m = p_size;
for (i = m-1; i>=(1+LONG); i--) {
hash = (Pattern[i] & Mask);
hash = (hash << 4) + (Pattern[i-1]& Mask);
if(LONG) hash = (hash << 4) + (Pattern[i-2] & Mask);
if(SHIFT1[hash] >= m-1-i) SHIFT1[hash] = m-1-i;
}
if(SHORT) Mask = 255; /* 011111111 */
hash = 0;
for(i = m-1; i>=0; i--) {
hash = (hash << 4) + (tr[Pattern[i]]&Mask);
}
/*
if(INVERSE) hash = Pattern[1];
*/
hash=hash&mm;
qt = (struct pat_list *) malloc(sizeof(struct pat_list));
qt->index = pat_index;
pt = HASH[hash];
qt->next = pt;
HASH[hash] = qt;
}
void prepf(int fp)
{
int length=0, i, p=1, num_pat;
unsigned char *pat_ptr=pat_spool;
unsigned Mask = 15;
int num_read;
while((num_read = read(fp, buf+length, BLOCKSIZE)) > 0) {
length = length + num_read;
if(length > MAXPATFILE) {
fprintf(stderr, "%s: maximum pattern file size is %d\n", Progname, MAXPATFILE);
exit(2);
}
}
buf[length] = '\n';
i = 0; p=1;
while(i<length) {
patt[p] = pat_ptr;
if(WORDBOUND) *pat_ptr++ = W_DELIM;
if(WHOLELINE) *pat_ptr++ = L_DELIM;
while((*pat_ptr = buf[i++]) != '\n') pat_ptr++;
if(WORDBOUND) *pat_ptr++ = W_DELIM;
if(WHOLELINE) *pat_ptr++ = L_DELIM; /* Can't be both on */
*pat_ptr++ = 0;
p++;
}
if(p>max_num) {
fprintf(stderr, "%s: maximum number of patterns is %d\n", Progname, max_num);
exit(2);
}
for(i=1; i<20; i++) *pat_ptr = i; /* boundary safety zone */
for(i=0; i< MAXSYM; i++) tr[i] = i;
if(NOUPPER) {
for(i='A'; i<= 'Z'; i++) tr[i] = i + 'a' - 'A';
}
if(WORDBOUND) {
for(i=0; i<128; i++) if(!isalnum(i)) tr[i] = W_DELIM;
}
for(i=0; i< MAXSYM; i++) tr1[i] = tr[i]&Mask;
num_pat = p-1;
p_size = MAXPAT;
for(i=1 ; i <= num_pat; i++) {
p = strlen(patt[i]);
pat_len[i] = p;
if(p!=0 && p < p_size) p_size = p;
}
if(p_size == 0) {
fprintf(stderr, "the pattern file is empty\n");
exit(2);
}
if(length > 400 && p_size > 2) LONG = 1;
if(p_size == 1) SHORT = 1;
for(i=0; i<MAXMEMBER1; i++) SHIFT1[i] = p_size - 2;
for(i=0; i<MAXHASH; i++) {
HASH[i] = 0;
}
for(i=1; i<= num_pat; i++) f_prep(i, patt[i]);
}
void monkey1( register unsigned char *text, int start, int end )
{
register unsigned char *textend;
register unsigned hash, i;
register unsigned char shift;
register int m1, j, Long=LONG;
int pat_index, m=p_size;
int MATCHED=0;
register unsigned char *qx;
register struct pat_list *p;
unsigned char *lastout;
int OUT=0;
textend = text + end;
m1 = m - 1;
lastout = text+start+1;
text = text + start + m1 ;
while (text <= textend) {
hash=tr1[*text];
hash=(hash<<4)+(tr1[*(text-1)]);
if(Long) hash=(hash<<4)+(tr1[*(text-2)]);
shift = SHIFT1[hash];
if(shift == 0) {
hash=0;
for(i=0;i<=m1;i++) {
hash=(hash<<4)+(tr1[*(text-i)]);
}
hash=hash&mm;
p = HASH[hash];
while(p != 0) {
pat_index = p->index;
p = p->next;
qx = text-m1;
j = 0;
while(tr[patt[pat_index][j]] == tr[*(qx++)]) j++;
if (j > m1 ) {
if(pat_len[pat_index] <= j) {
if(text > textend) return;
num_of_matched++;
if(FILENAMEONLY || SILENT) return;
MATCHED=1;
if(COUNT) {
while (*text != '\n') text++;
}
else {
if(!INVERSE) {
if(FNAME) printf("%s: ",CurrentFileName);
while(*(--text) != '\n');
while(*(++text) != '\n') putchar(*text);
printf("\n");
}
else {
if(FNAME) printf("%s: ",CurrentFileName);
while(*(--text) != '\n');
if(lastout < text) OUT=1;
while(lastout < text) putchar(*lastout++);
if(OUT) {
putchar('\n');
OUT=0;
}
while(*(++text) != '\n');
lastout=text+1;
}
}
/*
else {
if(FNAME) printf("%s: ",CurrentFileName);
while(*(--text) != '\n');
while(*(++text) != '\n') putchar(*text);
printf("\n");
}
*/
}
}
if(MATCHED) break;
}
if(!MATCHED) shift = 1;
else {
MATCHED = 0;
shift = m1;
}
}
text = text + shift;
}
if(INVERSE && !COUNT) while(lastout <= textend) putchar(*lastout++);
}
void mgrep(int fd)
{
register char r_newline = '\n';
unsigned char text[2*BLOCKSIZE+MAXLINE];
register int buf_end, num_read, start, end, residue = 0;
text[MAXLINE-1] = '\n'; /* initial case */
start = MAXLINE-1;
while( (num_read = read(fd, text+MAXLINE, BLOCKSIZE)) > 0)
{
if(INVERSE && COUNT) countline(text+MAXLINE, num_read);
buf_end = end = MAXLINE + num_read -1 ;
while(text[end] != r_newline && end > MAXLINE) end--;
residue = buf_end - end + 1 ;
text[start-1] = r_newline;
if(SHORT) m_short(text, start, end);
else monkey1(text, start, end);
if(FILENAMEONLY && num_of_matched) {
printf("%s\n", CurrentFileName);
return;
}
start = MAXLINE - residue;
if(start < 0) {
start = 1;
}
strncpy(text+start, text+end, residue);
} /* end of while(num_read = ... */
text[MAXLINE] = '\n';
text[start-1] = '\n';
if(residue > 1) {
if(SHORT) m_short(text, start, end);
else monkey1(text, start, end);
}
return;
} /* end mgrep */