blob: b46e7fdbed9f37757133f7328fb341de32f2548d [file] [log] [blame]
/* Copyright (c) 1988 Bellcore
** All Rights Reserved
** Permission is granted to copy or use this program, EXCEPT that it
** may not be sold for profit, the copyright notice must be reproduced
** on copies, and credit should be given to Bellcore where it is due.
** BELLCORE MAKES NO WARRANTY AND ACCEPTS NO LIABILITY FOR THIS PROGRAM.
*/
#ifndef lint
static char rcsid[]= "$Header$";
#endif
#include "misc.h"
#include "flagdefs.h"
#include "float.h"
#include "tol.h"
#include "token.h"
#include "line.h"
#include "command.h"
#include "comment.h"
#include "parse.h"
#include <ctype.h>
/*
** a progress message is sent to Z_chatter each time the token count
** of the file being parsed reaches a multiple of this value
*/
#define _P_PARSE_CHATTER 1000
static int _P_realline; /* real line number of the line currently being parsed */
static int _P_fnumb; /* number of the file being parsed (set by P_file_parse) */
static char *_P_nextchr; /* pointer to the next character to parse */
static char *_P_firstchr; /* pointer to the beginning of the line being parsed */
static int _P_next_tol; /* index of the tolerance to give the next float on
this line; reset to 0 for every new line */
static int _P_stringsize; /* count of the number of characters consumed by
the literal being read (maintained by _P_litsnarf) */
static int _P_has_content; /* flag set when the line being parsed is a real
(non-command) line that has not yet been counted as a content line */
static int _P_start; /* first line to parse */
static int _P_lcount; /* number of lines to parse */
static int _P_flags; /* parse-mode flags handed to P_file_parse */
/*
** by default, "words" can be made up of numbers and letters
** the following code allows for extending the alphabet that can
** be used in words. this is useful for handling languages such
** as C where the underscore character is an allowable character
** in an identifier. If a character (such as underscore) is NOT added
** to the alphabet, the identifier will be broken into 2 or more "words"
** by the parser. as such the two sequences
** one_two
** and
** one _ two
** would look identical to spiff.
*/
#define _P_ALPHALEN 256
/*
** the extended alphabet itself: a NUL terminated string holding every
** extra character that is allowed to appear inside a word
*/
static char _P_alpha[_P_ALPHALEN];
/*
** empty the extended alphabet
*/
static void
_P_alpha_clear()
{
	*_P_alpha = '\0';
}
/*
** report whether chr is a member of the extended alphabet.
** returns 1 if it is and 0 if it is not.
**
** the result used to be the address returned by the search routine
** cast to an int. converting a pointer to an int may drop significant
** bits when pointers are wider than ints, so a non-null address could
** come back as 0. the pointer is now compared against the null pointer
** and the outcome of that comparison is returned instead.
*/
static int
_P_in_alpha(chr)
char chr;
{
#ifndef ATT
	extern char *index(const char *s, int c);
#endif
	/*
	**	special case when string terminator
	**	is handed to us -- the search routines would
	**	otherwise "find" it at the end of the alphabet
	*/
	if ('\0' == chr)
		return(0);
#ifdef ATT
	return(0 != strchr(_P_alpha,chr));
#else
	return(0 != index(_P_alpha,chr));
#endif
}
/*
** append the first word found at ptr to the extended alphabet.
** the word is everything up to (but not including) the first
** whitespace character. Z_fatal is called if the addition would
** not fit in the alphabet buffer.
*/
void
P_addalpha(ptr)
char *ptr;
{
	char word[Z_LINELEN];	/* the characters being added */

	S_wordcpy(word,ptr);
	/*
	**	the combined length has to stay below the buffer size
	**	so that there is still room for the terminator
	*/
	if (_P_ALPHALEN <= (strlen(_P_alpha) + strlen(word)))
	{
		Z_fatal("too many characters added to extended alphabet");
	}
	(void) strcat(_P_alpha,word);
}
/*
** return the parser, and the modules it drives, to a known state
*/
static char _P_dummyline[2];	/* an empty line; the parse pointer is aimed
				   here whenever there is no real text */
static void
_P_initparser()
{
	*_P_dummyline = '\0';
	/*
	**	first wipe the state held by each module ...
	*/
	C_clear_cmd();		/* no embedded command key word */
	T_clear_tols();
	W_clearcoms();
	W_clearlits();
	_P_alpha_clear();	/* no extended alphabet */
	/*
	**	... then rebuild it from the execute-time commands
	*/
	C_docmds();
}
/*
** returns non-zero when the parse pointer is sitting on the
** string terminator, i.e. the current line has been used up
*/
static int
_P_needmore()
{
	return('\0' == _P_nextchr[0]);
}
/*
** finish the bookkeeping for the current line and step to the next one.
** returns 1 when the requested range of lines is exhausted, 0 otherwise.
** on a 0 return _P_firstchr points at the new line; _P_nextchr points
** there too unless the line is an embedded command, in which case
** _P_nextchr is aimed at the empty dummy line and _P_has_content is 0.
*/
static int
_P_nextline()
{
	char *fresh;	/* text of the line just fetched */

	/*
	**	a line with content counts as a content line, and if any
	**		tokens were stored for it the token line is
	**		closed and a new, empty one is started
	*/
	if (_P_has_content)
	{
		L_incclmax(_P_fnumb);
		if (L_getcount(_P_fnumb,L_gettlmax(_P_fnumb)))
		{
			L_inctlmax(_P_fnumb);
			L_setcount(_P_fnumb,L_gettlmax(_P_fnumb),0);
		}
		_P_has_content = 0;
	}
	/*
	**	tolerances start over on every line
	*/
	_P_next_tol = 0;
	/*
	**	advance, and give up if that takes us out of range
	*/
	_P_realline += 1;
	if (!(_P_realline < _P_start+_P_lcount))
		return(1);
	fresh = L_getrline(_P_fnumb,_P_realline);
	_P_nextchr = fresh;
	_P_firstchr = fresh;
	if (!C_is_cmd(_P_firstchr))
	{
		/*
		**	an ordinary line: record which real line
		**		the current content line came from
		*/
		L_setclindex(_P_fnumb,L_getclmax(_P_fnumb),_P_realline);
		_P_has_content = 1;
		return(0);
	}
	/*
	**	an embedded command: there is nothing on it to parse
	*/
	_P_nextchr = _P_dummyline;
	_P_has_content = 0;
	return(0);
}
/*
** the following three routines (_P_litsnarf, _P_bolsnarf, and _P_comsnarf)
** all do roughly the same thing. they scan ahead and collect the
** specified string, move _P_nextchr to the end of the
** comment or literal and return 1 if we run off the end of file,
** 0 otherwise. it would have been nice to have 1 routine handle
** all three tasks (there is much common code), however there were
** enough differences (for instance, only comments check for nesting,
** only literals need to set _P_stringsize, etc.)
** that I decided to split them up.
*/
/*
** scan over the delimited literal described by litptr. _P_nextchr must
** point at the opening delimiter on entry and is left just past the
** closing delimiter. _P_stringsize is set to the number of characters
** stepped over. returns 1 if the lines ran out before the literal
** was closed, 0 otherwise.
*/
static int
_P_litsnarf(litptr)
W_lit litptr;
{
	int skip;	/* characters consumed by the current step */

	/*
	**	the opening delimiter is the first part of the literal
	*/
	skip = strlen(W_litbegin(litptr));
	_P_nextchr += skip;
	_P_stringsize = skip;
	/*
	**	a literal without a closing delimiter ends right here
	*/
	if ('\0' == *(W_litend(litptr)))
	{
		return(0);
	}
	for (;;)
	{
		/*
		**	fetch another line when this one is used up
		*/
		if (_P_needmore())
		{
			if (_P_nextline())
				return(1);
			/*
			**	a line without content is a command, and the
			**		command may have redefined the literals.
			**		stop if ours is no longer one of them
			*/
			if (!_P_has_content && !W_is_lit(litptr))
				return(0);
		}
		/*
		**	an escape string immediately followed by the closing
		**		delimiter is part of the literal
		*/
		if ('\0' != *(W_litescape(litptr)) &&
		    !S_wordcmp(_P_nextchr,W_litescape(litptr)))
		{
			skip = strlen(W_litescape(litptr));
			if (!S_wordcmp(_P_nextchr+skip,W_litend(litptr)))
			{
				skip += strlen(W_litend(litptr));
				_P_nextchr += skip;
				_P_stringsize += skip;
				continue;
			}
		}
		/*
		**	a bare closing delimiter finishes the literal
		*/
		if (!S_wordcmp(_P_nextchr,W_litend(litptr)))
		{
			skip = strlen(W_litend(litptr));
			_P_nextchr += skip;
			_P_stringsize += skip;
			return(0);
		}
		/*
		**	anything else is one more character of the literal
		*/
		_P_nextchr++;
		_P_stringsize++;
	}
#ifndef lint
	Z_fatal("shouldn't execute this line at the end of _P_litsnarf");
#endif
} /* _P_litsnarf */
/*
** scan over the beginning-of-line comment described by bolptr.
** _P_nextchr must point at the opening string on entry and is left
** just past the closing string. returns 1 if the lines ran out
** before the comment was closed, 0 otherwise.
*/
static int
_P_bolsnarf(bolptr)
W_bol bolptr;
{
	int skip;	/* characters consumed by the current step */

	/*
	**	step over the string that opened the comment
	*/
	skip = strlen(W_bolbegin(bolptr));
	_P_nextchr += skip;
	/*
	**	a comment without a closing string ends right here
	*/
	if ('\0' == *(W_bolend(bolptr)))
	{
		return(0);
	}
	for (;;)
	{
		/*
		**	fetch another line when this one is used up
		*/
		if (_P_needmore())
		{
			if (_P_nextline())
				return(1);
			/*
			**	a line without content is a command, and the
			**		command may have redefined the comments.
			**		stop if ours is no longer one of them
			*/
			if (!_P_has_content && !W_is_bol(bolptr))
				return(0);
		}
		/*
		**	an escape string immediately followed by the closing
		**		string does not close the comment
		*/
		if ('\0' != *(W_bolescape(bolptr)) &&
		    !S_wordcmp(_P_nextchr,W_bolescape(bolptr)))
		{
			skip = strlen(W_bolescape(bolptr));
			if (!S_wordcmp(_P_nextchr+skip,W_bolend(bolptr)))
			{
				skip += strlen(W_bolend(bolptr));
				_P_nextchr += skip;
				continue;
			}
		}
		/*
		**	a bare closing string finishes the comment
		*/
		if (!S_wordcmp(_P_nextchr,W_bolend(bolptr)))
		{
			skip = strlen(W_bolend(bolptr));
			_P_nextchr += skip;
			return(0);
		}
		/*
		**	anything else is one more character of the comment
		*/
		_P_nextchr++;
	}
#ifndef lint
	Z_fatal("shouldn't execute this line in at end of _P_bolsnarf");
#endif
} /* _P_bolsnarf */
/*
** pass over the comment described by comptr -- look for nesting.
** _P_nextchr must point at the opening string on entry and is left
** just past the string that closes the comment. when the comment
** type nests, every further opening string found inside the comment
** has to be matched by its own closing string before the comment ends.
** returns 1 if the lines ran out before the comment was closed,
** 0 otherwise.
*/
static int
_P_comsnarf(comptr)
W_com comptr;
{
	int depth = 1; /* nesting depth */
	/*
	**	skip the start of comment string
	*/
	_P_nextchr += strlen(W_combegin(comptr));
	/*
	**	is there a separate end string
	**		if not, then we're done
	*/
	if ('\0' == *(W_comend(comptr)))
	{
		return(0);
	}
	/*
	**	loop once for each character in the comment
	*/
	while(1)
	{
		/*
		**	if we are out of characters, move on to next line
		*/
		if (_P_needmore())
		{
			if (_P_nextline())
			{
				return(1);
			}
			if (!_P_has_content)
			{
				/*
				**	since we've just gotten a command
				**		check to see if this comment
				**		is still legit ... comments
				**		could have just been reset
				**		by the command
				*/
				if (!W_is_com(comptr))
				{
					return(0);
				}
			}
		} /* if at end of line */
		/*
		**	see if we have an escaped end of comment string
		*/
		if ('\0' != *(W_comescape(comptr)) && /* escape string exists */
		    !S_wordcmp(_P_nextchr,
			       W_comescape(comptr)) && /* and escape matches */
		    !S_wordcmp(_P_nextchr+strlen(W_comescape(comptr)),
			       W_comend(comptr))) /* and end string matches */
		{
			/*
			**	skip over the escape sequence and the end sequence
			*/
			_P_nextchr += strlen(W_comescape(comptr))
					+ strlen(W_comend(comptr));
			continue;
		}
		/*
		**	see if we have an end of comment string
		*/
		if (!S_wordcmp(_P_nextchr,W_comend(comptr))) /* end matches */
		{
			/*
			**	skip over the end sequence
			*/
			_P_nextchr += strlen(W_comend(comptr));
			if (W_is_nesting(comptr))
			{
				/*
				**	this closes the innermost comment;
				**		we are done only when that was
				**		the outermost one as well
				*/
				depth--;
				if (0 == depth)
					return(0);
			}
			else
			{
				return(0);
			}
			continue;
		}
		/*
		**	see if we have another beginning of comment string.
		**	(this test used to look for the END string. the
		**	 check just above had already consumed every end
		**	 string, so the branch could never be taken and
		**	 nested comments were closed by the first end string)
		*/
		if (W_is_nesting(comptr) &&
		    !S_wordcmp(_P_nextchr,W_combegin(comptr))) /* begin matches */
		{
			_P_nextchr += strlen(W_combegin(comptr));
			depth++;
			continue;
		}
		/*
		**	this must be yet another character in the comment, so
		**		just snarf it up
		*/
		_P_nextchr++;
	} /* while loop once for each character */
#ifndef lint
	Z_fatal("should not execute this line in _P_comsnarf\n");
#endif
} /* _P_comsnarf */
/*
** parse a file
**
** walks over the lines selected by _P_start and _P_lcount, skipping
** comments (and white space unless U_INCLUDE_WS is set), and stores
** one token for every literal, floating point number, string of digits,
** word, or other single character that it meets. returns when the
** lines run out or when the token table is full.
**
** every character handed to isspace, isdigit or isalpha is first
** converted to unsigned char. those routines are only defined for
** EOF and for values that fit in an unsigned char, so passing a plain
** char that happens to be negative (any 8 bit character where char
** is signed) was undefined.
*/
static void
_P_do_parse()
{
	char *ptr;		/* scratch space */
	int tmp;
	int ret_code;		/* set when a literal ran off the end of file */
	K_token newtoken;
	W_bol bolptr;
	W_com comptr;
	W_lit litptr;
	int startline, endline, startpos;
	/*
	**	main parsing loop
	*/
	while (1)
	{
		/*
		**	get more text if necessary
		*/
		if (_P_needmore())
		{
			if (_P_nextline())
			{
				return;
			}
			/*
			**	if the line contains nothing of interest,
			**		try again
			*/
			if (!_P_has_content)
			{
				continue;
			}
			/*
			**	check to see if this line starts a comment
			*/
			if ((bolptr = W_isbol(_P_firstchr)) != W_BOLNULL)
			{
				if (_P_bolsnarf(bolptr))
				{
					return;
				}
				continue;
			}
		} /* if _P_needmore */
		/*
		**	skip whitespace
		*/
		if (!(U_INCLUDE_WS & _P_flags) &&
		    isspace((unsigned char) *_P_nextchr))
		{
			_P_nextchr++;
			continue;
		}
		/*
		**	check to see if this character starts a comment
		*/
		if ((comptr = W_iscom(_P_nextchr)) != W_COMNULL)
		{
			if (_P_comsnarf(comptr))
			{
				return;
			}
			continue;
		}
		/*
		**	if there aren't any tokens on this line already
		**	set up the index from the token line to the content line
		*/
		if (!L_getcount(_P_fnumb,L_gettlmax(_P_fnumb)))
		{
			L_settlindex(_P_fnumb,
					L_gettlmax(_P_fnumb),
					L_getclmax(_P_fnumb));
			/*
			**	and the pointer from the token line to the
			**	first token on the line
			*/
			L_setindex(_P_fnumb,
					L_gettlmax(_P_fnumb),
					K_gettmax(_P_fnumb));
		}
		/*
		**	remember where the token starts; a literal
		**		needs this to collect its text afterwards
		*/
		startline = L_tl2cl(_P_fnumb,L_gettlmax(_P_fnumb));
		startpos = _P_nextchr-_P_firstchr;
		newtoken = K_maketoken();
		K_setline(newtoken,L_gettlmax(_P_fnumb));
		K_setpos(newtoken,startpos);
		ret_code = 0;
		/*
		**	check to see if this character starts a
		**		delimited literal string
		*/
		if ((litptr = W_islit(_P_nextchr)) != W_LITNULL)
		{
			ret_code = _P_litsnarf(litptr);
			K_settype(newtoken,K_LIT);
			S_allocstr(&ptr,_P_stringsize);
			/*
			**	fixed nasty memory bug here by adding else
			**	old code copied entire line even if literal
			**	ended before the end of line
			**		should check into getting strcpy loaded
			**		locally
			*/
			endline = L_getclmax(_P_fnumb);
			if (endline > startline)
			{
				/*
				**	copy in the first line of the literal
				*/
				(void) strcpy(ptr,
					      L_getcline(_P_fnumb,startline)
							+startpos);
				/*
				**	now copy all the lines between
				**		the first and last
				*/
				for (tmp=startline+1;tmp<endline;tmp++)
				{
					(void) strcat(ptr,
						      L_getcline(_P_fnumb,tmp));
				}
				/*
				**	and now copy in the last line
				*/
				(void) strncat(ptr,
					       L_getcline(_P_fnumb,endline),
					       _P_stringsize-strlen(ptr));
			}
			else
			{
				(void) strncpy(ptr,
					       L_getcline(_P_fnumb,startline)
							+startpos,
					       _P_stringsize);
				/*
				**	terminate the string you just copied
				*/
				ptr[_P_stringsize] = '\0';
			}
			K_settext(newtoken,ptr);
		} /* if is_lit */
		/*
		**	see if this is a floating point number
		*/
		else if ((tmp = F_isfloat(_P_nextchr,
					  _P_flags & U_NEED_DECIMAL,
					  _P_flags & U_INC_SIGN)) != 0)
		{
			K_saventext(newtoken,_P_nextchr,tmp);
			K_settype(newtoken,K_FLO_NUM);
			if (!(_P_flags & U_BYTE_COMPARE))
			{
				K_setfloat(newtoken,
					   F_atof(K_gettext(newtoken),
						  USE_ALL));
				/*
				**	assign the current tolerance
				*/
				K_settol(newtoken,T_gettol(_P_next_tol));
			}
			/*
			**	use next tolerance in the
			**		specification if there is one
			*/
			if (T_moretols(_P_next_tol))
			{
				_P_next_tol++;
			}
			/*
			**	and move pointer past the float
			*/
			_P_nextchr += tmp;
		}
		/*
		**	is this a fixed point number
		*/
		else if (isdigit((unsigned char) *_P_nextchr))
		{
			for(ptr=_P_nextchr;
			    isdigit((unsigned char) *ptr);
			    ptr++)
			{
			}
			K_saventext(newtoken,_P_nextchr,ptr-_P_nextchr);
			K_settype(newtoken,K_LIT);
			_P_nextchr = ptr;
		}
		/*
		**	try an alpha-numeric word
		*/
		else if (isalpha((unsigned char) *_P_nextchr) ||
			 _P_in_alpha(*_P_nextchr))
		{
			/*
			**	it's a multi character word
			*/
			for(ptr = _P_nextchr;
			    isalpha((unsigned char) *ptr)
				|| isdigit((unsigned char) *ptr)
				|| _P_in_alpha(*ptr);
			    ptr++)
			{
			}
			K_saventext(newtoken,_P_nextchr,ptr-_P_nextchr);
			K_settype(newtoken,K_LIT);
			_P_nextchr = ptr;
		}
		else
		{
			/*
			**	otherwise, treat the char itself as a token
			*/
			K_saventext(newtoken,_P_nextchr,1);
			K_settype(newtoken,K_LIT);
			_P_nextchr++;
		}
		K_settoken(_P_fnumb,K_gettmax(_P_fnumb),newtoken);
		L_inccount(_P_fnumb,L_gettlmax(_P_fnumb));
		/*
		**	if we are out of space, complain and quit
		*/
		if (K_inctmax(_P_fnumb))
		{
			(void) sprintf(Z_err_buf,
	"warning -- to many tokens in file only first %d tokens will be used.\n",
				       K_MAXTOKENS);
			Z_complain(Z_err_buf);
			return;
		}
#ifndef NOCHATTER
		/*
		**	let the user know that we are still making progress
		*/
		if (0 == (K_gettmax(_P_fnumb) % _P_PARSE_CHATTER))
		{
			int max = K_gettmax(_P_fnumb);
			(void) sprintf(Z_err_buf,
				"scanned %d words from file #%d\n",
					max,_P_fnumb+1);
			Z_chatter(Z_err_buf);
		}
#endif
		/*
		**	are we done?
		*/
		if(ret_code)
		{
			return;
		}
	} /* loop once per object on a line */
#ifndef lint
	Z_fatal("this line should never execute");
#endif
}
/*
** parse lcnt lines of file number num, starting at real line strt,
** and store the tokens that are found. the parser and the modules
** it drives are reset before the parse begins.
*/
void
P_file_parse(num,strt,lcnt,flags)
int num;	/* file number */
int strt;	/* first line to parse expressed in real line numbers */
int lcnt;	/* max number of lines to parse */
int flags;	/* flags for controlling the parse mode */
{
	/*
	**	record the request where the rest of the module can see it
	*/
	_P_flags = flags;
	_P_lcount = lcnt;
	_P_start = strt;
	_P_fnumb = num;
	_P_initparser();
	/*
	**	no line has been read yet
	*/
	_P_has_content = 0;
	_P_next_tol = 0;
	_P_nextchr = _P_dummyline;
	L_setcount(_P_fnumb,L_gettlmax(_P_fnumb),0);
	/*
	**	the line number is bumped before a line is fetched,
	**		so begin one line short of the first line wanted
	*/
	_P_realline = strt-1;
	_P_do_parse();
	if (!_P_has_content)
	{
		return;
	}
	/*
	**	we only get here if the parse stopped part of the way through
	**	a line. the original author had not examined this case
	**	carefully and expected that it might be buggy.
	*/
	(void) sprintf(Z_err_buf,"parser got confused at end of file\n");
	Z_complain(Z_err_buf);
	/*
	**	count the unfinished line, and its token line if it has tokens
	*/
	L_incclmax(_P_fnumb);
	if (L_getcount(_P_fnumb,L_gettlmax(_P_fnumb)))
		L_inctlmax(_P_fnumb);
	return;
}