| /* Copyright (c) 1988 Bellcore |
| ** All Rights Reserved |
| ** Permission is granted to copy or use this program, EXCEPT that it |
| ** may not be sold for profit, the copyright notice must be reproduced |
| ** on copies, and credit should be given to Bellcore where it is due. |
| ** BELLCORE MAKES NO WARRANTY AND ACCEPTS NO LIABILITY FOR THIS PROGRAM. |
| */ |
| |
| |
#ifndef lint
/* source identification string; "$Header$" is presumably expanded by the
   revision-control system at checkout.  compiled out when lint is defined */
static char rcsid[]= "$Header$";
#endif
| |
#include "misc.h"
#include "flagdefs.h"
#include "float.h"
#include "tol.h"
#include "token.h"
#include "line.h"
#include "command.h"
#include "comment.h"
#include "parse.h"


#include <ctype.h>
#include <string.h>
| |
/* a progress message is emitted each time the number of tokens scanned
   reaches a multiple of this value (unless NOCHATTER is defined) */
#define _P_PARSE_CHATTER 1000


static int _P_realline;	/* real line number of the line being parsed;
			   advanced by _P_nextline() */
static int _P_fnumb;	/* number of the file being parsed
			   (set by P_file_parse) */

static char *_P_nextchr; /* pointer to the next character to parse */
static char *_P_firstchr; /* pointer to the beginning of the line being parsed */
static int _P_next_tol;	/* index of the tolerance to give the next float
			   on this line; reset to 0 for every new line */
static int _P_stringsize;	/* count of number of characters that are being
				   read into a literal (set by _P_litsnarf) */
static int _P_has_content;	/* flag to indicate if the line being
				   parsed is a content line, i.e. not an
				   embedded command */
static int _P_start;	/* first line to parse */
static int _P_lcount;	/* number of lines to parse */

static int _P_flags;	/* location for global flags (U_* bits handed
			   to P_file_parse) */
| |
| /* |
| ** by default, "words" can be made up of numbers and letters |
| ** the following code allows for extending the alphabet that can |
| ** be used in words. this is useful for handling languages such |
| ** as C where the underscore character is an allowable character |
| ** in an identifier. If a character (such as underscore) is NOT added |
| ** to the alphabet, the identifier will be broken into 2 or more "words" |
| ** by the parser. as such the two sequences |
| ** one_two |
| ** and |
| ** one _ two |
| ** would look identical to spiff. |
| */ |
#define _P_ALPHALEN 256
/* NUL-terminated list of the extra characters allowed inside words */
static char _P_alpha[_P_ALPHALEN];

/*
**	empty the extended alphabet
*/
static void
_P_alpha_clear(void)
{
	_P_alpha[0] = '\0';
}


/*
**	test whether a character belongs to the extended alphabet
**
**	chr	the character to look up
**
**	returns 1 if chr appears in _P_alpha, 0 otherwise.
**	(callers only test the result for zero/non-zero.)
*/
static int
_P_in_alpha(char chr)
{
	/*
	**	special case when string terminator
	**	is handed to us -- strchr() would otherwise
	**	match the NUL that terminates _P_alpha
	*/
	if ('\0' == chr)
		return(0);

	/*
	**	compare against NULL rather than casting the pointer to
	**	int: the cast can truncate on machines where pointers
	**	are wider than int and so misreport a match
	*/
	return(strchr(_P_alpha,chr) != NULL);
}
| |
| void |
| P_addalpha(ptr) |
| char *ptr; |
| { |
| char buf[Z_LINELEN]; |
| |
| S_wordcpy(buf,ptr); /* copy up to (but not including) |
| the first whitespace char */ |
| |
| if ((strlen(_P_alpha) + strlen(buf)) >= _P_ALPHALEN) |
| { |
| Z_fatal("too many characters added to extended alphabet"); |
| } |
| (void) strcat(_P_alpha,buf); |
| } |
| |
/*
**	return the parser, and every module it drives, to the default
**	state and then apply the commands given at execute time
*/

static char _P_dummyline[2];	/* an empty line -- a safe place to aim
				   _P_nextchr when there is no text to parse */
static void
_P_initparser(void)
{
	/*
	**	make the dummy line an empty string
	*/
	*_P_dummyline = '\0';

	/*
	**	wipe the state held by each module ...
	*/
	C_clear_cmd();		/* no embedded command key word */
	T_clear_tols();
	W_clearcoms();
	W_clearlits();
	_P_alpha_clear();	/* no extended alphabet */

	/*
	**	... then rebuild it from the execute-time commands
	*/
	C_docmds();
}
| |
| |
| static int |
| _P_needmore() |
| { |
| return(*_P_nextchr == '\0'); |
| } |
| |
| static int |
| _P_nextline() |
| { |
| /* |
| ** if the line that we just finished had |
| ** some content, increment the count |
| */ |
| if (_P_has_content) |
| { |
| L_incclmax(_P_fnumb); |
| /* |
| ** if the previous line had a token |
| ** increment the line |
| */ |
| if (L_getcount(_P_fnumb,L_gettlmax(_P_fnumb))) |
| { |
| L_inctlmax(_P_fnumb); |
| L_setcount(_P_fnumb,L_gettlmax(_P_fnumb),0); |
| } |
| _P_has_content = 0; |
| } |
| |
| /* |
| ** reset the number of floats seen on the line |
| */ |
| _P_next_tol = 0; |
| |
| /* |
| ** get another line if there is one available |
| */ |
| _P_realline++; |
| if (_P_realline >= _P_start+_P_lcount) |
| { |
| return(1); |
| } |
| |
| _P_firstchr = _P_nextchr = L_getrline(_P_fnumb,_P_realline); |
| /* |
| ** and look for a command |
| */ |
| if (C_is_cmd(_P_firstchr)) |
| { |
| _P_nextchr = _P_dummyline; |
| _P_has_content = 0; |
| } |
| else |
| { |
| /* |
| ** we have a real line, so set up the index |
| */ |
| L_setclindex(_P_fnumb,L_getclmax(_P_fnumb),_P_realline); |
| _P_has_content = 1; |
| } |
| return(0); |
| } |
| |
/*
**	the following three routines (_P_litsnarf, _P_bolsnarf, and _P_comsnarf)
**	all do roughly the same thing.  they scan ahead and collect the
**	specified string, move _P_nextchr to the end of the
**	comment or literal and return 1 if we run off the end of file,
**	0 otherwise.  it would have been nice to have 1 routine handle
**	all three tasks (there is much common code), however there were
**	enough differences (for instance, only comments check for nesting,
**	only literals need to set _P_stringsize, etc.)
**	that I decided to split them up.
*/
| static int |
| _P_litsnarf(litptr) |
| W_lit litptr; |
| { |
| _P_stringsize = 0; |
| /* |
| ** skip the start of literal string |
| */ |
| _P_nextchr += strlen(W_litbegin(litptr)); |
| _P_stringsize += strlen(W_litbegin(litptr)); |
| /* |
| ** is there a separate end string? |
| ** if not, then we're done |
| */ |
| if ('\0' == *(W_litend(litptr))) |
| { |
| return(0); |
| } |
| /* |
| ** loop once for each character in the literal |
| */ |
| while(1) |
| { |
| /* |
| ** if we are out of characters, move on to next line |
| */ |
| if (_P_needmore()) |
| { |
| if (_P_nextline()) |
| { |
| return(1); |
| } |
| if (!_P_has_content) |
| { |
| /* |
| ** since we've just gotten a command |
| ** check to see if this literal |
| ** is still legit ... |
| ** could have just been reset |
| ** by the command |
| */ |
| if (!W_is_lit(litptr)) |
| { |
| return(0); |
| } |
| } |
| } /* if _P_needmore */ |
| |
| /* |
| ** see if we have an escaped end of literal string |
| */ |
| if (('\0' != *(W_litescape(litptr))) && /* escape string exists */ |
| !S_wordcmp(_P_nextchr, |
| W_litescape(litptr)) && /* and escape matches */ |
| !S_wordcmp(_P_nextchr+strlen(W_litescape(litptr)), |
| W_litend(litptr))) /* and endstring matches */ |
| { |
| _P_nextchr += strlen(W_litescape(litptr)) |
| + strlen(W_litend(litptr)); |
| _P_stringsize += strlen(W_litescape(litptr)) |
| + strlen(W_litend(litptr)); |
| continue; |
| } |
| |
| /* |
| ** see if we have an end of literal string |
| */ |
| if (!S_wordcmp(_P_nextchr,W_litend(litptr))) /* escape matches */ |
| { |
| _P_nextchr += strlen(W_litend(litptr)); |
| _P_stringsize += strlen(W_litend(litptr)); |
| return(0); |
| } |
| /* |
| ** this must be yet another character in the literal, so |
| ** just snarf it up |
| */ |
| _P_nextchr++; |
| _P_stringsize++; |
| } /* while loop once for each character */ |
| |
| #ifndef lint |
| Z_fatal("shouldn't execute this line at the end of _P_litsnarf"); |
| #endif |
| } /* _P_litsnarf */ |
| |
| static int |
| _P_bolsnarf(bolptr) |
| W_bol bolptr; |
| { |
| /* |
| ** skip the start of comment string |
| */ |
| _P_nextchr += strlen(W_bolbegin(bolptr)); |
| /* |
| ** is there a separate end string |
| ** if not, then we're done |
| */ |
| if ('\0' == *(W_bolend(bolptr))) |
| { |
| return(0); |
| } |
| /* |
| ** loop once for each character in the comment |
| */ |
| while(1) |
| { |
| /* |
| ** if we are out of characters,move on to next line |
| */ |
| if (_P_needmore()) |
| { |
| if (_P_nextline()) |
| { |
| return(1); |
| } |
| if (!_P_has_content) |
| { |
| /* |
| ** since we've just gotten a command |
| ** check to see if this comment |
| ** is still legit ... comments |
| ** could have just been reset |
| ** by the command |
| */ |
| if (!W_is_bol(bolptr)) |
| { |
| return(0); |
| } |
| } |
| } /* if at end of line */ |
| |
| /* |
| ** see if we have an escaped end of comment string |
| */ |
| if ('\0' != *(W_bolescape(bolptr)) && /* escape string exists */ |
| !S_wordcmp(_P_nextchr, |
| W_bolescape(bolptr)) && /* and escape matches */ |
| !S_wordcmp(_P_nextchr+strlen(W_bolescape(bolptr)), |
| W_bolend(bolptr))) /* and end string matches */ |
| { |
| _P_nextchr += strlen(W_bolescape(bolptr)) |
| + strlen(W_bolend(bolptr)); |
| continue; |
| } |
| |
| /* |
| ** see if we have an end of comment string |
| */ |
| if (!S_wordcmp(_P_nextchr,W_bolend(bolptr))) |
| { |
| _P_nextchr += strlen(W_bolend(bolptr)); |
| return(0); |
| } |
| /* |
| ** this must be yet another character in the comment, so |
| ** just snarf it up |
| */ |
| _P_nextchr++; |
| } /* while loop once for each character */ |
| |
| #ifndef lint |
| Z_fatal("shouldn't execute this line in at end of _P_bolsnarf"); |
| #endif |
| } /* _P_bolsnarf */ |
| |
| /* |
| ** pass over a comment -- look for nexting |
| */ |
| static int |
| _P_comsnarf(comptr) |
| W_com comptr; |
| { |
| int depth = 1; /* nesting depth */ |
| /* |
| ** skip the start of comment string |
| */ |
| _P_nextchr += strlen(W_combegin(comptr)); |
| |
| /* |
| ** is there a separate end string |
| ** if not, then we're done |
| */ |
| if ('\0' == *(W_comend(comptr))) |
| { |
| return(0); |
| } |
| /* |
| ** loop once for each character in the comment |
| */ |
| while(1) |
| { |
| /* |
| ** if we are out of characters, move on to next line |
| */ |
| if (_P_needmore()) |
| { |
| if (_P_nextline()) |
| { |
| return(1); |
| } |
| if (!_P_has_content) |
| { |
| /* |
| ** since we've just gotten a command |
| ** check to see if this comment |
| ** is still legit ... comments |
| ** could have just been reset |
| ** by the command |
| */ |
| if (!W_is_com(comptr)) |
| { |
| return(0); |
| } |
| } |
| } /* if at end of line */ |
| |
| /* |
| ** see if we have an escaped end of comment string |
| */ |
| if ('\0' != *(W_comescape(comptr)) && /* escape string exists */ |
| !S_wordcmp(_P_nextchr, |
| W_comescape(comptr)) && /* and escape matches */ |
| !S_wordcmp(_P_nextchr+strlen(W_comescape(comptr)), |
| W_comend(comptr))) /* and end string matches */ |
| { |
| /* |
| ** skip over the escape sequence and the end sequence |
| */ |
| _P_nextchr += strlen(W_comescape(comptr)) |
| + strlen(W_comend(comptr)); |
| continue; |
| } |
| |
| /* |
| ** see if we have an end of comment string |
| */ |
| if (!S_wordcmp(_P_nextchr,W_comend(comptr))) /* end matches */ |
| { |
| /* |
| ** skip over the end sequence |
| */ |
| _P_nextchr += strlen(W_comend(comptr)); |
| if (W_is_nesting(comptr)) |
| { |
| depth--; |
| if (0 == depth) |
| return(0); |
| } |
| else |
| { |
| return(0); |
| } |
| continue; |
| } |
| /* |
| ** see if we have another beginning of comment string |
| */ |
| if (W_is_nesting(comptr) && |
| !S_wordcmp(_P_nextchr,W_comend(comptr))) /* end matches */ |
| { |
| _P_nextchr += strlen(W_comend(comptr)); |
| depth++; |
| continue; |
| } |
| /* |
| ** this must be yet another character in the comment, so |
| ** just snarf it up |
| */ |
| _P_nextchr++; |
| } /* while loop once for each character */ |
| |
| #ifndef lint |
| Z_fatal("should not execute this line in _P_comsnarf\n"); |
| #endif |
| |
| } /* _P_comsnarf */ |
| |
| |
| /* |
| ** parse a file |
| */ |
| static void |
| _P_do_parse() |
| { |
| |
| char *ptr; /* scratch space */ |
| int tmp; |
| int ret_code; |
| |
| K_token newtoken; |
| W_bol bolptr; |
| W_com comptr; |
| W_lit litptr; |
| |
| int startline, endline, startpos; |
| |
| /* |
| ** main parsing loop |
| */ |
| while (1) |
| { |
| /* |
| ** get more text if necessary |
| */ |
| if (_P_needmore()) |
| { |
| if (_P_nextline()) |
| { |
| return; |
| } |
| |
| /* |
| ** if the line contains nothing of interest, |
| ** try again |
| */ |
| if (!_P_has_content) |
| { |
| continue; |
| } |
| |
| /* |
| ** check to see if this line starts a comment |
| */ |
| if ((bolptr = W_isbol(_P_firstchr)) != W_BOLNULL) |
| { |
| if (_P_bolsnarf(bolptr)) |
| { |
| return; |
| } |
| continue; |
| } |
| } /* if _P_needmore */ |
| |
| /* |
| ** skip whitespace |
| */ |
| if (!(U_INCLUDE_WS & _P_flags) && isspace(*_P_nextchr)) |
| { |
| _P_nextchr++; |
| continue; |
| } |
| |
| /* |
| ** check to see if this character starts a comment |
| */ |
| if ((comptr = W_iscom(_P_nextchr)) != W_COMNULL) |
| { |
| if (_P_comsnarf(comptr)) |
| { |
| return; |
| } |
| continue; |
| } |
| |
| /* |
| ** if there aren't any tokens on this line already |
| ** set up the index from the token line to the content line |
| */ |
| if (!L_getcount(_P_fnumb,L_gettlmax(_P_fnumb))) |
| { |
| L_settlindex(_P_fnumb, |
| L_gettlmax(_P_fnumb), |
| L_getclmax(_P_fnumb)); |
| /* |
| ** and the pointer from the token line to the |
| ** first token on the line |
| */ |
| L_setindex(_P_fnumb, |
| L_gettlmax(_P_fnumb), |
| K_gettmax(_P_fnumb)); |
| } |
| |
| startline = L_tl2cl(_P_fnumb,L_gettlmax(_P_fnumb)); |
| startpos = _P_nextchr-_P_firstchr; |
| |
| newtoken = K_maketoken(); |
| K_setline(newtoken,L_gettlmax(_P_fnumb)); |
| K_setpos(newtoken,startpos); |
| |
| ret_code = 0; |
| /* |
| ** check to see if this character starts a |
| ** delimited literal string |
| */ |
| if ((litptr = W_islit(_P_nextchr)) != W_LITNULL) |
| { |
| ret_code = _P_litsnarf(litptr); |
| K_settype(newtoken,K_LIT); |
| S_allocstr(&ptr,_P_stringsize); |
| /* |
| ** fixed nasty memory bug here by adding else |
| ** old code copied entire line even if literal |
| ** ended before the end of line |
| ** should check into getting strcpy loaded |
| ** locally |
| */ |
| endline = L_getclmax(_P_fnumb); |
| if (endline > startline) |
| { |
| /* |
| ** copy in the first line of the literal |
| */ |
| (void) strcpy(ptr, |
| L_getcline(_P_fnumb,startline) |
| +startpos); |
| /* |
| ** now copy all the lines between |
| ** the first and last |
| */ |
| for (tmp=startline+1;tmp<endline;tmp++) |
| { |
| (void) strcat(ptr, |
| L_getcline(_P_fnumb,tmp)); |
| } |
| /* |
| ** and now copy in the last line |
| */ |
| (void) strncat(ptr, |
| L_getcline(_P_fnumb,endline), |
| _P_stringsize-strlen(ptr)); |
| } |
| else |
| { |
| (void) strncpy(ptr, |
| L_getcline(_P_fnumb,startline) |
| +startpos, |
| _P_stringsize); |
| /* |
| ** terminate the string you just copied |
| */ |
| ptr[_P_stringsize] = '\0'; |
| } |
| K_settext(newtoken,ptr); |
| } /* if is_lit */ |
| |
| /* |
| ** see if this is a floating point number |
| */ |
| else if ((tmp = F_isfloat(_P_nextchr, |
| _P_flags & U_NEED_DECIMAL, |
| _P_flags & U_INC_SIGN)) != 0) |
| { |
| K_saventext(newtoken,_P_nextchr,tmp); |
| K_settype(newtoken,K_FLO_NUM); |
| if (!(_P_flags & U_BYTE_COMPARE)) |
| { |
| K_setfloat(newtoken, |
| F_atof(K_gettext(newtoken), |
| USE_ALL)); |
| |
| /* |
| ** assign the curent tolerance |
| */ |
| K_settol(newtoken,T_gettol(_P_next_tol)); |
| } |
| |
| /* |
| ** use next tolerance in the |
| ** specification if there is one |
| */ |
| if (T_moretols(_P_next_tol)) |
| { |
| _P_next_tol++; |
| } |
| /* |
| ** and move pointer past the float |
| */ |
| _P_nextchr += tmp; |
| } |
| |
| /* |
| ** is this a fixed point number |
| */ |
| else if (isdigit(*_P_nextchr)) |
| { |
| for(ptr=_P_nextchr; isdigit(*ptr); ptr++) |
| { |
| } |
| K_saventext(newtoken,_P_nextchr,ptr-_P_nextchr); |
| K_settype(newtoken,K_LIT); |
| _P_nextchr = ptr; |
| } |
| |
| /* |
| ** try an alpha-numeric word |
| */ |
| else if (isalpha(*_P_nextchr) || _P_in_alpha(*_P_nextchr)) |
| { |
| /* |
| ** it's a multi character word |
| */ |
| for(ptr = _P_nextchr; |
| isalpha(*ptr) |
| || isdigit(*ptr) |
| || _P_in_alpha(*ptr); |
| ptr++) |
| { |
| } |
| K_saventext(newtoken,_P_nextchr,ptr-_P_nextchr); |
| K_settype(newtoken,K_LIT); |
| _P_nextchr = ptr; |
| } |
| else |
| { |
| /* |
| ** otherwise, treat the char itself as a token |
| */ |
| K_saventext(newtoken,_P_nextchr,1); |
| K_settype(newtoken,K_LIT); |
| _P_nextchr++; |
| } |
| |
| K_settoken(_P_fnumb,K_gettmax(_P_fnumb),newtoken); |
| L_inccount(_P_fnumb,L_gettlmax(_P_fnumb)); |
| /* |
| ** if we are out of space, complain and quit |
| */ |
| if (K_inctmax(_P_fnumb)) |
| { |
| (void) sprintf(Z_err_buf, |
| "warning -- to many tokens in file only first %d tokens will be used.\n", |
| K_MAXTOKENS); |
| Z_complain(Z_err_buf); |
| return; |
| } |
| #ifndef NOCHATTER |
| if (0 == (K_gettmax(_P_fnumb) % _P_PARSE_CHATTER)) |
| { |
| int max = K_gettmax(_P_fnumb); |
| (void) sprintf(Z_err_buf, |
| "scanned %d words from file #%d\n", |
| max,_P_fnumb+1); |
| Z_chatter(Z_err_buf); |
| } |
| #endif |
| |
| /* |
| ** are we done? |
| */ |
| if(ret_code) |
| { |
| return; |
| } |
| } /* loop once per object on a line */ |
| |
| #ifndef lint |
| Z_fatal("this line should never execute"); |
| #endif |
| } |
| |
| void |
| P_file_parse(num,strt,lcnt,flags) |
| int num; /* file number */ |
| int strt; /* first line to parse expressed in real line numbers */ |
| int lcnt; /* max number of lines to parse */ |
| int flags; /* flags for controlling the parse mode */ |
| { |
| /* |
| ** set module-wide state variables |
| */ |
| _P_fnumb = num; |
| _P_start = strt; |
| _P_lcount = lcnt; |
| _P_flags = flags; |
| |
| _P_initparser(); |
| |
| _P_nextchr = _P_dummyline; |
| |
| _P_has_content = 0; |
| _P_next_tol = 0; |
| L_setcount(_P_fnumb,L_gettlmax(_P_fnumb),0); |
| /* |
| ** start everything back one line (it will be incremented |
| ** just before the first line is accessed |
| */ |
| _P_realline = _P_start-1; |
| |
| _P_do_parse(); |
| |
| /* |
| ** if the last line had content, increment the count |
| */ |
| if (_P_has_content) |
| { |
| /* |
| ** this code will get executed if we stopped parsing in the middle |
| ** of a line. i haven't looked at this case carefully. |
| ** so, there is a good chance that it is buggy. |
| */ |
| (void) sprintf(Z_err_buf,"parser got confused at end of file\n"); |
| Z_complain(Z_err_buf); |
| L_incclmax(_P_fnumb); |
| if (L_getcount(_P_fnumb,L_gettlmax(_P_fnumb))) |
| L_inctlmax(_P_fnumb); |
| } |
| return; |
| } |