#ifndef lint
/* Revision-control identification string; omitted when running lint. */
static char Rcs_Id[] = "$Id$";
#endif
| |
| /* |
| * Copyright 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * 3. All modifications to the source code must be clearly marked as |
| * such. Binary redistributions based on modified source code |
| * must be clearly marked as modified versions in the documentation |
| * and/or other materials provided with the distribution. |
| * 4. All advertising materials mentioning features or use of this software |
| * must display the following acknowledgment: |
| * This product includes software developed by Geoff Kuenning and |
| * other unpaid contributors. |
| * 5. The name of Geoff Kuenning may not be used to endorse or promote |
| * products derived from this software without specific prior |
| * written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND |
| * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| */ |
| |
| /* |
| * $Log$ |
| * Revision 1.1 2007/01/09 23:57:18 lattner |
| * initial recheckin of mibench |
| * |
| * Revision 1.1.1.1 2007/01/09 02:59:03 evancheng |
| * Add selected tests from MiBench 1.0 to LLVM test suite. |
| * |
| * Revision 1.45 1994/12/27 23:08:52 geoff |
| * Add code to makedent to reject words that contain non-word characters. |
| * This helps protect people who use ISO 8-bit characters when ispell |
| * isn't configured for that option. |
| * |
| * Revision 1.44 1994/10/25 05:46:20 geoff |
| * Fix some incorrect declarations in the lint versions of some routines. |
| * |
| * Revision 1.43 1994/09/16 03:32:34 geoff |
| * Issue an error message for bad affix flags |
| * |
| * Revision 1.42 1994/02/07 04:23:43 geoff |
| * Correctly identify the deformatter when changing file types |
| * |
| * Revision 1.41 1994/01/25 07:11:55 geoff |
| * Get rid of all old RCS log lines in preparation for the 3.1 release. |
| * |
| */ |
| |
| #include "config.h" |
| #include "ispell.h" |
| #include "proto.h" |
| #include "msgs.h" |
| |
| int makedent P ((char * lbuf, int lbuflen, struct dent * ent)); |
| #ifndef NO_CAPITALIZATION_SUPPORT |
| long whatcap P ((ichar_t * word)); |
| #endif |
| int addvheader P ((struct dent * ent)); |
| int combinecaps P ((struct dent * hdr, struct dent * newent)); |
| #ifndef NO_CAPITALIZATION_SUPPORT |
| static void forcevheader P ((struct dent * hdrp, struct dent * oldp, |
| struct dent * newp)); |
| #endif /* NO_CAPITALIZATION_SUPPORT */ |
| static int combine_two_entries P ((struct dent * hdrp, |
| struct dent * oldp, struct dent * newp)); |
| static int acoversb P ((struct dent * enta, struct dent * entb)); |
| void upcase P ((ichar_t * string)); |
| void lowcase P ((ichar_t * string)); |
| void chupcase P ((char * s)); |
| static int issubset P ((struct dent * ent1, struct dent * ent2)); |
| static void combineaffixes P ((struct dent * ent1, struct dent * ent2)); |
| void toutent P ((FILE * outfile, struct dent * hent, |
| int onlykeep)); |
| static void toutword P ((FILE * outfile, char * word, |
| struct dent * cent)); |
| static void flagout P ((FILE * outfile, int flag)); |
| int stringcharlen P ((char * bufp, int canonical)); |
| int strtoichar P ((ichar_t * out, char * in, int outlen, |
| int canonical)); |
| int ichartostr P ((char * out, ichar_t * in, int outlen, |
| int canonical)); |
| ichar_t * strtosichar P ((char * in, int canonical)); |
| char * ichartosstr P ((ichar_t * in, int canonical)); |
| char * printichar P ((int in)); |
| #ifndef ICHAR_IS_CHAR |
| ichar_t * icharcpy P ((ichar_t * out, ichar_t * in)); |
| int icharlen P ((ichar_t * str)); |
| int icharcmp P ((ichar_t * s1, ichar_t * s2)); |
| int icharncmp P ((ichar_t * s1, ichar_t * s2, int n)); |
| #endif /* ICHAR_IS_CHAR */ |
| int findfiletype P ((char * name, int searchnames, |
| int * deformatter)); |
| |
| static int has_marker; |
| |
| /* |
| * Fill in a directory entry, including setting the capitalization flags, and |
| * allocate and initialize memory for the d->word field. Returns -1 |
| * if there was trouble. The input word must be in canonical form. |
| */ |
| |
| int makedent (lbuf, lbuflen, d) |
| char * lbuf; |
| int lbuflen; |
| struct dent * d; |
| { |
| ichar_t ibuf[INPUTWORDLEN + MAXAFFIXLEN]; |
| ichar_t * ip; |
| char * p; |
| int bit; |
| int len; |
| |
| /* Strip off any trailing newline */ |
| len = strlen (lbuf) - 1; |
| if (lbuf[len] == '\n') |
| lbuf[len] = '\0'; |
| |
| d->next = NULL; |
| /* WARNING: flagfield might be the same as mask! See ispell.h. */ |
| d->flagfield = 0; |
| (void) bzero ((char *) d->mask, sizeof (d->mask)); |
| d->flagfield |= USED; |
| d->flagfield &= ~KEEP; |
| |
| p = index (lbuf, hashheader.flagmarker); |
| if (p != NULL) |
| *p = 0; |
| |
| /* |
| ** Convert the word to an ichar_t and back; this makes sure that |
| ** it is in canonical form and thus that the length is correct. |
| */ |
| if (strtoichar (ibuf, lbuf, INPUTWORDLEN * sizeof (ichar_t), 1) |
| || ichartostr (lbuf, ibuf, lbuflen, 1)) |
| { |
| (void) fprintf (stderr, WORD_TOO_LONG (lbuf)); |
| return (-1); |
| } |
| /* |
| ** Make sure the word is well-formed (contains only legal characters). |
| */ |
| for (ip = ibuf; *ip != 0; ip++) |
| { |
| if (!iswordch (*ip)) |
| { |
| /* Boundary characters are legal as long as they're not at edges */ |
| if (!isboundarych (*ip) |
| || ip == ibuf || ip[1] == 0) |
| { |
| (void) fprintf (stderr, MAKEDENT_C_BAD_WORD_CHAR, lbuf); |
| return -1; |
| } |
| } |
| } |
| len = strlen (lbuf); |
| #ifndef NO_CAPITALIZATION_SUPPORT |
| /* |
| ** Figure out the capitalization rules from the capitalization of |
| ** the sample entry. |
| */ |
| d->flagfield |= whatcap (ibuf); |
| #endif |
| |
| if (len > INPUTWORDLEN - 1) |
| { |
| (void) fprintf (stderr, WORD_TOO_LONG (lbuf)); |
| return (-1); |
| } |
| |
| d->word = mymalloc ((unsigned) len + 1); |
| if (d->word == NULL) |
| { |
| (void) fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, lbuf); |
| return -1; |
| } |
| |
| (void) strcpy (d->word, lbuf); |
| #ifdef NO_CAPITALIZATION_SUPPORT |
| chupcase (d->word); |
| #else /* NO_CAPITALIZATION_SUPPORT */ |
| if (captype (d->flagfield) != FOLLOWCASE) |
| chupcase (d->word); |
| #endif /* NO_CAPITALIZATION_SUPPORT */ |
| if (p == NULL) |
| return (0); |
| |
| p++; |
| while (*p != '\0' && *p != '\n') |
| { |
| bit = CHARTOBIT ((unsigned char) *p); |
| if (bit >= 0 && bit <= LARGESTFLAG) |
| SETMASKBIT (d->mask, bit); |
| else |
| (void) fprintf (stderr, BAD_FLAG, (unsigned char) *p); |
| p++; |
| if (*p == hashheader.flagmarker) |
| p++; /* Handle old-format dictionaries too */ |
| } |
| return (0); |
| } |
| |
| #ifndef NO_CAPITALIZATION_SUPPORT |
| /* |
| ** Classify the capitalization of a sample entry. Returns one of the |
| ** four capitalization codes ANYCASE, ALLCAPS, CAPITALIZED, or FOLLOWCASE. |
| */ |
| |
| long whatcap (word) |
| register ichar_t * word; |
| { |
| register ichar_t * p; |
| |
| for (p = word; *p; p++) |
| { |
| if (mylower (*p)) |
| break; |
| } |
| if (*p == '\0') |
| return ALLCAPS; |
| else |
| { |
| for ( ; *p; p++) |
| { |
| if (myupper (*p)) |
| break; |
| } |
| if (*p == '\0') |
| { |
| /* |
| ** No uppercase letters follow the lowercase ones. |
| ** If there is more than one uppercase letter, it's |
| ** "followcase". If only the first one is capitalized, |
| ** it's "capitalize". If there are no capitals |
| ** at all, it's ANYCASE. |
| */ |
| if (myupper (word[0])) |
| { |
| for (p = word + 1; *p != '\0'; p++) |
| { |
| if (myupper (*p)) |
| return FOLLOWCASE; |
| } |
| return CAPITALIZED; |
| } |
| else |
| return ANYCASE; |
| } |
| else |
| return FOLLOWCASE; /* .../lower/upper */ |
| } |
| } |
| |
| /* |
| ** Add a variant-capitalization header to a word. This routine may be |
| ** called even for a followcase word that doesn't yet have a header. |
| ** |
| ** Returns 0 if all was ok, -1 if allocation error. |
| */ |
| int addvheader (dp) |
| register struct dent * dp; /* Entry to update */ |
| { |
| register struct dent * tdent; /* Copy of entry */ |
| |
| /* |
| ** Add a second entry with the correct capitalization, and then make |
| ** dp into a special dummy entry. |
| */ |
| tdent = (struct dent *) mymalloc (sizeof (struct dent)); |
| if (tdent == NULL) |
| { |
| (void) fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, dp->word); |
| return -1; |
| } |
| *tdent = *dp; |
| if (captype (tdent->flagfield) != FOLLOWCASE) |
| tdent->word = NULL; |
| else |
| { |
| /* Followcase words need a copy of the capitalization */ |
| tdent->word = mymalloc ((unsigned int) strlen (tdent->word) + 1); |
| if (tdent->word == NULL) |
| { |
| (void) fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, dp->word); |
| myfree ((char *) tdent); |
| return -1; |
| } |
| (void) strcpy (tdent->word, dp->word); |
| } |
| chupcase (dp->word); |
| dp->next = tdent; |
| dp->flagfield &= ~CAPTYPEMASK; |
| dp->flagfield |= (ALLCAPS | MOREVARIANTS); |
| return 0; |
| } |
| #endif /* NO_CAPITALIZATION_SUPPORT */ |
| |
| /* |
| ** Combine and resolve the entries describing two capitalizations of the same |
| ** word. This may require allocating yet more entries. |
| ** |
| ** Hdrp is a pointer into a hash table. If the word covered by hdrp has |
| ** variations, hdrp must point to the header. Newp is a pointer to temporary |
| ** storage, and space is malloc'ed if newp is to be kept. The newp->word |
| ** field must have been allocated with mymalloc, so that this routine may free |
| ** the space if it keeps newp but not the word. |
| ** |
| ** Return value: 0 if the word was added, 1 if the word was combined |
| ** with an existing entry, and -1 if trouble occurred (e.g., malloc). |
| ** If 1 is returned, newp->word may have been freed using myfree. |
| ** |
| ** Life is made much more difficult by the KEEP flag's possibilities. We |
| ** must ensure that a !KEEP word doesn't find its way into the personal |
| ** dictionary as a result of this routine's actions. However, a !KEEP |
| ** word that has affixes must have come from the main dictionary, so it |
| ** is acceptable to combine entries in that case (got that?). |
| ** |
| ** The net result of all this is a set of rules that is a bloody pain |
| ** to figure out. Basically, we want to choose one of the following actions: |
| ** |
| ** (1) Add newp's affixes and KEEP flag to oldp, and discard newp. |
| ** (2) Add oldp's affixes and KEEP flag to newp, replace oldp with |
| ** newp, and discard newp. |
| #ifndef NO_CAPITALIZATION_SUPPORT |
| ** (3) Insert newp as a new entry in the variants list. If there is |
| ** currently no variant header, this requires adding one. Adding a |
| ** header splits into two sub-cases: |
| ** |
| ** (3a) If oldp is ALLCAPS and the KEEP flags match, just turn it |
| ** into the header. |
| ** (3b) Otherwise, add a new entry to serve as the header. |
| ** To ease list linking, this is done by copying oldp into |
| ** the new entry, and then performing (3a). |
| ** |
| ** After newp has been added as a variant, its affixes and KEEP |
| ** flag are OR-ed into the variant header. |
| #endif |
| ** |
| ** So how to choose which? The default is always case (3), which adds newp |
| ** as a new entry in the variants list. Cases (1) and (2) are symmetrical |
| ** except for which entry is discarded. We can use case (1) or (2) whenever |
| ** one entry "covers" the other. "Covering" is defined as follows: |
| ** |
| ** (4) For entries with matching capitalization types, A covers B |
| ** if: |
| ** |
| ** (4a) B's affix flags are a subset of A's, or the KEEP flags |
| ** match, and |
| ** (4b) either the KEEP flags match, or A's KEEP flag is set. |
| ** (Since A has more suffixes, combining B with it won't |
| ** cause any extra suffixes to be added to the dictionary.) |
| ** (4c) If the words are FOLLOWCASE, the capitalizations match |
| ** exactly. |
| ** |
| #ifndef NO_CAPITALIZATION_SUPPORT |
| ** (5) For entries with mismatched capitalization types, A covers B |
| ** if (4a) and (4b) are true, and: |
| ** |
| ** (5a) B is ALLCAPS, or |
| ** (5b) A is ANYCASE, and B is CAPITALIZED. |
| #endif |
| ** |
| ** For any "hdrp" without variants, oldp is the same as hdrp. Otherwise, |
| ** the above tests are applied using each variant in turn for oldp. |
| */ |
| int combinecaps (hdrp, newp) |
| struct dent * hdrp; /* Header of entry currently in dictionary */ |
| register struct dent * |
| newp; /* Entry to add */ |
| { |
| register struct dent * |
| oldp; /* Current "oldp" entry */ |
| #ifndef NO_CAPITALIZATION_SUPPORT |
| register struct dent * |
| tdent; /* Entry we'll add to the dictionary */ |
| #endif /* NO_CAPITALIZATION_SUPPORT */ |
| register int retval = 0; /* Return value from combine_two_entries */ |
| |
| /* |
| ** First, see if we can combine the two entries (cases 1 and 2). If |
| ** combine_two_entries does so, it will return 1. If it has trouble, |
| ** it will return zero. |
| */ |
| oldp = hdrp; |
| #ifdef NO_CAPITALIZATION_SUPPORT |
| retval = combine_two_entries (hdrp, oldp, newp); |
| #else /* NO_CAPITALIZATION_SUPPORT */ |
| if ((oldp->flagfield & (CAPTYPEMASK | MOREVARIANTS)) |
| == (ALLCAPS | MOREVARIANTS)) |
| { |
| while (oldp->flagfield & MOREVARIANTS) |
| { |
| oldp = oldp->next; |
| retval = combine_two_entries (hdrp, oldp, newp); |
| if (retval != 0) /* Did we combine them? */ |
| break; |
| } |
| } |
| else |
| retval = combine_two_entries (hdrp, oldp, newp); |
| if (retval == 0) |
| { |
| /* |
| ** Couldn't combine the two entries. Add a new variant. For |
| ** ease, we'll stick it right behind the header, rather than |
| ** at the end of the list. |
| */ |
| forcevheader (hdrp, oldp, newp); |
| tdent = (struct dent *) mymalloc (sizeof (struct dent)); |
| if (tdent == NULL) |
| { |
| (void) fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, newp->word); |
| return -1; |
| } |
| *tdent = *newp; |
| tdent->next = hdrp->next; |
| hdrp->next = tdent; |
| tdent->flagfield |= (hdrp->flagfield & MOREVARIANTS); |
| hdrp->flagfield |= MOREVARIANTS; |
| combineaffixes (hdrp, newp); |
| hdrp->flagfield |= (newp->flagfield & KEEP); |
| if (captype (newp->flagfield) == FOLLOWCASE) |
| tdent->word = newp->word; |
| else |
| { |
| tdent->word = NULL; |
| myfree (newp->word); /* newp->word isn't needed */ |
| } |
| } |
| #endif /* NO_CAPITALIZATION_SUPPORT */ |
| return retval; |
| } |
| |
| #ifndef NO_CAPITALIZATION_SUPPORT |
| /* |
| ** The following routine implements steps 3a and 3b in the commentary |
| ** for "combinecaps". |
| */ |
| static void forcevheader (hdrp, oldp, newp) |
| register struct dent * hdrp; |
| struct dent * oldp; |
| struct dent * newp; |
| { |
| |
| if ((hdrp->flagfield & (CAPTYPEMASK | MOREVARIANTS)) == ALLCAPS |
| && ((oldp->flagfield ^ newp->flagfield) & KEEP) == 0) |
| return; /* Caller will set MOREVARIANTS */ |
| else if ((hdrp->flagfield & (CAPTYPEMASK | MOREVARIANTS)) |
| != (ALLCAPS | MOREVARIANTS)) |
| (void) addvheader (hdrp); |
| } |
| #endif /* NO_CAPITALIZATION_SUPPORT */ |
| |
| /* |
| ** This routine implements steps 4 and 5 of the commentary for "combinecaps". |
| ** |
| ** Returns 1 if newp can be discarded, 0 if nothing done. |
| */ |
| static int combine_two_entries (hdrp, oldp, newp) |
| struct dent * hdrp; /* (Possible) header of variant chain */ |
| register struct dent * |
| oldp; /* Pre-existing dictionary entry */ |
| register struct dent * |
| newp; /* Entry to possibly combine */ |
| { |
| |
| if (acoversb (oldp, newp)) |
| { |
| /* newp is superfluous. Drop it, preserving affixes and keep flag */ |
| combineaffixes (oldp, newp); |
| oldp->flagfield |= (newp->flagfield & KEEP); |
| hdrp->flagfield |= (newp->flagfield & KEEP); |
| myfree (newp->word); |
| return 1; |
| } |
| else if (acoversb (newp, oldp)) |
| { |
| /* |
| ** oldp is superfluous. Replace it with newp, preserving affixes and |
| ** the keep flag. |
| */ |
| combineaffixes (newp, oldp); |
| #ifdef NO_CAPITALIZATION_SUPPORT |
| newp->flagfield |= (oldp->flagfield & KEEP); |
| #else /* NO_CAPITALIZATION_SUPPORT */ |
| newp->flagfield |= (oldp->flagfield & (KEEP | MOREVARIANTS)); |
| #endif /* NO_CAPITALIZATION_SUPPORT */ |
| hdrp->flagfield |= (newp->flagfield & KEEP); |
| newp->next = oldp->next; |
| /* |
| ** We really want to free oldp->word, but that might be part of |
| ** "hashstrings". So we'll futz around to arrange things so we can |
| ** free newp->word instead. This depends very much on the fact |
| ** that both words are the same length. |
| */ |
| if (oldp->word != NULL) |
| (void) strcpy (oldp->word, newp->word); |
| myfree (newp->word); /* No longer needed */ |
| newp->word = oldp->word; |
| *oldp = *newp; |
| #ifndef NO_CAPITALIZATION_SUPPORT |
| /* We may need to add a header if newp is followcase */ |
| if (captype (newp->flagfield) == FOLLOWCASE |
| && (hdrp->flagfield & (CAPTYPEMASK | MOREVARIANTS)) |
| != (ALLCAPS | MOREVARIANTS)) |
| (void) addvheader (hdrp); |
| #endif /* NO_CAPITALIZATION_SUPPORT */ |
| return 1; |
| } |
| else |
| return 0; |
| } |
| |
| /* |
| ** Determine if enta covers entb, according to the rules in steps 4 and 5 |
| ** of the commentary for "combinecaps". |
| */ |
| static int acoversb (enta, entb) |
| register struct dent * enta; /* "A" in the rules */ |
| register struct dent * entb; /* "B" in the rules */ |
| { |
| int subset; /* NZ if entb is a subset of enta */ |
| |
| if ((subset = issubset (entb, enta)) != 0) |
| { |
| /* entb is a subset of enta; thus enta might cover entb */ |
| if (((enta->flagfield ^ entb->flagfield) & KEEP) != 0 |
| && (enta->flagfield & KEEP) == 0) /* Inverse of condition (4b) */ |
| return 0; |
| } |
| else |
| { |
| /* not a subset; KEEP flags must match exactly (both (4a) and (4b)) */ |
| if (((enta->flagfield ^ entb->flagfield) & KEEP) != 0) |
| return 0; |
| } |
| |
| /* Rules (4a) and (4b) are satisfied; check for capitalization match */ |
| #ifdef NO_CAPITALIZATION_SUPPORT |
| #ifdef lint |
| return subset; /* Just so it gets used */ |
| #else /* lint */ |
| return 1; /* All words match */ |
| #endif /* lint */ |
| #else /* NO_CAPITALIZATION_SUPPORT */ |
| if (((enta->flagfield ^ entb->flagfield) & CAPTYPEMASK) == 0) |
| { |
| if (captype (enta->flagfield) != FOLLOWCASE /* Condition (4c) */ |
| || strcmp (enta->word, entb->word) == 0) |
| return 1; /* Perfect match */ |
| else |
| return 0; |
| } |
| else if (subset == 0) /* No flag subset, refuse */ |
| return 0; /* ..near matches */ |
| else if (captype (entb->flagfield) == ALLCAPS) |
| return 1; |
| else if (captype (enta->flagfield) == ANYCASE |
| && captype (entb->flagfield) == CAPITALIZED) |
| return 1; |
| else |
| return 0; |
| #endif /* NO_CAPITALIZATION_SUPPORT */ |
| } |
| |
| void upcase (s) |
| register ichar_t * s; |
| { |
| |
| while (*s) |
| { |
| *s = mytoupper (*s); |
| s++; |
| } |
| } |
| |
| void lowcase (s) |
| register ichar_t * s; |
| { |
| |
| while (*s) |
| { |
| *s = mytolower (*s); |
| s++; |
| } |
| } |
| |
| /* |
| * Upcase variant that works on normal strings. Note that it is a lot |
| * slower than the normal upcase. The input must be in canonical form. |
| */ |
| void chupcase (s) |
| char * s; |
| { |
| ichar_t * is; |
| |
| is = strtosichar (s, 1); |
| upcase (is); |
| (void) ichartostr (s, is, strlen (s) + 1, 1); |
| } |
| |
| /* |
| ** See if one affix field is a subset of another. Returns NZ if ent1 |
| ** is a subset of ent2. The KEEP flag is not taken into consideration. |
| */ |
| static int issubset (ent1, ent2) |
| register struct dent * ent1; |
| register struct dent * ent2; |
| { |
| /* The following is really testing for MASKSIZE > 1, but cpp can't do that */ |
| #if MASKBITS > 32 |
| register int flagword; |
| |
| #ifdef FULLMASKSET |
| #define MASKMAX MASKSIZE |
| #else |
| #define MASKMAX MASKSIZE - 1 |
| #endif /* FULLMASKSET */ |
| for (flagword = MASKMAX; --flagword >= 0; ) |
| { |
| if ((ent1->mask[flagword] & ent2->mask[flagword]) |
| != ent1->mask[flagword]) |
| return 0; |
| } |
| #endif /* MASKBITS > 32 */ |
| #ifdef FULLMASKSET |
| return ((ent1->mask[MASKSIZE - 1] & ent2->mask[MASKSIZE - 1]) |
| == ent1->mask[MASKSIZE - 1]); |
| #else |
| if (((ent1->mask[MASKSIZE - 1] & ent2->mask[MASKSIZE - 1]) |
| ^ ent1->mask[MASKSIZE - 1]) & ~ALLFLAGS) |
| return 0; |
| else |
| return 1; |
| #endif /* FULLMASKSET */ |
| } |
| |
| /* |
| ** Add ent2's affix flags to ent1. |
| */ |
| static void combineaffixes (ent1, ent2) |
| register struct dent * ent1; |
| register struct dent * ent2; |
| { |
| /* The following is really testing for MASKSIZE > 1, but cpp can't do that */ |
| #if MASKBITS > 32 |
| register int flagword; |
| |
| if (ent1 == ent2) |
| return; |
| /* MASKMAX is defined in issubset, just above */ |
| for (flagword = MASKMAX; --flagword >= 0; ) |
| ent1->mask[flagword] |= ent2->mask[flagword]; |
| #endif /* MASKBITS > 32 */ |
| #ifndef FULLMASKSET |
| ent1->mask[MASKSIZE - 1] |= ent2->mask[MASKSIZE - 1] & ~ALLFLAGS; |
| #endif |
| } |
| |
| /* |
| ** Write out a dictionary entry, including capitalization variants. |
| ** If onlykeep is true, only those variants with KEEP set will be |
| ** written. |
| */ |
| void toutent (toutfile, hent, onlykeep) |
| register FILE * toutfile; |
| struct dent * hent; |
| register int onlykeep; |
| { |
| #ifdef NO_CAPITALIZATION_SUPPORT |
| if (!onlykeep || (hent->flagfield & KEEP)) |
| toutword (toutfile, hent->word, hent); |
| #else |
| register struct dent * cent; |
| ichar_t wbuf[INPUTWORDLEN + MAXAFFIXLEN]; |
| |
| cent = hent; |
| if (strtoichar (wbuf, cent->word, INPUTWORDLEN, 1)) |
| (void) fprintf (stderr, WORD_TOO_LONG (cent->word)); |
| for ( ; ; ) |
| { |
| if (!onlykeep || (cent->flagfield & KEEP)) |
| { |
| switch (captype (cent->flagfield)) |
| { |
| case ANYCASE: |
| lowcase (wbuf); |
| toutword (toutfile, ichartosstr (wbuf, 1), cent); |
| break; |
| case ALLCAPS: |
| if ((cent->flagfield & MOREVARIANTS) == 0 |
| || cent != hent) |
| { |
| upcase (wbuf); |
| toutword (toutfile, ichartosstr (wbuf, 1), cent); |
| } |
| break; |
| case CAPITALIZED: |
| lowcase (wbuf); |
| wbuf[0] = mytoupper (wbuf[0]); |
| toutword (toutfile, ichartosstr (wbuf, 1), cent); |
| break; |
| case FOLLOWCASE: |
| toutword (toutfile, cent->word, cent); |
| break; |
| } |
| } |
| if (cent->flagfield & MOREVARIANTS) |
| cent = cent->next; |
| else |
| break; |
| } |
| #endif |
| } |
| |
| static void toutword (toutfile, word, cent) |
| register FILE * toutfile; |
| char * word; |
| register struct dent * cent; |
| { |
| register int bit; |
| |
| has_marker = 0; |
| (void) fprintf (toutfile, "%s", word); |
| for (bit = 0; bit < LARGESTFLAG; bit++) |
| { |
| if (TSTMASKBIT (cent->mask, bit)) |
| flagout (toutfile, BITTOCHAR (bit)); |
| } |
| (void) fprintf (toutfile, "\n"); |
| } |
| |
| static void flagout (toutfile, flag) |
| register FILE * toutfile; |
| int flag; |
| { |
| if (!has_marker) |
| (void) putc (hashheader.flagmarker, toutfile); |
| has_marker = 1; |
| (void) putc (flag, toutfile); |
| } |
| |
| /* |
| * If the string under the given pointer begins with a string character, |
| * return the length of that "character". If not, return 0. |
| * May be called any time, but it's best if "isstrstart" is first |
| * used to filter out unnecessary calls. |
| * |
| * As a side effect, "laststringch" is set to the number of the string |
| * found, or to -1 if none was found. This can be useful for such things |
| * as case conversion. |
| */ |
| int stringcharlen (bufp, canonical) |
| char * bufp; |
| int canonical; /* NZ if input is in canonical form */ |
| { |
| #ifdef SLOWMULTIPLY |
| static char * sp[MAXSTRINGCHARS]; |
| static int inited = 0; |
| #endif /* SLOWMULTIPLY */ |
| register char * bufcur; |
| register char * stringcur; |
| register int stringno; |
| register int lowstringno; |
| register int highstringno; |
| int dupwanted; |
| |
| #ifdef SLOWMULTIPLY |
| if (!inited) |
| { |
| inited = 1; |
| for (stringno = 0; stringno < MAXSTRINGCHARS; stringno++) |
| sp[stringno] = &hashheader.stringchars[stringno][0]; |
| } |
| #endif /* SLOWMULTIPLY */ |
| lowstringno = 0; |
| highstringno = hashheader.nstrchars - 1; |
| dupwanted = canonical ? 0 : defdupchar; |
| while (lowstringno <= highstringno) |
| { |
| stringno = (lowstringno + highstringno) >> 1; |
| #ifdef SLOWMULTIPLY |
| stringcur = sp[stringno]; |
| #else /* SLOWMULTIPLY */ |
| stringcur = &hashheader.stringchars[stringno][0]; |
| #endif /* SLOWMULTIPLY */ |
| bufcur = bufp; |
| while (*stringcur) |
| { |
| #ifdef NO8BIT |
| if (((*bufcur++ ^ *stringcur) & 0x7F) != 0) |
| #else /* NO8BIT */ |
| if (*bufcur++ != *stringcur) |
| #endif /* NO8BIT */ |
| break; |
| /* |
| ** We can't use autoincrement above because of the |
| ** test below. |
| */ |
| stringcur++; |
| } |
| if (*stringcur == '\0') |
| { |
| if (hashheader.dupnos[stringno] == dupwanted) |
| { |
| /* We have a match */ |
| laststringch = hashheader.stringdups[stringno]; |
| #ifdef SLOWMULTIPLY |
| return stringcur - sp[stringno]; |
| #else /* SLOWMULTIPLY */ |
| return stringcur - &hashheader.stringchars[stringno][0]; |
| #endif /* SLOWMULTIPLY */ |
| } |
| else |
| --stringcur; |
| } |
| /* No match - choose which side to search on */ |
| #ifdef NO8BIT |
| if ((*--bufcur & 0x7F) < (*stringcur & 0x7F)) |
| highstringno = stringno - 1; |
| else if ((*bufcur & 0x7F) > (*stringcur & 0x7F)) |
| lowstringno = stringno + 1; |
| #else /* NO8BIT */ |
| if (*--bufcur < *stringcur) |
| highstringno = stringno - 1; |
| else if (*bufcur > *stringcur) |
| lowstringno = stringno + 1; |
| #endif /* NO8BIT */ |
| else if (dupwanted < hashheader.dupnos[stringno]) |
| highstringno = stringno - 1; |
| else |
| lowstringno = stringno + 1; |
| } |
| laststringch = -1; |
| return 0; /* Not a string character */ |
| } |
| |
| /* |
| * Convert an external string to an ichar_t string. If necessary, the parity |
| * bit is stripped off as part of the process. |
| * |
| * Returns NZ if the output string overflowed. |
| */ |
| int strtoichar (out, in, outlen, canonical) |
| register ichar_t * out; /* Where to put result */ |
| register char * in; /* String to convert */ |
| int outlen; /* Size of output buffer, *BYTES* */ |
| int canonical; /* NZ if input is in canonical form */ |
| { |
| register int len; /* Length of next character */ |
| |
| outlen /= sizeof (ichar_t); /* Convert to an ichar_t count */ |
| for ( ; --outlen > 0 && *in != '\0'; in += len) |
| { |
| if (l1_isstringch (in, len, canonical)) |
| *out++ = SET_SIZE + laststringch; |
| else |
| *out++ = *in & NOPARITY; |
| } |
| *out = 0; |
| return outlen <= 0; |
| } |
| |
| /* |
| * Convert an ichar_t string to an external string. |
| * |
| * WARNING: the resulting string may wind up being longer than the |
| * original. In fact, even the sequence strtoichar->ichartostr may |
| * produce a result longer than the original, because the output form |
| * may use a different string type set than the original input form. |
| * |
| * Returns NZ if the output string overflowed. |
| */ |
| int ichartostr (out, in, outlen, canonical) |
| register char * out; /* Where to put result */ |
| register ichar_t * in; /* String to convert */ |
| int outlen; /* Size of output buffer, bytes */ |
| int canonical; /* NZ for canonical form */ |
| { |
| register int ch; /* Next character to store */ |
| register int i; /* Index into duplicates list */ |
| register char * scharp; /* Pointer into a string char */ |
| |
| while (--outlen > 0 && (ch = *in++) != 0) |
| { |
| if (ch < SET_SIZE) |
| *out++ = (char) ch; |
| else |
| { |
| ch -= SET_SIZE; |
| if (!canonical) |
| { |
| for (i = hashheader.nstrchars; --i >= 0; ) |
| { |
| if (hashheader.dupnos[i] == defdupchar |
| && hashheader.stringdups[i] == ch) |
| { |
| ch = i; |
| break; |
| } |
| } |
| } |
| scharp = hashheader.stringchars[(unsigned) ch]; |
| while ((*out++ = *scharp++) != '\0') |
| ; |
| out--; |
| } |
| } |
| *out = '\0'; |
| return outlen <= 0; |
| } |
| |
| /* |
| * Convert a string to an ichar_t, storing the result in a static area. |
| */ |
| ichar_t * strtosichar (in, canonical) |
| char * in; /* String to convert */ |
| int canonical; /* NZ if input is in canonical form */ |
| { |
| static ichar_t out[STRTOSICHAR_SIZE / sizeof (ichar_t)]; |
| |
| if (strtoichar (out, in, sizeof out, canonical)) |
| (void) fprintf (stderr, WORD_TOO_LONG (in)); |
| return out; |
| } |
| |
| /* |
| * Convert an ichar_t to a string, storing the result in a static area. |
| */ |
| char * ichartosstr (in, canonical) |
| ichar_t * in; /* Internal string to convert */ |
| int canonical; /* NZ for canonical conversion */ |
| { |
| static char out[ICHARTOSSTR_SIZE]; |
| |
| if (ichartostr (out, in, sizeof out, canonical)) |
| (void) fprintf (stderr, WORD_TOO_LONG (out)); |
| return out; |
| } |
| |
| /* |
| * Convert a single ichar to a printable string, storing the result in |
| * a static area. |
| */ |
| char * printichar (in) |
| int in; |
| { |
| static char out[MAXSTRINGCHARLEN + 1]; |
| |
| if (in < SET_SIZE) |
| { |
| out[0] = (char) in; |
| out[1] = '\0'; |
| } |
| else |
| (void) strcpy (out, hashheader.stringchars[(unsigned) in - SET_SIZE]); |
| return out; |
| } |
| |
| #ifndef ICHAR_IS_CHAR |
| /* |
| * Copy an ichar_t. |
| */ |
| ichar_t * icharcpy (out, in) |
| register ichar_t * out; /* Destination */ |
| register ichar_t * in; /* Source */ |
| { |
| ichar_t * origout; /* Copy of destination for return */ |
| |
| origout = out; |
| while ((*out++ = *in++) != 0) |
| ; |
| return origout; |
| } |
| |
| /* |
| * Return the length of an ichar_t. |
| */ |
| int icharlen (in) |
| register ichar_t * in; /* String to count */ |
| { |
| register int len; /* Length so far */ |
| |
| for (len = 0; *in++ != 0; len++) |
| ; |
| return len; |
| } |
| |
| /* |
| * Compare two ichar_t's. |
| */ |
| int icharcmp (s1, s2) |
| register ichar_t * s1; |
| register ichar_t * s2; |
| { |
| |
| while (*s1 != 0) |
| { |
| if (*s1++ != *s2++) |
| return *--s1 - *--s2; |
| } |
| return *s1 - *s2; |
| } |
| |
| /* |
| * Strncmp for two ichar_t's. |
| */ |
| int icharncmp (s1, s2, n) |
| register ichar_t * s1; |
| register ichar_t * s2; |
| register int n; |
| { |
| |
| while (--n >= 0 && *s1 != 0) |
| { |
| if (*s1++ != *s2++) |
| return *--s1 - *--s2; |
| } |
| if (n < 0) |
| return 0; |
| else |
| return *s1 - *s2; |
| } |
| |
| #endif /* ICHAR_IS_CHAR */ |
| |
| int findfiletype (name, searchnames, deformatter) |
| char * name; /* Name to look up in suffix table */ |
| int searchnames; /* NZ to search name field of table */ |
| int * deformatter; /* Where to set deformatter type */ |
| { |
| char * cp; /* Pointer into suffix list */ |
| int cplen; /* Length of current suffix */ |
| register int i; /* Index into type table */ |
| int len; /* Length of the name */ |
| |
| /* |
| * Note: for now, the deformatter is set to 1 for tex, 0 for nroff. |
| * Further, we assume that it's one or the other, so that a test |
| * for tex is sufficient. This needs to be generalized. |
| */ |
| len = strlen (name); |
| if (searchnames) |
| { |
| for (i = 0; i < hashheader.nstrchartype; i++) |
| { |
| if (strcmp (name, chartypes[i].name) == 0) |
| { |
| if (deformatter != NULL) |
| *deformatter = |
| (strcmp (chartypes[i].deformatter, "tex") == 0); |
| return i; |
| } |
| } |
| } |
| for (i = 0; i < hashheader.nstrchartype; i++) |
| { |
| for (cp = chartypes[i].suffixes; *cp != '\0'; cp += cplen + 1) |
| { |
| cplen = strlen (cp); |
| if (len >= cplen && strcmp (&name[len - cplen], cp) == 0) |
| { |
| if (deformatter != NULL) |
| *deformatter = |
| (strcmp (chartypes[i].deformatter, "tex") == 0); |
| return i; |
| } |
| } |
| } |
| return -1; |
| } |
| |
/*
 * The following routines are all dummies for the benefit of lint.  They
 * are compiled only when "lint" is defined.  Each one just returns, or
 * pretends to use, its arguments.
 */
#ifdef lint
int TSTMASKBIT (mask, bit)
    MASKTYPE *          mask;
    int                 bit;
    {
    return bit + (int) *mask;
    }

void CLRMASKBIT (mask, bit)
    MASKTYPE *          mask;
    int                 bit;
    {
    bit += (int) *mask;
    }

void SETMASKBIT (mask, bit)
    MASKTYPE *          mask;
    int                 bit;
    {
    bit += (int) *mask;
    }

int BITTOCHAR (bit)
    int                 bit;
    {
    return bit;
    }

int CHARTOBIT (ch)
    int                 ch;
    {
    return ch;
    }

int myupper (ch)
    unsigned int        ch;
    {
    return (int) ch;
    }

int mylower (ch)
    unsigned int        ch;
    {
    return (int) ch;
    }

int myspace (ch)
    unsigned int        ch;
    {
    return (int) ch;
    }

int iswordch (ch)
    unsigned int        ch;
    {
    return (int) ch;
    }

int isboundarych (ch)
    unsigned int        ch;
    {
    return (int) ch;
    }

int isstringstart (ch)
    unsigned int        ch;
    {
    return ch;
    }

ichar_t mytolower (ch)
    unsigned int        ch;
    {
    return (ichar_t) ch;
    }

ichar_t mytoupper (ch)
    unsigned int        ch;
    {
    return (ichar_t) ch;
    }
#endif /* lint */