| #ifndef lint |
| static char Rcs_Id[] = |
| "$Id$"; |
| #endif |
| |
| /* |
| * lookup.c - see if a word appears in the dictionary |
| * |
| * Pace Willisson, 1983 |
| * |
| * Copyright 1987, 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * 3. All modifications to the source code must be clearly marked as |
| * such. Binary redistributions based on modified source code |
| * must be clearly marked as modified versions in the documentation |
| * and/or other materials provided with the distribution. |
| * 4. All advertising materials mentioning features or use of this software |
| * must display the following acknowledgment: |
| * This product includes software developed by Geoff Kuenning and |
| * other unpaid contributors. |
| * 5. The name of Geoff Kuenning may not be used to endorse or promote |
| * products derived from this software without specific prior |
| * written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND |
| * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| */ |
| |
| /* |
| * $Log$ |
| * Revision 1.1 2007/01/09 23:57:18 lattner |
| * initial recheckin of mibench |
| * |
| * Revision 1.1.1.1 2007/01/09 02:59:02 evancheng |
| * Add selected tests from MiBench 1.0 to LLVM test suite. |
| * |
| * Revision 1.42 1995/01/08 23:23:42 geoff |
| * Support MSDOS_BINARY_OPEN when opening the hash file to read it in. |
| * |
| * Revision 1.41 1994/01/25 07:11:51 geoff |
| * Get rid of all old RCS log lines in preparation for the 3.1 release. |
| * |
| */ |
| |
| #include "config.h" |
| #include "ispell.h" |
| #include "proto.h" |
| #include "msgs.h" |
| |
| int linit P ((void)); |
| #ifdef INDEXDUMP |
| static void dumpindex P ((struct flagptr * indexp, int depth)); |
| #endif /* INDEXDUMP */ |
| struct dent * lookup P ((ichar_t * word, int dotree)); |
| |
| static inited = 0; |
| |
| int linit () |
| { |
| int hashfd; |
| register int i; |
| register struct dent * dp; |
| struct flagent * entry; |
| struct flagptr * ind; |
| int nextchar; |
| int viazero; |
| register ichar_t * cp; |
| |
| if (inited) |
| return 0; |
| |
| if ((hashfd = open (hashname, 0 | MSDOS_BINARY_OPEN)) < 0) |
| { |
| (void) fprintf (stderr, CANT_OPEN, hashname); |
| return (-1); |
| } |
| |
| hashsize = read (hashfd, (char *) &hashheader, sizeof hashheader); |
| if (hashsize < sizeof hashheader) |
| { |
| if (hashsize < 0) |
| (void) fprintf (stderr, LOOKUP_C_CANT_READ, hashname); |
| else if (hashsize == 0) |
| (void) fprintf (stderr, LOOKUP_C_NULL_HASH, hashname); |
| else |
| (void) fprintf (stderr, |
| LOOKUP_C_SHORT_HASH (hashname, hashsize, |
| (int) sizeof hashheader)); |
| return (-1); |
| } |
| else if (hashheader.magic != MAGIC) |
| { |
| (void) fprintf (stderr, |
| LOOKUP_C_BAD_MAGIC (hashname, (unsigned int) MAGIC, |
| (unsigned int) hashheader.magic)); |
| return (-1); |
| } |
| else if (hashheader.magic2 != MAGIC) |
| { |
| (void) fprintf (stderr, |
| LOOKUP_C_BAD_MAGIC2 (hashname, (unsigned int) MAGIC, |
| (unsigned int) hashheader.magic2)); |
| return (-1); |
| } |
| else if (hashheader.compileoptions != COMPILEOPTIONS |
| || hashheader.maxstringchars != MAXSTRINGCHARS |
| || hashheader.maxstringcharlen != MAXSTRINGCHARLEN) |
| { |
| (void) fprintf (stderr, |
| LOOKUP_C_BAD_OPTIONS ((unsigned int) hashheader.compileoptions, |
| hashheader.maxstringchars, hashheader.maxstringcharlen, |
| (unsigned int) COMPILEOPTIONS, MAXSTRINGCHARS, MAXSTRINGCHARLEN)); |
| return (-1); |
| } |
| if (nodictflag) |
| { |
| /* |
| * Dictionary is not needed - create an empty dummy table. We |
| * actually have to have one entry since the hash |
| * algorithm involves a divide by the table size |
| * (actually modulo, but zero is still unacceptable). |
| * So we create an empty entry. |
| */ |
| hashsize = 1; /* This prevents divides by zero */ |
| hashtbl = (struct dent *) calloc (1, sizeof (struct dent)); |
| if (hashtbl == NULL) |
| { |
| (void) fprintf (stderr, LOOKUP_C_NO_HASH_SPACE); |
| return (-1); |
| } |
| hashtbl[0].word = NULL; |
| hashtbl[0].next = NULL; |
| hashtbl[0].flagfield &= ~(USED | KEEP); |
| /* The flag bits don't matter, but calloc cleared them. */ |
| hashstrings = (char *) malloc ((unsigned) hashheader.lstringsize); |
| } |
| else |
| { |
| hashtbl = |
| (struct dent *) |
| malloc ((unsigned) hashheader.tblsize * sizeof (struct dent)); |
| hashsize = hashheader.tblsize; |
| hashstrings = (char *) malloc ((unsigned) hashheader.stringsize); |
| } |
| numsflags = hashheader.stblsize; |
| numpflags = hashheader.ptblsize; |
| sflaglist = (struct flagent *) |
| malloc ((numsflags + numpflags) * sizeof (struct flagent)); |
| if (hashtbl == NULL || hashstrings == NULL || sflaglist == NULL) |
| { |
| (void) fprintf (stderr, LOOKUP_C_NO_HASH_SPACE); |
| return (-1); |
| } |
| pflaglist = sflaglist + numsflags; |
| |
| if (nodictflag) |
| { |
| /* |
| * Read just the strings for the language table, and |
| * skip over the rest of the strings and all of the |
| * hash table. |
| */ |
| if (read (hashfd, hashstrings, (unsigned) hashheader.lstringsize) |
| != hashheader.lstringsize) |
| { |
| (void) fprintf (stderr, LOOKUP_C_BAD_FORMAT); |
| return (-1); |
| } |
| (void) lseek (hashfd, |
| (long) hashheader.stringsize - (long) hashheader.lstringsize |
| + (long) hashheader.tblsize * (long) sizeof (struct dent), |
| 1); |
| } |
| else |
| { |
| if (read (hashfd, hashstrings, (unsigned) hashheader.stringsize) |
| != hashheader.stringsize |
| || read (hashfd, (char *) hashtbl, |
| (unsigned) hashheader.tblsize * sizeof (struct dent)) |
| != hashheader.tblsize * sizeof (struct dent)) |
| { |
| (void) fprintf (stderr, LOOKUP_C_BAD_FORMAT); |
| return (-1); |
| } |
| } |
| if (read (hashfd, (char *) sflaglist, |
| (unsigned) (numsflags + numpflags) * sizeof (struct flagent)) |
| != (numsflags + numpflags) * sizeof (struct flagent)) |
| { |
| (void) fprintf (stderr, LOOKUP_C_BAD_FORMAT); |
| return (-1); |
| } |
| (void) close (hashfd); |
| |
| if (!nodictflag) |
| { |
| for (i = hashsize, dp = hashtbl; --i >= 0; dp++) |
| { |
| if (dp->word == (char *) -1) |
| dp->word = NULL; |
| else |
| dp->word = &hashstrings [ (int)(dp->word) ]; |
| if (dp->next == (struct dent *) -1) |
| dp->next = NULL; |
| else |
| dp->next = &hashtbl [ (int)(dp->next) ]; |
| } |
| } |
| |
| for (i = numsflags + numpflags, entry = sflaglist; --i >= 0; entry++) |
| { |
| if (entry->stripl) |
| entry->strip = (ichar_t *) &hashstrings[(int) entry->strip]; |
| else |
| entry->strip = NULL; |
| if (entry->affl) |
| entry->affix = (ichar_t *) &hashstrings[(int) entry->affix]; |
| else |
| entry->affix = NULL; |
| } |
| /* |
| ** Warning - 'entry' and 'i' are reset in the body of the loop |
| ** below. Don't try to optimize it by (e.g.) moving the decrement |
| ** of i into the loop condition. |
| */ |
| for (i = numsflags, entry = sflaglist; i > 0; i--, entry++) |
| { |
| if (entry->affl == 0) |
| { |
| cp = NULL; |
| ind = &sflagindex[0]; |
| viazero = 1; |
| } |
| else |
| { |
| cp = entry->affix + entry->affl - 1; |
| ind = &sflagindex[*cp]; |
| viazero = 0; |
| while (ind->numents == 0 && ind->pu.fp != NULL) |
| { |
| if (cp == entry->affix) |
| { |
| ind = &ind->pu.fp[0]; |
| viazero = 1; |
| } |
| else |
| { |
| ind = &ind->pu.fp[*--cp]; |
| viazero = 0; |
| } |
| } |
| } |
| if (ind->numents == 0) |
| ind->pu.ent = entry; |
| ind->numents++; |
| /* |
| ** If this index entry has more than MAXSEARCH flags in |
| ** it, we will split it into subentries to reduce the |
| ** searching. However, the split doesn't make sense in |
| ** two cases: (a) if we are already at the end of the |
| ** current affix, or (b) if all the entries in the list |
| ** have identical affixes. Since the list is sorted, (b) |
| ** is true if the first and last affixes in the list |
| ** are identical. |
| */ |
| if (!viazero && ind->numents >= MAXSEARCH |
| && icharcmp (entry->affix, ind->pu.ent->affix) != 0) |
| { |
| /* Sneaky trick: back up and reprocess */ |
| entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */ |
| i = numsflags - (entry - sflaglist); |
| ind->pu.fp = |
| (struct flagptr *) |
| calloc ((unsigned) (SET_SIZE + hashheader.nstrchars), |
| sizeof (struct flagptr)); |
| if (ind->pu.fp == NULL) |
| { |
| (void) fprintf (stderr, LOOKUP_C_NO_LANG_SPACE); |
| return (-1); |
| } |
| ind->numents = 0; |
| } |
| } |
| /* |
| ** Warning - 'entry' and 'i' are reset in the body of the loop |
| ** below. Don't try to optimize it by (e.g.) moving the decrement |
| ** of i into the loop condition. |
| */ |
| for (i = numpflags, entry = pflaglist; i > 0; i--, entry++) |
| { |
| if (entry->affl == 0) |
| { |
| cp = NULL; |
| ind = &pflagindex[0]; |
| viazero = 1; |
| } |
| else |
| { |
| cp = entry->affix; |
| ind = &pflagindex[*cp++]; |
| viazero = 0; |
| while (ind->numents == 0 && ind->pu.fp != NULL) |
| { |
| if (*cp == 0) |
| { |
| ind = &ind->pu.fp[0]; |
| viazero = 1; |
| } |
| else |
| { |
| ind = &ind->pu.fp[*cp++]; |
| viazero = 0; |
| } |
| } |
| } |
| if (ind->numents == 0) |
| ind->pu.ent = entry; |
| ind->numents++; |
| /* |
| ** If this index entry has more than MAXSEARCH flags in |
| ** it, we will split it into subentries to reduce the |
| ** searching. However, the split doesn't make sense in |
| ** two cases: (a) if we are already at the end of the |
| ** current affix, or (b) if all the entries in the list |
| ** have identical affixes. Since the list is sorted, (b) |
| ** is true if the first and last affixes in the list |
| ** are identical. |
| */ |
| if (!viazero && ind->numents >= MAXSEARCH |
| && icharcmp (entry->affix, ind->pu.ent->affix) != 0) |
| { |
| /* Sneaky trick: back up and reprocess */ |
| entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */ |
| i = numpflags - (entry - pflaglist); |
| ind->pu.fp = |
| (struct flagptr *) calloc (SET_SIZE + hashheader.nstrchars, |
| sizeof (struct flagptr)); |
| if (ind->pu.fp == NULL) |
| { |
| (void) fprintf (stderr, LOOKUP_C_NO_LANG_SPACE); |
| return (-1); |
| } |
| ind->numents = 0; |
| } |
| } |
| #ifdef INDEXDUMP |
| (void) fprintf (stderr, "Prefix index table:\n"); |
| dumpindex (pflagindex, 0); |
| (void) fprintf (stderr, "Suffix index table:\n"); |
| dumpindex (sflagindex, 0); |
| #endif |
| if (hashheader.nstrchartype == 0) |
| chartypes = NULL; |
| else |
| { |
| chartypes = (struct strchartype *) |
| malloc (hashheader.nstrchartype * sizeof (struct strchartype)); |
| if (chartypes == NULL) |
| { |
| (void) fprintf (stderr, LOOKUP_C_NO_LANG_SPACE); |
| return (-1); |
| } |
| for (i = 0, nextchar = hashheader.strtypestart; |
| i < hashheader.nstrchartype; |
| i++) |
| { |
| chartypes[i].name = &hashstrings[nextchar]; |
| nextchar += strlen (chartypes[i].name) + 1; |
| chartypes[i].deformatter = &hashstrings[nextchar]; |
| nextchar += strlen (chartypes[i].deformatter) + 1; |
| chartypes[i].suffixes = &hashstrings[nextchar]; |
| while (hashstrings[nextchar] != '\0') |
| nextchar += strlen (&hashstrings[nextchar]) + 1; |
| nextchar++; |
| } |
| } |
| inited = 1; |
| return (0); |
| } |
| |
| #ifdef INDEXDUMP |
| static void dumpindex (indexp, depth) |
| register struct flagptr * indexp; |
| register int depth; |
| { |
| register int i; |
| int j; |
| int k; |
| char stripbuf[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4]; |
| |
| for (i = 0; i < SET_SIZE + hashheader.nstrchars; i++, indexp++) |
| { |
| if (indexp->numents == 0 && indexp->pu.fp != NULL) |
| { |
| for (j = depth; --j >= 0; ) |
| (void) putc (' ', stderr); |
| if (i >= ' ' && i <= '~') |
| (void) putc (i, stderr); |
| else |
| (void) fprintf (stderr, "0x%x", i); |
| (void) putc ('\n', stderr); |
| dumpindex (indexp->pu.fp, depth + 1); |
| } |
| else if (indexp->numents) |
| { |
| for (j = depth; --j >= 0; ) |
| (void) putc (' ', stderr); |
| if (i >= ' ' && i <= '~') |
| (void) putc (i, stderr); |
| else |
| (void) fprintf (stderr, "0x%x", i); |
| (void) fprintf (stderr, " -> %d entries\n", indexp->numents); |
| for (k = 0; k < indexp->numents; k++) |
| { |
| for (j = depth; --j >= 0; ) |
| (void) putc (' ', stderr); |
| if (indexp->pu.ent[k].stripl) |
| { |
| (void) ichartostr (stripbuf, indexp->pu.ent[k].strip, |
| sizeof stripbuf, 1); |
| (void) fprintf (stderr, " entry %d (-%s,%s)\n", |
| &indexp->pu.ent[k] - sflaglist, |
| stripbuf, |
| indexp->pu.ent[k].affl |
| ? ichartosstr (indexp->pu.ent[k].affix, 1) : "-"); |
| } |
| else |
| (void) fprintf (stderr, " entry %d (%s)\n", |
| &indexp->pu.ent[k] - sflaglist, |
| ichartosstr (indexp->pu.ent[k].affix, 1)); |
| } |
| } |
| } |
| } |
| #endif |
| |
| /* n is length of s */ |
| struct dent * lookup (s, dotree) |
| register ichar_t * s; |
| int dotree; |
| { |
| register struct dent * dp; |
| register char * s1; |
| char schar[INPUTWORDLEN + MAXAFFIXLEN]; |
| |
| dp = &hashtbl[hash (s, hashsize)]; |
| if (ichartostr (schar, s, sizeof schar, 1)) |
| (void) fprintf (stderr, WORD_TOO_LONG (schar)); |
| for ( ; dp != NULL; dp = dp->next) |
| { |
| /* quick strcmp, but only for equality */ |
| s1 = dp->word; |
| if (s1 && s1[0] == schar[0] && strcmp (s1 + 1, schar + 1) == 0) |
| return dp; |
| #ifndef NO_CAPITALIZATION_SUPPORT |
| while (dp->flagfield & MOREVARIANTS) /* Skip variations */ |
| dp = dp->next; |
| #endif |
| } |
| if (dotree) |
| { |
| dp = treelookup (s); |
| return dp; |
| } |
| else |
| return NULL; |
| } |