blob: bcbd053d13ad26620330da6f71e91fa13a749bd2 [file] [log] [blame]
#ifndef lint
static char Rcs_Id[] =
"$Id$";
#endif
/*
* Copyright 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All modifications to the source code must be clearly marked as
* such. Binary redistributions based on modified source code
* must be clearly marked as modified versions in the documentation
* and/or other materials provided with the distribution.
* 4. All advertising materials mentioning features or use of this software
* must display the following acknowledgment:
* This product includes software developed by Geoff Kuenning and
* other unpaid contributors.
* 5. The name of Geoff Kuenning may not be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $Log$
* Revision 1.1 2007/01/09 23:57:18 lattner
* initial recheckin of mibench
*
* Revision 1.1.1.1 2007/01/09 02:59:03 evancheng
* Add selected tests from MiBench 1.0 to LLVM test suite.
*
* Revision 1.45 1994/12/27 23:08:52 geoff
* Add code to makedent to reject words that contain non-word characters.
* This helps protect people who use ISO 8-bit characters when ispell
* isn't configured for that option.
*
* Revision 1.44 1994/10/25 05:46:20 geoff
* Fix some incorrect declarations in the lint versions of some routines.
*
* Revision 1.43 1994/09/16 03:32:34 geoff
* Issue an error message for bad affix flags
*
* Revision 1.42 1994/02/07 04:23:43 geoff
* Correctly identify the deformatter when changing file types
*
* Revision 1.41 1994/01/25 07:11:55 geoff
* Get rid of all old RCS log lines in preparation for the 3.1 release.
*
*/
#include "config.h"
#include "ispell.h"
#include "proto.h"
#include "msgs.h"
int makedent P ((char * lbuf, int lbuflen, struct dent * ent));
#ifndef NO_CAPITALIZATION_SUPPORT
long whatcap P ((ichar_t * word));
#endif
int addvheader P ((struct dent * ent));
int combinecaps P ((struct dent * hdr, struct dent * newent));
#ifndef NO_CAPITALIZATION_SUPPORT
static void forcevheader P ((struct dent * hdrp, struct dent * oldp,
struct dent * newp));
#endif /* NO_CAPITALIZATION_SUPPORT */
static int combine_two_entries P ((struct dent * hdrp,
struct dent * oldp, struct dent * newp));
static int acoversb P ((struct dent * enta, struct dent * entb));
void upcase P ((ichar_t * string));
void lowcase P ((ichar_t * string));
void chupcase P ((char * s));
static int issubset P ((struct dent * ent1, struct dent * ent2));
static void combineaffixes P ((struct dent * ent1, struct dent * ent2));
void toutent P ((FILE * outfile, struct dent * hent,
int onlykeep));
static void toutword P ((FILE * outfile, char * word,
struct dent * cent));
static void flagout P ((FILE * outfile, int flag));
int stringcharlen P ((char * bufp, int canonical));
int strtoichar P ((ichar_t * out, char * in, int outlen,
int canonical));
int ichartostr P ((char * out, ichar_t * in, int outlen,
int canonical));
ichar_t * strtosichar P ((char * in, int canonical));
char * ichartosstr P ((ichar_t * in, int canonical));
char * printichar P ((int in));
#ifndef ICHAR_IS_CHAR
ichar_t * icharcpy P ((ichar_t * out, ichar_t * in));
int icharlen P ((ichar_t * str));
int icharcmp P ((ichar_t * s1, ichar_t * s2));
int icharncmp P ((ichar_t * s1, ichar_t * s2, int n));
#endif /* ICHAR_IS_CHAR */
int findfiletype P ((char * name, int searchnames,
int * deformatter));
static int has_marker;
/*
* Fill in a directory entry, including setting the capitalization flags, and
* allocate and initialize memory for the d->word field. Returns -1
* if there was trouble. The input word must be in canonical form.
*/
int makedent (lbuf, lbuflen, d)
char * lbuf;
int lbuflen;
struct dent * d;
{
ichar_t ibuf[INPUTWORDLEN + MAXAFFIXLEN];
ichar_t * ip;
char * p;
int bit;
int len;
/* Strip off any trailing newline */
len = strlen (lbuf) - 1;
if (lbuf[len] == '\n')
lbuf[len] = '\0';
d->next = NULL;
/* WARNING: flagfield might be the same as mask! See ispell.h. */
d->flagfield = 0;
(void) bzero ((char *) d->mask, sizeof (d->mask));
d->flagfield |= USED;
d->flagfield &= ~KEEP;
p = index (lbuf, hashheader.flagmarker);
if (p != NULL)
*p = 0;
/*
** Convert the word to an ichar_t and back; this makes sure that
** it is in canonical form and thus that the length is correct.
*/
if (strtoichar (ibuf, lbuf, INPUTWORDLEN * sizeof (ichar_t), 1)
|| ichartostr (lbuf, ibuf, lbuflen, 1))
{
(void) fprintf (stderr, WORD_TOO_LONG (lbuf));
return (-1);
}
/*
** Make sure the word is well-formed (contains only legal characters).
*/
for (ip = ibuf; *ip != 0; ip++)
{
if (!iswordch (*ip))
{
/* Boundary characters are legal as long as they're not at edges */
if (!isboundarych (*ip)
|| ip == ibuf || ip[1] == 0)
{
(void) fprintf (stderr, MAKEDENT_C_BAD_WORD_CHAR, lbuf);
return -1;
}
}
}
len = strlen (lbuf);
#ifndef NO_CAPITALIZATION_SUPPORT
/*
** Figure out the capitalization rules from the capitalization of
** the sample entry.
*/
d->flagfield |= whatcap (ibuf);
#endif
if (len > INPUTWORDLEN - 1)
{
(void) fprintf (stderr, WORD_TOO_LONG (lbuf));
return (-1);
}
d->word = mymalloc ((unsigned) len + 1);
if (d->word == NULL)
{
(void) fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, lbuf);
return -1;
}
(void) strcpy (d->word, lbuf);
#ifdef NO_CAPITALIZATION_SUPPORT
chupcase (d->word);
#else /* NO_CAPITALIZATION_SUPPORT */
if (captype (d->flagfield) != FOLLOWCASE)
chupcase (d->word);
#endif /* NO_CAPITALIZATION_SUPPORT */
if (p == NULL)
return (0);
p++;
while (*p != '\0' && *p != '\n')
{
bit = CHARTOBIT ((unsigned char) *p);
if (bit >= 0 && bit <= LARGESTFLAG)
SETMASKBIT (d->mask, bit);
else
(void) fprintf (stderr, BAD_FLAG, (unsigned char) *p);
p++;
if (*p == hashheader.flagmarker)
p++; /* Handle old-format dictionaries too */
}
return (0);
}
#ifndef NO_CAPITALIZATION_SUPPORT
/*
** Classify the capitalization of a sample entry. Returns one of the
** four capitalization codes ANYCASE, ALLCAPS, CAPITALIZED, or FOLLOWCASE.
*/
long whatcap (word)
register ichar_t * word;
{
register ichar_t * p;
for (p = word; *p; p++)
{
if (mylower (*p))
break;
}
if (*p == '\0')
return ALLCAPS;
else
{
for ( ; *p; p++)
{
if (myupper (*p))
break;
}
if (*p == '\0')
{
/*
** No uppercase letters follow the lowercase ones.
** If there is more than one uppercase letter, it's
** "followcase". If only the first one is capitalized,
** it's "capitalize". If there are no capitals
** at all, it's ANYCASE.
*/
if (myupper (word[0]))
{
for (p = word + 1; *p != '\0'; p++)
{
if (myupper (*p))
return FOLLOWCASE;
}
return CAPITALIZED;
}
else
return ANYCASE;
}
else
return FOLLOWCASE; /* .../lower/upper */
}
}
/*
** Add a variant-capitalization header to a word. This routine may be
** called even for a followcase word that doesn't yet have a header.
**
** Returns 0 if all was ok, -1 if allocation error.
*/
int addvheader (dp)
register struct dent * dp; /* Entry to update */
{
register struct dent * tdent; /* Copy of entry */
/*
** Add a second entry with the correct capitalization, and then make
** dp into a special dummy entry.
*/
tdent = (struct dent *) mymalloc (sizeof (struct dent));
if (tdent == NULL)
{
(void) fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, dp->word);
return -1;
}
*tdent = *dp;
if (captype (tdent->flagfield) != FOLLOWCASE)
tdent->word = NULL;
else
{
/* Followcase words need a copy of the capitalization */
tdent->word = mymalloc ((unsigned int) strlen (tdent->word) + 1);
if (tdent->word == NULL)
{
(void) fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, dp->word);
myfree ((char *) tdent);
return -1;
}
(void) strcpy (tdent->word, dp->word);
}
chupcase (dp->word);
dp->next = tdent;
dp->flagfield &= ~CAPTYPEMASK;
dp->flagfield |= (ALLCAPS | MOREVARIANTS);
return 0;
}
#endif /* NO_CAPITALIZATION_SUPPORT */
/*
** Combine and resolve the entries describing two capitalizations of the same
** word. This may require allocating yet more entries.
**
** Hdrp is a pointer into a hash table. If the word covered by hdrp has
** variations, hdrp must point to the header. Newp is a pointer to temporary
** storage, and space is malloc'ed if newp is to be kept. The newp->word
** field must have been allocated with mymalloc, so that this routine may free
** the space if it keeps newp but not the word.
**
** Return value: 0 if the word was added, 1 if the word was combined
** with an existing entry, and -1 if trouble occurred (e.g., malloc).
** If 1 is returned, newp->word may have been be freed using myfree.
**
** Life is made much more difficult by the KEEP flag's possibilities. We
** must ensure that a !KEEP word doesn't find its way into the personal
** dictionary as a result of this routine's actions. However, a !KEEP
** word that has affixes must have come from the main dictionary, so it
** is acceptable to combine entries in that case (got that?).
**
** The net result of all this is a set of rules that is a bloody pain
** to figure out. Basically, we want to choose one of the following actions:
**
** (1) Add newp's affixes and KEEP flag to oldp, and discard newp.
** (2) Add oldp's affixes and KEEP flag to newp, replace oldp with
** newp, and discard newp.
#ifndef NO_CAPITALIZATION_SUPPORT
** (3) Insert newp as a new entry in the variants list. If there is
** currently no variant header, this requires adding one. Adding a
** header splits into two sub-cases:
**
** (3a) If oldp is ALLCAPS and the KEEP flags match, just turn it
** into the header.
** (3b) Otherwise, add a new entry to serve as the header.
** To ease list linking, this is done by copying oldp into
** the new entry, and then performing (3a).
**
** After newp has been added as a variant, its affixes and KEEP
** flag are OR-ed into the variant header.
#endif
**
** So how to choose which? The default is always case (3), which adds newp
** as a new entry in the variants list. Cases (1) and (2) are symmetrical
** except for which entry is discarded. We can use case (1) or (2) whenever
** one entry "covers" the other. "Covering" is defined as follows:
**
** (4) For entries with matching capitalization types, A covers B
** if:
**
** (4a) B's affix flags are a subset of A's, or the KEEP flags
** match, and
** (4b) either the KEEP flags match, or A's KEEP flag is set.
** (Since A has more suffixes, combining B with it won't
** cause any extra suffixes to be added to the dictionary.)
** (4c) If the words are FOLLOWCASE, the capitalizations match
** exactly.
**
#ifndef NO_CAPITALIZATION_SUPPORT
** (5) For entries with mismatched capitalization types, A covers B
** if (4a) and (4b) are true, and:
**
** (5a) B is ALLCAPS, or
** (5b) A is ANYCASE, and B is CAPITALIZED.
#endif
**
** For any "hdrp" without variants, oldp is the same as hdrp. Otherwise,
** the above tests are applied using each variant in turn for oldp.
*/
int combinecaps (hdrp, newp)
struct dent * hdrp; /* Header of entry currently in dictionary */
register struct dent *
newp; /* Entry to add */
{
register struct dent *
oldp; /* Current "oldp" entry */
#ifndef NO_CAPITALIZATION_SUPPORT
register struct dent *
tdent; /* Entry we'll add to the dictionary */
#endif /* NO_CAPITALIZATION_SUPPORT */
register int retval = 0; /* Return value from combine_two_entries */
/*
** First, see if we can combine the two entries (cases 1 and 2). If
** combine_two_entries does so, it will return 1. If it has trouble,
** it will return zero.
*/
oldp = hdrp;
#ifdef NO_CAPITALIZATION_SUPPORT
retval = combine_two_entries (hdrp, oldp, newp);
#else /* NO_CAPITALIZATION_SUPPORT */
if ((oldp->flagfield & (CAPTYPEMASK | MOREVARIANTS))
== (ALLCAPS | MOREVARIANTS))
{
while (oldp->flagfield & MOREVARIANTS)
{
oldp = oldp->next;
retval = combine_two_entries (hdrp, oldp, newp);
if (retval != 0) /* Did we combine them? */
break;
}
}
else
retval = combine_two_entries (hdrp, oldp, newp);
if (retval == 0)
{
/*
** Couldn't combine the two entries. Add a new variant. For
** ease, we'll stick it right behind the header, rather than
** at the end of the list.
*/
forcevheader (hdrp, oldp, newp);
tdent = (struct dent *) mymalloc (sizeof (struct dent));
if (tdent == NULL)
{
(void) fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, newp->word);
return -1;
}
*tdent = *newp;
tdent->next = hdrp->next;
hdrp->next = tdent;
tdent->flagfield |= (hdrp->flagfield & MOREVARIANTS);
hdrp->flagfield |= MOREVARIANTS;
combineaffixes (hdrp, newp);
hdrp->flagfield |= (newp->flagfield & KEEP);
if (captype (newp->flagfield) == FOLLOWCASE)
tdent->word = newp->word;
else
{
tdent->word = NULL;
myfree (newp->word); /* newp->word isn't needed */
}
}
#endif /* NO_CAPITALIZATION_SUPPORT */
return retval;
}
#ifndef NO_CAPITALIZATION_SUPPORT
/*
** The following routine implements steps 3a and 3b in the commentary
** for "combinecaps".
*/
static void forcevheader (hdrp, oldp, newp)
register struct dent * hdrp;
struct dent * oldp;
struct dent * newp;
{
if ((hdrp->flagfield & (CAPTYPEMASK | MOREVARIANTS)) == ALLCAPS
&& ((oldp->flagfield ^ newp->flagfield) & KEEP) == 0)
return; /* Caller will set MOREVARIANTS */
else if ((hdrp->flagfield & (CAPTYPEMASK | MOREVARIANTS))
!= (ALLCAPS | MOREVARIANTS))
(void) addvheader (hdrp);
}
#endif /* NO_CAPITALIZATION_SUPPORT */
/*
** This routine implements steps 4 and 5 of the commentary for "combinecaps".
**
** Returns 1 if newp can be discarded, 0 if nothing done.
*/
static int combine_two_entries (hdrp, oldp, newp)
struct dent * hdrp; /* (Possible) header of variant chain */
register struct dent *
oldp; /* Pre-existing dictionary entry */
register struct dent *
newp; /* Entry to possibly combine */
{
if (acoversb (oldp, newp))
{
/* newp is superfluous. Drop it, preserving affixes and keep flag */
combineaffixes (oldp, newp);
oldp->flagfield |= (newp->flagfield & KEEP);
hdrp->flagfield |= (newp->flagfield & KEEP);
myfree (newp->word);
return 1;
}
else if (acoversb (newp, oldp))
{
/*
** oldp is superfluous. Replace it with newp, preserving affixes and
** the keep flag.
*/
combineaffixes (newp, oldp);
#ifdef NO_CAPITALIZATION_SUPPORT
newp->flagfield |= (oldp->flagfield & KEEP);
#else /* NO_CAPITALIZATION_SUPPORT */
newp->flagfield |= (oldp->flagfield & (KEEP | MOREVARIANTS));
#endif /* NO_CAPITALIZATION_SUPPORT */
hdrp->flagfield |= (newp->flagfield & KEEP);
newp->next = oldp->next;
/*
** We really want to free oldp->word, but that might be part of
** "hashstrings". So we'll futz around to arrange things so we can
** free newp->word instead. This depends very much on the fact
** that both words are the same length.
*/
if (oldp->word != NULL)
(void) strcpy (oldp->word, newp->word);
myfree (newp->word); /* No longer needed */
newp->word = oldp->word;
*oldp = *newp;
#ifndef NO_CAPITALIZATION_SUPPORT
/* We may need to add a header if newp is followcase */
if (captype (newp->flagfield) == FOLLOWCASE
&& (hdrp->flagfield & (CAPTYPEMASK | MOREVARIANTS))
!= (ALLCAPS | MOREVARIANTS))
(void) addvheader (hdrp);
#endif /* NO_CAPITALIZATION_SUPPORT */
return 1;
}
else
return 0;
}
/*
** Determine if enta covers entb, according to the rules in steps 4 and 5
** of the commentary for "combinecaps".
*/
static int acoversb (enta, entb)
register struct dent * enta; /* "A" in the rules */
register struct dent * entb; /* "B" in the rules */
{
int subset; /* NZ if entb is a subset of enta */
if ((subset = issubset (entb, enta)) != 0)
{
/* entb is a subset of enta; thus enta might cover entb */
if (((enta->flagfield ^ entb->flagfield) & KEEP) != 0
&& (enta->flagfield & KEEP) == 0) /* Inverse of condition (4b) */
return 0;
}
else
{
/* not a subset; KEEP flags must match exactly (both (4a) and (4b)) */
if (((enta->flagfield ^ entb->flagfield) & KEEP) != 0)
return 0;
}
/* Rules (4a) and (4b) are satisfied; check for capitalization match */
#ifdef NO_CAPITALIZATION_SUPPORT
#ifdef lint
return subset; /* Just so it gets used */
#else /* lint */
return 1; /* All words match */
#endif /* lint */
#else /* NO_CAPITALIZATION_SUPPORT */
if (((enta->flagfield ^ entb->flagfield) & CAPTYPEMASK) == 0)
{
if (captype (enta->flagfield) != FOLLOWCASE /* Condition (4c) */
|| strcmp (enta->word, entb->word) == 0)
return 1; /* Perfect match */
else
return 0;
}
else if (subset == 0) /* No flag subset, refuse */
return 0; /* ..near matches */
else if (captype (entb->flagfield) == ALLCAPS)
return 1;
else if (captype (enta->flagfield) == ANYCASE
&& captype (entb->flagfield) == CAPITALIZED)
return 1;
else
return 0;
#endif /* NO_CAPITALIZATION_SUPPORT */
}
void upcase (s)
register ichar_t * s;
{
while (*s)
{
*s = mytoupper (*s);
s++;
}
}
void lowcase (s)
register ichar_t * s;
{
while (*s)
{
*s = mytolower (*s);
s++;
}
}
/*
* Upcase variant that works on normal strings. Note that it is a lot
* slower than the normal upcase. The input must be in canonical form.
*/
void chupcase (s)
char * s;
{
ichar_t * is;
is = strtosichar (s, 1);
upcase (is);
(void) ichartostr (s, is, strlen (s) + 1, 1);
}
/*
** See if one affix field is a subset of another. Returns NZ if ent1
** is a subset of ent2. The KEEP flag is not taken into consideration.
*/
static int issubset (ent1, ent2)
register struct dent * ent1;
register struct dent * ent2;
{
/* The following is really testing for MASKSIZE > 1, but cpp can't do that */
#if MASKBITS > 32
register int flagword;
#ifdef FULLMASKSET
#define MASKMAX MASKSIZE
#else
#define MASKMAX MASKSIZE - 1
#endif /* FULLMASKSET */
for (flagword = MASKMAX; --flagword >= 0; )
{
if ((ent1->mask[flagword] & ent2->mask[flagword])
!= ent1->mask[flagword])
return 0;
}
#endif /* MASKBITS > 32 */
#ifdef FULLMASKSET
return ((ent1->mask[MASKSIZE - 1] & ent2->mask[MASKSIZE - 1])
== ent1->mask[MASKSIZE - 1]);
#else
if (((ent1->mask[MASKSIZE - 1] & ent2->mask[MASKSIZE - 1])
^ ent1->mask[MASKSIZE - 1]) & ~ALLFLAGS)
return 0;
else
return 1;
#endif /* FULLMASKSET */
}
/*
** Add ent2's affix flags to ent1.
*/
static void combineaffixes (ent1, ent2)
register struct dent * ent1;
register struct dent * ent2;
{
/* The following is really testing for MASKSIZE > 1, but cpp can't do that */
#if MASKBITS > 32
register int flagword;
if (ent1 == ent2)
return;
/* MASKMAX is defined in issubset, just above */
for (flagword = MASKMAX; --flagword >= 0; )
ent1->mask[flagword] |= ent2->mask[flagword];
#endif /* MASKBITS > 32 */
#ifndef FULLMASKSET
ent1->mask[MASKSIZE - 1] |= ent2->mask[MASKSIZE - 1] & ~ALLFLAGS;
#endif
}
/*
** Write out a dictionary entry, including capitalization variants.
** If onlykeep is true, only those variants with KEEP set will be
** written.
*/
void toutent (toutfile, hent, onlykeep)
register FILE * toutfile;
struct dent * hent;
register int onlykeep;
{
#ifdef NO_CAPITALIZATION_SUPPORT
if (!onlykeep || (hent->flagfield & KEEP))
toutword (toutfile, hent->word, hent);
#else
register struct dent * cent;
ichar_t wbuf[INPUTWORDLEN + MAXAFFIXLEN];
cent = hent;
if (strtoichar (wbuf, cent->word, INPUTWORDLEN, 1))
(void) fprintf (stderr, WORD_TOO_LONG (cent->word));
for ( ; ; )
{
if (!onlykeep || (cent->flagfield & KEEP))
{
switch (captype (cent->flagfield))
{
case ANYCASE:
lowcase (wbuf);
toutword (toutfile, ichartosstr (wbuf, 1), cent);
break;
case ALLCAPS:
if ((cent->flagfield & MOREVARIANTS) == 0
|| cent != hent)
{
upcase (wbuf);
toutword (toutfile, ichartosstr (wbuf, 1), cent);
}
break;
case CAPITALIZED:
lowcase (wbuf);
wbuf[0] = mytoupper (wbuf[0]);
toutword (toutfile, ichartosstr (wbuf, 1), cent);
break;
case FOLLOWCASE:
toutword (toutfile, cent->word, cent);
break;
}
}
if (cent->flagfield & MOREVARIANTS)
cent = cent->next;
else
break;
}
#endif
}
static void toutword (toutfile, word, cent)
register FILE * toutfile;
char * word;
register struct dent * cent;
{
register int bit;
has_marker = 0;
(void) fprintf (toutfile, "%s", word);
for (bit = 0; bit < LARGESTFLAG; bit++)
{
if (TSTMASKBIT (cent->mask, bit))
flagout (toutfile, BITTOCHAR (bit));
}
(void) fprintf (toutfile, "\n");
}
static void flagout (toutfile, flag)
register FILE * toutfile;
int flag;
{
if (!has_marker)
(void) putc (hashheader.flagmarker, toutfile);
has_marker = 1;
(void) putc (flag, toutfile);
}
/*
* If the string under the given pointer begins with a string character,
* return the length of that "character". If not, return 0.
* May be called any time, but it's best if "isstrstart" is first
* used to filter out unnecessary calls.
*
* As a side effect, "laststringch" is set to the number of the string
* found, or to -1 if none was found. This can be useful for such things
* as case conversion.
*/
int stringcharlen (bufp, canonical)
char * bufp;
int canonical; /* NZ if input is in canonical form */
{
#ifdef SLOWMULTIPLY
static char * sp[MAXSTRINGCHARS];
static int inited = 0;
#endif /* SLOWMULTIPLY */
register char * bufcur;
register char * stringcur;
register int stringno;
register int lowstringno;
register int highstringno;
int dupwanted;
#ifdef SLOWMULTIPLY
if (!inited)
{
inited = 1;
for (stringno = 0; stringno < MAXSTRINGCHARS; stringno++)
sp[stringno] = &hashheader.stringchars[stringno][0];
}
#endif /* SLOWMULTIPLY */
lowstringno = 0;
highstringno = hashheader.nstrchars - 1;
dupwanted = canonical ? 0 : defdupchar;
while (lowstringno <= highstringno)
{
stringno = (lowstringno + highstringno) >> 1;
#ifdef SLOWMULTIPLY
stringcur = sp[stringno];
#else /* SLOWMULTIPLY */
stringcur = &hashheader.stringchars[stringno][0];
#endif /* SLOWMULTIPLY */
bufcur = bufp;
while (*stringcur)
{
#ifdef NO8BIT
if (((*bufcur++ ^ *stringcur) & 0x7F) != 0)
#else /* NO8BIT */
if (*bufcur++ != *stringcur)
#endif /* NO8BIT */
break;
/*
** We can't use autoincrement above because of the
** test below.
*/
stringcur++;
}
if (*stringcur == '\0')
{
if (hashheader.dupnos[stringno] == dupwanted)
{
/* We have a match */
laststringch = hashheader.stringdups[stringno];
#ifdef SLOWMULTIPLY
return stringcur - sp[stringno];
#else /* SLOWMULTIPLY */
return stringcur - &hashheader.stringchars[stringno][0];
#endif /* SLOWMULTIPLY */
}
else
--stringcur;
}
/* No match - choose which side to search on */
#ifdef NO8BIT
if ((*--bufcur & 0x7F) < (*stringcur & 0x7F))
highstringno = stringno - 1;
else if ((*bufcur & 0x7F) > (*stringcur & 0x7F))
lowstringno = stringno + 1;
#else /* NO8BIT */
if (*--bufcur < *stringcur)
highstringno = stringno - 1;
else if (*bufcur > *stringcur)
lowstringno = stringno + 1;
#endif /* NO8BIT */
else if (dupwanted < hashheader.dupnos[stringno])
highstringno = stringno - 1;
else
lowstringno = stringno + 1;
}
laststringch = -1;
return 0; /* Not a string character */
}
/*
* Convert an external string to an ichar_t string. If necessary, the parity
* bit is stripped off as part of the process.
*
* Returns NZ if the output string overflowed.
*/
int strtoichar (out, in, outlen, canonical)
register ichar_t * out; /* Where to put result */
register char * in; /* String to convert */
int outlen; /* Size of output buffer, *BYTES* */
int canonical; /* NZ if input is in canonical form */
{
register int len; /* Length of next character */
outlen /= sizeof (ichar_t); /* Convert to an ichar_t count */
for ( ; --outlen > 0 && *in != '\0'; in += len)
{
if (l1_isstringch (in, len, canonical))
*out++ = SET_SIZE + laststringch;
else
*out++ = *in & NOPARITY;
}
*out = 0;
return outlen <= 0;
}
/*
* Convert an ichar_t string to an external string.
*
* WARNING: the resulting string may wind up being longer than the
* original. In fact, even the sequence strtoichar->ichartostr may
* produce a result longer than the original, because the output form
* may use a different string type set than the original input form.
*
* Returns NZ if the output string overflowed.
*/
int ichartostr (out, in, outlen, canonical)
register char * out; /* Where to put result */
register ichar_t * in; /* String to convert */
int outlen; /* Size of output buffer, bytes */
int canonical; /* NZ for canonical form */
{
register int ch; /* Next character to store */
register int i; /* Index into duplicates list */
register char * scharp; /* Pointer into a string char */
while (--outlen > 0 && (ch = *in++) != 0)
{
if (ch < SET_SIZE)
*out++ = (char) ch;
else
{
ch -= SET_SIZE;
if (!canonical)
{
for (i = hashheader.nstrchars; --i >= 0; )
{
if (hashheader.dupnos[i] == defdupchar
&& hashheader.stringdups[i] == ch)
{
ch = i;
break;
}
}
}
scharp = hashheader.stringchars[(unsigned) ch];
while ((*out++ = *scharp++) != '\0')
;
out--;
}
}
*out = '\0';
return outlen <= 0;
}
/*
* Convert a string to an ichar_t, storing the result in a static area.
*/
ichar_t * strtosichar (in, canonical)
char * in; /* String to convert */
int canonical; /* NZ if input is in canonical form */
{
static ichar_t out[STRTOSICHAR_SIZE / sizeof (ichar_t)];
if (strtoichar (out, in, sizeof out, canonical))
(void) fprintf (stderr, WORD_TOO_LONG (in));
return out;
}
/*
* Convert an ichar_t to a string, storing the result in a static area.
*/
char * ichartosstr (in, canonical)
ichar_t * in; /* Internal string to convert */
int canonical; /* NZ for canonical conversion */
{
static char out[ICHARTOSSTR_SIZE];
if (ichartostr (out, in, sizeof out, canonical))
(void) fprintf (stderr, WORD_TOO_LONG (out));
return out;
}
/*
* Convert a single ichar to a printable string, storing the result in
* a static area.
*/
char * printichar (in)
int in;
{
static char out[MAXSTRINGCHARLEN + 1];
if (in < SET_SIZE)
{
out[0] = (char) in;
out[1] = '\0';
}
else
(void) strcpy (out, hashheader.stringchars[(unsigned) in - SET_SIZE]);
return out;
}
#ifndef ICHAR_IS_CHAR
/*
* Copy an ichar_t.
*/
ichar_t * icharcpy (out, in)
register ichar_t * out; /* Destination */
register ichar_t * in; /* Source */
{
ichar_t * origout; /* Copy of destination for return */
origout = out;
while ((*out++ = *in++) != 0)
;
return origout;
}
/*
* Return the length of an ichar_t.
*/
int icharlen (in)
register ichar_t * in; /* String to count */
{
register int len; /* Length so far */
for (len = 0; *in++ != 0; len++)
;
return len;
}
/*
* Compare two ichar_t's.
*/
int icharcmp (s1, s2)
register ichar_t * s1;
register ichar_t * s2;
{
while (*s1 != 0)
{
if (*s1++ != *s2++)
return *--s1 - *--s2;
}
return *s1 - *s2;
}
/*
* Strncmp for two ichar_t's.
*/
int icharncmp (s1, s2, n)
register ichar_t * s1;
register ichar_t * s2;
register int n;
{
while (--n >= 0 && *s1 != 0)
{
if (*s1++ != *s2++)
return *--s1 - *--s2;
}
if (n < 0)
return 0;
else
return *s1 - *s2;
}
#endif /* ICHAR_IS_CHAR */
int findfiletype (name, searchnames, deformatter)
char * name; /* Name to look up in suffix table */
int searchnames; /* NZ to search name field of table */
int * deformatter; /* Where to set deformatter type */
{
char * cp; /* Pointer into suffix list */
int cplen; /* Length of current suffix */
register int i; /* Index into type table */
int len; /* Length of the name */
/*
* Note: for now, the deformatter is set to 1 for tex, 0 for nroff.
* Further, we assume that it's one or the other, so that a test
* for tex is sufficient. This needs to be generalized.
*/
len = strlen (name);
if (searchnames)
{
for (i = 0; i < hashheader.nstrchartype; i++)
{
if (strcmp (name, chartypes[i].name) == 0)
{
if (deformatter != NULL)
*deformatter =
(strcmp (chartypes[i].deformatter, "tex") == 0);
return i;
}
}
}
for (i = 0; i < hashheader.nstrchartype; i++)
{
for (cp = chartypes[i].suffixes; *cp != '\0'; cp += cplen + 1)
{
cplen = strlen (cp);
if (len >= cplen && strcmp (&name[len - cplen], cp) == 0)
{
if (deformatter != NULL)
*deformatter =
(strcmp (chartypes[i].deformatter, "tex") == 0);
return i;
}
}
}
return -1;
}
/*
* The following routines are all dummies for the benefit of lint.
*/
#ifdef lint
int TSTMASKBIT (mask, bit) MASKTYPE * mask; int bit;
{ return bit + (int) *mask; }
void CLRMASKBIT (mask, bit) MASKTYPE * mask; int bit; { bit += (int) *mask; }
void SETMASKBIT (mask, bit) MASKTYPE * mask; int bit; { bit += (int) *mask; }
int BITTOCHAR (bit) int bit; { return bit; }
int CHARTOBIT (ch) int ch; { return ch; }
int myupper (ch) unsigned int ch; { return (int) ch; }
int mylower (ch) unsigned int ch; { return (int) ch; }
int myspace (ch) unsigned int ch; { return (int) ch; }
int iswordch (ch) unsigned int ch; { return (int) ch; }
int isboundarych (ch) unsigned int ch; { return (int) ch; }
int isstringstart (ch) unsigned int ch; { return ch; }
ichar_t mytolower (ch) unsigned int ch; { return (ichar_t) ch; }
ichar_t mytoupper (ch) unsigned int ch; { return (ichar_t) ch; }
#endif /* lint */