blob: 80514f163daa2c8ab4c22d9f5e858cce69fa0d0d [file] [log] [blame]
/*************************************************************************/
/* */
/* Language Technologies Institute */
/* Carnegie Mellon University */
/* Copyright (c) 2001 */
/* All Rights Reserved. */
/* */
/* Permission is hereby granted, free of charge, to use and distribute */
/* this software and its documentation without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of this work, and to */
/* permit persons to whom this work is furnished to do so, subject to */
/* the following conditions: */
/* 1. The code must retain the above copyright notice, this list of */
/* conditions and the following disclaimer. */
/* 2. Any modifications must be clearly marked as such. */
/* 3. Original authors' names are not deleted. */
/* 4. The authors' names are not used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
/* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
/* THIS SOFTWARE. */
/* */
/*************************************************************************/
/* Author: Alan W Black (awb@cs.cmu.edu) */
/* Date: January 2001 */
/*************************************************************************/
/* */
/* English text expanders */
/* */
/* numbers, digits, ids (years), money */
/* */
/*************************************************************************/
#include <ctype.h>
#include "us_text.h"
/* Cardinal words for the single digits; the index is the digit's value */
static const char * const digit2num[10] = {
    "zero", "one", "two", "three", "four",
    "five", "six", "seven", "eight", "nine"
};
/* Cardinal words for 10..19, indexed by the units digit */
static const char * const digit2teen[10] = {
    "ten",          /* en_exp_number serves "10" from digit2enty instead */
    "eleven", "twelve", "thirteen",
    "fourteen", "fifteen", "sixteen",
    "seventeen", "eighteen", "nineteen"
};
/* Cardinal words for the multiples of ten, indexed by the tens digit */
static const char * const digit2enty[10] = {
    "zero",         /* en_exp_number never indexes this with a leading 0 */
    "ten", "twenty", "thirty",
    "forty", "fifty", "sixty",
    "seventy", "eighty", "ninety"
};
/* Ordinal words for the single digits; entry i pairs with digit2num[i] */
static const char * const ord2num[10] = {
    "zeroth", "first", "second", "third", "fourth",
    "fifth", "sixth", "seventh", "eighth", "ninth"
};
/* Ordinal words for 10..19; entry i pairs with digit2teen[i] */
static const char * const ord2teen[10] = {
    "tenth",        /* pairs with digit2teen[0] */
    "eleventh", "twelfth", "thirteenth",
    "fourteenth", "fifteenth", "sixteenth",
    "seventeenth", "eighteenth", "nineteenth"
};
/* Ordinal words for the multiples of ten; entry i pairs with digit2enty[i] */
static const char * const ord2enty[10] = {
    "zeroth",       /* pairs with digit2enty[0] */
    "tenth", "twentieth", "thirtieth",
    "fortieth", "fiftieth", "sixtieth",
    "seventieth", "eightieth", "ninetieth"
};
cst_val *en_exp_number(const char *numstring)
{
    /* Expand given token to list of words pronouncing it as a number.   */
    /* Returns NULL for "" and for groups that are all zeros ("00",      */
    /* "000").  Tokens of 13 or more characters, and tokens holding      */
    /* anything other than 0-9, are read out one character at a time by  */
    /* en_exp_digits instead.                                            */
    /* NOTE(review): a 4, 7 or 10 character token that starts with a     */
    /* single '0' (e.g. "0123") still yields "zero thousand ...", since  */
    /* the one-character group "0" expands to "zero", not to nothing.    */
    int num_digits = cst_strlen(numstring);
    const char *scale;
    char part[4];
    cst_val *p;
    int lead, i;

    if (num_digits == 0)
        return NULL;

    /* The word tables are indexed with (character - '0'); a character   */
    /* outside 0-9 would read past their ends, so bail out early         */
    for (i=0; i < num_digits; i++)
        if ((numstring[i] < '0') || (numstring[i] > '9'))
            return en_exp_digits(numstring);

    if (num_digits == 1)
        return en_exp_digits(numstring);
    else if (num_digits == 2)
    {
        if (numstring[0] == '0')
        {
            if (numstring[1] == '0')
                return NULL;                       /* "00": say nothing */
            else
                return cons_val(string_val(digit2num[numstring[1]-'0']),NULL);
        }
        else if (numstring[1] == '0')              /* 10, 20, ... 90 */
            return cons_val(string_val(digit2enty[numstring[0]-'0']),NULL);
        else if (numstring[0] == '1')              /* 11 ... 19 */
            return cons_val(string_val(digit2teen[numstring[1]-'0']),NULL);
        else                                       /* tens word + units */
            return cons_val(string_val(digit2enty[numstring[0]-'0']),
                            en_exp_digits(numstring+1));
    }
    else if (num_digits == 3)
    {
        if (numstring[0] == '0')
            return en_exp_number(numstring+1);
        else
            return cons_val(string_val(digit2num[numstring[0]-'0']),
                            cons_val(string_val("hundred"),
                                     en_exp_number(numstring+1)));
    }
    else if (num_digits < 13)
    {
        /* Split off the leading 1-3 digits, expand them as a number of  */
        /* their own, name the scale, then expand the remaining digits   */
        if (num_digits < 7)
        {
            scale = "thousand";
            lead = num_digits-3;
        }
        else if (num_digits < 10)
        {
            scale = "million";
            lead = num_digits-6;
        }
        else
        {   /* If there are pedantic brits out there, tough!, 10^9 is a billion */
            scale = "billion";
            lead = num_digits-9;
        }

        for (i=0; i < lead; i++)
            part[i] = numstring[i];
        part[i] = '\0';

        p = en_exp_number(part);
        if (p == NULL)            /* leading group was all zeros */
            return en_exp_number(numstring+lead);
        else
            return val_append(p,cons_val(string_val(scale),
                                         en_exp_number(numstring+lead)));
    }
    else   /* Way too many digits here, to be a number */
    {
        return en_exp_digits(numstring);
    }
}
cst_val *en_exp_ordinal(const char *rawnumstring)
{
/* return ordinal for digit string */
cst_val *card, *o;
const cst_val *t;
const char *l;
const char *ord;
char *numstring;
int i,j;
numstring = cst_strdup(rawnumstring);
for (j=i=0; i < cst_strlen(rawnumstring); i++)
if (rawnumstring[i] != ',')
{
numstring[j] = rawnumstring[i];
j++;
}
numstring[j] = '\0';
card = val_reverse(en_exp_number(numstring));
cst_free(numstring);
l = val_string(val_car(card));
ord = 0;
for (i=0; i<10; i++)
if (cst_streq(l,digit2num[i]))
ord = ord2num[i];
if (!ord)
for (i=0; i<10; i++)
if (cst_streq(l,digit2teen[i]))
ord = ord2teen[i];
if (!ord)
for (i=0; i<10; i++)
if (cst_streq(l,digit2enty[i]))
ord = ord2enty[i];
if (cst_streq(l,"hundred"))
ord = "hundredth";
if (cst_streq(l,"thousand"))
ord = "thousandth";
if (cst_streq(l,"billion"))
ord = "billtionth";
if (!ord) /* dunno, so don't convert anything */
return card;
o = cons_val(string_val(ord),0);
for (t=val_cdr(card); t; t=val_cdr(t))
o = cons_val(val_car(t),o);
delete_val(card);
return o;
}
cst_val *en_exp_id(const char *numstring)
{
    /* Expand numstring as pairs as in years or ids, e.g. "1984" as      */
    /* "nineteen" "eighty four" and "1900" as "nineteen" "hundred".      */
    /* Tokens holding anything other than 0-9 are read out one character */
    /* at a time by en_exp_digits.                                       */
    char aaa[3];
    int len = cst_strlen(numstring);
    int i;

    /* digit2num is indexed with (character - '0') below; a character    */
    /* outside 0-9 would read past the end of the table                  */
    for (i=0; i < len; i++)
        if ((numstring[i] < '0') || (numstring[i] > '9'))
            return en_exp_digits(numstring);

    if ((len == 4) &&
        (numstring[2] == '0') &&
        (numstring[3] == '0'))
    {
        if (numstring[1] == '0')
            return en_exp_number(numstring); /* 2000, 3000 */
        else
        {   /* 1900 -> "nineteen" "hundred" */
            aaa[0] = numstring[0];
            aaa[1] = numstring[1];
            aaa[2] = '\0';
            return val_append(en_exp_number(aaa),
                              cons_val(string_val("hundred"),NULL));
        }
    }
    else if ((len == 3) &&
             (numstring[0] != '0') &&
             (numstring[1] == '0') &&
             (numstring[2] == '0'))
    {   /* 300 -> "three" "hundred" */
        return cons_val(string_val(digit2num[numstring[0]-'0']),
                        cons_val(string_val("hundred"),NULL));
    }
    else if ((len == 2) && (numstring[0] == '0')
             && (numstring[1] == '0'))
        return cons_val(string_val("zero"),
                        cons_val(string_val("zero"),NULL));
    else if ((len == 2) && (numstring[0] == '0'))
        return cons_val(string_val("oh"),       /* 05 -> "oh" "five" */
                        en_exp_digits(&numstring[1]));
    else if (((len == 4) &&
              (numstring[1] == '0') && (numstring[2] == '0')) ||
             (len < 3))
        return en_exp_number(numstring);        /* 2005, or a short token */
    else if (len%2 == 1)
    {   /* odd length: say the first digit alone, pair up the rest */
        return cons_val(string_val(digit2num[numstring[0]-'0']),
                        en_exp_id(&numstring[1]));
    }
    else
    {   /* even length: say the leading pair, then recurse on the rest */
        aaa[0] = numstring[0];
        aaa[1] = numstring[1];
        aaa[2] = '\0';
        return val_append(en_exp_number(aaa),en_exp_id(&numstring[2]));
    }
}
cst_val *en_exp_real(const char *numstring)
{
    /* Expand a signed real, optionally with an exponent, into words:    */
    /* a leading '-' or '+' becomes "minus" or "plus", an 'e' or 'E'     */
    /* becomes "e" between the expanded mantissa and exponent, and a '.' */
    /* becomes "point" followed by the fraction read digit by digit.     */
    /* Returns NULL when numstring is NULL.                              */
    char *aaa;
    const char *p;
    cst_val *r;

    if (numstring == NULL)
        return NULL;

    if (numstring[0] == '-')
        r = cons_val(string_val("minus"),
                     en_exp_real(&numstring[1]));
    else if (numstring[0] == '+')
        r = cons_val(string_val("plus"),
                     en_exp_real(&numstring[1]));
    else if (((p=strchr(numstring,'e')) != NULL) ||
             ((p=strchr(numstring,'E')) != NULL))
    {
        /* aaa is the text in front of the exponent marker */
        aaa = cst_strdup(numstring);
        aaa[p-numstring] = '\0';
        r = val_append(en_exp_real(aaa),
                       cons_val(string_val("e"),
                                en_exp_real(p+1)));
        cst_free(aaa);
    }
    else if ((p=strchr(numstring,'.')) != NULL)
    {
        /* aaa is the integer part; the fraction is spoken as digits */
        aaa = cst_strdup(numstring);
        aaa[p-numstring] = '\0';
        r = val_append(en_exp_number(aaa),
                       cons_val(string_val("point"),
                                en_exp_digits(p+1)));
        cst_free(aaa);
    }
    else
        /* no sign, exponent or point: reached by the recursive calls */
        /* above, e.g. for the exponent digits of "1e5"               */
        r = en_exp_number(numstring);

    return r;
}
cst_val *en_exp_digits(const char *numstring)
{
    /* Read the token out one character at a time: each of 0-9 becomes  */
    /* its digit word, any other character becomes "umpty"              */
    cst_val *words = 0;
    const char *cur = numstring;
    const char *word;

    while (*cur != '\0')
    {
        if ((*cur < '0') || (*cur > '9'))
            word = "umpty";
        else
            word = digit2num[*cur-'0'];
        words = cons_val(string_val(word),words);
        cur++;
    }

    /* consing at the head built the list back to front */
    return val_reverse(words);
}
cst_val *en_exp_letters(const char *lets)
{
/* returns these as list of single char symbols */
char *aaa;
cst_val *r;
int i;
aaa = cst_alloc(char,2);
aaa[1] = '\0';
for (r=0,i=0; lets[i] != '\0'; i++)
{
aaa[0] = lets[i];
if (isupper((int)aaa[0]))
aaa[0] = tolower((int)aaa[0]);
if (strchr("0123456789",aaa[0]))
r = cons_val(string_val(digit2num[aaa[0]-'0']),r);
else if (cst_streq(aaa,"a"))
r = cons_val(string_val("_a"),r);
else
r = cons_val(string_val(aaa),r);
}
cst_free(aaa);
return val_reverse(r);
}
/* Value of one upper case roman numeral symbol, 0 for anything else */
static int en_roman_symbol_value(char c)
{
    switch (c)
    {
    case 'I': return 1;
    case 'V': return 5;
    case 'X': return 10;
    case 'L': return 50;
    case 'C': return 100;
    case 'D': return 500;
    case 'M': return 1000;
    default:  return 0;
    }
}

int en_exp_roman(const char *roman)
{
    /* Convert an upper case roman numeral to its integer value.         */
    /* Symbols are summed left to right, except that I, X or C directly  */
    /* before one of the next two larger symbols (IV IX, XL XC, CD CM)   */
    /* forms a subtractive pair.  Characters that are not roman symbols  */
    /* contribute nothing; "" gives 0.  The input is not validated, so   */
    /* malformed numerals such as "VX" are simply summed (15).           */
    int val;
    const char *p;

    val = 0;
    for (p=roman; *p != 0; p++)
    {
        int cur = en_roman_symbol_value(p[0]);
        int nxt = en_roman_symbol_value(p[1]);  /* 0 at the terminator */

        if (((cur == 1) || (cur == 10) || (cur == 100)) &&
            ((nxt == 5*cur) || (nxt == 10*cur)))
        {
            val += nxt - cur;
            p++;               /* the pair's second symbol is consumed */
        }
        else
            val += cur;
    }
    return val;
}