blob: 505fb4c396e2c45c5e78ff9b2fe91e0bc3225518 [file] [log] [blame]
/*************************************************************************/
/* */
/* Language Technologies Institute */
/* Carnegie Mellon University */
/* Copyright (c) 1999 */
/* All Rights Reserved. */
/* */
/* Permission is hereby granted, free of charge, to use and distribute */
/* this software and its documentation without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of this work, and to */
/* permit persons to whom this work is furnished to do so, subject to */
/* the following conditions: */
/* 1. The code must retain the above copyright notice, this list of */
/* conditions and the following disclaimer. */
/* 2. Any modifications must be clearly marked as such. */
/* 3. Original authors' names are not deleted. */
/* 4. The authors' names are not used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
/* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
/* THIS SOFTWARE. */
/* */
/*************************************************************************/
/* Author: Alan W Black (awb@cs.cmu.edu) */
/* Date: December 1999 */
/*************************************************************************/
/* */
/* Letter to sound rule support */
/* */
/*************************************************************************/
#include "cst_features.h"
#include "cst_lts.h"
#include "cst_endian.h"
/* Forward declaration: apply_model() is called from lts_apply() below    */
/* but is only defined at the end of this file.                           */
static cst_lts_phone apply_model(cst_lts_letter *vals,
cst_lts_addr start,
const cst_lts_model *model);
/* Allocate a new letter-to-sound rule set.                               */
/* Returns a freshly allocated cst_lts_rules whose name, letter_index,    */
/* models, phone_table and letter_table fields, and both context size     */
/* fields, are all set to 0.                                              */
cst_lts_rules *new_lts_rules()
{
    cst_lts_rules *rules;

    rules = cst_alloc(cst_lts_rules,1);

    /* Context sizes */
    rules->context_window_size = 0;
    rules->context_extra_feats = 0;

    /* Name and tables */
    rules->name = 0;
    rules->letter_table = 0;
    rules->letter_index = 0;
    rules->phone_table = 0;
    rules->models = 0;

    return rules;
}
/* Symbol to symbol variant of lts_apply().                               */
/* Every item of wlist is looked up, as a string, in r->letter_table.     */
/* The table position of each match becomes one byte of a temporary       */
/* word which is then passed, with feats and r, to lts_apply().  Items    */
/* that are not in the table are left out of the word.                    */
/* Returns the value produced by lts_apply().                             */
cst_val *lts_apply_val(const cst_val *wlist,const char *feats,const cst_lts_rules *r)
{
    const cst_val *item;
    cst_val *result;
    char *encoded;
    int used;   /* number of bytes of encoded filled in so far */
    int slot;   /* position being tried in r->letter_table */

    encoded = cst_alloc(char,val_length(wlist)+1);

    used = 0;
    for (item = wlist; item; item = val_cdr(item))
    {
        /* Scan the table until a match or the terminating 0 entry */
        slot = 0;
        while (r->letter_table[slot] &&
               !(cst_streq(val_string(val_car(item)),r->letter_table[slot])))
            slot++;

        if (!r->letter_table[slot])
        {
#if 0
            printf("awb_debug unknown letter >%s<\n",val_string(val_car(item)));
#endif
            continue;   /* can't find this letter so skip it */
        }

        encoded[used] = slot;
        used++;
    }

    result = lts_apply(encoded,feats,r);
    cst_free(encoded);

    return result;
}
/* Predict a phone sequence for word using the rule set r.                */
/*                                                                        */
/*   word   letters to convert.  When r->letter_table is set the bytes    */
/*          are letter codes (lts_apply_val() builds them from table      */
/*          positions); otherwise they are plain characters and only      */
/*          'a'..'z' are predicted, anything else is skipped.             */
/*   feats  extra feature string placed after the letter context in the   */
/*          buffer handed to the predictor.  NULL is treated as "".       */
/*   r      rule set giving the window size, letter index, models and     */
/*          phone table.                                                  */
/*                                                                        */
/* Returns a list of phone name strings in word order, or 0 if no phone   */
/* was produced.  "epsilon" predictions are dropped and a phone name      */
/* containing '-' is split into two phones.                               */
/*                                                                        */
/* Assumes cst_lts_letter is one char wide and that                       */
/* r->context_window_size-1 is at most 8 (the amount of padding           */
/* available below).                                                      */
cst_val *lts_apply(const char *word,const char *feats,const cst_lts_rules *r)
{
    int pos, index, i;
    int word_len, feats_len, split;
    cst_val *phones=0;
    cst_lts_letter *fval_buff;
    cst_lts_letter *full_buff;
    cst_lts_phone phone;
    const char *phone_name;
    char *left, *right, *p;
    char hash;
    char zeros[8];

    if (feats == NULL)
        feats = "";

    word_len = cst_strlen(word);
    feats_len = cst_strlen(feats);

    /* For feature vals for each letter: two context windows, the extra   */
    /* features and the terminating NUL that cst_sprintf() writes.  Use   */
    /* the real length of feats when it is longer than the rule set       */
    /* declares so the write below can never run past the buffer.         */
    if (feats_len < r->context_extra_feats)
        feats_len = r->context_extra_feats;
    fval_buff = cst_alloc(cst_lts_letter,
                          (r->context_window_size*2)+
                          feats_len+1);

    /* Buffer with added contexts: padding, boundary, word, boundary,     */
    /* padding and NUL */
    full_buff = cst_alloc(cst_lts_letter,
                          (r->context_window_size*2)+
                          word_len+1);

    if (r->letter_table)
    {
        /* Coded letters: byte 2 is the padding and byte 1 the boundary.  */
        /* zeros is deliberately not NUL terminated, it is only ever      */
        /* read through a precision-limited %.*s                          */
        for (i=0; i<8; i++) zeros[i] = 2;
        cst_sprintf((char *)full_buff,
                    "%.*s%c%s%c%.*s",
                    r->context_window_size-1, zeros,
                    1,
                    word,
                    1,
                    r->context_window_size-1, zeros);
        hash = 1;
    }
    else
    {
        /* Plain characters: '0' is the padding and '#' the boundary */
        cst_sprintf((char *)full_buff,
                    "%.*s#%s#%.*s",
                    r->context_window_size-1, "00000000",
                    word,
                    r->context_window_size-1, "00000000");
        hash = '#';
    }

    /* Do the prediction backwards so we don't need to reverse the answer. */
    /* pos starts on the last letter of the word and the scan ends at the  */
    /* first boundary byte met, so a word that itself contains that byte   */
    /* is only processed to the right of it.                               */
    for (pos = r->context_window_size + word_len - 1;
         full_buff[pos] != hash;
         pos--)
    {
        if (r->letter_table)
        {
            /* NOTE(review): presumably letter codes start at 3, as 0     */
            /* ends the string and 1 and 2 are the markers used above     */
            index = full_buff[pos] - 3;
        }
        else if ((full_buff[pos] < 'a') || (full_buff[pos] > 'z'))
        {
#ifdef EXCESSIVELY_CHATTY
            cst_errmsg("lts:skipping unknown char \"%c\"\n",
                       full_buff[pos]);
#endif
            continue;
        }
        else
            index = full_buff[pos] - 'a';

        /* Fill the features buffer for the predictor: the window of      */
        /* letters before pos, the window after pos, then feats           */
        cst_sprintf((char *)fval_buff,
                    "%.*s%.*s%s",
                    r->context_window_size,
                    full_buff+pos-r->context_window_size,
                    r->context_window_size,
                    full_buff+pos+1,
                    feats);

        phone = apply_model(fval_buff,
                            r->letter_index[index],
                            r->models);
        phone_name = r->phone_table[phone];

        /* delete epsilons and split dual-phones */
        if (cst_streq("epsilon",phone_name))
            continue;
        else if ((p=strchr(phone_name,'-')) != NULL)
        {
            split = (int)(p - phone_name);   /* offset of the '-' */
            left = cst_substr(phone_name,0,split);
            right = cst_substr(phone_name,split+1,(cst_strlen(p)-1));
            phones = cons_val(string_val(left),
                              cons_val(string_val(right),phones));
            cst_free(left);
            cst_free(right);
        }
        else
            phones = cons_val(string_val(phone_name),phones);
    }

    cst_free(full_buff);
    cst_free(fval_buff);

    return phones;
}
/* Copy rule number n of model into *state.                               */
/* The rule is taken from element n*rule_size of model and rule_size      */
/* bytes are copied.  A copy is made, rather than reading the model in    */
/* place, as some OS's require a more elaborate access than a simple      */
/* lookup.                                                                */
static void cst_lts_get_state(cst_lts_rule *state,
                              const cst_lts_model *model,
                              unsigned short n,
                              int rule_size)
{
    const cst_lts_model *packed_rule = model + (n*rule_size);

    memmove(state,packed_rule,rule_size);
}
/* Run the model on one feature vector and return the predicted phone.    */
/*                                                                        */
/*   vals   feature values, indexed by the feat field of each rule        */
/*   start  number of the first rule to look at (used as given)           */
/*   model  the packed rules                                              */
/*                                                                        */
/* Starting from rule start, vals[feat] is compared with the rule's val   */
/* and the walk moves on to rule qtrue on a match, qfalse otherwise,      */
/* until a rule whose feat is CST_LTS_EOR is reached.  The val of that    */
/* final rule is returned as the phone.                                   */
/*                                                                        */
/* Each rule is copied into a local via cst_lts_get_state() because some  */
/* machines (arm/mips) can't deal with addrs not on word boundaries.      */
/* This still might be wrong on machines that lay the rule structure      */
/* out differently from the 6 bytes assumed here.                         */
static cst_lts_phone apply_model(cst_lts_letter *vals,cst_lts_addr start,
                                 const cst_lts_model *model)
{
    static const int sizeof_cst_lts_rule = 6;
    cst_lts_rule node;
    unsigned short next;

    cst_lts_get_state(&node,model,start,sizeof_cst_lts_rule);

    while (node.feat != CST_LTS_EOR)
    {
        next = (vals[node.feat] == node.val) ? node.qtrue : node.qfalse;

        /* The stored rule numbers need swapping on big endian hosts;    */
        /* this should really happen at compilation time                 */
        if (CST_BIG_ENDIAN)
            next = SWAPSHORT(next);

        cst_lts_get_state(&node,model,next,sizeof_cst_lts_rule);
    }

    return (cst_lts_phone)node.val;
}