/* src/lexicon/cst_lts.c - third_party/flite - Git at Google */

 /*************************************************************************/
 /*                                                                       */
 /*                  Language Technologies Institute                      */
 /*                     Carnegie Mellon University                        */
 /*                        Copyright (c) 1999                             */
 /*                        All Rights Reserved.                           */
 /*                                                                       */
 /*  Permission is hereby granted, free of charge, to use and distribute  */
 /*  this software and its documentation without restriction, including   */
 /*  without limitation the rights to use, copy, modify, merge, publish,  */
 /*  distribute, sublicense, and/or sell copies of this work, and to      */
 /*  permit persons to whom this work is furnished to do so, subject to   */
 /*  the following conditions:                                            */
 /*   1. The code must retain the above copyright notice, this list of    */
 /*      conditions and the following disclaimer.                         */
 /*   2. Any modifications must be clearly marked as such.                */
 /*   3. Original authors' names are not deleted.                         */
 /*   4. The authors' names are not used to endorse or promote products   */
 /*      derived from this software without specific prior written        */
 /*      permission.                                                      */
 /*                                                                       */
 /*  CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK         */
 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
 /*  SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE      */
 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
 /*  THIS SOFTWARE.                                                       */
 /*                                                                       */
 /*************************************************************************/
 /*             Author:  Alan W Black (awb@cs.cmu.edu)                    */
 /*               Date:  December 1999                                    */
 /*************************************************************************/
 /*                                                                       */
 /*  Letter to sound rule support                                         */
 /*                                                                       */
 /*************************************************************************/

 #include "cst_features.h"
 #include "cst_lts.h"
 #include "cst_endian.h"

 /* Forward declaration: apply_model() is called by lts_apply() before   */
 /* its definition appears further down in this file.                    */
 static cst_lts_phone apply_model(cst_lts_letter *vals,
 				 cst_lts_addr start,
 				 const cst_lts_model *model);

 /* Allocate one cst_lts_rules structure and return it with every field  */
 /* explicitly set to zero.                                               */
 cst_lts_rules *new_lts_rules()
 {
     cst_lts_rules *rules;

     rules = cst_alloc(cst_lts_rules,1);

     /* identification and lookup tables */
     rules->name = 0;
     rules->letter_table = 0;
     rules->letter_index = 0;
     rules->phone_table = 0;
     rules->models = 0;

     /* feature-vector geometry */
     rules->context_window_size = 0;
     rules->context_extra_feats = 0;

     return rules;
 }

 /* Symbol-to-symbol front end for lts_apply().                           */
 /*                                                                       */
 /* Each item of wlist is read as a string and looked up in               */
 /* r->letter_table; the index of the matching entry is stored as one     */
 /* char of a temporary word.  Items with no matching entry are skipped.  */
 /* The temporary word is then handed to lts_apply() together with feats  */
 /* and r, and freed before the result is returned.                       */
 cst_val *lts_apply_val(const cst_val *wlist,const char *feats,const cst_lts_rules *r)
 {
     const cst_val *item;
     cst_val *result;
     char *codes;
     int n_codes = 0;   /* number of letter codes written so far */
     int k;

     codes = cst_alloc(char,val_length(wlist)+1);

     for (item = wlist; item; item = val_cdr(item))
     {
         /* Scan the (0-terminated) letter table for this symbol */
         for (k = 0; r->letter_table[k]; k++)
         {
             if (cst_streq(val_string(val_car(item)),r->letter_table[k]))
                 break;
         }

         /* Only a symbol found in the table contributes a code */
         if (r->letter_table[k])
         {
             codes[n_codes] = k;
             n_codes++;
         }
     }

     result = lts_apply(codes,feats,r);
     cst_free(codes);

     return result;
 }

 /*
  * Apply the letter-to-sound rules in r to word and return the predicted
  * phones as a cst_val list of strings (0 if no phone was produced).
  *
  *   word  - letters to convert.  When r->letter_table is set, each char
  *           is treated as a letter code and the rule index is code - 3;
  *           otherwise only 'a'..'z' are looked up and every other char
  *           is skipped.
  *   feats - extra feature characters appended after the left and right
  *           letter context in the vector handed to apply_model().
  *   r     - rule set (context window size, letter index, models and
  *           phone table).
  *
  * Phones named "epsilon" are dropped; a phone name containing '-' is
  * split at the first '-' into two phones.
  *
  * NOTE(review): a word containing the boundary marker itself ('#', or
  * code 1 with a letter table) stops the backward scan early.
  */
 cst_val *lts_apply(const char *word,const char *feats,const cst_lts_rules *r)
 {
     int pos, index, i;
     int word_len, feats_len, extra;
     cst_val *phones=0;
     cst_lts_letter *fval_buff;
     cst_lts_letter *full_buff;
     cst_lts_phone phone;
     const char *pname;
     char *left, *right, *dash;
     char hash;
     char zeros[8];

     word_len = (int)cst_strlen(word);
     feats_len = (int)cst_strlen(feats);

     /* Feature vector for one letter: left context, right context, feats */
     /* and the terminating NUL that cst_sprintf writes.  Size it from    */
     /* whichever is larger, the declared extra feats or the feats string */
     /* actually passed, so the write below can never overrun the buffer. */
     extra = r->context_extra_feats;
     if (feats_len > extra)
         extra = feats_len;
     fval_buff = cst_alloc(cst_lts_letter,
                           (r->context_window_size*2)+extra+1);

     /* Word with a boundary marker and padding on both sides, plus NUL */
     full_buff = cst_alloc(cst_lts_letter,
                           (r->context_window_size*2)+word_len+1);
     if (r->letter_table)
     {
         /* zeros is not NUL terminated; the %.*s precision bounds the read */
         for (i=0; i<8; i++) zeros[i] = 2;
         cst_sprintf((char *)full_buff,
                     "%.*s%c%s%c%.*s",
                     r->context_window_size-1, zeros,
                     1,
                     word,
                     1,
                     r->context_window_size-1, zeros);
         hash = 1;
     }
     else
     {
         /* Assumes l_letter is a char and context < 8 */
         cst_sprintf((char *)full_buff,
                     "%.*s#%s#%.*s",
                     r->context_window_size-1, "00000000",
                     word,
                     r->context_window_size-1, "00000000");
         hash = '#';
     }

     /* Do the prediction backwards so we don't need to reverse the answer */
     for (pos = r->context_window_size + word_len - 1;
          full_buff[pos] != hash;
          pos--)
     {
         /* Fill the features buffer for the predictor */
         cst_sprintf((char *)fval_buff,
                     "%.*s%.*s%s",
                     r->context_window_size,
                     full_buff+pos-r->context_window_size,
                     r->context_window_size,
                     full_buff+pos+1,
                     feats);
         if ((!r->letter_table
              && ((full_buff[pos] < 'a') || (full_buff[pos] > 'z'))))
         {
 #ifdef EXCESSIVELY_CHATTY
             cst_errmsg("lts:skipping unknown char \"%c\"\n",
                        full_buff[pos]);
 #endif
             continue;
         }
         if (r->letter_table)
             index = full_buff[pos] - 3;
         else
             index = (full_buff[pos]-'a')%26;
         phone = apply_model(fval_buff,
                             r->letter_index[index],
                             r->models);
         pname = r->phone_table[phone];

         /* delete epsilons and split dual-phones */
         if (cst_streq("epsilon",pname))
             continue;
         else if ((dash=strchr(pname,'-')) != NULL)
         {
             /* left = text before the '-', right = text after it */
             left = cst_substr(pname,0,(int)(dash-pname));
             right = cst_substr(pname,
                                (int)(dash-pname)+1,
                                (int)cst_strlen(dash)-1);
             phones = cons_val(string_val(left),
                               cons_val(string_val(right),phones));
             cst_free(left);
             cst_free(right);
         }
         else
             phones = cons_val(string_val(pname),phones);
     }

     cst_free(full_buff);
     cst_free(fval_buff);

     return phones;
 }

 /* Copy rule number n out of model into *state.                          */
 /* rule_size bytes are copied, starting at element n*rule_size of model. */
 /* A copy is used because some OS's require a more elaborate access than */
 /* a simple lookup.                                                      */
 static void cst_lts_get_state(cst_lts_rule *state,
                               const cst_lts_model *model,
                               unsigned short n,
                               int rule_size)
 {
     const cst_lts_model *rule_start = &model[n*rule_size];

     memmove(state,rule_start,rule_size);
 }

 /* Walk the rules in model, beginning at rule start, and return the      */
 /* predicted phone for the feature vector vals.                          */
 /*                                                                       */
 /* At each rule, vals[feat] is compared with the rule's val to choose    */
 /* the qtrue or qfalse successor.  The walk stops at a rule whose feat   */
 /* is CST_LTS_EOR; that rule's val is returned as the phone.             */
 /*                                                                       */
 /* Because some machines (arm/mips) can't deal with addrs not on word    */
 /* boundaries, every rule is copied into a local cst_lts_rule before it  */
 /* is read.  This still might be wrong on machines that lay out the      */
 /* structure differently from the 6 bytes assumed here.                  */
 static cst_lts_phone apply_model(cst_lts_letter *vals,cst_lts_addr start,
                                  const cst_lts_model *model)
 {
     static const int sizeof_cst_lts_rule = 6;
     cst_lts_rule node;
     unsigned short next;

     cst_lts_get_state(&node,model,start,sizeof_cst_lts_rule);

     while (node.feat != CST_LTS_EOR)
     {
         next = (vals[node.feat] == node.val) ? node.qtrue : node.qfalse;

         /* This should really happen at compilation time */
         if (CST_BIG_ENDIAN)
             next = SWAPSHORT(next);

         cst_lts_get_state(&node,model,next,sizeof_cst_lts_rule);
     }

     return (cst_lts_phone)node.val;
 }
	/*************************************************************************/
	/* */
	/* Language Technologies Institute */
	/* Carnegie Mellon University */
	/* Copyright (c) 1999 */
	/* All Rights Reserved. */
	/* */
	/* Permission is hereby granted, free of charge, to use and distribute */
	/* this software and its documentation without restriction, including */
	/* without limitation the rights to use, copy, modify, merge, publish, */
	/* distribute, sublicense, and/or sell copies of this work, and to */
	/* permit persons to whom this work is furnished to do so, subject to */
	/* the following conditions: */
	/* 1. The code must retain the above copyright notice, this list of */
	/* conditions and the following disclaimer. */
	/* 2. Any modifications must be clearly marked as such. */
	/* 3. Original authors' names are not deleted. */
	/* 4. The authors' names are not used to endorse or promote products */
	/* derived from this software without specific prior written */
	/* permission. */
	/* */
	/* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */
	/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
	/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
	/* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */
	/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
	/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
	/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
	/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
	/* THIS SOFTWARE. */
	/* */
	/*************************************************************************/
	/* Author: Alan W Black (awb@cs.cmu.edu) */
	/* Date: December 1999 */
	/*************************************************************************/
	/* */
	/* Letter to sound rule support */
	/* */
	/*************************************************************************/

	#include "cst_features.h"
	#include "cst_lts.h"
	#include "cst_endian.h"

	/* Forward declaration: apply_model() is called by lts_apply() before */
	/* its definition appears further down. */
	static cst_lts_phone apply_model(cst_lts_letter *vals,
	cst_lts_addr start,
	const cst_lts_model *model);

	/* Allocate one cst_lts_rules structure and return it with every field */
	/* explicitly set to zero. */
	cst_lts_rules *new_lts_rules()
	{
	    cst_lts_rules *rules;

	    rules = cst_alloc(cst_lts_rules,1);

	    /* identification and lookup tables */
	    rules->name = 0;
	    rules->letter_table = 0;
	    rules->letter_index = 0;
	    rules->phone_table = 0;
	    rules->models = 0;

	    /* feature-vector geometry */
	    rules->context_window_size = 0;
	    rules->context_extra_feats = 0;

	    return rules;
	}

	cst_val lts_apply_val(const cst_val wlist,const char feats,const cst_lts_rules r)
	{
	/* for symbol to symbol mapping */
	const cst_val *v;
	cst_val *p;
	char *word;
	int i,j;

	word = cst_alloc(char,val_length(wlist)+1);

	for (v=wlist,i=0; v; v=val_cdr(v),i++)
	{
	for (j=0; r->letter_table[j]; j++)
	if (cst_streq(val_string(val_car(v)),r->letter_table[j]))
	{
	word[i] = j;
	break;
	}
	if (!r->letter_table[j])
	{
	#if 0
	printf("awb_debug unknown letter >%s<\n",val_string(val_car(v)));
	#endif
	i--; /* can't find this letter so skip it */
	}
	}

	p = lts_apply(word,feats,r);
	cst_free(word);

	return p;
	}

	cst_val lts_apply(const char word,const char feats,const cst_lts_rules r)
	{
	int pos, index, i;
	cst_val *phones=0;
	cst_lts_letter *fval_buff;
	cst_lts_letter *full_buff;
	cst_lts_phone phone;
	char left, right, *p;
	char hash;
	char zeros[8];

	/* For feature vals for each letter */
	fval_buff = cst_alloc(cst_lts_letter,
	(r->context_window_size*2)+
	r->context_extra_feats);
	/* Buffer with added contexts */
	full_buff = cst_alloc(cst_lts_letter,
	(r->context_window_size*2)+
	cst_strlen(word)+1); /* TBD assumes single POS feat */
	if (r->letter_table)
	{
	for (i=0; i<8; i++) zeros[i] = 2;
	cst_sprintf((char *)full_buff,
	"%.s%c%s%c%.s",
	r->context_window_size-1, zeros,
	1,
	word,
	1,
	r->context_window_size-1, zeros);
	hash = 1;
	}
	else
	{
	/* Assumes l_letter is a char and context < 8 */
	cst_sprintf((char *)full_buff,
	"%.s#%s#%.s",
	r->context_window_size-1, "00000000",
	word,
	r->context_window_size-1, "00000000");
	hash = '#';
	}

	/* Do the prediction backwards so we don't need to reverse the answer */
	for (pos = r->context_window_size + cst_strlen(word) - 1;
	full_buff[pos] != hash;
	pos--)
	{
	/* Fill the features buffer for the predictor */
	cst_sprintf((char *)fval_buff,
	"%.s%.s%s",
	r->context_window_size,
	full_buff+pos-r->context_window_size,
	r->context_window_size,
	full_buff+pos+1,
	feats);
	if ((!r->letter_table
	&& ((full_buff[pos] < 'a') \|\| (full_buff[pos] > 'z'))))
	{
	#ifdef EXCESSIVELY_CHATTY
	cst_errmsg("lts:skipping unknown char \"%c\"\n",
	full_buff[pos]);
	#endif
	continue;
	}
	if (r->letter_table)
	index = full_buff[pos] - 3;
	else
	index = (full_buff[pos]-'a')%26;
	phone = apply_model(fval_buff,
	r->letter_index[index],
	r->models);
	/* delete epsilons and split dual-phones */
	if (cst_streq("epsilon",r->phone_table[phone]))
	continue;
	else if ((p=strchr(r->phone_table[phone],'-')) != NULL)
	{
	left = cst_substr(r->phone_table[phone],0,
	cst_strlen(r->phone_table[phone])-cst_strlen(p));
	right = cst_substr(r->phone_table[phone],
	(cst_strlen(r->phone_table[phone])-cst_strlen(p))+1,
	(cst_strlen(p)-1));
	phones = cons_val(string_val(left),
	cons_val(string_val(right),phones));
	cst_free(left);
	cst_free(right);
	}
	else
	phones = cons_val(string_val(r->phone_table[phone]),phones);
	}

	cst_free(full_buff);
	cst_free(fval_buff);

	return phones;
	}

	/* Copy rule number n out of model into *state. */
	/* rule_size bytes are copied, starting at element n*rule_size of model. */
	/* A copy is used because some OS's require a more elaborate access than */
	/* a simple lookup. */
	static void cst_lts_get_state(cst_lts_rule *state,
	                              const cst_lts_model *model,
	                              unsigned short n,
	                              int rule_size)
	{
	    const cst_lts_model *rule_start = &model[n*rule_size];

	    memmove(state,rule_start,rule_size);
	}

	/* Walk the rules in model, beginning at rule start, and return the */
	/* predicted phone for the feature vector vals. */
	/* */
	/* At each rule, vals[feat] is compared with the rule's val to choose */
	/* the qtrue or qfalse successor.  The walk stops at a rule whose feat */
	/* is CST_LTS_EOR; that rule's val is returned as the phone. */
	/* */
	/* Because some machines (arm/mips) can't deal with addrs not on word */
	/* boundaries, every rule is copied into a local cst_lts_rule before it */
	/* is read.  This still might be wrong on machines that lay out the */
	/* structure differently from the 6 bytes assumed here. */
	static cst_lts_phone apply_model(cst_lts_letter *vals,cst_lts_addr start,
	                                 const cst_lts_model *model)
	{
	    static const int sizeof_cst_lts_rule = 6;
	    cst_lts_rule node;
	    unsigned short next;

	    cst_lts_get_state(&node,model,start,sizeof_cst_lts_rule);

	    while (node.feat != CST_LTS_EOR)
	    {
	        next = (vals[node.feat] == node.val) ? node.qtrue : node.qfalse;

	        /* This should really happen at compilation time */
	        if (CST_BIG_ENDIAN)
	            next = SWAPSHORT(next);

	        cst_lts_get_state(&node,model,next,sizeof_cst_lts_rule);
	    }

	    return (cst_lts_phone)node.val;
	}