/* lang/cmulex/cmu_lex.c - third_party/flite - Git at Google */

 /*************************************************************************/
 /*                                                                       */
 /*                  Language Technologies Institute                      */
 /*                     Carnegie Mellon University                        */
 /*                        Copyright (c) 2001                             */
 /*                        All Rights Reserved.                           */
 /*                                                                       */
 /*  Permission is hereby granted, free of charge, to use and distribute  */
 /*  this software and its documentation without restriction, including   */
 /*  without limitation the rights to use, copy, modify, merge, publish,  */
 /*  distribute, sublicense, and/or sell copies of this work, and to      */
 /*  permit persons to whom this work is furnished to do so, subject to   */
 /*  the following conditions:                                            */
 /*   1. The code must retain the above copyright notice, this list of    */
 /*      conditions and the following disclaimer.                         */
 /*   2. Any modifications must be clearly marked as such.                */
 /*   3. Original authors' names are not deleted.                         */
 /*   4. The authors' names are not used to endorse or promote products   */
 /*      derived from this software without specific prior written        */
 /*      permission.                                                      */
 /*                                                                       */
 /*  CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK         */
 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
 /*  SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE      */
 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
 /*  THIS SOFTWARE.                                                       */
 /*                                                                       */
 /*************************************************************************/
 /*             Author:  Alan W Black (awb@cs.cmu.edu)                    */
 /*               Date:  January 2001                                     */
 /*************************************************************************/
 /*                                                                       */
 /*  CMU Lexicon definition                                               */
 /*                                                                       */
 /*************************************************************************/

 #include "flite.h"

 #include "cmu_lex.h"

 /* Lexicon tables defined in other translation units.  cmu_lex_init()   */
 /* stores cmu_lex_data, cmu_lex_num_entries, cmu_lex_num_bytes,         */
 /* cmu_lex_phone_table and the two Huffman tables into cmu_lex;         */
 /* cmu_lex_entry is declared here but not referenced in this file.      */
 extern const int cmu_lex_entry[];
 extern const unsigned char cmu_lex_data[];
 extern const int cmu_lex_num_entries;
 extern const int cmu_lex_num_bytes;
 extern const char * const cmu_lex_phone_table[54];
 extern const char * const cmu_lex_phones_huff_table[];
 extern const char * const cmu_lex_entries_huff_table[];

 /* Forward declarations of the phone-classification helpers that the   */
 /* syllable-boundary functions below are built from.                   */
 static int cmu_is_vowel(const char *p);
 static int cmu_is_silence(const char *p);
 static int cmu_has_vowel_in_list(const cst_val *v);
 static int cmu_has_vowel_in_syl(const cst_item *i);
 static int cmu_sonority(const char *p);

 /* Hand-written lexicon entries; cmu_lex_init() installs them through   */
 /* the addenda table as cmu_lex.addenda.  Every array is terminated by  */
 /* NULL.  Element 0 is presumably a one-character tag followed by the   */
 /* headword, and the remaining elements the phones of its               */
 /* pronunciation -- TODO confirm against the lexicon lookup code.       */
 static const char * const addenda0[]  = { "p,", NULL };
 static const char * const addenda1[]  = { "p.", NULL };
 static const char * const addenda2[]  = { "p(", NULL };
 static const char * const addenda3[]  = { "p)", NULL };
 static const char * const addenda4[]  = { "p[", NULL };
 static const char * const addenda5[]  = { "p]", NULL };
 static const char * const addenda6[]  = { "p{", NULL };
 static const char * const addenda7[]  = { "p}", NULL };
 static const char * const addenda8[]  = { "p:", NULL };
 static const char * const addenda9[]  = { "p;", NULL };
 static const char * const addenda10[] = { "p?", NULL };
 static const char * const addenda11[] = { "p!", NULL };
 static const char * const addenda12[] = { "n@", "ae1", "t", NULL };
 static const char * const addenda13[] = { "n#", "hh", "ae1", "sh", NULL };
 static const char * const addenda14[] = { "n$", "d", "aa1", "l", "er", NULL };
 static const char * const addenda15[] = { "n%", "p", "er", "s", "eh1", "n", "t", NULL };
 static const char * const addenda16[] = { "n^", "k", "eh1", "r", "eh1", "t", NULL };
 static const char * const addenda17[] = { "n&", "ae1", "m", "p", "er", "s", "ae1", "n", "d", NULL };
 static const char * const addenda18[] = { "n*", "ae1", "s", "t", "er", "ih1", "s", "k", NULL };
 static const char * const addenda19[] = { "n|", "b", "aa1", "r", NULL };
 static const char * const addenda20[] = { "n\\", "b", "ae1", "k", "s", "l", "ae1", "sh", NULL };
 static const char * const addenda21[] = { "n=", "iy1", "k", "w", "ax", "l", "z", NULL };
 static const char * const addenda22[] = { "n+", "p", "l", "ah1", "s", NULL };
 static const char * const addenda23[] = { "n~", "t", "ih1", "l", "d", "ax", NULL };
 static const char * const addenda24[] = { "p'", NULL };
 static const char * const addenda25[] = { "p`", NULL };
 static const char * const addenda26[] = { "p\"", NULL };
 static const char * const addenda27[] = { "p-", NULL };
 static const char * const addenda28[] = { "p<", NULL };
 static const char * const addenda29[] = { "p>", NULL };
 static const char * const addenda30[] = { "n_", "ah1", "n", "d", "er", "s", "k", "ao1", "r", NULL };
 static const char * const addenda31[] = { "s's", "z", NULL };
 static const char * const addenda32[] = { "nim", "ay1", "m", NULL };
 static const char * const addenda33[] = { "vdoesnt", "d", "ah1", "z", "n", "t", NULL };
 static const char * const addenda34[] = { "vyoull", "y", "uw1", "l", NULL };
 static const char * const addenda35[] = { "n/", "s", "l", "ae1", "sh", NULL };

 static const char * const addenda36[] = { "nin", "ih", "n", NULL };
 static const char * const addenda37[] = { "nto", "t", "ax", NULL };
 static const char * const addenda38[] = { "0_a", "ey", NULL };
 static const char * const addenda39[] = { "vhavent", "hh", "ae1", "v", "ax", "n", "t", NULL };
 static const char * const addenda40[] = { "nemail", "iy1", "m", "ey1", "l", NULL };
 static const char * const addenda41[] = { "nshit", "sh", "ih1", "t", NULL };

 /* Table of every addenda entry, in numeric order and terminated by a   */
 /* NULL pointer.  cmu_lex_init() stores it in cmu_lex.addenda.          */
 static const char * const * const addenda[] = {
     addenda0,  addenda1,  addenda2,  addenda3,  addenda4,  addenda5,
     addenda6,  addenda7,  addenda8,  addenda9,  addenda10, addenda11,
     addenda12, addenda13, addenda14, addenda15, addenda16, addenda17,
     addenda18, addenda19, addenda20, addenda21, addenda22, addenda23,
     addenda24, addenda25, addenda26, addenda27, addenda28, addenda29,
     addenda30, addenda31, addenda32, addenda33, addenda34, addenda35,

     addenda36, addenda37, addenda38, addenda39, addenda40, addenda41,
     NULL
 };

 /* Returns TRUE when the phone name p is exactly "pau", FALSE otherwise. */
 static int cmu_is_silence(const char *p)
 {
     return cst_streq(p,"pau") ? TRUE : FALSE;
 }

 /* Returns TRUE if any string in the list starting at v is a vowel      */
 /* according to cmu_is_vowel(); FALSE for an empty list or a list with  */
 /* no vowel in it.                                                      */
 static int cmu_has_vowel_in_list(const cst_val *v)
 {
     const cst_val *cell = v;

     while (cell)
     {
         if (cmu_is_vowel(val_string(val_car(cell))))
             return TRUE;
         cell = val_cdr(cell);
     }
     return FALSE;
 }

 /* Returns TRUE if item i, or any item reached from it by repeatedly    */
 /* following item_prev(), has a "name" feature that is a vowel.         */
 /* Returns FALSE once item_prev() runs out without finding one.         */
 static int cmu_has_vowel_in_syl(const cst_item *i)
 {
     const cst_item *seg = i;

     while (seg)
     {
         if (cmu_is_vowel(item_feat_string(seg,"name")))
             return TRUE;
         seg = item_prev(seg);
     }
     return FALSE;
 }

 /* Returns 1 if the phone name p starts with one of a, e, i, o, u and   */
 /* 0 otherwise.  Looking only at the first letter happens to work for   */
 /* the US English phoneset.                                             */
 /* A NULL or empty name is never a vowel.  The empty case needs its own */
 /* check because strchr() treats the string terminator as part of the   */
 /* searched string, so strchr("aeiou", '\0') is not NULL.               */
 static int cmu_is_vowel(const char *p)
 {
     return (p != NULL) && (p[0] != '\0') && (strchr("aeiou",p[0]) != NULL);
 }

 /* Ranks the phone name p on a rough sonority scale, a bunch of hacks   */
 /* for the US English phoneset:                                         */
 /*   5  vowels and silence                                              */
 /*   4  glides/liquids     (first letter in "wylr")                     */
 /*   3  nasals             (first letter in "nm")                       */
 /*   2  voiced obstruents  (first letter in the last set)               */
 /*   1  everything else                                                 */
 /* The l, m, n, r, w and y in the last set never decide anything: the   */
 /* two tests before it have already returned for those letters.         */
 static int cmu_sonority(const char *p)
 {
     const char first = p[0];

     if (cmu_is_vowel(p))
         return 5;
     if (cmu_is_silence(p))
         return 5;
     if (strchr("wylr",first) != NULL)
         return 4;
     if (strchr("nm",first) != NULL)
         return 3;
     if (strchr("bdgjlmnnnrvwyz",first) != NULL)
         return 2;
     return 1;
 }

 /* Sonority-based syllable boundary test; phone set dependent.          */
 /* i is an item whose "name" feature is a phone name and rest is the    */
 /* list of phone names still to come.  Returns TRUE if a syllable       */
 /* boundary should be placed before rest, FALSE otherwise.              */
 int cmu_syl_boundary(const cst_item *i,const cst_val *rest)
 {
     const char *next;
     int here, after, after2;

     if (rest == NULL)
         return TRUE;

     next = val_string(val_car(rest));
     if (cmu_is_silence(next))
         return TRUE;
     if (!cmu_has_vowel_in_list(rest))
         return FALSE;           /* no more vowels so rest is *all* coda */
     if (!cmu_has_vowel_in_syl(i))
         return FALSE;           /* need a vowel before breaking */
     if (cmu_is_vowel(next))
         return TRUE;
     if (val_cdr(rest) == NULL)
         return FALSE;

     /* A vowel follows and at least two phones are left: break only if */
     /* sonority does not fall from this phone across the next two.     */
     here = cmu_sonority(item_feat_string(i,"name"));
     after = cmu_sonority(next);
     after2 = cmu_sonority(val_string(val_car(val_cdr(rest))));

     return ((here <= after) && (after <= after2)) ? TRUE : FALSE;
 }

 /* Counts the phones at the front of rest that come before the first    */
 /* vowel.  Returns 0 if rest is empty or starts with a vowel; if rest   */
 /* holds no vowel at all the result is the length of the list (callers  */
 /* are not expected to get there).                                      */
 static int cmulex_dist_to_vowel(const cst_val *rest)
 {
     const cst_val *cell;
     int dist = 0;

     for (cell = rest; cell != 0; cell = val_cdr(cell))
     {
         if (cmu_is_vowel(val_string(val_car(cell))))
             break;
         dist++;
     }
     return dist;
 }

 /* Three-phone sequences accepted as a syllable onset by                */
 /* cmulex_onset_trigram().  Each entry is the three phone names joined  */
 /* with no separator; the list ends with NULL.                          */
 static const char * const cmulex_onset_trigrams[] = {
     "str", "spy", "spr", "spl", "sky", "skw", "skr", "skl", NULL
 };
 /* Two-phone sequences accepted as a syllable onset by                  */
 /* cmulex_onset_bigram(), grouped by first phone.  Each entry is two    */
 /* phone names joined with no separator, so a three-letter entry such   */
 /* as "thw" pairs a two-letter phone with a one-letter one.  The list   */
 /* ends with NULL; "ts" is deliberately left commented out.             */
 static const char * const cmulex_onset_bigrams[] = {
     "zw", "zl",
     "vy", "vr", "vl",
     "thw", "thr",
     "ty", "tw",
     "tr", /* "ts", */
     "shw", "shr", "shn", "shm", "shl",
     "sw", "sv", "st", "sr", "sp", "sn", "sm", "sl", "sk", "sf",
     "py", "pw", "pr", "pl",
     "ny",
     "my", "mr",
     "ly",
     "ky", "kw", "kr", "kl",
     "hhy", "hhw", "hhr", "hhl",
     "gy", "gw", "gr", "gl",
     "fy", "fr", "fl",
     "dy", "dw", "dr",
     "by", "bw", "br", "bl",
     NULL
 };

 /* Returns TRUE if the first two phone names in rest, joined together,  */
 /* appear in cmulex_onset_bigrams; FALSE otherwise.                     */
 /* cmu_syl_boundary_mo() only calls this when the next vowel is two     */
 /* phones away, so rest holds at least two phones there.                */
 /* NOTE(review): the write into the 10-byte buffer is not length        */
 /* checked; it relies on phone names being short.                       */
 static int cmulex_onset_bigram(const cst_val *rest)
 {
     char joined[10];
     const char * const *entry;

     cst_sprintf(joined,"%s%s",val_string(val_car(rest)),
                 val_string(val_car(val_cdr(rest))));
     for (entry = cmulex_onset_bigrams; *entry; entry++)
     {
         if (cst_streq(joined,*entry))
             return TRUE;
     }
     return FALSE;
 }

 /* Returns TRUE if the first three phone names in rest, joined          */
 /* together, appear in cmulex_onset_trigrams; FALSE otherwise.          */
 /* cmu_syl_boundary_mo() only calls this when the next vowel is three   */
 /* phones away, so rest holds at least three phones there.              */
 /* NOTE(review): the write into the 15-byte buffer is not length        */
 /* checked; it relies on phone names being short.                       */
 static int cmulex_onset_trigram(const cst_val *rest)
 {
     char joined[15];
     const char * const *entry;

     cst_sprintf(joined,"%s%s%s",val_string(val_car(rest)),
                 val_string(val_car(val_cdr(rest))),
                 val_string(val_car(val_cdr(val_cdr(rest)))));
     for (entry = cmulex_onset_trigrams; *entry; entry++)
     {
         if (cst_streq(joined,*entry))
             return TRUE;
     }
     return FALSE;
 }

 /* Syllable boundary test using maximal onset; this is the function     */
 /* cmu_lex_init() installs as cmu_lex.syl_boundary.                     */
 /* i is an item whose "name" feature is a phone name and rest is the    */
 /* list of phone names still to come.  Returns TRUE if a syllable       */
 /* boundary should be placed before rest, FALSE otherwise.              */
 int cmu_syl_boundary_mo(const cst_item *i,const cst_val *rest)
 {
     const char *next;
     int d2v;

     if (rest == NULL)
         return TRUE;

     next = val_string(val_car(rest));
     if (cmu_is_silence(next))
         return TRUE;
     if (!cmu_has_vowel_in_list(rest))
         return FALSE;   /* no more vowels so rest is *all* coda */
     if (!cmu_has_vowel_in_syl(i))
         return FALSE;   /* no vowel yet in syl so keep copying */
     if (cmu_is_vowel(next))
         return TRUE;    /* next is a vowel, syl has vowel: a break */
     if (cst_streq("ng",next))
         return FALSE;   /* "ng" can't start a word internal syl */

     /* Is the run of phones from rest up to the next vowel a valid onset? */
     d2v = cmulex_dist_to_vowel(rest);
     switch (d2v)
     {
     case 2:
         return cmulex_onset_bigram(rest);
     case 3:
         return cmulex_onset_trigram(rest);
     default:
         /* fewer than two phones always break, more than three never do */
         return (d2v < 2) ? TRUE : FALSE;
     }
 }

 /* The lexicon and its letter-to-sound rule set.  Both are filled in at */
 /* run time by cmu_lex_init().                                          */
 cst_lexicon cmu_lex;
 cst_lts_rules cmu_lts_rules;
 /* Letter-to-sound tables defined in other translation units.           */
 /* cmu_lts_letter_table is declared but cmu_lex_init() stores 0 in its  */
 /* place.                                                               */
 extern const char * const cmu_lts_phone_table[];
 extern const char * const cmu_lts_letter_table[];
 extern const cst_lts_addr cmu_lts_letter_index[];
 extern const cst_lts_model cmu_lts_model[];

 /* Alias for cmu_lex_init(); it exists because an init function whose   */
 /* name matches the directory name is needed.                           */
 cst_lexicon *cmulex_init()
 {
     cst_lexicon *lex = cmu_lex_init();

     return lex;
 }

 /* Fills in the global cmu_lex and cmu_lts_rules structures and returns */
 /* &cmu_lex.  A non-NULL cmu_lts_rules.name marks them as already set   */
 /* up, so every later call returns straight away.                       */
 cst_lexicon *cmu_lex_init()
 {
     /* I'd like to do this as a const but it needs everything in this */
     /* file and already the bits are too big for some compilers */

     if (cmu_lts_rules.name)
         return &cmu_lex;  /* Already initialized */

     /* Letter-to-sound rule set; setting name also arms the guard above */
     cmu_lts_rules.name = "cmu";
     cmu_lts_rules.letter_index = cmu_lts_letter_index;
 #ifdef CST_NO_STATIC_LTS_MODEL
     /* cmu_lts_rules.models will be set elsewhere */
 #else
     cmu_lts_rules.models = cmu_lts_model;
 #endif
     cmu_lts_rules.phone_table = cmu_lts_phone_table;
     cmu_lts_rules.context_window_size = 4;
     cmu_lts_rules.context_extra_feats = 1;
     cmu_lts_rules.letter_table = 0 /* cmu_lts_letter_table */;

     /* The lexicon itself */
     cmu_lex.name = "cmu";
     cmu_lex.num_entries = cmu_lex_num_entries;
 #ifdef CST_NO_STATIC_LEX
     /* cmu_lex.data will be set elsewhere */
 #else
     /* as the data is const, we cast it through void * */
     cmu_lex.data = (unsigned char *)(void *)cmu_lex_data;
 #endif
     cmu_lex.num_bytes = cmu_lex_num_bytes;
     /* the casts below drop the const qualifiers of the static tables */
     cmu_lex.phone_table = (char **) cmu_lex_phone_table;
     /* the maximal-onset variant is installed; cmu_syl_boundary is not */
     /* referenced anywhere in this file */
     cmu_lex.syl_boundary = cmu_syl_boundary_mo;
     cmu_lex.addenda = (char ***) addenda;
     cmu_lex.lts_rule_set = (cst_lts_rules *) &cmu_lts_rules;

     cmu_lex.phone_hufftable = cmu_lex_phones_huff_table;
     cmu_lex.entry_hufftable = cmu_lex_entries_huff_table;

     /* cmu_postlex is not declared in this file */
     cmu_lex.postlex = cmu_postlex;

     return &cmu_lex;

 }
	/*************************************************************************/
	/* */
	/* Language Technologies Institute */
	/* Carnegie Mellon University */
	/* Copyright (c) 2001 */
	/* All Rights Reserved. */
	/* */
	/* Permission is hereby granted, free of charge, to use and distribute */
	/* this software and its documentation without restriction, including */
	/* without limitation the rights to use, copy, modify, merge, publish, */
	/* distribute, sublicense, and/or sell copies of this work, and to */
	/* permit persons to whom this work is furnished to do so, subject to */
	/* the following conditions: */
	/* 1. The code must retain the above copyright notice, this list of */
	/* conditions and the following disclaimer. */
	/* 2. Any modifications must be clearly marked as such. */
	/* 3. Original authors' names are not deleted. */
	/* 4. The authors' names are not used to endorse or promote products */
	/* derived from this software without specific prior written */
	/* permission. */
	/* */
	/* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */
	/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
	/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
	/* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */
	/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
	/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
	/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
	/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
	/* THIS SOFTWARE. */
	/* */
	/*************************************************************************/
	/* Author: Alan W Black (awb@cs.cmu.edu) */
	/* Date: January 2001 */
	/*************************************************************************/
	/* */
	/* CMU Lexicon definition */
	/* */
	/*************************************************************************/

	#include "flite.h"

	#include "cmu_lex.h"

	/* Lexicon tables defined in other translation units.  cmu_lex_init() */
	/* stores cmu_lex_data, cmu_lex_num_entries, cmu_lex_num_bytes, */
	/* cmu_lex_phone_table and the two Huffman tables into cmu_lex; */
	/* cmu_lex_entry is declared here but not referenced in this file. */
	extern const int cmu_lex_entry[];
	extern const unsigned char cmu_lex_data[];
	extern const int cmu_lex_num_entries;
	extern const int cmu_lex_num_bytes;
	extern const char * const cmu_lex_phone_table[54];
	extern const char * const cmu_lex_phones_huff_table[];
	extern const char * const cmu_lex_entries_huff_table[];

	/* Forward declarations of the phone-classification helpers that the */
	/* syllable-boundary functions below are built from. */
	static int cmu_is_vowel(const char *p);
	static int cmu_is_silence(const char *p);
	static int cmu_has_vowel_in_list(const cst_val *v);
	static int cmu_has_vowel_in_syl(const cst_item *i);
	static int cmu_sonority(const char *p);

	/* Hand-written lexicon entries; cmu_lex_init() installs them through */
	/* the addenda table as cmu_lex.addenda.  Every array is terminated by */
	/* NULL.  Element 0 is presumably a one-character tag followed by the */
	/* headword, and the remaining elements the phones of its */
	/* pronunciation -- TODO confirm against the lexicon lookup code. */
	static const char * const addenda0[]  = { "p,", NULL };
	static const char * const addenda1[]  = { "p.", NULL };
	static const char * const addenda2[]  = { "p(", NULL };
	static const char * const addenda3[]  = { "p)", NULL };
	static const char * const addenda4[]  = { "p[", NULL };
	static const char * const addenda5[]  = { "p]", NULL };
	static const char * const addenda6[]  = { "p{", NULL };
	static const char * const addenda7[]  = { "p}", NULL };
	static const char * const addenda8[]  = { "p:", NULL };
	static const char * const addenda9[]  = { "p;", NULL };
	static const char * const addenda10[] = { "p?", NULL };
	static const char * const addenda11[] = { "p!", NULL };
	static const char * const addenda12[] = { "n@", "ae1", "t", NULL };
	static const char * const addenda13[] = { "n#", "hh", "ae1", "sh", NULL };
	static const char * const addenda14[] = { "n$", "d", "aa1", "l", "er", NULL };
	static const char * const addenda15[] = { "n%", "p", "er", "s", "eh1", "n", "t", NULL };
	static const char * const addenda16[] = { "n^", "k", "eh1", "r", "eh1", "t", NULL };
	static const char * const addenda17[] = { "n&", "ae1", "m", "p", "er", "s", "ae1", "n", "d", NULL };
	static const char * const addenda18[] = { "n*", "ae1", "s", "t", "er", "ih1", "s", "k", NULL };
	/* the vertical bar needs no backslash: "\|" is not a defined escape */
	static const char * const addenda19[] = { "n|", "b", "aa1", "r", NULL };
	static const char * const addenda20[] = { "n\\", "b", "ae1", "k", "s", "l", "ae1", "sh", NULL };
	static const char * const addenda21[] = { "n=", "iy1", "k", "w", "ax", "l", "z", NULL };
	static const char * const addenda22[] = { "n+", "p", "l", "ah1", "s", NULL };
	static const char * const addenda23[] = { "n~", "t", "ih1", "l", "d", "ax", NULL };
	static const char * const addenda24[] = { "p'", NULL };
	static const char * const addenda25[] = { "p`", NULL };
	static const char * const addenda26[] = { "p\"", NULL };
	static const char * const addenda27[] = { "p-", NULL };
	static const char * const addenda28[] = { "p<", NULL };
	static const char * const addenda29[] = { "p>", NULL };
	static const char * const addenda30[] = { "n_", "ah1", "n", "d", "er", "s", "k", "ao1", "r", NULL };
	static const char * const addenda31[] = { "s's", "z", NULL };
	static const char * const addenda32[] = { "nim", "ay1", "m", NULL };
	static const char * const addenda33[] = { "vdoesnt", "d", "ah1", "z", "n", "t", NULL };
	static const char * const addenda34[] = { "vyoull", "y", "uw1", "l", NULL };
	static const char * const addenda35[] = { "n/", "s", "l", "ae1", "sh", NULL };

	static const char * const addenda36[] = { "nin", "ih", "n", NULL };
	static const char * const addenda37[] = { "nto", "t", "ax", NULL };
	static const char * const addenda38[] = { "0_a", "ey", NULL };
	static const char * const addenda39[] = { "vhavent", "hh", "ae1", "v", "ax", "n", "t", NULL };
	static const char * const addenda40[] = { "nemail", "iy1", "m", "ey1", "l", NULL };
	static const char * const addenda41[] = { "nshit", "sh", "ih1", "t", NULL };

	/* Table of every addenda entry, in numeric order and terminated by a */
	/* NULL pointer.  cmu_lex_init() stores it in cmu_lex.addenda. */
	static const char * const * const addenda[] = {
	    addenda0,  addenda1,  addenda2,  addenda3,  addenda4,  addenda5,
	    addenda6,  addenda7,  addenda8,  addenda9,  addenda10, addenda11,
	    addenda12, addenda13, addenda14, addenda15, addenda16, addenda17,
	    addenda18, addenda19, addenda20, addenda21, addenda22, addenda23,
	    addenda24, addenda25, addenda26, addenda27, addenda28, addenda29,
	    addenda30, addenda31, addenda32, addenda33, addenda34, addenda35,

	    addenda36, addenda37, addenda38, addenda39, addenda40, addenda41,
	    NULL
	};

	/* Returns TRUE when the phone name p is exactly "pau", FALSE otherwise. */
	static int cmu_is_silence(const char *p)
	{
	    return cst_streq(p,"pau") ? TRUE : FALSE;
	}

	/* Returns TRUE if any string in the list starting at v is a vowel */
	/* according to cmu_is_vowel(); FALSE for an empty list or a list with */
	/* no vowel in it. */
	static int cmu_has_vowel_in_list(const cst_val *v)
	{
	    const cst_val *cell = v;

	    while (cell)
	    {
	        if (cmu_is_vowel(val_string(val_car(cell))))
	            return TRUE;
	        cell = val_cdr(cell);
	    }
	    return FALSE;
	}

	/* Returns TRUE if item i, or any item reached from it by repeatedly */
	/* following item_prev(), has a "name" feature that is a vowel. */
	/* Returns FALSE once item_prev() runs out without finding one. */
	static int cmu_has_vowel_in_syl(const cst_item *i)
	{
	    const cst_item *seg = i;

	    while (seg)
	    {
	        if (cmu_is_vowel(item_feat_string(seg,"name")))
	            return TRUE;
	        seg = item_prev(seg);
	    }
	    return FALSE;
	}

	/* Returns 1 if the phone name p starts with one of a, e, i, o, u and */
	/* 0 otherwise.  Looking only at the first letter happens to work for */
	/* the US English phoneset. */
	/* A NULL or empty name is never a vowel.  The empty case needs its own */
	/* check because strchr() treats the string terminator as part of the */
	/* searched string, so strchr("aeiou", '\0') is not NULL. */
	static int cmu_is_vowel(const char *p)
	{
	    return (p != NULL) && (p[0] != '\0') && (strchr("aeiou",p[0]) != NULL);
	}

	/* Ranks the phone name p on a rough sonority scale, a bunch of hacks */
	/* for the US English phoneset: */
	/*   5  vowels and silence */
	/*   4  glides/liquids     (first letter in "wylr") */
	/*   3  nasals             (first letter in "nm") */
	/*   2  voiced obstruents  (first letter in the last set) */
	/*   1  everything else */
	/* The l, m, n, r, w and y in the last set never decide anything: the */
	/* two tests before it have already returned for those letters. */
	static int cmu_sonority(const char *p)
	{
	    /* plain logical OR; the backslash-escaped form is not valid C */
	    if (cmu_is_vowel(p) || cmu_is_silence(p))
	        return 5;
	    if (strchr("wylr",p[0]) != NULL)
	        return 4;
	    if (strchr("nm",p[0]) != NULL)
	        return 3;
	    if (strchr("bdgjlmnnnrvwyz",p[0]) != NULL)
	        return 2;
	    return 1;
	}

	/* Sonority-based syllable boundary test; phone set dependent. */
	/* i is an item whose "name" feature is a phone name and rest is the */
	/* list of phone names still to come.  Returns TRUE if a syllable */
	/* boundary should be placed before rest, FALSE otherwise. */
	/* Both parameters are pointers: rest is compared with NULL and both */
	/* are handed to helpers that take pointers. */
	int cmu_syl_boundary(const cst_item *i,const cst_val *rest)
	{
	    const char *next;
	    int p, n, nn;

	    if (rest == NULL)
	        return TRUE;

	    next = val_string(val_car(rest));
	    if (cmu_is_silence(next))
	        return TRUE;
	    if (!cmu_has_vowel_in_list(rest))
	        return FALSE;           /* no more vowels so rest is *all* coda */
	    if (!cmu_has_vowel_in_syl(i))
	        return FALSE;           /* need a vowel before breaking */
	    if (cmu_is_vowel(next))
	        return TRUE;
	    if (val_cdr(rest) == NULL)
	        return FALSE;

	    /* A vowel follows and at least two phones are left: break only if */
	    /* sonority does not fall from this phone across the next two. */
	    p = cmu_sonority(item_feat_string(i,"name"));
	    n = cmu_sonority(next);
	    nn = cmu_sonority(val_string(val_car(val_cdr(rest))));

	    return ((p <= n) && (n <= nn)) ? TRUE : FALSE;
	}

	/* Counts the phones at the front of rest that come before the first */
	/* vowel.  Returns 0 if rest is empty or starts with a vowel; if rest */
	/* holds no vowel at all the result is the length of the list (callers */
	/* are not expected to get there). */
	static int cmulex_dist_to_vowel(const cst_val *rest)
	{
	    const cst_val *cell;
	    int dist = 0;

	    for (cell = rest; cell != 0; cell = val_cdr(cell))
	    {
	        if (cmu_is_vowel(val_string(val_car(cell))))
	            break;
	        dist++;
	    }
	    return dist;
	}

	/* Three-phone sequences accepted as a syllable onset by */
	/* cmulex_onset_trigram().  Each entry is the three phone names joined */
	/* with no separator; the list ends with NULL. */
	static const char * const cmulex_onset_trigrams[] = {
	"str", "spy", "spr", "spl", "sky", "skw", "skr", "skl", NULL
	};
	/* Two-phone sequences accepted as a syllable onset by */
	/* cmulex_onset_bigram(), grouped by first phone.  Each entry is two */
	/* phone names joined with no separator, so a three-letter entry such */
	/* as "thw" pairs a two-letter phone with a one-letter one.  The list */
	/* ends with NULL; "ts" is deliberately left commented out. */
	static const char * const cmulex_onset_bigrams[] = {
	"zw", "zl",
	"vy", "vr", "vl",
	"thw", "thr",
	"ty", "tw",
	"tr", /* "ts", */
	"shw", "shr", "shn", "shm", "shl",
	"sw", "sv", "st", "sr", "sp", "sn", "sm", "sl", "sk", "sf",
	"py", "pw", "pr", "pl",
	"ny",
	"my", "mr",
	"ly",
	"ky", "kw", "kr", "kl",
	"hhy", "hhw", "hhr", "hhl",
	"gy", "gw", "gr", "gl",
	"fy", "fr", "fl",
	"dy", "dw", "dr",
	"by", "bw", "br", "bl",
	NULL
	};

	/* Returns TRUE if the first two phone names in rest, joined together, */
	/* appear in cmulex_onset_bigrams; FALSE otherwise. */
	/* cmu_syl_boundary_mo() only calls this when the next vowel is two */
	/* phones away, so rest holds at least two phones there. */
	/* NOTE(review): the write into the 10-byte buffer is not length */
	/* checked; it relies on phone names being short. */
	static int cmulex_onset_bigram(const cst_val *rest)
	{
	    char joined[10];
	    const char * const *entry;

	    cst_sprintf(joined,"%s%s",val_string(val_car(rest)),
	                val_string(val_car(val_cdr(rest))));
	    for (entry = cmulex_onset_bigrams; *entry; entry++)
	    {
	        if (cst_streq(joined,*entry))
	            return TRUE;
	    }
	    return FALSE;
	}

	/* Returns TRUE if the first three phone names in rest, joined */
	/* together, appear in cmulex_onset_trigrams; FALSE otherwise. */
	/* cmu_syl_boundary_mo() only calls this when the next vowel is three */
	/* phones away, so rest holds at least three phones there. */
	/* NOTE(review): the write into the 15-byte buffer is not length */
	/* checked; it relies on phone names being short. */
	static int cmulex_onset_trigram(const cst_val *rest)
	{
	    char joined[15];
	    const char * const *entry;

	    cst_sprintf(joined,"%s%s%s",val_string(val_car(rest)),
	                val_string(val_car(val_cdr(rest))),
	                val_string(val_car(val_cdr(val_cdr(rest)))));
	    for (entry = cmulex_onset_trigrams; *entry; entry++)
	    {
	        if (cst_streq(joined,*entry))
	            return TRUE;
	    }
	    return FALSE;
	}

	/* Syllable boundary test using maximal onset; this is the function */
	/* cmu_lex_init() installs as cmu_lex.syl_boundary. */
	/* i is an item whose "name" feature is a phone name and rest is the */
	/* list of phone names still to come.  Returns TRUE if a syllable */
	/* boundary should be placed before rest, FALSE otherwise. */
	/* Both parameters are pointers: rest is compared with NULL and both */
	/* are handed to helpers that take pointers. */
	int cmu_syl_boundary_mo(const cst_item *i,const cst_val *rest)
	{
	    const char *next;
	    int d2v;

	    if (rest == NULL)
	        return TRUE;

	    next = val_string(val_car(rest));
	    if (cmu_is_silence(next))
	        return TRUE;
	    if (!cmu_has_vowel_in_list(rest))
	        return FALSE;   /* no more vowels so rest is *all* coda */
	    if (!cmu_has_vowel_in_syl(i))
	        return FALSE;   /* no vowel yet in syl so keep copying */
	    if (cmu_is_vowel(next))
	        return TRUE;    /* next is a vowel, syl has vowel: a break */
	    if (cst_streq("ng",next))
	        return FALSE;   /* "ng" can't start a word internal syl */

	    /* Is the run of phones from rest up to the next vowel a valid onset? */
	    d2v = cmulex_dist_to_vowel(rest);
	    switch (d2v)
	    {
	    case 2:
	        return cmulex_onset_bigram(rest);
	    case 3:
	        return cmulex_onset_trigram(rest);
	    default:
	        /* fewer than two phones always break, more than three never do */
	        return (d2v < 2) ? TRUE : FALSE;
	    }
	}

	/* The lexicon and its letter-to-sound rule set.  Both are filled in at */
	/* run time by cmu_lex_init(). */
	cst_lexicon cmu_lex;
	cst_lts_rules cmu_lts_rules;
	/* Letter-to-sound tables defined in other translation units. */
	/* cmu_lts_letter_table is declared but cmu_lex_init() stores 0 in its */
	/* place. */
	extern const char * const cmu_lts_phone_table[];
	extern const char * const cmu_lts_letter_table[];
	extern const cst_lts_addr cmu_lts_letter_index[];
	extern const cst_lts_model cmu_lts_model[];

	/* Alias for cmu_lex_init(); it exists because an init function whose */
	/* name matches the directory name is needed. */
	cst_lexicon *cmulex_init()
	{
	    cst_lexicon *lex = cmu_lex_init();

	    return lex;
	}

	/* Fills in the global cmu_lex and cmu_lts_rules structures and returns */
	/* &cmu_lex.  A non-NULL cmu_lts_rules.name marks them as already set */
	/* up, so every later call returns straight away. */
	cst_lexicon *cmu_lex_init()
	{
	    /* I'd like to do this as a const but it needs everything in this */
	    /* file and already the bits are too big for some compilers */

	    if (cmu_lts_rules.name)
	        return &cmu_lex; /* Already initialized */

	    /* Letter-to-sound rule set; setting name also arms the guard above */
	    cmu_lts_rules.name = "cmu";
	    cmu_lts_rules.letter_index = cmu_lts_letter_index;
	#ifdef CST_NO_STATIC_LTS_MODEL
	    /* cmu_lts_rules.models will be set elsewhere */
	#else
	    cmu_lts_rules.models = cmu_lts_model;
	#endif
	    cmu_lts_rules.phone_table = cmu_lts_phone_table;
	    cmu_lts_rules.context_window_size = 4;
	    cmu_lts_rules.context_extra_feats = 1;
	    cmu_lts_rules.letter_table = 0 /* cmu_lts_letter_table */;

	    /* The lexicon itself */
	    cmu_lex.name = "cmu";
	    cmu_lex.num_entries = cmu_lex_num_entries;
	#ifdef CST_NO_STATIC_LEX
	    /* cmu_lex.data will be set elsewhere */
	#else
	    /* as the data is const, we cast it through void *; both casts */
	    /* must be to pointer types for the conversion to be valid */
	    cmu_lex.data = (unsigned char *)(void *)cmu_lex_data;
	#endif
	    cmu_lex.num_bytes = cmu_lex_num_bytes;
	    /* the casts below drop the const qualifiers of the static tables */
	    cmu_lex.phone_table = (char **) cmu_lex_phone_table;
	    /* the maximal-onset variant is installed; cmu_syl_boundary is not */
	    /* referenced anywhere in this file */
	    cmu_lex.syl_boundary = cmu_syl_boundary_mo;
	    cmu_lex.addenda = (char ***) addenda;
	    cmu_lex.lts_rule_set = (cst_lts_rules *) &cmu_lts_rules;

	    cmu_lex.phone_hufftable = cmu_lex_phones_huff_table;
	    cmu_lex.entry_hufftable = cmu_lex_entries_huff_table;

	    /* cmu_postlex is not declared in this file */
	    cmu_lex.postlex = cmu_postlex;

	    return &cmu_lex;
	}