| /*************************************************************************/ |
| /* */ |
| /* Language Technologies Institute */ |
| /* Carnegie Mellon University */ |
| /* Copyright (c) 2001 */ |
| /* All Rights Reserved. */ |
| /* */ |
| /* Permission is hereby granted, free of charge, to use and distribute */ |
| /* this software and its documentation without restriction, including */ |
| /* without limitation the rights to use, copy, modify, merge, publish, */ |
| /* distribute, sublicense, and/or sell copies of this work, and to */ |
| /* permit persons to whom this work is furnished to do so, subject to */ |
| /* the following conditions: */ |
| /* 1. The code must retain the above copyright notice, this list of */ |
| /* conditions and the following disclaimer. */ |
| /* 2. Any modifications must be clearly marked as such. */ |
| /* 3. Original authors' names are not deleted. */ |
| /* 4. The authors' names are not used to endorse or promote products */ |
| /* derived from this software without specific prior written */ |
| /* permission. */ |
| /* */ |
| /* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */ |
| /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */ |
| /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */ |
| /* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */ |
| /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */ |
| /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */ |
| /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */ |
| /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */ |
| /* THIS SOFTWARE. */ |
| /* */ |
| /*************************************************************************/ |
| /* Author: Alan W Black (awb@cs.cmu.edu) */ |
| /* Date: January 2001 */ |
| /*************************************************************************/ |
| /* */ |
| /* An F0 model */ |
| /* This is derived from the f2b model freely distributed in Festival */ |
| /* */ |
| /*************************************************************************/ |
| |
| #include "cst_hrg.h" |
| #include "cst_phoneset.h" |
| #include "us_f0.h" |
| |
| static void apply_lr_model(cst_item *s, |
| const us_f0_lr_term *f0_lr_terms, |
| float *start, |
| float *mid, |
| float *end) |
| { |
| int i; |
| const cst_val *v=0; |
| float fv; |
| |
| /* Interceptors */ |
| *start = f0_lr_terms[0].start; |
| *mid = f0_lr_terms[0].mid; |
| *end = f0_lr_terms[0].end; |
| for (i=1; f0_lr_terms[i].feature; i++) |
| { |
| if (!cst_streq(f0_lr_terms[i].feature,f0_lr_terms[i-1].feature)) |
| v = ffeature(s,f0_lr_terms[i].feature); |
| if (f0_lr_terms[i].type) |
| { |
| if (cst_streq(val_string(v),f0_lr_terms[i].type)) |
| fv = 1.0; |
| else |
| fv = 0.0; |
| } |
| else |
| fv = val_float(v); |
| (*start) += fv*f0_lr_terms[i].start; |
| (*mid) += fv*f0_lr_terms[i].mid; |
| (*end) += fv*f0_lr_terms[i].end; |
| /* printf("f %s start %f mid %f end %f\n", |
| f0_lr_terms[i].feature, |
| *start,*mid,*end); */ |
| } |
| } |
| |
| static void add_target_point(cst_relation *targ,float pos, float f0) |
| { |
| cst_item *t; |
| |
| /* printf("target %f at %f\n",f0,pos); */ |
| t = relation_append(targ,NULL); |
| item_set_float(t,"pos",pos); |
| /* them there can sometimes do silly things, so guard for that */ |
| if (f0 > 500.0) |
| item_set_float(t,"f0",500.0); |
| else if (f0 < 50.0) |
| item_set_float(t,"f0",50.0); |
| else |
| item_set_float(t,"f0",f0); |
| } |
| |
/* model mean and stddev taken from f2b/kal_diphone */
#define model_mean 170.0
#define model_stddev 34
/*
 * Map value v from the model's distribution (model_mean, model_stddev)
 * onto a distribution with mean m and standard deviation s:
 *     ((v - model_mean) / model_stddev) * s + m
 * Each argument is parenthesized so that compound expressions passed as
 * v, m or s (e.g. "a+b" or "c ? x : y") expand with the intended
 * precedence.
 */
#define map_f0(v,m,s) (((((v)-model_mean)/model_stddev)*(s))+(m))
| |
| static int post_break(cst_item *syl) |
| { |
| if ((item_prev(syl) == 0) || |
| (cst_streq("pau", |
| ffeature_string(syl, |
| "R:SylStructure.daughter.R:Segment.p.name")))) |
| return TRUE; |
| else |
| return FALSE; |
| } |
| |
| static int pre_break(cst_item *syl) |
| { |
| if ((item_next(syl) == 0) || |
| (cst_streq("pau", |
| ffeature_string(syl, |
| "R:SylStructure.daughtern.R:Segment.n.name")))) |
| return TRUE; |
| else |
| return FALSE; |
| } |
| |
| static float vowel_mid(cst_item *syl) |
| { |
| /* return time point mid way in vowel in this syl */ |
| cst_item *s; |
| cst_item *ts; |
| const cst_phoneset *ps = item_phoneset(syl); |
| |
| ts = item_daughter(item_as(syl,"SylStructure")); |
| for (s=ts; s; s = item_next(s)) |
| { |
| if (cst_streq("+", phone_feature_string(ps,item_feat_string(s,"name"), |
| "vc"))) |
| { |
| return (item_feat_float(s,"end")+ |
| ffeature_float(s,"R:Segment.p.end"))/2.0; |
| } |
| } |
| |
| /* no segments, shouldn't happen */ |
| if (ts == 0) |
| return 0; |
| |
| /* no vowel in syllable, shouldn't happen */ |
| return (item_feat_float(ts,"end")+ |
| ffeature_float(ts,"R:Segment.p.end"))/2.0; |
| } |
| |
| cst_utterance *us_f0_model(cst_utterance *u) |
| { |
| /* F0 target model: Black and Hunt ICSLP96, three points per syl */ |
| cst_item *syl, *t, *nt; |
| cst_relation *targ_rel; |
| float mean, stddev, local_mean, local_stddev; |
| float start, mid, end, lend; |
| float seg_end; |
| |
| if (feat_present(u->features,"no_f0_target_model")) |
| return u; |
| |
| targ_rel = utt_relation_create(u,"Target"); |
| mean = get_param_float(u->features,"int_f0_target_mean", 100.0); |
| mean *= get_param_float(u->features,"f0_shift", 1.0); |
| stddev = get_param_float(u->features,"int_f0_target_stddev", 12.0); |
| |
| lend = 0; |
| for (syl=relation_head(utt_relation(u,"Syllable")); |
| syl; |
| syl = item_next(syl)) |
| |
| { |
| /* printf("word %s, accent %s endtone %s\n", |
| ffeature_string(syl,"R:SylStructure.parent.name"), |
| ffeature_string(syl,"accent"), |
| ffeature_string(syl,"endtone")); */ |
| if (!item_daughter(item_as(syl,"SylStructure"))) |
| continue; /* no segs in syl */ |
| |
| local_mean = ffeature_float(syl,"R:SylStructure.parent.R:Token.parent.local_f0_shift"); |
| if (local_mean) |
| local_mean *= mean; |
| else |
| local_mean = mean; |
| local_stddev = ffeature_float(syl,"R:SylStructure.parent.R:Token.parent.local_f0_range"); |
| if (local_stddev == 0.0) |
| local_stddev = stddev; |
| |
| apply_lr_model(syl,f0_lr_terms,&start,&mid,&end); |
| if (post_break(syl)) |
| lend = map_f0(start,local_mean,local_stddev); |
| add_target_point(targ_rel, |
| ffeature_float(syl, |
| "R:SylStructure.daughter.R:Segment.p.end"), |
| map_f0((start+lend)/2.0,local_mean,local_stddev)); |
| add_target_point(targ_rel, |
| vowel_mid(syl), |
| map_f0(mid,local_mean,local_stddev)); |
| lend = map_f0(end,local_mean,local_stddev); |
| if (pre_break(syl)) |
| add_target_point(targ_rel, |
| ffeature_float(syl,"R:SylStructure.daughtern.end"), |
| map_f0(end,local_mean,local_stddev)); |
| } |
| |
| /* Guarantee targets go from start to end of utterance */ |
| t = relation_head(targ_rel); |
| if (t == 0) |
| add_target_point(targ_rel,0,mean); |
| else if (item_feat_float(t,"pos") > 0) |
| { |
| nt = item_prepend(t,NULL); |
| item_set_float(nt,"pos",0.0); |
| item_set_float(nt,"f0",item_feat_float(t,"f0")); |
| } |
| |
| t = relation_tail(targ_rel); |
| seg_end = item_feat_float(relation_tail(utt_relation(u,"Segment")),"end"); |
| if (item_feat_float(t,"pos") < seg_end) |
| add_target_point(targ_rel,seg_end,item_feat_float(t,"f0")); |
| |
| return u; |
| } |