blob: b2a703611b2118e21fae8a89c6732a2bc80f6875 [file] [log] [blame]
/*************************************************************************/
/* */
/* Language Technologies Institute */
/* Carnegie Mellon University */
/* Copyright (c) 2001 */
/* All Rights Reserved. */
/* */
/* Permission is hereby granted, free of charge, to use and distribute */
/* this software and its documentation without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of this work, and to */
/* permit persons to whom this work is furnished to do so, subject to */
/* the following conditions: */
/* 1. The code must retain the above copyright notice, this list of */
/* conditions and the following disclaimer. */
/* 2. Any modifications must be clearly marked as such. */
/* 3. Original authors' names are not deleted. */
/* 4. The authors' names are not used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
/* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
/* THIS SOFTWARE. */
/* */
/*************************************************************************/
/* Author: Alan W Black (awb@cs.cmu.edu) */
/* Date: January 2001 */
/*************************************************************************/
/* */
/* An F0 model */
/* This is derived from the f2b model freely distributed in Festival */
/* */
/*************************************************************************/
#include "cst_hrg.h"
#include "cst_phoneset.h"
#include "us_f0.h"
/*
 * Evaluate the linear-regression F0 model for item s.
 *
 * f0_lr_terms[0] supplies the intercepts for the three outputs.  Every
 * following entry, up to (not including) the first one whose feature
 * pointer is null, adds  fv * coefficient  to *start, *mid and *end:
 *   - entry has a type:  fv is 1.0 when the feature's string value
 *                        equals that type, otherwise 0.0;
 *   - entry has no type: fv is the feature's float value.
 */
static void apply_lr_model(cst_item *s,
                           const us_f0_lr_term *f0_lr_terms,
                           float *start,
                           float *mid,
                           float *end)
{
    const us_f0_lr_term *term;
    const cst_val *fval = 0;
    float weight;

    /* Entry 0 holds the intercepts */
    *start = f0_lr_terms->start;
    *mid = f0_lr_terms->mid;
    *end = f0_lr_terms->end;

    for (term = f0_lr_terms + 1; term->feature; term++)
    {
        /* Consecutive entries naming the same feature reuse the value
           already fetched for the previous entry */
        if (!cst_streq(term->feature, term[-1].feature))
            fval = ffeature(s, term->feature);

        if (!term->type)
            weight = val_float(fval);
        else if (cst_streq(val_string(fval), term->type))
            weight = 1.0;
        else
            weight = 0.0;

        *start += weight * term->start;
        *mid += weight * term->mid;
        *end += weight * term->end;
    }
}
/*
 * Append a new item to relation targ and set its "pos" feature to pos
 * and its "f0" feature to f0 limited to the range [50, 500].
 */
static void add_target_point(cst_relation *targ,float pos, float f0)
{
    cst_item *point = relation_append(targ,NULL);
    float bounded = f0;

    /* The model can occasionally produce silly values, so bound them */
    if (f0 > 500.0)
        bounded = 500.0;
    else if (f0 < 50.0)
        bounded = 50.0;

    item_set_float(point,"pos",pos);
    item_set_float(point,"f0",bounded);
}
/* model mean and stddev taken from f2b/kal_diphone */
#define model_mean 170.0
#define model_stddev 34
/* Rescale v from the model's (model_mean, model_stddev) distribution to */
/* one with mean m and standard deviation s.  Every argument is          */
/* parenthesised so that compound expressions expand as a single operand */
#define map_f0(v,m,s) (((((v)-model_mean)/model_stddev)*(s))+(m))
/*
 * TRUE when syl has no previous item, or when the segment preceding
 * the syllable's first segment (in the Segment relation) is named
 * "pau"; FALSE otherwise.
 */
static int post_break(cst_item *syl)
{
    /* Nothing before this syllable at all */
    if (!item_prev(syl))
        return TRUE;

    if (cst_streq("pau",
                  ffeature_string(syl,
                                  "R:SylStructure.daughter.R:Segment.p.name")))
        return TRUE;

    return FALSE;
}
/*
 * TRUE when syl has no next item, or when the segment following the
 * syllable's last segment (in the Segment relation) is named "pau";
 * FALSE otherwise.
 */
static int pre_break(cst_item *syl)
{
    /* Nothing after this syllable at all */
    if (!item_next(syl))
        return TRUE;

    if (cst_streq("pau",
                  ffeature_string(syl,
                                  "R:SylStructure.daughtern.R:Segment.n.name")))
        return TRUE;

    return FALSE;
}
/*
 * Return the time point midway through the vowel of syllable syl: the
 * mean of the chosen segment's "end" and the "end" of the segment
 * before it in the Segment relation.  The chosen segment is the first
 * daughter of syl (walked via item_next) whose phone feature "vc" is
 * "+"; if none qualifies the syllable's first segment is used, and if
 * the syllable has no segments the result is 0.
 */
static float vowel_mid(cst_item *syl)
{
    const cst_phoneset *ps = item_phoneset(syl);
    cst_item *first = item_daughter(item_as(syl,"SylStructure"));
    cst_item *pick;

    /* no segments, shouldn't happen */
    if (first == 0)
        return 0;

    /* Stop at the first segment marked as a vowel */
    for (pick = first; pick; pick = item_next(pick))
    {
        if (cst_streq("+", phone_feature_string(ps,
                                                item_feat_string(pick,"name"),
                                                "vc")))
            break;
    }

    /* no vowel in syllable, shouldn't happen: fall back to first segment */
    if (pick == 0)
        pick = first;

    return (item_feat_float(pick,"end")+
            ffeature_float(pick,"R:Segment.p.end"))/2.0;
}
/*
 * Create the "Target" relation of utterance u and fill it with F0
 * target points predicted by the LR model (f0_lr_terms).
 *
 * Does nothing and returns u when the utterance has the feature
 * "no_f0_target_model".  Otherwise, for each item of the "Syllable"
 * relation that has at least one segment, a target is added at the
 * start of the syllable and at the middle of its vowel, plus one at
 * the end of the syllable when it is followed by a break.  Model
 * outputs are rescaled with map_f0 to the mean and stddev taken from
 * "int_f0_target_mean" (scaled by "f0_shift") and
 * "int_f0_target_stddev", optionally adjusted per token by
 * "local_f0_shift" and "local_f0_range".  Finally the target list is
 * extended so it covers position 0 up to the end of the last segment.
 *
 * Always returns u.
 */
cst_utterance *us_f0_model(cst_utterance *u)
{
    /* F0 target model: Black and Hunt ICSLP96, three points per syl */
    cst_item *syl, *t, *nt;
    cst_relation *targ_rel;
    float mean, stddev, local_mean, local_stddev;
    float start, mid, end, lend;
    float mapped_start;
    float seg_end;

    if (feat_present(u->features,"no_f0_target_model"))
        return u;

    targ_rel = utt_relation_create(u,"Target");
    mean = get_param_float(u->features,"int_f0_target_mean", 100.0);
    mean *= get_param_float(u->features,"f0_shift", 1.0);
    stddev = get_param_float(u->features,"int_f0_target_stddev", 12.0);

    /* lend holds the previous syllable's end target, already mapped */
    lend = 0;
    for (syl=relation_head(utt_relation(u,"Syllable"));
         syl;
         syl = item_next(syl))
    {
        if (!item_daughter(item_as(syl,"SylStructure")))
            continue;  /* no segs in syl */

        /* Per-token adjustments; a zero value is treated as "not set" */
        local_mean = ffeature_float(syl,"R:SylStructure.parent.R:Token.parent.local_f0_shift");
        if (local_mean)
            local_mean *= mean;
        else
            local_mean = mean;
        local_stddev = ffeature_float(syl,"R:SylStructure.parent.R:Token.parent.local_f0_range");
        if (local_stddev == 0.0)
            local_stddev = stddev;

        apply_lr_model(syl,f0_lr_terms,&start,&mid,&end);

        /* Map the predicted start BEFORE averaging it with lend: lend is
           already a mapped value, so averaging it with the raw model
           output and mapping the result would mix two scales and apply
           the mapping twice to the previous end target. */
        mapped_start = map_f0(start,local_mean,local_stddev);
        if (post_break(syl))
            lend = mapped_start;  /* after a break: no smoothing */
        add_target_point(targ_rel,
                         ffeature_float(syl,
                                        "R:SylStructure.daughter.R:Segment.p.end"),
                         (mapped_start+lend)/2.0);

        add_target_point(targ_rel,
                         vowel_mid(syl),
                         map_f0(mid,local_mean,local_stddev));

        lend = map_f0(end,local_mean,local_stddev);
        if (pre_break(syl))
            add_target_point(targ_rel,
                             ffeature_float(syl,"R:SylStructure.daughtern.end"),
                             lend);
    }

    /* Guarantee targets go from start to end of utterance */
    t = relation_head(targ_rel);
    if (t == 0)
        add_target_point(targ_rel,0,mean);
    else if (item_feat_float(t,"pos") > 0)
    {
        /* Hold the first target's F0 back to position 0 */
        nt = item_prepend(t,NULL);
        item_set_float(nt,"pos",0.0);
        item_set_float(nt,"f0",item_feat_float(t,"f0"));
    }

    /* Hold the last target's F0 out to the end of the final segment */
    t = relation_tail(targ_rel);
    seg_end = item_feat_float(relation_tail(utt_relation(u,"Segment")),"end");
    if (item_feat_float(t,"pos") < seg_end)
        add_target_point(targ_rel,seg_end,item_feat_float(t,"f0"));

    return u;
}