blob: 81379d881d1818ea97253fce6fe2a95103462809 [file] [log] [blame]
/*************************************************************************/
/* */
/* Language Technologies Institute */
/* Carnegie Mellon University */
/* Copyright (c) 2000-2008 */
/* All Rights Reserved. */
/* */
/* Permission is hereby granted, free of charge, to use and distribute */
/* this software and its documentation without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of this work, and to */
/* permit persons to whom this work is furnished to do so, subject to */
/* the following conditions: */
/* 1. The code must retain the above copyright notice, this list of */
/* conditions and the following disclaimer. */
/* 2. Any modifications must be clearly marked as such. */
/* 3. Original authors' names are not deleted. */
/* 4. The authors' names are not used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
/* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
/* THIS SOFTWARE. */
/* */
/*************************************************************************/
/* Author: Alan W Black (awb@cs.cmu.edu) */
/* Date: September 2000 */
/*************************************************************************/
/* */
/* Basic user level functions */
/* */
/*************************************************************************/
#include "cst_tokenstream.h"
#include "flite.h"
#include "cst_alloc.h"
#include "cst_clunits.h"
#include "cst_cg.h"
/* This is a global, which isn't ideal, this may change */
/* It is set when flite_set_voice_list() is called which happens in */
/* flite_main() */
cst_val *flite_voice_list = 0;
cst_lang flite_lang_list[20];
int flite_lang_list_length = 0;
int flite_init()
{
cst_regex_init();
return 0;
}
int flite_voice_dump(cst_voice *voice, const char *filename)
{
return cst_cg_dump_voice(voice,filename);
}
cst_voice *flite_voice_load(const char *filename)
{
/* Currently only supported for CG voices */
/* filename make be a local pathname or a url (http:/file:) */
cst_voice *v = NULL;
v = cst_cg_load_voice(filename,flite_lang_list);
return v;
}
int flite_add_voice(cst_voice *voice)
{
const cst_val *x;
if (voice)
{
/* add to second place -- first is default voice */
/* This is thread unsafe */
if (flite_voice_list)
{ /* Other voices -- first is default, add this second */
x = cons_val(voice_val(voice),
val_cdr(flite_voice_list));
set_cdr((cst_val *)(void *)flite_voice_list,x);
}
else
{ /* Only voice so goes on front */
flite_voice_list = cons_val(voice_val(voice),flite_voice_list);
}
return TRUE;
}
else
return FALSE;
}
int flite_add_lang(const char *langname,
void (*lang_init)(cst_voice *vox),
cst_lexicon *(*lex_init)())
{
if (flite_lang_list_length < 19)
{
flite_lang_list[flite_lang_list_length].lang = langname;
flite_lang_list[flite_lang_list_length].lang_init = lang_init;
flite_lang_list[flite_lang_list_length].lex_init = lex_init;
flite_lang_list_length++;
flite_lang_list[flite_lang_list_length].lang = NULL;
}
return TRUE;
}
cst_voice *flite_voice_select(const char *name)
{
const cst_val *v;
cst_voice *voice;
if (flite_voice_list == NULL)
return NULL; /* oops, not good */
if (name == NULL)
return val_voice(val_car(flite_voice_list));
for (v=flite_voice_list; v; v=val_cdr(v))
{
voice = val_voice(val_car(v));
if (cst_streq(name,voice->name)) /* short name */
return voice;
if (cst_streq(name,get_param_string(voice->features,"name","")))
/* longer name */
return voice;
if (cst_streq(name,get_param_string(voice->features,"pathname","")))
/* even longer name (url) */
return voice;
}
if (cst_urlp(name) || /* naive check if its a url */
cst_strchr(name,'/'))
{
voice = flite_voice_load(name);
if (!voice)
cst_errmsg("Error load voice: failed to load voice from %s\n",name); flite_add_voice(voice);
return voice;
}
return flite_voice_select(NULL);
}
int flite_voice_add_lex_addenda(cst_voice *v, const cst_string *lexfile)
{
/* Add addenda in lexfile to current voice */
cst_lexicon *lex;
const cst_val *lex_addenda = NULL;
cst_val *new_addenda;
lex = val_lexicon(feat_val(v->features,"lexicon"));
if (feat_present(v->features, "lex_addenda"))
lex_addenda = feat_val(v->features, "lex_addenda");
new_addenda = cst_lex_load_addenda(lex,lexfile);
#if 0
printf("\naddenda: ");
val_print(stdout,new_addenda);
printf("\n");
#endif
new_addenda = val_append(new_addenda,(cst_val *)lex_addenda);
if (lex->lex_addenda)
delete_val(lex->lex_addenda);
lex->lex_addenda = new_addenda;
return 0;
}
cst_utterance *flite_do_synth(cst_utterance *u,
cst_voice *voice,
cst_uttfunc synth)
{
utt_init(u, voice);
if ((*synth)(u) == NULL)
{
delete_utterance(u);
return NULL;
}
else
return u;
}
cst_utterance *flite_synth_text(const char *text, cst_voice *voice)
{
cst_utterance *u;
u = new_utterance();
utt_set_input_text(u,text);
return flite_do_synth(u, voice, utt_synth);
}
cst_utterance *flite_synth_phones(const char *text, cst_voice *voice)
{
cst_utterance *u;
u = new_utterance();
utt_set_input_text(u,text);
return flite_do_synth(u, voice, utt_synth_phones);
}
cst_wave *flite_text_to_wave(const char *text, cst_voice *voice)
{
cst_utterance *u;
cst_wave *w;
if ((u = flite_synth_text(text,voice)) == NULL)
return NULL;
w = copy_wave(utt_wave(u));
delete_utterance(u);
return w;
}
float flite_file_to_speech(const char *filename,
cst_voice *voice,
const char *outtype)
{
cst_tokenstream *ts;
if ((ts = ts_open(filename,
get_param_string(voice->features,"text_whitespace",NULL),
get_param_string(voice->features,"text_singlecharsymbols",NULL),
get_param_string(voice->features,"text_prepunctuation",NULL),
get_param_string(voice->features,"text_postpunctuation",NULL)))
== NULL)
{
cst_errmsg("failed to open file \"%s\" for reading\n",
filename);
return 1;
}
return flite_ts_to_speech(ts,voice,outtype);
}
float flite_ts_to_speech(cst_tokenstream *ts,
cst_voice *voice,
const char *outtype)
{
cst_utterance *utt;
const char *token;
cst_item *t;
cst_relation *tokrel;
float durs = 0;
int num_tokens;
cst_wave *w;
cst_breakfunc breakfunc = default_utt_break;
cst_uttfunc utt_user_callback = 0;
int fp;
fp = get_param_int(voice->features,"file_start_position",0);
if (fp > 0)
ts_set_stream_pos(ts,fp);
if (feat_present(voice->features,"utt_break"))
breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));
if (feat_present(voice->features,"utt_user_callback"))
utt_user_callback = val_uttfunc(feat_val(voice->features,"utt_user_callback"));
/* If its a file to write to, create and save an empty wave file */
/* as we are going to incrementally append to it */
if (!cst_streq(outtype,"play") &&
!cst_streq(outtype,"none") &&
!cst_streq(outtype,"stream"))
{
w = new_wave();
cst_wave_resize(w,0,1);
cst_wave_set_sample_rate(w,16000);
cst_wave_save_riff(w,outtype); /* an empty wave */
delete_wave(w);
}
num_tokens = 0;
utt = new_utterance();
tokrel = utt_relation_create(utt, "Token");
while (!ts_eof(ts) || num_tokens > 0)
{
token = ts_get(ts);
if ((cst_strlen(token) == 0) ||
(num_tokens > 500) || /* need an upper bound */
(relation_head(tokrel) &&
breakfunc(ts,token,tokrel)))
{
/* An end of utt, so synthesize it */
if (utt_user_callback)
utt = (utt_user_callback)(utt);
if (utt)
{
utt = flite_do_synth(utt,voice,utt_synth_tokens);
if (feat_present(utt->features,"Interrupted"))
{
delete_utterance(utt); utt = NULL;
break;
}
durs += flite_process_output(utt,outtype,TRUE);
delete_utterance(utt); utt = NULL;
}
else
break;
if (ts_eof(ts)) break;
utt = new_utterance();
tokrel = utt_relation_create(utt, "Token");
num_tokens = 0;
}
num_tokens++;
t = relation_append(tokrel, NULL);
item_set_string(t,"name",token);
item_set_string(t,"whitespace",ts->whitespace);
item_set_string(t,"prepunctuation",ts->prepunctuation);
item_set_string(t,"punc",ts->postpunctuation);
/* Mark it at the beginning of the token */
item_set_int(t,"file_pos",
ts->file_pos-(1+ /* as we are already on the next char */
cst_strlen(token)+
cst_strlen(ts->prepunctuation)+
cst_strlen(ts->postpunctuation)));
item_set_int(t,"line_number",ts->line_number);
}
if (utt) delete_utterance(utt);
ts_close(ts);
return durs;
}
float flite_text_to_speech(const char *text,
cst_voice *voice,
const char *outtype)
{
cst_utterance *u;
float dur;
u = flite_synth_text(text,voice);
dur = flite_process_output(u,outtype,FALSE);
delete_utterance(u);
return dur;
}
float flite_phones_to_speech(const char *text,
cst_voice *voice,
const char *outtype)
{
cst_utterance *u;
float dur;
u = flite_synth_phones(text,voice);
dur = flite_process_output(u,outtype,FALSE);
delete_utterance(u);
return dur;
}
float flite_process_output(cst_utterance *u, const char *outtype,
int append)
{
/* Play or save (append) output to output file */
cst_wave *w;
float dur;
if (!u) return 0.0;
w = utt_wave(u);
dur = (float)w->num_samples/(float)w->sample_rate;
if (cst_streq(outtype,"play"))
play_wave(w);
else if (cst_streq(outtype,"stream"))
{
/* It's already been played so do nothing */
}
else if (!cst_streq(outtype,"none"))
{
if (append)
cst_wave_append_riff(w,outtype);
else
cst_wave_save_riff(w,outtype);
}
return dur;
}
int flite_get_param_int(const cst_features *f, const char *name,int def)
{
return get_param_int(f,name,def);
}
float flite_get_param_float(const cst_features *f, const char *name, float def)
{
return get_param_float(f,name,def);
}
const char *flite_get_param_string(const cst_features *f, const char *name, const char *def)
{
return get_param_string(f,name,def);
}
const cst_val *flite_get_param_val(const cst_features *f, const char *name, cst_val *def)
{
return get_param_val(f,name,def);
}
void flite_feat_set_int(cst_features *f, const char *name, int v)
{
feat_set_int(f,name,v);
}
void flite_feat_set_float(cst_features *f, const char *name, float v)
{
feat_set_float(f,name,v);
}
void flite_feat_set_string(cst_features *f, const char *name, const char *v)
{
feat_set_string(f,name,v);
}
void flite_feat_set(cst_features *f, const char *name,const cst_val *v)
{
feat_set(f,name,v);
}
int flite_feat_remove(cst_features *f, const char *name)
{
return feat_remove(f,name);
}
const char *flite_ffeature_string(const cst_item *item,const char *featpath)
{
return ffeature_string(item,featpath);
}
int flite_ffeature_int(const cst_item *item,const char *featpath)
{
return ffeature_int(item,featpath);
}
float flite_ffeature_float(const cst_item *item,const char *featpath)
{
return ffeature_float(item,featpath);
}
const cst_val *flite_ffeature(const cst_item *item,const char *featpath)
{
return ffeature(item,featpath);
}
cst_item* flite_path_to_item(const cst_item *item,const char *featpath)
{
return path_to_item(item,featpath);
}
int flite_mmap_clunit_voxdata(const char *voxdir, cst_voice *voice)
{
/* Map clunit_db in voice data for giveb voice */
char *path;
const char *name;
const char *x;
int *indexes;
cst_filemap *vd;
cst_clunit_db *clunit_db;
int i;
name = get_param_string(voice->features,"name","voice");
path = cst_alloc(char,cst_strlen(voxdir)+1+cst_strlen(name)+1+cst_strlen("voxdata")+1);
cst_sprintf(path,"%s/%s.voxdata",voxdir,name);
vd = cst_mmap_file(path);
flite_feat_set_string(voice->features,"voxdir",path);
cst_free(path);
if (vd == NULL)
return -1;
x = (const char *)vd->mem;
if (!cst_streq("CMUFLITE",x))
{ /* Not a Flite voice data file */
cst_munmap_file(vd);
return -1;
}
for (i=9; x[i] &&i<64; i++)
if (x[i] != ' ')
break;
if (!cst_streq(name,&x[i]))
{ /* Not a voice data file for this voice */
cst_munmap_file(vd);
return -1;
}
/* This uses a hack to put in a void pointer to the cst_filemap */
flite_feat_set(voice->features,"voxdata",userdata_val(vd));
indexes = (int *)&x[64];
clunit_db = val_clunit_db(feat_val(voice->features,"clunit_db"));
clunit_db->sts->resoffs =
(const unsigned int *)&x[64+20];
clunit_db->sts->frames =
(const unsigned short *)&x[64+20+indexes[0]];
clunit_db->mcep->frames =
(const unsigned short *)&x[64+20+indexes[0]+indexes[1]];
clunit_db->sts->residuals =
(const unsigned char *)&x[64+20+indexes[0]+indexes[1]+indexes[2]];
clunit_db->sts->ressizes =
(const unsigned char *)&x[64+20+indexes[0]+indexes[1]+indexes[2]+indexes[3]];
return 0;
}
int flite_munmap_clunit_voxdata(cst_voice *voice)
{
cst_filemap *vd;
const cst_val *val_vd;
const cst_val *val_clunit_database;
cst_clunit_db *clunit_db;
val_vd = flite_get_param_val(voice->features,"voxdata",NULL);
val_clunit_database = flite_get_param_val(voice->features,"clunit_db",NULL);
if (val_vd && val_clunit_database)
{
clunit_db = val_clunit_db(val_clunit_database);
clunit_db->sts->resoffs = NULL;
clunit_db->sts->frames = NULL;
clunit_db->mcep->frames = NULL;
clunit_db->sts->residuals = NULL;
clunit_db->sts->ressizes = NULL;
vd = (cst_filemap *)val_userdata(val_vd);
cst_munmap_file(vd);
}
return 0;
}