blob: 23b2a1550e58afc47626ea88888d7fb976f6ba00 [file] [log] [blame]
/* --------------------------------------------------------------- */
/* The HMM-Based Speech Synthesis System (HTS): version 1.1.1 */
/* HTS Working Group */
/* */
/* Department of Computer Science */
/* Nagoya Institute of Technology */
/* and */
/* Interdisciplinary Graduate School of Science and Engineering */
/* Tokyo Institute of Technology */
/* Copyright (c) 2001-2003 */
/* All Rights Reserved. */
/* */
/* Permission is hereby granted, free of charge, to use and */
/* distribute this software and its documentation without */
/* restriction, including without limitation the rights to use, */
/* copy, modify, merge, publish, distribute, sublicense, and/or */
/* sell copies of this work, and to permit persons to whom this */
/* work is furnished to do so, subject to the following conditions: */
/* */
/* 1. The code must retain the above copyright notice, this list */
/* of conditions and the following disclaimer. */
/* */
/* 2. Any modifications must be clearly marked as such. */
/* */
/* NAGOYA INSTITUTE OF TECHNOLOGY, TOKYO INSITITUTE OF TECHNOLOGY, */
/* HTS WORKING GROUP, AND THE CONTRIBUTORS TO THIS WORK DISCLAIM */
/* ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL */
/* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
/* SHALL NAGOYA INSTITUTE OF TECHNOLOGY, TOKYO INSITITUTE OF */
/* TECHNOLOGY, HTS WORKING GROUP, NOR THE CONTRIBUTORS BE LIABLE */
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY */
/* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, */
/* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS */
/* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR */
/* PERFORMANCE OF THIS SOFTWARE. */
/* */
/* --------------------------------------------------------------- */
/* mlpg.c : speech parameter generation from pdf sequence */
/* */
/* 2003/12/26 by Heiga Zen */
/* --------------------------------------------------------------- */
/*********************************************************************/
/* */
/* Nagoya Institute of Technology, Aichi, Japan, */
/* and */
/* Carnegie Mellon University, Pittsburgh, PA */
/* Copyright (c) 2003-2004,2008 */
/* All Rights Reserved. */
/* */
/* Permission is hereby granted, free of charge, to use and */
/* distribute this software and its documentation without */
/* restriction, including without limitation the rights to use, */
/* copy, modify, merge, publish, distribute, sublicense, and/or */
/* sell copies of this work, and to permit persons to whom this */
/* work is furnished to do so, subject to the following conditions: */
/* */
/* 1. The code must retain the above copyright notice, this list */
/* of conditions and the following disclaimer. */
/* 2. Any modifications must be clearly marked as such. */
/* 3. Original authors' names are not deleted. */
/* */
/* NAGOYA INSTITUTE OF TECHNOLOGY, CARNEGIE MELLON UNIVERSITY, AND */
/* THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH */
/* REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL NAGOYA INSTITUTE */
/* OF TECHNOLOGY, CARNEGIE MELLON UNIVERSITY, NOR THE CONTRIBUTORS */
/* BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR */
/* ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR */
/* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER */
/* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE */
/* OR PERFORMANCE OF THIS SOFTWARE. */
/* */
/*********************************************************************/
/* */
/* Author : Tomoki Toda (tomoki@ics.nitech.ac.jp) */
/* Date : June 2004 */
/* */
/* Modified as a single file for inclusion in festival/flite */
/* May 2008 awb@cs.cmu.edu */
/*-------------------------------------------------------------------*/
/* */
/* ML-Based Parameter Generation */
/* */
/*-------------------------------------------------------------------*/
#include "cst_alloc.h"
#include "cst_string.h"
#include "cst_math.h"
#include "cst_track.h"
#include "cst_wave.h"
#include "cst_vc.h"
#include "cst_mlpg.h"
#define mlpg_alloc(X,Y) (cst_alloc(Y,X))
#define mlpg_free cst_free
static MLPGPARA xmlpgpara_init(int dim, int dim2, int dnum,
int clsnum)
{
MLPGPARA param;
// memory allocation
param = mlpg_alloc(1,struct MLPGPARA_STRUCT);
param->ov = xdvalloc(dim);
param->iuv = NODATA;
param->iumv = NODATA;
param->flkv = xdvalloc(dnum);
param->stm = NODATA;
param->dltm = xdmalloc(dnum, dim2);
param->pdf = NODATA;
param->detvec = NODATA;
param->wght = xdmalloc(clsnum, 1);
param->mean = xdmalloc(clsnum, dim);
param->cov = NODATA;
param->clsidxv = NODATA;
/* dia_flag */
param->clsdetv = xdvalloc(1);
param->clscov = xdmalloc(1, dim);
param->vdet = 1.0;
param->vm = NODATA;
param->vv = NODATA;
param->var = NODATA;
return param;
}
static void xmlpgparafree(MLPGPARA param)
{
if (param != NODATA) {
if (param->ov != NODATA) xdvfree(param->ov);
if (param->iuv != NODATA) xdvfree(param->iuv);
if (param->iumv != NODATA) xdvfree(param->iumv);
if (param->flkv != NODATA) xdvfree(param->flkv);
if (param->stm != NODATA) xdmfree(param->stm);
if (param->dltm != NODATA) xdmfree(param->dltm);
if (param->pdf != NODATA) xdmfree(param->pdf);
if (param->detvec != NODATA) xdvfree(param->detvec);
if (param->wght != NODATA) xdmfree(param->wght);
if (param->mean != NODATA) xdmfree(param->mean);
if (param->cov != NODATA) xdmfree(param->cov);
if (param->clsidxv != NODATA) xlvfree(param->clsidxv);
if (param->clsdetv != NODATA) xdvfree(param->clsdetv);
if (param->clscov != NODATA) xdmfree(param->clscov);
if (param->vm != NODATA) xdvfree(param->vm);
if (param->vv != NODATA) xdvfree(param->vv);
if (param->var != NODATA) xdvfree(param->var);
mlpg_free(param);
}
return;
}
static double get_like_pdfseq_vit(int dim, int dim2, int dnum, int clsnum,
MLPGPARA param,
float **model,
XBOOL dia_flag)
{
long d, c, k, l, j;
double sumgauss;
double like = 0.0;
for (d = 0, like = 0.0; d < dnum; d++) {
// read weight and mean sequences
param->wght->data[0][0] = 0.9; /* FIXME weights */
for (j=0; j<dim; j++)
param->mean->data[0][j] = model[d][(j+1)*2];
// observation vector
for (k = 0; k < dim2; k++) {
param->ov->data[k] = param->stm->data[d][k];
param->ov->data[k + dim2] = param->dltm->data[d][k];
}
// mixture index
c = d;
param->clsdetv->data[0] = param->detvec->data[c];
// calculating likelihood
if (dia_flag == XTRUE) {
for (k = 0; k < param->clscov->col; k++)
param->clscov->data[0][k] = param->cov->data[c][k];
sumgauss = get_gauss_dia(0, param->ov, param->clsdetv,
param->wght, param->mean, param->clscov);
} else {
for (k = 0; k < param->clscov->row; k++)
for (l = 0; l < param->clscov->col; l++)
param->clscov->data[k][l] =
param->cov->data[k + param->clscov->row * c][l];
sumgauss = get_gauss_full(0, param->ov, param->clsdetv,
param->wght, param->mean, param->clscov);
}
if (sumgauss <= 0.0) param->flkv->data[d] = -1.0 * INFTY2;
else param->flkv->data[d] = log(sumgauss);
like += param->flkv->data[d];
// estimating U', U'*M
if (dia_flag == XTRUE) {
// PDF [U'*M U']
for (k = 0; k < dim; k++) {
param->pdf->data[d][k] =
param->clscov->data[0][k] * param->mean->data[0][k];
param->pdf->data[d][k + dim] = param->clscov->data[0][k];
}
} else {
// PDF [U'*M U']
for (k = 0; k < dim; k++) {
param->pdf->data[d][k] = 0.0;
for (l = 0; l < dim; l++) {
param->pdf->data[d][k * dim + dim + l] =
param->clscov->data[k][l];
param->pdf->data[d][k] +=
param->clscov->data[k][l] * param->mean->data[0][l];
}
}
}
}
like /= (double)dnum;
return like;
}
#if 0
static double get_like_gv(long dim2, long dnum, MLPGPARA param)
{
long k;
double av = 0.0, dif = 0.0;
double vlike = -INFTY;
if (param->vm != NODATA && param->vv != NODATA) {
for (k = 0; k < dim2; k++)
calc_varstats(param->stm->data, k, dnum, &av,
&(param->var->data[k]), &dif);
vlike = log(get_gauss_dia5(param->vdet, 1.0, param->var,
param->vm, param->vv));
}
return vlike;
}
static void sm_mvav(DMATRIX mat, long hlen)
{
long k, l, m, p;
double d, sd;
DVECTOR vec = NODATA;
DVECTOR win = NODATA;
vec = xdvalloc(mat->row);
// smoothing window
win = xdvalloc(hlen * 2 + 1);
for (k = 0, d = 1.0, sd = 0.0; k < hlen; k++, d += 1.0) {
win->data[k] = d; win->data[win->length - k - 1] = d;
sd += d + d;
}
win->data[k] = d; sd += d;
for (k = 0; k < win->length; k++) win->data[k] /= sd;
for (l = 0; l < mat->col; l++) {
for (k = 0; k < mat->row; k++) {
for (m = 0, vec->data[k] = 0.0; m < win->length; m++) {
p = k - hlen + m;
if (p >= 0 && p < mat->row)
vec->data[k] += mat->data[p][l] * win->data[m];
}
}
for (k = 0; k < mat->row; k++) mat->data[k][l] = vec->data[k];
}
xdvfree(win);
xdvfree(vec);
return;
}
#endif
static void get_dltmat(DMATRIX mat, DWin *dw, int dno, DMATRIX dmat)
{
int i, j, k, tmpnum;
tmpnum = (int)mat->row - dw->width[dno][WRIGHT];
for (k = dw->width[dno][WRIGHT]; k < tmpnum; k++) // time index
for (i = 0; i < (int)mat->col; i++) // dimension index
for (j = dw->width[dno][WLEFT], dmat->data[k][i] = 0.0;
j <= dw->width[dno][WRIGHT]; j++)
dmat->data[k][i] += mat->data[k + j][i] * dw->coef[dno][j];
for (i = 0; i < (int)mat->col; i++) { // dimension index
for (k = 0; k < dw->width[dno][WRIGHT]; k++) // time index
for (j = dw->width[dno][WLEFT], dmat->data[k][i] = 0.0;
j <= dw->width[dno][WRIGHT]; j++)
if (k + j >= 0)
dmat->data[k][i] += mat->data[k + j][i] * dw->coef[dno][j];
else
dmat->data[k][i] += (2.0 * mat->data[0][i] - mat->data[-k - j][i]) * dw->coef[dno][j];
for (k = tmpnum; k < (int)mat->row; k++) // time index
for (j = dw->width[dno][WLEFT], dmat->data[k][i] = 0.0;
j <= dw->width[dno][WRIGHT]; j++)
if (k + j < (int)mat->row)
dmat->data[k][i] += mat->data[k + j][i] * dw->coef[dno][j];
else
dmat->data[k][i] += (2.0 * mat->data[mat->row - 1][i] - mat->data[mat->row - k - j + mat->row - 2][i]) * dw->coef[dno][j];
}
return;
}
static double *dcalloc(int x, int xoff)
{
double *ptr;
ptr = mlpg_alloc(x,double);
/* ptr += xoff; */ /* Just not going to allow this */
return(ptr);
}
static double **ddcalloc(int x, int y, int xoff, int yoff)
{
double **ptr;
register int i;
ptr = mlpg_alloc(x,double *);
for (i = 0; i < x; i++) ptr[i] = dcalloc(y, yoff);
/* ptr += xoff; */ /* Just not going to allow this */
return(ptr);
}
/////////////////////////////////////
// ML using Choleski decomposition //
/////////////////////////////////////
static void InitDWin(PStreamChol *pst, const float *dynwin, int fsize)
{
int i,j;
int leng;
pst->dw.num = 1; // only static
if (dynwin) {
pst->dw.num = 2; // static + dyn
}
// memory allocation
pst->dw.width = mlpg_alloc(pst->dw.num,int *);
for (i = 0; i < pst->dw.num; i++)
pst->dw.width[i] = mlpg_alloc(2,int);
pst->dw.coef = mlpg_alloc(pst->dw.num, double *);
pst->dw.coef_ptrs = mlpg_alloc(pst->dw.num, double *);
// window for static parameter WLEFT = 0, WRIGHT = 1
pst->dw.width[0][WLEFT] = pst->dw.width[0][WRIGHT] = 0;
pst->dw.coef_ptrs[0] = mlpg_alloc(1,double);
pst->dw.coef[0] = pst->dw.coef_ptrs[0];
pst->dw.coef[0][0] = 1.0;
// set delta coefficients
for (i = 1; i < pst->dw.num; i++) {
pst->dw.coef_ptrs[i] = mlpg_alloc(fsize, double);
pst->dw.coef[i] = pst->dw.coef_ptrs[i];
for (j=0; j<fsize; j++) /* FIXME make dynwin doubles for memmove */
pst->dw.coef[i][j] = (double)dynwin[j];
// set pointer
leng = fsize / 2; // L (fsize = 2 * L + 1)
pst->dw.coef[i] += leng; // [L] -> [0] center
pst->dw.width[i][WLEFT] = -leng; // -L left
pst->dw.width[i][WRIGHT] = leng; // L right
if (fsize % 2 == 0) pst->dw.width[i][WRIGHT]--;
}
pst->dw.maxw[WLEFT] = pst->dw.maxw[WRIGHT] = 0;
for (i = 0; i < pst->dw.num; i++) {
if (pst->dw.maxw[WLEFT] > pst->dw.width[i][WLEFT])
pst->dw.maxw[WLEFT] = pst->dw.width[i][WLEFT];
if (pst->dw.maxw[WRIGHT] < pst->dw.width[i][WRIGHT])
pst->dw.maxw[WRIGHT] = pst->dw.width[i][WRIGHT];
}
return;
}
static void InitPStreamChol(PStreamChol *pst, const float *dynwin, int fsize,
int order, int T)
{
// order of cepstrum
pst->order = order;
// windows for dynamic feature
InitDWin(pst, dynwin, fsize);
// dimension of observed vector
pst->vSize = (pst->order + 1) * pst->dw.num; // odim = dim * (1--3)
// memory allocation
pst->T = T; // number of frames
pst->width = pst->dw.maxw[WRIGHT] * 2 + 1; // width of R
pst->mseq = ddcalloc(T, pst->vSize, 0, 0); // [T][odim]
pst->ivseq = ddcalloc(T, pst->vSize, 0, 0); // [T][odim]
pst->R = ddcalloc(T, pst->width, 0, 0); // [T][width]
pst->r = dcalloc(T, 0); // [T]
pst->g = dcalloc(T, 0); // [T]
pst->c = ddcalloc(T, pst->order + 1, 0, 0); // [T][dim]
return;
}
static void mlgparaChol(DMATRIX pdf, PStreamChol *pst, DMATRIX mlgp)
{
int t, d;
// error check
if (pst->vSize * 2 != pdf->col || pst->order + 1 != mlgp->col) {
cst_errmsg("Error mlgparaChol: Different dimension\n");
cst_error();
}
// mseq: U^{-1}*M, ifvseq: U^{-1}
for (t = 0; t < pst->T; t++) {
for (d = 0; d < pst->vSize; d++) {
pst->mseq[t][d] = pdf->data[t][d];
pst->ivseq[t][d] = pdf->data[t][pst->vSize + d];
}
}
// ML parameter generation
mlpgChol(pst);
// extracting parameters
for (t = 0; t < pst->T; t++)
for (d = 0; d <= pst->order; d++)
mlgp->data[t][d] = pst->c[t][d];
return;
}
// generate parameter sequence from pdf sequence using Choleski decomposition
static void mlpgChol(PStreamChol *pst)
{
register int m;
// generating parameter in each dimension
for (m = 0; m <= pst->order; m++) {
calc_R_and_r(pst, m);
Choleski(pst);
Choleski_forward(pst);
Choleski_backward(pst, m);
}
return;
}
//------ parameter generation fuctions
// calc_R_and_r: calculate R = W'U^{-1}W and r = W'U^{-1}M
static void calc_R_and_r(PStreamChol *pst, const int m)
{
register int i, j, k, l, n;
double wu;
for (i = 0; i < pst->T; i++) {
pst->r[i] = pst->mseq[i][m];
pst->R[i][0] = pst->ivseq[i][m];
for (j = 1; j < pst->width; j++) pst->R[i][j] = 0.0;
for (j = 1; j < pst->dw.num; j++) {
for (k = pst->dw.width[j][0]; k <= pst->dw.width[j][1]; k++) {
n = i + k;
if (n >= 0 && n < pst->T && pst->dw.coef[j][-k] != 0.0) {
l = j * (pst->order + 1) + m;
pst->r[i] += pst->dw.coef[j][-k] * pst->mseq[n][l];
wu = pst->dw.coef[j][-k] * pst->ivseq[n][l];
for (l = 0; l < pst->width; l++) {
n = l-k;
if (n <= pst->dw.width[j][1] && i + l < pst->T &&
pst->dw.coef[j][n] != 0.0)
pst->R[i][l] += wu * pst->dw.coef[j][n];
}
}
}
}
}
return;
}
// Choleski: Choleski factorization of Matrix R
static void Choleski(PStreamChol *pst)
{
register int t, j, k;
pst->R[0][0] = sqrt(pst->R[0][0]);
for (j = 1; j < pst->width; j++) pst->R[0][j] /= pst->R[0][0];
for (t = 1; t < pst->T; t++) {
for (j = 1; j < pst->width; j++)
if (t - j >= 0)
pst->R[t][0] -= pst->R[t - j][j] * pst->R[t - j][j];
pst->R[t][0] = sqrt(pst->R[t][0]);
for (j = 1; j < pst->width; j++) {
for (k = 0; k < pst->dw.maxw[WRIGHT]; k++)
if (j != pst->width - 1)
pst->R[t][j] -=
pst->R[t - k - 1][j - k] * pst->R[t - k - 1][j + 1];
pst->R[t][j] /= pst->R[t][0];
}
}
return;
}
// Choleski_forward: forward substitution to solve linear equations
static void Choleski_forward(PStreamChol *pst)
{
register int t, j;
double hold;
pst->g[0] = pst->r[0] / pst->R[0][0];
for (t=1; t < pst->T; t++) {
hold = 0.0;
for (j = 1; j < pst->width; j++)
if (t - j >= 0 && pst->R[t - j][j] != 0.0)
hold += pst->R[t - j][j] * pst->g[t - j];
pst->g[t] = (pst->r[t] - hold) / pst->R[t][0];
}
return;
}
// Choleski_backward: backward substitution to solve linear equations
static void Choleski_backward(PStreamChol *pst, const int m)
{
register int t, j;
double hold;
pst->c[pst->T - 1][m] = pst->g[pst->T - 1] / pst->R[pst->T - 1][0];
for (t = pst->T - 2; t >= 0; t--) {
hold = 0.0;
for (j = 1; j < pst->width; j++)
if (t + j < pst->T && pst->R[t][j] != 0.0)
hold += pst->R[t][j] * pst->c[t + j][m];
pst->c[t][m] = (pst->g[t] - hold) / pst->R[t][0];
}
return;
}
////////////////////////////////////
// ML Considering Global Variance //
////////////////////////////////////
#if 0
static void varconv(double **c, const int m, const int T, const double var)
{
register int n;
double sd, osd;
double oav = 0.0, ovar = 0.0, odif = 0.0;
calc_varstats(c, m, T, &oav, &ovar, &odif);
osd = sqrt(ovar); sd = sqrt(var);
for (n = 0; n < T; n++)
c[n][m] = (c[n][m] - oav) / osd * sd + oav;
return;
}
static void calc_varstats(double **c, const int m, const int T,
double *av, double *var, double *dif)
{
register int i;
register double d;
*av = 0.0;
*var = 0.0;
*dif = 0.0;
for (i = 0; i < T; i++) *av += c[i][m];
*av /= (double)T;
for (i = 0; i < T; i++) {
d = c[i][m] - *av;
*var += d * d; *dif += d;
}
*var /= (double)T;
return;
}
// Diagonal Covariance Version
static void mlgparaGrad(DMATRIX pdf, PStreamChol *pst, DMATRIX mlgp, const int max,
double th, double e, double alpha, DVECTOR vm, DVECTOR vv,
XBOOL nrmflag, XBOOL extvflag)
{
int t, d;
// error check
if (pst->vSize * 2 != pdf->col || pst->order + 1 != mlgp->col) {
cst_errmsg("Error mlgparaChol: Different dimension\n");
cst_error();
}
// mseq: U^{-1}*M, ifvseq: U^{-1}
for (t = 0; t < pst->T; t++) {
for (d = 0; d < pst->vSize; d++) {
pst->mseq[t][d] = pdf->data[t][d];
pst->ivseq[t][d] = pdf->data[t][pst->vSize + d];
}
}
// ML parameter generation
mlpgChol(pst);
// extend variance
if (extvflag == XTRUE)
for (d = 0; d <= pst->order; d++)
varconv(pst->c, d, pst->T, vm->data[d]);
// estimating parameters
mlpgGrad(pst, max, th, e, alpha, vm, vv, nrmflag);
// extracting parameters
for (t = 0; t < pst->T; t++)
for (d = 0; d <= pst->order; d++)
mlgp->data[t][d] = pst->c[t][d];
return;
}
// generate parameter sequence from pdf sequence using gradient
static void mlpgGrad(PStreamChol *pst, const int max, double th, double e,
double alpha, DVECTOR vm, DVECTOR vv, XBOOL nrmflag)
{
register int m, i, t;
double diff, n, dth;
if (nrmflag == XTRUE)
n = (double)(pst->T * pst->vSize) / (double)(vm->length);
else n = 1.0;
// generating parameter in each dimension
for (m = 0; m <= pst->order; m++) {
calc_R_and_r(pst, m);
dth = th * sqrt(vm->data[m]);
for (i = 0; i < max; i++) {
calc_grad(pst, m);
if (vm != NODATA && vv != NODATA)
calc_vargrad(pst, m, alpha, n, vm->data[m], vv->data[m]);
for (t = 0, diff = 0.0; t < pst->T; t++) {
diff += pst->g[t] * pst->g[t];
pst->c[t][m] += e * pst->g[t];
}
diff = sqrt(diff / (double)pst->T);
if (diff < dth || diff == 0.0) break;
}
}
return;
}
// calc_grad: calculate -RX + r = -W'U^{-1}W * X + W'U^{-1}M
void calc_grad(PStreamChol *pst, const int m)
{
register int i, j;
for (i = 0; i < pst->T; i++) {
pst->g[i] = pst->r[i] - pst->c[i][m] * pst->R[i][0];
for (j = 1; j < pst->width; j++) {
if (i + j < pst->T) pst->g[i] -= pst->c[i + j][m] * pst->R[i][j];
if (i - j >= 0) pst->g[i] -= pst->c[i - j][m] * pst->R[i - j][j];
}
}
return;
}
static void calc_vargrad(PStreamChol *pst, const int m, double alpha, double n,
double vm, double vv)
{
register int i;
double vg, w1, w2;
double av = 0.0, var = 0.0, dif = 0.0;
if (alpha > 1.0 || alpha < 0.0) {
w1 = 1.0; w2 = 1.0;
} else {
w1 = alpha; w2 = 1.0 - alpha;
}
calc_varstats(pst->c, m, pst->T, &av, &var, &dif);
for (i = 0; i < pst->T; i++) {
vg = -(var - vm) * (pst->c[i][m] - av) * vv * 2.0 / (double)pst->T;
pst->g[i] = w1 * pst->g[i] / n + w2 * vg;
}
return;
}
#endif
// diagonal covariance
static DVECTOR xget_detvec_diamat2inv(DMATRIX covmat) // [num class][dim]
{
long dim, clsnum;
long i, j;
double det;
DVECTOR detvec = NODATA;
clsnum = covmat->row;
dim = covmat->col;
// memory allocation
detvec = xdvalloc(clsnum);
for (i = 0; i < clsnum; i++) {
for (j = 0, det = 1.0; j < dim; j++) {
det *= covmat->data[i][j];
if (det > 0.0) {
covmat->data[i][j] = 1.0 / covmat->data[i][j];
} else {
cst_errmsg("error:(class %ld) determinant <= 0, det = %f\n", i, det);
xdvfree(detvec);
return NODATA;
}
}
detvec->data[i] = det;
}
return detvec;
}
static double get_gauss_full(long clsidx,
DVECTOR vec, // [dim]
DVECTOR detvec, // [clsnum]
DMATRIX weightmat, // [clsnum][1]
DMATRIX meanvec, // [clsnum][dim]
DMATRIX invcovmat) // [clsnum * dim][dim]
{
double gauss;
if (detvec->data[clsidx] <= 0.0) {
cst_errmsg("#error: det <= 0.0\n");
cst_error();
}
gauss = weightmat->data[clsidx][0]
/ sqrt(pow(2.0 * PI, (double)vec->length) * detvec->data[clsidx])
* exp(-1.0 * cal_xmcxmc(clsidx, vec, meanvec, invcovmat) / 2.0);
return gauss;
}
static double cal_xmcxmc(long clsidx,
DVECTOR x,
DMATRIX mm, // [num class][dim]
DMATRIX cm) // [num class * dim][dim]
{
long clsnum, k, l, b, dim;
double *vec = NULL;
double td, d;
dim = x->length;
clsnum = mm->row;
b = clsidx * dim;
if (mm->col != dim || cm->col != dim || clsnum * dim != cm->row) {
cst_errmsg("Error cal_xmcxmc: different dimension\n");
cst_error();
}
// memory allocation
vec = mlpg_alloc((int)dim, double);
for (k = 0; k < dim; k++) vec[k] = x->data[k] - mm->data[clsidx][k];
for (k = 0, d = 0.0; k < dim; k++) {
for (l = 0, td = 0.0; l < dim; l++) td += vec[l] * cm->data[l + b][k];
d += td * vec[k];
}
// memory free
mlpg_free(vec); vec = NULL;
return d;
}
#if 0
// diagonal covariance
static double get_gauss_dia5(double det,
double weight,
DVECTOR vec, // dim
DVECTOR meanvec, // dim
DVECTOR invcovvec) // dim
{
double gauss, sb;
long k;
if (det <= 0.0) {
cst_errmsg("#error: det <= 0.0\n");
cst_error();
}
for (k = 0, gauss = 0.0; k < vec->length; k++) {
sb = vec->data[k] - meanvec->data[k];
gauss += sb * invcovvec->data[k] * sb;
}
gauss = weight / sqrt(pow(2.0 * PI, (double)vec->length) * det)
* exp(-gauss / 2.0);
return gauss;
}
#endif
static double get_gauss_dia(long clsidx,
DVECTOR vec, // [dim]
DVECTOR detvec, // [clsnum]
DMATRIX weightmat, // [clsnum][1]
DMATRIX meanmat, // [clsnum][dim]
DMATRIX invcovmat) // [clsnum][dim]
{
double gauss, sb;
long k;
if (detvec->data[clsidx] <= 0.0) {
cst_errmsg("#error: det <= 0.0\n");
cst_error();
}
for (k = 0, gauss = 0.0; k < vec->length; k++) {
sb = vec->data[k] - meanmat->data[clsidx][k];
gauss += sb * invcovmat->data[clsidx][k] * sb;
}
gauss = weightmat->data[clsidx][0]
/ sqrt(pow(2.0 * PI, (double)vec->length) * detvec->data[clsidx])
* exp(-gauss / 2.0);
return gauss;
}
static void pst_free(PStreamChol *pst)
{
int i;
for (i=0; i<pst->dw.num; i++)
mlpg_free(pst->dw.width[i]);
mlpg_free(pst->dw.width); pst->dw.width = NULL;
for (i=0; i<pst->dw.num; i++)
mlpg_free(pst->dw.coef_ptrs[i]);
mlpg_free(pst->dw.coef); pst->dw.coef = NULL;
mlpg_free(pst->dw.coef_ptrs); pst->dw.coef_ptrs = NULL;
for (i=0; i<pst->T; i++)
mlpg_free(pst->mseq[i]);
mlpg_free(pst->mseq);
for (i=0; i<pst->T; i++)
mlpg_free(pst->ivseq[i]);
mlpg_free(pst->ivseq);
for (i=0; i<pst->T; i++)
mlpg_free(pst->R[i]);
mlpg_free(pst->R);
mlpg_free(pst->r);
mlpg_free(pst->g);
for (i=0; i<pst->T; i++)
mlpg_free(pst->c[i]);
mlpg_free(pst->c);
return;
}
cst_track *mlpg(const cst_track *param_track, cst_cg_db *cg_db)
{
/* Generate an (mcep) track using Maximum Likelihood Parameter Generation */
MLPGPARA param = NODATA;
cst_track *out;
int dim, dim_st;
// float like;
int i,j;
int nframes;
PStreamChol pst;
nframes = param_track->num_frames;
dim = (param_track->num_channels/2)-1;
dim_st = dim/2; /* dim2 in original code */
out = new_track();
cst_track_resize(out,nframes,dim_st+1);
param = xmlpgpara_init(dim,dim_st,nframes,nframes);
// mixture-index sequence
param->clsidxv = xlvalloc(nframes);
for (i=0; i<nframes; i++)
param->clsidxv->data[i] = i;
// initial static feature sequence
param->stm = xdmalloc(nframes,dim_st);
for (i=0; i<nframes; i++)
{
for (j=0; j<dim_st; j++)
param->stm->data[i][j] = param_track->frames[i][(j+1)*2];
}
/* Load cluster means */
for (i=0; i<nframes; i++)
for (j=0; j<dim_st; j++)
param->mean->data[i][j] = param_track->frames[i][(j+1)*2];
/* GMM parameters diagonal covariance */
InitPStreamChol(&pst, cg_db->dynwin, cg_db->dynwinsize, dim_st-1, nframes);
param->pdf = xdmalloc(nframes,dim*2);
param->cov = xdmalloc(nframes,dim);
for (i=0; i<nframes; i++)
for (j=0; j<dim; j++)
param->cov->data[i][j] =
param_track->frames[i][(j+1)*2+1] *
param_track->frames[i][(j+1)*2+1];
param->detvec = xget_detvec_diamat2inv(param->cov);
/* global variance parameters */
/* TBD get_gv_mlpgpara(param, vmfile, vvfile, dim2, msg_flag); */
get_dltmat(param->stm, &pst.dw, 1, param->dltm);
//like =
get_like_pdfseq_vit(dim, dim_st, nframes, nframes, param,
param_track->frames, XTRUE);
/* vlike = get_like_gv(dim2, dnum, param); */
mlgparaChol(param->pdf, &pst, param->stm);
/* Put the answer back into the output track */
for (i=0; i<nframes; i++)
{
out->times[i] = param_track->times[i];
out->frames[i][0] = param_track->frames[i][0]; /* F0 */
for (j=0; j<dim_st; j++)
out->frames[i][j+1] = param->stm->data[i][j];
}
// memory free
xmlpgparafree(param);
pst_free(&pst);
return out;
}