mirror of
https://github.com/classilla/tenfourfox.git
synced 2024-06-06 22:29:34 +00:00
368 lines
12 KiB
C
368 lines
12 KiB
C
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
|
/* ====================================================================
|
|
* Copyright (c) 2008 Carnegie Mellon University. All rights
|
|
* reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
*
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
*
|
|
* This work was supported in part by funding from the Defense Advanced
|
|
* Research Projects Agency and the National Science Foundation of the
|
|
* United States of America, and the CMU Sphinx Speech Consortium.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
|
|
* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
|
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
|
|
* NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*
|
|
* ====================================================================
|
|
*
|
|
*/
|
|
|
|
/**
|
|
* @file phone_loop_search.h Fast and rough context-independent phoneme loop search.
|
|
*/
|
|
|
|
#include <sphinxbase/err.h>
|
|
|
|
#include "phone_loop_search.h"
|
|
|
|
static int phone_loop_search_start(ps_search_t *search);
|
|
static int phone_loop_search_step(ps_search_t *search, int frame_idx);
|
|
static int phone_loop_search_finish(ps_search_t *search);
|
|
static int phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p);
|
|
static void phone_loop_search_free(ps_search_t *search);
|
|
static char const *phone_loop_search_hyp(ps_search_t *search, int32 *out_score, int32 *out_is_final);
|
|
static int32 phone_loop_search_prob(ps_search_t *search);
|
|
static ps_seg_t *phone_loop_search_seg_iter(ps_search_t *search, int32 *out_score);
|
|
|
|
static ps_searchfuncs_t phone_loop_search_funcs = {
|
|
/* name: */ "phone_loop",
|
|
/* start: */ phone_loop_search_start,
|
|
/* step: */ phone_loop_search_step,
|
|
/* finish: */ phone_loop_search_finish,
|
|
/* reinit: */ phone_loop_search_reinit,
|
|
/* free: */ phone_loop_search_free,
|
|
/* lattice: */ NULL,
|
|
/* hyp: */ phone_loop_search_hyp,
|
|
/* prob: */ phone_loop_search_prob,
|
|
/* seg_iter: */ phone_loop_search_seg_iter,
|
|
};
|
|
|
|
static int
|
|
phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
|
|
{
|
|
phone_loop_search_t *pls = (phone_loop_search_t *)search;
|
|
cmd_ln_t *config = ps_search_config(search);
|
|
acmod_t *acmod = ps_search_acmod(search);
|
|
int i;
|
|
|
|
/* Free old dict2pid, dict, if necessary. */
|
|
ps_search_base_reinit(search, dict, d2p);
|
|
|
|
/* Initialize HMM context. */
|
|
if (pls->hmmctx)
|
|
hmm_context_free(pls->hmmctx);
|
|
pls->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
|
|
acmod->tmat->tp, NULL, acmod->mdef->sseq);
|
|
if (pls->hmmctx == NULL)
|
|
return -1;
|
|
|
|
/* Initialize penalty storage */
|
|
pls->n_phones = bin_mdef_n_ciphone(acmod->mdef);
|
|
pls->window = cmd_ln_int32_r(config, "-pl_window");
|
|
if (pls->penalties)
|
|
ckd_free(pls->penalties);
|
|
pls->penalties = (int32 *)ckd_calloc(pls->n_phones, sizeof(*pls->penalties));
|
|
if (pls->pen_buf)
|
|
ckd_free_2d(pls->pen_buf);
|
|
pls->pen_buf = (int32 **)ckd_calloc_2d(pls->window, pls->n_phones, sizeof(**pls->pen_buf));
|
|
|
|
/* Initialize phone HMMs. */
|
|
if (pls->hmms) {
|
|
for (i = 0; i < pls->n_phones; ++i)
|
|
hmm_deinit((hmm_t *)&pls->hmms[i]);
|
|
ckd_free(pls->hmms);
|
|
}
|
|
pls->hmms = (hmm_t *)ckd_calloc(pls->n_phones, sizeof(*pls->hmms));
|
|
for (i = 0; i < pls->n_phones; ++i) {
|
|
hmm_init(pls->hmmctx, (hmm_t *)&pls->hmms[i],
|
|
FALSE,
|
|
bin_mdef_pid2ssid(acmod->mdef, i),
|
|
bin_mdef_pid2tmatid(acmod->mdef, i));
|
|
}
|
|
pls->penalty_weight = cmd_ln_float64_r(config, "-pl_weight");
|
|
pls->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_beam")) >> SENSCR_SHIFT;
|
|
pls->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_pbeam")) >> SENSCR_SHIFT;
|
|
pls->pip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pl_pip")) >> SENSCR_SHIFT;
|
|
E_INFO("State beam %d Phone exit beam %d Insertion penalty %d\n",
|
|
pls->beam, pls->pbeam, pls->pip);
|
|
|
|
return 0;
|
|
}
|
|
|
|
ps_search_t *
|
|
phone_loop_search_init(cmd_ln_t *config,
|
|
acmod_t *acmod,
|
|
dict_t *dict)
|
|
{
|
|
phone_loop_search_t *pls;
|
|
|
|
/* Allocate and initialize. */
|
|
pls = (phone_loop_search_t *)ckd_calloc(1, sizeof(*pls));
|
|
ps_search_init(ps_search_base(pls), &phone_loop_search_funcs,
|
|
config, acmod, dict, NULL);
|
|
phone_loop_search_reinit(ps_search_base(pls), ps_search_dict(pls),
|
|
ps_search_dict2pid(pls));
|
|
|
|
return ps_search_base(pls);
|
|
}
|
|
|
|
static void
|
|
phone_loop_search_free_renorm(phone_loop_search_t *pls)
|
|
{
|
|
gnode_t *gn;
|
|
for (gn = pls->renorm; gn; gn = gnode_next(gn))
|
|
ckd_free(gnode_ptr(gn));
|
|
glist_free(pls->renorm);
|
|
pls->renorm = NULL;
|
|
}
|
|
|
|
static void
|
|
phone_loop_search_free(ps_search_t *search)
|
|
{
|
|
phone_loop_search_t *pls = (phone_loop_search_t *)search;
|
|
int i;
|
|
|
|
ps_search_deinit(search);
|
|
for (i = 0; i < pls->n_phones; ++i)
|
|
hmm_deinit((hmm_t *)&pls->hmms[i]);
|
|
phone_loop_search_free_renorm(pls);
|
|
ckd_free_2d(pls->pen_buf);
|
|
ckd_free(pls->hmms);
|
|
ckd_free(pls->penalties);
|
|
hmm_context_free(pls->hmmctx);
|
|
ckd_free(pls);
|
|
}
|
|
|
|
static int
|
|
phone_loop_search_start(ps_search_t *search)
|
|
{
|
|
phone_loop_search_t *pls = (phone_loop_search_t *)search;
|
|
int i;
|
|
|
|
/* Reset and enter all phone HMMs. */
|
|
for (i = 0; i < pls->n_phones; ++i) {
|
|
hmm_t *hmm = (hmm_t *)&pls->hmms[i];
|
|
hmm_clear(hmm);
|
|
hmm_enter(hmm, 0, -1, 0);
|
|
}
|
|
memset(pls->penalties, 0, pls->n_phones * sizeof(*pls->penalties));
|
|
for (i = 0; i < pls->window; i++)
|
|
memset(pls->pen_buf[i], 0, pls->n_phones * sizeof(*pls->pen_buf[i]));
|
|
phone_loop_search_free_renorm(pls);
|
|
pls->best_score = 0;
|
|
pls->pen_buf_ptr = 0;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
renormalize_hmms(phone_loop_search_t *pls, int frame_idx, int32 norm)
|
|
{
|
|
phone_loop_renorm_t *rn = (phone_loop_renorm_t *)ckd_calloc(1, sizeof(*rn));
|
|
int i;
|
|
|
|
pls->renorm = glist_add_ptr(pls->renorm, rn);
|
|
rn->frame_idx = frame_idx;
|
|
rn->norm = norm;
|
|
|
|
for (i = 0; i < pls->n_phones; ++i) {
|
|
hmm_normalize((hmm_t *)&pls->hmms[i], norm);
|
|
}
|
|
}
|
|
|
|
static void
|
|
evaluate_hmms(phone_loop_search_t *pls, int16 const *senscr, int frame_idx)
|
|
{
|
|
int32 bs = WORST_SCORE;
|
|
int i;
|
|
|
|
hmm_context_set_senscore(pls->hmmctx, senscr);
|
|
|
|
for (i = 0; i < pls->n_phones; ++i) {
|
|
hmm_t *hmm = (hmm_t *)&pls->hmms[i];
|
|
int32 score;
|
|
|
|
if (hmm_frame(hmm) < frame_idx)
|
|
continue;
|
|
score = hmm_vit_eval(hmm);
|
|
if (score BETTER_THAN bs) {
|
|
bs = score;
|
|
}
|
|
}
|
|
pls->best_score = bs;
|
|
}
|
|
|
|
static void
|
|
store_scores(phone_loop_search_t *pls, int frame_idx)
|
|
{
|
|
int i, j, itr;
|
|
|
|
for (i = 0; i < pls->n_phones; ++i) {
|
|
hmm_t *hmm = (hmm_t *)&pls->hmms[i];
|
|
pls->pen_buf[pls->pen_buf_ptr][i] = (hmm_bestscore(hmm) - pls->best_score) * pls->penalty_weight;
|
|
}
|
|
pls->pen_buf_ptr++;
|
|
pls->pen_buf_ptr = pls->pen_buf_ptr % pls->window;
|
|
|
|
//update penalties
|
|
for (i = 0; i < pls->n_phones; ++i) {
|
|
pls->penalties[i] = WORST_SCORE;
|
|
for (j = 0, itr = pls->pen_buf_ptr + 1; j < pls->window; j++, itr++) {
|
|
itr = itr % pls->window;
|
|
if (pls->pen_buf[itr][i] > pls->penalties[i])
|
|
pls->penalties[i] = pls->pen_buf[itr][i];
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
prune_hmms(phone_loop_search_t *pls, int frame_idx)
|
|
{
|
|
int32 thresh = pls->best_score + pls->beam;
|
|
int nf = frame_idx + 1;
|
|
int i;
|
|
|
|
/* Check all phones to see if they remain active in the next frame. */
|
|
for (i = 0; i < pls->n_phones; ++i) {
|
|
hmm_t *hmm = (hmm_t *)&pls->hmms[i];
|
|
|
|
if (hmm_frame(hmm) < frame_idx)
|
|
continue;
|
|
/* Retain if score better than threshold. */
|
|
if (hmm_bestscore(hmm) BETTER_THAN thresh) {
|
|
hmm_frame(hmm) = nf;
|
|
}
|
|
else
|
|
hmm_clear_scores(hmm);
|
|
}
|
|
}
|
|
|
|
static void
|
|
phone_transition(phone_loop_search_t *pls, int frame_idx)
|
|
{
|
|
int32 thresh = pls->best_score + pls->pbeam;
|
|
int nf = frame_idx + 1;
|
|
int i;
|
|
|
|
/* Now transition out of phones whose last states are inside the
|
|
* phone transition beam. */
|
|
for (i = 0; i < pls->n_phones; ++i) {
|
|
hmm_t *hmm = (hmm_t *)&pls->hmms[i];
|
|
int32 newphone_score;
|
|
int j;
|
|
|
|
if (hmm_frame(hmm) != nf)
|
|
continue;
|
|
|
|
newphone_score = hmm_out_score(hmm) + pls->pip;
|
|
if (newphone_score BETTER_THAN thresh) {
|
|
/* Transition into all phones using the usual Viterbi rule. */
|
|
for (j = 0; j < pls->n_phones; ++j) {
|
|
hmm_t *nhmm = (hmm_t *)&pls->hmms[j];
|
|
|
|
if (hmm_frame(nhmm) < frame_idx
|
|
|| newphone_score BETTER_THAN hmm_in_score(nhmm)) {
|
|
hmm_enter(nhmm, newphone_score, hmm_out_history(hmm), nf);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static int
|
|
phone_loop_search_step(ps_search_t *search, int frame_idx)
|
|
{
|
|
phone_loop_search_t *pls = (phone_loop_search_t *)search;
|
|
acmod_t *acmod = ps_search_acmod(search);
|
|
int16 const *senscr;
|
|
int i;
|
|
|
|
/* All CI senones are active all the time. */
|
|
if (!ps_search_acmod(pls)->compallsen) {
|
|
acmod_clear_active(ps_search_acmod(pls));
|
|
for (i = 0; i < pls->n_phones; ++i)
|
|
acmod_activate_hmm(acmod, (hmm_t *)&pls->hmms[i]);
|
|
}
|
|
|
|
/* Calculate senone scores for current frame. */
|
|
senscr = acmod_score(acmod, &frame_idx);
|
|
|
|
/* Renormalize, if necessary. */
|
|
if (pls->best_score + (2 * pls->beam) WORSE_THAN WORST_SCORE) {
|
|
E_INFO("Renormalizing Scores at frame %d, best score %d\n",
|
|
frame_idx, pls->best_score);
|
|
renormalize_hmms(pls, frame_idx, pls->best_score);
|
|
}
|
|
|
|
/* Evaluate phone HMMs for current frame. */
|
|
evaluate_hmms(pls, senscr, frame_idx);
|
|
|
|
/* Store hmm scores for senone penaly calculation */
|
|
store_scores(pls, frame_idx);
|
|
|
|
/* Prune phone HMMs. */
|
|
prune_hmms(pls, frame_idx);
|
|
|
|
/* Do phone transitions. */
|
|
phone_transition(pls, frame_idx);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
phone_loop_search_finish(ps_search_t *search)
|
|
{
|
|
/* Actually nothing to do here really. */
|
|
return 0;
|
|
}
|
|
|
|
static char const *
|
|
phone_loop_search_hyp(ps_search_t *search, int32 *out_score, int32 *out_is_final)
|
|
{
|
|
E_WARN("Hypotheses are not returned from phone loop search");
|
|
return NULL;
|
|
}
|
|
|
|
static int32
|
|
phone_loop_search_prob(ps_search_t *search)
|
|
{
|
|
/* FIXME: Actually... they ought to be. */
|
|
E_WARN("Posterior probabilities are not returned from phone loop search");
|
|
return 0;
|
|
}
|
|
|
|
static ps_seg_t *
|
|
phone_loop_search_seg_iter(ps_search_t *search, int32 *out_score)
|
|
{
|
|
E_WARN("Hypotheses are not returned from phone loop search");
|
|
return NULL;
|
|
}
|