mirror of
https://github.com/classilla/tenfourfox.git
synced 2024-06-25 20:29:33 +00:00
1574 lines
58 KiB
C
1574 lines
58 KiB
C
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
|
/* ====================================================================
|
|
* Copyright (c) 2008 Carnegie Mellon University. All rights
|
|
* reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
*
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
*
|
|
* This work was supported in part by funding from the Defense Advanced
|
|
* Research Projects Agency and the National Science Foundation of the
|
|
* United States of America, and the CMU Sphinx Speech Consortium.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
|
|
* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
|
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
|
|
* NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*
|
|
* ====================================================================
|
|
*
|
|
*/
|
|
|
|
/**
|
|
* @file ngram_search_fwdtree.c Lexicon tree search.
|
|
*/
|
|
|
|
/* System headers. */
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
|
|
/* SphinxBase headers. */
|
|
#include <sphinxbase/ckd_alloc.h>
|
|
#include <sphinxbase/listelem_alloc.h>
|
|
#include <sphinxbase/err.h>
|
|
|
|
/* Local headers. */
|
|
#include "ngram_search_fwdtree.h"
|
|
#include "phone_loop_search.h"
|
|
|
|
/* Set __CHAN_DUMP__ to 1 to route every channel evaluation through the
 * dumping evaluator, which is handed stderr as its output stream. */
#define __CHAN_DUMP__ 0

/* chan_v_eval(chan): evaluate the HMM embedded in channel `chan`. */
#if !__CHAN_DUMP__
#define chan_v_eval(chan) hmm_vit_eval(&(chan)->hmm)
#else
#define chan_v_eval(chan) hmm_dump_vit_eval(&(chan)->hmm, stderr)
#endif
|
|
|
|
/*
|
|
* Allocate that part of the search channel tree structure that is independent of the
|
|
* LM in use.
|
|
*/
|
|
static void
|
|
init_search_tree(ngram_search_t *ngs)
|
|
{
|
|
int32 w, ndiph, i, n_words, n_ci;
|
|
dict_t *dict = ps_search_dict(ngs);
|
|
bitvec_t *dimap;
|
|
|
|
n_words = ps_search_n_words(ngs);
|
|
ngs->homophone_set = ckd_calloc(n_words, sizeof(*ngs->homophone_set));
|
|
|
|
/* Find #single phone words, and #unique first diphones (#root channels) in dict. */
|
|
ndiph = 0;
|
|
ngs->n_1ph_words = 0;
|
|
n_ci = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef);
|
|
/* Allocate a bitvector with flags for each possible diphone. */
|
|
dimap = bitvec_alloc(n_ci * n_ci);
|
|
for (w = 0; w < n_words; w++) {
|
|
if (!dict_real_word(dict, w))
|
|
continue;
|
|
if (dict_is_single_phone(dict, w))
|
|
++ngs->n_1ph_words;
|
|
else {
|
|
int ph0, ph1;
|
|
ph0 = dict_first_phone(dict, w);
|
|
ph1 = dict_second_phone(dict, w);
|
|
/* Increment ndiph the first time we see a diphone. */
|
|
if (bitvec_is_clear(dimap, ph0 * n_ci + ph1)) {
|
|
bitvec_set(dimap, ph0 * n_ci + ph1);
|
|
++ndiph;
|
|
}
|
|
}
|
|
}
|
|
E_INFO("%d unique initial diphones\n", ndiph);
|
|
bitvec_free(dimap);
|
|
|
|
/* Add remaining dict words (</s>, <s>, <sil>, noise words) to single-phone words */
|
|
ngs->n_1ph_words += dict_num_fillers(dict) + 2;
|
|
ngs->n_root_chan_alloc = ndiph + 1;
|
|
/* Verify that these are all *actually* single-phone words,
|
|
* otherwise really bad things will happen to us. */
|
|
for (w = 0; w < n_words; ++w) {
|
|
if (dict_real_word(dict, w))
|
|
continue;
|
|
if (!dict_is_single_phone(dict, w)) {
|
|
E_WARN("Filler word %d = %s has more than one phone, ignoring it.\n",
|
|
w, dict_wordstr(dict, w));
|
|
--ngs->n_1ph_words;
|
|
}
|
|
}
|
|
|
|
/* Allocate and initialize root channels */
|
|
ngs->root_chan =
|
|
ckd_calloc(ngs->n_root_chan_alloc, sizeof(*ngs->root_chan));
|
|
for (i = 0; i < ngs->n_root_chan_alloc; i++) {
|
|
hmm_init(ngs->hmmctx, &ngs->root_chan[i].hmm, TRUE, -1, -1);
|
|
ngs->root_chan[i].penult_phn_wid = -1;
|
|
ngs->root_chan[i].next = NULL;
|
|
}
|
|
|
|
/* Permanently allocate and initialize channels for single-phone
|
|
* words (1/word). */
|
|
ngs->rhmm_1ph = ckd_calloc(ngs->n_1ph_words, sizeof(*ngs->rhmm_1ph));
|
|
i = 0;
|
|
for (w = 0; w < n_words; w++) {
|
|
if (!dict_is_single_phone(dict, w))
|
|
continue;
|
|
/* Use SIL as right context for these. */
|
|
ngs->rhmm_1ph[i].ci2phone = bin_mdef_silphone(ps_search_acmod(ngs)->mdef);
|
|
ngs->rhmm_1ph[i].ciphone = dict_first_phone(dict, w);
|
|
hmm_init(ngs->hmmctx, &ngs->rhmm_1ph[i].hmm, TRUE,
|
|
bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ngs->rhmm_1ph[i].ciphone),
|
|
bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ngs->rhmm_1ph[i].ciphone));
|
|
ngs->rhmm_1ph[i].next = NULL;
|
|
|
|
ngs->word_chan[w] = (chan_t *) &(ngs->rhmm_1ph[i]);
|
|
i++;
|
|
}
|
|
|
|
ngs->single_phone_wid = ckd_calloc(ngs->n_1ph_words,
|
|
sizeof(*ngs->single_phone_wid));
|
|
E_INFO("%d root, %d non-root channels, %d single-phone words\n",
|
|
ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
|
|
}
|
|
|
|
/*
|
|
* One-time initialization of internal channels in HMM tree.
|
|
*/
|
|
static void
|
|
init_nonroot_chan(ngram_search_t *ngs, chan_t * hmm, int32 ph, int32 ci, int32 tmatid)
|
|
{
|
|
hmm->next = NULL;
|
|
hmm->alt = NULL;
|
|
hmm->info.penult_phn_wid = -1;
|
|
hmm->ciphone = ci;
|
|
hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, ph, tmatid);
|
|
}
|
|
|
|
/*
|
|
* Allocate and initialize search channel-tree structure.
|
|
* At this point, all the root-channels have been allocated and partly initialized
|
|
* (as per init_search_tree()), and channels for all the single-phone words have been
|
|
* allocated and initialized. None of the interior channels of search-trees have
|
|
* been allocated.
|
|
* This routine may be called on every utterance, after reinit_search_tree() clears
|
|
* the search tree created for the previous utterance. Meant for reconfiguring the
|
|
* search tree to suit the currently active LM.
|
|
*/
|
|
static void
|
|
create_search_tree(ngram_search_t *ngs)
|
|
{
|
|
chan_t *hmm;
|
|
root_chan_t *rhmm;
|
|
int32 w, i, j, p, ph, tmatid;
|
|
int32 n_words;
|
|
dict_t *dict = ps_search_dict(ngs);
|
|
dict2pid_t *d2p = ps_search_dict2pid(ngs);
|
|
|
|
n_words = ps_search_n_words(ngs);
|
|
|
|
E_INFO("Creating search tree\n");
|
|
|
|
for (w = 0; w < n_words; w++)
|
|
ngs->homophone_set[w] = -1;
|
|
|
|
E_INFO("before: %d root, %d non-root channels, %d single-phone words\n",
|
|
ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
|
|
|
|
ngs->n_1ph_LMwords = 0;
|
|
ngs->n_root_chan = 0;
|
|
ngs->n_nonroot_chan = 0;
|
|
|
|
for (w = 0; w < n_words; w++) {
|
|
int ciphone, ci2phone;
|
|
|
|
/* Ignore dictionary words not in LM */
|
|
if (!ngram_model_set_known_wid(ngs->lmset, dict_basewid(dict, w)))
|
|
continue;
|
|
|
|
/* Handle single-phone words individually; not in channel tree */
|
|
if (dict_is_single_phone(dict, w)) {
|
|
E_DEBUG(1,("single_phone_wid[%d] = %s\n",
|
|
ngs->n_1ph_LMwords, dict_wordstr(dict, w)));
|
|
ngs->single_phone_wid[ngs->n_1ph_LMwords++] = w;
|
|
continue;
|
|
}
|
|
|
|
/* Find a root channel matching the initial diphone, or
|
|
* allocate one if not found. */
|
|
ciphone = dict_first_phone(dict, w);
|
|
ci2phone = dict_second_phone(dict, w);
|
|
for (i = 0; i < ngs->n_root_chan; ++i) {
|
|
if (ngs->root_chan[i].ciphone == ciphone
|
|
&& ngs->root_chan[i].ci2phone == ci2phone)
|
|
break;
|
|
}
|
|
if (i == ngs->n_root_chan) {
|
|
rhmm = &(ngs->root_chan[ngs->n_root_chan]);
|
|
rhmm->hmm.tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone);
|
|
/* Begin with CI phone? Not sure this makes a difference... */
|
|
hmm_mpx_ssid(&rhmm->hmm, 0) =
|
|
bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ciphone);
|
|
rhmm->ciphone = ciphone;
|
|
rhmm->ci2phone = ci2phone;
|
|
ngs->n_root_chan++;
|
|
}
|
|
else
|
|
rhmm = &(ngs->root_chan[i]);
|
|
|
|
E_DEBUG(3,("word %s rhmm %d\n", dict_wordstr(dict, w), rhmm - ngs->root_chan));
|
|
/* Now, rhmm = root channel for w. Go on to remaining phones */
|
|
if (dict_pronlen(dict, w) == 2) {
|
|
/* Next phone is the last; not kept in tree; add w to penult_phn_wid set */
|
|
if ((j = rhmm->penult_phn_wid) < 0)
|
|
rhmm->penult_phn_wid = w;
|
|
else {
|
|
for (; ngs->homophone_set[j] >= 0; j = ngs->homophone_set[j]);
|
|
ngs->homophone_set[j] = w;
|
|
}
|
|
}
|
|
else {
|
|
/* Add remaining phones, except the last, to tree */
|
|
ph = dict2pid_internal(d2p, w, 1);
|
|
tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, dict_pron(dict, w, 1));
|
|
hmm = rhmm->next;
|
|
if (hmm == NULL) {
|
|
rhmm->next = hmm = listelem_malloc(ngs->chan_alloc);
|
|
init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, 1), tmatid);
|
|
ngs->n_nonroot_chan++;
|
|
}
|
|
else {
|
|
chan_t *prev_hmm = NULL;
|
|
|
|
for (; hmm && (hmm_nonmpx_ssid(&hmm->hmm) != ph); hmm = hmm->alt)
|
|
prev_hmm = hmm;
|
|
if (!hmm) { /* thanks, rkm! */
|
|
prev_hmm->alt = hmm = listelem_malloc(ngs->chan_alloc);
|
|
init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, 1), tmatid);
|
|
ngs->n_nonroot_chan++;
|
|
}
|
|
}
|
|
E_DEBUG(3,("phone %s = %d\n",
|
|
bin_mdef_ciphone_str(ps_search_acmod(ngs)->mdef,
|
|
dict_second_phone(dict, w)), ph));
|
|
for (p = 2; p < dict_pronlen(dict, w) - 1; p++) {
|
|
ph = dict2pid_internal(d2p, w, p);
|
|
tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, dict_pron(dict, w, p));
|
|
if (!hmm->next) {
|
|
hmm->next = listelem_malloc(ngs->chan_alloc);
|
|
hmm = hmm->next;
|
|
init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, p), tmatid);
|
|
ngs->n_nonroot_chan++;
|
|
}
|
|
else {
|
|
chan_t *prev_hmm = NULL;
|
|
|
|
for (hmm = hmm->next; hmm && (hmm_nonmpx_ssid(&hmm->hmm) != ph);
|
|
hmm = hmm->alt)
|
|
prev_hmm = hmm;
|
|
if (!hmm) { /* thanks, rkm! */
|
|
prev_hmm->alt = hmm = listelem_malloc(ngs->chan_alloc);
|
|
init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, p), tmatid);
|
|
ngs->n_nonroot_chan++;
|
|
}
|
|
}
|
|
E_DEBUG(3,("phone %s = %d\n",
|
|
bin_mdef_ciphone_str(ps_search_acmod(ngs)->mdef,
|
|
dict_pron(dict, w, p)), ph));
|
|
}
|
|
|
|
/* All but last phone of w in tree; add w to hmm->info.penult_phn_wid set */
|
|
if ((j = hmm->info.penult_phn_wid) < 0)
|
|
hmm->info.penult_phn_wid = w;
|
|
else {
|
|
for (; ngs->homophone_set[j] >= 0; j = ngs->homophone_set[j]);
|
|
ngs->homophone_set[j] = w;
|
|
}
|
|
}
|
|
}
|
|
|
|
ngs->n_1ph_words = ngs->n_1ph_LMwords;
|
|
|
|
/* Add filler words to the array of 1ph words. */
|
|
for (w = 0; w < n_words; ++w) {
|
|
/* Skip anything that doesn't actually have a single phone. */
|
|
if (!dict_is_single_phone(dict, w))
|
|
continue;
|
|
/* Also skip "real words" and things that are in the LM. */
|
|
if (dict_real_word(dict, w))
|
|
continue;
|
|
if (ngram_model_set_known_wid(ngs->lmset, dict_basewid(dict, w)))
|
|
continue;
|
|
E_DEBUG(1,("single_phone_wid[%d] = %s\n",
|
|
ngs->n_1ph_words, dict_wordstr(dict, w)));
|
|
ngs->single_phone_wid[ngs->n_1ph_words++] = w;
|
|
}
|
|
|
|
if (ngs->n_nonroot_chan >= ngs->max_nonroot_chan) {
|
|
/* Give some room for channels for new words added dynamically at run time */
|
|
ngs->max_nonroot_chan = ngs->n_nonroot_chan + 128;
|
|
E_INFO("after: max nonroot chan increased to %d\n", ngs->max_nonroot_chan);
|
|
|
|
/* Free old active channel list array if any and allocate new one */
|
|
if (ngs->active_chan_list)
|
|
ckd_free_2d(ngs->active_chan_list);
|
|
ngs->active_chan_list = ckd_calloc_2d(2, ngs->max_nonroot_chan,
|
|
sizeof(**ngs->active_chan_list));
|
|
}
|
|
|
|
if (!ngs->n_root_chan)
|
|
E_ERROR("No word from the language model has pronunciation in the dictionary\n");
|
|
|
|
E_INFO("after: %d root, %d non-root channels, %d single-phone words\n",
|
|
ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
|
|
}
|
|
|
|
static void
|
|
reinit_search_subtree(ngram_search_t *ngs, chan_t * hmm)
|
|
{
|
|
chan_t *child, *sibling;
|
|
|
|
/* First free all children under hmm */
|
|
for (child = hmm->next; child; child = sibling) {
|
|
sibling = child->alt;
|
|
reinit_search_subtree(ngs, child);
|
|
}
|
|
|
|
/* Now free hmm */
|
|
hmm_deinit(&hmm->hmm);
|
|
listelem_free(ngs->chan_alloc, hmm);
|
|
}
|
|
|
|
/*
|
|
* Delete search tree by freeing all interior channels within search tree and
|
|
* restoring root channel state to the init state (i.e., just after init_search_tree()).
|
|
*/
|
|
static void
|
|
reinit_search_tree(ngram_search_t *ngs)
|
|
{
|
|
int32 i;
|
|
chan_t *hmm, *sibling;
|
|
|
|
for (i = 0; i < ngs->n_root_chan; i++) {
|
|
hmm = ngs->root_chan[i].next;
|
|
|
|
while (hmm) {
|
|
sibling = hmm->alt;
|
|
reinit_search_subtree(ngs, hmm);
|
|
hmm = sibling;
|
|
}
|
|
|
|
ngs->root_chan[i].penult_phn_wid = -1;
|
|
ngs->root_chan[i].next = NULL;
|
|
}
|
|
ngs->n_nonroot_chan = 0;
|
|
}
|
|
|
|
void
|
|
ngram_fwdtree_init(ngram_search_t *ngs)
|
|
{
|
|
/* Allocate bestbp_rc, lastphn_cand, last_ltrans */
|
|
ngs->bestbp_rc = ckd_calloc(bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef),
|
|
sizeof(*ngs->bestbp_rc));
|
|
ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs),
|
|
sizeof(*ngs->lastphn_cand));
|
|
init_search_tree(ngs);
|
|
create_search_tree(ngs);
|
|
}
|
|
|
|
static void
|
|
deinit_search_tree(ngram_search_t *ngs)
|
|
{
|
|
int i, w, n_words;
|
|
|
|
n_words = ps_search_n_words(ngs);
|
|
for (i = 0; i < ngs->n_root_chan_alloc; i++) {
|
|
hmm_deinit(&ngs->root_chan[i].hmm);
|
|
}
|
|
if (ngs->rhmm_1ph) {
|
|
for (i = w = 0; w < n_words; ++w) {
|
|
if (!dict_is_single_phone(ps_search_dict(ngs), w))
|
|
continue;
|
|
hmm_deinit(&ngs->rhmm_1ph[i].hmm);
|
|
++i;
|
|
}
|
|
ckd_free(ngs->rhmm_1ph);
|
|
ngs->rhmm_1ph = NULL;
|
|
}
|
|
ngs->n_root_chan = 0;
|
|
ngs->n_root_chan_alloc = 0;
|
|
ckd_free(ngs->root_chan);
|
|
ngs->root_chan = NULL;
|
|
ckd_free(ngs->single_phone_wid);
|
|
ngs->single_phone_wid = NULL;
|
|
ckd_free(ngs->homophone_set);
|
|
ngs->homophone_set = NULL;
|
|
}
|
|
|
|
void
|
|
ngram_fwdtree_deinit(ngram_search_t *ngs)
|
|
{
|
|
double n_speech = (double)ngs->n_tot_frame
|
|
/ cmd_ln_int32_r(ps_search_config(ngs), "-frate");
|
|
|
|
E_INFO("TOTAL fwdtree %.2f CPU %.3f xRT\n",
|
|
ngs->fwdtree_perf.t_tot_cpu,
|
|
ngs->fwdtree_perf.t_tot_cpu / n_speech);
|
|
E_INFO("TOTAL fwdtree %.2f wall %.3f xRT\n",
|
|
ngs->fwdtree_perf.t_tot_elapsed,
|
|
ngs->fwdtree_perf.t_tot_elapsed / n_speech);
|
|
|
|
/* Reset non-root channels. */
|
|
reinit_search_tree(ngs);
|
|
/* Free the search tree. */
|
|
deinit_search_tree(ngs);
|
|
/* Free other stuff. */
|
|
ngs->max_nonroot_chan = 0;
|
|
ckd_free_2d(ngs->active_chan_list);
|
|
ngs->active_chan_list = NULL;
|
|
ckd_free(ngs->cand_sf);
|
|
ngs->cand_sf = NULL;
|
|
ckd_free(ngs->bestbp_rc);
|
|
ngs->bestbp_rc = NULL;
|
|
ckd_free(ngs->lastphn_cand);
|
|
ngs->lastphn_cand = NULL;
|
|
}
|
|
|
|
int
|
|
ngram_fwdtree_reinit(ngram_search_t *ngs)
|
|
{
|
|
/* Reset non-root channels. */
|
|
reinit_search_tree(ngs);
|
|
/* Free the search tree. */
|
|
deinit_search_tree(ngs);
|
|
/* Reallocate things that depend on the number of words. */
|
|
ckd_free(ngs->lastphn_cand);
|
|
ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs),
|
|
sizeof(*ngs->lastphn_cand));
|
|
ckd_free(ngs->word_chan);
|
|
ngs->word_chan = ckd_calloc(ps_search_n_words(ngs),
|
|
sizeof(*ngs->word_chan));
|
|
/* Rebuild the search tree. */
|
|
init_search_tree(ngs);
|
|
create_search_tree(ngs);
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
ngram_fwdtree_start(ngram_search_t *ngs)
|
|
{
|
|
ps_search_t *base = (ps_search_t *)ngs;
|
|
int32 i, w, n_words;
|
|
root_chan_t *rhmm;
|
|
|
|
n_words = ps_search_n_words(ngs);
|
|
|
|
/* Reset utterance statistics. */
|
|
memset(&ngs->st, 0, sizeof(ngs->st));
|
|
ptmr_reset(&ngs->fwdtree_perf);
|
|
ptmr_start(&ngs->fwdtree_perf);
|
|
|
|
/* Reset backpointer table. */
|
|
ngs->bpidx = 0;
|
|
ngs->bss_head = 0;
|
|
|
|
/* Reset word lattice. */
|
|
for (i = 0; i < n_words; ++i)
|
|
ngs->word_lat_idx[i] = NO_BP;
|
|
|
|
/* Reset active HMM and word lists. */
|
|
ngs->n_active_chan[0] = ngs->n_active_chan[1] = 0;
|
|
ngs->n_active_word[0] = ngs->n_active_word[1] = 0;
|
|
|
|
/* Reset scores. */
|
|
ngs->best_score = 0;
|
|
ngs->renormalized = 0;
|
|
|
|
/* Reset other stuff. */
|
|
for (i = 0; i < n_words; i++)
|
|
ngs->last_ltrans[i].sf = -1;
|
|
ngs->n_frame = 0;
|
|
|
|
/* Clear the hypothesis string. */
|
|
ckd_free(base->hyp_str);
|
|
base->hyp_str = NULL;
|
|
|
|
/* Reset the permanently allocated single-phone words, since they
|
|
* may have junk left over in them from FWDFLAT. */
|
|
for (i = 0; i < ngs->n_1ph_words; i++) {
|
|
w = ngs->single_phone_wid[i];
|
|
rhmm = (root_chan_t *) ngs->word_chan[w];
|
|
hmm_clear(&rhmm->hmm);
|
|
}
|
|
|
|
/* Start search with <s>; word_chan[<s>] is permanently allocated */
|
|
rhmm = (root_chan_t *) ngs->word_chan[dict_startwid(ps_search_dict(ngs))];
|
|
hmm_clear(&rhmm->hmm);
|
|
hmm_enter(&rhmm->hmm, 0, NO_BP, 0);
|
|
}
|
|
|
|
/*
|
|
* Mark the active senones for all senones belonging to channels that are active in the
|
|
* current frame.
|
|
*/
|
|
static void
|
|
compute_sen_active(ngram_search_t *ngs, int frame_idx)
|
|
{
|
|
root_chan_t *rhmm;
|
|
chan_t *hmm, **acl;
|
|
int32 i, w, *awl;
|
|
|
|
acmod_clear_active(ps_search_acmod(ngs));
|
|
|
|
/* Flag active senones for root channels */
|
|
for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
|
|
if (hmm_frame(&rhmm->hmm) == frame_idx)
|
|
acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
|
|
}
|
|
|
|
/* Flag active senones for nonroot channels in HMM tree */
|
|
i = ngs->n_active_chan[frame_idx & 0x1];
|
|
acl = ngs->active_chan_list[frame_idx & 0x1];
|
|
for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
|
|
acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
|
|
}
|
|
|
|
/* Flag active senones for individual word channels */
|
|
i = ngs->n_active_word[frame_idx & 0x1];
|
|
awl = ngs->active_word_list[frame_idx & 0x1];
|
|
for (w = *(awl++); i > 0; --i, w = *(awl++)) {
|
|
for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
|
|
acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
|
|
}
|
|
}
|
|
for (i = 0; i < ngs->n_1ph_words; i++) {
|
|
w = ngs->single_phone_wid[i];
|
|
rhmm = (root_chan_t *) ngs->word_chan[w];
|
|
|
|
if (hmm_frame(&rhmm->hmm) == frame_idx)
|
|
acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
|
|
}
|
|
}
|
|
|
|
static void
|
|
renormalize_scores(ngram_search_t *ngs, int frame_idx, int32 norm)
|
|
{
|
|
root_chan_t *rhmm;
|
|
chan_t *hmm, **acl;
|
|
int32 i, w, *awl;
|
|
|
|
/* Renormalize root channels */
|
|
for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
|
|
if (hmm_frame(&rhmm->hmm) == frame_idx) {
|
|
hmm_normalize(&rhmm->hmm, norm);
|
|
}
|
|
}
|
|
|
|
/* Renormalize nonroot channels in HMM tree */
|
|
i = ngs->n_active_chan[frame_idx & 0x1];
|
|
acl = ngs->active_chan_list[frame_idx & 0x1];
|
|
for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
|
|
hmm_normalize(&hmm->hmm, norm);
|
|
}
|
|
|
|
/* Renormalize individual word channels */
|
|
i = ngs->n_active_word[frame_idx & 0x1];
|
|
awl = ngs->active_word_list[frame_idx & 0x1];
|
|
for (w = *(awl++); i > 0; --i, w = *(awl++)) {
|
|
for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
|
|
hmm_normalize(&hmm->hmm, norm);
|
|
}
|
|
}
|
|
for (i = 0; i < ngs->n_1ph_words; i++) {
|
|
w = ngs->single_phone_wid[i];
|
|
rhmm = (root_chan_t *) ngs->word_chan[w];
|
|
if (hmm_frame(&rhmm->hmm) == frame_idx) {
|
|
hmm_normalize(&rhmm->hmm, norm);
|
|
}
|
|
}
|
|
|
|
ngs->renormalized = TRUE;
|
|
}
|
|
|
|
static int32
|
|
eval_root_chan(ngram_search_t *ngs, int frame_idx)
|
|
{
|
|
root_chan_t *rhmm;
|
|
int32 i, bestscore;
|
|
|
|
bestscore = WORST_SCORE;
|
|
for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
|
|
if (hmm_frame(&rhmm->hmm) == frame_idx) {
|
|
int32 score = chan_v_eval(rhmm);
|
|
if (score BETTER_THAN bestscore)
|
|
bestscore = score;
|
|
++ngs->st.n_root_chan_eval;
|
|
}
|
|
}
|
|
return (bestscore);
|
|
}
|
|
|
|
static int32
|
|
eval_nonroot_chan(ngram_search_t *ngs, int frame_idx)
|
|
{
|
|
chan_t *hmm, **acl;
|
|
int32 i, bestscore;
|
|
|
|
i = ngs->n_active_chan[frame_idx & 0x1];
|
|
acl = ngs->active_chan_list[frame_idx & 0x1];
|
|
bestscore = WORST_SCORE;
|
|
ngs->st.n_nonroot_chan_eval += i;
|
|
|
|
for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
|
|
int32 score = chan_v_eval(hmm);
|
|
assert(hmm_frame(&hmm->hmm) == frame_idx);
|
|
if (score BETTER_THAN bestscore)
|
|
bestscore = score;
|
|
}
|
|
|
|
return bestscore;
|
|
}
|
|
|
|
static int32
|
|
eval_word_chan(ngram_search_t *ngs, int frame_idx)
|
|
{
|
|
root_chan_t *rhmm;
|
|
chan_t *hmm;
|
|
int32 i, w, bestscore, *awl, j, k;
|
|
|
|
k = 0;
|
|
bestscore = WORST_SCORE;
|
|
awl = ngs->active_word_list[frame_idx & 0x1];
|
|
|
|
i = ngs->n_active_word[frame_idx & 0x1];
|
|
for (w = *(awl++); i > 0; --i, w = *(awl++)) {
|
|
assert(bitvec_is_set(ngs->word_active, w));
|
|
bitvec_clear(ngs->word_active, w);
|
|
assert(ngs->word_chan[w] != NULL);
|
|
|
|
for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
|
|
int32 score;
|
|
|
|
assert(hmm_frame(&hmm->hmm) == frame_idx);
|
|
score = chan_v_eval(hmm);
|
|
/*printf("eval word chan %d score %d\n", w, score); */
|
|
|
|
if (score BETTER_THAN bestscore)
|
|
bestscore = score;
|
|
|
|
k++;
|
|
}
|
|
}
|
|
|
|
/* Similarly for statically allocated single-phone words */
|
|
j = 0;
|
|
for (i = 0; i < ngs->n_1ph_words; i++) {
|
|
int32 score;
|
|
|
|
w = ngs->single_phone_wid[i];
|
|
rhmm = (root_chan_t *) ngs->word_chan[w];
|
|
if (hmm_frame(&rhmm->hmm) < frame_idx)
|
|
continue;
|
|
|
|
score = chan_v_eval(rhmm);
|
|
/* printf("eval 1ph word chan %d score %d\n", w, score); */
|
|
if (score BETTER_THAN bestscore && w != ps_search_finish_wid(ngs))
|
|
bestscore = score;
|
|
|
|
j++;
|
|
}
|
|
|
|
ngs->st.n_last_chan_eval += k + j;
|
|
ngs->st.n_nonroot_chan_eval += k + j;
|
|
ngs->st.n_word_lastchan_eval +=
|
|
ngs->n_active_word[frame_idx & 0x1] + j;
|
|
|
|
return bestscore;
|
|
}
|
|
|
|
static int32
|
|
evaluate_channels(ngram_search_t *ngs, int16 const *senone_scores, int frame_idx)
|
|
{
|
|
int32 bs;
|
|
|
|
hmm_context_set_senscore(ngs->hmmctx, senone_scores);
|
|
ngs->best_score = eval_root_chan(ngs, frame_idx);
|
|
if ((bs = eval_nonroot_chan(ngs, frame_idx)) BETTER_THAN ngs->best_score)
|
|
ngs->best_score = bs;
|
|
if ((bs = eval_word_chan(ngs, frame_idx)) BETTER_THAN ngs->best_score)
|
|
ngs->best_score = bs;
|
|
ngs->last_phone_best_score = bs;
|
|
|
|
return ngs->best_score;
|
|
}
|
|
|
|
/*
|
|
* Prune currently active root channels for next frame. Also, perform exit
|
|
* transitions out of them and activate successors.
|
|
* score[] of pruned root chan set to WORST_SCORE elsewhere.
|
|
*/
|
|
static void
|
|
prune_root_chan(ngram_search_t *ngs, int frame_idx)
|
|
{
|
|
root_chan_t *rhmm;
|
|
chan_t *hmm;
|
|
int32 i, nf, w;
|
|
int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
|
|
chan_t **nacl; /* next active list */
|
|
lastphn_cand_t *candp;
|
|
phone_loop_search_t *pls;
|
|
|
|
nf = frame_idx + 1;
|
|
thresh = ngs->best_score + ngs->dynamic_beam;
|
|
newphone_thresh = ngs->best_score + ngs->pbeam;
|
|
lastphn_thresh = ngs->best_score + ngs->lpbeam;
|
|
nacl = ngs->active_chan_list[nf & 0x1];
|
|
pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
|
|
|
|
for (i = 0, rhmm = ngs->root_chan; i < ngs->n_root_chan; i++, rhmm++) {
|
|
E_DEBUG(3,("Root channel %d frame %d score %d thresh %d\n",
|
|
i, hmm_frame(&rhmm->hmm), hmm_bestscore(&rhmm->hmm), thresh));
|
|
/* First check if this channel was active in current frame */
|
|
if (hmm_frame(&rhmm->hmm) < frame_idx)
|
|
continue;
|
|
|
|
if (hmm_bestscore(&rhmm->hmm) BETTER_THAN thresh) {
|
|
hmm_frame(&rhmm->hmm) = nf; /* rhmm will be active in next frame */
|
|
E_DEBUG(3,("Preserving root channel %d score %d\n", i, hmm_bestscore(&rhmm->hmm)));
|
|
/* transitions out of this root channel */
|
|
/* transition to all next-level channels in the HMM tree */
|
|
newphone_score = hmm_out_score(&rhmm->hmm) + ngs->pip;
|
|
if (pls != NULL || newphone_score BETTER_THAN newphone_thresh) {
|
|
for (hmm = rhmm->next; hmm; hmm = hmm->alt) {
|
|
int32 pl_newphone_score = newphone_score
|
|
+ phone_loop_search_score(pls, hmm->ciphone);
|
|
if (pl_newphone_score BETTER_THAN newphone_thresh) {
|
|
if ((hmm_frame(&hmm->hmm) < frame_idx)
|
|
|| (newphone_score BETTER_THAN hmm_in_score(&hmm->hmm))) {
|
|
hmm_enter(&hmm->hmm, newphone_score,
|
|
hmm_out_history(&rhmm->hmm), nf);
|
|
*(nacl++) = hmm;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Transition to last phone of all words for which this is the
|
|
* penultimate phone (the last phones may need multiple right contexts).
|
|
* Remember to remove the temporary newword_penalty.
|
|
*/
|
|
if (pls != NULL || newphone_score BETTER_THAN lastphn_thresh) {
|
|
for (w = rhmm->penult_phn_wid; w >= 0;
|
|
w = ngs->homophone_set[w]) {
|
|
int32 pl_newphone_score = newphone_score
|
|
+ phone_loop_search_score
|
|
(pls, dict_last_phone(ps_search_dict(ngs),w));
|
|
E_DEBUG(3,("word %s newphone_score %d\n", dict_wordstr(ps_search_dict(ngs), w), newphone_score));
|
|
if (pl_newphone_score BETTER_THAN lastphn_thresh) {
|
|
candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
|
|
ngs->n_lastphn_cand++;
|
|
candp->wid = w;
|
|
candp->score =
|
|
newphone_score - ngs->nwpen;
|
|
candp->bp = hmm_out_history(&rhmm->hmm);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
ngs->n_active_chan[nf & 0x1] = (int)(nacl - ngs->active_chan_list[nf & 0x1]);
|
|
}
|
|
|
|
/*
|
|
* Prune currently active nonroot channels in HMM tree for next frame. Also, perform
|
|
* exit transitions out of such channels and activate successors.
|
|
*/
|
|
static void
|
|
prune_nonroot_chan(ngram_search_t *ngs, int frame_idx)
|
|
{
|
|
chan_t *hmm, *nexthmm;
|
|
int32 nf, w, i;
|
|
int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
|
|
chan_t **acl, **nacl; /* active list, next active list */
|
|
lastphn_cand_t *candp;
|
|
phone_loop_search_t *pls;
|
|
|
|
nf = frame_idx + 1;
|
|
|
|
thresh = ngs->best_score + ngs->dynamic_beam;
|
|
newphone_thresh = ngs->best_score + ngs->pbeam;
|
|
lastphn_thresh = ngs->best_score + ngs->lpbeam;
|
|
pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
|
|
|
|
acl = ngs->active_chan_list[frame_idx & 0x1]; /* currently active HMMs in tree */
|
|
nacl = ngs->active_chan_list[nf & 0x1] + ngs->n_active_chan[nf & 0x1];
|
|
|
|
for (i = ngs->n_active_chan[frame_idx & 0x1], hmm = *(acl++); i > 0;
|
|
--i, hmm = *(acl++)) {
|
|
assert(hmm_frame(&hmm->hmm) >= frame_idx);
|
|
|
|
if (hmm_bestscore(&hmm->hmm) BETTER_THAN thresh) {
|
|
/* retain this channel in next frame */
|
|
if (hmm_frame(&hmm->hmm) != nf) {
|
|
hmm_frame(&hmm->hmm) = nf;
|
|
*(nacl++) = hmm;
|
|
}
|
|
|
|
/* transition to all next-level channel in the HMM tree */
|
|
newphone_score = hmm_out_score(&hmm->hmm) + ngs->pip;
|
|
if (pls != NULL || newphone_score BETTER_THAN newphone_thresh) {
|
|
for (nexthmm = hmm->next; nexthmm; nexthmm = nexthmm->alt) {
|
|
int32 pl_newphone_score = newphone_score
|
|
+ phone_loop_search_score(pls, nexthmm->ciphone);
|
|
if ((pl_newphone_score BETTER_THAN newphone_thresh)
|
|
&& ((hmm_frame(&nexthmm->hmm) < frame_idx)
|
|
|| (newphone_score
|
|
BETTER_THAN hmm_in_score(&nexthmm->hmm)))) {
|
|
if (hmm_frame(&nexthmm->hmm) != nf) {
|
|
/* Keep this HMM on the active list */
|
|
*(nacl++) = nexthmm;
|
|
}
|
|
hmm_enter(&nexthmm->hmm, newphone_score,
|
|
hmm_out_history(&hmm->hmm), nf);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Transition to last phone of all words for which this is the
|
|
* penultimate phone (the last phones may need multiple right contexts).
|
|
* Remember to remove the temporary newword_penalty.
|
|
*/
|
|
if (pls != NULL || newphone_score BETTER_THAN lastphn_thresh) {
|
|
for (w = hmm->info.penult_phn_wid; w >= 0;
|
|
w = ngs->homophone_set[w]) {
|
|
int32 pl_newphone_score = newphone_score
|
|
+ phone_loop_search_score
|
|
(pls, dict_last_phone(ps_search_dict(ngs),w));
|
|
if (pl_newphone_score BETTER_THAN lastphn_thresh) {
|
|
candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
|
|
ngs->n_lastphn_cand++;
|
|
candp->wid = w;
|
|
candp->score =
|
|
newphone_score - ngs->nwpen;
|
|
candp->bp = hmm_out_history(&hmm->hmm);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else if (hmm_frame(&hmm->hmm) != nf) {
|
|
hmm_clear(&hmm->hmm);
|
|
}
|
|
}
|
|
ngs->n_active_chan[nf & 0x1] = (int)(nacl - ngs->active_chan_list[nf & 0x1]);
|
|
}
|
|
|
|
/*
|
|
* Execute the transition into the last phone for all candidates words emerging from
|
|
* the HMM tree. Attach LM scores to such transitions.
|
|
* (Executed after pruning root and non-root, but before pruning word-chan.)
|
|
*/
|
|
static void
|
|
last_phone_transition(ngram_search_t *ngs, int frame_idx)
|
|
{
|
|
int32 i, j, k, nf, bp, bpend, w;
|
|
lastphn_cand_t *candp;
|
|
int32 *nawl;
|
|
int32 thresh;
|
|
int32 bestscore, dscr;
|
|
chan_t *hmm;
|
|
bptbl_t *bpe;
|
|
int32 n_cand_sf = 0;
|
|
|
|
nf = frame_idx + 1;
|
|
nawl = ngs->active_word_list[nf & 0x1];
|
|
ngs->st.n_lastphn_cand_utt += ngs->n_lastphn_cand;
|
|
|
|
/* For each candidate word (entering its last phone) */
|
|
/* If best LM score and bp for candidate known use it, else sort cands by startfrm */
|
|
for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
|
|
int32 start_score;
|
|
|
|
/* This can happen if recognition fails. */
|
|
if (candp->bp == -1)
|
|
continue;
|
|
/* Backpointer entry for it. */
|
|
bpe = &(ngs->bp_table[candp->bp]);
|
|
|
|
/* Subtract starting score for candidate, leave it with only word score */
|
|
start_score = ngram_search_exit_score
|
|
(ngs, bpe, dict_first_phone(ps_search_dict(ngs), candp->wid));
|
|
assert(start_score BETTER_THAN WORST_SCORE);
|
|
candp->score -= start_score;
|
|
|
|
/*
|
|
* If this candidate not occurred in an earlier frame, prepare for finding
|
|
* best transition score into last phone; sort by start frame.
|
|
*/
|
|
/* i.e. if we don't have an entry in last_ltrans for this
|
|
* <word,sf>, then create one */
|
|
if (ngs->last_ltrans[candp->wid].sf != bpe->frame + 1) {
|
|
/* Look for an entry in cand_sf matching the backpointer
|
|
* for this candidate. */
|
|
for (j = 0; j < n_cand_sf; j++) {
|
|
if (ngs->cand_sf[j].bp_ef == bpe->frame)
|
|
break;
|
|
}
|
|
/* Oh, we found one, so chain onto it. */
|
|
if (j < n_cand_sf)
|
|
candp->next = ngs->cand_sf[j].cand;
|
|
else {
|
|
/* Nope, let's make a new one, allocating cand_sf if necessary. */
|
|
if (n_cand_sf >= ngs->cand_sf_alloc) {
|
|
if (ngs->cand_sf_alloc == 0) {
|
|
ngs->cand_sf =
|
|
ckd_calloc(CAND_SF_ALLOCSIZE,
|
|
sizeof(*ngs->cand_sf));
|
|
ngs->cand_sf_alloc = CAND_SF_ALLOCSIZE;
|
|
}
|
|
else {
|
|
ngs->cand_sf_alloc += CAND_SF_ALLOCSIZE;
|
|
ngs->cand_sf = ckd_realloc(ngs->cand_sf,
|
|
ngs->cand_sf_alloc
|
|
* sizeof(*ngs->cand_sf));
|
|
E_INFO("cand_sf[] increased to %d entries\n",
|
|
ngs->cand_sf_alloc);
|
|
}
|
|
}
|
|
|
|
/* Use the newly created cand_sf. */
|
|
j = n_cand_sf++;
|
|
candp->next = -1; /* End of the chain. */
|
|
ngs->cand_sf[j].bp_ef = bpe->frame;
|
|
}
|
|
/* Update it to point to this candidate. */
|
|
ngs->cand_sf[j].cand = i;
|
|
|
|
ngs->last_ltrans[candp->wid].dscr = WORST_SCORE;
|
|
ngs->last_ltrans[candp->wid].sf = bpe->frame + 1;
|
|
}
|
|
}
|
|
|
|
/* Compute best LM score and bp for new cands entered in the sorted lists above */
|
|
for (i = 0; i < n_cand_sf; i++) {
|
|
/* For the i-th unique end frame... */
|
|
bp = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef];
|
|
bpend = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef + 1];
|
|
for (bpe = &(ngs->bp_table[bp]); bp < bpend; bp++, bpe++) {
|
|
if (!bpe->valid)
|
|
continue;
|
|
/* For each candidate at the start frame find bp->cand transition-score */
|
|
for (j = ngs->cand_sf[i].cand; j >= 0; j = candp->next) {
|
|
int32 n_used;
|
|
candp = &(ngs->lastphn_cand[j]);
|
|
dscr =
|
|
ngram_search_exit_score
|
|
(ngs, bpe, dict_first_phone(ps_search_dict(ngs), candp->wid));
|
|
if (dscr BETTER_THAN WORST_SCORE) {
|
|
assert(!dict_filler_word(ps_search_dict(ngs), candp->wid));
|
|
dscr += ngram_tg_score(ngs->lmset,
|
|
dict_basewid(ps_search_dict(ngs), candp->wid),
|
|
bpe->real_wid,
|
|
bpe->prev_real_wid,
|
|
&n_used)>>SENSCR_SHIFT;
|
|
}
|
|
|
|
if (dscr BETTER_THAN ngs->last_ltrans[candp->wid].dscr) {
|
|
ngs->last_ltrans[candp->wid].dscr = dscr;
|
|
ngs->last_ltrans[candp->wid].bp = bp;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Update best transitions for all candidates; also update best lastphone score */
|
|
bestscore = ngs->last_phone_best_score;
|
|
for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
|
|
candp->score += ngs->last_ltrans[candp->wid].dscr;
|
|
candp->bp = ngs->last_ltrans[candp->wid].bp;
|
|
|
|
if (candp->score BETTER_THAN bestscore)
|
|
bestscore = candp->score;
|
|
}
|
|
ngs->last_phone_best_score = bestscore;
|
|
|
|
/* At this pt, we know the best entry score (with LM component) for all candidates */
|
|
thresh = bestscore + ngs->lponlybeam;
|
|
for (i = ngs->n_lastphn_cand, candp = ngs->lastphn_cand; i > 0; --i, candp++) {
|
|
if (candp->score BETTER_THAN thresh) {
|
|
w = candp->wid;
|
|
|
|
ngram_search_alloc_all_rc(ngs, w);
|
|
|
|
k = 0;
|
|
for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
|
|
if ((hmm_frame(&hmm->hmm) < frame_idx)
|
|
|| (candp->score BETTER_THAN hmm_in_score(&hmm->hmm))) {
|
|
assert(hmm_frame(&hmm->hmm) != nf);
|
|
hmm_enter(&hmm->hmm,
|
|
candp->score, candp->bp, nf);
|
|
k++;
|
|
}
|
|
}
|
|
if (k > 0) {
|
|
assert(bitvec_is_clear(ngs->word_active, w));
|
|
assert(!dict_is_single_phone(ps_search_dict(ngs), w));
|
|
*(nawl++) = w;
|
|
bitvec_set(ngs->word_active, w);
|
|
}
|
|
}
|
|
}
|
|
ngs->n_active_word[nf & 0x1] = (int)(nawl - ngs->active_word_list[nf & 0x1]);
|
|
}
|
|
|
|
/*
|
|
* Prune currently active word channels for next frame. Also, perform exit
|
|
* transitions out of such channels and active successors.
|
|
*/
|
|
static void
|
|
prune_word_chan(ngram_search_t *ngs, int frame_idx)
|
|
{
|
|
root_chan_t *rhmm;
|
|
chan_t *hmm, *thmm;
|
|
chan_t **phmmp; /* previous HMM-pointer */
|
|
int32 nf, w, i, k;
|
|
int32 newword_thresh, lastphn_thresh;
|
|
int32 *awl, *nawl;
|
|
|
|
nf = frame_idx + 1;
|
|
newword_thresh = ngs->last_phone_best_score + ngs->wbeam;
|
|
lastphn_thresh = ngs->last_phone_best_score + ngs->lponlybeam;
|
|
|
|
awl = ngs->active_word_list[frame_idx & 0x1];
|
|
nawl = ngs->active_word_list[nf & 0x1] + ngs->n_active_word[nf & 0x1];
|
|
|
|
/* Dynamically allocated last channels of multi-phone words */
|
|
for (i = ngs->n_active_word[frame_idx & 0x1], w = *(awl++); i > 0;
|
|
--i, w = *(awl++)) {
|
|
k = 0;
|
|
phmmp = &(ngs->word_chan[w]);
|
|
for (hmm = ngs->word_chan[w]; hmm; hmm = thmm) {
|
|
assert(hmm_frame(&hmm->hmm) >= frame_idx);
|
|
|
|
thmm = hmm->next;
|
|
if (hmm_bestscore(&hmm->hmm) BETTER_THAN lastphn_thresh) {
|
|
/* retain this channel in next frame */
|
|
hmm_frame(&hmm->hmm) = nf;
|
|
k++;
|
|
phmmp = &(hmm->next);
|
|
|
|
/* Could if ((! skip_alt_frm) || (frame_idx & 0x1)) the following */
|
|
if (hmm_out_score(&hmm->hmm) BETTER_THAN newword_thresh) {
|
|
/* can exit channel and recognize word */
|
|
ngram_search_save_bp(ngs, frame_idx, w,
|
|
hmm_out_score(&hmm->hmm),
|
|
hmm_out_history(&hmm->hmm),
|
|
hmm->info.rc_id);
|
|
}
|
|
}
|
|
else if (hmm_frame(&hmm->hmm) == nf) {
|
|
phmmp = &(hmm->next);
|
|
}
|
|
else {
|
|
hmm_deinit(&hmm->hmm);
|
|
listelem_free(ngs->chan_alloc, hmm);
|
|
*phmmp = thmm;
|
|
}
|
|
}
|
|
if ((k > 0) && (bitvec_is_clear(ngs->word_active, w))) {
|
|
assert(!dict_is_single_phone(ps_search_dict(ngs), w));
|
|
*(nawl++) = w;
|
|
bitvec_set(ngs->word_active, w);
|
|
}
|
|
}
|
|
ngs->n_active_word[nf & 0x1] = (int)(nawl - ngs->active_word_list[nf & 0x1]);
|
|
|
|
/*
|
|
* Prune permanently allocated single-phone channels.
|
|
* NOTES: score[] of pruned channels set to WORST_SCORE elsewhere.
|
|
*/
|
|
for (i = 0; i < ngs->n_1ph_words; i++) {
|
|
w = ngs->single_phone_wid[i];
|
|
rhmm = (root_chan_t *) ngs->word_chan[w];
|
|
E_DEBUG(3,("Single phone word %s frame %d score %d thresh %d outscore %d nwthresh %d\n",
|
|
dict_wordstr(ps_search_dict(ngs),w),
|
|
hmm_frame(&rhmm->hmm), hmm_bestscore(&rhmm->hmm),
|
|
lastphn_thresh, hmm_out_score(&rhmm->hmm), newword_thresh));
|
|
if (hmm_frame(&rhmm->hmm) < frame_idx)
|
|
continue;
|
|
if (hmm_bestscore(&rhmm->hmm) BETTER_THAN lastphn_thresh) {
|
|
hmm_frame(&rhmm->hmm) = nf;
|
|
|
|
/* Could if ((! skip_alt_frm) || (frame_idx & 0x1)) the following */
|
|
if (hmm_out_score(&rhmm->hmm) BETTER_THAN newword_thresh) {
|
|
E_DEBUG(4,("Exiting single phone word %s with %d > %d, %d\n",
|
|
dict_wordstr(ps_search_dict(ngs),w),
|
|
hmm_out_score(&rhmm->hmm),
|
|
lastphn_thresh, newword_thresh));
|
|
ngram_search_save_bp(ngs, frame_idx, w,
|
|
hmm_out_score(&rhmm->hmm),
|
|
hmm_out_history(&rhmm->hmm), 0);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
prune_channels(ngram_search_t *ngs, int frame_idx)
|
|
{
|
|
/* Clear last phone candidate list. */
|
|
ngs->n_lastphn_cand = 0;
|
|
/* Set the dynamic beam based on maxhmmpf here. */
|
|
ngs->dynamic_beam = ngs->beam;
|
|
if (ngs->maxhmmpf != -1
|
|
&& ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval > ngs->maxhmmpf) {
|
|
/* Build a histogram to approximately prune them. */
|
|
int32 bins[256], bw, nhmms, i;
|
|
root_chan_t *rhmm;
|
|
chan_t **acl, *hmm;
|
|
|
|
/* Bins go from zero (best score) to edge of beam. */
|
|
bw = -ngs->beam / 256;
|
|
memset(bins, 0, sizeof(bins));
|
|
/* For each active root channel. */
|
|
for (i = 0, rhmm = ngs->root_chan; i < ngs->n_root_chan; i++, rhmm++) {
|
|
int32 b;
|
|
|
|
/* Put it in a bin according to its bestscore. */
|
|
b = (ngs->best_score - hmm_bestscore(&rhmm->hmm)) / bw;
|
|
if (b >= 256)
|
|
b = 255;
|
|
++bins[b];
|
|
}
|
|
/* For each active non-root channel. */
|
|
acl = ngs->active_chan_list[frame_idx & 0x1]; /* currently active HMMs in tree */
|
|
for (i = ngs->n_active_chan[frame_idx & 0x1], hmm = *(acl++);
|
|
i > 0; --i, hmm = *(acl++)) {
|
|
int32 b;
|
|
|
|
/* Put it in a bin according to its bestscore. */
|
|
b = (ngs->best_score - hmm_bestscore(&hmm->hmm)) / bw;
|
|
if (b >= 256)
|
|
b = 255;
|
|
++bins[b];
|
|
}
|
|
/* Walk down the bins to find the new beam. */
|
|
for (i = nhmms = 0; i < 256; ++i) {
|
|
nhmms += bins[i];
|
|
if (nhmms > ngs->maxhmmpf)
|
|
break;
|
|
}
|
|
ngs->dynamic_beam = -(i * bw);
|
|
}
|
|
|
|
prune_root_chan(ngs, frame_idx);
|
|
prune_nonroot_chan(ngs, frame_idx);
|
|
last_phone_transition(ngs, frame_idx);
|
|
prune_word_chan(ngs, frame_idx);
|
|
}
|
|
|
|
/*
|
|
* Limit the number of word exits in each frame to maxwpf. And also limit the number of filler
|
|
* words to 1.
|
|
*/
|
|
static void
|
|
bptable_maxwpf(ngram_search_t *ngs, int frame_idx)
|
|
{
|
|
int32 bp, n;
|
|
int32 bestscr, worstscr;
|
|
bptbl_t *bpe, *bestbpe, *worstbpe;
|
|
|
|
/* Don't prune if no pruing. */
|
|
if (ngs->maxwpf == -1 || ngs->maxwpf == ps_search_n_words(ngs))
|
|
return;
|
|
|
|
/* Allow only one filler word exit (the best) per frame */
|
|
bestscr = (int32) 0x80000000;
|
|
bestbpe = NULL;
|
|
n = 0;
|
|
for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
|
|
bpe = &(ngs->bp_table[bp]);
|
|
if (dict_filler_word(ps_search_dict(ngs), bpe->wid)) {
|
|
if (bpe->score BETTER_THAN bestscr) {
|
|
bestscr = bpe->score;
|
|
bestbpe = bpe;
|
|
}
|
|
bpe->valid = FALSE;
|
|
n++; /* No. of filler words */
|
|
}
|
|
}
|
|
/* Restore bestbpe to valid state */
|
|
if (bestbpe != NULL) {
|
|
bestbpe->valid = TRUE;
|
|
--n;
|
|
}
|
|
|
|
/* Allow up to maxwpf best entries to survive; mark the remaining with valid = 0 */
|
|
n = (ngs->bpidx
|
|
- ngs->bp_table_idx[frame_idx]) - n; /* No. of entries after limiting fillers */
|
|
for (; n > ngs->maxwpf; --n) {
|
|
/* Find worst BPTable entry */
|
|
worstscr = (int32) 0x7fffffff;
|
|
worstbpe = NULL;
|
|
for (bp = ngs->bp_table_idx[frame_idx]; (bp < ngs->bpidx); bp++) {
|
|
bpe = &(ngs->bp_table[bp]);
|
|
if (bpe->valid && (bpe->score WORSE_THAN worstscr)) {
|
|
worstscr = bpe->score;
|
|
worstbpe = bpe;
|
|
}
|
|
}
|
|
/* FIXME: Don't panic! */
|
|
if (worstbpe == NULL)
|
|
E_FATAL("PANIC: No worst BPtable entry remaining\n");
|
|
worstbpe->valid = FALSE;
|
|
}
|
|
}
|
|
|
|
static void
|
|
word_transition(ngram_search_t *ngs, int frame_idx)
|
|
{
|
|
int32 i, k, bp, w, nf;
|
|
int32 rc;
|
|
int32 thresh, newscore, pl_newscore;
|
|
bptbl_t *bpe;
|
|
root_chan_t *rhmm;
|
|
struct bestbp_rc_s *bestbp_rc_ptr;
|
|
phone_loop_search_t *pls;
|
|
dict_t *dict = ps_search_dict(ngs);
|
|
dict2pid_t *d2p = ps_search_dict2pid(ngs);
|
|
|
|
/*
|
|
* Transition to start of new word instances (HMM tree roots); but only if words
|
|
* other than </s> finished here.
|
|
* But, first, find the best starting score for each possible right context phone.
|
|
*/
|
|
for (i = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef) - 1; i >= 0; --i)
|
|
ngs->bestbp_rc[i].score = WORST_SCORE;
|
|
k = 0;
|
|
pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
|
|
/* Ugh, this is complicated. Scan all word exits for this frame
|
|
* (they have already been created by prune_word_chan()). */
|
|
for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
|
|
bpe = &(ngs->bp_table[bp]);
|
|
ngs->word_lat_idx[bpe->wid] = NO_BP;
|
|
|
|
if (bpe->wid == ps_search_finish_wid(ngs))
|
|
continue;
|
|
k++;
|
|
|
|
/* DICT2PID */
|
|
/* Array of HMM scores corresponding to all the possible right
|
|
* context expansions of the final phone. It's likely that a
|
|
* lot of these are going to be missing, actually. */
|
|
if (bpe->last2_phone == -1) { /* implies s_idx == -1 */
|
|
/* No right context expansion. */
|
|
for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
|
|
if (bpe->score BETTER_THAN ngs->bestbp_rc[rc].score) {
|
|
E_DEBUG(4,("bestbp_rc[0] = %d lc %d\n",
|
|
bpe->score, bpe->last_phone));
|
|
ngs->bestbp_rc[rc].score = bpe->score;
|
|
ngs->bestbp_rc[rc].path = bp;
|
|
ngs->bestbp_rc[rc].lc = bpe->last_phone;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
xwdssid_t *rssid = dict2pid_rssid(d2p, bpe->last_phone, bpe->last2_phone);
|
|
int32 *rcss = &(ngs->bscore_stack[bpe->s_idx]);
|
|
for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
|
|
if (rcss[rssid->cimap[rc]] BETTER_THAN ngs->bestbp_rc[rc].score) {
|
|
E_DEBUG(4,("bestbp_rc[%d] = %d lc %d\n",
|
|
rc, rcss[rssid->cimap[rc]], bpe->last_phone));
|
|
ngs->bestbp_rc[rc].score = rcss[rssid->cimap[rc]];
|
|
ngs->bestbp_rc[rc].path = bp;
|
|
ngs->bestbp_rc[rc].lc = bpe->last_phone;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (k == 0)
|
|
return;
|
|
|
|
nf = frame_idx + 1;
|
|
thresh = ngs->best_score + ngs->dynamic_beam;
|
|
/*
|
|
* Hypothesize successors to words finished in this frame.
|
|
* Main dictionary, multi-phone words transition to HMM-trees roots.
|
|
*/
|
|
for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
|
|
bestbp_rc_ptr = &(ngs->bestbp_rc[rhmm->ciphone]);
|
|
|
|
newscore = bestbp_rc_ptr->score + ngs->nwpen + ngs->pip;
|
|
pl_newscore = newscore
|
|
+ phone_loop_search_score(pls, rhmm->ciphone);
|
|
if (pl_newscore BETTER_THAN thresh) {
|
|
if ((hmm_frame(&rhmm->hmm) < frame_idx)
|
|
|| (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
|
|
hmm_enter(&rhmm->hmm, newscore,
|
|
bestbp_rc_ptr->path, nf);
|
|
/* DICT2PID: Another place where mpx ssids are entered. */
|
|
/* Look up the ssid to use when entering this mpx triphone. */
|
|
hmm_mpx_ssid(&rhmm->hmm, 0) =
|
|
dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone, bestbp_rc_ptr->lc);
|
|
assert(hmm_mpx_ssid(&rhmm->hmm, 0) != BAD_SSID);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Single phone words; no right context for these. Cannot use bestbp_rc as
|
|
* LM scores have to be included. First find best transition to these words.
|
|
*/
|
|
for (i = 0; i < ngs->n_1ph_LMwords; i++) {
|
|
w = ngs->single_phone_wid[i];
|
|
ngs->last_ltrans[w].dscr = (int32) 0x80000000;
|
|
}
|
|
for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
|
|
bpe = &(ngs->bp_table[bp]);
|
|
if (!bpe->valid)
|
|
continue;
|
|
|
|
for (i = 0; i < ngs->n_1ph_LMwords; i++) {
|
|
int32 n_used;
|
|
w = ngs->single_phone_wid[i];
|
|
newscore = ngram_search_exit_score
|
|
(ngs, bpe, dict_first_phone(dict, w));
|
|
E_DEBUG(4, ("initial newscore for %s: %d\n",
|
|
dict_wordstr(dict, w), newscore));
|
|
if (newscore != WORST_SCORE)
|
|
newscore += ngram_tg_score(ngs->lmset,
|
|
dict_basewid(dict, w),
|
|
bpe->real_wid,
|
|
bpe->prev_real_wid,
|
|
&n_used)>>SENSCR_SHIFT;
|
|
|
|
/* FIXME: Not sure how WORST_SCORE could be better, but it
|
|
* apparently happens. */
|
|
if (newscore BETTER_THAN ngs->last_ltrans[w].dscr) {
|
|
ngs->last_ltrans[w].dscr = newscore;
|
|
ngs->last_ltrans[w].bp = bp;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Now transition to in-LM single phone words */
|
|
for (i = 0; i < ngs->n_1ph_LMwords; i++) {
|
|
w = ngs->single_phone_wid[i];
|
|
/* Never transition into the start word (for one thing, it is
|
|
a non-event in the language model.) */
|
|
if (w == dict_startwid(ps_search_dict(ngs)))
|
|
continue;
|
|
rhmm = (root_chan_t *) ngs->word_chan[w];
|
|
newscore = ngs->last_ltrans[w].dscr + ngs->pip;
|
|
pl_newscore = newscore + phone_loop_search_score(pls, rhmm->ciphone);
|
|
if (pl_newscore BETTER_THAN thresh) {
|
|
bpe = ngs->bp_table + ngs->last_ltrans[w].bp;
|
|
if ((hmm_frame(&rhmm->hmm) < frame_idx)
|
|
|| (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
|
|
hmm_enter(&rhmm->hmm,
|
|
newscore, ngs->last_ltrans[w].bp, nf);
|
|
/* DICT2PID: another place where mpx ssids are entered. */
|
|
/* Look up the ssid to use when entering this mpx triphone. */
|
|
hmm_mpx_ssid(&rhmm->hmm, 0) =
|
|
dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone,
|
|
dict_last_phone(dict, bpe->wid));
|
|
assert(hmm_mpx_ssid(&rhmm->hmm, 0) != BAD_SSID);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Remaining words: <sil>, noise words. No mpx for these! */
|
|
w = ps_search_silence_wid(ngs);
|
|
rhmm = (root_chan_t *) ngs->word_chan[w];
|
|
bestbp_rc_ptr = &(ngs->bestbp_rc[ps_search_acmod(ngs)->mdef->sil]);
|
|
newscore = bestbp_rc_ptr->score + ngs->silpen + ngs->pip;
|
|
pl_newscore = newscore
|
|
+ phone_loop_search_score(pls, rhmm->ciphone);
|
|
if (pl_newscore BETTER_THAN thresh) {
|
|
if ((hmm_frame(&rhmm->hmm) < frame_idx)
|
|
|| (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
|
|
hmm_enter(&rhmm->hmm,
|
|
newscore, bestbp_rc_ptr->path, nf);
|
|
}
|
|
}
|
|
for (w = dict_filler_start(dict); w <= dict_filler_end(dict); w++) {
|
|
if (w == ps_search_silence_wid(ngs))
|
|
continue;
|
|
/* Never transition into the start word (for one thing, it is
|
|
a non-event in the language model.) */
|
|
if (w == dict_startwid(ps_search_dict(ngs)))
|
|
continue;
|
|
rhmm = (root_chan_t *) ngs->word_chan[w];
|
|
/* If this was not actually a single-phone word, rhmm will be NULL. */
|
|
if (rhmm == NULL)
|
|
continue;
|
|
newscore = bestbp_rc_ptr->score + ngs->fillpen + ngs->pip;
|
|
pl_newscore = newscore
|
|
+ phone_loop_search_score(pls, rhmm->ciphone);
|
|
if (pl_newscore BETTER_THAN thresh) {
|
|
if ((hmm_frame(&rhmm->hmm) < frame_idx)
|
|
|| (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
|
|
hmm_enter(&rhmm->hmm,
|
|
newscore, bestbp_rc_ptr->path, nf);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
deactivate_channels(ngram_search_t *ngs, int frame_idx)
|
|
{
|
|
root_chan_t *rhmm;
|
|
int i;
|
|
|
|
/* Clear score[] of pruned root channels */
|
|
for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
|
|
if (hmm_frame(&rhmm->hmm) == frame_idx) {
|
|
hmm_clear(&rhmm->hmm);
|
|
}
|
|
}
|
|
/* Clear score[] of pruned single-phone channels */
|
|
for (i = 0; i < ngs->n_1ph_words; i++) {
|
|
int32 w = ngs->single_phone_wid[i];
|
|
rhmm = (root_chan_t *) ngs->word_chan[w];
|
|
if (hmm_frame(&rhmm->hmm) == frame_idx) {
|
|
hmm_clear(&rhmm->hmm);
|
|
}
|
|
}
|
|
}
|
|
|
|
int
|
|
ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx)
|
|
{
|
|
int16 const *senscr;
|
|
|
|
/* Activate our HMMs for the current frame if need be. */
|
|
if (!ps_search_acmod(ngs)->compallsen)
|
|
compute_sen_active(ngs, frame_idx);
|
|
|
|
/* Compute GMM scores for the current frame. */
|
|
if ((senscr = acmod_score(ps_search_acmod(ngs), &frame_idx)) == NULL)
|
|
return 0;
|
|
ngs->st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active;
|
|
|
|
/* Mark backpointer table for current frame. */
|
|
ngram_search_mark_bptable(ngs, frame_idx);
|
|
|
|
/* If the best score is equal to or worse than WORST_SCORE,
|
|
* recognition has failed, don't bother to keep trying. */
|
|
if (ngs->best_score == WORST_SCORE || ngs->best_score WORSE_THAN WORST_SCORE)
|
|
return 0;
|
|
/* Renormalize if necessary */
|
|
if (ngs->best_score + (2 * ngs->beam) WORSE_THAN WORST_SCORE) {
|
|
E_INFO("Renormalizing Scores at frame %d, best score %d\n",
|
|
frame_idx, ngs->best_score);
|
|
renormalize_scores(ngs, frame_idx, ngs->best_score);
|
|
}
|
|
|
|
/* Evaluate HMMs */
|
|
evaluate_channels(ngs, senscr, frame_idx);
|
|
/* Prune HMMs and do phone transitions. */
|
|
prune_channels(ngs, frame_idx);
|
|
/* Do absolute pruning on word exits. */
|
|
bptable_maxwpf(ngs, frame_idx);
|
|
/* Do word transitions. */
|
|
word_transition(ngs, frame_idx);
|
|
/* Deactivate pruned HMMs. */
|
|
deactivate_channels(ngs, frame_idx);
|
|
|
|
++ngs->n_frame;
|
|
/* Return the number of frames processed. */
|
|
return 1;
|
|
}
|
|
|
|
void
|
|
ngram_fwdtree_finish(ngram_search_t *ngs)
|
|
{
|
|
int32 i, w, cf, *awl;
|
|
root_chan_t *rhmm;
|
|
chan_t *hmm, **acl;
|
|
|
|
/* This is the number of frames processed. */
|
|
cf = ps_search_acmod(ngs)->output_frame;
|
|
/* Add a mark in the backpointer table for one past the final frame. */
|
|
ngram_search_mark_bptable(ngs, cf);
|
|
|
|
/* Deactivate channels lined up for the next frame */
|
|
/* First, root channels of HMM tree */
|
|
for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
|
|
hmm_clear(&rhmm->hmm);
|
|
}
|
|
|
|
/* nonroot channels of HMM tree */
|
|
i = ngs->n_active_chan[cf & 0x1];
|
|
acl = ngs->active_chan_list[cf & 0x1];
|
|
for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
|
|
hmm_clear(&hmm->hmm);
|
|
}
|
|
|
|
/* word channels */
|
|
i = ngs->n_active_word[cf & 0x1];
|
|
awl = ngs->active_word_list[cf & 0x1];
|
|
for (w = *(awl++); i > 0; --i, w = *(awl++)) {
|
|
/* Don't accidentally free single-phone words! */
|
|
if (dict_is_single_phone(ps_search_dict(ngs), w))
|
|
continue;
|
|
bitvec_clear(ngs->word_active, w);
|
|
if (ngs->word_chan[w] == NULL)
|
|
continue;
|
|
ngram_search_free_all_rc(ngs, w);
|
|
}
|
|
|
|
/*
|
|
* The previous search code did a postprocessing of the
|
|
* backpointer table here, but we will postpone this until it is
|
|
* absolutely necessary, i.e. when generating a word graph.
|
|
* Likewise we don't actually have to decide what the exit word is
|
|
* until somebody requests a backtrace.
|
|
*/
|
|
|
|
ptmr_stop(&ngs->fwdtree_perf);
|
|
/* Print out some statistics. */
|
|
if (cf > 0) {
|
|
double n_speech = (double)(cf + 1)
|
|
/ cmd_ln_int32_r(ps_search_config(ngs), "-frate");
|
|
E_INFO("%8d words recognized (%d/fr)\n",
|
|
ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1));
|
|
E_INFO("%8d senones evaluated (%d/fr)\n", ngs->st.n_senone_active_utt,
|
|
(ngs->st.n_senone_active_utt + (cf >> 1)) / (cf + 1));
|
|
E_INFO("%8d channels searched (%d/fr), %d 1st, %d last\n",
|
|
ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval,
|
|
(ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval) / (cf + 1),
|
|
ngs->st.n_root_chan_eval, ngs->st.n_last_chan_eval);
|
|
E_INFO("%8d words for which last channels evaluated (%d/fr)\n",
|
|
ngs->st.n_word_lastchan_eval,
|
|
ngs->st.n_word_lastchan_eval / (cf + 1));
|
|
E_INFO("%8d candidate words for entering last phone (%d/fr)\n",
|
|
ngs->st.n_lastphn_cand_utt, ngs->st.n_lastphn_cand_utt / (cf + 1));
|
|
E_INFO("fwdtree %.2f CPU %.3f xRT\n",
|
|
ngs->fwdtree_perf.t_cpu,
|
|
ngs->fwdtree_perf.t_cpu / n_speech);
|
|
E_INFO("fwdtree %.2f wall %.3f xRT\n",
|
|
ngs->fwdtree_perf.t_elapsed,
|
|
ngs->fwdtree_perf.t_elapsed / n_speech);
|
|
}
|
|
/* dump_bptable(ngs); */
|
|
}
|