tenfourfox/media/pocketsphinx/src/acmod.c
Cameron Kaiser c9b2922b70 hello FPR
2017-04-19 00:56:45 -07:00

1378 lines
41 KiB
C

/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
/* ====================================================================
* Copyright (c) 2008 Carnegie Mellon University. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* This work was supported in part by funding from the Defense Advanced
* Research Projects Agency and the National Science Foundation of the
* United States of America, and the CMU Sphinx Speech Consortium.
*
* THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
* NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* ====================================================================
*
*/
/**
* @file acmod.c Acoustic model structures for PocketSphinx.
* @author David Huggins-Daines <dhuggins@cs.cmu.edu>
*/
/* System headers. */
#include <assert.h>
#include <string.h>
#include <math.h>
/* SphinxBase headers. */
#include <sphinxbase/prim_type.h>
#include <sphinxbase/err.h>
#include <sphinxbase/cmd_ln.h>
#include <sphinxbase/strfuncs.h>
#include <sphinxbase/byteorder.h>
#include <sphinxbase/feat.h>
#include <sphinxbase/bio.h>
/* Local headers. */
#include "cmdln_macro.h"
#include "acmod.h"
#include "s2_semi_mgau.h"
#include "ptm_mgau.h"
#include "ms_mgau.h"
/* Argument definitions for the feature and front-end parameters that may
 * appear in a model's feat.params file.  acmod_init() hands this table to
 * cmd_ln_parse_file_r() when "-featparams" is configured. */
static const arg_t feat_defn[] = {
waveform_to_cepstral_command_line_macro(),
cepstral_to_feature_command_line_macro(),
CMDLN_EMPTY_OPTION
};
/* NOTE(review): when the build does not define WORDS_BIGENDIAN, this
 * fallback defines it to 1, so in that configuration none of the
 * `if (!WORDS_BIGENDIAN)` byte-swapping branches in this file is ever
 * taken and log data is written in host byte order -- confirm that this
 * is intended. */
#ifndef WORDS_BIGENDIAN
#define WORDS_BIGENDIAN 1
#endif
/* Forward declaration: acmod_end_utt() calls this before its definition. */
static int32 acmod_process_mfcbuf(acmod_t *acmod);
/**
 * Load the acoustic model named by the configuration into @p acmod.
 *
 * Reads the model definition ("-mdef"), the transition matrices ("-tmat"),
 * selects and initializes a Gaussian computation module, and applies the
 * MLLR transform given by "-mllr" if there is one.
 *
 * @param acmod Acoustic model object; its config and lmath must be set.
 * @return 0 on success, -1 on failure (an error is logged for the
 *         failures detected here).  Objects already stored in @p acmod
 *         are left for acmod_free() to release.
 */
static int
acmod_init_am(acmod_t *acmod)
{
    char const *mdeffn, *tmatfn, *mllrfn, *hmmdir;

    /* Read model definition. */
    if ((mdeffn = cmd_ln_str_r(acmod->config, "-mdef")) == NULL) {
        if ((hmmdir = cmd_ln_str_r(acmod->config, "-hmm")) == NULL)
            E_ERROR("Acoustic model definition is not specified either "
                    "with -mdef option or with -hmm\n");
        else
            E_ERROR("Folder '%s' does not contain acoustic model "
                    "definition 'mdef'\n", hmmdir);
        return -1;
    }
    if ((acmod->mdef = bin_mdef_read(acmod->config, mdeffn)) == NULL) {
        E_ERROR("Failed to read acoustic model definition from %s\n", mdeffn);
        return -1;
    }

    /* Read transition matrices. */
    if ((tmatfn = cmd_ln_str_r(acmod->config, "-tmat")) == NULL) {
        E_ERROR("No tmat file specified\n");
        return -1;
    }
    acmod->tmat = tmat_init(tmatfn, acmod->lmath,
                            cmd_ln_float32_r(acmod->config, "-tmatfloor"),
                            TRUE);
    /* Do not carry on with a missing transition matrix. */
    if (acmod->tmat == NULL) {
        E_ERROR("Failed to read transition matrices from %s\n", tmatfn);
        return -1;
    }

    /* Read the acoustic models.  "-tmat" was already verified above, so
     * only the mean and variance files remain to be checked. */
    if ((cmd_ln_str_r(acmod->config, "-mean") == NULL)
        || (cmd_ln_str_r(acmod->config, "-var") == NULL)) {
        E_ERROR("No mean/var/tmat files specified\n");
        return -1;
    }

    if (cmd_ln_str_r(acmod->config, "-senmgau")) {
        E_INFO("Using general multi-stream GMM computation\n");
        acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef);
        if (acmod->mgau == NULL)
            return -1;
    }
    else {
        /* Try the computation modules in turn: PTM, then
         * semi-continuous, then the general multi-stream one. */
        E_INFO("Attempting to use PTM computation module\n");
        if ((acmod->mgau = ptm_mgau_init(acmod, acmod->mdef)) == NULL) {
            E_INFO("Attempting to use semi-continuous computation module\n");
            if ((acmod->mgau = s2_semi_mgau_init(acmod)) == NULL) {
                E_INFO("Falling back to general multi-stream GMM computation\n");
                acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef);
                if (acmod->mgau == NULL)
                    return -1;
            }
        }
    }

    /* If there is an MLLR transform, apply it. */
    if ((mllrfn = cmd_ln_str_r(acmod->config, "-mllr"))) {
        ps_mllr_t *mllr = ps_mllr_read(mllrfn);
        if (mllr == NULL)
            return -1;
        acmod_update_mllr(acmod, mllr);
    }
    return 0;
}
/**
 * Create and configure the dynamic feature computation object for
 * @p acmod from its configuration ("-feat", "-cmn", "-varnorm", "-agc",
 * "-ceplen", and optionally "-lda", "-svspec", "-agcthresh", "-cmninit").
 *
 * @return 0 on success, -1 if any step fails.
 */
static int
acmod_init_feat(acmod_t *acmod)
{
    char const *lda_file;
    char const *svspec;

    acmod->fcb =
        feat_init(cmd_ln_str_r(acmod->config, "-feat"),
                  cmn_type_from_str(cmd_ln_str_r(acmod->config, "-cmn")),
                  cmd_ln_boolean_r(acmod->config, "-varnorm"),
                  agc_type_from_str(cmd_ln_str_r(acmod->config, "-agc")),
                  1, cmd_ln_int32_r(acmod->config, "-ceplen"));
    if (!acmod->fcb)
        return -1;

    /* Optional linear feature transformation. */
    lda_file = cmd_ln_str_r(acmod->config, "-lda");
    if (lda_file != NULL) {
        E_INFO("Reading linear feature transformation from %s\n",
               lda_file);
        if (feat_read_lda(acmod->fcb, lda_file,
                          cmd_ln_int32_r(acmod->config, "-ldadim")) < 0)
            return -1;
    }

    /* Optional subvector specification. */
    svspec = cmd_ln_str_r(acmod->config, "-svspec");
    if (svspec != NULL) {
        int32 **subvecs;

        E_INFO("Using subvector specification %s\n",
               svspec);
        subvecs = parse_subvecs(svspec);
        if (subvecs == NULL)
            return -1;
        if (feat_set_subvecs(acmod->fcb, subvecs) < 0)
            return -1;
    }

    /* The AGC threshold only applies when AGC is not "none". */
    if (cmd_ln_exists_r(acmod->config, "-agcthresh")
        && strcmp(cmd_ln_str_r(acmod->config, "-agc"), "none") != 0) {
        agc_set_threshold(acmod->fcb->agc_struct,
                          cmd_ln_float32_r(acmod->config, "-agcthresh"));
    }

    /* Seed the CMN mean from a comma-separated list, one value per
     * coefficient, stopping at veclen values. */
    if (acmod->fcb->cmn_struct
        && cmd_ln_exists_r(acmod->config, "-cmninit")) {
        char *values = ckd_salloc(cmd_ln_str_r(acmod->config, "-cmninit"));
        char *field = values;
        char *comma;
        int32 k;

        for (k = 0;
             k < acmod->fcb->cmn_struct->veclen
                 && (comma = strchr(field, ',')) != NULL;
             ++k) {
            *comma = '\0';
            acmod->fcb->cmn_struct->cmn_mean[k] = FLOAT2MFCC(atof_c(field));
            field = comma + 1;
        }
        /* Last field has no trailing comma. */
        if (k < acmod->fcb->cmn_struct->veclen && *field != '\0') {
            acmod->fcb->cmn_struct->cmn_mean[k] = FLOAT2MFCC(atof_c(field));
        }
        ckd_free(values);
    }
    return 0;
}
/**
 * Check whether front end @p fe is incompatible with the configuration
 * of @p acmod.  Only the output vector size is compared against
 * "-ceplen".
 *
 * @return TRUE on mismatch (an error is logged), FALSE otherwise.
 */
int
acmod_fe_mismatch(acmod_t *acmod, fe_t *fe)
{
    int mismatch = FALSE;

    /* Output vector dimension needs to be the same. */
    if (cmd_ln_int32_r(acmod->config, "-ceplen") != fe_get_output_size(fe)) {
        E_ERROR("Configured feature length %d doesn't match feature "
                "extraction output size %d\n",
                cmd_ln_int32_r(acmod->config, "-ceplen"),
                fe_get_output_size(fe));
        mismatch = TRUE;
    }
    /* Other feature parameters are not compared here. */
    return mismatch;
}
/**
 * Check whether feature object @p fcb is incompatible with the
 * configuration of @p acmod: the feature type name must equal "-feat"
 * and the cepstral size must equal "-ceplen".
 *
 * @return TRUE on mismatch, FALSE otherwise.
 */
int
acmod_feat_mismatch(acmod_t *acmod, feat_t *fcb)
{
    /* Feature type needs to be the same. */
    if (strcmp(cmd_ln_str_r(acmod->config, "-feat"), feat_name(fcb)) != 0) {
        return TRUE;
    }
    /* Input vector dimension needs to be the same. */
    if (cmd_ln_int32_r(acmod->config, "-ceplen") != feat_cepsize(fcb)) {
        return TRUE;
    }
    /* FIXME: LDA and other settings are not compared. */
    return FALSE;
}
/**
 * Allocate and initialize an acoustic model object.
 *
 * @param config Configuration; a reference is taken with cmd_ln_retain().
 * @param lmath  Log-math object, stored as-is in the new object.
 * @param fe     Front end to use, or NULL to create one from @p config.
 *               A supplied front end is checked with acmod_fe_mismatch()
 *               and then retained.
 * @param fcb    Feature computation object, or NULL to create one from the
 *               configuration.  A supplied one is checked with
 *               acmod_feat_mismatch() and then retained.
 * @return The new object, or NULL on failure (the partially built object
 *         is released with acmod_free()).
 */
acmod_t *
acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
{
acmod_t *acmod;
char const *featparams;
/* Zero-filled allocation, so acmod_free() can be called on a partially
 * initialized object from error_out. */
acmod = ckd_calloc(1, sizeof(*acmod));
acmod->config = cmd_ln_retain(config);
acmod->lmath = lmath;
acmod->state = ACMOD_IDLE;
/* Look for feat.params in acoustic model dir. */
if ((featparams = cmd_ln_str_r(acmod->config, "-featparams"))) {
if (NULL !=
cmd_ln_parse_file_r(acmod->config, feat_defn, featparams, FALSE))
E_INFO("Parsed model-specific feature parameters from %s\n",
featparams);
}
/* Initialize feature computation. */
if (fe) {
/* The caller's front end must match the configured "-ceplen". */
if (acmod_fe_mismatch(acmod, fe))
goto error_out;
fe_retain(fe);
acmod->fe = fe;
}
else {
/* Initialize a new front end. */
acmod->fe = fe_init_auto_r(config);
if (acmod->fe == NULL)
goto error_out;
if (acmod_fe_mismatch(acmod, acmod->fe))
goto error_out;
}
if (fcb) {
/* The caller's feature object must match "-feat" and "-ceplen". */
if (acmod_feat_mismatch(acmod, fcb))
goto error_out;
feat_retain(fcb);
acmod->fcb = fcb;
}
else {
/* Initialize a new fcb. */
if (acmod_init_feat(acmod) < 0)
goto error_out;
}
/* Load acoustic model parameters. */
if (acmod_init_am(acmod) < 0)
goto error_out;
/* The MFCC buffer needs to be at least as large as the dynamic
* feature window. */
acmod->n_mfc_alloc = acmod->fcb->window_size * 2 + 1;
acmod->mfc_buf = (mfcc_t **)
ckd_calloc_2d(acmod->n_mfc_alloc, acmod->fcb->cepsize,
sizeof(**acmod->mfc_buf));
/* Feature buffer has to be at least as large as MFCC buffer. */
acmod->n_feat_alloc = acmod->n_mfc_alloc + cmd_ln_int32_r(config, "-pl_window");
acmod->feat_buf = feat_array_alloc(acmod->fcb, acmod->n_feat_alloc);
/* One framepos entry per feature buffer slot. */
acmod->framepos = ckd_calloc(acmod->n_feat_alloc, sizeof(*acmod->framepos));
acmod->utt_start_frame = 0;
/* Senone computation stuff: every array below is sized by the number of
 * senones in the model definition. */
acmod->senone_scores = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
sizeof(*acmod->senone_scores));
acmod->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(acmod->mdef));
acmod->senone_active = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
sizeof(*acmod->senone_active));
acmod->log_zero = logmath_get_zero(acmod->lmath);
acmod->compallsen = cmd_ln_boolean_r(config, "-compallsen");
return acmod;
error_out:
/* Release whatever was set up before the failure. */
acmod_free(acmod);
return NULL;
}
/**
 * Release an acoustic model object and everything it holds.
 * Passing NULL is a no-op.  The input senone file handle (insenfh) is
 * not closed here.
 */
void
acmod_free(acmod_t *acmod)
{
    if (!acmod) {
        return;
    }

    /* Feature computation, front end and configuration. */
    feat_free(acmod->fcb);
    fe_free(acmod->fe);
    cmd_ln_free_r(acmod->config);

    /* Cepstral and feature buffers. */
    if (acmod->mfc_buf != NULL) {
        ckd_free_2d((void **)acmod->mfc_buf);
    }
    if (acmod->feat_buf != NULL) {
        feat_array_free(acmod->feat_buf);
    }

    /* Log files that are still open. */
    if (acmod->mfcfh != NULL) {
        fclose(acmod->mfcfh);
    }
    if (acmod->rawfh != NULL) {
        fclose(acmod->rawfh);
    }
    if (acmod->senfh != NULL) {
        fclose(acmod->senfh);
    }

    /* Per-frame bookkeeping and raw audio copy. */
    ckd_free(acmod->framepos);
    ckd_free(acmod->senone_scores);
    ckd_free(acmod->senone_active_vec);
    ckd_free(acmod->senone_active);
    ckd_free(acmod->rawdata);

    /* Model components. */
    if (acmod->mdef != NULL) {
        bin_mdef_free(acmod->mdef);
    }
    if (acmod->tmat != NULL) {
        tmat_free(acmod->tmat);
    }
    if (acmod->mgau != NULL) {
        ps_mgau_free(acmod->mgau);
    }
    if (acmod->mllr != NULL) {
        ps_mllr_free(acmod->mllr);
    }

    ckd_free(acmod);
}
/**
 * Replace the MLLR transform of @p acmod with @p mllr and apply it to the
 * Gaussian computation module.  Any previously stored transform is freed.
 *
 * @return @p mllr.
 */
ps_mllr_t *
acmod_update_mllr(acmod_t *acmod, ps_mllr_t *mllr)
{
    if (acmod->mllr != NULL) {
        ps_mllr_free(acmod->mllr);
    }
    acmod->mllr = mllr;
    ps_mgau_transform(acmod->mgau, mllr);

    return mllr;
}
/**
 * Write the senone dump file header to @p logfh: version, model
 * definition file name, number of senones and log base.
 *
 * @return The result of bio_writehdr().
 */
int
acmod_write_senfh_header(acmod_t *acmod, FILE *logfh)
{
    char n_sen_text[64];
    char log_base_text[64];

    /* Header values are written as text. */
    sprintf(n_sen_text, "%d", bin_mdef_n_sen(acmod->mdef));
    sprintf(log_base_text, "%f", logmath_get_base(acmod->lmath));

    return bio_writehdr(logfh,
                        "version", "0.1",
                        "mdef_file", cmd_ln_str_r(acmod->config, "-mdef"),
                        "n_sen", n_sen_text,
                        "logbase", log_base_text, NULL);
}
/**
 * Set the senone dump file.  Any previously set file is closed.  A
 * non-NULL @p logfh immediately receives the file header.
 *
 * @return 0 when @p logfh is NULL, otherwise the result of
 *         acmod_write_senfh_header().
 */
int
acmod_set_senfh(acmod_t *acmod, FILE *logfh)
{
    if (acmod->senfh != NULL) {
        fclose(acmod->senfh);
    }
    acmod->senfh = logfh;

    if (logfh != NULL) {
        return acmod_write_senfh_header(acmod, logfh);
    }
    return 0;
}
/**
 * Set the MFCC log file.  Any previously set file is closed.
 *
 * A non-NULL @p logfh receives a 4-byte zero placeholder at its current
 * position; acmod_end_utt() later seeks to the start of the file and
 * overwrites it with the number of values written.  Passing NULL simply
 * disables MFCC logging.
 *
 * @return 0 on success (including @p logfh == NULL), -1 if the
 *         placeholder could not be written.
 */
int
acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
{
    int32 placeholder = 0;

    if (acmod->mfcfh)
        fclose(acmod->mfcfh);
    acmod->mfcfh = logfh;

    /* Nothing to write when logging is being switched off. */
    if (logfh == NULL)
        return 0;

    if (fwrite(&placeholder, 4, 1, logfh) != 1) {
        E_ERROR_SYSTEM("Failed to write header to MFCC log file");
        return -1;
    }
    return 0;
}
/**
 * Set the raw audio log file.  Any previously set file is closed.
 *
 * @return Always 0.
 */
int
acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
{
    if (acmod->rawfh != NULL) {
        fclose(acmod->rawfh);
    }
    acmod->rawfh = logfh;

    return 0;
}
/**
 * Resize the feature buffer and the matching framepos array to hold
 * @p nfr frames.  Asking for more than MAX_N_FRAMES is a fatal error.
 */
void
acmod_grow_feat_buf(acmod_t *acmod, int nfr)
{
    if (nfr > MAX_N_FRAMES) {
        E_FATAL("Decoder can not process more than %d frames at once, "
                "requested %d\n", MAX_N_FRAMES, nfr);
    }

    acmod->feat_buf = feat_array_realloc(acmod->fcb, acmod->feat_buf,
                                         acmod->n_feat_alloc, nfr);
    /* framepos has one entry per feature buffer slot. */
    acmod->framepos = ckd_realloc(acmod->framepos,
                                  nfr * sizeof(*acmod->framepos));
    acmod->n_feat_alloc = nfr;
}
/**
 * Enable or disable growing of the feature buffer.
 *
 * When growing is enabled and the buffer holds fewer than 128 frames, it
 * is enlarged to 128 right away.
 *
 * @return The previous value of the grow flag.
 */
int
acmod_set_grow(acmod_t *acmod, int grow_feat)
{
    int previous = acmod->grow_feat;

    acmod->grow_feat = grow_feat;

    /* Expand feat_buf to a reasonable size to start with. */
    if (grow_feat && acmod->n_feat_alloc < 128) {
        acmod_grow_feat_buf(acmod, 128);
    }
    return previous;
}
/**
 * Mark the start of an utterance: start the front end, move to the
 * ACMOD_STARTED state and reset all per-utterance counters and indices.
 *
 * @return Always 0.
 */
int
acmod_start_utt(acmod_t *acmod)
{
    fe_start_utt(acmod->fe);
    acmod->state = ACMOD_STARTED;

    /* Cepstral buffer. */
    acmod->n_mfc_frame = 0;
    acmod->mfc_outidx = 0;

    /* Feature buffer. */
    acmod->n_feat_frame = 0;
    acmod->feat_outidx = 0;

    /* Output and scoring position; -1 means no frame has been scored. */
    acmod->output_frame = 0;
    acmod->senscr_frame = -1;
    acmod->n_senone_active = 0;
    acmod->mgau->frame_idx = 0;

    /* Raw audio copy. */
    acmod->rawdata_pos = 0;

    return 0;
}
/**
 * Mark the end of an utterance.
 *
 * Moves to the ACMOD_ENDED state, collects the front end's final frame
 * (if there is room for it in the cepstral buffer) and processes the
 * buffered cepstra into features.  The MFCC log file, if any, gets its
 * length header filled in.  All three log files (MFCC, raw audio, senone
 * dump) are closed and their handles cleared.
 *
 * @return Number of frames returned by acmod_process_mfcbuf() when the
 *         front end produced a final frame, otherwise 0.
 */
int
acmod_end_utt(acmod_t *acmod)
{
    int32 nfr = 0;

    acmod->state = ACMOD_ENDED;
    if (acmod->n_mfc_frame < acmod->n_mfc_alloc) {
        int inptr;
        /* Where to start writing them (circular buffer) */
        inptr = (acmod->mfc_outidx + acmod->n_mfc_frame) % acmod->n_mfc_alloc;
        /* nfr is always either zero or one. */
        fe_end_utt(acmod->fe, acmod->mfc_buf[inptr], &nfr);
        acmod->n_mfc_frame += nfr;
        /* Process whatever's left, and any leadout or update stats if needed. */
        if (nfr)
            nfr = acmod_process_mfcbuf(acmod);
        else
            feat_update_stats(acmod->fcb);
    }
    if (acmod->mfcfh) {
        long endpos = ftell(acmod->mfcfh);

        if (endpos >= 0) {
            /* The header is exactly four bytes on disk, so it must be
             * built in a 32-bit object: writing the first four bytes of
             * a `long` emits the wrong half on LP64 big-endian hosts. */
            int32 outlen = (int32)((endpos - 4) / 4);

            if (!WORDS_BIGENDIAN)
                SWAP_INT32(&outlen);
            /* Try to seek and write */
            if (fseek(acmod->mfcfh, 0, SEEK_SET) == 0) {
                fwrite(&outlen, 4, 1, acmod->mfcfh);
            }
        }
        fclose(acmod->mfcfh);
        acmod->mfcfh = NULL;
    }
    if (acmod->rawfh) {
        fclose(acmod->rawfh);
        acmod->rawfh = NULL;
    }
    if (acmod->senfh) {
        fclose(acmod->senfh);
        acmod->senfh = NULL;
    }
    return nfr;
}
/**
 * Append @p n_frames cepstral frames to the MFCC log file.
 *
 * The values are read as one contiguous run starting at cep[0].  When
 * WORDS_BIGENDIAN is zero, each 32-bit word is byte-swapped in place for
 * the write and swapped back afterwards.
 *
 * @param acmod    Acoustic model; acmod->mfcfh must be open.
 * @param cep      Frames to log.
 * @param n_frames Number of frames in @p cep.
 * @return Always 0; a short write is logged but not reported.
 */
static int
acmod_log_mfc(acmod_t *acmod,
              mfcc_t **cep, int n_frames)
{
    int i, n, n_words;
    int32 *ptr = (int32 *)cep[0];

    n = n_frames * feat_cepsize(acmod->fcb);
    if (n <= 0)
        return 0;
    /* Number of 32-bit words covered by the n values.  The swap loops
     * must stop here: counting bytes would run past the buffer. */
    n_words = (int)(n * sizeof(mfcc_t) / sizeof(int32));

    /* Swap bytes. */
    if (!WORDS_BIGENDIAN) {
        for (i = 0; i < n_words; ++i) {
            SWAP_INT32(ptr + i);
        }
    }
    /* Write features. */
    if (fwrite(cep[0], sizeof(mfcc_t), n, acmod->mfcfh) != (size_t)n) {
        E_ERROR_SYSTEM("Failed to write %d values to log file", n);
    }
    /* Swap them back. */
    if (!WORDS_BIGENDIAN) {
        for (i = 0; i < n_words; ++i) {
            SWAP_INT32(ptr + i);
        }
    }
    return 0;
}
/**
 * Convert a complete utterance of cepstra into dynamic features in one
 * call.
 *
 * The feature buffer is replaced by a larger one when it cannot hold all
 * the frames; asking for more than MAX_N_FRAMES is a fatal error.
 *
 * @param acmod          Acoustic model.
 * @param inout_cep      In: cepstral frames.  Out: advanced by the value
 *                       left in *inout_n_frames by feat_s2mfc2feat_live().
 * @param inout_n_frames In: number of frames.  Out: 0.
 * @return Number of feature frames generated.
 */
static int
acmod_process_full_cep(acmod_t *acmod,
                       mfcc_t ***inout_cep,
                       int *inout_n_frames)
{
    int32 nfr;

    /* Write to log file. */
    if (acmod->mfcfh)
        acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);

    /* Resize feat_buf to fit. */
    if (acmod->n_feat_alloc < *inout_n_frames) {
        if (*inout_n_frames > MAX_N_FRAMES)
            E_FATAL("Batch processing can not process more than %d frames "
                    "at once, requested %d\n", MAX_N_FRAMES, *inout_n_frames);
        feat_array_free(acmod->feat_buf);
        acmod->feat_buf = feat_array_alloc(acmod->fcb, *inout_n_frames);
        /* framepos has one entry per feature buffer slot, so it has to
         * grow with n_feat_alloc (as acmod_grow_feat_buf() does);
         * otherwise later indexing by buffer slot can run past it. */
        acmod->framepos = ckd_realloc(acmod->framepos,
                                      *inout_n_frames * sizeof(*acmod->framepos));
        acmod->n_feat_alloc = *inout_n_frames;
        acmod->n_feat_frame = 0;
        acmod->feat_outidx = 0;
    }

    /* Make dynamic features, treating the input as both the beginning
     * and the end of the utterance. */
    nfr = feat_s2mfc2feat_live(acmod->fcb, *inout_cep, inout_n_frames,
                               TRUE, TRUE, acmod->feat_buf);
    acmod->n_feat_frame = nfr;
    assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
    *inout_cep += *inout_n_frames;
    *inout_n_frames = 0;
    return nfr;
}
/**
 * Process a complete utterance of raw audio in one call: optionally copy
 * and log the samples, size the cepstral buffer, run the front end over
 * all samples and hand the cepstra to acmod_process_full_cep().
 *
 * @param acmod         Acoustic model.
 * @param inout_raw     In/out: sample pointer, updated by the front end.
 * @param inout_n_samps In/out: sample count, updated by the front end.
 * @return Number of feature frames generated, or -1 if the front end
 *         reports an error.
 */
static int
acmod_process_full_raw(acmod_t *acmod,
                       int16 const **inout_raw,
                       size_t *inout_n_samps)
{
    int32 n_frames, n_tail, n_feat;
    mfcc_t **cep_cursor;

    /* Keep a copy of the audio when the raw data buffer has room. */
    if (*inout_n_samps + acmod->rawdata_pos < acmod->rawdata_size) {
        memcpy(acmod->rawdata + acmod->rawdata_pos, *inout_raw,
               *inout_n_samps * sizeof(int16));
        acmod->rawdata_pos += *inout_n_samps;
    }
    /* Write to logging file if any. */
    if (acmod->rawfh) {
        fwrite(*inout_raw, sizeof(int16), *inout_n_samps, acmod->rawfh);
    }

    /* First call, without buffers: obtain the frame count in n_frames. */
    if (fe_process_frames(acmod->fe, NULL, inout_n_samps, NULL, &n_frames, NULL) < 0) {
        return -1;
    }
    /* Resize mfc_buf to fit, with one extra slot for the tail frame. */
    if (acmod->n_mfc_alloc < n_frames + 1) {
        ckd_free_2d(acmod->mfc_buf);
        acmod->mfc_buf = ckd_calloc_2d(n_frames + 1, fe_get_output_size(acmod->fe),
                                       sizeof(**acmod->mfc_buf));
        acmod->n_mfc_alloc = n_frames + 1;
    }
    acmod->n_mfc_frame = 0;
    acmod->mfc_outidx = 0;

    /* Second call: actually compute the cepstra. */
    fe_start_utt(acmod->fe);
    if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
                          acmod->mfc_buf, &n_frames, NULL) < 0) {
        return -1;
    }
    fe_end_utt(acmod->fe, acmod->mfc_buf[n_frames], &n_tail);
    n_frames += n_tail;

    cep_cursor = acmod->mfc_buf;
    n_feat = acmod_process_full_cep(acmod, &cep_cursor, &n_frames);
    acmod->n_mfc_frame = 0;
    return n_feat;
}
/**
 * Turn the cepstra held in the internal circular buffer into features.
 *
 * When the pending frames wrap around the end of mfc_buf, the part up to
 * the end of the buffer is processed first and the remainder afterwards.
 *
 * @return The value returned by the last acmod_process_cep() call.
 */
static int32
acmod_process_mfcbuf(acmod_t *acmod)
{
    mfcc_t **cursor;
    int32 pending = acmod->n_mfc_frame;

    if (acmod->mfc_outidx + pending > acmod->n_mfc_alloc) {
        /* Frames between the output index and the end of the buffer. */
        int32 head = acmod->n_mfc_alloc - acmod->mfc_outidx;
        int state_before = acmod->state;

        /* Make sure we don't end the utterance here. */
        if (acmod->state == ACMOD_ENDED) {
            acmod->state = ACMOD_PROCESSING;
        }
        cursor = acmod->mfc_buf + acmod->mfc_outidx;
        head = acmod_process_cep(acmod, &cursor, &head, FALSE);

        /* It's possible that not all available frames were filled. */
        pending -= head;
        acmod->n_mfc_frame -= head;
        acmod->mfc_outidx += head;
        acmod->mfc_outidx %= acmod->n_mfc_alloc;

        /* Put back the state this function was entered with. */
        acmod->state = state_before;
    }

    cursor = acmod->mfc_buf + acmod->mfc_outidx;
    pending = acmod_process_cep(acmod, &cursor, &pending, FALSE);
    acmod->n_mfc_frame -= pending;
    acmod->mfc_outidx += pending;
    acmod->mfc_outidx %= acmod->n_mfc_alloc;

    return pending;
}
/**
 * Feed raw audio to the acoustic model.
 *
 * With @p full_utt set, everything is delegated to
 * acmod_process_full_raw().  Otherwise the front end converts as many
 * samples as fit into the free part of the circular cepstral buffer
 * (in several pieces when the free space wraps around), the consumed
 * samples are copied to the raw data buffer and raw log file when those
 * are in use, and the buffered cepstra are then converted to features by
 * acmod_process_mfcbuf().
 *
 * @param acmod         Acoustic model.
 * @param inout_raw     In/out: sample pointer, advanced by the front end.
 * @param inout_n_samps In/out: sample count, updated by the front end.
 *                      When NULL or pointing at 0, only the already
 *                      buffered cepstra are processed.
 * @param full_utt      Non-zero if the samples are a complete utterance.
 * @return The result of acmod_process_mfcbuf() or
 *         acmod_process_full_raw(), or -1 if the front end fails.
 */
int
acmod_process_raw(acmod_t *acmod,
int16 const **inout_raw,
size_t *inout_n_samps,
int full_utt)
{
int32 ncep;
int32 out_frameidx;
int16 const *prev_audio_inptr;
/* If this is a full utterance, process it all at once. */
if (full_utt)
return acmod_process_full_raw(acmod, inout_raw, inout_n_samps);
/* Append MFCCs to the end of any that are previously in there
* (in practice, there will probably be none) */
if (inout_n_samps && *inout_n_samps) {
int inptr;
int32 processed_samples;
/* Remember where this batch starts so the consumed samples can be
 * copied and logged after each front-end call. */
prev_audio_inptr = *inout_raw;
/* Total number of frames available. */
ncep = acmod->n_mfc_alloc - acmod->n_mfc_frame;
/* Where to start writing them (circular buffer) */
inptr = (acmod->mfc_outidx + acmod->n_mfc_frame) % acmod->n_mfc_alloc;
/* Write them in two (or more) parts if there is wraparound. */
while (inptr + ncep > acmod->n_mfc_alloc) {
/* Room between the write position and the end of the buffer. */
int32 ncep1 = acmod->n_mfc_alloc - inptr;
if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
acmod->mfc_buf + inptr, &ncep1, &out_frameidx) < 0)
return -1;
/* A positive frame index from the front end becomes the new
 * utterance start frame. */
if (out_frameidx > 0)
acmod->utt_start_frame = out_frameidx;
processed_samples = *inout_raw - prev_audio_inptr;
/* Copy the consumed samples only while the raw data buffer has
 * room for all of them. */
if (processed_samples + acmod->rawdata_pos < acmod->rawdata_size) {
memcpy(acmod->rawdata + acmod->rawdata_pos, prev_audio_inptr, processed_samples * sizeof(int16));
acmod->rawdata_pos += processed_samples;
}
/* Write to logging file if any. */
if (acmod->rawfh) {
fwrite(prev_audio_inptr, sizeof(int16),
processed_samples,
acmod->rawfh);
}
prev_audio_inptr = *inout_raw;
/* ncep1 now contains the number of frames actually
* processed. This is a good thing, but it means we
* actually still might have some room left at the end of
* the buffer, hence the while loop. Unfortunately it
* also means that in the case where we are really
* actually done, we need to get out totally, hence the
* goto. */
acmod->n_mfc_frame += ncep1;
ncep -= ncep1;
inptr += ncep1;
inptr %= acmod->n_mfc_alloc;
if (ncep1 == 0)
goto alldone;
}
/* The remaining free space no longer wraps. */
assert(inptr + ncep <= acmod->n_mfc_alloc);
if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
acmod->mfc_buf + inptr, &ncep, &out_frameidx) < 0)
return -1;
if (out_frameidx > 0)
acmod->utt_start_frame = out_frameidx;
processed_samples = *inout_raw - prev_audio_inptr;
if (processed_samples + acmod->rawdata_pos < acmod->rawdata_size) {
memcpy(acmod->rawdata + acmod->rawdata_pos, prev_audio_inptr, processed_samples * sizeof(int16));
acmod->rawdata_pos += processed_samples;
}
/* Write to logging file if any. */
if (acmod->rawfh) {
fwrite(prev_audio_inptr, sizeof(int16),
processed_samples, acmod->rawfh);
}
prev_audio_inptr = *inout_raw;
acmod->n_mfc_frame += ncep;
alldone:
;
}
/* Hand things off to acmod_process_cep. */
return acmod_process_mfcbuf(acmod);
}
/**
 * Feed cepstral frames to the acoustic model, producing dynamic features
 * in the (possibly circular) feature buffer.
 *
 * With @p full_utt set, everything is delegated to
 * acmod_process_full_cep().  Otherwise as many frames are consumed as
 * the feature buffer allows; the buffer is enlarged when growing is
 * enabled or the utterance has ended.
 *
 * @param acmod          Acoustic model.
 * @param inout_cep      In/out: cepstral frame pointer, advanced past the
 *                       frames consumed.
 * @param inout_n_frames In/out: number of frames, reduced by the number
 *                       consumed.
 * @param full_utt       Non-zero if the frames are a complete utterance.
 * @return Number of cepstral frames consumed (or the result of
 *         acmod_process_full_cep()), 0 in the end-of-utterance
 *         wraparound case below, or -1 if feature computation fails.
 */
int
acmod_process_cep(acmod_t *acmod,
mfcc_t ***inout_cep,
int *inout_n_frames,
int full_utt)
{
int32 nfeat, ncep, inptr;
int orig_n_frames;
/* If this is a full utterance, process it all at once. */
if (full_utt)
return acmod_process_full_cep(acmod, inout_cep, inout_n_frames);
/* Write to log file. */
if (acmod->mfcfh)
acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);
/* Maximum number of frames we're going to generate. */
orig_n_frames = ncep = nfeat = *inout_n_frames;
/* FIXME: This behaviour isn't guaranteed... */
/* The expected feature count is adjusted by the feature window size:
 * more at the end of the utterance, fewer at its start. */
if (acmod->state == ACMOD_ENDED)
nfeat += feat_window_size(acmod->fcb);
else if (acmod->state == ACMOD_STARTED)
nfeat -= feat_window_size(acmod->fcb);
/* Clamp number of features to fit available space. */
if (nfeat > acmod->n_feat_alloc - acmod->n_feat_frame) {
/* Grow it as needed - we have to grow it at the end of an
* utterance because we can't return a short read there. */
if (acmod->grow_feat || acmod->state == ACMOD_ENDED)
acmod_grow_feat_buf(acmod, acmod->n_feat_alloc + nfeat);
else
ncep -= (nfeat - (acmod->n_feat_alloc - acmod->n_feat_frame));
}
/* Where to start writing in the feature buffer. */
if (acmod->grow_feat) {
/* Grow to avoid wraparound if grow_feat == TRUE. */
inptr = acmod->feat_outidx + acmod->n_feat_frame;
while (inptr + nfeat >= acmod->n_feat_alloc)
acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
}
else {
inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc;
}
/* FIXME: we can't split the last frame drop properly to be on the boundary,
* so just return
*/
if (inptr + nfeat > acmod->n_feat_alloc && acmod->state == ACMOD_ENDED) {
*inout_n_frames -= ncep;
*inout_cep += ncep;
return 0;
}
/* Write them in two parts if there is wraparound. */
if (inptr + nfeat > acmod->n_feat_alloc) {
/* Room between the write position and the end of the buffer. */
int32 ncep1 = acmod->n_feat_alloc - inptr;
/* Make sure we don't end the utterance here. */
nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep,
&ncep1,
(acmod->state == ACMOD_STARTED),
FALSE,
acmod->feat_buf + inptr);
if (nfeat < 0)
return -1;
/* Move the output feature pointer forward. */
acmod->n_feat_frame += nfeat;
assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
inptr += nfeat;
inptr %= acmod->n_feat_alloc;
/* Move the input feature pointers forward. */
*inout_n_frames -= ncep1;
*inout_cep += ncep1;
ncep -= ncep1;
}
/* Main (or second) part; the begin/end flags follow the state. */
nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep,
&ncep,
(acmod->state == ACMOD_STARTED),
(acmod->state == ACMOD_ENDED),
acmod->feat_buf + inptr);
if (nfeat < 0)
return -1;
acmod->n_feat_frame += nfeat;
assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
/* Move the input feature pointers forward. */
*inout_n_frames -= ncep;
*inout_cep += ncep;
/* The first batch of an utterance has now been handled. */
if (acmod->state == ACMOD_STARTED)
acmod->state = ACMOD_PROCESSING;
return orig_n_frames - *inout_n_frames;
}
/**
 * Append one ready-made feature frame to the feature buffer.
 *
 * @param acmod Acoustic model.
 * @param feat  The frame: one vector per feature stream.
 * @return 1 if the frame was stored, 0 if the buffer is full and may not
 *         grow.
 */
int
acmod_process_feat(acmod_t *acmod,
                   mfcc_t **feat)
{
    int stream, slot;

    /* A full buffer either grows or rejects the frame. */
    if (acmod->n_feat_frame == acmod->n_feat_alloc) {
        if (!acmod->grow_feat) {
            return 0;
        }
        acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
    }

    if (!acmod->grow_feat) {
        slot = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc;
    }
    else {
        /* Grow to avoid wraparound if grow_feat == TRUE. */
        slot = acmod->feat_outidx + acmod->n_feat_frame;
        while (slot + 1 >= acmod->n_feat_alloc) {
            acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
        }
    }

    /* Copy the frame stream by stream. */
    for (stream = 0; stream < feat_dimension1(acmod->fcb); ++stream) {
        memcpy(acmod->feat_buf[slot][stream],
               feat[stream],
               feat_dimension2(acmod->fcb, stream) * sizeof(**feat));
    }

    ++acmod->n_feat_frame;
    assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
    return 1;
}
/**
 * Read and validate the header of the input senone file
 * (acmod->insenfh).
 *
 * The "n_sen" entry must equal the number of senones in the model
 * definition and the "logbase" entry must be within 0.001 of the log
 * base in use.  On success the byte-swap flag reported by bio_readhdr()
 * is stored in acmod->insen_swap.
 *
 * @return 0 on success, -1 if the header cannot be read or does not
 *         match.
 */
static int
acmod_read_senfh_header(acmod_t *acmod)
{
    /* Start from NULL so the cleanup below never sees indeterminate
     * pointers when bio_readhdr() fails. */
    char **name = NULL, **val = NULL;
    int32 swap;
    int i;

    if (bio_readhdr(acmod->insenfh, &name, &val, &swap) < 0)
        goto error_out;
    for (i = 0; name[i] != NULL; ++i) {
        if (!strcmp(name[i], "n_sen")) {
            if (atoi(val[i]) != bin_mdef_n_sen(acmod->mdef)) {
                E_ERROR("Number of senones in senone file (%d) does not "
                        "match mdef (%d)\n", atoi(val[i]),
                        bin_mdef_n_sen(acmod->mdef));
                goto error_out;
            }
        }
        if (!strcmp(name[i], "logbase")) {
            if (fabs(atof_c(val[i]) - logmath_get_base(acmod->lmath)) > 0.001) {
                E_ERROR("Logbase in senone file (%f) does not match acmod "
                        "(%f)\n", atof_c(val[i]),
                        logmath_get_base(acmod->lmath));
                goto error_out;
            }
        }
    }
    acmod->insen_swap = swap;
    bio_hdrarg_free(name, val);
    return 0;

error_out:
    /* Only free header arrays that actually exist. */
    if (name != NULL)
        bio_hdrarg_free(name, val);
    return -1;
}
/**
 * Set the input senone file to read scores from.
 *
 * With a non-NULL file, all senones are treated as computed and the file
 * header is read and validated.  With NULL, the feature frame count is
 * reset and "-compallsen" is restored from the configuration.
 *
 * @return 0 when @p senfh is NULL, otherwise the result of
 *         acmod_read_senfh_header().
 */
int
acmod_set_insenfh(acmod_t *acmod, FILE *senfh)
{
    acmod->insenfh = senfh;

    if (senfh != NULL) {
        acmod->compallsen = TRUE;
        return acmod_read_senfh_header(acmod);
    }

    acmod->n_feat_frame = 0;
    acmod->compallsen = cmd_ln_boolean_r(acmod->config, "-compallsen");
    return 0;
}
/**
 * Rewind to the first frame of the current utterance so it can be scored
 * again.
 *
 * @return 0 on success, -1 if more frames have been output than the
 *         feature buffer can hold, in which case rewinding is refused.
 */
int
acmod_rewind(acmod_t *acmod)
{
    /* If the feature buffer is circular, this is not possible. */
    if (acmod->output_frame > acmod->n_feat_alloc) {
        E_ERROR("Circular feature buffer cannot be rewound (output frame %d, "
                "alloc %d)\n", acmod->output_frame, acmod->n_feat_alloc);
        return -1;
    }

    /* Frames already consumed become available again. */
    acmod->n_feat_frame += acmod->output_frame;

    /* Reset output pointers. */
    acmod->feat_outidx = 0;
    acmod->output_frame = 0;
    acmod->senscr_frame = -1;
    acmod->mgau->frame_idx = 0;

    return 0;
}
/**
 * Advance to the next output frame.
 *
 * @return The new output frame index.
 */
int
acmod_advance(acmod_t *acmod)
{
    /* Step the read index, wrapping at the end of the buffer. */
    acmod->feat_outidx += 1;
    if (acmod->feat_outidx == acmod->n_feat_alloc) {
        acmod->feat_outidx = 0;
    }

    acmod->n_feat_frame -= 1;
    acmod->mgau->frame_idx += 1;
    acmod->output_frame += 1;

    return acmod->output_frame;
}
/**
 * Write one frame of senone scores to @p senfh.
 *
 * Frame format:
 *
 *   (2 bytes) n_active: number of active senones
 *   If all senones are active:
 *     (n_active * 2 bytes) scores of all senones
 *   Otherwise:
 *     (n_active bytes) deltas to active senones
 *     (n_active * 2 bytes) scores of active senones
 *
 * @param acmod    Acoustic model.
 * @param n_active Number of active senones.
 * @param active   Deltas between successive active senone indices.
 * @param senscr   Score array indexed by senone.
 * @param senfh    Output file.
 * @return 0 on success, -1 on a write failure (an error is logged).
 */
int
acmod_write_scores(acmod_t *acmod, int n_active, uint8 const *active,
                   int16 const *senscr, FILE *senfh)
{
    int16 count16 = n_active;
    int failed = 0;

    if (fwrite(&count16, 2, 1, senfh) != 1) {
        failed = 1;
    }
    else if (n_active == bin_mdef_n_sen(acmod->mdef)) {
        /* Dense frame: the score array is written as-is. */
        if (fwrite(senscr, 2, n_active, senfh) != n_active) {
            failed = 1;
        }
    }
    else if (fwrite(active, 1, n_active, senfh) != n_active) {
        failed = 1;
    }
    else {
        /* Sparse frame: walk the deltas to find each active score. */
        int k;
        int sen = 0;

        for (k = 0; k < n_active; ++k) {
            sen += active[k];
            if (fwrite(senscr + sen, 2, 1, senfh) != 1) {
                failed = 1;
                break;
            }
        }
    }

    if (failed) {
        E_ERROR_SYSTEM("Failed to write frame to senone file");
        return -1;
    }
    return 0;
}
/**
 * Internal version, used for reading previous frames in acmod_score().
 *
 * Reads one frame of senone scores from acmod->insenfh into
 * acmod->senone_scores, in the format written by acmod_write_scores().
 * For a sparse frame, scores of senones not present in the frame are set
 * to SENSCR_DUMMY (from the first active senone onwards).
 *
 * The counts and deltas come from a file, so they are checked against
 * the number of senones in the model before they are used to index the
 * score and delta arrays.
 *
 * @return 1 if a frame was read; 0 at end of file, or if the feature
 *         buffer is full and may not grow; -1 if there is no input file,
 *         on a read error, or if the frame is inconsistent with the
 *         model.
 */
static int
acmod_read_scores_internal(acmod_t *acmod)
{
    FILE *senfh = acmod->insenfh;
    int16 n_active;
    int n_sen;

    if (acmod->n_feat_frame == acmod->n_feat_alloc) {
        if (acmod->grow_feat)
            acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
        else
            return 0;
    }
    if (senfh == NULL)
        return -1;

    n_sen = bin_mdef_n_sen(acmod->mdef);
    if (fread(&n_active, 2, 1, senfh) != 1)
        goto error_out;
    /* Both arrays filled below hold n_sen entries. */
    if (n_active < 0 || n_active > n_sen) {
        E_ERROR("Invalid number of active senones %d in senone file "
                "(model has %d)\n", n_active, n_sen);
        return -1;
    }
    acmod->n_senone_active = n_active;

    if (acmod->n_senone_active == n_sen) {
        /* Dense frame: all scores follow directly. */
        if (fread(acmod->senone_scores, 2,
                  acmod->n_senone_active, senfh)
            != (size_t)acmod->n_senone_active)
            goto error_out;
    }
    else {
        int i, n;

        if (fread(acmod->senone_active, 1,
                  acmod->n_senone_active, senfh)
            != (size_t)acmod->n_senone_active)
            goto error_out;
        for (i = 0, n = 0; i < acmod->n_senone_active; ++i) {
            int j, sen = n + acmod->senone_active[i];

            /* The accumulated deltas must stay inside the score array. */
            if (sen >= n_sen) {
                E_ERROR("Senone index %d out of range in senone file "
                        "(model has %d)\n", sen, n_sen);
                return -1;
            }
            /* Senones skipped by this delta have no score. */
            for (j = n + 1; j < sen; ++j)
                acmod->senone_scores[j] = SENSCR_DUMMY;
            if (fread(acmod->senone_scores + sen, 2, 1, senfh) != 1)
                goto error_out;
            n = sen;
        }
        /* Neither do the senones after the last active one. */
        n++;
        while (n < n_sen)
            acmod->senone_scores[n++] = SENSCR_DUMMY;
    }
    return 1;

error_out:
    /* A short read without a stream error is treated as end of file. */
    if (ferror(senfh)) {
        E_ERROR_SYSTEM("Failed to read frame from senone file");
        return -1;
    }
    return 0;
}
/**
 * Read the next frame of senone scores from the input senone file and
 * account for it as a new frame in the feature buffer.
 *
 * @return 1 if a frame was read, otherwise the (non-1) result of
 *         acmod_read_scores_internal().
 */
int
acmod_read_scores(acmod_t *acmod)
{
    int slot, status;

    if (!acmod->grow_feat) {
        slot = (acmod->feat_outidx + acmod->n_feat_frame) %
            acmod->n_feat_alloc;
    }
    else {
        /* Grow to avoid wraparound if grow_feat == TRUE. */
        slot = acmod->feat_outidx + acmod->n_feat_frame;
        /* Has to be +1, otherwise, next time acmod_advance() is
         * called, this will wrap around. */
        while (slot + 1 >= acmod->n_feat_alloc) {
            acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
        }
    }

    status = acmod_read_scores_internal(acmod);
    if (status != 1) {
        return status;
    }

    /* Remember which frame these scores belong to, so acmod_score() can
     * reuse them. */
    acmod->senscr_frame = acmod->output_frame + acmod->n_feat_frame;
    E_DEBUG(1,("Frame %d has %d active states\n",
               acmod->senscr_frame, acmod->n_senone_active));

    /* Count the frame and record the file position reached after reading
     * it, in the slot of the (possibly circular) buffer it occupies. */
    ++acmod->n_feat_frame;
    acmod->framepos[slot] = ftell(acmod->insenfh);

    return 1;
}
/**
 * Work out the absolute index of the frame a caller is asking for.
 *
 * @param acmod           Acoustic model.
 * @param inout_frame_idx NULL for the current output frame; a negative
 *                        value counts back from the frame after the
 *                        current one (-1 is the current output frame); a
 *                        non-negative value is already absolute.
 * @return The absolute frame index.
 */
static int
calc_frame_idx(acmod_t *acmod, int *inout_frame_idx)
{
    if (inout_frame_idx == NULL) {
        return acmod->output_frame;
    }
    if (*inout_frame_idx < 0) {
        return acmod->output_frame + 1 + *inout_frame_idx;
    }
    return *inout_frame_idx;
}
/**
 * Map an absolute frame index to its slot in feat_buf/framepos.
 *
 * @return The slot index, or -1 (with an error logged) if the frame is
 *         negative or lies further back than the number of unused slots
 *         in the buffer.
 */
static int
calc_feat_idx(acmod_t *acmod, int frame_idx)
{
    int back_room, slot;

    /* Slots not occupied by pending frames. */
    back_room = acmod->n_feat_alloc - acmod->n_feat_frame;
    if (frame_idx < 0 || acmod->output_frame - frame_idx > back_room) {
        E_ERROR("Frame %d outside queue of %d frames, %d alloc (%d > %d), "
                "cannot score\n", frame_idx, acmod->n_feat_frame,
                acmod->n_feat_alloc, acmod->output_frame - frame_idx,
                back_room);
        return -1;
    }

    /* Get the index in feat_buf/framepos of the frame to be scored. */
    slot = (acmod->feat_outidx + frame_idx - acmod->output_frame) %
        acmod->n_feat_alloc;
    /* The remainder can be negative for frames behind the output index. */
    if (slot < 0) {
        slot += acmod->n_feat_alloc;
    }
    return slot;
}
/**
 * Get a frame of dynamic features.
 *
 * @param acmod           Acoustic model.
 * @param inout_frame_idx In: requested frame (see calc_frame_idx()), may
 *                        be NULL.  Out: the absolute frame index.
 * @return The feature frame, or NULL if it is not in the buffer.
 */
mfcc_t **
acmod_get_frame(acmod_t *acmod, int *inout_frame_idx)
{
    int abs_idx, slot;

    /* Calculate the absolute frame index requested. */
    abs_idx = calc_frame_idx(acmod, inout_frame_idx);

    /* Calculate position of requested frame in circular buffer. */
    slot = calc_feat_idx(acmod, abs_idx);
    if (slot < 0) {
        return NULL;
    }

    if (inout_frame_idx != NULL) {
        *inout_frame_idx = abs_idx;
    }
    return acmod->feat_buf[slot];
}
/**
 * Score one frame of data.
 *
 * Scores already held for the requested frame are reused when all
 * senones are computed or a senone input file is in use.  Otherwise the
 * scores are read back from the senone input file, or computed from the
 * feature buffer for the active senones.  The result is also appended to
 * the senone dump file when one is set.
 *
 * @param acmod           Acoustic model.
 * @param inout_frame_idx In: requested frame (see calc_frame_idx()), may
 *                        be NULL.  Out: the absolute frame index scored.
 * @return The senone score array, or NULL if the frame is unavailable or
 *         reading/writing scores fails.
 */
int16 const *
acmod_score(acmod_t *acmod, int *inout_frame_idx)
{
    int abs_idx, slot;

    /* Calculate the absolute frame index to be scored. */
    abs_idx = calc_frame_idx(acmod, inout_frame_idx);

    /* If all senones are being computed, or we are using a senone file,
       then we can reuse existing scores. */
    if ((acmod->compallsen || acmod->insenfh)
        && abs_idx == acmod->senscr_frame) {
        if (inout_frame_idx != NULL) {
            *inout_frame_idx = abs_idx;
        }
        return acmod->senone_scores;
    }

    /* Calculate position of requested frame in circular buffer. */
    slot = calc_feat_idx(acmod, abs_idx);
    if (slot < 0) {
        return NULL;
    }

    if (!acmod->insenfh) {
        /* Build active senone list. */
        acmod_flags2list(acmod);

        /* Generate scores for the next available frame */
        ps_mgau_frame_eval(acmod->mgau,
                           acmod->senone_scores,
                           acmod->senone_active,
                           acmod->n_senone_active,
                           acmod->feat_buf[slot],
                           abs_idx,
                           acmod->compallsen);
    }
    else {
        /* Seek to the file position recorded for this slot and read the
         * frame from the senone input file. */
        fseek(acmod->insenfh, acmod->framepos[slot], SEEK_SET);
        if (acmod_read_scores_internal(acmod) < 0) {
            return NULL;
        }
    }

    if (inout_frame_idx != NULL) {
        *inout_frame_idx = abs_idx;
    }
    acmod->senscr_frame = abs_idx;

    /* Dump scores to the senone dump file if one exists. */
    if (acmod->senfh) {
        if (acmod_write_scores(acmod, acmod->n_senone_active,
                               acmod->senone_active,
                               acmod->senone_scores,
                               acmod->senfh) < 0) {
            return NULL;
        }
        E_DEBUG(1,("Frame %d has %d active states\n", abs_idx,
                   acmod->n_senone_active));
    }

    return acmod->senone_scores;
}
/**
 * Find the best (lowest) senone score of the current frame.
 *
 * @param acmod          Acoustic model holding the current scores.
 * @param out_best_senid Output: senone ID of the best score.  Left
 *                       untouched if no score is below SENSCR_DUMMY.
 *                       May be NULL if the ID is not wanted.
 * @return The best score, or SENSCR_DUMMY if none is lower.
 */
int
acmod_best_score(acmod_t *acmod, int *out_best_senid)
{
    int i, best;

    best = SENSCR_DUMMY;
    if (acmod->compallsen) {
        for (i = 0; i < bin_mdef_n_sen(acmod->mdef); ++i) {
            if (acmod->senone_scores[i] < best) {
                best = acmod->senone_scores[i];
                if (out_best_senid)
                    *out_best_senid = i;
            }
        }
    }
    else {
        int16 *senscr;

        /* senone_active holds deltas between successive active senones,
         * so walk a pointer through the score array. */
        senscr = acmod->senone_scores;
        for (i = 0; i < acmod->n_senone_active; ++i) {
            senscr += acmod->senone_active[i];
            if (*senscr < best) {
                best = *senscr;
                /* Report the senone ID, as the branch above does, rather
                 * than the position in the active list. */
                if (out_best_senid)
                    *out_best_senid = (int)(senscr - acmod->senone_scores);
            }
        }
    }
    return best;
}
/**
 * Clear the set of active senones.  Does nothing when all senones are
 * being computed.
 */
void
acmod_clear_active(acmod_t *acmod)
{
    if (!acmod->compallsen) {
        bitvec_clear_all(acmod->senone_active_vec, bin_mdef_n_sen(acmod->mdef));
        acmod->n_senone_active = 0;
    }
}
/* Flag the senone of state i of multiplex HMM h as active in a, unless
 * that state has no senone sequence (BAD_SSID).  Wrapped in
 * do { } while (0) so the macro is a single statement and cannot capture
 * an `else` that follows it. */
#define MPX_BITVEC_SET(a,h,i) \
    do { \
        if (hmm_mpx_ssid(h,i) != BAD_SSID) \
            bitvec_set((a)->senone_active_vec, hmm_mpx_senid(h,i)); \
    } while (0)
/* Flag the senone of state i of non-multiplex HMM h as active in a. */
#define NONMPX_BITVEC_SET(a,h,i) \
    bitvec_set((a)->senone_active_vec, \
               hmm_nonmpx_senid(h,i))
/**
 * Mark the senones of all emitting states of @p hmm as active.  Does
 * nothing when all senones are being computed.
 *
 * HMMs with 5 or 3 emitting states are handled from the last state down
 * to the first; any other size is handled from the first state up.
 */
void
acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
{
    int s, n_emit;

    if (acmod->compallsen) {
        return;
    }

    n_emit = hmm_n_emit_state(hmm);
    if (hmm_is_mpx(hmm)) {
        if (n_emit == 5 || n_emit == 3) {
            for (s = n_emit - 1; s >= 0; --s) {
                MPX_BITVEC_SET(acmod, hmm, s);
            }
        }
        else {
            for (s = 0; s < n_emit; ++s) {
                MPX_BITVEC_SET(acmod, hmm, s);
            }
        }
    }
    else {
        if (n_emit == 5 || n_emit == 3) {
            for (s = n_emit - 1; s >= 0; --s) {
                NONMPX_BITVEC_SET(acmod, hmm, s);
            }
        }
        else {
            for (s = 0; s < n_emit; ++s) {
                NONMPX_BITVEC_SET(acmod, hmm, s);
            }
        }
    }
}
/**
 * Build the active senone list (a sequence of deltas between successive
 * active senone indices) from the active senone bit vector.
 *
 * Each delta is stored in one senone_active entry.  A gap larger than
 * 255 is bridged by extra entries of 255, which makes the senones at
 * those intermediate positions active as well.
 *
 * @return The number of entries in the list, or the total number of
 *         senones when all senones are being computed.
 */
int32
acmod_flags2list(acmod_t *acmod)
{
    int32 word_idx, bit, prev_sen, n_listed;
    int32 n_sen, n_full_words, n_tail_bits;
    bitvec_t *word;

    n_sen = bin_mdef_n_sen(acmod->mdef);
    if (acmod->compallsen) {
        acmod->n_senone_active = n_sen;
        return n_sen;
    }

    n_full_words = n_sen / BITVEC_BITS;
    n_tail_bits = n_sen % BITVEC_BITS;
    prev_sen = n_listed = 0;
    word = acmod->senone_active_vec;

    /* Visit every full word, then the partial last word if there is one. */
    for (word_idx = 0; word_idx <= n_full_words; ++word_idx, ++word) {
        int32 n_bits = (word_idx < n_full_words) ? BITVEC_BITS : n_tail_bits;

        /* No partial word: stop without reading past the full words. */
        if (n_bits == 0)
            break;
        /* Nothing active in this word. */
        if (*word == 0)
            continue;

        for (bit = 0; bit < n_bits; ++bit) {
            int32 sen, gap;

            if (!(*word & (1UL << bit)))
                continue;
            sen = word_idx * BITVEC_BITS + bit;
            gap = sen - prev_sen;
            /* Handle excessive deltas "lossily" by adding a few
               extra senones to bridge the gap. */
            while (gap > 255) {
                acmod->senone_active[n_listed++] = 255;
                gap -= 255;
            }
            acmod->senone_active[n_listed++] = gap;
            prev_sen = sen;
        }
    }

    acmod->n_senone_active = n_listed;
    E_DEBUG(1, ("acmod_flags2list: %d active in frame %d\n",
                acmod->n_senone_active, acmod->output_frame));
    return n_listed;
}
/**
 * Get the utterance start frame recorded in @p acmod.
 */
int32
acmod_stream_offset(acmod_t *acmod)
{
    int32 offset = acmod->utt_start_frame;

    return offset;
}
/**
 * Begin a new stream: the utterance start frame is reset to 0 and the
 * front end is told that a stream is starting.
 */
void
acmod_start_stream(acmod_t *acmod)
{
    fe_start_stream(acmod->fe);

    acmod->utt_start_frame = 0;
}
/**
 * Set the capacity, in samples, of the buffer that keeps a copy of the
 * raw audio.  A positive size replaces the existing buffer with a new
 * zero-filled one; a size of 0 leaves the existing buffer in place.
 */
void
acmod_set_rawdata_size(acmod_t *acmod, int32 size)
{
    assert(size >= 0);

    acmod->rawdata_size = size;
    if (acmod->rawdata_size <= 0) {
        return;
    }

    ckd_free(acmod->rawdata);
    acmod->rawdata = ckd_calloc(size, sizeof(int16));
}
/**
 * Retrieve the raw audio copy.
 *
 * @param acmod  Acoustic model.
 * @param buffer Output: the internal raw data buffer (may be NULL if not
 *               wanted).
 * @param size   Output: number of samples stored in it (may be NULL if
 *               not wanted).
 */
void
acmod_get_rawdata(acmod_t *acmod, int16 **buffer, int32 *size)
{
    if (buffer != NULL)
        *buffer = acmod->rawdata;

    if (size != NULL)
        *size = acmod->rawdata_pos;
}