tenfourfox/media/sphinxbase/src/libsphinxbase/feat/agc.c
Cameron Kaiser c9b2922b70 hello FPR
2017-04-19 00:56:45 -07:00

228 lines
6.0 KiB
C

/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
/* ====================================================================
* Copyright (c) 1999-2004 Carnegie Mellon University. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* This work was supported in part by funding from the Defense Advanced
* Research Projects Agency and the National Science Foundation of the
* United States of America, and the CMU Sphinx Speech Consortium.
*
* THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
* NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* ====================================================================
*
*/
/*
* agc.c -- Various forms of automatic gain control (AGC)
*
* **********************************************
* CMU ARPA Speech Project
*
* Copyright (c) 1996 Carnegie Mellon University.
* ALL RIGHTS RESERVED.
* **********************************************
*
* HISTORY
* $Log$
* Revision 1.5 2005/06/21 19:25:41 arthchan2003
* 1, Fixed doxygen documentation. 2, Added $ keyword.
*
* Revision 1.3 2005/03/30 01:22:46 archan
* Fixed mistakes in last updates. Add
*
*
* 04-Nov-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
* Created.
*/
#include <string.h>
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "sphinxbase/err.h"
#include "sphinxbase/ckd_alloc.h"
#include "sphinxbase/agc.h"
/* NOTE! These must match the enum in agc.h */
const char *agc_type_str[] = {
"none",
"max",
"emax",
"noise"
};
static const int n_agc_type_str = sizeof(agc_type_str)/sizeof(agc_type_str[0]);
agc_type_t
agc_type_from_str(const char *str)
{
int i;
for (i = 0; i < n_agc_type_str; ++i) {
if (0 == strcmp(str, agc_type_str[i]))
return (agc_type_t)i;
}
E_FATAL("Unknown AGC type '%s'\n", str);
return AGC_NONE;
}
agc_t *agc_init(void)
{
agc_t *agc;
agc = ckd_calloc(1, sizeof(*agc));
agc->noise_thresh = FLOAT2MFCC(2.0);
return agc;
}
void agc_free(agc_t *agc)
{
ckd_free(agc);
}
/**
* Normalize c0 for all frames such that max(c0) = 0.
*/
void
agc_max(agc_t *agc, mfcc_t **mfc, int32 n_frame)
{
int32 i;
if (n_frame <= 0)
return;
agc->obs_max = mfc[0][0];
for (i = 1; i < n_frame; i++) {
if (mfc[i][0] > agc->obs_max) {
agc->obs_max = mfc[i][0];
agc->obs_frame = 1;
}
}
E_INFO("AGCMax: obs=max= %.2f\n", agc->obs_max);
for (i = 0; i < n_frame; i++)
mfc[i][0] -= agc->obs_max;
}
void
agc_emax_set(agc_t *agc, float32 m)
{
agc->max = FLOAT2MFCC(m);
E_INFO("AGCEMax: max= %.2f\n", m);
}
float32
agc_emax_get(agc_t *agc)
{
return MFCC2FLOAT(agc->max);
}
void
agc_emax(agc_t *agc, mfcc_t **mfc, int32 n_frame)
{
int i;
if (n_frame <= 0)
return;
for (i = 0; i < n_frame; ++i) {
if (mfc[i][0] > agc->obs_max) {
agc->obs_max = mfc[i][0];
agc->obs_frame = 1;
}
mfc[i][0] -= agc->max;
}
}
/* Update estimated max for next utterance */
void
agc_emax_update(agc_t *agc)
{
if (agc->obs_frame) { /* Update only if some data observed */
agc->obs_max_sum += agc->obs_max;
agc->obs_utt++;
/* Re-estimate max over past history; decay the history */
agc->max = agc->obs_max_sum / agc->obs_utt;
if (agc->obs_utt == 16) {
agc->obs_max_sum /= 2;
agc->obs_utt = 8;
}
}
E_INFO("AGCEMax: obs= %.2f, new= %.2f\n", agc->obs_max, agc->max);
/* Reset the accumulators for the next utterance. */
agc->obs_frame = 0;
agc->obs_max = FLOAT2MFCC(-1000.0); /* Less than any real C0 value (hopefully!!) */
}
void
agc_noise(agc_t *agc,
mfcc_t **cep,
int32 nfr)
{
mfcc_t min_energy; /* Minimum log-energy */
mfcc_t noise_level; /* Average noise_level */
int32 i; /* frame index */
int32 noise_frames; /* Number of noise frames */
/* Determine minimum log-energy in utterance */
min_energy = cep[0][0];
for (i = 0; i < nfr; ++i) {
if (cep[i][0] < min_energy)
min_energy = cep[i][0];
}
/* Average all frames between min_energy and min_energy + agc->noise_thresh */
noise_frames = 0;
noise_level = 0;
min_energy += agc->noise_thresh;
for (i = 0; i < nfr; ++i) {
if (cep[i][0] < min_energy) {
noise_level += cep[i][0];
noise_frames++;
}
}
if (noise_frames > 0) {
noise_level /= noise_frames;
E_INFO("AGC NOISE: max= %6.3f\n", MFCC2FLOAT(noise_level));
/* Subtract noise_level from all log_energy values */
for (i = 0; i < nfr; i++) {
cep[i][0] -= noise_level;
}
}
}
void
agc_set_threshold(agc_t *agc, float32 threshold)
{
agc->noise_thresh = FLOAT2MFCC(threshold);
}
float32
agc_get_threshold(agc_t *agc)
{
return FLOAT2MFCC(agc->noise_thresh);
}