mirror of
https://github.com/classilla/tenfourfox.git
synced 2024-06-06 22:29:34 +00:00
256 lines
9.5 KiB
C
256 lines
9.5 KiB
C
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
|
/* ====================================================================
|
|
* Copyright (c) 1999-2013 Carnegie Mellon University. All rights
|
|
* reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
*
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
*
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
|
|
* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
|
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
|
|
* NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*
|
|
* ====================================================================
|
|
*
|
|
*/
|
|
/*
|
|
* fsg_lextree.h -- The collection of all the lextrees for the entire FSM.
|
|
*
|
|
*/
|
|
|
|
#ifndef __S2_FSG_LEXTREE_H__
|
|
#define __S2_FSG_LEXTREE_H__
|
|
|
|
/* SphinxBase headers. */
|
|
#include <sphinxbase/cmd_ln.h>
|
|
#include <sphinxbase/fsg_model.h>
|
|
|
|
/* Local headers. */
|
|
#include "hmm.h"
|
|
#include "dict.h"
|
|
#include "dict2pid.h"
|
|
|
|
/*
|
|
* Compile-time constant determining the size of the
|
|
* bitvector fsg_pnode_t.fsg_pnode_ctxt_t.bv. (See below.)
|
|
* But it makes memory allocation simpler and more efficient.
|
|
* Make it smaller (2) to save memory if your phoneset has less than
|
|
* 64 phones.
|
|
*/
|
|
#define FSG_PNODE_CTXT_BVSZ 4
|
|
|
|
typedef struct {
|
|
uint32 bv[FSG_PNODE_CTXT_BVSZ];
|
|
} fsg_pnode_ctxt_t;
|
|
|
|
|
|
/*
|
|
* All transitions (words) out of any given FSG state represented are by a
|
|
* phonetic prefix lextree (except for epsilon or null transitions; they
|
|
* are not part of the lextree). Lextree leaf nodes represent individual
|
|
* FSG transitions, so no sharing is allowed at the leaf nodes. The FSG
|
|
* transition probs are distributed along the lextree: the prob at a node
|
|
* is the max of the probs of all leaf nodes (and, hence, FSG transitions)
|
|
* reachable from that node.
|
|
*
|
|
* To conserve memory, the underlying HMMs with state-level information are
|
|
* allocated only as needed. Root and leaf nodes must also account for all
|
|
* the possible phonetic contexts, with an independent HMM for each distinct
|
|
* context.
|
|
*/
|
|
typedef struct fsg_pnode_s {
|
|
/*
|
|
* If this is not a leaf node, the first successor (child) node. Otherwise
|
|
* the parent FSG transition for which this is the leaf node (for figuring
|
|
* the FSG destination state, and word emitted by the transition). A node
|
|
* may have several children. The succ ptr gives just the first; the rest
|
|
* are linked via the sibling ptr below.
|
|
*/
|
|
union {
|
|
struct fsg_pnode_s *succ;
|
|
fsg_link_t *fsglink;
|
|
} next;
|
|
|
|
/*
|
|
* For simplicity of memory management (i.e., freeing the pnodes), all
|
|
* pnodes allocated for all transitions out of a state are maintained in a
|
|
* linear linked list through the alloc_next pointer.
|
|
*/
|
|
struct fsg_pnode_s *alloc_next;
|
|
|
|
/*
|
|
* The next node that is also a child of the parent of this node; NULL if
|
|
* none.
|
|
*/
|
|
struct fsg_pnode_s *sibling;
|
|
|
|
/*
|
|
* The transition (log) probability to be incurred upon transitioning to
|
|
* this node. (Transition probabilities are really associated with the
|
|
* transitions. But a lextree node has exactly one incoming transition.
|
|
* Hence, the prob can be associated with the node.)
|
|
* This is a logs2(prob) value, and includes the language weight.
|
|
*/
|
|
int32 logs2prob;
|
|
|
|
/*
|
|
* The root and leaf positions associated with any transition have to deal
|
|
* with multiple phonetic contexts. However, different contexts may result
|
|
* in the same SSID (senone-seq ID), and can share a single pnode with that
|
|
* SSID. But the pnode should track the set of context CI phones that share
|
|
* it. Hence the fsg_pnode_ctxt_t bit-vector set-representation. (For
|
|
* simplicity of implementation, its size is a compile-time constant for
|
|
* now.) Single phone words would need a 2-D array of context, but that's
|
|
* too expensive. For now, they simply use SIL as right context, so only
|
|
* the left context is properly modelled.
|
|
* (For word-internal phones, this field is unused, of course.)
|
|
*/
|
|
fsg_pnode_ctxt_t ctxt;
|
|
|
|
uint16 ci_ext; /* This node's CIphone as viewed externally (context) */
|
|
uint8 ppos; /* Phoneme position in pronunciation */
|
|
uint8 leaf; /* Whether this is a leaf node */
|
|
|
|
/* HMM-state-level stuff here */
|
|
hmm_context_t *ctx;
|
|
hmm_t hmm;
|
|
} fsg_pnode_t;
|
|
|
|
/* Access macros */
|
|
#define fsg_pnode_leaf(p) ((p)->leaf)
|
|
#define fsg_pnode_logs2prob(p) ((p)->logs2prob)
|
|
#define fsg_pnode_succ(p) ((p)->next.succ)
|
|
#define fsg_pnode_fsglink(p) ((p)->next.fsglink)
|
|
#define fsg_pnode_sibling(p) ((p)->sibling)
|
|
#define fsg_pnode_hmmptr(p) (&((p)->hmm))
|
|
#define fsg_pnode_ci_ext(p) ((p)->ci_ext)
|
|
#define fsg_pnode_ppos(p) ((p)->ppos)
|
|
#define fsg_pnode_leaf(p) ((p)->leaf)
|
|
#define fsg_pnode_ctxt(p) ((p)->ctxt)
|
|
|
|
#define fsg_pnode_add_ctxt(p,c) ((p)->ctxt.bv[(c)>>5] |= (1 << ((c)&0x001f)))
|
|
|
|
/*
|
|
* The following is macroized because its called very frequently
|
|
* ::: uint32 fsg_pnode_ctxt_sub (fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub);
|
|
*/
|
|
/*
|
|
* Subtract bitvector sub from bitvector src (src updated with the result).
|
|
* Return 0 if result is all 0, non-zero otherwise.
|
|
*/
|
|
|
|
#if (FSG_PNODE_CTXT_BVSZ == 1)
|
|
#define FSG_PNODE_CTXT_SUB(src,sub) \
|
|
((src)->bv[0] = (~((sub)->bv[0]) & (src)->bv[0]))
|
|
#elif (FSG_PNODE_CTXT_BVSZ == 2)
|
|
#define FSG_PNODE_CTXT_SUB(src,sub) \
|
|
(((src)->bv[0] = (~((sub)->bv[0]) & (src)->bv[0])) | \
|
|
((src)->bv[1] = (~((sub)->bv[1]) & (src)->bv[1])))
|
|
#elif (FSG_PNODE_CTXT_BVSZ == 4)
|
|
#define FSG_PNODE_CTXT_SUB(src,sub) \
|
|
(((src)->bv[0] = (~((sub)->bv[0]) & (src)->bv[0])) | \
|
|
((src)->bv[1] = (~((sub)->bv[1]) & (src)->bv[1])) | \
|
|
((src)->bv[2] = (~((sub)->bv[2]) & (src)->bv[2])) | \
|
|
((src)->bv[3] = (~((sub)->bv[3]) & (src)->bv[3])))
|
|
#else
|
|
#define FSG_PNODE_CTXT_SUB(src,sub) fsg_pnode_ctxt_sub_generic((src),(sub))
|
|
#endif
|
|
|
|
/**
|
|
* Collection of lextrees for an FSG.
|
|
*/
|
|
typedef struct fsg_lextree_s {
|
|
fsg_model_t *fsg; /**< The fsg for which this lextree is built. */
|
|
hmm_context_t *ctx; /**< HMM context structure. */
|
|
dict_t *dict; /**< Pronunciation dictionary for this FSG. */
|
|
dict2pid_t *d2p; /**< Context-dependent phone mappings for this FSG. */
|
|
bin_mdef_t *mdef; /**< Model definition (triphone mappings). */
|
|
|
|
/*
|
|
* Left and right CIphone sets for each state.
|
|
* Left context CIphones for a state S: If word W transitions into S, W's
|
|
* final CIphone is in S's {lc}. Words transitioning out of S must consider
|
|
* these left context CIphones.
|
|
* Similarly, right contexts for state S: If word W transitions out of S,
|
|
* W's first CIphone is in S's {rc}. Words transitioning into S must consider
|
|
* these right contexts.
|
|
*
|
|
* NOTE: Words may transition into and out of S INDIRECTLY, with intermediate
|
|
* null transitions.
|
|
* NOTE: Single-phone words are difficult; only SILENCE right context is
|
|
* modelled for them.
|
|
* NOTE: Non-silence filler phones aren't included in these sets. Filler
|
|
* words don't use context, and present the SILENCE phone as context to
|
|
* adjacent words.
|
|
*/
|
|
int16 **lc; /**< Left context triphone mappings for FSG. */
|
|
int16 **rc; /**< Right context triphone mappings for FSG. */
|
|
|
|
fsg_pnode_t **root; /* root[s] = lextree representing all transitions
|
|
out of state s. Note that the "tree" for each
|
|
state is actually a collection of trees, linked
|
|
via fsg_pnode_t.sibling (root[s]->sibling) */
|
|
fsg_pnode_t **alloc_head; /* alloc_head[s] = head of linear list of all
|
|
pnodes allocated for state s */
|
|
int32 n_pnode; /* #HMM nodes in search structure */
|
|
int32 wip;
|
|
int32 pip;
|
|
} fsg_lextree_t;
|
|
|
|
/* Access macros */
|
|
#define fsg_lextree_root(lt,s) ((lt)->root[s])
|
|
#define fsg_lextree_n_pnode(lt) ((lt)->n_pnode)
|
|
|
|
/**
|
|
* Create, initialize, and return a new phonetic lextree for the given FSG.
|
|
*/
|
|
fsg_lextree_t *fsg_lextree_init(fsg_model_t *fsg, dict_t *dict,
|
|
dict2pid_t *d2p,
|
|
bin_mdef_t *mdef, hmm_context_t *ctx,
|
|
int32 wip, int32 pip);
|
|
|
|
/**
|
|
* Free lextrees for an FSG.
|
|
*/
|
|
void fsg_lextree_free(fsg_lextree_t *fsg);
|
|
|
|
/**
|
|
* Print an FSG lextree to a file for debugging.
|
|
*/
|
|
void fsg_lextree_dump(fsg_lextree_t *fsg, FILE *fh);
|
|
|
|
/**
|
|
* Mark the given pnode as inactive (for search).
|
|
*/
|
|
void fsg_psubtree_pnode_deactivate(fsg_pnode_t *pnode);
|
|
|
|
/**
|
|
* Set all flags on in the given context bitvector.
|
|
*/
|
|
void fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t *ctxt);
|
|
|
|
/**
|
|
* Generic variant for arbitrary size
|
|
*/
|
|
uint32 fsg_pnode_ctxt_sub_generic(fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub);
|
|
|
|
#endif
|