root / trunk / Linux / addons / ofxASR / libs / sphinx / include / sphinx3 / mdef.h @ 59

View | Annotate | Download (14.7 KB)

1 59 jimbo
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 59 jimbo
/* ====================================================================
3 59 jimbo
 * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
4 59 jimbo
 * reserved.
5 59 jimbo
 *
6 59 jimbo
 * Redistribution and use in source and binary forms, with or without
7 59 jimbo
 * modification, are permitted provided that the following conditions
8 59 jimbo
 * are met:
9 59 jimbo
 *
10 59 jimbo
 * 1. Redistributions of source code must retain the above copyright
11 59 jimbo
 *    notice, this list of conditions and the following disclaimer.
12 59 jimbo
 *
13 59 jimbo
 * 2. Redistributions in binary form must reproduce the above copyright
14 59 jimbo
 *    notice, this list of conditions and the following disclaimer in
15 59 jimbo
 *    the documentation and/or other materials provided with the
16 59 jimbo
 *    distribution.
17 59 jimbo
 *
18 59 jimbo
 * This work was supported in part by funding from the Defense Advanced
19 59 jimbo
 * Research Projects Agency and the National Science Foundation of the
20 59 jimbo
 * United States of America, and the CMU Sphinx Speech Consortium.
21 59 jimbo
 *
22 59 jimbo
 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 59 jimbo
 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 59 jimbo
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 59 jimbo
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 59 jimbo
 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 59 jimbo
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 59 jimbo
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 59 jimbo
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 59 jimbo
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 59 jimbo
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 59 jimbo
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 59 jimbo
 *
34 59 jimbo
 * ====================================================================
35 59 jimbo
 *
36 59 jimbo
 */
37 59 jimbo
/*
38 59 jimbo
 * mdef.h -- HMM model definition: base (CI) phones and triphones
39 59 jimbo
 *
40 59 jimbo
 * **********************************************
41 59 jimbo
 * CMU ARPA Speech Project
42 59 jimbo
 *
43 59 jimbo
 * Copyright (c) 1999 Carnegie Mellon University.
44 59 jimbo
 * ALL RIGHTS RESERVED.
45 59 jimbo
 * **********************************************
46 59 jimbo
 *
47 59 jimbo
 * HISTORY
48 59 jimbo
 * $Log$
49 59 jimbo
 * Revision 1.1  2006/04/05  20:27:30  dhdfu
50 59 jimbo
 * A Great Reorganzation of header files and executables
51 59 jimbo
 *
52 59 jimbo
 * Revision 1.13  2006/02/22 16:52:51  arthchan2003
53 59 jimbo
 * Merged from SPHINX3_5_2_RCI_IRII_BRANCH: 1, Fixed memory leaks in mdef. 2,  Fixed $, 3, Fixed dox-doc.
54 59 jimbo
 *
55 59 jimbo
 * Revision 1.12.4.2  2005/07/05 05:47:59  arthchan2003
56 59 jimbo
 * Fixed dox-doc. struct level of documentation are included.
57 59 jimbo
 *
58 59 jimbo
 * Revision 1.12.4.1  2005/07/03 22:54:09  arthchan2003
59 59 jimbo
 * move st2senmap into mdef_t, it was not properly freed before. \n
60 59 jimbo
 *
61 59 jimbo
 * Revision 1.12  2005/06/21 18:47:39  arthchan2003
62 59 jimbo
 * Log. 1, Added breport flag to mdef_init, 2, implemented reporting functions to
63 59 jimbo
 * mdef_report. 3, Fixed doxygen-style documentation. 4, Added $Log$
64 59 jimbo
 * Revision 1.1  2006/04/05  20:27:30  dhdfu
65 59 jimbo
 * A Great Reorganzation of header files and executables
66 59 jimbo
 *
67 59 jimbo
 * mdef_report. 3, Fixed doxygen-style documentation. 4, Added Revision 1.13  2006/02/22 16:52:51  arthchan2003
68 59 jimbo
 * mdef_report. 3, Fixed doxygen-style documentation. 4, Added Merged from SPHINX3_5_2_RCI_IRII_BRANCH: 1, Fixed memory leaks in mdef. 2,  Fixed $, 3, Fixed dox-doc.
69 59 jimbo
 * mdef_report. 3, Fixed doxygen-style documentation. 4, Added
70 59 jimbo
 *
71 59 jimbo
 * Revision 1.5  2005/06/13 04:02:55  archan
72 59 jimbo
 * Fixed most doxygen-style documentation under libs3decoder.
73 59 jimbo
 *
74 59 jimbo
 * Revision 1.4  2005/04/21 23:50:26  archan
75 59 jimbo
 * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in.  At this moment, everything in search mode 5 is already done.  It is time to test the idea whether the search can really be used.
76 59 jimbo
 *
77 59 jimbo
 * Revision 1.3  2005/03/30 01:22:47  archan
78 59 jimbo
 * Fixed mistakes in last updates. Add
79 59 jimbo
 *
80 59 jimbo
 * 19.Apr-2001  Ricky Houghton, added code for free allocated memory
81 59 jimbo
 *
82 59 jimbo
 * 14-Oct-1999        M K Ravishankar ([email protected]) at Carnegie Mellon
83 59 jimbo
 *                 Added mdef_sseq2sen_active().
84 59 jimbo
 *
85 59 jimbo
 * 30-Apr-1999        M K Ravishankar ([email protected]) at Carnegie Mellon
86 59 jimbo
 *                 Added senone-sequence id (ssid) to phone_t and appropriate functions to
87 59 jimbo
 *                 maintain it.  Instead, moved state sequence info to mdef_t.
88 59 jimbo
 *
89 59 jimbo
 * 13-Jul-96        M K Ravishankar ([email protected]) at Carnegie Mellon University.
90 59 jimbo
 *                 Added mdef_phone_str().
91 59 jimbo
 *
92 59 jimbo
 * 01-Jan-96        M K Ravishankar ([email protected]) at Carnegie Mellon University.
93 59 jimbo
 *                 Created.
94 59 jimbo
 */
95 59 jimbo
96 59 jimbo
97 59 jimbo
#ifndef _S3_MDEF_H_
#define _S3_MDEF_H_

#include <stdio.h>

#include <hash_table.h>
#include "s3types.h"

/* Give the declarations below C linkage when included from C++. */
#ifdef __cplusplus
extern "C" {
#endif
#if 0
} /* Fool Emacs into not indenting things. */
#endif
111 59 jimbo
112 59 jimbo
/** \file mdef.h
113 59 jimbo
 * \brief Model definition
114 59 jimbo
 */
115 59 jimbo
116 59 jimbo
/** \enum word_posn_t
 * \brief Position of a phone within a word.
 *
 * The numeric value of every enumerator is fixed explicitly below.
 */
typedef enum {
    WORD_POSN_INTERNAL = 0,     /**< Internal phone of word */
    WORD_POSN_BEGIN = 1,        /**< Beginning phone of word */
    WORD_POSN_END = 2,          /**< Ending phone of word */
    WORD_POSN_SINGLE = 3,       /**< Single phone word (i.e. begin & end) */
    WORD_POSN_UNDEFINED = 4     /**< Undefined value, used for initial conditions, etc */
} word_posn_t;
127 59 jimbo
#define N_WORD_POSN        4        /**< Total # of word positions (excluding undefined) */
#define WPOS_NAME          "ibesu"  /**< Printable code for each word position: one
                                       character per word_posn_t enumerator, in
                                       enumerator order (i, b, e, s, u) */
#define S3_SILENCE_CIPHONE "SIL"    /**< Hard-coded silence CI phone name */
130 59 jimbo
131 59 jimbo
/**
132 59 jimbo
   \struct ciphone_t
133 59 jimbo
   \brief CI phone information
134 59 jimbo
*/
135 59 jimbo
typedef struct {
136 59 jimbo
    char *name;                 /**< The name of the CI phone */
137 59 jimbo
    int32 filler;                /**< Whether a filler phone; if so, can be substituted by
138 59 jimbo
                                   silence phone in left or right context position */
139 59 jimbo
} ciphone_t;
140 59 jimbo
141 59 jimbo
/**
142 59 jimbo
 * \struct phone_t
143 59 jimbo
 * \brief Triphone information, including base phones as a subset.  For the latter, lc, rc and wpos are non-existent.
144 59 jimbo
 */
145 59 jimbo
typedef struct {
146 59 jimbo
    s3ssid_t ssid;                /**< State sequence (or senone sequence) ID, considering the
147 59 jimbo
                                   n_emit_state senone-ids are a unit.  The senone sequences
148 59 jimbo
                                   themselves are in a separate table */
149 59 jimbo
    s3tmatid_t tmat;                /**< Transition matrix id */
150 59 jimbo
    s3cipid_t ci, lc, rc;        /**< Base, left, right context ciphones */
151 59 jimbo
    word_posn_t wpos;                /**< Word position */
152 59 jimbo
    s3senid_t *state;           /**< State->senone mappings */
153 59 jimbo
154 59 jimbo
} phone_t;
155 59 jimbo
156 59 jimbo
/**
157 59 jimbo
 * \struct ph_rc_t
158 59 jimbo
 * \brief Structures needed for mapping <ci,lc,rc,wpos> into pid.  (See mdef_t.wpos_ci_lclist below.)  (lc = left context; rc = right context.)
159 59 jimbo
 * NOTE: Both ph_rc_t and ph_lc_t FOR INTERNAL USE ONLY.
160 59 jimbo
 */
161 59 jimbo
typedef struct ph_rc_s {
162 59 jimbo
    s3cipid_t rc;                /**< Specific rc for a parent <wpos,ci,lc> */
163 59 jimbo
    s3pid_t pid;                /**< Triphone id for above rc instance */
164 59 jimbo
    struct ph_rc_s *next;        /**< Next rc entry for same parent <wpos,ci,lc> */
165 59 jimbo
} ph_rc_t;
166 59 jimbo
167 59 jimbo
/**
168 59 jimbo
 * \struct ph_lc_t
169 59 jimbo
 * \brief Structures for storing the left context.
170 59 jimbo
 */
171 59 jimbo
172 59 jimbo
typedef struct ph_lc_s {
173 59 jimbo
    s3cipid_t lc;                /**< Specific lc for a parent <wpos,ci> */
174 59 jimbo
    ph_rc_t *rclist;                /**< rc list for above lc instance */
175 59 jimbo
    struct ph_lc_s *next;        /**< Next lc entry for same parent <wpos,ci> */
176 59 jimbo
} ph_lc_t;
177 59 jimbo
178 59 jimbo
179 59 jimbo
/** The main model definition structure */
180 59 jimbo
/**
181 59 jimbo
   \struct mdef_t
182 59 jimbo
   \brief strcture for storing the model definition.
183 59 jimbo
*/
184 59 jimbo
typedef struct {
185 59 jimbo
    int32 n_ciphone;                /**< \#basephones actually present */
186 59 jimbo
    int32 n_phone;                /**< \#basephones + \#triphones actually present */
187 59 jimbo
    int32 n_emit_state;                /**< \#emitting states per phone */
188 59 jimbo
    int32 n_ci_sen;                /**< \#CI senones; these are the first */
189 59 jimbo
    int32 n_sen;                /**< \#senones (CI+CD) */
190 59 jimbo
    int32 n_tmat;                /**< \#transition matrices */
191 59 jimbo
192 59 jimbo
    hash_table_t *ciphone_ht;        /**< Hash table for mapping ciphone strings to ids */
193 59 jimbo
    ciphone_t *ciphone;                /**< CI-phone information for all ciphones */
194 59 jimbo
    phone_t *phone;                /**< Information for all ciphones and triphones */
195 59 jimbo
    s3senid_t **sseq;                /**< Unique state (or senone) sequences in this model, shared
196 59 jimbo
                                   among all phones/triphones */
197 59 jimbo
    int32 n_sseq;                /**< No. of unique senone sequences in this model */
198 59 jimbo
199 59 jimbo
    s3senid_t *cd2cisen;        /**< Parent CI-senone id for each senone; the first
200 59 jimbo
                                   n_ci_sen are identity mappings; the CD-senones are
201 59 jimbo
                                   contiguous for each parent CI-phone */
202 59 jimbo
    s3cipid_t *sen2cimap;        /**< Parent CI-phone for each senone (CI or CD) */
203 59 jimbo
204 59 jimbo
    s3cipid_t sil;                /**< SILENCE_CIPHONE id */
205 59 jimbo
206 59 jimbo
    ph_lc_t ***wpos_ci_lclist;        /**< wpos_ci_lclist[wpos][ci] = list of lc for <wpos,ci>.
207 59 jimbo
                                   wpos_ci_lclist[wpos][ci][lc].rclist = list of rc for
208 59 jimbo
                                   <wpos,ci,lc>.  Only entries for the known triphones
209 59 jimbo
                                   are created to conserve space.
210 59 jimbo
                                   (NOTE: FOR INTERNAL USE ONLY.) */
211 59 jimbo
212 59 jimbo
    s3senid_t *st2senmap; /**< A mapping from State to senone. Only used
213 59 jimbo
                             in sphinx 3.0 HACK!, In general, there is
214 59 jimbo
                             only need for either one of st2senmap or
215 59 jimbo
                             sseq.
216 59 jimbo
                          */
217 59 jimbo
} mdef_t;
218 59 jimbo
219 59 jimbo
/**
 * Access macros; not meant for arbitrary use.
 *
 * In each macro, m is a pointer to the model definition; p is a phone id
 * (a ciphone id for mdef_is_fillerphone) used directly as an array index,
 * ss is a senone-sequence id and pos is a position within that sequence.
 * No bounds checking is performed.
 */
#define mdef_is_fillerphone(m,p)        ((m)->ciphone[p].filler)
#define mdef_n_ciphone(m)               ((m)->n_ciphone)
#define mdef_n_phone(m)                 ((m)->n_phone)
#define mdef_n_sseq(m)                  ((m)->n_sseq)
#define mdef_n_emit_state(m)            ((m)->n_emit_state)
#define mdef_n_sen(m)                   ((m)->n_sen)
#define mdef_n_tmat(m)                  ((m)->n_tmat)
#define mdef_pid2ssid(m,p)              ((m)->phone[p].ssid)
#define mdef_pid2tmatid(m,p)            ((m)->phone[p].tmat)
#define mdef_silphone(m)                ((m)->sil)
#define mdef_sen2cimap(m)               ((m)->sen2cimap)
#define mdef_sseq2sen(m,ss,pos)         ((m)->sseq[ss][pos])
#define mdef_pid2ci(m,p)                ((m)->phone[p].ci)
#define mdef_cd2cisen(m)                ((m)->cd2cisen)
234 59 jimbo
235 59 jimbo
/**
236 59 jimbo
 * Initialize the phone structure from the given model definition file.
237 59 jimbo
 * It should be treated as a READ-ONLY structure.
238 59 jimbo
 * @return pointer to the phone structure created.
239 59 jimbo
 */
240 59 jimbo
S3DECODER_EXPORT
241 59 jimbo
mdef_t *mdef_init (const char *mdeffile, /**< In: Model definition file */
242 59 jimbo
                   int32 breport         /**< In: whether to report the progress or not */
243 59 jimbo
    );
244 59 jimbo
245 59 jimbo
246 59 jimbo
/**
247 59 jimbo
    Get the ciphone id given a string name
248 59 jimbo
    @return ciphone id for the given ciphone string name
249 59 jimbo
*/
250 59 jimbo
s3cipid_t mdef_ciphone_id (mdef_t *m,                /**< In: Model structure being queried */
251 59 jimbo
                           const char *ciphone        /**< In: ciphone for which id wanted */
252 59 jimbo
    );
253 59 jimbo
254 59 jimbo
/**
255 59 jimbo
    Get the phone string given the ci phone id.
256 59 jimbo
    @return: READ-ONLY ciphone string name for the given ciphone id
257 59 jimbo
*/
258 59 jimbo
S3DECODER_EXPORT
259 59 jimbo
const char *mdef_ciphone_str (mdef_t *m,        /**< In: Model structure being queried */
260 59 jimbo
                              s3cipid_t ci        /**< In: ciphone id for which name wanted */
261 59 jimbo
    );
262 59 jimbo
263 59 jimbo
/**
264 59 jimbo
    Decide whether the phone is ci phone.
265 59 jimbo
    @return 1 if given triphone argument is a ciphone, 0 if not, -1 if error
266 59 jimbo
*/
267 59 jimbo
int32 mdef_is_ciphone (mdef_t *m,                /**< In: Model structure being queried */
268 59 jimbo
                       s3pid_t p                /**< In: triphone id being queried */
269 59 jimbo
    );
270 59 jimbo
271 59 jimbo
/**
272 59 jimbo
   Decide whether the senone is a senone for a ci phone, or a ci senone
273 59 jimbo
   @return 1 if a given senone is a ci senone
274 59 jimbo
*/
275 59 jimbo
int32 mdef_is_cisenone(mdef_t *m,               /**< In: Model structure being queried */
276 59 jimbo
                       s3senid_t s            /**< In: senone id being queried */
277 59 jimbo
    );
278 59 jimbo
279 59 jimbo
/**
280 59 jimbo
    Decide the phone id given the left, right and base phones.
281 59 jimbo
    @return: phone id for the given constituents if found, else BAD_S3PID
282 59 jimbo
*/
283 59 jimbo
S3DECODER_EXPORT
284 59 jimbo
s3pid_t mdef_phone_id (mdef_t *m,                /**< In: Model structure being queried */
285 59 jimbo
                       s3cipid_t b,                /**< In: base ciphone id */
286 59 jimbo
                       s3cipid_t l,                /**< In: left context ciphone id */
287 59 jimbo
                       s3cipid_t r,                /**< In: right context ciphone id */
288 59 jimbo
                       word_posn_t pos        /**< In: Word position */
289 59 jimbo
    );
290 59 jimbo
291 59 jimbo
/**
292 59 jimbo
 * Like phone_id, but backs off to other word positions if exact triphone not found.
293 59 jimbo
 * Also, non-SILENCE_PHONE filler phones back off to SILENCE_PHONE.
294 59 jimbo
 * Ultimately, backs off to base phone id.  Thus, it should never return BAD_S3PID.
295 59 jimbo
 */
296 59 jimbo
S3DECODER_EXPORT
297 59 jimbo
s3pid_t mdef_phone_id_nearest (mdef_t *m,        /**< In: Model structure being queried */
298 59 jimbo
                               s3cipid_t b,        /**< In: base ciphone id */
299 59 jimbo
                               s3cipid_t l,        /**< In: left context ciphone id */
300 59 jimbo
                               s3cipid_t r,        /**< In: right context ciphone id */
301 59 jimbo
                               word_posn_t pos        /**< In: Word position */
302 59 jimbo
    );
303 59 jimbo
304 59 jimbo
/**
305 59 jimbo
 * Create a phone string for the given phone (base or triphone) id in the given buf.
306 59 jimbo
 * @return 0 if successful, -1 if error.
307 59 jimbo
 */
308 59 jimbo
S3DECODER_EXPORT
309 59 jimbo
int32 mdef_phone_str (mdef_t *m,                /**< In: Model structure being queried */
310 59 jimbo
                      s3pid_t pid,                /**< In: phone id being queried */
311 59 jimbo
                      char *buf                /**< Out: On return, buf has the string */
312 59 jimbo
    );
313 59 jimbo
314 59 jimbo
/**
315 59 jimbo
 * Obtain phone components: inverse of mdef_phone_id().
316 59 jimbo
 * @return 0 if successful, -1 otherwise.
317 59 jimbo
 */
318 59 jimbo
S3DECODER_EXPORT
319 59 jimbo
int32 mdef_phone_components (mdef_t *m,                /**< In: Model structure being queried */
320 59 jimbo
                             s3pid_t p,                /**< In: triphone id being queried */
321 59 jimbo
                             s3cipid_t *b,        /**< Out: base ciphone id */
322 59 jimbo
                             s3cipid_t *l,        /**< Out: left context ciphone id */
323 59 jimbo
                             s3cipid_t *r,        /**< Out: right context ciphone id */
324 59 jimbo
                             word_posn_t *pos        /**< Out: Word position */
325 59 jimbo
    );
326 59 jimbo
327 59 jimbo
/**
328 59 jimbo
 * Compare the underlying HMMs for two given phones (i.e., compare the two transition
329 59 jimbo
 * matrix IDs and the individual state(senone) IDs).
330 59 jimbo
 * @return 0 iff the HMMs are identical, -1 otherwise.
331 59 jimbo
 */
332 59 jimbo
int32 mdef_hmm_cmp (mdef_t *m,                        /**< In: Model being queried */
333 59 jimbo
                    s3pid_t p1, /**< In: One of the two triphones being compared */
334 59 jimbo
                    s3pid_t p2        /**< In: One of the two triphones being compared */
335 59 jimbo
    );
336 59 jimbo
337 59 jimbo
/**
338 59 jimbo
 * From the given array of active senone-sequence flags, mark the corresponding senones that
339 59 jimbo
 * are active.  Caller responsible for allocating sen[], and for clearing it, if necessary.
340 59 jimbo
 */
341 59 jimbo
void mdef_sseq2sen_active (mdef_t *mdef,        /**< In: The model definition */
342 59 jimbo
                           uint8 *sseq,                /**< In: sseq[ss] is != 0 iff senone-sequence ID
343 59 jimbo
                                                   ss is active */
344 59 jimbo
                           uint8 *sen                /**< In/Out: Set sen[s] to non-0 if so indicated
345 59 jimbo
                                                   by any active senone sequence */
346 59 jimbo
    );
347 59 jimbo
348 59 jimbo
/** For debugging: dump the mdef_t structure out. */
349 59 jimbo
void mdef_dump (FILE *fp,  /**< In: a file pointer */
350 59 jimbo
                mdef_t *m  /**< In: a model definition structure */
351 59 jimbo
    );
352 59 jimbo
353 59 jimbo
/** Report the model definition's parameters */
354 59 jimbo
void mdef_report(mdef_t *m /**<  In: model definition structure */
355 59 jimbo
    );
356 59 jimbo
357 59 jimbo
/** RAH, For freeing memory */
358 59 jimbo
void mdef_free_recursive_lc (ph_lc_t *lc /**< In: A list of left context */
359 59 jimbo
    );
360 59 jimbo
/** RAH: free a list of right-context entries. */
void mdef_free_recursive_rc (ph_rc_t *rc        /**< In: A list of right context */
    );
362 59 jimbo
363 59 jimbo
/** Free an mdef_t */
364 59 jimbo
S3DECODER_EXPORT
365 59 jimbo
void mdef_free (mdef_t *mdef /**< In : The model definition*/
366 59 jimbo
    );
367 59 jimbo
368 59 jimbo
369 59 jimbo
#if 0
{ /* Stop indent from complaining */
#endif
#ifdef __cplusplus
}
#endif

#endif /* _S3_MDEF_H_ */