SphinxBase 5prealpha
fsg_model.c
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 *
19 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 * ====================================================================
32 *
33 */
34
35#include <stdio.h>
36#include <string.h>
37#include <assert.h>
38
39/* SphinxBase headers. */
40#include "sphinxbase/err.h"
41#include "sphinxbase/pio.h"
44#include "sphinxbase/strfuncs.h"
46#include "sphinxbase/fsg_model.h"
47#include "sphinxbase/bitvec.h"
48
57 hash_table_t *null_trans; /* Null transitions keyed by state. */
58 hash_table_t *trans; /* Lists of non-null transitions keyed by state. */
59};
60
65 hash_iter_t *itor, *null_itor;
66 gnode_t *gn;
67};
68
69#define FSG_MODEL_BEGIN_DECL "FSG_BEGIN"
70#define FSG_MODEL_END_DECL "FSG_END"
71#define FSG_MODEL_N_DECL "N"
72#define FSG_MODEL_NUM_STATES_DECL "NUM_STATES"
73#define FSG_MODEL_S_DECL "S"
74#define FSG_MODEL_START_STATE_DECL "START_STATE"
75#define FSG_MODEL_F_DECL "F"
76#define FSG_MODEL_FINAL_STATE_DECL "FINAL_STATE"
77#define FSG_MODEL_T_DECL "T"
78#define FSG_MODEL_TRANSITION_DECL "TRANSITION"
79#define FSG_MODEL_COMMENT_CHAR '#'
80
81
82static int32
83nextline_str2words(FILE * fp, int32 * lineno,
84 char **lineptr, char ***wordptr)
85{
86 for (;;) {
87 size_t len;
88 int32 n;
89
90 ckd_free(*lineptr);
91 if ((*lineptr = fread_line(fp, &len)) == NULL)
92 return -1;
93
94 (*lineno)++;
95
96 if ((*lineptr)[0] == FSG_MODEL_COMMENT_CHAR)
97 continue; /* Skip comment lines */
98
99 n = str2words(*lineptr, NULL, 0);
100 if (n == 0)
101 continue; /* Skip blank lines */
102
103 /* Abuse of realloc(), but this doesn't have to be fast. */
104 if (*wordptr == NULL)
105 *wordptr = ckd_calloc(n, sizeof(**wordptr));
106 else
107 *wordptr = ckd_realloc(*wordptr, n * sizeof(**wordptr));
108 return str2words(*lineptr, *wordptr, n);
109 }
110}
111
112void
113fsg_model_trans_add(fsg_model_t * fsg,
114 int32 from, int32 to, int32 logp, int32 wid)
115{
116 fsg_link_t *link;
117 glist_t gl;
118 gnode_t *gn;
119
120 if (fsg->trans[from].trans == NULL)
121 fsg->trans[from].trans = hash_table_new(5, HASH_CASE_YES);
122
123 /* Check for duplicate link (i.e., link already exists with label=wid) */
124 for (gn = gl = fsg_model_trans(fsg, from, to); gn; gn = gnode_next(gn)) {
125 link = (fsg_link_t *) gnode_ptr(gn);
126 if (link->wid == wid) {
127 if (link->logs2prob < logp)
128 link->logs2prob = logp;
129 return;
130 }
131 }
132
133 /* Create transition object */
134 link = listelem_malloc(fsg->link_alloc);
135 link->from_state = from;
136 link->to_state = to;
137 link->logs2prob = logp;
138 link->wid = wid;
139
140 /* Add it to the list of transitions and update the hash table */
141 gl = glist_add_ptr(gl, (void *) link);
142 hash_table_replace_bkey(fsg->trans[from].trans,
143 (char const *) &link->to_state,
144 sizeof(link->to_state), gl);
145}
146
147int32
148fsg_model_tag_trans_add(fsg_model_t * fsg, int32 from, int32 to,
149 int32 logp, int32 wid)
150{
151 fsg_link_t *link, *link2;
152
153 /* Check for transition probability */
154 if (logp > 0) {
155 E_FATAL("Null transition prob must be <= 1.0 (state %d -> %d)\n",
156 from, to);
157 }
158
159 /* Self-loop null transitions (with prob <= 1.0) are redundant */
160 if (from == to)
161 return -1;
162
163 if (fsg->trans[from].null_trans == NULL)
164 fsg->trans[from].null_trans = hash_table_new(5, HASH_CASE_YES);
165
166 /* Check for a duplicate link; if found, keep the higher prob */
167 link = fsg_model_null_trans(fsg, from, to);
168 if (link) {
169 if (link->logs2prob < logp) {
170 link->logs2prob = logp;
171 return 0;
172 }
173 else
174 return -1;
175 }
176
177 /* Create null transition object */
178 link = listelem_malloc(fsg->link_alloc);
179 link->from_state = from;
180 link->to_state = to;
181 link->logs2prob = logp;
182 link->wid = -1;
183
184 link2 = (fsg_link_t *)
185 hash_table_enter_bkey(fsg->trans[from].null_trans,
186 (char const *) &link->to_state,
187 sizeof(link->to_state), link);
188 assert(link == link2);
189
190 return 1;
191}
192
193int32
194fsg_model_null_trans_add(fsg_model_t * fsg, int32 from, int32 to,
195 int32 logp)
196{
197 return fsg_model_tag_trans_add(fsg, from, to, logp, -1);
198}
199
201fsg_model_null_trans_closure(fsg_model_t * fsg, glist_t nulls)
202{
203 gnode_t *gn1;
204 int updated;
205 fsg_link_t *tl1, *tl2;
206 int32 k, n;
207
208 E_INFO("Computing transitive closure for null transitions\n");
209
210 /* If our caller didn't give us a list of null-transitions,
211 make such a list. Just loop through all the FSG states,
212 and all the null-transitions in that state (which are kept in
213 their own hash table). */
214 if (nulls == NULL) {
215 int i;
216 for (i = 0; i < fsg->n_state; ++i) {
217 hash_iter_t *itor;
218 hash_table_t *null_trans = fsg->trans[i].null_trans;
219 if (null_trans == NULL)
220 continue;
221 for (itor = hash_table_iter(null_trans);
222 itor != NULL; itor = hash_table_iter_next(itor)) {
223 nulls = glist_add_ptr(nulls, hash_entry_val(itor->ent));
224 }
225 }
226 }
227
228 /*
229 * Probably not the most efficient closure implementation, in general, but
230 * probably reasonably efficient for a sparse null transition matrix.
231 */
232 n = 0;
233 do {
234 updated = FALSE;
235
236 for (gn1 = nulls; gn1; gn1 = gnode_next(gn1)) {
237 hash_iter_t *itor;
238
239 tl1 = (fsg_link_t *) gnode_ptr(gn1);
240 assert(tl1->wid < 0);
241
242 if (fsg->trans[tl1->to_state].null_trans == NULL)
243 continue;
244
245 for (itor =
246 hash_table_iter(fsg->trans[tl1->to_state].null_trans);
247 itor; itor = hash_table_iter_next(itor)) {
248
249 tl2 = (fsg_link_t *) hash_entry_val(itor->ent);
250
251 k = fsg_model_null_trans_add(fsg,
252 tl1->from_state,
253 tl2->to_state,
254 tl1->logs2prob +
255 tl2->logs2prob);
256 if (k >= 0) {
257 updated = TRUE;
258 if (k > 0) {
259 nulls = glist_add_ptr(nulls, (void *)
260 fsg_model_null_trans
261 (fsg, tl1->from_state,
262 tl2->to_state));
263 n++;
264 }
265 }
266 }
267 }
268 } while (updated);
269
270 E_INFO("%d null transitions added\n", n);
271
272 return nulls;
273}
274
276fsg_model_trans(fsg_model_t * fsg, int32 i, int32 j)
277{
278 void *val;
279
280 if (fsg->trans[i].trans == NULL)
281 return NULL;
282 if (hash_table_lookup_bkey(fsg->trans[i].trans, (char const *) &j,
283 sizeof(j), &val) < 0)
284 return NULL;
285 return (glist_t) val;
286}
287
289fsg_model_null_trans(fsg_model_t * fsg, int32 i, int32 j)
290{
291 void *val;
292
293 if (fsg->trans[i].null_trans == NULL)
294 return NULL;
295 if (hash_table_lookup_bkey(fsg->trans[i].null_trans, (char const *) &j,
296 sizeof(j), &val) < 0)
297 return NULL;
298 return (fsg_link_t *) val;
299}
300
302fsg_model_arcs(fsg_model_t * fsg, int32 i)
303{
304 fsg_arciter_t *itor;
305
306 if (fsg->trans[i].trans == NULL && fsg->trans[i].null_trans == NULL)
307 return NULL;
308 itor = ckd_calloc(1, sizeof(*itor));
309 if (fsg->trans[i].null_trans)
310 itor->null_itor = hash_table_iter(fsg->trans[i].null_trans);
311 if (fsg->trans[i].trans)
312 itor->itor = hash_table_iter(fsg->trans[i].trans);
313 if (itor->itor != NULL)
314 itor->gn = hash_entry_val(itor->itor->ent);
315 return itor;
316}
317
319fsg_arciter_get(fsg_arciter_t * itor)
320{
321 /* Iterate over non-null arcs first. */
322 if (itor->gn)
323 return (fsg_link_t *) gnode_ptr(itor->gn);
324 else if (itor->null_itor)
325 return (fsg_link_t *) hash_entry_val(itor->null_itor->ent);
326 else
327 return NULL;
328}
329
331fsg_arciter_next(fsg_arciter_t * itor)
332{
333 /* Iterate over non-null arcs first. */
334 if (itor->gn) {
335 itor->gn = gnode_next(itor->gn);
336 /* Move to the next destination arc. */
337 if (itor->gn == NULL) {
338 itor->itor = hash_table_iter_next(itor->itor);
339 if (itor->itor != NULL)
340 itor->gn = hash_entry_val(itor->itor->ent);
341 else if (itor->null_itor == NULL)
342 goto stop_iteration;
343 }
344 }
345 else {
346 if (itor->null_itor == NULL)
347 goto stop_iteration;
348 itor->null_itor = hash_table_iter_next(itor->null_itor);
349 if (itor->null_itor == NULL)
350 goto stop_iteration;
351 }
352 return itor;
353 stop_iteration:
354 fsg_arciter_free(itor);
355 return NULL;
356
357}
358
359void
360fsg_arciter_free(fsg_arciter_t * itor)
361{
362 if (itor == NULL)
363 return;
364 hash_table_iter_free(itor->null_itor);
365 hash_table_iter_free(itor->itor);
366 ckd_free(itor);
367}
368
369int
370fsg_model_word_id(fsg_model_t * fsg, char const *word)
371{
372 int wid;
373
374 /* Search for an existing word matching this. */
375 for (wid = 0; wid < fsg->n_word; ++wid) {
376 if (0 == strcmp(fsg->vocab[wid], word))
377 break;
378 }
379 /* If not found, add this to the vocab. */
380 if (wid == fsg->n_word)
381 return -1;
382 return wid;
383}
384
385int
386fsg_model_word_add(fsg_model_t * fsg, char const *word)
387{
388 int wid, old_size;
389
390 /* Search for an existing word matching this. */
391 wid = fsg_model_word_id(fsg, word);
392 /* If not found, add this to the vocab. */
393 if (wid == -1) {
394 wid = fsg->n_word;
395 if (fsg->n_word == fsg->n_word_alloc) {
396 old_size = fsg->n_word_alloc;
397 fsg->n_word_alloc += 10;
398 fsg->vocab = ckd_realloc(fsg->vocab,
399 fsg->n_word_alloc *
400 sizeof(*fsg->vocab));
401 if (fsg->silwords)
402 fsg->silwords =
403 bitvec_realloc(fsg->silwords, old_size,
404 fsg->n_word_alloc);
405 if (fsg->altwords)
406 fsg->altwords =
407 bitvec_realloc(fsg->altwords, old_size,
408 fsg->n_word_alloc);
409 }
410 ++fsg->n_word;
411 fsg->vocab[wid] = ckd_salloc(word);
412 }
413 return wid;
414}
415
416int
417fsg_model_add_silence(fsg_model_t * fsg, char const *silword,
418 int state, float32 silprob)
419{
420 int32 logsilp;
421 int n_trans, silwid, src;
422
423 E_INFO("Adding silence transitions for %s to FSG\n", silword);
424
425 silwid = fsg_model_word_add(fsg, silword);
426 logsilp = (int32) (logmath_log(fsg->lmath, silprob) * fsg->lw);
427 if (fsg->silwords == NULL)
429 bitvec_set(fsg->silwords, silwid);
430
431 n_trans = 0;
432 if (state == -1) {
433 for (src = 0; src < fsg->n_state; src++) {
434 fsg_model_trans_add(fsg, src, src, logsilp, silwid);
435 ++n_trans;
436 }
437 }
438 else {
439 fsg_model_trans_add(fsg, state, state, logsilp, silwid);
440 ++n_trans;
441 }
442
443 E_INFO("Added %d silence word transitions\n", n_trans);
444 return n_trans;
445}
446
447int
448fsg_model_add_alt(fsg_model_t * fsg, char const *baseword,
449 char const *altword)
450{
451 int i, basewid, altwid;
452 int ntrans;
453
454 /* FIXME: This will get slow, eventually... */
455 for (basewid = 0; basewid < fsg->n_word; ++basewid)
456 if (0 == strcmp(fsg->vocab[basewid], baseword))
457 break;
458 if (basewid == fsg->n_word) {
459 E_ERROR("Base word %s not present in FSG vocabulary!\n", baseword);
460 return -1;
461 }
462 altwid = fsg_model_word_add(fsg, altword);
463 if (fsg->altwords == NULL)
465 bitvec_set(fsg->altwords, altwid);
466 if (fsg_model_is_filler(fsg, basewid)) {
467 if (fsg->silwords == NULL)
469 bitvec_set(fsg->silwords, altwid);
470 }
471
472 E_DEBUG(2, ("Adding alternate word transitions (%s,%s) to FSG\n",
473 baseword, altword));
474
475 /* Look for all transitions involving baseword and duplicate them. */
476 /* FIXME: This will also get slow, eventually... */
477 ntrans = 0;
478 for (i = 0; i < fsg->n_state; ++i) {
479 hash_iter_t *itor;
480 if (fsg->trans[i].trans == NULL)
481 continue;
482 for (itor = hash_table_iter(fsg->trans[i].trans); itor;
483 itor = hash_table_iter_next(itor)) {
484 glist_t trans;
485 gnode_t *gn;
486
487 trans = hash_entry_val(itor->ent);
488 for (gn = trans; gn; gn = gnode_next(gn)) {
489 fsg_link_t *fl = gnode_ptr(gn);
490 if (fl->wid == basewid) {
491 fsg_link_t *link;
492
493 /* Create transition object */
494 link = listelem_malloc(fsg->link_alloc);
495 link->from_state = fl->from_state;
496 link->to_state = fl->to_state;
497 link->logs2prob = fl->logs2prob; /* FIXME!!!??? */
498 link->wid = altwid;
499
500 trans = glist_add_ptr(trans, (void *) link);
501 ++ntrans;
502 }
503 }
504 hash_entry_val(itor->ent) = trans;
505 }
506 }
507
508 E_DEBUG(2, ("Added %d alternate word transitions\n", ntrans));
509 return ntrans;
510}
511
512
514fsg_model_init(char const *name, logmath_t * lmath, float32 lw,
515 int32 n_state)
516{
517 fsg_model_t *fsg;
518
519 /* Allocate basic stuff. */
520 fsg = ckd_calloc(1, sizeof(*fsg));
521 fsg->refcount = 1;
523 fsg->lmath = lmath;
524 fsg->name = name ? ckd_salloc(name) : NULL;
525 fsg->n_state = n_state;
526 fsg->lw = lw;
527
528 fsg->trans = ckd_calloc(fsg->n_state, sizeof(*fsg->trans));
529
530 return fsg;
531}
532
534fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw)
535{
536 fsg_model_t *fsg;
537 hash_table_t *vocab;
538 hash_iter_t *itor;
539 int32 lastwid;
540 char **wordptr;
541 char *lineptr;
542 char *fsgname;
543 int32 lineno;
544 int32 n, i, j;
545 int n_state, n_trans, n_null_trans;
546 glist_t nulls;
547 float32 p;
548
549 lineno = 0;
550 vocab = hash_table_new(32, FALSE);
551 wordptr = NULL;
552 lineptr = NULL;
553 nulls = NULL;
554 fsgname = NULL;
555 fsg = NULL;
556
557 /* Scan upto FSG_BEGIN header */
558 for (;;) {
559 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
560 if (n < 0) {
561 E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL);
562 goto parse_error;
563 }
564
565 if ((strcmp(wordptr[0], FSG_MODEL_BEGIN_DECL) == 0)) {
566 if (n > 2) {
567 E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n",
568 lineno);
569 goto parse_error;
570 }
571 break;
572 }
573 }
574 /* Save FSG name, or it will get clobbered below :(.
575 * If name is missing, try the default.
576 */
577 if (n == 2) {
578 fsgname = ckd_salloc(wordptr[1]);
579 }
580 else {
581 E_WARN("FSG name is missing\n");
582 fsgname = ckd_salloc("unknown");
583 }
584
585 /* Read #states */
586 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
587 if ((n != 2)
588 || ((strcmp(wordptr[0], FSG_MODEL_N_DECL) != 0)
589 && (strcmp(wordptr[0], FSG_MODEL_NUM_STATES_DECL) != 0))
590 || (sscanf(wordptr[1], "%d", &n_state) != 1)
591 || (n_state <= 0)) {
592 E_ERROR
593 ("Line[%d]: #states declaration line missing or malformed\n",
594 lineno);
595 goto parse_error;
596 }
597
598 /* Now create the FSG. */
599 fsg = fsg_model_init(fsgname, lmath, lw, n_state);
600 ckd_free(fsgname);
601 fsgname = NULL;
602
603 /* Read start state */
604 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
605 if ((n != 2)
606 || ((strcmp(wordptr[0], FSG_MODEL_S_DECL) != 0)
607 && (strcmp(wordptr[0], FSG_MODEL_START_STATE_DECL) != 0))
608 || (sscanf(wordptr[1], "%d", &(fsg->start_state)) != 1)
609 || (fsg->start_state < 0)
610 || (fsg->start_state >= fsg->n_state)) {
611 E_ERROR
612 ("Line[%d]: start state declaration line missing or malformed\n",
613 lineno);
614 goto parse_error;
615 }
616
617 /* Read final state */
618 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
619 if ((n != 2)
620 || ((strcmp(wordptr[0], FSG_MODEL_F_DECL) != 0)
621 && (strcmp(wordptr[0], FSG_MODEL_FINAL_STATE_DECL) != 0))
622 || (sscanf(wordptr[1], "%d", &(fsg->final_state)) != 1)
623 || (fsg->final_state < 0)
624 || (fsg->final_state >= fsg->n_state)) {
625 E_ERROR
626 ("Line[%d]: final state declaration line missing or malformed\n",
627 lineno);
628 goto parse_error;
629 }
630
631 /* Read transitions */
632 lastwid = 0;
633 n_trans = n_null_trans = 0;
634 for (;;) {
635 int32 wid, tprob;
636
637 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
638 if (n <= 0) {
639 E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
640 lineno);
641 goto parse_error;
642 }
643
644 if ((strcmp(wordptr[0], FSG_MODEL_END_DECL) == 0)) {
645 break;
646 }
647
648 if ((strcmp(wordptr[0], FSG_MODEL_T_DECL) == 0)
649 || (strcmp(wordptr[0], FSG_MODEL_TRANSITION_DECL) == 0)) {
650
651
652 if (((n != 4) && (n != 5))
653 || (sscanf(wordptr[1], "%d", &i) != 1)
654 || (sscanf(wordptr[2], "%d", &j) != 1)
655 || (i < 0) || (i >= fsg->n_state)
656 || (j < 0) || (j >= fsg->n_state)) {
657 E_ERROR
658 ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n",
659 lineno);
660 goto parse_error;
661 }
662
663 p = atof_c(wordptr[3]);
664 if ((p <= 0.0) || (p > 1.0)) {
665 E_ERROR
666 ("Line[%d]: transition spec malformed; Expecting float as transition probability\n",
667 lineno);
668 goto parse_error;
669 }
670 }
671 else {
672 E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
673 lineno);
674 goto parse_error;
675 }
676
677 tprob = (int32) (logmath_log(lmath, p) * fsg->lw);
678 /* Add word to "dictionary". */
679 if (n > 4) {
680 if (hash_table_lookup_int32(vocab, wordptr[4], &wid) < 0) {
681 (void) hash_table_enter_int32(vocab,
682 ckd_salloc(wordptr[4]),
683 lastwid);
684 wid = lastwid;
685 ++lastwid;
686 }
687 fsg_model_trans_add(fsg, i, j, tprob, wid);
688 ++n_trans;
689 }
690 else {
691 if (fsg_model_null_trans_add(fsg, i, j, tprob) == 1) {
692 ++n_null_trans;
693 nulls =
694 glist_add_ptr(nulls, fsg_model_null_trans(fsg, i, j));
695 }
696 }
697 }
698
699 E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n",
700 fsg->n_state, hash_table_inuse(vocab), n_trans, n_null_trans);
701
702
703 /* Now create a string table from the "dictionary" */
704 fsg->n_word = hash_table_inuse(vocab);
705 fsg->n_word_alloc = fsg->n_word + 10; /* Pad it a bit. */
706 fsg->vocab = ckd_calloc(fsg->n_word_alloc, sizeof(*fsg->vocab));
707 for (itor = hash_table_iter(vocab); itor;
708 itor = hash_table_iter_next(itor)) {
709 char const *word = hash_entry_key(itor->ent);
710 int32 wid = (int32) (long) hash_entry_val(itor->ent);
711 fsg->vocab[wid] = (char *) word;
712 }
713 hash_table_free(vocab);
714
715 /* Do transitive closure on null transitions */
716 nulls = fsg_model_null_trans_closure(fsg, nulls);
717 glist_free(nulls);
718
719 ckd_free(lineptr);
720 ckd_free(wordptr);
721
722 return fsg;
723
724 parse_error:
725 for (itor = hash_table_iter(vocab); itor;
726 itor = hash_table_iter_next(itor))
727 ckd_free((char *) hash_entry_key(itor->ent));
728 glist_free(nulls);
729 hash_table_free(vocab);
730 ckd_free(fsgname);
731 ckd_free(lineptr);
732 ckd_free(wordptr);
733 fsg_model_free(fsg);
734 return NULL;
735}
736
737
739fsg_model_readfile(const char *file, logmath_t * lmath, float32 lw)
740{
741 FILE *fp;
742 fsg_model_t *fsg;
743
744 if ((fp = fopen(file, "r")) == NULL) {
745 E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file);
746 return NULL;
747 }
748 fsg = fsg_model_read(fp, lmath, lw);
749 fclose(fp);
750 return fsg;
751}
752
754fsg_model_retain(fsg_model_t * fsg)
755{
756 ++fsg->refcount;
757 return fsg;
758}
759
760static void
761trans_list_free(fsg_model_t * fsg, int32 i)
762{
763 hash_iter_t *itor;
764
765 /* FIXME (maybe): FSG links will all get freed when we call
766 * listelem_alloc_free() so don't bother freeing them explicitly
767 * here. */
768 if (fsg->trans[i].trans) {
769 for (itor = hash_table_iter(fsg->trans[i].trans);
770 itor; itor = hash_table_iter_next(itor)) {
771 glist_t gl = (glist_t) hash_entry_val(itor->ent);
772 glist_free(gl);
773 }
774 }
775 hash_table_free(fsg->trans[i].trans);
776 hash_table_free(fsg->trans[i].null_trans);
777}
778
779int
780fsg_model_free(fsg_model_t * fsg)
781{
782 int i;
783
784 if (fsg == NULL)
785 return 0;
786
787 if (--fsg->refcount > 0)
788 return fsg->refcount;
789
790 for (i = 0; i < fsg->n_word; ++i)
791 ckd_free(fsg->vocab[i]);
792 for (i = 0; i < fsg->n_state; ++i)
793 trans_list_free(fsg, i);
794 ckd_free(fsg->trans);
795 ckd_free(fsg->vocab);
797 bitvec_free(fsg->silwords);
798 bitvec_free(fsg->altwords);
799 ckd_free(fsg->name);
800 ckd_free(fsg);
801 return 0;
802}
803
804
805void
806fsg_model_write(fsg_model_t * fsg, FILE * fp)
807{
808 int32 i;
809
810 fprintf(fp, "%s %s\n", FSG_MODEL_BEGIN_DECL,
811 fsg->name ? fsg->name : "");
812 fprintf(fp, "%s %d\n", FSG_MODEL_NUM_STATES_DECL, fsg->n_state);
813 fprintf(fp, "%s %d\n", FSG_MODEL_START_STATE_DECL, fsg->start_state);
814 fprintf(fp, "%s %d\n", FSG_MODEL_FINAL_STATE_DECL, fsg->final_state);
815
816 for (i = 0; i < fsg->n_state; i++) {
817 fsg_arciter_t *itor;
818
819 for (itor = fsg_model_arcs(fsg, i); itor;
820 itor = fsg_arciter_next(itor)) {
821 fsg_link_t *tl = fsg_arciter_get(itor);
822
823 fprintf(fp, "%s %d %d %f %s\n", FSG_MODEL_TRANSITION_DECL,
824 tl->from_state, tl->to_state,
825 logmath_exp(fsg->lmath,
826 (int32) (tl->logs2prob / fsg->lw)),
827 (tl->wid < 0) ? "" : fsg_model_word_str(fsg, tl->wid));
828 }
829 }
830
831 fprintf(fp, "%s\n", FSG_MODEL_END_DECL);
832
833 fflush(fp);
834}
835
836void
837fsg_model_writefile(fsg_model_t * fsg, char const *file)
838{
839 FILE *fp;
840
841 assert(fsg);
842
843 E_INFO("Writing FSG file '%s'\n", file);
844
845 if ((fp = fopen(file, "w")) == NULL) {
846 E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file);
847 return;
848 }
849
850 fsg_model_write(fsg, fp);
851
852 fclose(fp);
853}
854
855static void
856fsg_model_write_fsm_trans(fsg_model_t * fsg, int i, FILE * fp)
857{
858 fsg_arciter_t *itor;
859
860 for (itor = fsg_model_arcs(fsg, i); itor;
861 itor = fsg_arciter_next(itor)) {
862 fsg_link_t *tl = fsg_arciter_get(itor);
863 fprintf(fp, "%d %d %s %f\n",
864 tl->from_state, tl->to_state,
865 (tl->wid < 0) ? "<eps>" : fsg_model_word_str(fsg, tl->wid),
866 -logmath_log_to_ln(fsg->lmath, tl->logs2prob / fsg->lw));
867 }
868}
869
870void
871fsg_model_write_fsm(fsg_model_t * fsg, FILE * fp)
872{
873 int i;
874
875 /* Write transitions from initial state first. */
876 fsg_model_write_fsm_trans(fsg, fsg_model_start_state(fsg), fp);
877
878 /* Other states. */
879 for (i = 0; i < fsg->n_state; i++) {
880 if (i == fsg_model_start_state(fsg))
881 continue;
882 fsg_model_write_fsm_trans(fsg, i, fp);
883 }
884
885 /* Final state. */
886 fprintf(fp, "%d 0\n", fsg_model_final_state(fsg));
887
888 fflush(fp);
889}
890
891void
892fsg_model_writefile_fsm(fsg_model_t * fsg, char const *file)
893{
894 FILE *fp;
895
896 assert(fsg);
897
898 E_INFO("Writing FSM file '%s'\n", file);
899
900 if ((fp = fopen(file, "w")) == NULL) {
901 E_ERROR_SYSTEM("Failed to open fsm file '%s' for writing", file);
902 return;
903 }
904
905 fsg_model_write_fsm(fsg, fp);
906
907 fclose(fp);
908}
909
910void
911fsg_model_write_symtab(fsg_model_t * fsg, FILE * file)
912{
913 int i;
914
915 fprintf(file, "<eps> 0\n");
916 for (i = 0; i < fsg_model_n_word(fsg); ++i) {
917 fprintf(file, "%s %d\n", fsg_model_word_str(fsg, i), i + 1);
918 }
919 fflush(file);
920}
921
922void
923fsg_model_writefile_symtab(fsg_model_t * fsg, char const *file)
924{
925 FILE *fp;
926
927 assert(fsg);
928
929 E_INFO("Writing FSM symbol table '%s'\n", file);
930
931 if ((fp = fopen(file, "w")) == NULL) {
932 E_ERROR("Failed to open symbol table '%s' for writing", file);
933 return;
934 }
935
936 fsg_model_write_symtab(fsg, fp);
937
938 fclose(fp);
939}
An implementation of bit vectors.
#define bitvec_free(v)
Free a bit vector.
Definition: bitvec.h:87
SPHINXBASE_EXPORT bitvec_t * bitvec_realloc(bitvec_t *vec, size_t old_len, size_t new_len)
Resize a bit vector, clear the remaining bits.
Definition: bitvec.c:64
#define bitvec_alloc(n)
Allocate a bit vector, all bits are clear.
Definition: bitvec.h:75
#define bitvec_set(v, b)
Set the b-th bit of bit vector v.
Definition: bitvec.h:95
Sphinx's memory allocation/deallocation routines.
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Definition: ckd_alloc.c:244
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Definition: ckd_alloc.h:248
#define ckd_salloc(ptr)
Macro for ckd_salloc
Definition: ckd_alloc.h:264
#define ckd_realloc(ptr, sz)
Macro for ckd_realloc
Definition: ckd_alloc.h:258
Implementation of logging routines.
#define E_ERROR(...)
Print error message to error log.
Definition: err.h:104
#define E_INFO(...)
Print logging information to standard error stream.
Definition: err.h:114
#define E_FATAL(...)
Exit with non-zero status after error message.
Definition: err.h:81
#define E_ERROR_SYSTEM(...)
Print error text; Call perror("");.
Definition: err.h:99
#define E_WARN(...)
Print warning message to error log.
Definition: err.h:109
#define E_DEBUG(level, x)
Print debugging information to standard error stream.
Definition: err.h:144
SPHINXBASE_EXPORT void glist_free(glist_t g)
Free the given generic list; user-defined data contained within is not automatically freed.
Definition: glist.c:133
SPHINXBASE_EXPORT glist_t glist_add_ptr(glist_t g, void *ptr)
Create and prepend a new list node, with the given user-defined data, at the HEAD of the given generi...
Definition: glist.c:74
#define gnode_ptr(g)
Head of a list of gnodes.
Definition: glist.h:109
Hash table implementation.
SPHINXBASE_EXPORT void hash_table_free(hash_table_t *h)
Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...
Definition: hash_table.c:688
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter_next(hash_iter_t *itor)
Get the next key-value pair in iteration.
Definition: hash_table.c:656
#define hash_table_enter_int32(h, k, v)
Add a 32-bit integer value to a hash table.
Definition: hash_table.h:228
SPHINXBASE_EXPORT void * hash_table_enter_bkey(hash_table_t *h, const char *key, size_t len, void *val)
Like hash_table_enter, but with an explicitly specified key length, instead of a NULL-terminated,...
Definition: hash_table.c:535
SPHINXBASE_EXPORT void * hash_table_replace_bkey(hash_table_t *h, const char *key, size_t len, void *val)
Like hash_table_replace, but with an explicitly specified key length, instead of a NULL-terminated,...
Definition: hash_table.c:548
SPHINXBASE_EXPORT void hash_table_iter_free(hash_iter_t *itor)
Delete an unfinished iterator.
Definition: hash_table.c:682
SPHINXBASE_EXPORT int32 hash_table_lookup_bkey(hash_table_t *h, const char *key, size_t len, void **val)
Like hash_lookup, but with an explicitly specified key length, instead of a NULL-terminated,...
Definition: hash_table.c:337
#define hash_entry_val(e)
Access macros.
Definition: hash_table.h:175
SPHINXBASE_EXPORT int32 hash_table_lookup_int32(hash_table_t *h, const char *key, int32 *val)
Look up a 32-bit integer value in a hash table.
Definition: hash_table.c:322
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter(hash_table_t *h)
Start iterating over key-value pairs in a hash table.
Definition: hash_table.c:646
SPHINXBASE_EXPORT hash_table_t * hash_table_new(int32 size, int32 casearg)
Allocate a new hash table for a given expected size.
Definition: hash_table.c:158
SPHINXBASE_EXPORT void listelem_alloc_free(listelem_alloc_t *le)
Finalize and release all memory associated with a list element allocator.
SPHINXBASE_EXPORT listelem_alloc_t * listelem_alloc_init(size_t elemsize)
Initialize and return a list element allocator.
#define listelem_malloc(le)
Allocate a list element and return pointer to it.
SPHINXBASE_EXPORT float64 logmath_log_to_ln(logmath_t *lmath, int logb_p)
Convert integer log in base B to natural log (in floating point).
Definition: logmath.c:468
SPHINXBASE_EXPORT float64 logmath_exp(logmath_t *lmath, int logb_p)
Convert integer log in base B to linear floating point.
Definition: logmath.c:456
SPHINXBASE_EXPORT int logmath_log(logmath_t *lmath, float64 p)
Convert linear floating point number to integer log in base B.
Definition: logmath.c:447
file IO related operations.
SPHINXBASE_EXPORT char * fread_line(FILE *stream, size_t *out_len)
Read a line of arbitrary length from a file and return it as a newly allocated string.
Definition: pio.c:377
Basic type definitions used in Sphinx.
Miscellaneous useful string functions.
SPHINXBASE_EXPORT int32 str2words(char *line, char **wptr, int32 n_wptr)
Convert a line to an array of "words", based on whitespace separators.
Definition: strfuncs.c:123
SPHINXBASE_EXPORT double atof_c(char const *str)
Locale independent version of atof().
Definition: strfuncs.c:55
Implementation of arc iterator.
Definition: fsg_model.c:64
Word level FSG definition.
Definition: fsg_model.h:99
int32 n_word_alloc
Number of words allocated in vocab.
Definition: fsg_model.h:103
int32 start_state
Must be in the range [0..n_state-1].
Definition: fsg_model.h:109
char ** vocab
Vocabulary for this FSG.
Definition: fsg_model.h:104
int32 n_state
number of states in FSG
Definition: fsg_model.h:108
int32 n_word
Number of unique words in this FSG.
Definition: fsg_model.h:102
logmath_t * lmath
Pointer to log math computation object.
Definition: fsg_model.h:107
char * name
A unique string identifier for this FSG.
Definition: fsg_model.h:101
bitvec_t * silwords
Indicates which words are silence/fillers.
Definition: fsg_model.h:105
listelem_alloc_t * link_alloc
Allocator for FSG links.
Definition: fsg_model.h:114
trans_list_t * trans
Transitions out of each state, if any.
Definition: fsg_model.h:113
int32 final_state
Must be in the range [0..n_state-1].
Definition: fsg_model.h:110
bitvec_t * altwords
Indicates which words are pronunciation alternates.
Definition: fsg_model.h:106
float32 lw
Language weight that's been applied to transition logprobs.
Definition: fsg_model.h:111
int refcount
Reference count.
Definition: fsg_model.h:100
A node in a generic list.
Definition: glist.h:100
hash_entry_t * ent
Current entry in that table.
Definition: hash_table.h:170
Adjacency list (opaque) for a state in an FSG.
Definition: fsg_model.c:56