PocketSphinx 5prealpha
allphone_search.c
1/* ====================================================================
2 * Copyright (c) 2014 Carnegie Mellon University. All rights
3 * reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 *
18 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
19 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
22 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * ====================================================================
31 *
32 */
33
34/*
35* allphone_search.c -- Search for phonetic decoding.
36*/
37
38#include <stdio.h>
39#include <string.h>
40#include <assert.h>
41
42#include <sphinxbase/err.h>
43#include <sphinxbase/ckd_alloc.h>
44#include <sphinxbase/strfuncs.h>
45#include <sphinxbase/pio.h>
46#include <sphinxbase/cmd_ln.h>
47
49#include "allphone_search.h"
50
51static ps_lattice_t *
52allphone_search_lattice(ps_search_t * search)
53{
54 return NULL;
55}
56
57static int
58allphone_search_prob(ps_search_t * search)
59{
60 return 0;
61}
62
63static void
64allphone_backtrace(allphone_search_t * allphs, int32 f, int32 *out_score);
65
66static void
67allphone_search_seg_free(ps_seg_t * seg)
68{
69 ckd_free(seg);
70}
71
72static void
73allphone_search_fill_iter(ps_seg_t *seg, phseg_t *phseg)
74{
75 seg->sf = phseg->sf;
76 seg->ef = phseg->ef;
77 seg->ascr = phseg->score;
78 seg->lscr = phseg->tscore;
79 seg->word = bin_mdef_ciphone_str(ps_search_acmod(seg->search)->mdef, phseg->ci);
80}
81
82static ps_seg_t *
83allphone_search_seg_next(ps_seg_t * seg)
84{
85 phseg_iter_t *itor = (phseg_iter_t *) seg;
86 phseg_t *phseg;
87
88 itor->seg = itor->seg->next;
89
90 if (itor->seg == NULL) {
91 allphone_search_seg_free(seg);
92 return NULL;
93 }
94 phseg = gnode_ptr(itor->seg);
95 allphone_search_fill_iter(seg, phseg);
96
97 return seg;
98}
99
100static ps_segfuncs_t fsg_segfuncs = {
101 /* seg_next */ allphone_search_seg_next,
102 /* seg_free */ allphone_search_seg_free
103};
104
105
106static ps_seg_t *
107allphone_search_seg_iter(ps_search_t * search)
108{
109 allphone_search_t *allphs = (allphone_search_t *) search;
110 phseg_iter_t *iter;
111
112 allphone_backtrace(allphs, allphs->frame - 1, NULL);
113 if (allphs->segments == NULL)
114 return NULL;
115
116 iter = ckd_calloc(1, sizeof(phseg_iter_t));
117
118 iter->base.vt = &fsg_segfuncs;
119 iter->base.search = search;
120 iter->seg = allphs->segments;
121 allphone_search_fill_iter((ps_seg_t *)iter, gnode_ptr(iter->seg));
122
123 return (ps_seg_t *) iter;
124}
125
126static ps_searchfuncs_t allphone_funcs = {
127 /* start: */ allphone_search_start,
128 /* step: */ allphone_search_step,
129 /* finish: */ allphone_search_finish,
130 /* reinit: */ allphone_search_reinit,
131 /* free: */ allphone_search_free,
132 /* lattice: */ allphone_search_lattice,
133 /* hyp: */ allphone_search_hyp,
134 /* prob: */ allphone_search_prob,
135 /* seg_iter: */ allphone_search_seg_iter,
136};
137
142static phmm_t *
143phmm_lookup(allphone_search_t * allphs, s3pid_t pid)
144{
145 phmm_t *p;
146 bin_mdef_t *mdef;
147 phmm_t **ci_phmm;
148
149 mdef = ((ps_search_t *) allphs)->acmod->mdef;
150 ci_phmm = allphs->ci_phmm;
151
152 for (p = ci_phmm[bin_mdef_pid2ci(mdef, pid)]; p; p = p->next) {
153 if (mdef_pid2tmatid(mdef, p->pid) == mdef_pid2tmatid(mdef, pid))
154 if (mdef_pid2ssid(mdef, p->pid) == mdef_pid2ssid(mdef, pid))
155 return p;
156 }
157
158 return NULL;
159}
160
161static int32
162phmm_link(allphone_search_t * allphs)
163{
164 s3cipid_t ci, rc;
165 phmm_t *p, *p2;
166 int32 *rclist;
167 int32 i, n_link;
168 plink_t *l;
169 bin_mdef_t *mdef;
170 phmm_t **ci_phmm;
171
172 mdef = ((ps_search_t *) allphs)->acmod->mdef;
173 ci_phmm = allphs->ci_phmm;
174
175 rclist = (int32 *) ckd_calloc(mdef->n_ciphone + 1, sizeof(int32));
176
177 /* Create successor links between PHMM nodes */
178 n_link = 0;
179 for (ci = 0; ci < mdef->n_ciphone; ci++) {
180 for (p = ci_phmm[ci]; p; p = p->next) {
181 /* Build rclist for p */
182 i = 0;
183 for (rc = 0; rc < mdef->n_ciphone; rc++) {
184 if (bitvec_is_set(p->rc, rc))
185 rclist[i++] = rc;
186 }
187 rclist[i] = BAD_S3CIPID;
188
189 /* For each rc in rclist, transition to PHMMs for rc if left context = ci */
190 for (i = 0; IS_S3CIPID(rclist[i]); i++) {
191 for (p2 = ci_phmm[rclist[i]]; p2; p2 = p2->next) {
192 if (bitvec_is_set(p2->lc, ci)) {
193 /* transition from p to p2 */
194 l = (plink_t *) ckd_calloc(1, sizeof(*l));
195 l->phmm = p2;
196 l->next = p->succlist;
197 p->succlist = l;
198
199 n_link++;
200 }
201 }
202 }
203 }
204 }
205
206 ckd_free(rclist);
207
208 return n_link;
209}
210
214static int
215phmm_build(allphone_search_t * allphs)
216{
217 phmm_t *p, **pid2phmm;
218 bin_mdef_t *mdef;
219 int32 lrc_size;
220 uint32 *lc, *rc;
221 s3pid_t pid;
222 s3cipid_t ci;
223 s3cipid_t *filler;
224 int n_phmm, n_link;
225 int i, nphone;
226
227 mdef = ((ps_search_t *) allphs)->acmod->mdef;
228 allphs->ci_phmm =
229 (phmm_t **) ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(phmm_t *));
230 pid2phmm =
231 (phmm_t **) ckd_calloc(bin_mdef_n_phone(mdef), sizeof(phmm_t *));
232
233 /* For each unique ciphone/triphone entry in mdef, create a PHMM node */
234 n_phmm = 0;
235 nphone = allphs->ci_only ? bin_mdef_n_ciphone(mdef) : bin_mdef_n_phone(mdef);
236 E_INFO("Building PHMM net of %d phones\n", nphone);
237 for (pid = 0; pid < nphone; pid++) {
238 if ((p = phmm_lookup(allphs, pid)) == NULL) {
239 /* not found, should be created */
240 p = (phmm_t *) ckd_calloc(1, sizeof(*p));
241 hmm_init(allphs->hmmctx, &(p->hmm), FALSE,
242 mdef_pid2ssid(mdef, pid), mdef->phone[pid].tmat);
243 p->pid = pid;
244 p->ci = bin_mdef_pid2ci(mdef, pid);
245 p->succlist = NULL;
246 p->next = allphs->ci_phmm[p->ci];
247 allphs->ci_phmm[p->ci] = p;
248 n_phmm++;
249 }
250 pid2phmm[pid] = p;
251 }
252
253 /* Fill out bitvecs of each PHMM node, alloc continuous memory chunk for context bitvectors */
254 lrc_size = bitvec_size(bin_mdef_n_ciphone(mdef));
255 lc = ckd_calloc(n_phmm * 2 * lrc_size, sizeof(bitvec_t));
256 rc = lc + (n_phmm * lrc_size);
257 for (ci = 0; ci < mdef->n_ciphone; ci++) {
258 for (p = allphs->ci_phmm[ci]; p; p = p->next) {
259 p->lc = lc;
260 lc += lrc_size;
261 p->rc = rc;
262 rc += lrc_size;
263 }
264 }
265
266 /* Fill out lc and rc bitmaps (remember to map all fillers to each other!!) */
267 filler =
268 (s3cipid_t *) ckd_calloc(bin_mdef_n_ciphone(mdef) + 1,
269 sizeof(s3cipid_t));
270
271 /* Connect fillers */
272 i = 0;
273 for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++) {
274 p = pid2phmm[ci];
275 bitvec_set_all(p->lc, bin_mdef_n_ciphone(mdef));
276 bitvec_set_all(p->rc, bin_mdef_n_ciphone(mdef));
277 if (mdef->phone[ci].info.ci.filler) {
278 filler[i++] = ci;
279 }
280 }
281 filler[i] = BAD_S3CIPID;
282
283
284 /* Loop over cdphones only if ci_only is not set */
285 for (pid = bin_mdef_n_ciphone(mdef); pid < nphone;
286 pid++) {
287 p = pid2phmm[pid];
288
289 if (mdef->phone[mdef->phone[pid].info.cd.ctx[1]].info.ci.filler) {
290 for (i = 0; IS_S3CIPID(filler[i]); i++)
291 bitvec_set(p->lc, filler[i]);
292 }
293 else
294 bitvec_set(p->lc, mdef->phone[pid].info.cd.ctx[1]);
295
296 if (mdef->phone[mdef->phone[pid].info.cd.ctx[2]].info.ci.filler) {
297 for (i = 0; IS_S3CIPID(filler[i]); i++)
298 bitvec_set(p->rc, filler[i]);
299 }
300 else
301 bitvec_set(p->rc, mdef->phone[pid].info.cd.ctx[2]);
302 }
303 ckd_free(pid2phmm);
304 ckd_free(filler);
305
306 /* Create links between PHMM nodes */
307 n_link = phmm_link(allphs);
308
309 E_INFO("%d nodes, %d links\n", n_phmm, n_link);
310 return 0;
311}
312
313static void
314phmm_free(allphone_search_t * allphs)
315{
316 s3cipid_t ci;
317 bin_mdef_t *mdef;
318
319 if (!allphs->ci_phmm)
320 return;
321 ckd_free(allphs->ci_phmm[0]->lc);
322 mdef = ((ps_search_t *) allphs)->acmod->mdef;
323 for (ci = 0; ci < mdef_n_ciphone(mdef); ++ci) {
324 phmm_t *p, *next;
325
326 for (p = allphs->ci_phmm[ci]; p; p = next) {
327 plink_t *l, *lnext;
328
329 next = p->next;
330 for (l = p->succlist; l; l = lnext) {
331 lnext = l->next;
332 ckd_free(l);
333 }
334 hmm_deinit(&(p->hmm));
335 ckd_free(p);
336 }
337 }
338 ckd_free(allphs->ci_phmm);
339}
340
342static int32
343phmm_eval_all(allphone_search_t * allphs, const int16 * senscr)
344{
345 s3cipid_t ci;
346 phmm_t *p;
347 int32 best;
348 bin_mdef_t *mdef;
349 phmm_t **ci_phmm;
350
351 mdef = ((ps_search_t *) allphs)->acmod->mdef;
352 ci_phmm = allphs->ci_phmm;
353
354 best = WORST_SCORE;
355
356 hmm_context_set_senscore(allphs->hmmctx, senscr);
357 for (ci = 0; ci < mdef->n_ciphone; ci++) {
358 for (p = ci_phmm[(unsigned) ci]; p; p = p->next) {
359 if (hmm_frame(&(p->hmm)) == allphs->frame) {
360 int32 score;
361 allphs->n_hmm_eval++;
362 score = hmm_vit_eval((hmm_t *) p);
363 if (score > best)
364 best = score;
365 }
366 }
367 }
368
369 return best;
370}
371
372static void
373phmm_exit(allphone_search_t * allphs, int32 best)
374{
375 s3cipid_t ci;
376 phmm_t *p;
377 int32 th, nf;
378 history_t *h;
379 blkarray_list_t *history;
380 bin_mdef_t *mdef;
381 int32 curfrm;
382 phmm_t **ci_phmm;
383 int32 *ci2lmwid;
384
385 th = best + allphs->pbeam;
386
387 history = allphs->history;
388 mdef = ps_search_acmod(allphs)->mdef;
389 curfrm = allphs->frame;
390 ci_phmm = allphs->ci_phmm;
391 ci2lmwid = allphs->ci2lmwid;
392
393 nf = curfrm + 1;
394
395 for (ci = 0; ci < mdef->n_ciphone; ci++) {
396 for (p = ci_phmm[(unsigned) ci]; p; p = p->next) {
397 if (hmm_frame(&(p->hmm)) == curfrm) {
398
399 if (hmm_bestscore(&(p->hmm)) >= th) {
400
401 h = (history_t *) ckd_calloc(1, sizeof(*h));
402 h->ef = curfrm;
403 h->phmm = p;
404 h->hist = hmm_out_history(&(p->hmm));
405 h->score = hmm_out_score(&(p->hmm));
406
407 if (!allphs->lm) {
408 h->tscore = allphs->inspen;
409 }
410 else {
411 if (h->hist > 0) {
412 int32 n_used;
413 history_t *pred =
414 blkarray_list_get(history, h->hist);
415
416 if (pred->hist > 0) {
417 history_t *pred_pred =
418 blkarray_list_get(history,
419 h->hist);
420 h->tscore =
421 ngram_tg_score(allphs->lm,
422 ci2lmwid
423 [pred_pred->phmm->ci],
424 ci2lmwid[pred->
425 phmm->ci],
426 ci2lmwid[p->ci],
427 &n_used) >>
429 }
430 else {
431 h->tscore =
432 ngram_bg_score(allphs->lm,
433 ci2lmwid
434 [pred->phmm->ci],
435 ci2lmwid[p->ci],
436 &n_used) >>
438 }
439 }
440 else {
441 /*
442 * This is the beginning SIL and in srch_allphone_begin()
443 * it's inscore is set to 0.
444 */
445 h->tscore = 0;
446 }
447 }
448
449 blkarray_list_append(history, h);
450
451 /* Mark PHMM active in next frame */
452 hmm_frame(&(p->hmm)) = nf;
453 }
454 else {
455 /* Reset state scores */
456 hmm_clear(&(p->hmm));
457 }
458 }
459 }
460 }
461}
462
463static void
464phmm_trans(allphone_search_t * allphs, int32 best,
465 int32 frame_history_start)
466{
467 history_t *h;
468 phmm_t *from, *to;
469 plink_t *l;
470 int32 newscore, nf, curfrm;
471 int32 *ci2lmwid;
472 int32 hist_idx;
473
474 curfrm = allphs->frame;
475 nf = curfrm + 1;
476 ci2lmwid = allphs->ci2lmwid;
477
478 /* Transition from exited nodes to initial states of HMMs */
479 for (hist_idx = frame_history_start;
480 hist_idx < blkarray_list_n_valid(allphs->history); hist_idx++) {
481 h = blkarray_list_get(allphs->history, hist_idx);
482 from = h->phmm;
483 for (l = from->succlist; l; l = l->next) {
484 int32 tscore;
485 to = l->phmm;
486
487 /* No LM, just use uniform (insertion penalty). */
488 if (!allphs->lm)
489 tscore = allphs->inspen;
490 else {
491 int32 n_used;
492 if (h->hist > 0) {
493 history_t *pred =
494 blkarray_list_get(allphs->history, h->hist);
495 tscore =
496 ngram_tg_score(allphs->lm,
497 ci2lmwid[pred->phmm->ci],
498 ci2lmwid[from->ci],
499 ci2lmwid[to->ci],
500 &n_used) >> SENSCR_SHIFT;
501 }
502 else {
503 tscore = ngram_bg_score(allphs->lm,
504 ci2lmwid[from->ci],
505 ci2lmwid[to->ci],
506 &n_used) >> SENSCR_SHIFT;
507 }
508 }
509
510 newscore = h->score + tscore;
511 if ((newscore > best + allphs->beam)
512 && (newscore > hmm_in_score(&(to->hmm)))) {
513 hmm_enter(&(to->hmm), newscore, hist_idx, nf);
514 }
515 }
516 }
517}
518
520allphone_search_init(const char *name,
521 ngram_model_t * lm,
522 cmd_ln_t * config,
523 acmod_t * acmod, dict_t * dict, dict2pid_t * d2p)
524{
525 int i;
526 bin_mdef_t *mdef;
527 allphone_search_t *allphs;
528
529 allphs = (allphone_search_t *) ckd_calloc(1, sizeof(*allphs));
530 ps_search_init(ps_search_base(allphs), &allphone_funcs, PS_SEARCH_TYPE_ALLPHONE, name, config, acmod,
531 dict, d2p);
532 mdef = acmod->mdef;
533
534 allphs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(mdef),
535 acmod->tmat->tp, NULL, mdef->sseq);
536 if (allphs->hmmctx == NULL) {
537 ps_search_free(ps_search_base(allphs));
538 return NULL;
539 }
540
541 allphs->ci_only = cmd_ln_boolean_r(config, "-allphone_ci");
542 allphs->lw = cmd_ln_float32_r(config, "-lw");
543
544 phmm_build(allphs);
545
546 if (lm) {
547 int32 silwid;
548
549 allphs->lm = ngram_model_retain(lm);
550
551 silwid = ngram_wid(allphs->lm, bin_mdef_ciphone_str(mdef,
552 mdef_silphone
553 (mdef)));
554 if (silwid == ngram_unknown_wid(allphs->lm)) {
555 E_ERROR("Phonetic LM does not have SIL phone in vocabulary\n");
556 allphone_search_free((ps_search_t *) allphs);
557 return NULL;
558 }
559
560 allphs->ci2lmwid =
561 (int32 *) ckd_calloc(mdef->n_ciphone,
562 sizeof(*allphs->ci2lmwid));
563 for (i = 0; i < mdef->n_ciphone; i++) {
564 allphs->ci2lmwid[i] =
565 ngram_wid(allphs->lm,
566 (char *) bin_mdef_ciphone_str(mdef, i));
567 /* Map filler phones and other missing phones to silence if not found */
568 if (allphs->ci2lmwid[i] == ngram_unknown_wid(allphs->lm))
569 allphs->ci2lmwid[i] = silwid;
570 }
571 }
572 else {
573 E_WARN
574 ("Failed to load language model specified in -allphone, doing unconstrained phone-loop decoding\n");
575 allphs->inspen =
576 (int32) (logmath_log
577 (acmod->lmath, cmd_ln_float32_r(config, "-pip"))
578 * allphs->lw) >> SENSCR_SHIFT;
579 }
580
581 allphs->n_tot_frame = 0;
582 allphs->frame = -1;
583 allphs->segments = NULL;
584
585 /* Get search pruning parameters */
586 allphs->beam
587 =
588 (int32) logmath_log(acmod->lmath,
589 cmd_ln_float64_r(config, "-beam"))
590 >> SENSCR_SHIFT;
591 allphs->pbeam
592 =
593 (int32) logmath_log(acmod->lmath,
594 cmd_ln_float64_r(config, "-pbeam"))
595 >> SENSCR_SHIFT;
596
597 /* LM related weights/penalties */
598 allphs->history = blkarray_list_init();
599
600 /* Acoustic score scale for posterior probabilities. */
601 allphs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale");
602
603 E_INFO("Allphone(beam: %d, pbeam: %d)\n", allphs->beam, allphs->pbeam);
604
605 ptmr_init(&allphs->perf);
606
607 return (ps_search_t *) allphs;
608}
609
610int
611allphone_search_reinit(ps_search_t * search, dict_t * dict,
612 dict2pid_t * d2p)
613{
614 allphone_search_t *allphs = (allphone_search_t *) search;
615
616 /* Free old dict2pid, dict */
617 ps_search_base_reinit(search, dict, d2p);
618
619 if (!allphs->lm) {
620 E_WARN
621 ("-lm argument missing; doing unconstrained phone-loop decoding\n");
622 allphs->inspen =
623 (int32) (logmath_log
624 (search->acmod->lmath,
625 cmd_ln_float32_r(search->config,
626 "-pip")) *
627 allphs->lw) >> SENSCR_SHIFT;
628 }
629
630 return 0;
631}
632
633void
634allphone_search_free(ps_search_t * search)
635{
636 allphone_search_t *allphs = (allphone_search_t *) search;
637
638
639 double n_speech = (double)allphs->n_tot_frame
640 / cmd_ln_int32_r(ps_search_config(allphs), "-frate");
641
642 E_INFO("TOTAL allphone %.2f CPU %.3f xRT\n",
643 allphs->perf.t_tot_cpu,
644 allphs->perf.t_tot_cpu / n_speech);
645 E_INFO("TOTAL allphone %.2f wall %.3f xRT\n",
646 allphs->perf.t_tot_elapsed,
647 allphs->perf.t_tot_elapsed / n_speech);
648
649 ps_search_base_free(search);
650
651 hmm_context_free(allphs->hmmctx);
652 phmm_free(allphs);
653 if (allphs->lm)
654 ngram_model_free(allphs->lm);
655 if (allphs->ci2lmwid)
656 ckd_free(allphs->ci2lmwid);
657 if (allphs->history)
658 blkarray_list_free(allphs->history);
659
660 ckd_free(allphs);
661}
662
663int
664allphone_search_start(ps_search_t * search)
665{
666 allphone_search_t *allphs;
667 bin_mdef_t *mdef;
668 s3cipid_t ci;
669 phmm_t *p;
670
671 allphs = (allphone_search_t *) search;
672 mdef = search->acmod->mdef;
673
674 /* Reset all HMMs. */
675 for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++) {
676 for (p = allphs->ci_phmm[(unsigned) ci]; p; p = p->next) {
677 hmm_clear(&(p->hmm));
678 }
679 }
680
681 allphs->n_hmm_eval = 0;
682 allphs->n_sen_eval = 0;
683
684 /* Free history nodes, if any */
685 blkarray_list_reset(allphs->history);
686
687 /* Initialize start state of the SILENCE PHMM */
688 allphs->frame = 0;
689 ci = bin_mdef_silphone(mdef);
690 if (NOT_S3CIPID(ci))
691 E_FATAL("Cannot find CI-phone %s\n", S3_SILENCE_CIPHONE);
692 for (p = allphs->ci_phmm[ci]; p && (p->pid != ci); p = p->next);
693 if (!p)
694 E_FATAL("Cannot find HMM for %s\n", S3_SILENCE_CIPHONE);
695 hmm_enter(&(p->hmm), 0, 0, allphs->frame);
696
697 ptmr_reset(&allphs->perf);
698 ptmr_start(&allphs->perf);
699
700 return 0;
701}
702
703static void
704allphone_search_sen_active(allphone_search_t * allphs)
705{
706 acmod_t *acmod;
707 bin_mdef_t *mdef;
708 phmm_t *p;
709 int32 ci;
710
711 acmod = ps_search_acmod(allphs);
712 mdef = acmod->mdef;
713
714 acmod_clear_active(acmod);
715 for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++)
716 for (p = allphs->ci_phmm[ci]; p; p = p->next)
717 if (hmm_frame(&(p->hmm)) == allphs->frame)
718 acmod_activate_hmm(acmod, &(p->hmm));
719}
720
721int
722allphone_search_step(ps_search_t * search, int frame_idx)
723{
724 int32 bestscr, frame_history_start;
725 const int16 *senscr;
726 allphone_search_t *allphs = (allphone_search_t *) search;
727 acmod_t *acmod = search->acmod;
728
729 if (!acmod->compallsen)
730 allphone_search_sen_active(allphs);
731 senscr = acmod_score(acmod, &frame_idx);
732 allphs->n_sen_eval += acmod->n_senone_active;
733 bestscr = phmm_eval_all(allphs, senscr);
734
735 frame_history_start = blkarray_list_n_valid(allphs->history);
736 phmm_exit(allphs, bestscr);
737 phmm_trans(allphs, bestscr, frame_history_start);
738
739 allphs->frame++;
740
741 return 0;
742}
743
744static int32
745ascore(allphone_search_t * allphs, history_t * h)
746{
747 int32 score = h->score;
748
749 if (h->hist > 0) {
750 history_t *pred = blkarray_list_get(allphs->history, h->hist);
751 score -= pred->score;
752 }
753
754 return score - h->tscore;
755}
756
757static void
758allphone_clear_segments(allphone_search_t * allphs)
759{
760 gnode_t *gn;
761 for (gn = allphs->segments; gn; gn = gn->next) {
762 ckd_free(gnode_ptr(gn));
763 }
764 glist_free(allphs->segments);
765 allphs->segments = NULL;
766}
767
768static void
769allphone_backtrace(allphone_search_t * allphs, int32 f, int32 *out_score)
770{
771 int32 best, hist_idx, best_idx;
772 int32 frm, last_frm;
773 history_t *h;
774 phseg_t *s;
775
776 /* Clear old list */
777 allphone_clear_segments(allphs);
778
779 frm = last_frm = f;
780 /* Find the first history entry for the requested frame */
781 hist_idx = blkarray_list_n_valid(allphs->history) - 1;
782 while (hist_idx > 0) {
783 h = blkarray_list_get(allphs->history, hist_idx);
784 if (h->ef <= f) {
785 frm = last_frm = h->ef;
786 break;
787 }
788 hist_idx--;
789 }
790
791 if (hist_idx < 0)
792 return;
793
794 /* Find bestscore */
795 best = (int32) 0x80000000;
796 best_idx = -1;
797 while (frm == last_frm && hist_idx > 0) {
798 h = blkarray_list_get(allphs->history, hist_idx);
799 frm = h->ef;
800 if (h->score > best && frm == last_frm) {
801 best = h->score;
802 best_idx = hist_idx;
803 }
804 hist_idx--;
805 }
806
807 if (best_idx < 0)
808 return;
809
810 if (out_score)
811 *out_score = best;
812
813 /* Backtrace */
814 while (best_idx > 0) {
815 h = blkarray_list_get(allphs->history, best_idx);
816 s = (phseg_t *) ckd_calloc(1, sizeof(phseg_t));
817 s->ci = h->phmm->ci;
818 s->sf =
819 (h->hist >
820 0) ? ((history_t *) blkarray_list_get(allphs->history,
821 h->hist))->ef + 1 : 0;
822 s->ef = h->ef;
823 s->score = ascore(allphs, h);
824 s->tscore = h->tscore;
825 allphs->segments = glist_add_ptr(allphs->segments, s);
826
827 best_idx = h->hist;
828 }
829
830 return;
831}
832
833int
834allphone_search_finish(ps_search_t * search)
835{
836 allphone_search_t *allphs;
837 int32 cf, n_hist;
838
839 allphs = (allphone_search_t *) search;
840
841 allphs->n_tot_frame += allphs->frame;
842 n_hist = blkarray_list_n_valid(allphs->history);
843 E_INFO
844 ("%d frames, %d HMMs (%d/fr), %d senones (%d/fr), %d history entries (%d/fr)\n",
845 allphs->frame, allphs->n_hmm_eval,
846 (allphs->frame > 0) ? allphs->n_hmm_eval / allphs->frame : 0,
847 allphs->n_sen_eval,
848 (allphs->frame > 0) ? allphs->n_sen_eval / allphs->frame : 0,
849 n_hist, (allphs->frame > 0) ? n_hist / allphs->frame : 0);
850
851 /* Now backtrace. */
852 allphone_backtrace(allphs, allphs->frame - 1, NULL);
853
854 /* Print out some statistics. */
855 ptmr_stop(&allphs->perf);
856 /* This is the number of frames processed. */
857 cf = ps_search_acmod(allphs)->output_frame;
858 if (cf > 0) {
859 double n_speech = (double) (cf + 1)
860 / cmd_ln_int32_r(ps_search_config(allphs), "-frate");
861 E_INFO("allphone %.2f CPU %.3f xRT\n",
862 allphs->perf.t_cpu, allphs->perf.t_cpu / n_speech);
863 E_INFO("allphone %.2f wall %.3f xRT\n",
864 allphs->perf.t_elapsed, allphs->perf.t_elapsed / n_speech);
865 }
866
867
868 return 0;
869}
870
871char const *
872allphone_search_hyp(ps_search_t * search, int32 * out_score)
873{
874 allphone_search_t *allphs;
875 phseg_t *p;
876 gnode_t *gn;
877 const char *phone_str;
878 bin_mdef_t *mdef;
879 int len, hyp_idx, phone_idx;
880
881 allphs = (allphone_search_t *) search;
882 mdef = search->acmod->mdef;
883
884 /* Create hypothesis */
885 if (search->hyp_str)
886 ckd_free(search->hyp_str);
887 search->hyp_str = NULL;
888
889 allphone_backtrace(allphs, allphs->frame - 1, out_score);
890 if (allphs->segments == NULL) {
891 return NULL;
892 }
893
894 len = glist_count(allphs->segments) * 10; /* maximum length of one phone with spacebar */
895
896 search->hyp_str = (char *) ckd_calloc(len, sizeof(*search->hyp_str));
897 hyp_idx = 0;
898 for (gn = allphs->segments; gn; gn = gn->next) {
899 p = gnode_ptr(gn);
900 phone_str = bin_mdef_ciphone_str(mdef, p->ci);
901 phone_idx = 0;
902 while (phone_str[phone_idx] != '\0')
903 search->hyp_str[hyp_idx++] = phone_str[phone_idx++];
904 search->hyp_str[hyp_idx++] = ' ';
905 }
906 search->hyp_str[--hyp_idx] = '\0';
907 E_INFO("Hyp: %s\n", search->hyp_str);
908 return search->hyp_str;
909}
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
Definition: acmod.c:1213
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
Definition: acmod.c:1106
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
Definition: acmod.c:1197
const char * bin_mdef_ciphone_str(bin_mdef_t *m, int32 ci)
In: ciphone id for which name wanted.
Definition: bin_mdef.c:737
int32 hmm_vit_eval(hmm_t *hmm)
Viterbi evaluation of given HMM.
Definition: hmm.c:789
#define hmm_context_set_senscore(ctx, senscr)
Change the senone score array for a context.
Definition: hmm.h:227
void hmm_enter(hmm_t *h, int32 score, int32 histid, int frame)
Enter an HMM with the given path score and history ID.
Definition: hmm.c:201
void hmm_deinit(hmm_t *hmm)
Free an HMM structure, releasing internal data (but not the HMM structure itself).
Definition: hmm.c:111
#define WORST_SCORE
Large "bad" score.
Definition: hmm.h:84
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
Definition: hmm.c:89
void hmm_context_free(hmm_context_t *ctx)
Free an HMM context.
Definition: hmm.c:80
hmm_context_t * hmm_context_init(int32 n_emit_state, uint8 **const *tp, int16 const *senscore, uint16 *const *sseq)
Create an HMM context.
Definition: hmm.c:56
void hmm_clear(hmm_t *h)
Reset the states of the HMM to the invalid condition.
Definition: hmm.c:183
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:73
#define S3_SILENCE_CIPHONE
Hard-coded silence CI phone name.
Definition: mdef.h:81
Internal implementation of PocketSphinx decoder.
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
void ps_search_base_free(ps_search_t *search)
Free search.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, const char *type, const char *name, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
int16 s3cipid_t
Size definitions for more semantically meaningful units.
Definition: s3types.h:63
#define BAD_S3CIPID
Ci phone id.
Definition: s3types.h:64
Acoustic model structure.
Definition: acmod.h:148
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
int n_senone_active
Number of active GMMs.
Definition: acmod.h:169
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
tmat_t * tmat
Transition matrices.
Definition: acmod.h:160
uint8 compallsen
Compute all senones?
Definition: acmod.h:188
Implementation of allphone search structure.
int32 n_hmm_eval
Total HMMs evaluated this utt.
phmm_t ** ci_phmm
PHMM lists (for each CI phone)
frame_idx_t frame
Current frame.
int32 n_sen_eval
Total senones evaluated this utt.
blkarray_list_t * history
List of history nodes allocated in each frame.
int32 n_tot_frame
Total number of frames processed.
int32 * ci2lmwid
Mapping of CI phones to LM word IDs.
float32 ascale
Acoustic score scale for posterior probabilities.
hmm_context_t * hmmctx
HMM context.
int32 ci_only
Use context-independent phones for decoding.
ptmr_t perf
Performance counter.
ngram_model_t * lm
Ngram model set.
int32 inspen
Language weights.
int32 pbeam
Effective beams after applying beam_factor.
mdef_entry_t * phone
All phone structures.
Definition: bin_mdef.h:133
int32 n_ciphone
Number of base (CI) phones.
Definition: bin_mdef.h:119
uint16 ** sseq
Unique senone sequences (2D array built at load time)
Definition: bin_mdef.h:134
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:84
a structure for a dictionary.
Definition: dict.h:76
History (paths) information at any point in allphone Viterbi search.
int32 tscore
Transition score for this path.
int32 hist
Previous history entry.
phmm_t * phmm
PHMM ending this path.
frame_idx_t ef
End frame.
int32 score
Path score for this path.
An individual HMM among the HMM search space.
uint8 ctx[3]
quintphones will require hacking
Definition: bin_mdef.h:86
int32 tmat
Transition matrix ID.
Definition: bin_mdef.h:75
struct mdef_entry_s::@0::@1 ci
CI phone information - attributes (just "filler" for now)
Models a single unique <senone-sequence, tmat> pair.
struct plink_s * succlist
List of successor PHMM nodes.
struct phmm_s * next
Next unique PHMM for same parent basephone.
hmm_t hmm
Base HMM structure.
bitvec_t * rc
Set (bit-vector) of right context phones seen for this PHMM.
bitvec_t * lc
Set (bit-vector) of left context phones seen for this PHMM.
s3pid_t pid
Phone id (temp.
s3cipid_t ci
Parent basephone for this PHMM.
Segment iterator over list of phseg.
Phone level segmentation information.
Word graph structure used in bestpath/nbest search.
Base structure for search module.
acmod_t * acmod
Acoustic model.
char * hyp_str
Current hypothesis string.
cmd_ln_t * config
Configuration.
V-table for search algorithm.
Base structure for hypothesis segmentation iterator.
ps_search_t * search
Search object from whence this came.
ps_segfuncs_t * vt
V-table of seg methods.
int32 lscr
Language model score.
int32 ascr
Acoustic score.
frame_idx_t sf
Start frame.
char const * word
Word string (pointer into dictionary hash)
frame_idx_t ef
End frame.
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state].
Definition: tmat.h:56