PocketSphinx 5prealpha
s2_semi_mgau.c
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37
38/* System headers */
39#include <stdio.h>
40#include <stdlib.h>
41#include <string.h>
42#include <assert.h>
43#include <limits.h>
44#include <math.h>
45#if defined(__ADSPBLACKFIN__)
46#elif !defined(_WIN32_WCE)
47#include <sys/types.h>
48#endif
49
50/* SphinxBase headers */
51#include <sphinx_config.h>
52#include <sphinxbase/cmd_ln.h>
53#include <sphinxbase/fixpoint.h>
54#include <sphinxbase/ckd_alloc.h>
55#include <sphinxbase/bio.h>
56#include <sphinxbase/err.h>
57#include <sphinxbase/prim_type.h>
58
59/* Local headers */
60#include "s2_semi_mgau.h"
61#include "tied_mgau_common.h"
62
63static ps_mgaufuncs_t s2_semi_mgau_funcs = {
64 "s2_semi",
65 s2_semi_mgau_frame_eval, /* frame_eval */
66 s2_semi_mgau_mllr_transform, /* transform */
67 s2_semi_mgau_free /* free */
68};
69
71 int32 score; /* score or distance */
72 int32 codeword; /* codeword (vector index) */
73};
74
75static void
76eval_topn(s2_semi_mgau_t *s, int32 feat, mfcc_t *z)
77{
78 int i, ceplen;
79 vqFeature_t *topn;
80
81 topn = s->f[feat];
82 ceplen = s->g->featlen[feat];
83
84 for (i = 0; i < s->max_topn; i++) {
85 mfcc_t *mean, diff, sqdiff, compl; /* diff, diff^2, component likelihood */
86 vqFeature_t vtmp;
87 mfcc_t *var, d;
88 mfcc_t *obs;
89 int32 cw, j;
90
91 cw = topn[i].codeword;
92 mean = s->g->mean[0][feat][0] + cw * ceplen;
93 var = s->g->var[0][feat][0] + cw * ceplen;
94 d = s->g->det[0][feat][cw];
95 obs = z;
96 for (j = 0; j < ceplen; j++) {
97 diff = *obs++ - *mean++;
98 sqdiff = MFCCMUL(diff, diff);
99 compl = MFCCMUL(sqdiff, *var);
100 d = GMMSUB(d, compl);
101 ++var;
102 }
103 topn[i].score = (int32)d;
104 if (i == 0)
105 continue;
106 vtmp = topn[i];
107 for (j = i - 1; j >= 0 && (int32)d > topn[j].score; j--) {
108 topn[j + 1] = topn[j];
109 }
110 topn[j + 1] = vtmp;
111 }
112}
113
114static void
115eval_cb(s2_semi_mgau_t *s, int32 feat, mfcc_t *z)
116{
117 vqFeature_t *worst, *best, *topn;
118 mfcc_t *mean;
119 mfcc_t *var, *det, *detP, *detE;
120 int32 i, ceplen;
121
122 best = topn = s->f[feat];
123 worst = topn + (s->max_topn - 1);
124 mean = s->g->mean[0][feat][0];
125 var = s->g->var[0][feat][0];
126 det = s->g->det[0][feat];
127 detE = det + s->g->n_density;
128 ceplen = s->g->featlen[feat];
129
130 for (detP = det; detP < detE; ++detP) {
131 mfcc_t diff, sqdiff, compl; /* diff, diff^2, component likelihood */
132 mfcc_t d;
133 mfcc_t *obs;
134 vqFeature_t *cur;
135 int32 cw, j;
136
137 d = *detP;
138 obs = z;
139 cw = (int)(detP - det);
140 for (j = 0; (j < ceplen) && (d >= worst->score); ++j) {
141 diff = *obs++ - *mean++;
142 sqdiff = MFCCMUL(diff, diff);
143 compl = MFCCMUL(sqdiff, *var);
144 d = GMMSUB(d, compl);
145 ++var;
146 }
147 if (j < ceplen) {
148 /* terminated early, so not in topn */
149 mean += (ceplen - j);
150 var += (ceplen - j);
151 continue;
152 }
153 if ((int32)d < worst->score)
154 continue;
155 for (i = 0; i < s->max_topn; i++) {
156 /* already there, so don't need to insert */
157 if (topn[i].codeword == cw)
158 break;
159 }
160 if (i < s->max_topn)
161 continue; /* already there. Don't insert */
162 /* remaining code inserts codeword and dist in correct spot */
163 for (cur = worst - 1; cur >= best && (int32)d >= cur->score; --cur)
164 memcpy(cur + 1, cur, sizeof(vqFeature_t));
165 ++cur;
166 cur->codeword = cw;
167 cur->score = (int32)d;
168 }
169}
170
171static void
172mgau_dist(s2_semi_mgau_t * s, int32 frame, int32 feat, mfcc_t * z)
173{
174 eval_topn(s, feat, z);
175
176 /* If this frame is skipped, do nothing else. */
177 if (frame % s->ds_ratio)
178 return;
179
180 /* Evaluate the rest of the codebook (or subset thereof). */
181 eval_cb(s, feat, z);
182}
183
184static int
185mgau_norm(s2_semi_mgau_t *s, int feat)
186{
187 int32 norm;
188 int j;
189
190 /* Compute quantized normalizing constant. */
191 norm = s->f[feat][0].score >> SENSCR_SHIFT;
192
193 /* Normalize the scores, negate them, and clamp their dynamic range. */
194 for (j = 0; j < s->max_topn; ++j) {
195 s->f[feat][j].score = -((s->f[feat][j].score >> SENSCR_SHIFT) - norm);
196 if (s->f[feat][j].score > MAX_NEG_ASCR)
197 s->f[feat][j].score = MAX_NEG_ASCR;
198 if (s->topn_beam[feat] && s->f[feat][j].score > s->topn_beam[feat])
199 break;
200 }
201 return j;
202}
203
204static int32
205get_scores_8b_feat_6(s2_semi_mgau_t * s, int i,
206 int16 *senone_scores, uint8 *senone_active,
207 int32 n_senone_active)
208{
209 int32 j, l;
210 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4, *pid_cw5;
211
212 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
213 pid_cw1 = s->mixw[i][s->f[i][1].codeword];
214 pid_cw2 = s->mixw[i][s->f[i][2].codeword];
215 pid_cw3 = s->mixw[i][s->f[i][3].codeword];
216 pid_cw4 = s->mixw[i][s->f[i][4].codeword];
217 pid_cw5 = s->mixw[i][s->f[i][5].codeword];
218
219 for (l = j = 0; j < n_senone_active; j++) {
220 int sen = senone_active[j] + l;
221 int32 tmp = pid_cw0[sen] + s->f[i][0].score;
222
223 tmp = fast_logmath_add(s->lmath_8b, tmp,
224 pid_cw1[sen] + s->f[i][1].score);
225 tmp = fast_logmath_add(s->lmath_8b, tmp,
226 pid_cw2[sen] + s->f[i][2].score);
227 tmp = fast_logmath_add(s->lmath_8b, tmp,
228 pid_cw3[sen] + s->f[i][3].score);
229 tmp = fast_logmath_add(s->lmath_8b, tmp,
230 pid_cw4[sen] + s->f[i][4].score);
231 tmp = fast_logmath_add(s->lmath_8b, tmp,
232 pid_cw5[sen] + s->f[i][5].score);
233
234 senone_scores[sen] += tmp;
235 l = sen;
236 }
237 return 0;
238}
239
240static int32
241get_scores_8b_feat_5(s2_semi_mgau_t * s, int i,
242 int16 *senone_scores, uint8 *senone_active,
243 int32 n_senone_active)
244{
245 int32 j, l;
246 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4;
247
248 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
249 pid_cw1 = s->mixw[i][s->f[i][1].codeword];
250 pid_cw2 = s->mixw[i][s->f[i][2].codeword];
251 pid_cw3 = s->mixw[i][s->f[i][3].codeword];
252 pid_cw4 = s->mixw[i][s->f[i][4].codeword];
253
254 for (l = j = 0; j < n_senone_active; j++) {
255 int sen = senone_active[j] + l;
256 int32 tmp = pid_cw0[sen] + s->f[i][0].score;
257
258 tmp = fast_logmath_add(s->lmath_8b, tmp,
259 pid_cw1[sen] + s->f[i][1].score);
260 tmp = fast_logmath_add(s->lmath_8b, tmp,
261 pid_cw2[sen] + s->f[i][2].score);
262 tmp = fast_logmath_add(s->lmath_8b, tmp,
263 pid_cw3[sen] + s->f[i][3].score);
264 tmp = fast_logmath_add(s->lmath_8b, tmp,
265 pid_cw4[sen] + s->f[i][4].score);
266
267 senone_scores[sen] += tmp;
268 l = sen;
269 }
270 return 0;
271}
272
273static int32
274get_scores_8b_feat_4(s2_semi_mgau_t * s, int i,
275 int16 *senone_scores, uint8 *senone_active,
276 int32 n_senone_active)
277{
278 int32 j, l;
279 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3;
280
281 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
282 pid_cw1 = s->mixw[i][s->f[i][1].codeword];
283 pid_cw2 = s->mixw[i][s->f[i][2].codeword];
284 pid_cw3 = s->mixw[i][s->f[i][3].codeword];
285
286 for (l = j = 0; j < n_senone_active; j++) {
287 int sen = senone_active[j] + l;
288 int32 tmp = pid_cw0[sen] + s->f[i][0].score;
289
290 tmp = fast_logmath_add(s->lmath_8b, tmp,
291 pid_cw1[sen] + s->f[i][1].score);
292 tmp = fast_logmath_add(s->lmath_8b, tmp,
293 pid_cw2[sen] + s->f[i][2].score);
294 tmp = fast_logmath_add(s->lmath_8b, tmp,
295 pid_cw3[sen] + s->f[i][3].score);
296
297 senone_scores[sen] += tmp;
298 l = sen;
299 }
300 return 0;
301}
302
303static int32
304get_scores_8b_feat_3(s2_semi_mgau_t * s, int i,
305 int16 *senone_scores, uint8 *senone_active,
306 int32 n_senone_active)
307{
308 int32 j, l;
309 uint8 *pid_cw0, *pid_cw1, *pid_cw2;
310
311 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
312 pid_cw1 = s->mixw[i][s->f[i][1].codeword];
313 pid_cw2 = s->mixw[i][s->f[i][2].codeword];
314
315 for (l = j = 0; j < n_senone_active; j++) {
316 int sen = senone_active[j] + l;
317 int32 tmp = pid_cw0[sen] + s->f[i][0].score;
318
319 tmp = fast_logmath_add(s->lmath_8b, tmp,
320 pid_cw1[sen] + s->f[i][1].score);
321 tmp = fast_logmath_add(s->lmath_8b, tmp,
322 pid_cw2[sen] + s->f[i][2].score);
323
324 senone_scores[sen] += tmp;
325 l = sen;
326 }
327 return 0;
328}
329
330static int32
331get_scores_8b_feat_2(s2_semi_mgau_t * s, int i,
332 int16 *senone_scores, uint8 *senone_active,
333 int32 n_senone_active)
334{
335 int32 j, l;
336 uint8 *pid_cw0, *pid_cw1;
337
338 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
339 pid_cw1 = s->mixw[i][s->f[i][1].codeword];
340
341 for (l = j = 0; j < n_senone_active; j++) {
342 int sen = senone_active[j] + l;
343 int32 tmp = pid_cw0[sen] + s->f[i][0].score;
344
345 tmp = fast_logmath_add(s->lmath_8b, tmp,
346 pid_cw1[sen] + s->f[i][1].score);
347
348 senone_scores[sen] += tmp;
349 l = sen;
350 }
351 return 0;
352}
353
354static int32
355get_scores_8b_feat_1(s2_semi_mgau_t * s, int i,
356 int16 *senone_scores, uint8 *senone_active,
357 int32 n_senone_active)
358{
359 int32 j, l;
360 uint8 *pid_cw0;
361
362 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
363 for (l = j = 0; j < n_senone_active; j++) {
364 int sen = senone_active[j] + l;
365 int32 tmp = pid_cw0[sen] + s->f[i][0].score;
366 senone_scores[sen] += tmp;
367 l = sen;
368 }
369 return 0;
370}
371
372static int32
373get_scores_8b_feat_any(s2_semi_mgau_t * s, int i, int topn,
374 int16 *senone_scores, uint8 *senone_active,
375 int32 n_senone_active)
376{
377 int32 j, k, l;
378
379 for (l = j = 0; j < n_senone_active; j++) {
380 int sen = senone_active[j] + l;
381 uint8 *pid_cw;
382 int32 tmp;
383 pid_cw = s->mixw[i][s->f[i][0].codeword];
384 tmp = pid_cw[sen] + s->f[i][0].score;
385 for (k = 1; k < topn; ++k) {
386 pid_cw = s->mixw[i][s->f[i][k].codeword];
387 tmp = fast_logmath_add(s->lmath_8b, tmp,
388 pid_cw[sen] + s->f[i][k].score);
389 }
390 senone_scores[sen] += tmp;
391 l = sen;
392 }
393 return 0;
394}
395
396static int32
397get_scores_8b_feat(s2_semi_mgau_t * s, int i, int topn,
398 int16 *senone_scores, uint8 *senone_active, int32 n_senone_active)
399{
400 switch (topn) {
401 case 6:
402 return get_scores_8b_feat_6(s, i, senone_scores,
403 senone_active, n_senone_active);
404 case 5:
405 return get_scores_8b_feat_5(s, i, senone_scores,
406 senone_active, n_senone_active);
407 case 4:
408 return get_scores_8b_feat_4(s, i, senone_scores,
409 senone_active, n_senone_active);
410 case 3:
411 return get_scores_8b_feat_3(s, i, senone_scores,
412 senone_active, n_senone_active);
413 case 2:
414 return get_scores_8b_feat_2(s, i, senone_scores,
415 senone_active, n_senone_active);
416 case 1:
417 return get_scores_8b_feat_1(s, i, senone_scores,
418 senone_active, n_senone_active);
419 default:
420 return get_scores_8b_feat_any(s, i, topn, senone_scores,
421 senone_active, n_senone_active);
422 }
423}
424
425static int32
426get_scores_8b_feat_all(s2_semi_mgau_t * s, int i, int topn, int16 *senone_scores)
427{
428 int32 j, k;
429
430 for (j = 0; j < s->n_sen; j++) {
431 uint8 *pid_cw;
432 int32 tmp;
433 pid_cw = s->mixw[i][s->f[i][0].codeword];
434 tmp = pid_cw[j] + s->f[i][0].score;
435 for (k = 1; k < topn; ++k) {
436 pid_cw = s->mixw[i][s->f[i][k].codeword];
437 tmp = fast_logmath_add(s->lmath_8b, tmp,
438 pid_cw[j] + s->f[i][k].score);
439 }
440 senone_scores[j] += tmp;
441 }
442 return 0;
443}
444
445static int32
446get_scores_4b_feat_6(s2_semi_mgau_t * s, int i,
447 int16 *senone_scores, uint8 *senone_active,
448 int32 n_senone_active)
449{
450 int32 j, l;
451 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4, *pid_cw5;
452 uint8 w_den[6][16];
453
454 /* Precompute scaled densities. */
455 for (j = 0; j < 16; ++j) {
456 w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
457 w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
458 w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
459 w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score;
460 w_den[4][j] = s->mixw_cb[j] + s->f[i][4].score;
461 w_den[5][j] = s->mixw_cb[j] + s->f[i][5].score;
462 }
463
464 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
465 pid_cw1 = s->mixw[i][s->f[i][1].codeword];
466 pid_cw2 = s->mixw[i][s->f[i][2].codeword];
467 pid_cw3 = s->mixw[i][s->f[i][3].codeword];
468 pid_cw4 = s->mixw[i][s->f[i][4].codeword];
469 pid_cw5 = s->mixw[i][s->f[i][5].codeword];
470
471 for (l = j = 0; j < n_senone_active; j++) {
472 int n = senone_active[j] + l;
473 int tmp, cw;
474
475 if (n & 1) {
476 cw = pid_cw0[n/2] >> 4;
477 tmp = w_den[0][cw];
478 cw = pid_cw1[n/2] >> 4;
479 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
480 cw = pid_cw2[n/2] >> 4;
481 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
482 cw = pid_cw3[n/2] >> 4;
483 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
484 cw = pid_cw4[n/2] >> 4;
485 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
486 cw = pid_cw5[n/2] >> 4;
487 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[5][cw]);
488 }
489 else {
490 cw = pid_cw0[n/2] & 0x0f;
491 tmp = w_den[0][cw];
492 cw = pid_cw1[n/2] & 0x0f;
493 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
494 cw = pid_cw2[n/2] & 0x0f;
495 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
496 cw = pid_cw3[n/2] & 0x0f;
497 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
498 cw = pid_cw4[n/2] & 0x0f;
499 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
500 cw = pid_cw5[n/2] & 0x0f;
501 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[5][cw]);
502 }
503 senone_scores[n] += tmp;
504 l = n;
505 }
506 return 0;
507}
508
509static int32
510get_scores_4b_feat_5(s2_semi_mgau_t * s, int i,
511 int16 *senone_scores, uint8 *senone_active,
512 int32 n_senone_active)
513{
514 int32 j, l;
515 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4;
516 uint8 w_den[5][16];
517
518 /* Precompute scaled densities. */
519 for (j = 0; j < 16; ++j) {
520 w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
521 w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
522 w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
523 w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score;
524 w_den[4][j] = s->mixw_cb[j] + s->f[i][4].score;
525 }
526
527 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
528 pid_cw1 = s->mixw[i][s->f[i][1].codeword];
529 pid_cw2 = s->mixw[i][s->f[i][2].codeword];
530 pid_cw3 = s->mixw[i][s->f[i][3].codeword];
531 pid_cw4 = s->mixw[i][s->f[i][4].codeword];
532
533 for (l = j = 0; j < n_senone_active; j++) {
534 int n = senone_active[j] + l;
535 int tmp, cw;
536
537 if (n & 1) {
538 cw = pid_cw0[n/2] >> 4;
539 tmp = w_den[0][cw];
540 cw = pid_cw1[n/2] >> 4;
541 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
542 cw = pid_cw2[n/2] >> 4;
543 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
544 cw = pid_cw3[n/2] >> 4;
545 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
546 cw = pid_cw4[n/2] >> 4;
547 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
548 }
549 else {
550 cw = pid_cw0[n/2] & 0x0f;
551 tmp = w_den[0][cw];
552 cw = pid_cw1[n/2] & 0x0f;
553 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
554 cw = pid_cw2[n/2] & 0x0f;
555 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
556 cw = pid_cw3[n/2] & 0x0f;
557 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
558 cw = pid_cw4[n/2] & 0x0f;
559 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
560 }
561 senone_scores[n] += tmp;
562 l = n;
563 }
564 return 0;
565}
566
567static int32
568get_scores_4b_feat_4(s2_semi_mgau_t * s, int i,
569 int16 *senone_scores, uint8 *senone_active,
570 int32 n_senone_active)
571{
572 int32 j, l;
573 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3;
574 uint8 w_den[4][16];
575
576 /* Precompute scaled densities. */
577 for (j = 0; j < 16; ++j) {
578 w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
579 w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
580 w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
581 w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score;
582 }
583
584 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
585 pid_cw1 = s->mixw[i][s->f[i][1].codeword];
586 pid_cw2 = s->mixw[i][s->f[i][2].codeword];
587 pid_cw3 = s->mixw[i][s->f[i][3].codeword];
588
589 for (l = j = 0; j < n_senone_active; j++) {
590 int n = senone_active[j] + l;
591 int tmp, cw;
592
593 if (n & 1) {
594 cw = pid_cw0[n/2] >> 4;
595 tmp = w_den[0][cw];
596 cw = pid_cw1[n/2] >> 4;
597 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
598 cw = pid_cw2[n/2] >> 4;
599 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
600 cw = pid_cw3[n/2] >> 4;
601 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
602 }
603 else {
604 cw = pid_cw0[n/2] & 0x0f;
605 tmp = w_den[0][cw];
606 cw = pid_cw1[n/2] & 0x0f;
607 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
608 cw = pid_cw2[n/2] & 0x0f;
609 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
610 cw = pid_cw3[n/2] & 0x0f;
611 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
612 }
613 senone_scores[n] += tmp;
614 l = n;
615 }
616 return 0;
617}
618
619static int32
620get_scores_4b_feat_3(s2_semi_mgau_t * s, int i,
621 int16 *senone_scores, uint8 *senone_active,
622 int32 n_senone_active)
623{
624 int32 j, l;
625 uint8 *pid_cw0, *pid_cw1, *pid_cw2;
626 uint8 w_den[3][16];
627
628 /* Precompute scaled densities. */
629 for (j = 0; j < 16; ++j) {
630 w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
631 w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
632 w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
633 }
634
635 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
636 pid_cw1 = s->mixw[i][s->f[i][1].codeword];
637 pid_cw2 = s->mixw[i][s->f[i][2].codeword];
638
639 for (l = j = 0; j < n_senone_active; j++) {
640 int n = senone_active[j] + l;
641 int tmp, cw;
642
643 if (n & 1) {
644 cw = pid_cw0[n/2] >> 4;
645 tmp = w_den[0][cw];
646 cw = pid_cw1[n/2] >> 4;
647 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
648 cw = pid_cw2[n/2] >> 4;
649 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
650 }
651 else {
652 cw = pid_cw0[n/2] & 0x0f;
653 tmp = w_den[0][cw];
654 cw = pid_cw1[n/2] & 0x0f;
655 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
656 cw = pid_cw2[n/2] & 0x0f;
657 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
658 }
659 senone_scores[n] += tmp;
660 l = n;
661 }
662 return 0;
663}
664
665static int32
666get_scores_4b_feat_2(s2_semi_mgau_t * s, int i,
667 int16 *senone_scores, uint8 *senone_active,
668 int32 n_senone_active)
669{
670 int32 j, l;
671 uint8 *pid_cw0, *pid_cw1;
672 uint8 w_den[2][16];
673
674 /* Precompute scaled densities. */
675 for (j = 0; j < 16; ++j) {
676 w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
677 w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
678 }
679
680 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
681 pid_cw1 = s->mixw[i][s->f[i][1].codeword];
682
683 for (l = j = 0; j < n_senone_active; j++) {
684 int n = senone_active[j] + l;
685 int tmp, cw;
686
687 if (n & 1) {
688 cw = pid_cw0[n/2] >> 4;
689 tmp = w_den[0][cw];
690 cw = pid_cw1[n/2] >> 4;
691 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
692 }
693 else {
694 cw = pid_cw0[n/2] & 0x0f;
695 tmp = w_den[0][cw];
696 cw = pid_cw1[n/2] & 0x0f;
697 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
698 }
699 senone_scores[n] += tmp;
700 l = n;
701 }
702 return 0;
703}
704
705static int32
706get_scores_4b_feat_1(s2_semi_mgau_t * s, int i,
707 int16 *senone_scores, uint8 *senone_active,
708 int32 n_senone_active)
709{
710 int32 j, l;
711 uint8 *pid_cw0;
712 uint8 w_den[16];
713
714 /* Precompute scaled densities. */
715 for (j = 0; j < 16; ++j) {
716 w_den[j] = s->mixw_cb[j] + s->f[i][0].score;
717 }
718
719 pid_cw0 = s->mixw[i][s->f[i][0].codeword];
720
721 for (l = j = 0; j < n_senone_active; j++) {
722 int n = senone_active[j] + l;
723 int tmp, cw;
724
725 if (n & 1) {
726 cw = pid_cw0[n/2] >> 4;
727 tmp = w_den[cw];
728 }
729 else {
730 cw = pid_cw0[n/2] & 0x0f;
731 tmp = w_den[cw];
732 }
733 senone_scores[n] += tmp;
734 l = n;
735 }
736 return 0;
737}
738
739static int32
740get_scores_4b_feat_any(s2_semi_mgau_t * s, int i, int topn,
741 int16 *senone_scores, uint8 *senone_active,
742 int32 n_senone_active)
743{
744 int32 j, k, l;
745
746 for (l = j = 0; j < n_senone_active; j++) {
747 int n = senone_active[j] + l;
748 int tmp, cw;
749 uint8 *pid_cw;
750
751 pid_cw = s->mixw[i][s->f[i][0].codeword];
752 if (n & 1)
753 cw = pid_cw[n/2] >> 4;
754 else
755 cw = pid_cw[n/2] & 0x0f;
756 tmp = s->mixw_cb[cw] + s->f[i][0].score;
757 for (k = 1; k < topn; ++k) {
758 pid_cw = s->mixw[i][s->f[i][k].codeword];
759 if (n & 1)
760 cw = pid_cw[n/2] >> 4;
761 else
762 cw = pid_cw[n/2] & 0x0f;
763 tmp = fast_logmath_add(s->lmath_8b, tmp,
764 s->mixw_cb[cw] + s->f[i][k].score);
765 }
766 senone_scores[n] += tmp;
767 l = n;
768 }
769 return 0;
770}
771
772static int32
773get_scores_4b_feat(s2_semi_mgau_t * s, int i, int topn,
774 int16 *senone_scores, uint8 *senone_active, int32 n_senone_active)
775{
776 switch (topn) {
777 case 6:
778 return get_scores_4b_feat_6(s, i, senone_scores,
779 senone_active, n_senone_active);
780 case 5:
781 return get_scores_4b_feat_5(s, i, senone_scores,
782 senone_active, n_senone_active);
783 case 4:
784 return get_scores_4b_feat_4(s, i, senone_scores,
785 senone_active, n_senone_active);
786 case 3:
787 return get_scores_4b_feat_3(s, i, senone_scores,
788 senone_active, n_senone_active);
789 case 2:
790 return get_scores_4b_feat_2(s, i, senone_scores,
791 senone_active, n_senone_active);
792 case 1:
793 return get_scores_4b_feat_1(s, i, senone_scores,
794 senone_active, n_senone_active);
795 default:
796 return get_scores_4b_feat_any(s, i, topn, senone_scores,
797 senone_active, n_senone_active);
798 }
799}
800
801static int32
802get_scores_4b_feat_all(s2_semi_mgau_t * s, int i, int topn, int16 *senone_scores)
803{
804 int j, last_sen;
805
806 j = 0;
807 /* Number of senones is always even, but don't overrun if it isn't. */
808 last_sen = s->n_sen & ~1;
809 while (j < last_sen) {
810 uint8 *pid_cw;
811 int32 tmp0, tmp1;
812 int k;
813
814 pid_cw = s->mixw[i][s->f[i][0].codeword];
815 tmp0 = s->mixw_cb[pid_cw[j/2] & 0x0f] + s->f[i][0].score;
816 tmp1 = s->mixw_cb[pid_cw[j/2] >> 4] + s->f[i][0].score;
817 for (k = 1; k < topn; ++k) {
818 int32 w_den0, w_den1;
819
820 pid_cw = s->mixw[i][s->f[i][k].codeword];
821 w_den0 = s->mixw_cb[pid_cw[j/2] & 0x0f] + s->f[i][k].score;
822 w_den1 = s->mixw_cb[pid_cw[j/2] >> 4] + s->f[i][k].score;
823 tmp0 = fast_logmath_add(s->lmath_8b, tmp0, w_den0);
824 tmp1 = fast_logmath_add(s->lmath_8b, tmp1, w_den1);
825 }
826 senone_scores[j++] += tmp0;
827 senone_scores[j++] += tmp1;
828 }
829 return 0;
830}
831
832/*
833 * Compute senone scores for the active senones.
834 */
835int32
836s2_semi_mgau_frame_eval(ps_mgau_t *ps,
837 int16 *senone_scores,
838 uint8 *senone_active,
839 int32 n_senone_active,
840 mfcc_t ** featbuf, int32 frame,
841 int32 compallsen)
842{
844 int i, topn_idx;
845 int n_feat = s->g->n_feat;
846
847 memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores));
848 /* No bounds checking is done here, which just means you'll get
849 * semi-random crap if you request a frame in the future or one
850 * that's too far in the past. */
851 topn_idx = frame % s->n_topn_hist;
852 s->f = s->topn_hist[topn_idx];
853 for (i = 0; i < n_feat; ++i) {
854 /* For past frames this will already be computed. */
855 if (frame >= ps_mgau_base(ps)->frame_idx) {
856 vqFeature_t **lastf;
857 if (topn_idx == 0)
858 lastf = s->topn_hist[s->n_topn_hist-1];
859 else
860 lastf = s->topn_hist[topn_idx-1];
861 memcpy(s->f[i], lastf[i], sizeof(vqFeature_t) * s->max_topn);
862 mgau_dist(s, frame, i, featbuf[i]);
863 s->topn_hist_n[topn_idx][i] = mgau_norm(s, i);
864 }
865 if (s->mixw_cb) {
866 if (compallsen)
867 get_scores_4b_feat_all(s, i, s->topn_hist_n[topn_idx][i], senone_scores);
868 else
869 get_scores_4b_feat(s, i, s->topn_hist_n[topn_idx][i], senone_scores,
870 senone_active, n_senone_active);
871 }
872 else {
873 if (compallsen)
874 get_scores_8b_feat_all(s, i, s->topn_hist_n[topn_idx][i], senone_scores);
875 else
876 get_scores_8b_feat(s, i, s->topn_hist_n[topn_idx][i], senone_scores,
877 senone_active, n_senone_active);
878 }
879 }
880
881 return 0;
882}
883
884static int32
885read_sendump(s2_semi_mgau_t *s, bin_mdef_t *mdef, char const *file)
886{
887 FILE *fp;
888 char line[1000];
889 int32 i, n, r, c;
890 int32 do_swap, do_mmap;
891 size_t offset;
892 int n_clust = 0;
893 int n_feat = s->g->n_feat;
894 int n_density = s->g->n_density;
895 int n_sen = bin_mdef_n_sen(mdef);
896 int n_bits = 8;
897
898 s->n_sen = n_sen; /* FIXME: Should have been done earlier */
899 do_mmap = cmd_ln_boolean_r(s->config, "-mmap");
900
901 if ((fp = fopen(file, "rb")) == NULL)
902 return -1;
903
904 E_INFO("Loading senones from dump file %s\n", file);
905 /* Read title size, title */
906 if (fread(&n, sizeof(int32), 1, fp) != 1) {
907 E_ERROR_SYSTEM("Failed to read title size from %s", file);
908 goto error_out;
909 }
910 /* This is extremely bogus */
911 do_swap = 0;
912 if (n < 1 || n > 999) {
913 SWAP_INT32(&n);
914 if (n < 1 || n > 999) {
915 E_ERROR("Title length %x in dump file %s out of range\n", n, file);
916 goto error_out;
917 }
918 do_swap = 1;
919 }
920 if (fread(line, sizeof(char), n, fp) != n) {
921 E_ERROR_SYSTEM("Cannot read title");
922 goto error_out;
923 }
924 if (line[n - 1] != '\0') {
925 E_ERROR("Bad title in dump file\n");
926 goto error_out;
927 }
928 E_INFO("%s\n", line);
929
930 /* Read header size, header */
931 if (fread(&n, sizeof(n), 1, fp) != 1) {
932 E_ERROR_SYSTEM("Failed to read header size from %s", file);
933 goto error_out;
934 }
935 if (do_swap) SWAP_INT32(&n);
936 if (fread(line, sizeof(char), n, fp) != n) {
937 E_ERROR_SYSTEM("Cannot read header");
938 goto error_out;
939 }
940 if (line[n - 1] != '\0') {
941 E_ERROR("Bad header in dump file\n");
942 goto error_out;
943 }
944
945 /* Read other header strings until string length = 0 */
946 for (;;) {
947 if (fread(&n, sizeof(n), 1, fp) != 1) {
948 E_ERROR_SYSTEM("Failed to read header string size from %s", file);
949 goto error_out;
950 }
951 if (do_swap) SWAP_INT32(&n);
952 if (n == 0)
953 break;
954 if (fread(line, sizeof(char), n, fp) != n) {
955 E_ERROR_SYSTEM("Cannot read header");
956 goto error_out;
957 }
958 /* Look for a cluster count, if present */
959 if (!strncmp(line, "feature_count ", strlen("feature_count "))) {
960 n_feat = atoi(line + strlen("feature_count "));
961 }
962 if (!strncmp(line, "mixture_count ", strlen("mixture_count "))) {
963 n_density = atoi(line + strlen("mixture_count "));
964 }
965 if (!strncmp(line, "model_count ", strlen("model_count "))) {
966 n_sen = atoi(line + strlen("model_count "));
967 }
968 if (!strncmp(line, "cluster_count ", strlen("cluster_count "))) {
969 n_clust = atoi(line + strlen("cluster_count "));
970 }
971 if (!strncmp(line, "cluster_bits ", strlen("cluster_bits "))) {
972 n_bits = atoi(line + strlen("cluster_bits "));
973 }
974 }
975
976 /* Defaults for #rows, #columns in mixw array. */
977 c = n_sen;
978 r = n_density;
979 if (n_clust == 0) {
980 /* Older mixw files have them here, and they might be padded. */
981 if (fread(&r, sizeof(r), 1, fp) != 1) {
982 E_ERROR_SYSTEM("Cannot read #rows");
983 goto error_out;
984 }
985 if (do_swap) SWAP_INT32(&r);
986 if (fread(&c, sizeof(c), 1, fp) != 1) {
987 E_ERROR_SYSTEM("Cannot read #columns");
988 goto error_out;
989 }
990 if (do_swap) SWAP_INT32(&c);
991 E_INFO("Rows: %d, Columns: %d\n", r, c);
992 }
993
994 if (n_feat != s->g->n_feat) {
995 E_ERROR("Number of feature streams mismatch: %d != %d\n",
996 n_feat, s->g->n_feat);
997 goto error_out;
998 }
999 if (n_density != s->g->n_density) {
1000 E_ERROR("Number of densities mismatch: %d != %d\n",
1001 n_density, s->g->n_density);
1002 goto error_out;
1003 }
1004 if (n_sen != s->n_sen) {
1005 E_ERROR("Number of senones mismatch: %d != %d\n",
1006 n_sen, s->n_sen);
1007 goto error_out;
1008 }
1009
1010 if (!((n_clust == 0) || (n_clust == 15) || (n_clust == 16))) {
1011 E_ERROR("Cluster count must be 0, 15, or 16\n");
1012 goto error_out;
1013 }
1014 if (n_clust == 15)
1015 ++n_clust;
1016
1017 if (!((n_bits == 8) || (n_bits == 4))) {
1018 E_ERROR("Cluster count must be 4 or 8\n");
1019 goto error_out;
1020 }
1021
1022 if (do_mmap) {
1023 E_INFO("Using memory-mapped I/O for senones\n");
1024 }
1025 offset = ftell(fp);
1026
1027 /* Allocate memory for pdfs (or memory map them) */
1028 if (do_mmap) {
1029 s->sendump_mmap = mmio_file_read(file);
1030 /* Get cluster codebook if any. */
1031 if (n_clust) {
1032 s->mixw_cb = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
1033 offset += n_clust;
1034 }
1035 }
1036 else {
1037 /* Get cluster codebook if any. */
1038 if (n_clust) {
1039 s->mixw_cb = ckd_calloc(1, n_clust);
1040 if (fread(s->mixw_cb, 1, n_clust, fp) != (size_t) n_clust) {
1041 E_ERROR("Failed to read %d bytes from sendump\n", n_clust);
1042 goto error_out;
1043 }
1044 }
1045 }
1046
1047 /* Set up pointers, or read, or whatever */
1048 if (s->sendump_mmap) {
1049 s->mixw = ckd_calloc_2d(n_feat, n_density, sizeof(*s->mixw));
1050 for (n = 0; n < n_feat; n++) {
1051 int step = c;
1052 if (n_bits == 4)
1053 step = (step + 1) / 2;
1054 for (i = 0; i < r; i++) {
1055 s->mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
1056 offset += step;
1057 }
1058 }
1059 }
1060 else {
1061 s->mixw = ckd_calloc_3d(n_feat, n_density, n_sen, sizeof(***s->mixw));
1062 /* Read pdf values and ids */
1063 for (n = 0; n < n_feat; n++) {
1064 int step = c;
1065 if (n_bits == 4)
1066 step = (step + 1) / 2;
1067 for (i = 0; i < r; i++) {
1068 if (fread(s->mixw[n][i], sizeof(***s->mixw), step, fp)
1069 != (size_t) step) {
1070 E_ERROR("Failed to read %d bytes from sendump\n", step);
1071 goto error_out;
1072 }
1073 }
1074 }
1075 }
1076
1077 fclose(fp);
1078 return 0;
1079error_out:
1080 fclose(fp);
1081 return -1;
1082}
1083
1084static int32
1085read_mixw(s2_semi_mgau_t * s, char const *file_name, double SmoothMin)
1086{
1087 char **argname, **argval;
1088 char eofchk;
1089 FILE *fp;
1090 int32 byteswap, chksum_present;
1091 uint32 chksum;
1092 float32 *pdf;
1093 int32 i, f, c, n;
1094 int32 n_sen;
1095 int32 n_feat;
1096 int32 n_comp;
1097 int32 n_err;
1098
1099 E_INFO("Reading mixture weights file '%s'\n", file_name);
1100
1101 if ((fp = fopen(file_name, "rb")) == NULL)
1102 E_FATAL_SYSTEM("Failed to open mixture weights file '%s' for reading", file_name);
1103
1104 /* Read header, including argument-value info and 32-bit byteorder magic */
1105 if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
1106 E_FATAL("Failed to read header from file '%s'\n", file_name);
1107
1108 /* Parse argument-value list */
1109 chksum_present = 0;
1110 for (i = 0; argname[i]; i++) {
1111 if (strcmp(argname[i], "version") == 0) {
1112 if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0)
1113 E_WARN("Version mismatch(%s): %s, expecting %s\n",
1114 file_name, argval[i], MGAU_MIXW_VERSION);
1115 }
1116 else if (strcmp(argname[i], "chksum0") == 0) {
1117 chksum_present = 1; /* Ignore the associated value */
1118 }
1119 }
1120 bio_hdrarg_free(argname, argval);
1121 argname = argval = NULL;
1122
1123 chksum = 0;
1124
1125 /* Read #senones, #features, #codewords, arraysize */
1126 if ((bio_fread(&n_sen, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
1127 || (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) !=
1128 1)
1129 || (bio_fread(&n_comp, sizeof(int32), 1, fp, byteswap, &chksum) !=
1130 1)
1131 || (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
1132 E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
1133 }
1134 if (n_feat != s->g->n_feat)
1135 E_FATAL("#Features streams(%d) != %d\n", n_feat, s->g->n_feat);
1136 if (n != n_sen * n_feat * n_comp) {
1137 E_FATAL
1138 ("%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n",
1139 file_name, i, n_sen, n_feat, n_comp);
1140 }
1141
1142 /* n_sen = number of mixture weights per codeword, which is
1143 * fixed at the number of senones since we have only one codebook.
1144 */
1145 s->n_sen = n_sen;
1146
1147 /* Quantized mixture weight arrays. */
1148 s->mixw = ckd_calloc_3d(n_feat, s->g->n_density, n_sen, sizeof(***s->mixw));
1149
1150 /* Temporary structure to read in floats before conversion to (int32) logs3 */
1151 pdf = (float32 *) ckd_calloc(n_comp, sizeof(float32));
1152
1153 /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
1154 n_err = 0;
1155 for (i = 0; i < n_sen; i++) {
1156 for (f = 0; f < n_feat; f++) {
1157 if (bio_fread((void *) pdf, sizeof(float32),
1158 n_comp, fp, byteswap, &chksum) != n_comp) {
1159 E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
1160 }
1161
1162 /* Normalize and floor */
1163 if (vector_sum_norm(pdf, n_comp) <= 0.0)
1164 n_err++;
1165 vector_floor(pdf, n_comp, SmoothMin);
1166 vector_sum_norm(pdf, n_comp);
1167
1168 /* Convert to LOG, quantize, and transpose */
1169 for (c = 0; c < n_comp; c++) {
1170 int32 qscr;
1171
1172 qscr = -logmath_log(s->lmath_8b, pdf[c]);
1173 if ((qscr > MAX_NEG_MIXW) || (qscr < 0))
1174 qscr = MAX_NEG_MIXW;
1175 s->mixw[f][c][i] = qscr;
1176 }
1177 }
1178 }
1179 if (n_err > 0)
1180 E_WARN("Weight normalization failed for %d mixture weights components\n", n_err);
1181
1182 ckd_free(pdf);
1183
1184 if (chksum_present)
1185 bio_verify_chksum(fp, byteswap, chksum);
1186
1187 if (fread(&eofchk, 1, 1, fp) == 1)
1188 E_FATAL("More data than expected in %s\n", file_name);
1189
1190 fclose(fp);
1191
1192 E_INFO("Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp);
1193 return n_sen;
1194}
1195
1196
1197static int
1198split_topn(char const *str, uint8 *out, int nfeat)
1199{
1200 char *topn_list = ckd_salloc(str);
1201 char *c, *cc;
1202 int i, maxn;
1203
1204 c = topn_list;
1205 i = 0;
1206 maxn = 0;
1207 while (i < nfeat && (cc = strchr(c, ',')) != NULL) {
1208 *cc = '\0';
1209 out[i] = atoi(c);
1210 if (out[i] > maxn) maxn = out[i];
1211 c = cc + 1;
1212 ++i;
1213 }
1214 if (i < nfeat && *c != '\0') {
1215 out[i] = atoi(c);
1216 if (out[i] > maxn) maxn = out[i];
1217 ++i;
1218 }
1219 while (i < nfeat)
1220 out[i++] = maxn;
1221
1222 ckd_free(topn_list);
1223 return maxn;
1224}
1225
1226
1227ps_mgau_t *
1228s2_semi_mgau_init(acmod_t *acmod)
1229{
1230 s2_semi_mgau_t *s;
1231 ps_mgau_t *ps;
1232 char const *sendump_path;
1233 int i;
1234 int n_feat;
1235
1236 s = ckd_calloc(1, sizeof(*s));
1237 s->config = acmod->config;
1238
1239 s->lmath = logmath_retain(acmod->lmath);
1240 /* Log-add table. */
1241 s->lmath_8b = logmath_init(logmath_get_base(acmod->lmath), SENSCR_SHIFT, TRUE);
1242 if (s->lmath_8b == NULL)
1243 goto error_out;
1244 /* Ensure that it is only 8 bits wide so that fast_logmath_add() works. */
1245 if (logmath_get_width(s->lmath_8b) != 1) {
1246 E_ERROR("Log base %f is too small to represent add table in 8 bits\n",
1247 logmath_get_base(s->lmath_8b));
1248 goto error_out;
1249 }
1250
1251 /* Read means and variances. */
1252 if ((s->g = gauden_init(cmd_ln_str_r(s->config, "_mean"),
1253 cmd_ln_str_r(s->config, "_var"),
1254 cmd_ln_float32_r(s->config, "-varfloor"),
1255 s->lmath)) == NULL) {
1256 E_ERROR("Failed to read means and variances\n");
1257 goto error_out;
1258 }
1259
1260 /* Currently only a single codebook is supported. */
1261 if (s->g->n_mgau != 1)
1262 goto error_out;
1263
1264 n_feat = s->g->n_feat;
1265
1266 /* Verify n_feat and veclen, against acmod. */
1267 if (n_feat != feat_dimension1(acmod->fcb)) {
1268 E_ERROR("Number of streams does not match: %d != %d\n",
1269 n_feat, feat_dimension1(acmod->fcb));
1270 goto error_out;
1271 }
1272 for (i = 0; i < n_feat; ++i) {
1273 if (s->g->featlen[i] != feat_dimension2(acmod->fcb, i)) {
1274 E_ERROR("Dimension of stream %d does not match: %d != %d\n",
1275 i, s->g->featlen[i], feat_dimension2(acmod->fcb, i));
1276 goto error_out;
1277 }
1278 }
1279 /* Read mixture weights */
1280 if ((sendump_path = cmd_ln_str_r(s->config, "_sendump"))) {
1281 if (read_sendump(s, acmod->mdef, sendump_path) < 0) {
1282 goto error_out;
1283 }
1284 }
1285 else {
1286 if (read_mixw(s, cmd_ln_str_r(s->config, "_mixw"),
1287 cmd_ln_float32_r(s->config, "-mixwfloor")) < 0) {
1288 goto error_out;
1289 }
1290 }
1291 s->ds_ratio = cmd_ln_int32_r(s->config, "-ds");
1292
1293 /* Determine top-N for each feature */
1294 s->topn_beam = ckd_calloc(n_feat, sizeof(*s->topn_beam));
1295 s->max_topn = cmd_ln_int32_r(s->config, "-topn");
1296 split_topn(cmd_ln_str_r(s->config, "-topn_beam"), s->topn_beam, n_feat);
1297 E_INFO("Maximum top-N: %d ", s->max_topn);
1298 E_INFOCONT("Top-N beams:");
1299 for (i = 0; i < n_feat; ++i) {
1300 E_INFOCONT(" %d", s->topn_beam[i]);
1301 }
1302 E_INFOCONT("\n");
1303
1304 /* Top-N scores from recent frames */
1305 s->n_topn_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2;
1306 s->topn_hist = (vqFeature_t ***)
1307 ckd_calloc_3d(s->n_topn_hist, n_feat, s->max_topn,
1308 sizeof(***s->topn_hist));
1309 s->topn_hist_n = ckd_calloc_2d(s->n_topn_hist, n_feat,
1310 sizeof(**s->topn_hist_n));
1311 for (i = 0; i < s->n_topn_hist; ++i) {
1312 int j;
1313 for (j = 0; j < n_feat; ++j) {
1314 int k;
1315 for (k = 0; k < s->max_topn; ++k) {
1316 s->topn_hist[i][j][k].score = WORST_DIST;
1317 s->topn_hist[i][j][k].codeword = k;
1318 }
1319 }
1320 }
1321
1322 ps = (ps_mgau_t *)s;
1323 ps->vt = &s2_semi_mgau_funcs;
1324 return ps;
1325error_out:
1326 s2_semi_mgau_free(ps_mgau_base(s));
1327 return NULL;
1328}
1329
1330int
1331s2_semi_mgau_mllr_transform(ps_mgau_t *ps,
1332 ps_mllr_t *mllr)
1333{
1334 s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps;
1335 return gauden_mllr_transform(s->g, mllr, s->config);
1336}
1337
1338void
1339s2_semi_mgau_free(ps_mgau_t *ps)
1340{
1341 s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps;
1342
1343 logmath_free(s->lmath);
1344 logmath_free(s->lmath_8b);
1345 if (s->sendump_mmap) {
1346 ckd_free_2d(s->mixw);
1347 mmio_file_unmap(s->sendump_mmap);
1348 }
1349 else {
1350 ckd_free_3d(s->mixw);
1351 if (s->mixw_cb)
1352 ckd_free(s->mixw_cb);
1353 }
1354 gauden_free(s->g);
1355 ckd_free(s->topn_beam);
1356 ckd_free_2d(s->topn_hist_n);
1357 ckd_free_3d((void **)s->topn_hist);
1358 ckd_free(s);
1359}
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:73
void gauden_free(gauden_t *g)
Release memory allocated by gauden_init.
Definition: ms_gauden.c:358
int32 gauden_mllr_transform(gauden_t *s, ps_mllr_t *mllr, cmd_ln_t *config)
Transform Gaussians according to an MLLR matrix (or, eventually, more).
Definition: ms_gauden.c:509
gauden_t * gauden_init(char const *meanfile, char const *varfile, float32 varfloor, logmath_t *lmath)
Read mixture gaussian codebooks from the given files.
Definition: ms_gauden.c:311
Acoustic model structure.
Definition: acmod.h:148
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
cmd_ln_t * config
Configuration.
Definition: acmod.h:150
feat_t * fcb
Dynamic feature computation.
Definition: acmod.h:156
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
mfcc_t **** var
like mean; diagonal covariance vector only
Definition: ms_gauden.h:84
mfcc_t *** det
log(determinant) for each variance vector; actually, log(sqrt(2*pi*det))
Definition: ms_gauden.h:85
int32 n_feat
Number of feature streams in each codebook.
Definition: ms_gauden.h:89
mfcc_t **** mean
mean[codebook][feature][codeword] vector
Definition: ms_gauden.h:83
int32 n_density
Number of Gaussian densities in each codebook-feature stream.
Definition: ms_gauden.h:90
int32 * featlen
feature length for each feature
Definition: ms_gauden.h:91
int32 n_mgau
Number of codebooks.
Definition: ms_gauden.h:88
ps_mgaufuncs_t * vt
vtable of mgau functions.
Definition: acmod.h:114
Feature space linear transform structure.
Definition: acmod.h:82
int n_topn_hist
Number of past frames tracked.
Definition: s2_semi_mgau.h:77
vqFeature_t *** topn_hist
Top-N scores and codewords for past frames.
Definition: s2_semi_mgau.h:74
uint8 ** topn_hist_n
Variable top-N for past frames.
Definition: s2_semi_mgau.h:75
vqFeature_t ** f
Top-N for the currently scoring frame.
Definition: s2_semi_mgau.h:76
Common code shared between SC and PTM (tied-state) models.
#define GMMSUB(a, b)
Subtract GMM component b (assumed to be positive) and saturate.
LOGMATH_INLINE int fast_logmath_add(logmath_t *lmath, int mlx, int mly)
Quickly log-add two negated log probabilities.
#define MAX_NEG_ASCR
Maximum negated acoustic score value.
#define MAX_NEG_MIXW
Maximum negated mixture weight value.