SphinxBase 5prealpha
fe_internal.h
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1996-2004 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37
38#ifndef __FE_INTERNAL_H__
39#define __FE_INTERNAL_H__
40
41#ifdef HAVE_CONFIG_H
42#include <config.h>
43#endif
44
45#include "sphinxbase/fe.h"
46#include "sphinxbase/fixpoint.h"
47
48#include "fe_noise.h"
49#include "fe_prespch_buf.h"
50#include "fe_type.h"
51
52#ifdef __cplusplus
53extern "C" {
54#endif
55#if 0
56/* Fool Emacs. */
57}
58#endif
59
60/* Values for the 'logspec' field. */
/* Anonymous enum naming the two non-zero settings of the 'logspec' option.
   NOTE(review): presumably these are the values stored in fe_s.log_spec, with
   0 (not named here) meaning the option is off -- confirm against the code
   that sets the field. */
61enum {
62 RAW_LOG_SPEC = 1, /* name suggests an unsmoothed log spectrum -- TODO confirm */
63 SMOOTH_LOG_SPEC = 2 /* name suggests a smoothed log spectrum -- TODO confirm */
64};
65
66/* Values for the 'transform' field. */
/* Anonymous enum naming the selectable transform variants.
   NOTE(review): presumably these are the values stored in fe_s.transform and
   select which DCT routine is applied -- confirm against the code that reads
   the field. */
67enum {
68 LEGACY_DCT = 0, /* zero value, so it is what a zero-initialized field selects */
69 DCT_II = 1,
70 DCT_HTK = 2 /* NOTE(review): likely pairs with the 'htk' flag of fe_dct2() -- confirm */
71};
72
/* Forward typedef so the struct can be referred to as melfb_t. */
73typedef struct melfb_s melfb_t;
/* Base structure holding the parameters and tables used for MFCC computation.
   Ownership and allocation of the pointer members are not visible in this
   header. */
75struct melfb_s {
76 float32 sampling_rate;
77 int32 num_cepstra;
78 int32 num_filters;
79 int32 fft_size;
80 float32 lower_filt_freq;
81 float32 upper_filt_freq;
82 /* DCT coefficients. */
83 mfcc_t **mel_cosine; /* pointer-to-pointer, i.e. a table of rows; dimensions are not stated here */
84 /* Filter coefficients. */
85 mfcc_t *filt_coeffs;
86 int16 *spec_start;
87 int16 *filt_start;
88 int16 *filt_width;
89 /* Luxury mobile home. */
   /* NOTE(review): the comment above is a joke on the field name; the actual
      meaning of 'doublewide' is not visible in this header -- confirm in the
      implementation. */
90 int32 doublewide;
   /* Frequency-warping selection. NOTE(review): the strings are const and
      presumably owned elsewhere (e.g. by the configuration) -- confirm. */
91 char const *warp_type;
92 char const *warp_params;
93 uint32 warp_id;
94 /* Precomputed normalization constants for unitary DCT-II/DCT-III */
95 mfcc_t sqrt_inv_n, sqrt_inv_2n;
96 /* Value and coefficients for HTK-style liftering */
97 int32 lifter_val;
98 mfcc_t *lifter;
99 /* Normalize filters to unit area */
100 int32 unit_area; /* int32 used as a flag -- presumably non-zero enables it */
101 /* Round filter frequencies to DFT points (hurts accuracy, but is
102 useful for legacy purposes) */
103 int32 round_filters; /* int32 used as a flag -- presumably non-zero enables it */
104};
105
106/* sqrt(1/2), also used for unitary DCT-II/DCT-III */
/* The literal is sqrt(1/2) to 15 decimal places. FLOAT2MFCC is not defined in
   this header; NOTE(review): presumably it converts the floating-point literal
   to mfcc_t and comes from one of the headers included above -- confirm. */
107#define SQRT_HALF FLOAT2MFCC(0.707106781186548)
108
/* State bundle for voice activity detection; struct fe_s holds a pointer to
   one of these under its "VAD variables" section.
   NOTE(review): field meanings below are read off the names -- confirm against
   the code that updates them. */
109typedef struct vad_data_s {
110 uint8 in_speech; /* presumably a boolean: non-zero while inside a speech region */
111 int16 pre_speech_frames; /* presumably a running frame counter */
112 int16 post_speech_frames; /* presumably a running frame counter */
113 prespch_buf_t* prespch_buf; /* buffer type declared in fe_prespch_buf.h; ownership not visible here */
114} vad_data_t;
115
/* Structure for the front-end computation: configuration, derived sizes,
   option flags, precomputed tables and scratch buffers. The fe_* functions
   declared in this header take a pointer to it (as fe_t).
   Allocation and ownership of the pointer members are not visible here. */
117struct fe_s {
118 cmd_ln_t *config;
119 int refcount; /* presumably a reference count for shared ownership -- confirm */
120
121 float32 sampling_rate;
122 int16 frame_rate;
123 int16 frame_shift;
124
125 float32 window_length;
126 int16 frame_size;
127 int16 fft_size;
128
    /* Small integer settings and on/off options, each stored in a uint8. */
129 uint8 fft_order;
130 uint8 feature_dimension;
131 uint8 num_cepstra;
132 uint8 remove_dc;
133 uint8 log_spec; /* NOTE(review): presumably RAW_LOG_SPEC / SMOOTH_LOG_SPEC from the enum in this file, or 0 -- confirm */
134 uint8 swap;
135 uint8 dither;
136 uint8 transform; /* NOTE(review): presumably LEGACY_DCT / DCT_II / DCT_HTK from the enum in this file -- confirm */
137 uint8 remove_noise;
138 uint8 remove_silence;
139
140 float32 pre_emphasis_alpha;
141 int16 pre_emphasis_prior;
142 int32 dither_seed;
143
144 int16 num_overflow_samps; /* presumably the count of valid samples in overflow_samps -- confirm */
145 size_t num_processed_samps;
146
147 /* Twiddle factors for FFT. */
148 frame_t *ccc, *sss;
149 /* Mel filter parameters. */
150 melfb_t *mel_fb;
151 /* Half of a Hamming Window. */
152 window_t *hamming_window;
153
154 /* Noise removal */
155 noise_stats_t *noise_stats;
156
157 /* VAD variables */
158 int16 pre_speech;
159 int16 post_speech;
160 int16 start_speech;
161 float32 vad_threshold;
162 vad_data_t *vad_data;
163
164 /* Temporary buffers for processing. */
165 /* FIXME: too many of these. */
166 int16 *spch;
167 frame_t *frame;
168 powspec_t *spec, *mfspec;
169 int16 *overflow_samps;
170};
171
/* Initialize dithering from the given seed.
   NOTE(review): the generator and any special seed values are defined
   elsewhere -- confirm in the implementation. */
172void fe_init_dither(int32 seed);
173
174/* Apply 1/2 bit noise to a buffer of audio. */
/* 'buffer' is a non-const int16 array, so the samples are presumably modified
   in place; 'nsamps' is presumably the number of samples in it. The meaning of
   the int32 return value is not visible in this header. */
175int32 fe_dither(int16 *buffer, int32 nsamps);
176
/* Frame-level entry points. 'in' is read-only input audio (int16 const *) and
   'len' presumably its length in samples; the meaning of the int return values
   is not visible in this header. */
177/* Load a frame of data into the fe. */
178int fe_read_frame(fe_t *fe, int16 const *in, int32 len);
179
180/* Shift the input buffer back and read more data. */
181int fe_shift_frame(fe_t *fe, int16 const *in, int32 len);
182
183/* Process a frame of data into features. */
/* 'feat' is the caller-supplied output array; its required length is not
   stated here. NOTE(review): 'store_pcm' looks like a flag -- confirm its
   effect in the implementation. */
184void fe_write_frame(fe_t *fe, mfcc_t *feat, int32 store_pcm);
185
186/* Initialization functions. */
/* Both melfb_t functions take the filter bank by non-const pointer, so they
   presumably fill in its tables; the meaning of the int32 return values is not
   visible in this header. */
187int32 fe_build_melfilters(melfb_t *MEL_FB);
188int32 fe_compute_melcosine(melfb_t *MEL_FB);
/* 'in' is a caller-supplied window_t array and 'in_len' its length parameter;
   NOTE(review): struct fe_s documents its window as "half of a Hamming
   window", so check how in_len relates to the number of values written. */
189void fe_create_hamming(window_t *in, int32 in_len);
/* NOTE(review): presumably fills the FFT twiddle-factor tables (ccc/sss) of
   'fe' -- confirm. */
190void fe_create_twiddle(fe_t *fe);
191
/* Arithmetic helpers on fixed32 values.
   NOTE(review): the names suggest addition and subtraction carried out in the
   log domain; the number format and handling of invalid differences are not
   visible in this header -- confirm in the implementation. */
192fixed32 fe_log_add(fixed32 x, fixed32 y);
193fixed32 fe_log_sub(fixed32 x, fixed32 y);
194
195/* Miscellaneous processing functions. */
/* In each prototype the const pointer is the input array and the non-const
   pointer is the caller-supplied output array; required array lengths are not
   stated in this header. */
/* Takes a spectrum array (powspec_t) and writes a cepstrum array (mfcc_t). */
196void fe_spec2cep(fe_t * fe, const powspec_t * mflogspec, mfcc_t * mfcep);
/* Same direction as fe_spec2cep. NOTE(review): presumably a DCT-II, with
   'htk' selecting the HTK-style variant -- confirm. */
197void fe_dct2(fe_t *fe, const powspec_t *mflogspec, mfcc_t *mfcep, int htk);
/* Opposite direction: cepstrum in, spectrum out. NOTE(review): presumably a
   DCT-III -- confirm. */
198void fe_dct3(fe_t *fe, const mfcc_t *mfcep, powspec_t *mflogspec);
199
200#ifdef __cplusplus
201}
202#endif
203
204#endif /* __FE_INTERNAL_H__ */
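/*
 * Cross-reference notes carried over from the generated documentation:
 *   - cmd_ln_t: opaque structure used to hold the results of command-line
 *     parsing.
 *   - struct fe_s (definition: fe_internal.h:117): structure for the
 *     front-end computation.
 *   - struct melfb_s (definition: fe_internal.h:75): base structure holding
 *     all data for MFCC computation.
 */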