SphinxBase 5prealpha
agc.c
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37/*
38 * agc.c -- Various forms of automatic gain control (AGC)
39 *
40 * **********************************************
41 * CMU ARPA Speech Project
42 *
43 * Copyright (c) 1996 Carnegie Mellon University.
44 * ALL RIGHTS RESERVED.
45 * **********************************************
46 *
47 * HISTORY
48 * $Log$
49 * Revision 1.5 2005/06/21 19:25:41 arthchan2003
50 * 1, Fixed doxygen documentation. 2, Added $ keyword.
51 *
52 * Revision 1.3 2005/03/30 01:22:46 archan
53 * Fixed mistakes in last updates. Add
54 *
55 *
56 * 04-Nov-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
57 * Created.
58 */
59
60#include <string.h>
61#ifdef HAVE_CONFIG_H
62#include <config.h>
63#endif
64
65#include "sphinxbase/err.h"
67#include "sphinxbase/agc.h"
68
/*
 * Printable names of the AGC modes.  agc_type_from_str() returns the
 * index of the matching entry cast to agc_type_t, so:
 * NOTE! The order of these entries must match the enum in agc.h.
 */
const char *agc_type_str[] = {
    "none",
    "max",
    "emax",
    "noise"
};

/* Number of entries in agc_type_str[]. */
static const int n_agc_type_str =
    (int)(sizeof agc_type_str / sizeof agc_type_str[0]);
77
79agc_type_from_str(const char *str)
80{
81 int i;
82
83 for (i = 0; i < n_agc_type_str; ++i) {
84 if (0 == strcmp(str, agc_type_str[i]))
85 return (agc_type_t)i;
86 }
87 E_FATAL("Unknown AGC type '%s'\n", str);
88 return AGC_NONE;
89}
90
92{
93 agc_t *agc;
94 agc = ckd_calloc(1, sizeof(*agc));
95 agc->noise_thresh = FLOAT2MFCC(2.0);
96
97 return agc;
98}
99
100void agc_free(agc_t *agc)
101{
102 ckd_free(agc);
103}
104
108void
109agc_max(agc_t *agc, mfcc_t **mfc, int32 n_frame)
110{
111 int32 i;
112
113 if (n_frame <= 0)
114 return;
115 agc->obs_max = mfc[0][0];
116 for (i = 1; i < n_frame; i++) {
117 if (mfc[i][0] > agc->obs_max) {
118 agc->obs_max = mfc[i][0];
119 agc->obs_frame = 1;
120 }
121 }
122
123 E_INFO("AGCMax: obs=max= %.2f\n", agc->obs_max);
124 for (i = 0; i < n_frame; i++)
125 mfc[i][0] -= agc->obs_max;
126}
127
128void
129agc_emax_set(agc_t *agc, float32 m)
130{
131 agc->max = FLOAT2MFCC(m);
132 E_INFO("AGCEMax: max= %.2f\n", m);
133}
134
135float32
137{
138 return MFCC2FLOAT(agc->max);
139}
140
141void
142agc_emax(agc_t *agc, mfcc_t **mfc, int32 n_frame)
143{
144 int i;
145
146 if (n_frame <= 0)
147 return;
148 for (i = 0; i < n_frame; ++i) {
149 if (mfc[i][0] > agc->obs_max) {
150 agc->obs_max = mfc[i][0];
151 agc->obs_frame = 1;
152 }
153 mfc[i][0] -= agc->max;
154 }
155}
156
157/* Update estimated max for next utterance */
158void
160{
161 if (agc->obs_frame) { /* Update only if some data observed */
162 agc->obs_max_sum += agc->obs_max;
163 agc->obs_utt++;
164
165 /* Re-estimate max over past history; decay the history */
166 agc->max = agc->obs_max_sum / agc->obs_utt;
167 if (agc->obs_utt == 16) {
168 agc->obs_max_sum /= 2;
169 agc->obs_utt = 8;
170 }
171 }
172 E_INFO("AGCEMax: obs= %.2f, new= %.2f\n", agc->obs_max, agc->max);
173
174 /* Reset the accumulators for the next utterance. */
175 agc->obs_frame = 0;
176 agc->obs_max = FLOAT2MFCC(-1000.0); /* Less than any real C0 value (hopefully!!) */
177}
178
179void
181 mfcc_t **cep,
182 int32 nfr)
183{
184 mfcc_t min_energy; /* Minimum log-energy */
185 mfcc_t noise_level; /* Average noise_level */
186 int32 i; /* frame index */
187 int32 noise_frames; /* Number of noise frames */
188
189 /* Determine minimum log-energy in utterance */
190 min_energy = cep[0][0];
191 for (i = 0; i < nfr; ++i) {
192 if (cep[i][0] < min_energy)
193 min_energy = cep[i][0];
194 }
195
196 /* Average all frames between min_energy and min_energy + agc->noise_thresh */
197 noise_frames = 0;
198 noise_level = 0;
199 min_energy += agc->noise_thresh;
200 for (i = 0; i < nfr; ++i) {
201 if (cep[i][0] < min_energy) {
202 noise_level += cep[i][0];
203 noise_frames++;
204 }
205 }
206
207 if (noise_frames > 0) {
208 noise_level /= noise_frames;
209 E_INFO("AGC NOISE: max= %6.3f\n", MFCC2FLOAT(noise_level));
210 /* Subtract noise_level from all log_energy values */
211 for (i = 0; i < nfr; i++) {
212 cep[i][0] -= noise_level;
213 }
214 }
215}
216
217void
218agc_set_threshold(agc_t *agc, float32 threshold)
219{
220 agc->noise_thresh = FLOAT2MFCC(threshold);
221}
222
223float32
225{
226 return FLOAT2MFCC(agc->noise_thresh);
227}
routine that implements automatic gain control
SPHINXBASE_EXPORT void agc_max(agc_t *agc, mfcc_t **mfc, int32 n_frame)
Apply AGC to the given mfc vectors (normalize all C0 mfc coefficients in the given input such that the maximum C0 value becomes 0, by subtracting the observed maximum C0 from every frame).
Definition: agc.c:109
SPHINXBASE_EXPORT void agc_emax_set(agc_t *agc, float32 m)
Set the current AGC maximum estimate.
Definition: agc.c:129
enum agc_type_e agc_type_t
Types of acoustic gain control to apply to the features.
SPHINXBASE_EXPORT void agc_free(agc_t *agc)
Free AGC structure.
Definition: agc.c:100
SPHINXBASE_EXPORT const char * agc_type_str[]
String representations of agc_type_t values.
Definition: agc.c:70
SPHINXBASE_EXPORT float32 agc_get_threshold(agc_t *agc)
Get the current AGC noise threshold.
Definition: agc.c:224
SPHINXBASE_EXPORT float32 agc_emax_get(agc_t *agc)
Get the current AGC maximum estimate.
Definition: agc.c:136
SPHINXBASE_EXPORT void agc_set_threshold(agc_t *agc, float32 threshold)
Set the current AGC noise threshold.
Definition: agc.c:218
SPHINXBASE_EXPORT void agc_emax(agc_t *agc, mfcc_t **mfc, int32 n_frame)
Apply AGC to the given block of MFC vectors.
Definition: agc.c:142
SPHINXBASE_EXPORT agc_t * agc_init(void)
Initialize AGC structure with default values.
Definition: agc.c:91
SPHINXBASE_EXPORT void agc_noise(agc_t *agc, mfcc_t **mfc, int32 n_frame)
Apply AGC using noise threshold to the given block of MFC vectors.
Definition: agc.c:180
SPHINXBASE_EXPORT agc_type_t agc_type_from_str(const char *str)
Convert string representation (from command-line) to agc_type_t.
Definition: agc.c:79
SPHINXBASE_EXPORT void agc_emax_update(agc_t *agc)
Update AGC parameters for next utterance.
Definition: agc.c:159
Sphinx's memory allocation/deallocation routines.
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Definition: ckd_alloc.c:244
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Definition: ckd_alloc.h:248
Implementation of logging routines.
#define E_INFO(...)
Print logging information to standard error stream.
Definition: err.h:114
#define E_FATAL(...)
Exit with non-zero status after error message.
Definition: err.h:81
Structure holding data for doing AGC.
Definition: agc.h:113
mfcc_t obs_max
Observed max in current utterance.
Definition: agc.h:115
mfcc_t noise_thresh
Noise threshold (for AGC_NOISE only)
Definition: agc.h:119
int32 obs_frame
Whether any data was observed after prev update.
Definition: agc.h:116
mfcc_t max
Estimated max for current utterance (for AGC_EMAX)
Definition: agc.h:114
int32 obs_utt
Number of utterances accumulated into the running max estimate (reset to 8, with the running sum halved, when it reaches 16).
Definition: agc.h:117