SphinxBase 5prealpha
bio.c
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37/*
38 * bio.c -- Sphinx-3 binary file I/O functions.
39 *
40 * **********************************************
41 * CMU ARPA Speech Project
42 *
43 * Copyright (c) 1996 Carnegie Mellon University.
44 * ALL RIGHTS RESERVED.
45 * **********************************************
46 *
47 * HISTORY
48 * $Log$
49 * Revision 1.4 2005/06/21 20:40:46 arthchan2003
50 * 1, Fixed doxygen documentation, 2, Add the $ keyword.
51 *
52 * Revision 1.3 2005/03/30 01:22:46 archan
53 * Fixed mistakes in last updates. Add
54 *
55 *
56 * 02-Jul-1997 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
57 * Bugfix: Added byteswapping in bio_verify_chksum().
58 *
59 * 18-Dec-1996 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
60 * Created.
61 */
62
63#include <stdio.h>
64#include <string.h>
65#include <assert.h>
66
67#ifdef _MSC_VER
68#pragma warning (disable: 4996)
69#endif
70
71#include "sphinxbase/bio.h"
72#include "sphinxbase/err.h"
74
75
76#define BIO_HDRARG_MAX 32
77#define END_COMMENT "*end_comment*\n"
78
79
80static void
81bcomment_read(FILE * fp)
82{
83 __BIGSTACKVARIABLE__ char iline[16384];
84
85 while (fgets(iline, sizeof(iline), fp) != NULL) {
86 if (strcmp(iline, END_COMMENT) == 0)
87 return;
88 }
89 E_FATAL("Missing %s marker\n", END_COMMENT);
90}
91
92
93static int32
94swap_check(FILE * fp)
95{
96 uint32 magic;
97
98 if (fread(&magic, sizeof(uint32), 1, fp) != 1) {
99 E_ERROR("Cannot read BYTEORDER MAGIC NO.\n");
100 return -1;
101 }
102
103 if (magic != BYTE_ORDER_MAGIC) {
104 /* either need to swap or got bogus magic number */
105 SWAP_INT32(&magic);
106
107 if (magic == BYTE_ORDER_MAGIC)
108 return 1;
109
110 SWAP_INT32(&magic);
111 E_ERROR("Bad BYTEORDER MAGIC NO: %08x, expecting %08x\n",
112 magic, BYTE_ORDER_MAGIC);
113 return -1;
114 }
115
116 return 0;
117}
118
119
/*
 * Release the parallel name/value arrays produced by bio_readhdr.
 *
 * argname is walked up to its NULL terminator; for every entry the name
 * string and the value string at the same position are freed, then both
 * arrays themselves.  Nothing is done when argname is NULL.
 */
void
bio_hdrarg_free(char **argname, char **argval)
{
    char **name;
    char **val;

    if (argname == NULL)
        return;

    name = argname;
    val = argval;
    while (*name != NULL) {
        ckd_free(*name);
        ckd_free(*val);
        name++;
        val++;
    }

    ckd_free(argname);
    ckd_free(argval);
}
134
135
136int32
137bio_writehdr_version(FILE * fp, char *version)
138{
139 uint32 b;
140
141 fprintf(fp, "s3\n");
142 fprintf(fp, "version %s\n", version);
143 fprintf(fp, "endhdr\n");
144 fflush(fp);
145
146 b = (uint32) BYTE_ORDER_MAGIC;
147 fwrite(&b, sizeof(uint32), 1, fp);
148 fflush(fp);
149
150 return 0;
151}
152
153
154int32
155bio_writehdr(FILE *fp, ...)
156{
157 char const *key;
158 va_list args;
159 uint32 b;
160
161 fprintf(fp, "s3\n");
162 va_start(args, fp);
163 while ((key = va_arg(args, char const *)) != NULL) {
164 char const *val = va_arg(args, char const *);
165 if (val == NULL) {
166 E_ERROR("Wrong number of arguments\n");
167 va_end(args);
168 return -1;
169 }
170 fprintf(fp, "%s %s\n", key, val);
171 }
172 va_end(args);
173
174 fprintf(fp, "endhdr\n");
175 fflush(fp);
176
177 b = (uint32) BYTE_ORDER_MAGIC;
178 if (fwrite(&b, sizeof(uint32), 1, fp) != 1)
179 return -1;
180 fflush(fp);
181
182 return 0;
183}
184
185
186int32
187bio_readhdr(FILE * fp, char ***argname, char ***argval, int32 * swap)
188{
189 __BIGSTACKVARIABLE__ char line[16384], word[4096];
190 int32 i, l;
191 int32 lineno;
192
193 *argname = (char **) ckd_calloc(BIO_HDRARG_MAX + 1, sizeof(char *));
194 *argval = (char **) ckd_calloc(BIO_HDRARG_MAX, sizeof(char *));
195
196 lineno = 0;
197 if (fgets(line, sizeof(line), fp) == NULL){
198 E_ERROR("Premature EOF, line %d\n", lineno);
199 goto error_out;
200 }
201 lineno++;
202
203 if ((line[0] == 's') && (line[1] == '3') && (line[2] == '\n')) {
204 /* New format (post Dec-1996, including checksums); read argument-value pairs */
205 for (i = 0;;) {
206 if (fgets(line, sizeof(line), fp) == NULL) {
207 E_ERROR("Premature EOF, line %d\n", lineno);
208 goto error_out;
209 }
210 lineno++;
211
212 if (sscanf(line, "%s%n", word, &l) != 1) {
213 E_ERROR("Header format error, line %d\n", lineno);
214 goto error_out;
215 }
216 if (strcmp(word, "endhdr") == 0)
217 break;
218 if (word[0] == '#') /* Skip comments */
219 continue;
220
221 if (i >= BIO_HDRARG_MAX) {
222 E_ERROR
223 ("Max arg-value limit(%d) exceeded; increase BIO_HDRARG_MAX\n",
224 BIO_HDRARG_MAX);
225 goto error_out;
226 }
227
228 (*argname)[i] = ckd_salloc(word);
229 if (sscanf(line + l, "%s", word) != 1) { /* Multi-word values not allowed */
230 E_ERROR("Header format error, line %d\n", lineno);
231 goto error_out;
232 }
233 (*argval)[i] = ckd_salloc(word);
234 i++;
235 }
236 }
237 else {
238 /* Old format (without checksums); the first entry must be the version# */
239 if (sscanf(line, "%s", word) != 1) {
240 E_ERROR("Header format error, line %d\n", lineno);
241 goto error_out;
242 }
243
244 (*argname)[0] = ckd_salloc("version");
245 (*argval)[0] = ckd_salloc(word);
246 i = 1;
247
248 bcomment_read(fp);
249 }
250 (*argname)[i] = NULL;
251
252 if ((*swap = swap_check(fp)) < 0) {
253 E_ERROR("swap_check failed\n");
254 goto error_out;
255 }
256
257 return 0;
258error_out:
259 bio_hdrarg_free(*argname, *argval);
260 *argname = *argval = NULL;
261 return -1;
262}
263
264
265static uint32
266chksum_accum(const void *buf, int32 el_sz, int32 n_el, uint32 sum)
267{
268 int32 i;
269 uint8 *i8;
270 uint16 *i16;
271 uint32 *i32;
272
273 switch (el_sz) {
274 case 1:
275 i8 = (uint8 *) buf;
276 for (i = 0; i < n_el; i++)
277 sum = (sum << 5 | sum >> 27) + i8[i];
278 break;
279 case 2:
280 i16 = (uint16 *) buf;
281 for (i = 0; i < n_el; i++)
282 sum = (sum << 10 | sum >> 22) + i16[i];
283 break;
284 case 4:
285 i32 = (uint32 *) buf;
286 for (i = 0; i < n_el; i++)
287 sum = (sum << 20 | sum >> 12) + i32[i];
288 break;
289 default:
290 E_FATAL("Unsupported elemsize for checksum: %d\n", el_sz);
291 break;
292 }
293
294 return sum;
295}
296
297
298static void
299swap_buf(void *buf, int32 el_sz, int32 n_el)
300{
301 int32 i;
302 uint16 *buf16;
303 uint32 *buf32;
304
305 switch (el_sz) {
306 case 1:
307 break;
308 case 2:
309 buf16 = (uint16 *) buf;
310 for (i = 0; i < n_el; i++)
311 SWAP_INT16(buf16 + i);
312 break;
313 case 4:
314 buf32 = (uint32 *) buf;
315 for (i = 0; i < n_el; i++)
316 SWAP_INT32(buf32 + i);
317 break;
318 default:
319 E_FATAL("Unsupported elemsize for byteswapping: %d\n", el_sz);
320 break;
321 }
322}
323
324
325int32
326bio_fread(void *buf, int32 el_sz, int32 n_el, FILE * fp, int32 swap,
327 uint32 * chksum)
328{
329 if (fread(buf, el_sz, n_el, fp) != (size_t) n_el)
330 return -1;
331
332 if (swap)
333 swap_buf(buf, el_sz, n_el);
334
335 if (chksum)
336 *chksum = chksum_accum(buf, el_sz, n_el, *chksum);
337
338 return n_el;
339}
340
341int32
342bio_fwrite(const void *buf, int32 el_sz, int32 n_el, FILE *fp,
343 int32 swap, uint32 *chksum)
344{
345 if (chksum)
346 *chksum = chksum_accum(buf, el_sz, n_el, *chksum);
347 if (swap) {
348 void *nbuf;
349 int rv;
350
351 nbuf = ckd_calloc(n_el, el_sz);
352 memcpy(nbuf, buf, n_el * el_sz);
353 swap_buf(nbuf, el_sz, n_el);
354 rv = fwrite(nbuf, el_sz, n_el, fp);
355 ckd_free(nbuf);
356 return rv;
357 }
358 else {
359 return fwrite(buf, el_sz, n_el, fp);
360 }
361}
362
363int32
364bio_fread_1d(void **buf, size_t el_sz, uint32 * n_el, FILE * fp,
365 int32 sw, uint32 * ck)
366{
367 /* Read 1-d array size */
368 if (bio_fread(n_el, sizeof(int32), 1, fp, sw, ck) != 1)
369 E_FATAL("fread(arraysize) failed\n");
370 if (*n_el <= 0)
371 E_FATAL("Bad arraysize: %d\n", *n_el);
372
373 /* Allocate memory for array data */
374 *buf = (void *) ckd_calloc(*n_el, el_sz);
375
376 /* Read array data */
377 if (bio_fread(*buf, el_sz, *n_el, fp, sw, ck) != *n_el)
378 E_FATAL("fread(arraydata) failed\n");
379
380 return *n_el;
381}
382
383int32
384bio_fread_2d(void ***arr,
385 size_t e_sz,
386 uint32 *d1,
387 uint32 *d2,
388 FILE *fp,
389 uint32 swap,
390 uint32 *chksum)
391{
392 uint32 l_d1, l_d2;
393 uint32 n;
394 size_t ret;
395 void *raw;
396
397 ret = bio_fread(&l_d1, sizeof(uint32), 1, fp, swap, chksum);
398 if (ret != 1) {
399 if (ret == 0) {
400 E_ERROR_SYSTEM("Unable to read complete data");
401 }
402 else {
403 E_ERROR_SYSTEM("OS error in bio_fread_2d");
404 }
405 return -1;
406 }
407 ret = bio_fread(&l_d2, sizeof(uint32), 1, fp, swap, chksum);
408 if (ret != 1) {
409 if (ret == 0) {
410 E_ERROR_SYSTEM("Unable to read complete data");
411 }
412 else {
413 E_ERROR_SYSTEM("OS error in bio_fread_2d");
414 }
415 return -1;
416 }
417 if (bio_fread_1d(&raw, e_sz, &n, fp, swap, chksum) != n)
418 return -1;
419
420 assert(n == l_d1*l_d2);
421
422 *d1 = l_d1;
423 *d2 = l_d2;
424 *arr = ckd_alloc_2d_ptr(l_d1, l_d2, raw, e_sz);
425
426 return n;
427}
428
429int32
430bio_fread_3d(void ****arr,
431 size_t e_sz,
432 uint32 *d1,
433 uint32 *d2,
434 uint32 *d3,
435 FILE *fp,
436 uint32 swap,
437 uint32 *chksum)
438{
439 uint32 l_d1;
440 uint32 l_d2;
441 uint32 l_d3;
442 uint32 n;
443 void *raw;
444 size_t ret;
445
446 ret = bio_fread(&l_d1, sizeof(uint32), 1, fp, swap, chksum);
447 if (ret != 1) {
448 if (ret == 0) {
449 E_ERROR_SYSTEM("Unable to read complete data");
450 }
451 else {
452 E_ERROR_SYSTEM("OS error in bio_fread_3d");
453 }
454 return -1;
455 }
456 ret = bio_fread(&l_d2, sizeof(uint32), 1, fp, swap, chksum);
457 if (ret != 1) {
458 if (ret == 0) {
459 E_ERROR_SYSTEM("Unable to read complete data");
460 }
461 else {
462 E_ERROR_SYSTEM("OS error in bio_fread_3d");
463 }
464 return -1;
465 }
466 ret = bio_fread(&l_d3, sizeof(uint32), 1, fp, swap, chksum);
467 if (ret != 1) {
468 if (ret == 0) {
469 E_ERROR_SYSTEM("Unable to read complete data");
470 }
471 else {
472 E_ERROR_SYSTEM("OS error in bio_fread_3d");
473 }
474 return -1;
475 }
476
477 if (bio_fread_1d(&raw, e_sz, &n, fp, swap, chksum) != n) {
478 return -1;
479 }
480
481 assert(n == l_d1 * l_d2 * l_d3);
482
483 *arr = ckd_alloc_3d_ptr(l_d1, l_d2, l_d3, raw, e_sz);
484 *d1 = l_d1;
485 *d2 = l_d2;
486 *d3 = l_d3;
487
488 return n;
489}
490
491void
492bio_verify_chksum(FILE * fp, int32 byteswap, uint32 chksum)
493{
494 uint32 file_chksum;
495
496 if (fread(&file_chksum, sizeof(uint32), 1, fp) != 1)
497 E_FATAL("fread(chksum) failed\n");
498 if (byteswap)
499 SWAP_INT32(&file_chksum);
500 if (file_chksum != chksum)
501 E_FATAL
502 ("Checksum error; file-checksum %08x, computed %08x\n",
503 file_chksum, chksum);
504}
505
506int
507bio_fwrite_3d(void ***arr,
508 size_t e_sz,
509 uint32 d1,
510 uint32 d2,
511 uint32 d3,
512 FILE *fp,
513 uint32 *chksum)
514{
515 size_t ret;
516
517 /* write out first dimension 1 */
518 ret = bio_fwrite(&d1, sizeof(uint32), 1, fp, 0, chksum);
519 if (ret != 1) {
520 if (ret == 0) {
521 E_ERROR_SYSTEM("Unable to write complete data");
522 }
523 else {
524 E_ERROR_SYSTEM("OS error in bio_fwrite_3d");
525 }
526 return -1;
527 }
528
529 /* write out first dimension 2 */
530 ret = bio_fwrite(&d2, sizeof(uint32), 1, fp, 0, chksum);
531 if (ret != 1) {
532 if (ret == 0) {
533 E_ERROR_SYSTEM("Unable to write complete data");
534 }
535 else {
536 E_ERROR_SYSTEM("OS error in bio_fwrite_3d");
537 }
538 return -1;
539 }
540
541 /* write out first dimension 3 */
542 ret = bio_fwrite(&d3, sizeof(uint32), 1, fp, 0, chksum);
543 if (ret != 1) {
544 if (ret == 0) {
545 E_ERROR_SYSTEM("Unable to write complete data");
546 }
547 else {
548 E_ERROR_SYSTEM("OS error in bio_fwrite_3d");
549 }
550 return -1;
551 }
552
553 /* write out the data in the array as one big block */
554 return bio_fwrite_1d(arr[0][0], e_sz, d1 * d2 * d3, fp, chksum);
555}
556
557int
559 size_t e_sz,
560 uint32 d1,
561 FILE *fp,
562 uint32 *chksum)
563{
564 size_t ret;
565 ret = bio_fwrite(&d1, sizeof(uint32), 1, fp, 0, chksum);
566 if (ret != 1) {
567 if (ret == 0) {
568 E_ERROR_SYSTEM("Unable to write complete data");
569 }
570 else {
571 E_ERROR_SYSTEM("OS error in bio_fwrite_1d");
572 }
573 return -1;
574 }
575
576 ret = bio_fwrite(arr, e_sz, d1, fp, 0, chksum);
577 if (ret != d1) {
578 if (ret == 0) {
579 E_ERROR_SYSTEM("Unable to write complete data");
580 }
581 else {
582 E_ERROR_SYSTEM("OS error in bio_fwrite_1d");
583 }
584
585 return -1;
586 }
587
588 return ret;
589}
590
591int16*
592bio_read_wavfile(char const *directory,
593 char const *filename,
594 char const *extension,
595 int32 header,
596 int32 endian,
597 size_t *nsamps)
598{
599 FILE *uttfp;
600 char *inputfile;
601 size_t n, l;
602 int16 *data;
603
604 n = strlen(extension);
605 l = strlen(filename);
606 if ((n <= l) && (0 == strcmp(filename + l - n, extension)))
607 extension = "";
608 inputfile = ckd_calloc(strlen(directory) + l + n + 2, 1);
609 if (directory) {
610 sprintf(inputfile, "%s/%s%s", directory, filename, extension);
611 } else {
612 sprintf(inputfile, "%s%s", filename, extension);
613 }
614
615 if ((uttfp = fopen(inputfile, "rb")) == NULL) {
616 E_FATAL_SYSTEM("Failed to open file '%s' for reading", inputfile);
617 }
618 fseek(uttfp, 0, SEEK_END);
619 n = ftell(uttfp);
620 fseek(uttfp, 0, SEEK_SET);
621 if (header > 0) {
622 if (fseek(uttfp, header, SEEK_SET) < 0) {
623 E_ERROR_SYSTEM("Failed to move to an offset %d in a file '%s'", header, inputfile);
624 fclose(uttfp);
625 ckd_free(inputfile);
626 return NULL;
627 }
628 n -= header;
629 }
630 n /= sizeof(int16);
631 data = ckd_calloc(n, sizeof(*data));
632 if ((l = fread(data, sizeof(int16), n, uttfp)) < n) {
633 E_ERROR_SYSTEM("Failed to read %d samples from %s: %d", n, inputfile, l);
634 ckd_free(data);
635 ckd_free(inputfile);
636 fclose(uttfp);
637 return NULL;
638 }
639 ckd_free(inputfile);
640 fclose(uttfp);
641 if (nsamps) *nsamps = n;
642
643 return data;
644}
Cross platform binary IO to process files in sphinx3 format.
SPHINXBASE_EXPORT int bio_fwrite_1d(void *arr, size_t e_sz, uint32 d1, FILE *fp, uint32 *chksum)
Write a 1-d array.
Definition: bio.c:558
SPHINXBASE_EXPORT int32 bio_fwrite(const void *buf, int32 el_sz, int32 n_el, FILE *fp, int32 swap, uint32 *chksum)
Like fwrite but perform byteswapping and accumulate checksum (the 2 extra arguments).
Definition: bio.c:342
SPHINXBASE_EXPORT int16 * bio_read_wavfile(char const *directory, char const *filename, char const *extension, int32 header, int32 endian, size_t *nsamps)
Read raw data from the wav file.
Definition: bio.c:592
SPHINXBASE_EXPORT int32 bio_fread_2d(void ***arr, size_t e_sz, uint32 *d1, uint32 *d2, FILE *fp, uint32 swap, uint32 *chksum)
Read a 2-d matrix:
Definition: bio.c:384
SPHINXBASE_EXPORT int32 bio_fread(void *buf, int32 el_sz, int32 n_el, FILE *fp, int32 swap, uint32 *chksum)
Like fread but perform byteswapping and accumulate checksum (the 2 extra arguments).
Definition: bio.c:326
SPHINXBASE_EXPORT int32 bio_readhdr(FILE *fp, char ***name, char ***val, int32 *swap)
Read binary file format header: has the following format.
Definition: bio.c:187
SPHINXBASE_EXPORT int bio_fwrite_3d(void ***arr, size_t e_sz, uint32 d1, uint32 d2, uint32 d3, FILE *fp, uint32 *chksum)
Write a 3-d array (set of matrices).
Definition: bio.c:507
SPHINXBASE_EXPORT int32 bio_writehdr_version(FILE *fp, char *version)
Write a simple binary file header, containing only the version string.
Definition: bio.c:137
SPHINXBASE_EXPORT int32 bio_fread_1d(void **buf, size_t el_sz, uint32 *n_el, FILE *fp, int32 sw, uint32 *ck)
Read a 1-d array (fashioned after fread):
Definition: bio.c:364
SPHINXBASE_EXPORT int32 bio_fread_3d(void ****arr, size_t e_sz, uint32 *d1, uint32 *d2, uint32 *d3, FILE *fp, uint32 swap, uint32 *chksum)
Read a 3-d array (set of matrices)
Definition: bio.c:430
SPHINXBASE_EXPORT void bio_verify_chksum(FILE *fp, int32 byteswap, uint32 chksum)
Read and verify checksum at the end of binary file.
Definition: bio.c:492
SPHINXBASE_EXPORT void bio_hdrarg_free(char **name, char **val)
Free name and value strings previously allocated and returned by bio_readhdr.
Definition: bio.c:121
SPHINXBASE_EXPORT int32 bio_writehdr(FILE *fp,...)
Write a simple binary file header with only byte order magic word.
Definition: bio.c:155
Sphinx's memory allocation/deallocation routines.
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Definition: ckd_alloc.c:244
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Definition: ckd_alloc.h:248
#define ckd_alloc_3d_ptr(d1, d2, d3, bf, sz)
Macro for ckd_alloc_3d_ptr
Definition: ckd_alloc.h:298
#define ckd_salloc(ptr)
Macro for ckd_salloc
Definition: ckd_alloc.h:264
#define ckd_alloc_2d_ptr(d1, d2, bf, sz)
Macro for ckd_alloc_2d_ptr
Definition: ckd_alloc.h:287
Implementation of logging routines.
#define E_ERROR(...)
Print error message to error log.
Definition: err.h:104
#define E_FATAL(...)
Exit with non-zero status after error message.
Definition: err.h:81
#define E_FATAL_SYSTEM(...)
Print error text; Call perror(""); exit(errno);.
Definition: err.h:90
#define E_ERROR_SYSTEM(...)
Print error text; Call perror("");.
Definition: err.h:99