SphinxBase 5prealpha
pio.c
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37
38#ifdef HAVE_CONFIG_H
39#include <config.h>
40#endif
41
42#include <stdio.h>
43#include <stdlib.h>
44#include <string.h>
45#include <assert.h>
46
47#ifdef HAVE_UNISTD_H
48#include <unistd.h>
49#endif
50
51#ifdef HAVE_SYS_TYPES_H
52#include <sys/types.h>
53#endif
54
55#ifdef HAVE_SYS_STAT_H
56#include <sys/stat.h>
57#endif
58
59#if defined(_WIN32) && !defined(CYGWIN)
60#include <direct.h>
61#endif
62
63#include "sphinxbase/pio.h"
64#include "sphinxbase/filename.h"
65#include "sphinxbase/err.h"
66#include "sphinxbase/strfuncs.h"
68
69#ifndef EXEEXT
70#define EXEEXT ""
71#endif
72
/* Compression scheme inferred from a file name's suffix. */
enum {
    COMP_NONE = 0,      /* no recognised compression suffix */
    COMP_COMPRESS = 1,  /* ".Z" or ".z" */
    COMP_GZIP = 2,      /* ".gz" or ".GZ" */
    COMP_BZIP2 = 3      /* ".bz2" or ".BZ2" */
};
79
80static void
81guess_comptype(char const *file, int32 *ispipe, int32 *isgz)
82{
83 size_t k;
84
85 k = strlen(file);
86 *ispipe = 0;
87 *isgz = COMP_NONE;
88 if ((k > 2)
89 && ((strcmp(file + k - 2, ".Z") == 0)
90 || (strcmp(file + k - 2, ".z") == 0))) {
91 *ispipe = 1;
92 *isgz = COMP_COMPRESS;
93 }
94 else if ((k > 3) && ((strcmp(file + k - 3, ".gz") == 0)
95 || (strcmp(file + k - 3, ".GZ") == 0))) {
96 *ispipe = 1;
97 *isgz = COMP_GZIP;
98 }
99 else if ((k > 4) && ((strcmp(file + k - 4, ".bz2") == 0)
100 || (strcmp(file + k - 4, ".BZ2") == 0))) {
101 *ispipe = 1;
102 *isgz = COMP_BZIP2;
103 }
104}
105
106FILE *
107fopen_comp(const char *file, const char *mode, int32 * ispipe)
108{
109 FILE *fp;
110
111#ifndef HAVE_POPEN
112 *ispipe = 0; /* No popen() on WinCE */
113#else /* HAVE_POPEN */
114 int32 isgz;
115 guess_comptype(file, ispipe, &isgz);
116#endif /* HAVE_POPEN */
117
118 if (*ispipe) {
119#ifndef HAVE_POPEN
120 /* Shouldn't get here, anyway */
121 E_FATAL("No popen() on WinCE\n");
122#else
123 if (strcmp(mode, "r") == 0) {
124 char *command;
125 switch (isgz) {
126 case COMP_GZIP:
127 command = string_join("gunzip" EXEEXT, " -c ", file, NULL);
128 break;
129 case COMP_COMPRESS:
130 command = string_join("zcat" EXEEXT, " ", file, NULL);
131 break;
132 case COMP_BZIP2:
133 command = string_join("bunzip2" EXEEXT, " -c ", file, NULL);
134 break;
135 default:
136 command = NULL; /* Make compiler happy. */
137 E_FATAL("Unknown compression type %d\n", isgz);
138 }
139 if ((fp = popen(command, mode)) == NULL) {
140 E_ERROR_SYSTEM("Failed to open a pipe for a command '%s' mode '%s'", command, mode);
141 ckd_free(command);
142 return NULL;
143 }
144 ckd_free(command);
145 }
146 else if (strcmp(mode, "w") == 0) {
147 char *command;
148 switch (isgz) {
149 case COMP_GZIP:
150 command = string_join("gzip" EXEEXT, " > ", file, NULL);
151 break;
152 case COMP_COMPRESS:
153 command = string_join("compress" EXEEXT, " -c > ", file, NULL);
154 break;
155 case COMP_BZIP2:
156 command = string_join("bzip2" EXEEXT, " > ", file, NULL);
157 break;
158 default:
159 command = NULL; /* Make compiler happy. */
160 E_FATAL("Unknown compression type %d\n", isgz);
161 }
162 if ((fp = popen(command, mode)) == NULL) {
163 E_ERROR_SYSTEM("Failed to open a pipe for a command '%s' mode '%s'", command, mode);
164 ckd_free(command);
165 return NULL;
166 }
167 ckd_free(command);
168 }
169 else {
170 E_ERROR("Compressed file operation for mode %s is not supported\n", mode);
171 return NULL;
172 }
173#endif /* HAVE_POPEN */
174 }
175 else {
176 fp = fopen(file, mode);
177 }
178
179 return (fp);
180}
181
182
183void
184fclose_comp(FILE * fp, int32 ispipe)
185{
186 if (ispipe) {
187#ifdef HAVE_POPEN
188#if defined(_WIN32) && (!defined(__SYMBIAN32__))
189 _pclose(fp);
190#else
191 pclose(fp);
192#endif
193#endif
194 }
195 else
196 fclose(fp);
197}
198
199
200FILE *
201fopen_compchk(const char *file, int32 * ispipe)
202{
203#ifndef HAVE_POPEN
204 *ispipe = 0; /* No popen() on WinCE */
205 /* And therefore the rest of this function is useless. */
206 return (fopen_comp(file, "r", ispipe));
207#else /* HAVE_POPEN */
208 int32 isgz;
209 FILE *fh;
210
211 /* First just try to fopen_comp() it */
212 if ((fh = fopen_comp(file, "r", ispipe)) != NULL)
213 return fh;
214 else {
215 char *tmpfile;
216 size_t k;
217
218 /* File doesn't exist; try other compressed/uncompressed form, as appropriate */
219 guess_comptype(file, ispipe, &isgz);
220 k = strlen(file);
221 tmpfile = ckd_calloc(k+5, 1);
222 strcpy(tmpfile, file);
223 switch (isgz) {
224 case COMP_GZIP:
225 tmpfile[k - 3] = '\0';
226 break;
227 case COMP_BZIP2:
228 tmpfile[k - 4] = '\0';
229 break;
230 case COMP_COMPRESS:
231 tmpfile[k - 2] = '\0';
232 break;
233 case COMP_NONE:
234 strcpy(tmpfile + k, ".gz");
235 if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
236 E_WARN("Using %s instead of %s\n", tmpfile, file);
237 ckd_free(tmpfile);
238 return fh;
239 }
240 strcpy(tmpfile + k, ".bz2");
241 if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
242 E_WARN("Using %s instead of %s\n", tmpfile, file);
243 ckd_free(tmpfile);
244 return fh;
245 }
246 strcpy(tmpfile + k, ".Z");
247 if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
248 E_WARN("Using %s instead of %s\n", tmpfile, file);
249 ckd_free(tmpfile);
250 return fh;
251 }
252 ckd_free(tmpfile);
253 return NULL;
254 }
255 E_WARN("Using %s instead of %s\n", tmpfile, file);
256 fh = fopen_comp(tmpfile, "r", ispipe);
257 ckd_free(tmpfile);
258 return NULL;
259 }
260#endif /* HAVE_POPEN */
261}
262
265{
266 lineiter_t *li;
267
268 li = (lineiter_t *)ckd_calloc(1, sizeof(*li));
269 li->buf = (char *)ckd_malloc(128);
270 li->buf[0] = '\0';
271 li->bsiz = 128;
272 li->len = 0;
273 li->fh = fh;
274
275 li = lineiter_next(li);
276
277 /* Strip the UTF-8 BOM */
278
279 if (li && 0 == strncmp(li->buf, "\xef\xbb\xbf", 3)) {
280 memmove(li->buf, li->buf + 3, strlen(li->buf + 1));
281 li->len -= 3;
282 }
283
284 return li;
285}
286
289{
290 lineiter_t *li;
291
292 li = lineiter_start(fh);
293
294 if (li == NULL)
295 return li;
296
297 li->clean = TRUE;
298
299 if (li->buf && li->buf[0] == '#') {
300 li = lineiter_next(li);
301 } else {
302 string_trim(li->buf, STRING_BOTH);
303 }
304
305 return li;
306}
307
308
309static lineiter_t *
310lineiter_next_plain(lineiter_t *li)
311{
312 /* We are reading the next line */
313 li->lineno++;
314
315 /* Read a line and check for EOF. */
316 if (fgets(li->buf, li->bsiz, li->fh) == NULL) {
317 lineiter_free(li);
318 return NULL;
319 }
320 /* If we managed to read the whole thing, then we are done
321 * (this will be by far the most common result). */
322 li->len = (int32)strlen(li->buf);
323 if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n')
324 return li;
325
326 /* Otherwise we have to reallocate and keep going. */
327 while (1) {
328 li->bsiz *= 2;
329 li->buf = (char *)ckd_realloc(li->buf, li->bsiz);
330 /* If we get an EOF, we are obviously done. */
331 if (fgets(li->buf + li->len, li->bsiz - li->len, li->fh) == NULL) {
332 li->len += strlen(li->buf + li->len);
333 return li;
334 }
335 li->len += strlen(li->buf + li->len);
336 /* If we managed to read the whole thing, then we are done. */
337 if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n')
338 return li;
339 }
340
341 /* Shouldn't get here. */
342 return li;
343}
344
345
348{
349 if (!li->clean)
350 return lineiter_next_plain(li);
351
352 for (li = lineiter_next_plain(li); li; li = lineiter_next_plain(li)) {
353 if (li->buf) {
354 li->buf = string_trim(li->buf, STRING_BOTH);
355 if (li->buf[0] != 0 && li->buf[0] != '#')
356 break;
357 }
358 }
359 return li;
360}
361
363{
364 return li->lineno;
365}
366
367void
369{
370 if (li == NULL)
371 return;
372 ckd_free(li->buf);
373 ckd_free(li);
374}
375
/*
 * Read one line of arbitrary length from a stream.
 *
 * stream  - stream to read from.
 * out_len - if non-NULL, receives the number of characters stored
 *           (excluding the terminating NUL); 0 when nothing was read.
 *
 * Returns a newly allocated, NUL-terminated string containing the line
 * (including its trailing newline, if any), or NULL if nothing could be
 * read.  The caller owns the result and releases it with ckd_free().
 */
char *
fread_line(FILE *stream, size_t *out_len)
{
    char *output = NULL;
    size_t total = 0;   /* characters accumulated so far */
    char buf[128];

    while (fgets(buf, sizeof(buf), stream)) {
        size_t len = strlen(buf);

        /* Append this data to the buffer. */
        if (output == NULL)
            output = (char *)ckd_malloc(len + 1);
        else
            output = (char *)ckd_realloc(output, total + len + 1);
        memcpy(output + total, buf, len + 1);
        total += len;

        /* Stop on a short read or end of line. */
        if (len < sizeof(buf) - 1 || buf[len - 1] == '\n')
            break;
    }

    /* Length is tracked as an integer so that the empty case does not
     * subtract two null pointers. */
    if (out_len)
        *out_len = total;
    return output;
}
404
405#define FREAD_RETRY_COUNT 60
406
407int32
408fread_retry(void *pointer, int32 size, int32 num_items, FILE * stream)
409{
410 char *data;
411 size_t n_items_read;
412 size_t n_items_rem;
413 uint32 n_retry_rem;
414 int32 loc;
415
416 n_retry_rem = FREAD_RETRY_COUNT;
417
418 data = (char *)pointer;
419 loc = 0;
420 n_items_rem = num_items;
421
422 do {
423 n_items_read = fread(&data[loc], size, n_items_rem, stream);
424
425 n_items_rem -= n_items_read;
426
427 if (n_items_rem > 0) {
428 /* an incomplete read occurred */
429
430 if (n_retry_rem == 0)
431 return -1;
432
433 if (n_retry_rem == FREAD_RETRY_COUNT) {
434 E_ERROR_SYSTEM("fread() failed; retrying...\n");
435 }
436
437 --n_retry_rem;
438
439 loc += n_items_read * size;
440#if !defined(_WIN32) && defined(HAVE_UNISTD_H)
441 sleep(1);
442#endif
443 }
444 } while (n_items_rem > 0);
445
446 return num_items;
447}
448
449
450#ifdef _WIN32_WCE /* No stat() on WinCE */
451int32
452stat_retry(const char *file, struct stat * statbuf)
453{
454 WIN32_FIND_DATAW file_data;
455 HANDLE *h;
456 wchar_t *wfile;
457 size_t len;
458
459 len = mbstowcs(NULL, file, 0) + 1;
460 wfile = ckd_calloc(len, sizeof(*wfile));
461 mbstowcs(wfile, file, len);
462 if ((h = FindFirstFileW(wfile, &file_data)) == INVALID_HANDLE_VALUE) {
463 ckd_free(wfile);
464 return -1;
465 }
466 ckd_free(wfile);
467 memset(statbuf, 0, sizeof(*statbuf));
468 statbuf->st_mtime = file_data.ftLastWriteTime.dwLowDateTime;
469 statbuf->st_size = file_data.nFileSizeLow;
470 FindClose(h);
471
472 return 0;
473}
474
475
476int32
477stat_mtime(const char *file)
478{
479 struct stat statbuf;
480
481 if (stat_retry(file, &statbuf) != 0)
482 return -1;
483
484 return ((int32) statbuf.st_mtime);
485}
486#else
487#define STAT_RETRY_COUNT 10
488int32
489stat_retry(const char *file, struct stat * statbuf)
490{
491 int32 i;
492
493 for (i = 0; i < STAT_RETRY_COUNT; i++) {
494#ifndef HAVE_SYS_STAT_H
495 FILE *fp;
496
497 if ((fp = (FILE *)fopen(file, "r")) != 0) {
498 fseek(fp, 0, SEEK_END);
499 statbuf->st_size = ftell(fp);
500 fclose(fp);
501 return 0;
502 }
503#else /* HAVE_SYS_STAT_H */
504 if (stat(file, statbuf) == 0)
505 return 0;
506#endif
507 if (i == 0) {
508 E_ERROR_SYSTEM("Failed to stat file '%s'; retrying...", file);
509 }
510#ifdef HAVE_UNISTD_H
511 sleep(1);
512#endif
513 }
514
515 return -1;
516}
517
518int32
519stat_mtime(const char *file)
520{
521 struct stat statbuf;
522
523#ifdef HAVE_SYS_STAT_H
524 if (stat(file, &statbuf) != 0)
525 return -1;
526#else /* HAVE_SYS_STAT_H */
527 if (stat_retry(file, &statbuf) != 0)
528 return -1;
529#endif /* HAVE_SYS_STAT_H */
530
531 return ((int32) statbuf.st_mtime);
532}
533#endif /* !_WIN32_WCE */
534
536 FILE *fh;
537 unsigned char buf, bbits;
538 int16 refcount;
539};
540
543{
544 bit_encode_t *be;
545
546 be = (bit_encode_t *)ckd_calloc(1, sizeof(*be));
547 be->refcount = 1;
548 be->fh = outfh;
549 return be;
550}
551
554{
555 ++be->refcount;
556 return be;
557}
558
559int
561{
562 if (be == NULL)
563 return 0;
564 if (--be->refcount > 0)
565 return be->refcount;
566 ckd_free(be);
567
568 return 0;
569}
570
571int
572bit_encode_write(bit_encode_t *be, unsigned char const *bits, int nbits)
573{
574 int tbits;
575
576 tbits = nbits + be->bbits;
577 if (tbits < 8) {
578 /* Append to buffer. */
579 be->buf |= ((bits[0] >> (8 - nbits)) << (8 - tbits));
580 }
581 else {
582 int i = 0;
583 while (tbits >= 8) {
584 /* Shift bits out of the buffer and splice with high-order bits */
585 fputc(be->buf | ((bits[i]) >> be->bbits), be->fh);
586 /* Put low-order bits back into buffer */
587 be->buf = (bits[i] << (8 - be->bbits)) & 0xff;
588 tbits -= 8;
589 ++i;
590 }
591 }
592 /* tbits contains remaining number of bits. */
593 be->bbits = tbits;
594
595 return nbits;
596}
597
598int
599bit_encode_write_cw(bit_encode_t *be, uint32 codeword, int nbits)
600{
601 unsigned char bits[4];
602 codeword <<= (32 - nbits);
603 bits[0] = (codeword >> 24) & 0xff;
604 bits[1] = (codeword >> 16) & 0xff;
605 bits[2] = (codeword >> 8) & 0xff;
606 bits[3] = codeword & 0xff;
607 return bit_encode_write(be, bits, nbits);
608}
609
610int
612{
613 if (be->bbits) {
614 fputc(be->buf, be->fh);
615 be->bbits = 0;
616 }
617 return 0;
618}
619
/* Create exactly one directory level; returns 0 on success, -1 with
 * errno set on failure. */
static int
make_single_dir(const char *path)
{
#if defined(_WIN32) && !defined(CYGWIN)
    return _mkdir(path);
#elif defined(HAVE_SYS_STAT_H) /* Unix, Cygwin, doesn't work on MINGW */
    return mkdir(path, 0777);
#else
    /* No way to create directories in this configuration. */
    (void)path;
    errno = ENOSYS;
    return -1;
#endif
}

/*
 * Create a directory, creating missing parent directories first.
 *
 * path - directory to create.
 *
 * Returns 0 if the directory was created or already exists, -1 on
 * failure (empty path, an error other than a missing parent, failure
 * to create a parent, or failure of the final mkdir).
 */
int
build_directory(const char *path)
{
    char *parent;
    int rv;

    /* Utterly failed... */
    if (strlen(path) == 0)
        return -1;

    if (make_single_dir(path) == 0)
        return 0;

    /* Or, it already exists... */
    if (errno == EEXIST)
        return 0;
    if (errno != ENOENT) {
        E_ERROR_SYSTEM("Failed to create %s", path);
        return -1;
    }

    /* A parent is missing: build it, then retry this level. */
    parent = ckd_salloc(path);
    path2dirname(path, parent);
    if (strcmp(parent, path) == 0) {
        /* No shorter parent to try; avoid recursing forever. */
        ckd_free(parent);
        return -1;
    }
    rv = build_directory(parent);
    ckd_free(parent);
    if (rv != 0)
        return -1;

    return make_single_dir(path);
}
Sphinx's memory allocation/deallocation routines.
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Definition: ckd_alloc.c:244
#define ckd_malloc(sz)
Macro for ckd_malloc
Definition: ckd_alloc.h:253
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Definition: ckd_alloc.h:248
#define ckd_salloc(ptr)
Macro for ckd_salloc
Definition: ckd_alloc.h:264
#define ckd_realloc(ptr, sz)
Macro for ckd_realloc
Definition: ckd_alloc.h:258
Implementation of logging routines.
#define E_ERROR(...)
Print error message to error log.
Definition: err.h:104
#define E_FATAL(...)
Exit with non-zero status after error message.
Definition: err.h:81
#define E_ERROR_SYSTEM(...)
Print error text; Call perror("");.
Definition: err.h:99
#define E_WARN(...)
Print warning message to error log.
Definition: err.h:109
File names related operation.
SPHINXBASE_EXPORT void path2dirname(const char *path, char *dir)
Strip off filename from the given path and copy the directory name into dir Caller must have allocate...
Definition: filename.c:68
file IO related operations.
SPHINXBASE_EXPORT int32 stat_retry(const char *file, struct stat *statbuf)
There is no bitstream decoder, because a stream abstraction is too slow.
Definition: pio.c:489
SPHINXBASE_EXPORT int32 stat_mtime(const char *file)
Return time of last modification for the given file, or -1 if stat fails.
Definition: pio.c:519
int bit_encode_flush(bit_encode_t *be)
Flush any unwritten bits, zero-padding if necessary.
Definition: pio.c:611
SPHINXBASE_EXPORT void lineiter_free(lineiter_t *li)
Stop reading lines from a file.
Definition: pio.c:368
int bit_encode_write(bit_encode_t *be, unsigned char const *bits, int nbits)
Write bits to encoder.
Definition: pio.c:572
SPHINXBASE_EXPORT int32 fread_retry(void *pointer, int32 size, int32 num_items, FILE *stream)
NFS file reads seem to fail now and then.
Definition: pio.c:408
SPHINXBASE_EXPORT int build_directory(const char *path)
Create a directory and all of its parent directories, as needed.
Definition: pio.c:621
SPHINXBASE_EXPORT int lineiter_lineno(lineiter_t *li)
Returns current line number.
Definition: pio.c:362
int bit_encode_free(bit_encode_t *be)
Release pointer to a bit encoder.
Definition: pio.c:560
SPHINXBASE_EXPORT void fclose_comp(FILE *fp, int32 ispipe)
Close a file opened using fopen_comp.
Definition: pio.c:184
SPHINXBASE_EXPORT FILE * fopen_compchk(const char *file, int32 *ispipe)
Open a file for reading, but if file not present try to open compressed version (if file is uncompres...
Definition: pio.c:201
SPHINXBASE_EXPORT lineiter_t * lineiter_start_clean(FILE *fh)
Start reading lines from a file, skip comments and trim lines.
Definition: pio.c:288
bit_encode_t * bit_encode_retain(bit_encode_t *be)
Retain pointer to a bit encoder.
Definition: pio.c:553
bit_encode_t * bit_encode_attach(FILE *outfh)
Attach bitstream encoder to a file.
Definition: pio.c:542
SPHINXBASE_EXPORT FILE * fopen_comp(const char *file, const char *mode, int32 *ispipe)
Like fopen, but use popen and zcat if it is determined that "file" is compressed (i....
Definition: pio.c:107
SPHINXBASE_EXPORT lineiter_t * lineiter_start(FILE *fh)
Start reading lines from a file.
Definition: pio.c:264
int bit_encode_write_cw(bit_encode_t *be, uint32 codeword, int nbits)
Write lowest-order bits of codeword to encoder.
Definition: pio.c:599
SPHINXBASE_EXPORT char * fread_line(FILE *stream, size_t *out_len)
Read a line of arbitrary length from a file and return it as a newly allocated string.
Definition: pio.c:377
SPHINXBASE_EXPORT lineiter_t * lineiter_next(lineiter_t *li)
Move to the next line in the file.
Definition: pio.c:347
Miscellaneous useful string functions.
SPHINXBASE_EXPORT char * string_trim(char *string, enum string_edge_e which)
Remove whitespace from a string, modifying it in-place.
Definition: strfuncs.c:97
SPHINXBASE_EXPORT char * string_join(const char *base,...)
Concatenate a NULL-terminated argument list of strings, returning a newly allocated string.
Definition: strfuncs.c:70
@ STRING_BOTH
Both ends of string.
Definition: strfuncs.h:73
Line iterator for files.
Definition: pio.h:177