libtranscript
 All Data Structures Functions Variables Enumerations Enumerator Modules
utf_endian.h
1 /* Copyright (C) 2011-2012 G.P. Halkes
2  This program is free software: you can redistribute it and/or modify
3  it under the terms of the GNU General Public License version 3, as
4  published by the Free Software Foundation.
5 
6  This program is distributed in the hope that it will be useful,
7  but WITHOUT ANY WARRANTY; without even the implied warranty of
8  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9  GNU General Public License for more details.
10 
11  You should have received a copy of the GNU General Public License
12  along with this program. If not, see <http://www.gnu.org/licenses/>.
13 */
14 #ifdef UTF_ENDIAN_H_VERSION
/* Name-mangling helpers: ALT(name) expands to name with the expansion of
   UTF_ENDIAN_H_VERSION pasted onto its end. The extra level of indirection
   through _ALT is required so that UTF_ENDIAN_H_VERSION is macro-expanded
   before __ALT applies the ## operator; pasting directly would glue on the
   literal token UTF_ENDIAN_H_VERSION instead of its value. */
15 #define __ALT(x, y) x ## y
16 #define _ALT(x, y) __ALT(x, y)
17 #define ALT(x) _ALT(x, UTF_ENDIAN_H_VERSION)
18 
20 static transcript_error_t ALT(put_utf16)(uint_fast32_t codepoint, char **outbuf, const char *outbuflimit) {
21  CHECK_CODEPOINT_RANGE();
22  if (codepoint < UINT32_C(0xffff)) {
23  CHECK_OUTBYTESLEFT(2);
24  ALT(put16)(codepoint, *(unsigned char **) outbuf);
25  *outbuf += 2;
26  } else {
27  CHECK_OUTBYTESLEFT(4);
28  codepoint -= UINT32_C(0x10000);
29  ALT(put16)(UINT32_C(0xd800) + (codepoint >> 10), *(unsigned char **) outbuf);
30  ALT(put16)(UINT32_C(0xdc00) + (codepoint & 0x3ff), (*(unsigned char **) outbuf) + 2);
31  *outbuf += 4;
32  }
33  return TRANSCRIPT_SUCCESS;
34 }
35 
37 static transcript_error_t ALT(put_utf32)(uint_fast32_t codepoint, char **outbuf, const char *outbuflimit) {
38  CHECK_CODEPOINT_RANGE();
39 
40  CHECK_OUTBYTESLEFT(4);
41  ALT(put32)(codepoint, *(unsigned char **) outbuf);
42  *outbuf += 4;
43  return TRANSCRIPT_SUCCESS;
44 }
45 
47 static uint_fast32_t ALT(get_utf16)(const char **inbuf, const char *inbuflimit, bool_t skip) {
48  uint_fast32_t codepoint, masked_codepoint;
49 
50  if ((*inbuf) + 2 > inbuflimit)
51  return TRANSCRIPT_UTF_INCOMPLETE;
52 
53  codepoint = ALT(get16)(*(const unsigned char **) inbuf);
54  masked_codepoint = codepoint & UINT32_C(0xfc00);
55 
56  if (masked_codepoint == UINT32_C(0xd800)) {
57  uint_fast32_t next_codepoint;
58  /* Codepoint is high surrogate. */
59  if ((*inbuf) + 4 > inbuflimit)
60  return TRANSCRIPT_UTF_INCOMPLETE;
61 
62  next_codepoint = ALT(get16)((*(const unsigned char **) inbuf) + 2);
63  if ((next_codepoint & UINT32_C(0xfc00)) != UINT32_C(0xdc00)) {
64  /* Next codepoint is not a low surrogate. */
65  if (!skip)
66  return TRANSCRIPT_UTF_ILLEGAL;
67 
68  /* Only skip the high surrogate. */
69  *inbuf += 2;
70  return codepoint;
71  }
72  codepoint -= UINT32_C(0xd800);
73  codepoint <<= 10;
74  codepoint += next_codepoint - UINT32_C(0xdc00);
75  codepoint += UINT32_C(0x10000);
76 
77  if (!skip)
78  CHECK_CODEPOINT_ILLEGAL();
79  *inbuf += 4;
80  return codepoint;
81  }
82 
83  if (!skip) {
84  if (masked_codepoint == UINT32_C(0xdc00)) {
85  /* Codepoint is a low surrogate. */
86  return TRANSCRIPT_UTF_ILLEGAL;
87  }
88  CHECK_CODEPOINT_ILLEGAL();
89  }
90 
91  *inbuf += 2;
92  return codepoint;
93 }
94 
96 static uint_fast32_t ALT(get_utf32)(const char **inbuf, const char *inbuflimit, bool_t skip) {
97  uint32_t codepoint;
98 
99  if ((*inbuf) + 4 > inbuflimit)
100  return TRANSCRIPT_UTF_INCOMPLETE;
101 
102  memcpy(&codepoint, *inbuf, 4);
103  codepoint = ALT(get32)(*(const unsigned char **) inbuf);
104  if (!skip) {
105  CHECK_CODEPOINT_ILLEGAL();
106  CHECK_CODEPOINT_SURROGATES();
107  }
108 
109  *inbuf += 4;
110  return codepoint;
111 }
112 
/* Drop the name-mangling helpers so they do not outlive this inclusion
   (presumably so the file can be included again with a different
   UTF_ENDIAN_H_VERSION -- confirm against the including source). */
113 #undef ALT
114 #undef _ALT
115 #undef __ALT
116 #endif
transcript_error_t
Error values.
Definition: transcript.h:91
TRANSCRIPT_SUCCESS
All OK.
Definition: transcript.h:92