00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include "qutf7codec.h"
00027
00028 #ifndef QT_NO_TEXTCODEC
00029
00030 int QUtf7Codec::mibEnum() const {
00031 return 1012;
00032 }
00033
00034 int QStrictUtf7Codec::mibEnum() const {
00035 return -1012;
00036 }
00037
00038 const char* QUtf7Codec::name() const {
00039 return "UTF-7";
00040 }
00041
00042 const char* QStrictUtf7Codec::name() const {
00043 return "X-QT-UTF-7-STRICT";
00044 }
00045
00046 const char* QUtf7Codec::mimeName() const {
00047 return "UTF-7";
00048 }
00049
00050 bool QUtf7Codec::canEncode( QChar ) const {
00051 return TRUE;
00052 }
00053
00054 bool QUtf7Codec::canEncode( const QString & ) const {
00055 return TRUE;
00056 }
00057
00058 static uchar base64Set[] = {
00059 0x00, 0x00, 0x00, 0x00,
00060 0x00, 0x11, 0xFF, 0xC0,
00061 0x7F, 0xFF, 0xFF, 0xE0,
00062 0x7F, 0xFF, 0xFF, 0xE0
00063 };
00064
00065 static uchar base64SetWithLastTwoBitsZero[] = {
00066 0x00, 0x00, 0x00, 0x00,
00067 0x00, 0x00, 0x88, 0x80,
00068 0x44, 0x44, 0x44, 0x40,
00069 0x11, 0x11, 0x11, 0x00
00070 };
00071
00072 static uchar directSet[] = {
00073 0x00, 0x00, 0x00, 0x00,
00074 0x01, 0xCF, 0xFF, 0xE1,
00075 0x7F, 0xFF, 0xFF, 0xE0,
00076 0x7F, 0xFF, 0xFF, 0xE0
00077 };
00078
00079 static uchar optDirectSet[] = {
00080 0x00, 0x00, 0x00, 0x00,
00081 0x7E, 0x20, 0x00, 0x1E,
00082 0x80, 0x00, 0x00, 0x17,
00083 0x80, 0x00, 0x00, 0x1C
00084 };
00085
// Tests whether character `ch` is a member of the 128-bit bitmap `set`.
//
// `set` must point to 16 bytes; character c is represented by bit
// (0x80 >> (c % 8)) of byte c / 8. Characters outside the 7-bit range are
// never members; without this check they would index past the end of the
// 16-byte table.
//
// The parameters are spelled `unsigned char` (the type Qt's `uchar`
// names) and the table is taken through a pointer-to-const, so existing
// callers passing `uchar` arrays keep working unchanged.
static inline bool isOfSet(unsigned char ch, const unsigned char* set) {
    if ( ch >= 128 )
        return false;
    return ( set[ ch / 8 ] & ( 0x80 >> ( ch % 8 ) ) ) != 0;
}
00089
00090 int QUtf7Codec::heuristicContentMatch(const char* chars, int len) const
00091 {
00092 int stepNo = 0;
00093 int i;
00094 bool shifted = FALSE;
00095 bool rightAfterEscape = FALSE;
00096 bool onlyNullBitsSinceLastBoundary = TRUE;
00097 for ( i = 0; i < len ; i++ ) {
00098 if ((unsigned char)chars[i] >= 128)
00099 break;
00100 if (shifted) {
00101 if ( isOfSet(chars[i],base64Set) ) {
00102 switch (stepNo) {
00103 case 0:
00104 onlyNullBitsSinceLastBoundary = TRUE;
00105 break;
00106 case 3:
00107 onlyNullBitsSinceLastBoundary
00108 = isOfSet(chars[i],base64SetWithLastTwoBitsZero);
00109 break;
00110 case 6:
00111 onlyNullBitsSinceLastBoundary
00112 = ( chars[i] == 'A' || chars[i] == 'Q' ||
00113 chars[i] == 'g' || chars[i] == 'w' );
00114 break;
00115 default:
00116 onlyNullBitsSinceLastBoundary
00117 = onlyNullBitsSinceLastBoundary && (chars[i] == 'A');
00118 }
00119 stepNo = (stepNo + 1) % 8;
00120 rightAfterEscape = FALSE;
00121 } else {
00122 if (rightAfterEscape && chars[i] != '-')
00123 break;
00124 if (!onlyNullBitsSinceLastBoundary)
00125 break;
00126 shifted = FALSE;
00127 stepNo = 0;
00128 }
00129 } else {
00130 if (chars[i] == '+') {
00131 shifted = TRUE;
00132 rightAfterEscape = TRUE;
00133 }
00134 }
00135 }
00136 return i;
00137 }
00138
00139 class QUtf7Decoder : public QTextDecoder {
00140
00141 ushort uc;
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152 char stepNo;
00153
00154 bool shifted;
00155
00156
00157 bool rightAfterEscape;
00158 public:
00159 QUtf7Decoder() : uc(0), stepNo(0), shifted(FALSE), rightAfterEscape(FALSE)
00160 {
00161 }
00162
00163 private:
00164 inline void resetParser()
00165 {
00166 uc = 0;
00167 stepNo = 0;
00168 shifted = FALSE;
00169 rightAfterEscape = FALSE;
00170 }
00171
00172 public:
00173 QString toUnicode(const char* chars, int len)
00174 {
00175 QString result = "";
00176 for (int i=0; i<len; i++) {
00177 uchar ch = chars[i];
00178
00179
00180
00181
00182 if ( ch > 127 ) {
00183 qWarning("QUtf7Decoder: 8bit char found in input. "
00184 "Parser has been re-initialized!");
00185 resetParser();
00186 result += QChar::replacement;
00187 continue;
00188 }
00189
00190 if (shifted) {
00191
00192
00193
00194
00195
00196 if ( rightAfterEscape && ch == '-' ) {
00197
00198
00199 resetParser();
00200 result += QChar('+');
00201
00202 continue;
00203 }
00204
00205
00206
00207
00208 ushort bits;
00209 if ( ch >= 'A' && ch <= 'Z' ) {
00210 bits = ch - 'A';
00211 } else if ( ch >= 'a' && ch <= 'z' ) {
00212 bits = ch - 'a' + 26;
00213 } else if ( ch >= '0' && ch <= '9' ) {
00214 bits = ch - '0' + 52;
00215 } else if ( ch == '+' ) {
00216 bits = 62;
00217 } else if ( ch == '/' ) {
00218 bits = 63;
00219 } else {
00220 bits = 0;
00221
00222
00223
00224
00225
00226 if ( rightAfterEscape ) {
00227
00228
00229
00230
00231 qWarning("QUtf7Decoder: ill-formed input: "
00232 "non-base64 char after escaping \"+\"!");
00233 }
00234
00235 if (stepNo >= 1 && uc) {
00236 qWarning("QUtf7Decoder: ill-formed sequence: "
00237 "non-zero bits in shifted-sequence tail!");
00238 }
00239 resetParser();
00240
00241
00242
00243 if ( ch == '-' )
00244 continue;
00245
00246 }
00247
00248 if ( shifted ) {
00249
00250
00251
00252
00253
00254 switch (stepNo) {
00255
00256 case 0: uc = bits << 10; break;
00257
00258 case 1: uc |= bits << 4; break;
00259
00260
00261 case 2: uc |= bits >> 2; result += QChar(uc);
00262
00263 uc = bits << 14; break;
00264 case 3: uc |= bits << 8; break;
00265 case 4: uc |= bits << 2; break;
00266
00267
00268 case 5: uc |= bits >> 4; result += QChar(uc);
00269
00270 uc = bits << 12; break;
00271 case 6: uc |= bits << 6; break;
00272
00273
00274
00275 case 7: uc |= bits; result += QChar(uc);
00276 uc = 0; break;
00277 default: ;
00278 }
00279
00280 stepNo++;
00281 stepNo %= 8;
00282 rightAfterEscape = FALSE;
00283
00284 continue;
00285 }
00286 }
00287
00288
00289
00290
00291
00292
00293
00294
00295 if ( ch == '+' ) {
00296
00297
00298 shifted = TRUE;
00299 stepNo = 0;
00300
00301
00302 rightAfterEscape = TRUE;
00303 } else {
00304
00305 result += QChar(ch);
00306 }
00307 }
00308
00309 return result;
00310
00311 }
00312
00313 };
00314
00315 QTextDecoder* QUtf7Codec::makeDecoder() const
00316 {
00317 return new QUtf7Decoder;
00318 }
00319
00320
00321 class QUtf7Encoder : public QTextEncoder {
00322 uchar dontNeedEncodingSet[16];
00323 ushort outbits;
00324 uint stepNo : 2;
00325 bool shifted : 1;
00326 bool mayContinueShiftedSequence : 1;
00327 public:
00328 QUtf7Encoder(bool encOpt, bool encLwsp)
00329 : outbits(0), stepNo(0),
00330 shifted(FALSE), mayContinueShiftedSequence(FALSE)
00331 {
00332 for ( int i = 0; i < 16 ; i++) {
00333 dontNeedEncodingSet[i] = directSet[i];
00334 if (!encOpt)
00335 dontNeedEncodingSet[i] |= optDirectSet[i];
00336 }
00337 if(!encLwsp) {
00338 dontNeedEncodingSet[' '/8] |= 0x80 >> (' '%8);
00339 dontNeedEncodingSet['\n'/8] |= 0x80 >> ('\n'%8);
00340 dontNeedEncodingSet['\r'/8] |= 0x80 >> ('\r'%8);
00341 dontNeedEncodingSet['\t'/8] |= 0x80 >> ('\t'%8);
00342 }
00343 }
00344
00345 private:
00346
00347 char toBase64( ushort u ) {
00348 if ( u < 26 )
00349 return (char)u + 'A';
00350 else if ( u < 52 )
00351 return (char)u - 26 + 'a';
00352 else if ( u < 62 )
00353 return (char)u - 52 + '0';
00354 else if ( u == 62 )
00355 return '+';
00356 else
00357 return '/';
00358 }
00359
00360 void addToShiftedSequence(QCString::Iterator & t, ushort u) {
00361 switch (stepNo) {
00362
00363 case 0:
00364 *t++ = toBase64( u >> 10 );
00365 *t++ = toBase64( (u & 0x03FF ) >> 4 );
00366
00367 outbits = (u & 0x000F) << 2;
00368 break;
00369
00370
00371
00372 case 1:
00373 if (!mayContinueShiftedSequence) {
00374
00375 *t++ = toBase64( outbits | ( u >> 14 ) );
00376 }
00377 *t++ = toBase64( (u & 0x3F00 ) >> 8 );
00378 *t++ = toBase64( (u & 0x00FC ) >> 2 );
00379
00380 outbits = (u & 0x0003) << 4;
00381 break;
00382
00383
00384
00385 case 2:
00386 if (!mayContinueShiftedSequence) {
00387
00388 *t++ = toBase64( outbits | ( u >> 12 ) );
00389 }
00390 *t++ = toBase64( (u & 0x0FFF) >> 6 );
00391 *t++ = toBase64( u & 0x003F );
00392 break;
00393
00394 default: ;
00395 }
00396 stepNo = (stepNo + 1) % 3;
00397 }
00398
00399 void endShiftedSequence(QCString::Iterator & t) {
00400 switch (stepNo) {
00401 case 1:
00402 case 2:
00403 *t++ = toBase64( outbits );
00404 break;
00405 case 0:
00406 default: ;
00407 }
00408 outbits = 0;
00409 }
00410
00411
00412
00413
00414
00415 bool continueOK( ushort u ) {
00416 return stepNo == 0 ||
00417 ( stepNo == 1 && (u & 0xF000) == 0 ) ||
00418 ( stepNo == 2 && (u & 0xC000) == 0 );
00419 }
00420
00421 void processDoesntNeedEncoding(QCString::Iterator & t, ushort ch) {
00422
00423 if (shifted) {
00424 endShiftedSequence(t);
00425
00426 if (isOfSet((char)ch,base64Set) || ch == '-' ) {
00427 *t++ = '-';
00428 }
00429 } else if (mayContinueShiftedSequence) {
00430
00431
00432 mayContinueShiftedSequence = FALSE;
00433 if (isOfSet(ch,base64Set) || ch == '-' ) {
00434 *t++ = '-';
00435 }
00436 }
00437 *t++ = (uchar)ch;
00438 shifted = FALSE;
00439 stepNo = 0;
00440 }
00441
00442 public:
00443 QCString fromUnicode(const QString & uc, int & len_in_out)
00444 {
00445
00446
00447
00448
00449
00450 int maxreslen = 3 * len_in_out + 5;
00451 QCString result( maxreslen );
00452
00453 #if 0
00454
00455 cout << "\nlen_in_out: " << len_in_out
00456 <<"; shifted: " << (shifted ? "true" : "false")
00457 << ";\n" << "mayContinue: "
00458 << (mayContinueShiftedSequence ? "true" : "false")
00459 << "; stepNo: " << stepNo << ";\n"
00460 << "outbits: " << outbits << endl;
00461
00462 #endif
00463
00464
00465 const QChar * s = uc.unicode();
00466 QCString::Iterator t = result.data();
00467
00468 if ( uc.isNull() ) {
00469
00470 if ( mayContinueShiftedSequence )
00471 *t++ = '-';
00472 } else {
00473
00474 for (int i = 0 ; i < len_in_out ;
00475 i++ ) {
00476 ushort ch = s[i].unicode();
00477
00478
00479
00480
00481 if ( ch < 128 ) {
00482
00483
00484
00485
00486 if ( isOfSet((uchar)ch,dontNeedEncodingSet) ) {
00487 processDoesntNeedEncoding(t,ch);
00488 continue;
00489 } else if ( ch == '+' ) {
00490
00491 if (shifted || mayContinueShiftedSequence) {
00492
00493
00494
00495
00496 addToShiftedSequence(t,ch);
00497 mayContinueShiftedSequence = FALSE;
00498 shifted = TRUE;
00499 } else {
00500
00501 *t++ = '+';
00502 *t++ = '-';
00503 }
00504 continue;
00505 }
00506 }
00507
00508
00509
00510 if (!shifted && (!mayContinueShiftedSequence || !continueOK(ch) ) ) {
00511 *t++ = '+';
00512 stepNo = 0;
00513 }
00514 addToShiftedSequence(t,ch);
00515 shifted = TRUE;
00516 mayContinueShiftedSequence = FALSE;
00517 }
00518
00519 if ( shifted ) {
00520 endShiftedSequence(t);
00521 mayContinueShiftedSequence = TRUE;
00522 };
00523 shifted = FALSE;
00524 }
00525
00526 *t = '\0';
00527 len_in_out = t - result.data();
00528
00529 #if 0
00530 cout << "len_in_out: " << len_in_out << "; "
00531 << "mayContinue: " << (mayContinueShiftedSequence ? "true" : "false")
00532 << "; stepNo: " << stepNo << endl;
00533 #endif
00534
00535 Q_ASSERT(len_in_out <= maxreslen-1);
00536
00537 return result;
00538 }
00539
00540 };
00541
00542 QTextEncoder* QUtf7Codec::makeEncoder() const {
00543 return new QUtf7Encoder( false, false );
00544 }
00545
00546 QTextEncoder* QStrictUtf7Codec::makeEncoder() const {
00547 return new QUtf7Encoder( true, false );
00548 }
00549
00550 #endif // QT_NO_TEXTCODEC