00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #include "kmime_codec_qp.h"
00033
00034 #include "kmime_util.h"
00035
00036 #include <kdebug.h>
00037
00038 #include <cassert>
00039
00040 using namespace KMime;
00041
00042 namespace KMime {
00043
00044
00045
00046 static inline char binToHex( uchar value ) {
00047 if ( value > 9 )
00048 return value + 'A' - 10;
00049 else
00050 return value + '0';
00051 }
00052
00053 static inline uchar highNibble( uchar ch ) {
00054 return ch >> 4;
00055 }
00056
00057 static inline uchar lowNibble( uchar ch ) {
00058 return ch & 0xF;
00059 }
00060
00061 static inline bool keep( uchar ch ) {
00062
00063 return !( ch < ' ' && ch != '\t' || ch == '?' );
00064 }
00065
00066
00067
00068
00069
00070 class QuotedPrintableEncoder : public Encoder {
00071 char mInputBuffer[16];
00072 uchar mCurrentLineLength;
00073 uchar mAccu;
00074 uint mInputBufferReadCursor : 4;
00075 uint mInputBufferWriteCursor : 4;
00076 enum {
00077 Never, AtBOL, Definitely
00078 } mAccuNeedsEncoding : 2;
00079 bool mSawLineEnd : 1;
00080 bool mSawCR : 1;
00081 bool mFinishing : 1;
00082 bool mFinished : 1;
00083 protected:
00084 friend class QuotedPrintableCodec;
00085 QuotedPrintableEncoder( bool withCRLF=false )
00086 : Encoder( withCRLF ), mCurrentLineLength(0), mAccu(0),
00087 mInputBufferReadCursor(0), mInputBufferWriteCursor(0),
00088 mAccuNeedsEncoding(Never),
00089 mSawLineEnd(false), mSawCR(false), mFinishing(false),
00090 mFinished(false) {}
00091
00092 bool needsEncoding( uchar ch ) {
00093 return ( ch > '~' || ch < ' ' && ch != '\t' || ch == '=' );
00094 }
00095 bool needsEncodingAtEOL( uchar ch ) {
00096 return ( ch == ' ' || ch == '\t' );
00097 }
00098 bool needsEncodingAtBOL( uchar ch ) {
00099 return ( ch == 'F' || ch == '.' || ch == '-' );
00100 }
00101 bool fillInputBuffer( const char* & scursor, const char * const send );
00102 bool processNextChar();
00103 void createOutputBuffer( char* & dcursor, const char * const dend );
00104 public:
00105 virtual ~QuotedPrintableEncoder() {}
00106
00107 bool encode( const char* & scursor, const char * const send,
00108 char* & dcursor, const char * const dend );
00109
00110 bool finish( char* & dcursor, const char * const dend );
00111 };
00112
00113
00114 class QuotedPrintableDecoder : public Decoder {
00115 const char mEscapeChar;
00116 char mBadChar;
00118 uchar mAccu;
00128 const bool mQEncoding;
00129 bool mInsideHexChar;
00130 bool mFlushing;
00131 bool mExpectLF;
00132 bool mHaveAccu;
00133 protected:
00134 friend class QuotedPrintableCodec;
00135 friend class Rfc2047QEncodingCodec;
00136 friend class Rfc2231EncodingCodec;
00137 QuotedPrintableDecoder( bool withCRLF=false,
00138 bool aQEncoding=false, char aEscapeChar='=' )
00139 : Decoder( withCRLF ),
00140 mEscapeChar(aEscapeChar),
00141 mBadChar(0),
00142 mAccu(0),
00143 mQEncoding(aQEncoding),
00144 mInsideHexChar(false),
00145 mFlushing(false),
00146 mExpectLF(false),
00147 mHaveAccu(false) {}
00148 public:
00149 virtual ~QuotedPrintableDecoder() {}
00150
00151 bool decode( const char* & scursor, const char * const send,
00152 char* & dcursor, const char * const dend );
00153
00154 bool finish( char* &, const char * const ) { return true; }
00155 };
00156
00157
00158 class Rfc2047QEncodingEncoder : public Encoder {
00159 uchar mAccu;
00160 uchar mStepNo;
00161 const char mEscapeChar;
00162 bool mInsideFinishing : 1;
00163 protected:
00164 friend class Rfc2047QEncodingCodec;
00165 friend class Rfc2231EncodingCodec;
00166 Rfc2047QEncodingEncoder( bool withCRLF=false, char aEscapeChar='=' )
00167 : Encoder( withCRLF ),
00168 mAccu(0), mStepNo(0), mEscapeChar( aEscapeChar ),
00169 mInsideFinishing( false )
00170 {
00171
00172 assert( aEscapeChar == '=' || aEscapeChar == '%' );
00173 }
00174
00175
00176 bool needsEncoding( uchar ch ) {
00177 if ( ch > 'z' ) return true;
00178 if ( !isEText( ch ) ) return true;
00179 if ( mEscapeChar == '%' && ( ch == '*' || ch == '/' ) )
00180 return true;
00181 return false;
00182 }
00183
00184 public:
00185 virtual ~Rfc2047QEncodingEncoder() {}
00186
00187 bool encode( const char* & scursor, const char * const send,
00188 char* & dcursor, const char * const dend );
00189 bool finish( char* & dcursor, const char * const dend );
00190 };
00191
00192
00193
// Upper bound for the number of bytes decoding insize input bytes can
// produce: one output byte per input byte, once more that amount when
// withCRLF is set, plus two extra bytes.
static int QuotedPrintableDecoder_maxDecodedSizeFor( int insize, bool withCRLF ) {
  const int lineEndGrowth = withCRLF ? insize : 0;
  return insize + lineEndGrowth + 2;
}
00206
00207 Encoder * QuotedPrintableCodec::makeEncoder( bool withCRLF ) const {
00208 return new QuotedPrintableEncoder( withCRLF );
00209 }
00210
00211 Decoder * QuotedPrintableCodec::makeDecoder( bool withCRLF ) const {
00212 return new QuotedPrintableDecoder( withCRLF );
00213 }
00214
00215 int QuotedPrintableCodec::maxDecodedSizeFor( int insize, bool withCRLF ) const {
00216 return QuotedPrintableDecoder_maxDecodedSizeFor(insize, withCRLF);
00217 }
00218
00219 Encoder * Rfc2047QEncodingCodec::makeEncoder( bool withCRLF ) const {
00220 return new Rfc2047QEncodingEncoder( withCRLF );
00221 }
00222
00223 Decoder * Rfc2047QEncodingCodec::makeDecoder( bool withCRLF ) const {
00224 return new QuotedPrintableDecoder( withCRLF, true );
00225 }
00226
00227 int Rfc2047QEncodingCodec::maxDecodedSizeFor( int insize, bool withCRLF ) const {
00228 return QuotedPrintableDecoder_maxDecodedSizeFor(insize, withCRLF);
00229 }
00230
00231 Encoder * Rfc2231EncodingCodec::makeEncoder( bool withCRLF ) const {
00232 return new Rfc2047QEncodingEncoder( withCRLF, '%' );
00233 }
00234
00235 Decoder * Rfc2231EncodingCodec::makeDecoder( bool withCRLF ) const {
00236 return new QuotedPrintableDecoder( withCRLF, true, '%' );
00237 }
00238
00239 int Rfc2231EncodingCodec::maxDecodedSizeFor( int insize, bool withCRLF ) const {
00240 return QuotedPrintableDecoder_maxDecodedSizeFor(insize, withCRLF);
00241 }
00242
00243
00244
00245
00246
00247 bool QuotedPrintableDecoder::decode( const char* & scursor, const char * const send,
00248 char* & dcursor, const char * const dend ) {
00249 if ( mWithCRLF )
00250 kdWarning() << "CRLF output for decoders isn't yet supported!" << endl;
00251
00252 while ( scursor != send && dcursor != dend ) {
00253 if ( mFlushing ) {
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263 if ( mInsideHexChar ) {
00264
00265 *dcursor++ = mEscapeChar;
00266 mInsideHexChar = false;
00267 } else if ( mHaveAccu ) {
00268
00269 *dcursor++ = binToHex( highNibble( mAccu ) );
00270 mHaveAccu = false;
00271 mAccu = 0;
00272 } else {
00273
00274 assert( mAccu == 0 );
00275 if ( mBadChar ) {
00276 if ( mBadChar >= '>' && mBadChar <= '~' ||
00277 mBadChar >= '!' && mBadChar <= '<' )
00278 *dcursor++ = mBadChar;
00279 mBadChar = 0;
00280 }
00281 mFlushing = false;
00282 }
00283 continue;
00284 }
00285 assert( mBadChar == 0 );
00286
00287 uchar ch = *scursor++;
00288 uchar value = 255;
00289
00290 if ( mExpectLF && ch != '\n' ) {
00291 kdWarning() << "QuotedPrintableDecoder: "
00292 "illegally formed soft linebreak or lonely CR!" << endl;
00293 mInsideHexChar = false;
00294 mExpectLF = false;
00295 assert( mAccu == 0 );
00296 }
00297
00298 if ( mInsideHexChar ) {
00299
00300 if ( ch <= '9' ) {
00301 if ( ch >= '0' ) {
00302 value = ch - '0';
00303 } else {
00304 switch ( ch ) {
00305 case '\r':
00306 mExpectLF = true;
00307 break;
00308 case '\n':
00309
00310 if ( !mHaveAccu ) {
00311 mExpectLF = false;
00312 mInsideHexChar = false;
00313 break;
00314 }
00315
00316 default:
00317 kdWarning() << "QuotedPrintableDecoder: "
00318 "illegally formed hex char! Outputting verbatim." << endl;
00319 mBadChar = ch;
00320 mFlushing = true;
00321 }
00322 continue;
00323 }
00324 } else {
00325 if ( ch <= 'F' ) {
00326 if ( ch >= 'A' ) {
00327 value = 10 + ch - 'A';
00328 } else {
00329 mBadChar = ch;
00330 mFlushing = true;
00331 continue;
00332 }
00333 } else {
00334 if ( ch <= 'f' && ch >= 'a' ) {
00335 value = 10 + ch - 'a';
00336 } else {
00337 mBadChar = ch;
00338 mFlushing = true;
00339 continue;
00340 }
00341 }
00342 }
00343
00344 assert( value < 16 );
00345 assert( mBadChar == 0 );
00346 assert( !mExpectLF );
00347
00348 if ( mHaveAccu ) {
00349 *dcursor++ = char( mAccu | value );
00350 mAccu = 0;
00351 mHaveAccu = false;
00352 mInsideHexChar = false;
00353 } else {
00354 mHaveAccu = true;
00355 mAccu = value << 4;
00356 }
00357 } else {
00358 if ( ch <= '~' && ch >= ' ' || ch == '\t' ) {
00359 if ( ch == mEscapeChar ) {
00360 mInsideHexChar = true;
00361 } else if ( mQEncoding && ch == '_' ) {
00362 *dcursor++ = char(0x20);
00363 } else {
00364 *dcursor++ = char(ch);
00365 }
00366 } else if ( ch == '\n' ) {
00367 *dcursor++ = '\n';
00368 mExpectLF = false;
00369 } else if ( ch == '\r' ) {
00370 mExpectLF = true;
00371 } else {
00372 kdWarning() << "QuotedPrintableDecoder: " << ch <<
00373 " illegal character in input stream! Ignoring." << endl;
00374 }
00375 }
00376 }
00377
00378 return (scursor == send);
00379 }
00380
00381 bool QuotedPrintableEncoder::fillInputBuffer( const char* & scursor,
00382 const char * const send ) {
00383
00384 if ( mSawLineEnd )
00385 return true;
00386
00387
00388
00389 for ( ; ( mInputBufferWriteCursor + 1 ) % 16 != mInputBufferReadCursor
00390 && scursor != send ; mInputBufferWriteCursor++ ) {
00391 char ch = *scursor++;
00392 if ( ch == '\r' ) {
00393 mSawCR = true;
00394 } else if ( ch == '\n' ) {
00395
00396
00397 if ( mSawCR ) {
00398 mSawCR = false;
00399 assert( mInputBufferWriteCursor != mInputBufferReadCursor );
00400 mInputBufferWriteCursor--;
00401 }
00402 mSawLineEnd = true;
00403 return true;
00404 } else {
00405 mSawCR = false;
00406 }
00407 mInputBuffer[ mInputBufferWriteCursor ] = ch;
00408 }
00409 mSawLineEnd = false;
00410 return false;
00411 }
00412
00413 bool QuotedPrintableEncoder::processNextChar() {
00414
00415
00416
00417
00418
00419
00420 const int minBufferFillWithoutLineEnd = 4;
00421
00422 assert( mOutputBufferCursor == 0 );
00423
00424 int bufferFill = int(mInputBufferWriteCursor) - int(mInputBufferReadCursor) ;
00425 if ( bufferFill < 0 )
00426 bufferFill += 16;
00427
00428 assert( bufferFill >=0 && bufferFill <= 15 );
00429
00430 if ( !mFinishing && !mSawLineEnd &&
00431 bufferFill < minBufferFillWithoutLineEnd )
00432 return false;
00433
00434
00435 if ( mInputBufferReadCursor == mInputBufferWriteCursor )
00436 return false;
00437
00438
00439 mAccu = mInputBuffer[ mInputBufferReadCursor++ ];
00440 if ( needsEncoding( mAccu ) )
00441 mAccuNeedsEncoding = Definitely;
00442 else if ( ( mSawLineEnd || mFinishing )
00443 && bufferFill == 1
00444 && needsEncodingAtEOL( mAccu ) )
00445 mAccuNeedsEncoding = Definitely;
00446 else if ( needsEncodingAtBOL( mAccu ) )
00447 mAccuNeedsEncoding = AtBOL;
00448 else
00449
00450 mAccuNeedsEncoding = Never;
00451
00452 return true;
00453 }
00454
00455
00456
00457
00458
00459 void QuotedPrintableEncoder::createOutputBuffer( char* & dcursor,
00460 const char * const dend )
00461 {
00462 const int maxLineLength = 76;
00463
00464 assert( mOutputBufferCursor == 0 );
00465
00466 bool lastOneOnThisLine = mSawLineEnd
00467 && mInputBufferReadCursor == mInputBufferWriteCursor;
00468
00469 int neededSpace = 1;
00470 if ( mAccuNeedsEncoding == Definitely)
00471 neededSpace = 3;
00472
00473
00474 if ( !lastOneOnThisLine )
00475 neededSpace++;
00476
00477 if ( mCurrentLineLength > maxLineLength - neededSpace ) {
00478
00479 write( '=', dcursor, dend );
00480 writeCRLF( dcursor, dend );
00481 mCurrentLineLength = 0;
00482 }
00483
00484 if ( Never == mAccuNeedsEncoding ||
00485 AtBOL == mAccuNeedsEncoding && mCurrentLineLength != 0 ) {
00486 write( mAccu, dcursor, dend );
00487 mCurrentLineLength++;
00488 } else {
00489 write( '=', dcursor, dend );
00490 write( binToHex( highNibble( mAccu ) ), dcursor, dend );
00491 write( binToHex( lowNibble( mAccu ) ), dcursor, dend );
00492 mCurrentLineLength += 3;
00493 }
00494 }
00495
00496
00497 bool QuotedPrintableEncoder::encode( const char* & scursor, const char * const send,
00498 char* & dcursor, const char * const dend )
00499 {
00500
00501 if ( mFinishing ) return true;
00502
00503 while ( scursor != send && dcursor != dend ) {
00504 if ( mOutputBufferCursor && !flushOutputBuffer( dcursor, dend ) )
00505 return (scursor == send);
00506
00507 assert( mOutputBufferCursor == 0 );
00508
00509
00510
00511 fillInputBuffer( scursor, send );
00512
00513 if ( processNextChar() )
00514
00515 createOutputBuffer( dcursor, dend );
00516 else if ( mSawLineEnd &&
00517 mInputBufferWriteCursor == mInputBufferReadCursor ) {
00518
00519 writeCRLF( dcursor, dend );
00520
00521 mSawLineEnd = false;
00522 mCurrentLineLength = 0;
00523 } else
00524
00525 break;
00526 }
00527
00528
00529
00530 if ( mOutputBufferCursor ) flushOutputBuffer( dcursor, dend );
00531
00532 return (scursor == send);
00533
00534 }
00535
00536 bool QuotedPrintableEncoder::finish( char* & dcursor,
00537 const char * const dend ) {
00538 mFinishing = true;
00539
00540 if ( mFinished )
00541 return flushOutputBuffer( dcursor, dend );
00542
00543 while ( dcursor != dend ) {
00544 if ( mOutputBufferCursor && !flushOutputBuffer( dcursor, dend ) )
00545 return false;
00546
00547 assert( mOutputBufferCursor == 0 );
00548
00549 if ( processNextChar() )
00550
00551 createOutputBuffer( dcursor, dend );
00552 else if ( mSawLineEnd &&
00553 mInputBufferWriteCursor == mInputBufferReadCursor ) {
00554
00555 writeCRLF( dcursor, dend );
00556 mSawLineEnd = false;
00557 mCurrentLineLength = 0;
00558 } else {
00559 mFinished = true;
00560 return flushOutputBuffer( dcursor, dend );
00561 }
00562 }
00563
00564 return mFinished && !mOutputBufferCursor;
00565
00566 }
00567
00568
00569 bool Rfc2047QEncodingEncoder::encode( const char* & scursor, const char * const send,
00570 char* & dcursor, const char * const dend )
00571 {
00572 if ( mInsideFinishing ) return true;
00573
00574 while ( scursor != send && dcursor != dend ) {
00575 uchar value;
00576 switch ( mStepNo ) {
00577 case 0:
00578
00579 mAccu = *scursor++;
00580 if ( !needsEncoding( mAccu ) ) {
00581 *dcursor++ = char(mAccu);
00582 } else if ( mEscapeChar == '=' && mAccu == 0x20 ) {
00583
00584
00585 *dcursor++ = '_';
00586 } else {
00587
00588 *dcursor++ = mEscapeChar;
00589 mStepNo = 1;
00590 }
00591 continue;
00592 case 1:
00593
00594 value = highNibble(mAccu);
00595 mStepNo = 2;
00596 break;
00597 case 2:
00598
00599 value = lowNibble(mAccu);
00600 mStepNo = 0;
00601 break;
00602 default: assert( 0 );
00603 }
00604
00605
00606 *dcursor++ = binToHex( value );
00607 }
00608
00609 return (scursor == send);
00610 }
00611
00612 #include <qstring.h>
00613
00614 bool Rfc2047QEncodingEncoder::finish( char* & dcursor, const char * const dend ) {
00615 mInsideFinishing = true;
00616
00617
00618 while ( mStepNo != 0 && dcursor != dend ) {
00619 uchar value;
00620 switch ( mStepNo ) {
00621 case 1:
00622
00623 value = highNibble(mAccu);
00624 mStepNo = 2;
00625 break;
00626 case 2:
00627
00628 value = lowNibble(mAccu);
00629 mStepNo = 0;
00630 break;
00631 default: assert( 0 );
00632 }
00633
00634
00635 *dcursor++ = binToHex( value );
00636 }
00637
00638 return mStepNo == 0;
00639 }
00640
00641
00642
00643
00644 }