libkmime

kmime_header_parsing.cpp

00001 /*  -*- c++ -*-
00002     kmime_header_parsing.cpp
00003 
00004     This file is part of KMime, the KDE internet mail/usenet news message library.
00005     Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org>
00006 
00007     KMime is free software; you can redistribute it and/or modify it
00008     under the terms of the GNU General Public License, version 2, as
00009     published by the Free Software Foundation.
00010 
00011     KMime is distributed in the hope that it will be useful, but
00012     WITHOUT ANY WARRANTY; without even the implied warranty of
00013     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014     General Public License for more details.
00015 
00016     You should have received a copy of the GNU General Public License
00017     along with this library; if not, write to the Free Software
00018     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00019 
00020     In addition, as a special exception, the copyright holders give
00021     permission to link the code of this library with any edition of
00022     the Qt library by Trolltech AS, Norway (or with modified versions
00023     of Qt that use the same license as Qt), and distribute linked
00024     combinations including the two.  You must obey the GNU General
00025     Public License in all respects for all of the code used other than
00026     Qt.  If you modify this file, you may extend this exception to
00027     your version of the file, but you are not obligated to do so.  If
00028     you do not wish to do so, delete this exception statement from
00029     your version.
00030 */
00031 
00032 #include <config.h>
00033 #include "kmime_header_parsing.h"
00034 
00035 #include "kmime_codecs.h"
00036 #include "kmime_util.h"
00037 #include "kmime_warning.h"
00038 
00039 #include <kglobal.h>
00040 #include <kcharsets.h>
00041 
00042 #include <qtextcodec.h>
00043 #include <qmap.h>
00044 #include <qcstring.h>
00045 #include <qstringlist.h>
00046 
00047 #include <ctype.h> // for isdigit
00048 #include <cassert>
00049 
00050 using namespace KMime;
00051 using namespace KMime::Types;
00052 
00053 namespace KMime {
00054 
00055 namespace Types {
00056 
00057   QString AddrSpec::asString() const {
00058     bool needsQuotes = false;
00059     QString result;
00060     for ( unsigned int i = 0 ; i < localPart.length() ; ++i ) {
00061       const char ch = localPart[i].latin1();
00062       if ( ch == '.' || isAText( ch ) )
00063     result += ch;
00064       else {
00065     needsQuotes = true;
00066     if ( ch == '\\' || ch == '"' )
00067       result += '\\';
00068     result += ch;
00069       }
00070     }
00071     if ( needsQuotes )
00072       return '"' + result + "\"@" + domain;
00073     else
00074       return result + '@' + domain;
00075   }
00076 
00077 }
00078 
00079 namespace HeaderParsing {
00080 
00081 // parse the encoded-word (scursor points to after the initial '=')
00082 bool parseEncodedWord( const char* & scursor, const char * const send,
00083                QString & result, QCString & language ) {
00084 
00085   // make sure the caller already did a bit of the work.
00086   assert( *(scursor-1) == '=' );
00087 
00088   //
00089   // STEP 1:
00090   // scan for the charset/language portion of the encoded-word
00091   //
00092 
00093   char ch = *scursor++;
00094 
00095   if ( ch != '?' ) {
00096     kdDebug() << "first" << endl;
00097     KMIME_WARN_PREMATURE_END_OF(EncodedWord);
00098     return false;
00099   }
00100 
00101   // remember start of charset (ie. just after the initial "=?") and
00102   // language (just after the first '*') fields:
00103   const char * charsetStart = scursor;
00104   const char * languageStart = 0;
00105 
00106   // find delimiting '?' (and the '*' separating charset and language
00107   // tags, if any):
00108   for ( ; scursor != send ; scursor++ )
00109     if ( *scursor == '?')
00110       break;
00111     else if ( *scursor == '*' && !languageStart )
00112       languageStart = scursor + 1;
00113 
00114   // not found? can't be an encoded-word!
00115   if ( scursor == send || *scursor != '?' ) {
00116     kdDebug() << "second" << endl;
00117     KMIME_WARN_PREMATURE_END_OF(EncodedWord);
00118     return false;
00119   }
00120 
00121   // extract the language information, if any (if languageStart is 0,
00122   // language will be null, too):
00123   QCString maybeLanguage( languageStart, scursor - languageStart + 1 /*for NUL*/);
00124   // extract charset information (keep in mind: the size given to the
00125   // ctor is one off due to the \0 terminator):
00126   QCString maybeCharset( charsetStart, ( languageStart ? languageStart : scursor + 1 ) - charsetStart );
00127 
00128   //
00129   // STEP 2:
00130   // scan for the encoding portion of the encoded-word
00131   //
00132 
00133 
00134   // remember start of encoding (just _after_ the second '?'):
00135   scursor++;
00136   const char * encodingStart = scursor;
00137 
00138   // find next '?' (ending the encoding tag):
00139   for ( ; scursor != send ; scursor++ )
00140     if ( *scursor == '?' ) break;
00141 
00142   // not found? Can't be an encoded-word!
00143   if ( scursor == send || *scursor != '?' ) {
00144     kdDebug() << "third" << endl;
00145     KMIME_WARN_PREMATURE_END_OF(EncodedWord);
00146     return false;
00147   }
00148 
00149   // extract the encoding information:
00150   QCString maybeEncoding( encodingStart, scursor - encodingStart + 1 );
00151 
00152 
00153   kdDebug() << "parseEncodedWord: found charset == \"" << maybeCharset
00154         << "\"; language == \"" << maybeLanguage
00155         << "\"; encoding == \"" << maybeEncoding << "\"" << endl;
00156 
00157   //
00158   // STEP 3:
00159   // scan for encoded-text portion of encoded-word
00160   //
00161 
00162 
00163   // remember start of encoded-text (just after the third '?'):
00164   scursor++;
00165   const char * encodedTextStart = scursor;
00166 
00167   // find next '?' (ending the encoded-text):
00168   for ( ; scursor != send ; scursor++ )
00169     if ( *scursor == '?' ) break;
00170 
00171   // not found? Can't be an encoded-word!
00172   // ### maybe evaluate it nonetheless if the rest is OK?
00173   if ( scursor == send || *scursor != '?' ) {
00174     kdDebug() << "fourth" << endl;
00175     KMIME_WARN_PREMATURE_END_OF(EncodedWord);
00176     return false;
00177   }
00178   scursor++;
00179   // check for trailing '=':
00180   if ( scursor == send || *scursor != '=' ) {
00181     kdDebug() << "fifth" << endl;
00182     KMIME_WARN_PREMATURE_END_OF(EncodedWord);
00183     return false;
00184   }
00185   scursor++;
00186 
00187   // set end sentinel for encoded-text:
00188   const char * const encodedTextEnd = scursor - 2;
00189 
00190   //
00191   // STEP 4:
00192   // setup decoders for the transfer encoding and the charset
00193   //
00194 
00195 
00196   // try if there's a codec for the encoding found:
00197   Codec * codec = Codec::codecForName( maybeEncoding );
00198   if ( !codec ) {
00199     KMIME_WARN_UNKNOWN(Encoding,maybeEncoding);
00200     return false;
00201   }
00202 
00203   // get an instance of a corresponding decoder:
00204   Decoder * dec = codec->makeDecoder();
00205   assert( dec );
00206 
00207   // try if there's a (text)codec for the charset found:
00208   bool matchOK = false;
00209   QTextCodec
00210     *textCodec = KGlobal::charsets()->codecForName( maybeCharset, matchOK );
00211 
00212   if ( !matchOK || !textCodec ) {
00213     KMIME_WARN_UNKNOWN(Charset,maybeCharset);
00214     delete dec;
00215     return false;
00216   };
00217 
00218   kdDebug() << "mimeName(): \"" << textCodec->mimeName() << "\"" << endl;
00219 
00220   // allocate a temporary buffer to store the 8bit text:
00221   int encodedTextLength = encodedTextEnd - encodedTextStart;
00222   QByteArray buffer( codec->maxDecodedSizeFor( encodedTextLength ) );
00223   QByteArray::Iterator bit = buffer.begin();
00224   QByteArray::ConstIterator bend = buffer.end();
00225 
00226   //
00227   // STEP 5:
00228   // do the actual decoding
00229   //
00230 
00231   if ( !dec->decode( encodedTextStart, encodedTextEnd, bit, bend ) )
00232     KMIME_WARN << codec->name() << " codec lies about it's maxDecodedSizeFor( "
00233            << encodedTextLength << " )\nresult may be truncated" << endl;
00234 
00235   result = textCodec->toUnicode( buffer.begin(), bit - buffer.begin() );
00236 
00237   kdDebug() << "result now: \"" << result << "\"" << endl;
00238   // cleanup:
00239   delete dec;
00240   language = maybeLanguage;
00241 
00242   return true;
00243 }
00244 
// Advances scursor over any run of space, tab, CR and LF characters,
// stopping at the first other character or at send.
static inline void eatWhiteSpace( const char* & scursor, const char * const send ) {
  for ( ; scursor != send ; ++scursor ) {
    switch ( *scursor ) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
      continue; // still whitespace: move on to the next char
    default:
      return;
    }
  }
}
00251 
00252 bool parseAtom( const char * & scursor, const char * const send,
00253         QString & result, bool allow8Bit )
00254 {
00255   QPair<const char*,int> maybeResult;
00256 
00257   if ( parseAtom( scursor, send, maybeResult, allow8Bit ) ) {
00258     result += QString::fromLatin1( maybeResult.first, maybeResult.second );
00259     return true;
00260   }
00261 
00262   return false;
00263 }
00264 
00265 bool parseAtom( const char * & scursor, const char * const send,
00266         QPair<const char*,int> & result, bool allow8Bit ) {
00267   bool success = false;
00268   const char * start = scursor;
00269 
00270   while ( scursor != send ) {
00271     signed char ch = *scursor++;
00272     if ( ch > 0 && isAText(ch) ) {
00273       // AText: OK
00274       success = true;
00275     } else if ( allow8Bit && ch < 0 ) {
00276       // 8bit char: not OK, but be tolerant.
00277       KMIME_WARN_8BIT(ch);
00278       success = true;
00279     } else {
00280       // CTL or special - marking the end of the atom:
00281       // re-set sursor to point to the offending
00282       // char and return:
00283       scursor--;
00284       break;
00285     }
00286   }
00287   result.first = start;
00288   result.second = scursor - start;
00289   return success;
00290 }
00291 
00292 bool parseToken( const char * & scursor, const char * const send,
00293          QString & result, bool allow8Bit )
00294 {
00295   QPair<const char*,int> maybeResult;
00296 
00297   if ( parseToken( scursor, send, maybeResult, allow8Bit ) ) {
00298     result += QString::fromLatin1( maybeResult.first, maybeResult.second );
00299     return true;
00300   }
00301 
00302   return false;
00303 }
00304 
00305 bool parseToken( const char * & scursor, const char * const send,
00306          QPair<const char*,int> & result, bool allow8Bit )
00307 {
00308   bool success = false;
00309   const char * start = scursor;
00310 
00311   while ( scursor != send ) {
00312     signed char ch = *scursor++;
00313     if ( ch > 0 && isTText(ch) ) {
00314       // TText: OK
00315       success = true;
00316     } else if ( allow8Bit && ch < 0 ) {
00317       // 8bit char: not OK, but be tolerant.
00318       KMIME_WARN_8BIT(ch);
00319       success = true;
00320     } else {
00321       // CTL or tspecial - marking the end of the atom:
00322       // re-set sursor to point to the offending
00323       // char and return:
00324       scursor--;
00325       break;
00326     }
00327   }
00328   result.first = start;
00329   result.second = scursor - start;
00330   return success;
00331 }
00332 
// Reads the next input char into "ch" and advances "scursor"; if the
// input is exhausted ("scursor == send") it warns about a premature
// end and makes the _enclosing function_ return false instead.
//
// Expects "scursor", "send" and "ch" to be in scope at the point of
// use. Wrapped in do { } while ( 0 ) so that it behaves like a single
// statement (eg. inside an unbraced if/else); use it with a trailing
// semicolon.
#define READ_ch_OR_FAIL do { \
                          if ( scursor == send ) { \
                            KMIME_WARN_PREMATURE_END_OF(GenericQuotedString); \
                            return false; \
                          } \
                          ch = *scursor++; \
                        } while ( 0 )
00339 
00340 // known issues:
00341 //
00342 // - doesn't handle quoted CRLF
00343 
00344 bool parseGenericQuotedString( const char* & scursor, const char * const send,
00345                    QString & result, bool isCRLF,
00346                    const char openChar, const char closeChar )
00347 {
00348   char ch;
00349   // We are in a quoted-string or domain-literal or comment and the
00350   // cursor points to the first char after the openChar.
00351   // We will apply unfolding and quoted-pair removal.
00352   // We return when we either encounter the end or unescaped openChar
00353   // or closeChar.
00354 
00355   assert( *(scursor-1) == openChar || *(scursor-1) == closeChar );
00356 
00357   while ( scursor != send ) {
00358     ch = *scursor++;
00359 
00360     if ( ch == closeChar || ch == openChar ) {
00361       // end of quoted-string or another opening char:
00362       // let caller decide what to do.
00363       return true;
00364     }
00365 
00366     switch( ch ) {
00367     case '\\':      // quoted-pair
00368       // misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5
00369       READ_ch_OR_FAIL;
00370       KMIME_WARN_IF_8BIT(ch);
00371       result += QChar(ch);
00372       break;
00373     case '\r':
00374       // ###
00375       // The case of lonely '\r' is easy to solve, as they're
00376       // not part of Unix Line-ending conventions.
00377       // But I see a problem if we are given Unix-native
00378       // line-ending-mails, where we cannot determine anymore
00379       // whether a given '\n' was part of a CRLF or was occurring
00380       // on it's own.
00381       READ_ch_OR_FAIL;
00382       if ( ch != '\n' ) {
00383     // CR on it's own...
00384     KMIME_WARN_LONE(CR);
00385     result += QChar('\r');
00386     scursor--; // points to after the '\r' again
00387       } else {
00388     // CRLF encountered.
00389     // lookahead: check for folding
00390     READ_ch_OR_FAIL;
00391     if ( ch == ' ' || ch == '\t' ) {
00392       // correct folding;
00393       // position cursor behind the CRLF WSP (unfolding)
00394       // and add the WSP to the result
00395       result += QChar(ch);
00396     } else {
00397       // this is the "shouldn't happen"-case. There is a CRLF
00398       // inside a quoted-string without it being part of FWS.
00399       // We take it verbatim.
00400       KMIME_WARN_NON_FOLDING(CRLF);
00401       result += "\r\n";
00402       // the cursor is decremented again, so's we need not
00403       // duplicate the whole switch here. "ch" could've been
00404       // everything (incl. openChar or closeChar).
00405       scursor--;
00406     }
00407       }
00408       break;
00409     case '\n':
00410       // Note: CRLF has been handled above already!
00411       // ### LF needs special treatment, depending on whether isCRLF
00412       // is true (we can be sure a lonely '\n' was meant this way) or
00413       // false ('\n' alone could have meant LF or CRLF in the original
00414       // message. This parser assumes CRLF iff the LF is followed by
00415       // either WSP (folding) or NULL (premature end of quoted-string;
00416       // Should be fixed, since NULL is allowed as per rfc822).
00417       READ_ch_OR_FAIL;
00418       if ( !isCRLF && ( ch == ' ' || ch == '\t' ) ) {
00419     // folding
00420     // correct folding
00421     result += QChar(ch);
00422       } else {
00423     // non-folding
00424     KMIME_WARN_LONE(LF);
00425     result += QChar('\n');
00426     // pos is decremented, so's we need not duplicate the whole
00427     // switch here. ch could've been everything (incl. <">, "\").
00428     scursor--;
00429       }
00430       break;
00431     default:
00432       KMIME_WARN_IF_8BIT(ch);
00433       result += QChar(ch);
00434     }
00435   }
00436 
00437   return false;
00438 }
00439 
00440 // known issues:
00441 //
00442 // - doesn't handle encoded-word inside comments.
00443 
00444 bool parseComment( const char* & scursor, const char * const send,
00445            QString & result, bool isCRLF, bool reallySave )
00446 {
00447   int commentNestingDepth = 1;
00448   const char * afterLastClosingParenPos = 0;
00449   QString maybeCmnt;
00450   const char * oldscursor = scursor;
00451 
00452   assert( *(scursor-1) == '(' );
00453 
00454   while ( commentNestingDepth ) {
00455     QString cmntPart;
00456     if ( parseGenericQuotedString( scursor, send, cmntPart, isCRLF, '(', ')' ) ) {
00457       assert( *(scursor-1) == ')' || *(scursor-1) == '(' );
00458       // see the kdoc for above function for the possible conditions
00459       // we have to check:
00460       switch ( *(scursor-1) ) {
00461       case ')':
00462     if ( reallySave ) {
00463       // add the chunk that's now surely inside the comment.
00464       result += maybeCmnt;
00465       result += cmntPart;
00466       if ( commentNestingDepth > 1 ) // don't add the outermost ')'...
00467         result += QChar(')');
00468       maybeCmnt = QString::null;
00469     }
00470     afterLastClosingParenPos = scursor;
00471     --commentNestingDepth;
00472     break;
00473       case '(':
00474     if ( reallySave ) {
00475       // don't add to "result" yet, because we might find that we
00476       // are already outside the (broken) comment...
00477       maybeCmnt += cmntPart;
00478       maybeCmnt += QChar('(');
00479     }
00480     ++commentNestingDepth;
00481     break;
00482       default: assert( 0 );
00483       } // switch
00484     } else {
00485       // !parseGenericQuotedString, ie. premature end
00486       if ( afterLastClosingParenPos )
00487     scursor = afterLastClosingParenPos;
00488       else
00489     scursor = oldscursor;
00490       return false;
00491     }
00492   } // while
00493 
00494   return true;
00495 }
00496 
00497 
00498 // known issues: none.
00499 
00500 bool parsePhrase( const char* & scursor, const char * const send,
00501           QString & result, bool isCRLF )
00502 {
00503   enum { None, Phrase, Atom, EncodedWord, QuotedString } found = None;
00504   QString tmp;
00505   QCString lang;
00506   const char * successfullyParsed = 0;
00507   // only used by the encoded-word branch
00508   const char * oldscursor;
00509   // used to suppress whitespace between adjacent encoded-words
00510   // (rfc2047, 6.2):
00511   bool lastWasEncodedWord = false;
00512 
00513   while ( scursor != send ) {
00514     char ch = *scursor++;
00515     switch ( ch ) {
00516     case '.': // broken, but allow for intorop's sake
00517       if ( found == None ) {
00518     --scursor;
00519     return false;
00520       } else {
00521     if ( scursor != send && ( *scursor == ' ' || *scursor == '\t' ) )
00522       result += ". ";
00523     else
00524       result += '.';
00525     successfullyParsed = scursor;
00526       }
00527       break;
00528     case '"': // quoted-string
00529       tmp = QString::null;
00530       if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) {
00531     successfullyParsed = scursor;
00532     assert( *(scursor-1) == '"' );
00533     switch ( found ) {
00534     case None:
00535       found = QuotedString;
00536       break;
00537     case Phrase:
00538     case Atom:
00539     case EncodedWord:
00540     case QuotedString:
00541       found = Phrase;
00542       result += QChar(' '); // rfc822, 3.4.4
00543       break;
00544     default:
00545       assert( 0 );
00546     }
00547     lastWasEncodedWord = false;
00548     result += tmp;
00549       } else {
00550     // premature end of quoted string.
00551     // What to do? Return leading '"' as special? Return as quoted-string?
00552     // We do the latter if we already found something, else signal failure.
00553     if ( found == None ) {
00554       return false;
00555     } else {
00556       result += QChar(' '); // rfc822, 3.4.4
00557       result += tmp;
00558       return true;
00559     }
00560       }
00561       break;
00562     case '(': // comment
00563       // parse it, but ignore content:
00564       tmp = QString::null;
00565       if ( parseComment( scursor, send, tmp, isCRLF,
00566              false /*don't bother with the content*/ ) ) {
00567     successfullyParsed = scursor;
00568     lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2
00569       } else {
00570     if ( found == None )
00571       return false;
00572     else {
00573       scursor = successfullyParsed;
00574       return true;
00575     }
00576       }
00577       break;
00578     case '=': // encoded-word
00579       tmp = QString::null;
00580       oldscursor = scursor;
00581       lang = 0;
00582       if ( parseEncodedWord( scursor, send, tmp, lang ) ) {
00583     successfullyParsed = scursor;
00584     switch ( found ) {
00585     case None:
00586       found = EncodedWord;
00587       break;
00588     case Phrase:
00589     case EncodedWord:
00590     case Atom:
00591     case QuotedString:
00592       if ( !lastWasEncodedWord )
00593         result += QChar(' '); // rfc822, 3.4.4
00594       found = Phrase;
00595       break;
00596     default: assert( 0 );
00597     }
00598     lastWasEncodedWord = true;
00599     result += tmp;
00600     break;
00601       } else
00602     // parse as atom:
00603     scursor = oldscursor;
00604       // fall though...
00605 
00606     default: //atom
00607       tmp = QString::null;
00608       scursor--;
00609       if ( parseAtom( scursor, send, tmp, true /* allow 8bit */ ) ) {
00610     successfullyParsed = scursor;
00611     switch ( found ) {
00612     case None:
00613       found = Atom;
00614       break;
00615     case Phrase:
00616     case Atom:
00617     case EncodedWord:
00618     case QuotedString:
00619       found = Phrase;
00620       result += QChar(' '); // rfc822, 3.4.4
00621       break;
00622     default:
00623       assert( 0 );
00624     }
00625     lastWasEncodedWord = false;
00626     result += tmp;
00627       } else {
00628     if ( found == None )
00629       return false;
00630     else {
00631       scursor = successfullyParsed;
00632       return true;
00633     }
00634       }
00635     }
00636     eatWhiteSpace( scursor, send );
00637   }
00638 
00639   return ( found != None );
00640 }
00641 
00642 
00643 bool parseDotAtom( const char* & scursor, const char * const send,
00644            QString & result, bool isCRLF )
00645 {
00646   // always points to just after the last atom parsed:
00647   const char * successfullyParsed;
00648 
00649   QString tmp;
00650   if ( !parseAtom( scursor, send, tmp, false /* no 8bit */ ) )
00651     return false;
00652   result += tmp;
00653   successfullyParsed = scursor;
00654 
00655   while ( scursor != send ) {
00656     eatCFWS( scursor, send, isCRLF );
00657 
00658     // end of header or no '.' -> return
00659     if ( scursor == send || *scursor != '.' ) return true;
00660     scursor++; // eat '.'
00661 
00662     eatCFWS( scursor, send, isCRLF );
00663 
00664     if ( scursor == send || !isAText( *scursor ) ) {
00665       // end of header or no AText, but this time following a '.'!:
00666       // reset cursor to just after last successfully parsed char and
00667       // return:
00668       scursor = successfullyParsed;
00669       return true;
00670     }
00671 
00672     // try to parse the next atom:
00673     QString maybeAtom;
00674     if ( !parseAtom( scursor, send, maybeAtom, false /*no 8bit*/ ) ) {
00675       scursor = successfullyParsed;
00676       return true;
00677     }
00678 
00679     result += QChar('.');
00680     result += maybeAtom;
00681     successfullyParsed = scursor;
00682   }
00683 
00684   scursor = successfullyParsed;
00685   return true;
00686 }
00687 
00688 
00689 void eatCFWS( const char* & scursor, const char * const send, bool isCRLF ) {
00690   QString dummy;
00691 
00692   while ( scursor != send ) {
00693     const char * oldscursor = scursor;
00694 
00695     char ch = *scursor++;
00696 
00697     switch( ch ) {
00698     case ' ':
00699     case '\t': // whitespace
00700     case '\r':
00701     case '\n': // folding
00702       continue;
00703 
00704     case '(': // comment
00705       if ( parseComment( scursor, send, dummy, isCRLF, false /*don't save*/ ) )
00706     continue;
00707       scursor = oldscursor;
00708       return;
00709 
00710     default:
00711       scursor = oldscursor;
00712       return;
00713     }
00714 
00715   }
00716 }
00717 
00718 bool parseDomain( const char* & scursor, const char * const send,
00719           QString & result, bool isCRLF ) {
00720   eatCFWS( scursor, send, isCRLF );
00721   if ( scursor == send ) return false;
00722 
00723   // domain := dot-atom / domain-literal / atom *("." atom)
00724   //
00725   // equivalent to:
00726   // domain = dot-atom / domain-literal,
00727   // since parseDotAtom does allow CFWS between atoms and dots
00728 
00729   if ( *scursor == '[' ) {
00730     // domain-literal:
00731     QString maybeDomainLiteral;
00732     // eat '[':
00733     scursor++;
00734     while ( parseGenericQuotedString( scursor, send, maybeDomainLiteral,
00735                       isCRLF, '[', ']' ) ) {
00736       if ( scursor == send ) {
00737     // end of header: check for closing ']':
00738     if ( *(scursor-1) == ']' ) {
00739       // OK, last char was ']':
00740       result = maybeDomainLiteral;
00741       return true;
00742     } else {
00743       // not OK, domain-literal wasn't closed:
00744       return false;
00745     }
00746       }
00747       // we hit openChar in parseGenericQuotedString.
00748       // include it in maybeDomainLiteral and keep on parsing:
00749       if ( *(scursor-1) == '[' ) {
00750     maybeDomainLiteral += QChar('[');
00751     continue;
00752       }
00753       // OK, real end of domain-literal:
00754       result = maybeDomainLiteral;
00755       return true;
00756     }
00757   } else {
00758     // dot-atom:
00759     QString maybeDotAtom;
00760     if ( parseDotAtom( scursor, send, maybeDotAtom, isCRLF ) ) {
00761       result = maybeDotAtom;
00762       return true;
00763     }
00764   }
00765   return false;
00766 }
00767 
00768 bool parseObsRoute( const char* & scursor, const char* const send,
00769             QStringList & result, bool isCRLF, bool save ) {
00770   while ( scursor != send ) {
00771     eatCFWS( scursor, send, isCRLF );
00772     if ( scursor == send ) return false;
00773 
00774     // empty entry:
00775     if ( *scursor == ',' ) {
00776       scursor++;
00777       if ( save ) result.append( QString::null );
00778       continue;
00779     }
00780 
00781     // empty entry ending the list:
00782     if ( *scursor == ':' ) {
00783       scursor++;
00784       if ( save ) result.append( QString::null );
00785       return true;
00786     }
00787 
00788     // each non-empty entry must begin with '@':
00789     if ( *scursor != '@' )
00790       return false;
00791     else
00792       scursor++;
00793 
00794     QString maybeDomain;
00795     if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) return false;
00796     if ( save ) result.append( maybeDomain );
00797 
00798     // eat the following (optional) comma:
00799     eatCFWS( scursor, send, isCRLF );
00800     if ( scursor == send ) return false;
00801     if ( *scursor == ':' ) { scursor++; return true; }
00802     if ( *scursor == ',' ) scursor++;
00803 
00804   }
00805 
00806   return false;
00807 }
00808 
00809 bool parseAddrSpec( const char* & scursor, const char * const send,
00810             AddrSpec & result, bool isCRLF ) {
00811   //
00812   // STEP 1:
00813   // local-part := dot-atom / quoted-string / word *("." word)
00814   //
00815   // this is equivalent to:
00816   // local-part := word *("." word)
00817 
00818   QString maybeLocalPart;
00819   QString tmp;
00820 
00821   while ( scursor != send ) {
00822     // first, eat any whitespace
00823     eatCFWS( scursor, send, isCRLF );
00824 
00825     char ch = *scursor++;
00826     switch ( ch ) {
00827     case '.': // dot
00828       maybeLocalPart += QChar('.');
00829       break;
00830 
00831     case '@':
00832       goto SAW_AT_SIGN;
00833       break;
00834 
00835     case '"': // quoted-string
00836       tmp = QString::null;
00837       if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) )
00838     maybeLocalPart += tmp;
00839       else
00840     return false;
00841       break;
00842 
00843     default: // atom
00844       scursor--; // re-set scursor to point to ch again
00845       tmp = QString::null;
00846       if ( parseAtom( scursor, send, tmp, false /* no 8bit */ ) )
00847     maybeLocalPart += tmp;
00848       else
00849     return false; // parseAtom can only fail if the first char is non-atext.
00850       break;
00851     }
00852   }
00853 
00854   return false;
00855 
00856 
00857   //
00858   // STEP 2:
00859   // domain
00860   //
00861 
00862 SAW_AT_SIGN:
00863 
00864   assert( *(scursor-1) == '@' );
00865 
00866   QString maybeDomain;
00867   if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) )
00868     return false;
00869 
00870   result.localPart = maybeLocalPart;
00871   result.domain = maybeDomain;
00872 
00873   return true;
00874 }
00875 
00876 
00877 bool parseAngleAddr( const char* & scursor, const char * const send,
00878              AddrSpec & result, bool isCRLF ) {
00879   // first, we need an opening angle bracket:
00880   eatCFWS( scursor, send, isCRLF );
00881   if ( scursor == send || *scursor != '<' ) return false;
00882   scursor++; // eat '<'
00883 
00884   eatCFWS( scursor, send, isCRLF );
00885   if ( scursor == send ) return false;
00886 
00887   if ( *scursor == '@' || *scursor == ',' ) {
00888     // obs-route: parse, but ignore:
00889     KMIME_WARN << "obsolete source route found! ignoring." << endl;
00890     QStringList dummy;
00891     if ( !parseObsRoute( scursor, send, dummy,
00892              isCRLF, false /* don't save */ ) )
00893       return false;
00894     // angle-addr isn't complete until after the '>':
00895     if ( scursor == send ) return false;
00896   }
00897 
00898   // parse addr-spec:
00899   AddrSpec maybeAddrSpec;
00900   if ( !parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) return false;
00901 
00902   eatCFWS( scursor, send, isCRLF );
00903   if ( scursor == send || *scursor != '>' ) return false;
00904   scursor++;
00905 
00906   result = maybeAddrSpec;
00907   return true;
00908 
00909 }
00910 
00911 bool parseMailbox( const char* & scursor, const char * const send,
00912            Mailbox & result, bool isCRLF ) {
00913 
00914   // rfc:
00915   // mailbox := addr-spec / ([ display-name ] angle-addr)
00916   // us:
00917   // mailbox := addr-spec / ([ display-name ] angle-addr)
00918   //                      / (angle-addr "(" display-name ")")
00919 
00920   eatCFWS( scursor, send, isCRLF );
00921   if ( scursor == send ) return false;
00922 
00923   AddrSpec maybeAddrSpec;
00924 
00925   // first, try if it's a vanilla addr-spec:
00926   const char * oldscursor = scursor;
00927   if ( parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) {
00928     result.displayName = QString::null;
00929     result.addrSpec = maybeAddrSpec;
00930     return true;
00931   }
00932   scursor = oldscursor;
00933 
00934   // second, see if there's a display-name:
00935   QString maybeDisplayName;
00936   if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) {
00937     // failed: reset cursor, note absent display-name
00938     maybeDisplayName = QString::null;
00939     scursor = oldscursor;
00940   } else {
00941     // succeeded: eat CFWS
00942     eatCFWS( scursor, send, isCRLF );
00943     if ( scursor == send ) return false;
00944   }
00945 
00946   // third, parse the angle-addr:
00947   if ( !parseAngleAddr( scursor, send, maybeAddrSpec, isCRLF ) )
00948     return false;
00949 
00950   if ( maybeDisplayName.isNull() ) {
00951     // check for the obsolete form of display-name (as comment):
00952     eatWhiteSpace( scursor, send );
00953     if ( scursor != send && *scursor == '(' ) {
00954       scursor++;
00955       if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) )
00956     return false;
00957     }
00958   }
00959 
00960   result.displayName = maybeDisplayName;
00961   result.addrSpec = maybeAddrSpec;
00962   return true;
00963 }
00964 
00965 bool parseGroup( const char* & scursor, const char * const send,
00966          Address & result, bool isCRLF ) {
00967   // group         := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS]
00968   //
00969   // equivalent to:
00970   // group   := display-name ":" [ obs-mbox-list ] ";"
00971 
00972   eatCFWS( scursor, send, isCRLF );
00973   if ( scursor == send ) return false;
00974 
00975   // get display-name:
00976   QString maybeDisplayName;
00977   if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) )
00978     return false;
00979 
00980   // get ":":
00981   eatCFWS( scursor, send, isCRLF );
00982   if ( scursor == send || *scursor != ':' ) return false;
00983 
00984   result.displayName = maybeDisplayName;
00985 
00986   // get obs-mbox-list (may contain empty entries):
00987   scursor++;
00988   while ( scursor != send ) {
00989     eatCFWS( scursor, send, isCRLF );
00990     if ( scursor == send ) return false;
00991 
00992     // empty entry:
00993     if ( *scursor == ',' ) { scursor++; continue; }
00994 
00995     // empty entry ending the list:
00996     if ( *scursor == ';' ) { scursor++; return true; }
00997 
00998     Mailbox maybeMailbox;
00999     if ( !parseMailbox( scursor, send, maybeMailbox, isCRLF ) )
01000       return false;
01001     result.mailboxList.append( maybeMailbox );
01002 
01003     eatCFWS( scursor, send, isCRLF );
01004     // premature end:
01005     if ( scursor == send ) return false;
01006     // regular end of the list:
01007     if ( *scursor == ';' ) { scursor++; return true; }
01008     // eat regular list entry separator:
01009     if ( *scursor == ',' ) scursor++;
01010   }
01011   return false;
01012 }
01013 
01014 
01015 bool parseAddress( const char* & scursor, const char * const send,
01016            Address & result, bool isCRLF ) {
01017   // address       := mailbox / group
01018 
01019   eatCFWS( scursor, send, isCRLF );
01020   if ( scursor == send ) return false;
01021 
01022   // first try if it's a single mailbox:
01023   Mailbox maybeMailbox;
01024   const char * oldscursor = scursor;
01025   if ( parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) {
01026     // yes, it is:
01027     result.displayName = QString::null;
01028     result.mailboxList.append( maybeMailbox );
01029     return true;
01030   }
01031   scursor = oldscursor;
01032 
01033   Address maybeAddress;
01034 
01035   // no, it's not a single mailbox. Try if it's a group:
01036   if ( !parseGroup( scursor, send, maybeAddress, isCRLF ) )
01037     return false;
01038 
01039   result = maybeAddress;
01040   return true;
01041 }
01042 
01043 bool parseAddressList( const char* & scursor, const char * const send,
01044                AddressList & result, bool isCRLF ) {
01045   while ( scursor != send ) {
01046     eatCFWS( scursor, send, isCRLF );
01047     // end of header: this is OK.
01048     if ( scursor == send ) return true;
01049     // empty entry: ignore:
01050     if ( *scursor == ',' ) { scursor++; continue; }
01051 
01052     // parse one entry
01053     Address maybeAddress;
01054     if ( !parseAddress( scursor, send, maybeAddress, isCRLF ) ) return false;
01055     result.append( maybeAddress );
01056 
01057     eatCFWS( scursor, send, isCRLF );
01058     // end of header: this is OK.
01059     if ( scursor == send ) return true;
01060     // comma separating entries: eat it.
01061     if ( *scursor == ',' ) scursor++;
01062   }
01063   return true;
01064 }
01065 
01066 
01067 static QString asterisk = QString::fromLatin1("*0*",1);
01068 static QString asteriskZero = QString::fromLatin1("*0*",2);
01069 //static QString asteriskZeroAsterisk = QString::fromLatin1("*0*",3);
01070 
01071 bool parseParameter( const char* & scursor, const char * const send,
01072              QPair<QString,QStringOrQPair> & result, bool isCRLF ) {
01073   // parameter = regular-parameter / extended-parameter
01074   // regular-parameter = regular-parameter-name "=" value
01075   // extended-parameter =
01076   // value = token / quoted-string
01077   //
01078   // note that rfc2231 handling is out of the scope of this function.
01079   // Therefore we return the attribute as QString and the value as
01080   // (start,length) tupel if we see that the value is encoded
01081   // (trailing asterisk), for parseParameterList to decode...
01082 
01083   eatCFWS( scursor, send, isCRLF );
01084   if ( scursor == send ) return false;
01085 
01086   //
01087   // parse the parameter name:
01088   //
01089   QString maybeAttribute;
01090   if ( !parseToken( scursor, send, maybeAttribute, false /* no 8bit */ ) )
01091     return false;
01092 
01093   eatCFWS( scursor, send, isCRLF );
01094   // premature end: not OK (haven't seen '=' yet).
01095   if ( scursor == send || *scursor != '=' ) return false;
01096   scursor++; // eat '='
01097 
01098   eatCFWS( scursor, send, isCRLF );
01099   if ( scursor == send ) {
01100     // don't choke on attribute=, meaning the value was omitted:
01101     if ( maybeAttribute.endsWith( asterisk ) ) {
01102       KMIME_WARN << "attribute ends with \"*\", but value is empty! "
01103     "Chopping away \"*\"." << endl;
01104       maybeAttribute.truncate( maybeAttribute.length() - 1 );
01105     }
01106     result = qMakePair( maybeAttribute.lower(), QStringOrQPair() );
01107     return true;
01108   }
01109 
01110   const char * oldscursor = scursor;
01111 
01112   //
01113   // parse the parameter value:
01114   //
01115   QStringOrQPair maybeValue;
01116   if ( *scursor == '"' ) {
01117     // value is a quoted-string:
01118     scursor++;
01119     if ( maybeAttribute.endsWith( asterisk ) ) {
01120       // attributes ending with "*" designate extended-parameters,
01121       // which cannot have quoted-strings as values. So we remove the
01122       // trailing "*" to not confuse upper layers.
01123       KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string! "
01124     "Chopping away \"*\"." << endl;
01125       maybeAttribute.truncate( maybeAttribute.length() - 1 );
01126     }
01127 
01128     if ( !parseGenericQuotedString( scursor, send, maybeValue.qstring, isCRLF ) ) {
01129       scursor = oldscursor;
01130       result = qMakePair( maybeAttribute.lower(), QStringOrQPair() );
01131       return false; // this case needs further processing by upper layers!!
01132     }
01133   } else {
01134     // value is a token:
01135     if ( !parseToken( scursor, send, maybeValue.qpair, false /* no 8bit */ ) ) {
01136       scursor = oldscursor;
01137       result = qMakePair( maybeAttribute.lower(), QStringOrQPair() );
01138       return false; // this case needs further processing by upper layers!!
01139     }
01140   }
01141 
01142   result = qMakePair( maybeAttribute.lower(), maybeValue );
01143   return true;
01144 }
01145 
01146 
01147 
01148 bool parseRawParameterList( const char* & scursor, const char * const send,
01149                 QMap<QString,QStringOrQPair> & result,
01150                 bool isCRLF ) {
01151   // we use parseParameter() consecutively to obtain a map of raw
01152   // attributes to raw values. "Raw" here means that we don't do
01153   // rfc2231 decoding and concatenation. This is left to
01154   // parseParameterList(), which will call this function.
01155   //
01156   // The main reason for making this chunk of code a separate
01157   // (private) method is that we can deal with broken parameters
01158   // _here_ and leave the rfc2231 handling solely to
01159   // parseParameterList(), which will still be enough work.
01160 
01161   while ( scursor != send ) {
01162     eatCFWS( scursor, send, isCRLF );
01163     // empty entry ending the list: OK.
01164     if ( scursor == send ) return true;
01165     // empty list entry: ignore.
01166     if ( *scursor == ';' ) { scursor++; continue; }
01167 
01168     QPair<QString,QStringOrQPair> maybeParameter;
01169     if ( !parseParameter( scursor, send, maybeParameter, isCRLF ) ) {
01170       // we need to do a bit of work if the attribute is not
01171       // NULL. These are the cases marked with "needs further
01172       // processing" in parseParameter(). Specifically, parsing of the
01173       // token or the quoted-string, which should represent the value,
01174       // failed. We take the easy way out and simply search for the
01175       // next ';' to start parsing again. (Another option would be to
01176       // take the text between '=' and ';' as value)
01177       if ( maybeParameter.first.isNull() ) return false;
01178       while ( scursor != send ) {
01179     if ( *scursor++ == ';' ) goto IS_SEMICOLON;
01180       }
01181       // scursor == send case: end of list.
01182       return true;
01183     IS_SEMICOLON:
01184       // *scursor == ';' case: parse next entry.
01185       continue;
01186     }
01187     // successful parsing brings us here:
01188     result.insert( maybeParameter.first, maybeParameter.second );
01189 
01190     eatCFWS( scursor, send, isCRLF );
01191     // end of header: ends list.
01192     if ( scursor == send ) return true;
01193     // regular separator: eat it.
01194     if ( *scursor == ';' ) scursor++;
01195   }
01196   return true;
01197 }
01198 
01199 
01200 static void decodeRFC2231Value( Codec* & rfc2231Codec,
01201                 QTextCodec* & textcodec,
01202                 bool isContinuation, QString & value,
01203                 QPair<const char*,int> & source ) {
01204 
01205   //
01206   // parse the raw value into (charset,language,text):
01207   //
01208 
01209   const char * decBegin = source.first;
01210   const char * decCursor = decBegin;
01211   const char * decEnd = decCursor + source.second;
01212 
01213   if ( !isContinuation ) {
01214     // find the first single quote
01215     while ( decCursor != decEnd ) {
01216       if ( *decCursor == '\'' ) break;
01217       else decCursor++;
01218     }
01219 
01220     if ( decCursor == decEnd ) {
01221       // there wasn't a single single quote at all!
01222       // take the whole value to be in latin-1:
01223       KMIME_WARN << "No charset in extended-initial-value. "
01224     "Assuming \"iso-8859-1\"." << endl;
01225       value += QString::fromLatin1( decBegin, source.second );
01226       return;
01227     }
01228 
01229     QCString charset( decBegin, decCursor - decBegin + 1 );
01230 
01231     const char * oldDecCursor = ++decCursor;
01232     // find the second single quote (we ignore the language tag):
01233     while ( decCursor != decEnd ) {
01234       if ( *decCursor == '\'' ) break;
01235       else decCursor++;
01236     }
01237     if ( decCursor == decEnd ) {
01238       KMIME_WARN << "No language in extended-initial-value. "
01239     "Trying to recover." << endl;
01240       decCursor = oldDecCursor;
01241     } else
01242       decCursor++;
01243 
01244     // decCursor now points to the start of the
01245     // "extended-other-values":
01246 
01247     //
01248     // get the decoders:
01249     //
01250 
01251     bool matchOK = false;
01252     textcodec = KGlobal::charsets()->codecForName( charset, matchOK );
01253     if ( !matchOK ) {
01254       textcodec = 0;
01255       KMIME_WARN_UNKNOWN(Charset,charset);
01256     }
01257   }
01258 
01259   if ( !rfc2231Codec ) {
01260     rfc2231Codec = Codec::codecForName("x-kmime-rfc2231");
01261     assert( rfc2231Codec );
01262   }
01263 
01264   if ( !textcodec ) {
01265     value += QString::fromLatin1( decCursor, decEnd - decCursor );
01266     return;
01267   }
01268 
01269   Decoder * dec = rfc2231Codec->makeDecoder();
01270   assert( dec );
01271 
01272   //
01273   // do the decoding:
01274   //
01275 
01276   QByteArray buffer( rfc2231Codec->maxDecodedSizeFor( decEnd - decCursor ) );
01277   QByteArray::Iterator bit = buffer.begin();
01278   QByteArray::ConstIterator bend = buffer.end();
01279 
01280   if ( !dec->decode( decCursor, decEnd, bit, bend ) )
01281     KMIME_WARN << rfc2231Codec->name()
01282            << " codec lies about it's maxDecodedSizeFor()\n"
01283       "result may be truncated" << endl;
01284 
01285   value += textcodec->toUnicode( buffer.begin(), bit - buffer.begin() );
01286 
01287   kdDebug() << "value now: \"" << value << "\"" << endl;
01288   // cleanup:
01289   delete dec;
01290 }
01291 
01292 // known issues:
01293 //  - permutes rfc2231 continuations when the total number of parts
01294 //    exceeds 10 (other-sections then becomes *xy, ie. two digits)
01295 
01296 bool parseParameterList( const char* & scursor, const char * const send,
01297              QMap<QString,QString> & result, bool isCRLF ) {
01298   // parse the list into raw attribute-value pairs:
01299   QMap<QString,QStringOrQPair> rawParameterList;
01300   if (!parseRawParameterList( scursor, send, rawParameterList, isCRLF ) )
01301     return false;
01302 
01303   if ( rawParameterList.isEmpty() ) return true;
01304 
01305   // decode rfc 2231 continuations and alternate charset encoding:
01306 
01307   // NOTE: this code assumes that what QMapIterator delivers is sorted
01308   // by the key!
01309 
01310   Codec * rfc2231Codec = 0;
01311   QTextCodec * textcodec = 0;
01312   QString attribute;
01313   QString value;
01314   enum Modes { NoMode = 0x0, Continued = 0x1, Encoded = 0x2 } mode;
01315 
01316   QMapIterator<QString,QStringOrQPair> it, end = rawParameterList.end();
01317 
01318   for ( it = rawParameterList.begin() ; it != end ; ++it ) {
01319     if ( attribute.isNull() || !it.key().startsWith( attribute ) ) {
01320       //
01321       // new attribute:
01322       //
01323 
01324       // store the last attribute/value pair in the result map now:
01325       if ( !attribute.isNull() ) result.insert( attribute, value );
01326       // and extract the information from the new raw attribute:
01327       value = QString::null;
01328       attribute = it.key();
01329       mode = NoMode;
01330       // is the value encoded?
01331       if ( attribute.endsWith( asterisk ) ) {
01332     attribute.truncate( attribute.length() - 1 );
01333     mode = (Modes) ((int) mode | Encoded);
01334       }
01335       // is the value continued?
01336       if ( attribute.endsWith( asteriskZero ) ) {
01337     attribute.truncate( attribute.length() - 2 );
01338     mode = (Modes) ((int) mode | Continued);
01339       }
01340       //
01341       // decode if necessary:
01342       //
01343       if ( mode & Encoded ) {
01344     decodeRFC2231Value( rfc2231Codec, textcodec,
01345                 false, /* isn't continuation */
01346                 value, (*it).qpair );
01347       } else {
01348     // not encoded.
01349     if ( (*it).qpair.first )
01350       value += QString::fromLatin1( (*it).qpair.first, (*it).qpair.second );
01351     else
01352       value += (*it).qstring;
01353       }
01354 
01355       //
01356       // shortcut-processing when the value isn't encoded:
01357       //
01358 
01359       if ( !(mode & Continued) ) {
01360     // save result already:
01361     result.insert( attribute, value );
01362     // force begin of a new attribute:
01363     attribute = QString::null;
01364       }
01365     } else /* it.key().startsWith( attribute ) */ {
01366       //
01367       // continuation
01368       //
01369 
01370       // ignore the section and trust QMap to have sorted the keys:
01371       if ( it.key().endsWith( asterisk ) ) {
01372     // encoded
01373     decodeRFC2231Value( rfc2231Codec, textcodec,
01374                 true, /* is continuation */
01375                 value, (*it).qpair );
01376       } else {
01377     // not encoded
01378     if ( (*it).qpair.first )
01379       value += QString::fromLatin1( (*it).qpair.first, (*it).qpair.second );
01380     else
01381       value += (*it).qstring;
01382       }
01383     }
01384   }
01385 
01386   // write last attr/value pair:
01387   if ( !attribute.isNull() )
01388     result.insert( attribute, value );
01389 
01390   return true;
01391 }
01392 
01393 static const char * stdDayNames[] = {
01394   "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
01395 };
01396 static const int stdDayNamesLen = sizeof stdDayNames / sizeof *stdDayNames;
01397 
01398 static bool parseDayName( const char* & scursor, const char * const send )
01399 {
01400   // check bounds:
01401   if ( send - scursor < 3 ) return false;
01402 
01403   for ( int i = 0 ; i < stdDayNamesLen ; ++i )
01404     if ( qstrnicmp( scursor, stdDayNames[i], 3 ) == 0 ) {
01405       scursor += 3;
01406       kdDebug() << "found " << stdDayNames[i] << endl;
01407       return true;
01408     }
01409 
01410   return false;
01411 }
01412 
01413 
01414 static const char * stdMonthNames[] = {
01415   "Jan", "Feb", "Mar", "Apr", "May", "Jun",
01416   "Jul", "Aug", "Sep", "Oct", "Nov", "Dez"
01417 };
01418 static const int stdMonthNamesLen =
01419   sizeof stdMonthNames / sizeof *stdMonthNames;
01420 
01421 static bool parseMonthName( const char* & scursor, const char * const send,
01422                 int & result )
01423 {
01424   // check bounds:
01425   if ( send - scursor < 3 ) return false;
01426 
01427   for ( result = 0 ; result < stdMonthNamesLen ; ++result )
01428     if ( qstrnicmp( scursor, stdMonthNames[result], 3 ) == 0 ) {
01429       scursor += 3;
01430       return true;
01431     }
01432 
01433   // not found:
01434   return false;
01435 }
01436 
// Table of alphabetic time zone names and their offsets from GMT in
// seconds (positive values are east of GMT).
static const struct {
  const char * tzName;
  long int secsEastOfGMT;
} timeZones[] = {
  // rfc 822 timezones:
  { "GMT", 0 },
  { "UT", 0 },
  { "EDT", -4*3600 },
  { "EST", -5*3600 },
  { "CDT", -5*3600 },
  { "CST", -6*3600 },
  { "MDT", -6*3600 },
  { "MST", -7*3600 },
  { "PDT", -7*3600 },
  { "PST", -8*3600 },
  // common, non-rfc-822 zones:
  { "CET", 1*3600 },
  { "MET", 1*3600 },
  { "UTC", 0 },
  { "CEST", 2*3600 },
  { "BST", 1*3600 },
  // rfc 822 military timezones:
  { "Z", 0 },
  { "A", -1*3600 },
  { "B", -2*3600 },
  { "C", -3*3600 },
  { "D", -4*3600 },
  { "E", -5*3600 },
  { "F", -6*3600 },
  { "G", -7*3600 },
  { "H", -8*3600 },
  { "I", -9*3600 },
  // J is not used!
  { "K", -10*3600 },
  { "L", -11*3600 },
  { "M", -12*3600 },
  { "N", 1*3600 },
  { "O", 2*3600 },
  { "P", 3*3600 },
  { "Q", 4*3600 },
  { "R", 5*3600 },
  { "S", 6*3600 },
  { "T", 7*3600 },
  { "U", 8*3600 },
  { "V", 9*3600 },
  { "W", 10*3600 },
  { "X", 11*3600 },
  { "Y", 12*3600 },
};
static const int timeZonesLen = sizeof timeZones / sizeof *timeZones;
01487 
01488 static bool parseAlphaNumericTimeZone( const char* & scursor,
01489                        const char * const send,
01490                        long int & secsEastOfGMT,
01491                        bool & timeZoneKnown )
01492 {
01493   QPair<const char*,int> maybeTimeZone(0,0);
01494   if ( !parseToken( scursor, send, maybeTimeZone, false /*no 8bit*/ ) )
01495     return false;
01496   for ( int i = 0 ; i < timeZonesLen ; ++i )
01497     if ( qstrnicmp( timeZones[i].tzName,
01498             maybeTimeZone.first, maybeTimeZone.second ) == 0 ) {
01499       scursor += maybeTimeZone.second;
01500       secsEastOfGMT = timeZones[i].secsEastOfGMT;
01501       timeZoneKnown = true;
01502       return true;
01503     }
01504 
01505   // don't choke just because we don't happen to know the time zone
01506   KMIME_WARN_UNKNOWN(time zone,QCString( maybeTimeZone.first, maybeTimeZone.second+1 ));
01507   secsEastOfGMT = 0;
01508   timeZoneKnown = false;
01509   return true;
01510 }
01511 
// Parses a run of decimal digits starting at scursor (stopping at
// send or at the first non-digit) and returns the number of digits
// consumed. result receives the parsed number; it is 0 if no digit
// was found. There is no protection against overflow for very long
// runs of digits.
static int parseDigits( const char* & scursor, const char * const send,
                        int & result )
{
  result = 0;
  int digits = 0;
  // isdigit() must not be handed a negative value, which a plain char
  // holding an 8bit character may be; hence the cast.
  for ( ; scursor != send && isdigit( static_cast<unsigned char>( *scursor ) ) ;
        scursor++, digits++ ) {
    result *= 10;
    result += int( *scursor - '0' );
  }
  return digits;
}
01524 
01525 static bool parseTimeOfDay( const char* & scursor, const char * const send,
01526                 int & hour, int & min, int & sec, bool isCRLF=false )
01527 {
01528   // time-of-day := 2DIGIT [CFWS] ":" [CFWS] 2DIGIT [ [CFWS] ":" 2DIGIT ]
01529 
01530   //
01531   // 2DIGIT representing "hour":
01532   //
01533   if ( !parseDigits( scursor, send, hour ) ) return false;
01534 
01535   eatCFWS( scursor, send, isCRLF );
01536   if ( scursor == send || *scursor != ':' ) return false;
01537   scursor++; // eat ':'
01538 
01539   eatCFWS( scursor, send, isCRLF );
01540   if ( scursor == send ) return false;
01541 
01542   //
01543   // 2DIGIT representing "minute":
01544   //
01545   if ( !parseDigits( scursor, send, min ) ) return false;
01546 
01547   eatCFWS( scursor, send, isCRLF );
01548   if ( scursor == send ) return true; // seconds are optional
01549 
01550   //
01551   // let's see if we have a 2DIGIT representing "second":
01552   //
01553   if ( *scursor == ':' ) {
01554     // yepp, there are seconds:
01555     scursor++; // eat ':'
01556     eatCFWS( scursor, send, isCRLF );
01557     if ( scursor == send ) return false;
01558 
01559     if ( !parseDigits( scursor, send, sec ) ) return false;
01560   } else {
01561     sec = 0;
01562   }
01563 
01564   return true;
01565 }
01566 
01567 
01568 bool parseTime( const char* & scursor, const char * send,
01569         int & hour, int & min, int & sec, long int & secsEastOfGMT,
01570         bool & timeZoneKnown, bool isCRLF )
01571 {
01572   // time := time-of-day CFWS ( zone / obs-zone )
01573   //
01574   // obs-zone    := "UT" / "GMT" /
01575   //                "EST" / "EDT" / ; -0500 / -0400
01576   //                "CST" / "CDT" / ; -0600 / -0500
01577   //                "MST" / "MDT" / ; -0700 / -0600
01578   //                "PST" / "PDT" / ; -0800 / -0700
01579   //                "A"-"I" / "a"-"i" /
01580   //                "K"-"Z" / "k"-"z"
01581 
01582   eatCFWS( scursor, send, isCRLF );
01583   if ( scursor == send ) return false;
01584 
01585   if ( !parseTimeOfDay( scursor, send, hour, min, sec, isCRLF ) )
01586     return false;
01587 
01588   eatCFWS( scursor, send, isCRLF );
01589   if ( scursor == send ) {
01590     timeZoneKnown = false;
01591     secsEastOfGMT = 0;
01592     return true; // allow missing timezone
01593   }
01594 
01595   timeZoneKnown = true;
01596   if ( *scursor == '+' || *scursor == '-' ) {
01597     // remember and eat '-'/'+':
01598     const char sign = *scursor++;
01599     // numerical timezone:
01600     int maybeTimeZone;
01601     if ( parseDigits( scursor, send, maybeTimeZone ) != 4 ) return false;
01602     secsEastOfGMT = 60 * ( maybeTimeZone / 100 * 60 + maybeTimeZone % 100 );
01603     if ( sign == '-' ) {
01604       secsEastOfGMT *= -1;
01605       if ( secsEastOfGMT == 0 )
01606     timeZoneKnown = false; // -0000 means indetermined tz
01607     }
01608   } else {
01609     // maybe alphanumeric timezone:
01610     if ( !parseAlphaNumericTimeZone( scursor, send, secsEastOfGMT, timeZoneKnown ) )
01611       return false;
01612   }
01613   return true;
01614 }
01615 
01616 
01617 bool parseDateTime( const char* & scursor, const char * const send,
01618             Types::DateTime & result, bool isCRLF )
01619 {
01620   // Parsing date-time; strict mode:
01621   //
01622   // date-time   := [ [CFWS] day-name [CFWS] "," ]                      ; wday
01623   // (expanded)     [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date
01624   //                time
01625   //
01626   // day-name    := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
01627   // month-name  := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" /
01628   //                "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dez"
01629 
01630   struct tm maybeDateTime = {
01631 #ifdef HAVE_TM_GMTOFF
01632     0, 0, // initializers for members tm_gmtoff and tm_zone
01633 #endif
01634     0, 0, 0, 0, 0, 0, 0, 0, 0
01635   };
01636 
01637   eatCFWS( scursor, send, isCRLF );
01638   if ( scursor == send ) return false;
01639 
01640   //
01641   // let's see if there's a day-of-week:
01642   //
01643   if ( parseDayName( scursor, send ) ) {
01644     eatCFWS( scursor, send, isCRLF );
01645     if ( scursor == send ) return false;
01646     // day-name should be followed by ',' but we treat it as optional:
01647     if ( *scursor == ',' ) {
01648       scursor++; // eat ','
01649       eatCFWS( scursor, send, isCRLF );
01650     }
01651   }
01652 
01653   //
01654   // 1*2DIGIT representing "day" (of month):
01655   //
01656   int maybeDay;
01657   if ( !parseDigits( scursor, send, maybeDay ) ) return false;
01658 
01659   eatCFWS( scursor, send, isCRLF );
01660   if ( scursor == send ) return false;
01661 
01662   // success: store maybeDay in maybeDateTime:
01663   maybeDateTime.tm_mday = maybeDay;
01664 
01665   //
01666   // month-name:
01667   //
01668   int maybeMonth = 0;
01669   if ( !parseMonthName( scursor, send, maybeMonth ) ) return false;
01670   if ( scursor == send ) return false;
01671   assert( maybeMonth >= 0 ); assert( maybeMonth <= 11 );
01672 
01673   eatCFWS( scursor, send, isCRLF );
01674   if ( scursor == send ) return false;
01675 
01676   // success: store maybeMonth in maybeDateTime:
01677   maybeDateTime.tm_mon = maybeMonth;
01678 
01679   //
01680   // 2*DIGIT representing "year":
01681   //
01682   int maybeYear;
01683   if ( !parseDigits( scursor, send, maybeYear ) ) return false;
01684   // RFC 2822 4.3 processing:
01685   if ( maybeYear < 50 )
01686     maybeYear += 2000;
01687   else if ( maybeYear < 1000 )
01688     maybeYear += 1900;
01689   // else keep as is
01690   if ( maybeYear < 1900 ) return false; // rfc2822, 3.3
01691 
01692   eatCFWS( scursor, send, isCRLF );
01693   if ( scursor == send ) return false;
01694 
01695   // success: store maybeYear in maybeDateTime:
01696   maybeDateTime.tm_year = maybeYear - 1900;
01697 
01698   //
01699   // time
01700   //
01701   int maybeHour, maybeMinute, maybeSecond;
01702   long int secsEastOfGMT;
01703   bool timeZoneKnown = true;
01704 
01705   if ( !parseTime( scursor, send,
01706            maybeHour, maybeMinute, maybeSecond,
01707            secsEastOfGMT, timeZoneKnown, isCRLF ) )
01708     return false;
01709 
01710   // success: store everything in maybeDateTime:
01711   maybeDateTime.tm_hour = maybeHour;
01712   maybeDateTime.tm_min = maybeMinute;
01713   maybeDateTime.tm_sec = maybeSecond;
01714   maybeDateTime.tm_isdst = DateFormatter::isDaylight();
01715   // now put everything together and check if mktime(3) likes it:
01716   result.time = mktime( &maybeDateTime );
01717   if ( result.time == (time_t)(-1) ) return false;
01718 
01719   // adjust to UTC/GMT:
01720   //result.time -= secsEastOfGMT;
01721   result.secsEastOfGMT = secsEastOfGMT;
01722   result.timeZoneKnown = timeZoneKnown;
01723 
01724   return true;
01725 }
01726 
01727 #if 0
01728 bool tryToMakeAnySenseOfDateString( const char* & scursor,
01729                     const char * const send,
01730                     time_t & result, bool isCRLF )
01731 {
01732   return false;
01733 }
01734 #endif
01735 
01736 } // namespace HeaderParsing
01737 
01738 } // namespace KMime
KDE Home | KDE Accessibility Home | Description of Access Keys