libkmime

kmime_parsers.cpp

00001 /*
00002     kmime_parsers.cpp
00003 
00004     KMime, the KDE internet mail/usenet news message library.
00005     Copyright (c) 2001 the KMime authors.
00006     See file AUTHORS for details
00007 
00008     This program is free software; you can redistribute it and/or modify
00009     it under the terms of the GNU General Public License as published by
00010     the Free Software Foundation; either version 2 of the License, or
00011     (at your option) any later version.
00012     You should have received a copy of the GNU General Public License
00013     along with this program; if not, write to the Free Software Foundation,
00014     Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
00015 */
00016 #include "kmime_parsers.h"
00017 
00018 #include <qregexp.h>
00019 
00020 using namespace KMime::Parser;
00021 
00022 namespace KMime {
00023 namespace Parser {
00024 
00025 
00026 MultiPart::MultiPart(const QCString &src, const QCString &boundary)
00027 {
00028   s_rc=src;
00029   b_oundary=boundary;
00030 }
00031 
00032 
00033 bool MultiPart::parse()
00034 {
00035   QCString b="--"+b_oundary, part;
00036   int pos1=0, pos2=0, blen=b.length();
00037 
00038   p_arts.clear();
00039 
00040   //find the first valid boundary
00041   while(1) {
00042     if( (pos1=s_rc.find(b, pos1))==-1 || pos1==0 || s_rc[pos1-1]=='\n' ) //valid boundary found or no boundary at all
00043       break;
00044     pos1+=blen; //boundary found but not valid => skip it;
00045   }
00046 
00047   if(pos1>-1) {
00048     pos1+=blen;
00049     if(s_rc[pos1]=='-' && s_rc[pos1+1]=='-') // the only valid boundary is the end-boundary - this message is *really* broken
00050       pos1=-1; //we give up
00051     else if( (pos1-blen)>1 ) //preamble present
00052       p_reamble=s_rc.left(pos1-blen);
00053   }
00054 
00055 
00056   while(pos1>-1 && pos2>-1) {
00057 
00058     //skip the rest of the line for the first boundary - the message-part starts here
00059     if( (pos1=s_rc.find('\n', pos1))>-1 ) { //now search the next linebreak
00060       //now find the next valid boundary
00061       pos2=++pos1; //pos1 and pos2 point now to the beginning of the next line after the boundary
00062       while(1) {
00063         if( (pos2=s_rc.find(b, pos2))==-1 || s_rc[pos2-1]=='\n' ) //valid boundary or no more boundaries found
00064           break;
00065         pos2+=blen; //boundary is invalid => skip it;
00066       }
00067 
00068       if(pos2==-1) { // no more boundaries found
00069         part=s_rc.mid(pos1, s_rc.length()-pos1); //take the rest of the string
00070         p_arts.append(part);
00071         pos1=-1;
00072         pos2=-1; //break;
00073       }
00074       else {
00075         part=s_rc.mid(pos1, pos2-pos1 - 1 ); // pos2 - 1 (\n) is part of the boundary (see RFC 2046, section 5.1.1)
00076         p_arts.append(part);
00077         pos2+=blen; //pos2 points now to the first charakter after the boundary
00078         if(s_rc[pos2]=='-' && s_rc[pos2+1]=='-') { //end-boundary
00079           pos1=pos2+2; //pos1 points now to the character directly after the end-boundary
00080           if( (pos1=s_rc.find('\n', pos1))>-1 ) //skipt the rest of this line
00081             e_pilouge=s_rc.mid(pos1+1, s_rc.length()-pos1-1); //everything after the end-boundary is considered as the epilouge
00082           pos1=-1;
00083           pos2=-1; //break
00084         }
00085         else {
00086           pos1=pos2; //the search continues ...
00087         }
00088       }
00089     }
00090   }
00091 
00092   return (!p_arts.isEmpty());
00093 }
00094 
00095 //============================================================================================
00096 
00097 
00098 NonMimeParser::NonMimeParser(const QCString &src) :
00099   s_rc(src), p_artNr(-1), t_otalNr(-1)
00100 {}
00101 
00105 QCString NonMimeParser::guessMimeType(const QCString& fileName)
00106 {
00107   QCString tmp, mimeType;
00108   int pos;
00109 
00110   if(!fileName.isEmpty()) {
00111     pos=fileName.findRev('.');
00112     if(pos++ != -1) {
00113       tmp=fileName.mid(pos, fileName.length()-pos).upper();
00114       if(tmp=="JPG" || tmp=="JPEG")       mimeType="image/jpeg";
00115       else if(tmp=="GIF")                 mimeType="image/gif";
00116       else if(tmp=="PNG")                 mimeType="image/png";
00117       else if(tmp=="TIFF" || tmp=="TIF")  mimeType="image/tiff";
00118       else if(tmp=="XPM")                 mimeType="image/x-xpm";
00119       else if(tmp=="XBM")                 mimeType="image/x-xbm";
00120       else if(tmp=="BMP")                 mimeType="image/x-bmp";
00121       else if(tmp=="TXT" ||
00122               tmp=="ASC" ||
00123               tmp=="H" ||
00124               tmp=="C" ||
00125               tmp=="CC" ||
00126               tmp=="CPP")                 mimeType="text/plain";
00127       else if(tmp=="HTML" || tmp=="HTM")  mimeType="text/html";
00128       else                                mimeType="application/octet-stream";
00129     }
00130     else mimeType="application/octet-stream";
00131   }
00132   else mimeType="application/octet-stream";
00133 
00134   return mimeType;
00135 }
00136 
00137 //============================================================================================
00138 
00139 
00140 UUEncoded::UUEncoded(const QCString &src, const QCString &subject) :
00141   NonMimeParser(src), s_ubject(subject)
00142 {}
00143 
00144 
00145 bool UUEncoded::parse()
00146 {
00147   int currentPos=0;
00148   bool success=true, firstIteration=true;
00149 
00150   while (success) {
00151     int beginPos=currentPos, uuStart=currentPos, endPos=0, lineCount=0, MCount=0, pos=0, len=0;
00152     bool containsBegin=false, containsEnd=false;
00153     QCString tmp,fileName;
00154 
00155     if( (beginPos=s_rc.find(QRegExp("begin [0-9][0-9][0-9]"),currentPos))>-1 && (beginPos==0 || s_rc.at(beginPos-1)=='\n') ) {
00156       containsBegin=true;
00157       uuStart=s_rc.find('\n', beginPos);
00158       if(uuStart==-1) {//no more line breaks found, we give up
00159         success = false;
00160         break;
00161       } else
00162         uuStart++; //points now at the beginning of the next line
00163     }
00164       else beginPos=currentPos;
00165 
00166     if ( (endPos=s_rc.find("\nend",(uuStart>0)? uuStart-1:0))==-1 )
00167       endPos=s_rc.length(); //no end found
00168     else
00169       containsEnd=true;
00170 
00171     if ((containsBegin && containsEnd) || firstIteration) {
00172 
00173       //printf("beginPos=%d , uuStart=%d , endPos=%d\n", beginPos, uuStart, endPos);
00174       //all lines in a uuencoded text start with 'M'
00175       for(int idx=uuStart; idx<endPos; idx++)
00176         if(s_rc[idx]=='\n') {
00177           lineCount++;
00178           if(idx+1<endPos && s_rc[idx+1]=='M') {
00179             idx++;
00180             MCount++;
00181           }
00182         }
00183 
00184       //printf("lineCount=%d , MCount=%d\n", lineCount, MCount);
00185       if( MCount==0 || (lineCount-MCount)>10 ||
00186           ((!containsBegin || !containsEnd) && (MCount<15)) ) {  // harder check for splitted-articles
00187         success = false;
00188         break; //too many "non-M-Lines" found, we give up
00189       }
00190 
00191       if( (!containsBegin || !containsEnd) && s_ubject) {  // message may be split up => parse subject
00192     QRegExp rx("[0-9]+/[0-9]+");
00193     pos=rx.search(QString(s_ubject), 0);
00194     len=rx.matchedLength();
00195         if(pos!=-1) {
00196           tmp=s_ubject.mid(pos, len);
00197           pos=tmp.find('/');
00198           p_artNr=tmp.left(pos).toInt();
00199           t_otalNr=tmp.right(tmp.length()-pos-1).toInt();
00200         } else {
00201           success = false;
00202           break; //no "part-numbers" found in the subject, we give up
00203         }
00204       }
00205 
00206       //everything before "begin" is text
00207       if(beginPos>0)
00208         t_ext.append(s_rc.mid(currentPos,beginPos-currentPos));
00209 
00210       if(containsBegin)
00211         fileName = s_rc.mid(beginPos+10, uuStart-beginPos-11); //everything between "begin ### " and the next LF is considered as the filename
00212       else
00213         fileName = "";
00214       f_ilenames.append(fileName);
00215       b_ins.append(s_rc.mid(uuStart, endPos-uuStart+1)); //everything beetween "begin" and "end" is uuencoded     
00216       m_imeTypes.append(guessMimeType(fileName));
00217       firstIteration=false;
00218 
00219       int next = s_rc.find('\n', endPos+1);
00220       if(next==-1) { //no more line breaks found, we give up
00221         success = false;
00222         break;
00223       } else
00224         next++; //points now at the beginning of the next line
00225       currentPos = next;
00226 
00227     } else {
00228       success = false;
00229     }
00230   }
00231 
00232   // append trailing text part of the article
00233   t_ext.append(s_rc.right(s_rc.length()-currentPos));
00234 
00235   return ((b_ins.count()>0) || isPartial());
00236 }
00237 
00238 
00239 //============================================================================================
00240 
00241 
00242 YENCEncoded::YENCEncoded(const QCString &src) :
00243   NonMimeParser(src)
00244 {}
00245 
00246 
00247 bool YENCEncoded::yencMeta(QCString& src, const QCString& name, int* value)
00248 {
00249   bool found = false;
00250   QCString sought=name + "=";
00251 
00252   int iPos=src.find( sought);
00253   if (iPos>-1) {
00254     int pos1=src.find(' ', iPos);
00255     int pos2=src.find('\r', iPos);
00256     int pos3=src.find('\t', iPos);
00257     int pos4=src.find('\n', iPos);
00258     if (pos2>=0 && (pos1<0 || pos1>pos2))
00259       pos1=pos2;
00260     if (pos3>=0 && (pos1<0 || pos1>pos3))
00261       pos1=pos3;
00262     if (pos4>=0 && (pos1<0 || pos1>pos4))
00263       pos1=pos4;
00264     iPos=src.findRev( '=', pos1)+1;
00265     if (iPos<pos1) {
00266       char c=src.at( iPos);
00267       if ( c>='0' && c<='9') {
00268         found=true;
00269         *value=src.mid( iPos, pos1-iPos).toInt();
00270       }
00271     }
00272   }
00273   return found;
00274 }
00275 
00276 
00277 bool YENCEncoded::parse()
00278 {
00279   int currentPos=0;
00280   bool success=true;
00281 
00282   while (success) {
00283     int beginPos=currentPos, yencStart=currentPos;
00284     bool containsPart=false;
00285     QCString fileName,mimeType;
00286 
00287     if ((beginPos=s_rc.find("=ybegin ", currentPos))>-1 && ( beginPos==0 || s_rc.at( beginPos-1)=='\n') ) {
00288       yencStart=s_rc.find( '\n', beginPos);
00289       if (yencStart==-1) { // no more line breaks found, give up
00290         success = false;
00291         break;
00292       } else {
00293         yencStart++;
00294         if (s_rc.find("=ypart", yencStart)==yencStart) {
00295           containsPart=true;
00296           yencStart=s_rc.find( '\n', yencStart);
00297           if ( yencStart== -1) {
00298             success=false;
00299             break;
00300           }
00301           yencStart++;
00302         }
00303       }
00304       // Try to identify yenc meta data
00305 
00306       // Filenames can contain any embedded chars until end of line
00307       QCString meta=s_rc.mid(beginPos, yencStart-beginPos);
00308       int namePos=meta.find("name=");
00309       if (namePos== -1) {
00310         success=false;
00311         break;
00312       }
00313       int eolPos=meta.find('\r', namePos);
00314       if (eolPos== -1)
00315       eolPos=meta.find('\n', namePos);    
00316       if (eolPos== -1) {
00317         success=false;
00318         break;
00319       }
00320       fileName=meta.mid(namePos+5, eolPos-(namePos+5));
00321 
00322       // Other metadata is integer
00323       int yencLine;
00324       if (!yencMeta(meta, "line", &yencLine)) {
00325         success=false;
00326         break;
00327       }
00328       int yencSize;
00329       if (!yencMeta( meta, "size", &yencSize)) {
00330         success=false;
00331         break;
00332       }
00333 
00334       int partBegin, partEnd;
00335       if (containsPart) {
00336         if (!yencMeta(meta, "part", &p_artNr)) {
00337           success=false;
00338           break;
00339         }
00340         if (!yencMeta(meta, "begin", &partBegin) || !
00341              yencMeta(meta, "end", &partEnd)) {
00342           success=false;
00343           break;
00344         }
00345         if (!yencMeta(meta, "total", &t_otalNr))
00346           t_otalNr=p_artNr+1;
00347         if (yencSize==partEnd-partBegin+1)
00348           t_otalNr=1; else
00349         yencSize=partEnd-partBegin+1;
00350       }
00351 
00352       // We have a valid yenc header; now we extract the binary data
00353       int totalSize=0;
00354       int pos=yencStart;
00355       int len=s_rc.length();
00356       bool lineStart=true;
00357       int lineLength=0;
00358       bool containsEnd=false;
00359       QByteArray binary = QByteArray(yencSize);
00360       while (pos<len) {
00361         int ch=s_rc.at(pos);
00362         if (ch<0)
00363           ch+=256;
00364         if (ch=='\r')
00365         {
00366           if (lineLength!=yencLine && totalSize!=yencSize)          
00367             break;          
00368           pos++;
00369         }
00370         else if (ch=='\n')
00371         {
00372           lineStart=true;
00373           lineLength=0;
00374           pos++;
00375         }
00376         else
00377         {
00378           if (ch=='=')
00379           {
00380             if (pos+1<len)
00381             {
00382               ch=s_rc.at( pos+1);
00383               if (lineStart && ch=='y')
00384               {
00385                 containsEnd=true;
00386                 break;
00387               }
00388               pos+=2;
00389               ch-=64+42;
00390               if (ch<0)
00391                 ch+=256;
00392               if (totalSize>=yencSize)            
00393                 break;            
00394               binary.at(totalSize++)=ch;
00395               lineLength++;
00396             }
00397             else            
00398               break;            
00399           }
00400           else
00401           {
00402             ch-=42;
00403             if (ch<0)
00404               ch+=256;
00405             if (totalSize>=yencSize)            
00406               break;
00407             binary.at(totalSize++)=ch;
00408             lineLength++;
00409             pos++;
00410           }
00411           lineStart=false;
00412         }
00413       }
00414       
00415       if (!containsEnd)
00416       {
00417         success=false;
00418         break;
00419       }
00420       if (totalSize!=yencSize)
00421       {        
00422         success=false;
00423         break;
00424       }
00425 
00426       // pos now points to =yend; get end data
00427       eolPos=s_rc.find('\n', pos);
00428       if (eolPos== -1)
00429       {
00430         success=false;
00431         break;
00432       }
00433       meta=s_rc.mid(pos, eolPos-pos);
00434       if (!yencMeta(meta, "size", &totalSize))
00435       {        
00436         success=false;
00437         break;
00438       }
00439       if (totalSize!=yencSize)
00440       {        
00441         success=false;
00442         break;
00443       }
00444 
00445       f_ilenames.append(fileName);
00446       m_imeTypes.append(guessMimeType( fileName));
00447       b_ins.append(binary);
00448 
00449       //everything before "begin" is text
00450       if(beginPos>0)
00451         t_ext.append(s_rc.mid(currentPos,beginPos-currentPos));
00452       currentPos = eolPos+1;
00453 
00454     } else {
00455       success = false;
00456     }
00457   }
00458 
00459   // append trailing text part of the article
00460   t_ext.append(s_rc.right(s_rc.length()-currentPos));
00461 
00462   return b_ins.count()>0;
00463 }
00464 
00465 } // namespace Parser
00466 } // namespace KMime
KDE Home | KDE Accessibility Home | Description of Access Keys