00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #include "kmime_charfreq.h"
00016
00017 namespace KMime {
00018
00019 CharFreq::CharFreq( const QByteArray & buf )
00020 : NUL(0),
00021 CTL(0),
00022 CR(0), LF(0),
00023 CRLF(0),
00024 printable(0),
00025 eightBit(0),
00026 total(0),
00027 lineMin(0xffffffff),
00028 lineMax(0),
00029 mTrailingWS(false),
00030 mLeadingFrom(false)
00031 {
00032 if ( !buf.isEmpty() )
00033 count( buf.data(), buf.size() );
00034 }
00035
00036 CharFreq::CharFreq( const char * buf, size_t len )
00037 : NUL(0),
00038 CTL(0),
00039 CR(0), LF(0),
00040 CRLF(0),
00041 printable(0),
00042 eightBit(0),
00043 total(0),
00044 lineMin(0xffffffff),
00045 lineMax(0),
00046 mTrailingWS(false),
00047 mLeadingFrom(false)
00048 {
00049 if ( buf && len > 0 )
00050 count( buf, len );
00051 }
00052
// Returns true when ch is a horizontal tab or a space, false for every
// other character (line breaks included).
static inline bool isWS( char ch )
{
  switch ( ch ) {
  case '\t':
  case ' ':
    return true;
  default:
    return false;
  }
}
00054
00055 void CharFreq::count( const char * it, size_t len ) {
00056
00057 const char * end = it + len;
00058 uint currentLineLength = 0;
00059
00060
00061 char prevChar = '\n';
00062 char prevPrevChar = 0;
00063
00064 for ( ; it != end ; ++it ) {
00065 ++currentLineLength;
00066 switch ( *it ) {
00067 case '\0': ++NUL; break;
00068 case '\r': ++CR; break;
00069 case '\n': ++LF;
00070 if ( prevChar == '\r' ) { --currentLineLength; ++CRLF; }
00071 if ( currentLineLength >= lineMax ) lineMax = currentLineLength-1;
00072 if ( currentLineLength <= lineMin ) lineMin = currentLineLength-1;
00073 if ( !mTrailingWS )
00074 if ( isWS( prevChar ) || ( prevChar == '\r' && isWS( prevPrevChar ) ) )
00075 mTrailingWS = true;
00076 currentLineLength = 0;
00077 break;
00078 case 'F':
00079 if ( !mLeadingFrom )
00080 if ( prevChar == '\n' && end - it >= 5 && !qstrncmp( "From ", it, 5 ) )
00081 mLeadingFrom = true;
00082 ++printable;
00083 break;
00084 default:
00085 {
00086 uchar c = *it;
00087 if ( c == '\t' || c >= ' ' && c <= '~' )
00088 ++printable;
00089 else if ( c == 127 || c < ' ' )
00090 ++CTL;
00091 else
00092 ++eightBit;
00093 }
00094 }
00095 prevPrevChar = prevChar;
00096 prevChar = *it;
00097 }
00098
00099
00100 if ( currentLineLength >= lineMax ) lineMax = currentLineLength;
00101 if ( currentLineLength <= lineMin ) lineMin = currentLineLength;
00102
00103
00104 if ( isWS( prevChar ) )
00105 mTrailingWS = true;
00106
00107 total = len;
00108 }
00109
00110 bool CharFreq::isEightBitData() const {
00111 return type() == EightBitData;
00112 }
00113
00114 bool CharFreq::isEightBitText() const {
00115 return type() == EightBitText;
00116 }
00117
00118 bool CharFreq::isSevenBitData() const {
00119 return type() == SevenBitData;
00120 }
00121
00122 bool CharFreq::isSevenBitText() const {
00123 return type() == SevenBitText;
00124 }
00125
00126 bool CharFreq::hasTrailingWhitespace() const {
00127 return mTrailingWS;
00128 }
00129
00130 bool CharFreq::hasLeadingFrom() const {
00131 return mLeadingFrom;
00132 }
00133
00134 CharFreq::Type CharFreq::type() const {
00135 #if 0
00136 qDebug( "Total: %d; NUL: %d; CTL: %d;\n"
00137 "CR: %d; LF: %d; CRLF: %d;\n"
00138 "lineMin: %d; lineMax: %d;\n"
00139 "printable: %d; eightBit: %d;\n"
00140 "trailing whitespace: %s;\n"
00141 "leading 'From ': %s;\n",
00142 total, NUL, CTL, CR, LF, CRLF, lineMin, lineMax,
00143 printable, eightBit,
00144 mTrailingWS ? "yes" : "no" , mLeadingFrom ? "yes" : "no" );
00145 #endif
00146 if ( NUL )
00147 return Binary;
00148
00149
00150 if ( eightBit ) {
00151 if ( lineMax > 988 ) return EightBitData;
00152 if ( CR != CRLF || controlCodesRatio() > 0.2 ) return EightBitData;
00153 return EightBitText;
00154 }
00155
00156
00157 if ( lineMax > 988 ) return SevenBitData;
00158 if ( CR != CRLF || controlCodesRatio() > 0.2 ) return SevenBitData;
00159
00160
00161 return SevenBitText;
00162 }
00163
00164 float CharFreq::printableRatio() const {
00165 if ( total ) return float(printable) / float(total);
00166 else return 0;
00167 }
00168
00169 float CharFreq::controlCodesRatio() const {
00170 if ( total ) return float(CTL) / float(total);
00171 else return 0;
00172 }
00173
00174 }
00175
00176