akregator/src/librss
tools_p.cpp00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "tools_p.h"
00012
00013 #include <krfcdate.h>
00014 #include <qdom.h>
00015 #include <kcharsets.h>
00016 #include <qregexp.h>
00017
00018 namespace RSS {
00019
00020 time_t parseISO8601Date(const QString &s)
00021 {
00022
00023 if (s.stripWhiteSpace().left(4).toInt() < 1000)
00024 return 0;
00025
00026
00027 if (s.find('T') != -1)
00028 return KRFCDate::parseDateISO8601(s);
00029 else
00030 return KRFCDate::parseDateISO8601(s + "T12:00:00");
00031 }
00032
00033 QString childNodesAsXML(const QDomNode& parent)
00034 {
00035 QDomNodeList list = parent.childNodes();
00036 QString str;
00037 QTextStream ts( &str, IO_WriteOnly );
00038 for (uint i = 0; i < list.count(); ++i)
00039 ts << list.item(i);
00040 return str.stripWhiteSpace();
00041 }
00042
00043 static QString plainTextToHtml(const QString& plainText)
00044 {
00045 QString str(plainText);
00046 str.replace("&", "&");
00047 str.replace("\"", """);
00048 str.replace("<", "<");
00049
00050 str.replace("\n", "<br/>");
00051 return str;
00052 }
00053
/**
 * Classification of a content payload, used to select how the payload is
 * converted into a string.
 */
enum ContentFormat
{
    Text,   // plain text
    HTML,   // HTML markup
    XML,    // XML markup
    Binary  // anything else
};
00055
00056 static ContentFormat mapTypeToFormat(const QString& modep, const QString& typep, const QString& src)
00057 {
00058 QString mode = modep.isNull() ? "escaped" : modep;
00059 QString type = typep;
00060
00061
00062
00063
00064 if (type.isNull() && src.isEmpty())
00065 type = QString::fromUtf8("text");
00066
00067 if (type == QString::fromUtf8("html")
00068 || type == QString::fromUtf8("text/html"))
00069 return HTML;
00070
00071 if (type == QString::fromUtf8("text")
00072 || (type.startsWith(QString::fromUtf8("text/"), false)
00073 && !type.startsWith(QString::fromUtf8("text/xml"), false))
00074 )
00075 return Text;
00076
00077 QStringList xmltypes;
00078 xmltypes.append(QString::fromUtf8("xhtml"));
00079
00080 xmltypes.append(QString::fromUtf8("text/xml"));
00081 xmltypes.append(QString::fromUtf8("application/xml"));
00082 xmltypes.append(QString::fromUtf8("text/xml-external-parsed-entity"));
00083 xmltypes.append(QString::fromUtf8("application/xml-external-parsed-entity"));
00084 xmltypes.append(QString::fromUtf8("application/xml-dtd"));
00085
00086
00087 if (xmltypes.contains(type)
00088 || type.endsWith(QString::fromUtf8("+xml"), false)
00089 || type.endsWith(QString::fromUtf8("/xml"), false))
00090 return XML;
00091
00092 return Binary;
00093 }
00094
00095 static QString extractAtomContent(const QDomElement& e)
00096 {
00097 ContentFormat format = mapTypeToFormat(e.attribute("mode"),
00098 e.attribute("type"),
00099 e.attribute("src"));
00100
00101 switch (format)
00102 {
00103 case HTML:
00104 return KCharsets::resolveEntities(e.text().simplifyWhiteSpace());
00105 case Text:
00106 return plainTextToHtml(e.text().stripWhiteSpace());
00107 case XML:
00108 return childNodesAsXML(e).simplifyWhiteSpace();
00109 case Binary:
00110 default:
00111 return QString();
00112 }
00113
00114 return QString();
00115 }
00116
00117 QString extractNode(const QDomNode &parent, const QString &elemName, bool isInlined)
00118 {
00119 QDomNode node = parent.namedItem(elemName);
00120 if (node.isNull())
00121 return QString::null;
00122
00123 QDomElement e = node.toElement();
00124 QString result = e.text().stripWhiteSpace();
00125
00126 if (elemName == "content")
00127 {
00128 result = extractAtomContent(e);
00129 }
00130 else
00131 {
00132 bool hasPre = result.contains("<pre>",false);
00133 bool hasHtml = hasPre || result.contains("<");
00134 if(!isInlined && !hasHtml)
00135 result = result = result.replace(QChar('\n'), "<br />");
00136 if(!hasPre)
00137 result = result.simplifyWhiteSpace();
00138 }
00139
00140 return result.isEmpty() ? QString::null : result;
00141 }
00142
00143 QString extractTitle(const QDomNode & parent)
00144 {
00145 QDomNode node = parent.namedItem(QString::fromLatin1("title"));
00146 if (node.isNull())
00147 return QString::null;
00148
00149 QString result = node.toElement().text();
00150
00151 result = KCharsets::resolveEntities(KCharsets::resolveEntities(result).replace(QRegExp("<[^>]*>"), "").remove("\\"));
00152 result = result.simplifyWhiteSpace();
00153
00154 if (result.isEmpty())
00155 return QString::null;
00156
00157 return result;
00158 }
00159
00160 static void authorFromString(const QString& strp, QString& name, QString& email)
00161 {
00162 QString str = strp.stripWhiteSpace();
00163 if (str.isEmpty())
00164 return;
00165
00166
00167
00168
00169 QRegExp remail("<?([^@\\s<]+@[^>\\s]+)>?");
00170
00171
00172 int pos = remail.search(str);
00173 if (pos != -1)
00174 {
00175 QString all = remail.cap(0);
00176 email = remail.cap(1);
00177 str.replace(all, "");
00178 }
00179
00180
00181
00182 name = str.simplifyWhiteSpace();
00183
00184
00185
00186 QRegExp rename("\\(([^\\)]*)\\)");
00187
00188 pos = rename.search(name);
00189
00190 if (pos != -1)
00191 {
00192 name = rename.cap(1);
00193 }
00194
00195 name = name.isEmpty() ? QString() : name;
00196 email = email.isEmpty() ? QString() : email;
00197 }
00198
00199 QString parseItemAuthor(const QDomElement& element, Format format, Version version)
00200 {
00201 QString name;
00202 QString email;
00203
00204 QDomElement dcCreator = element.namedItem("dc:creator").toElement();
00205
00206 if (!dcCreator.isNull())
00207 authorFromString(dcCreator.text(), name, email);
00208 else if (format == AtomFeed)
00209 {
00210 QDomElement atomAuthor = element.namedItem("author").toElement();
00211 if (atomAuthor.isNull())
00212 atomAuthor = element.namedItem("atom:author").toElement();
00213 if (!atomAuthor.isNull())
00214 {
00215 QDomElement atomName = atomAuthor.namedItem("name").toElement();
00216 if (atomName.isNull())
00217 atomName = atomAuthor.namedItem("atom:name").toElement();
00218 name = atomName.text().stripWhiteSpace();
00219
00220 QDomElement atomEmail = atomAuthor.namedItem("email").toElement();
00221 if (atomEmail.isNull())
00222 atomEmail = atomAuthor.namedItem("atom:email").toElement();
00223 email = atomEmail.text().stripWhiteSpace();
00224 }
00225 }
00226 else if (format == RSSFeed)
00227 {
00228 authorFromString(element.namedItem("author").toElement().text(), name, email);
00229 }
00230
00231 if (name.isNull())
00232 name = email;
00233
00234 if (!email.isNull())
00235 return QString("<a href=\"mailto:%1\">%2</a>").arg(email).arg(name);
00236 else
00237 return name;
00238 }
00239
00240 }
00241
00242
|