akregator/src/librss

loader.cpp

00001 /*
00002  * loader.cpp
00003  *
00004  * Copyright (c) 2001, 2002, 2003 Frerich Raabe <raabe@kde.org>
00005  *
00006  * This program is distributed in the hope that it will be useful, but WITHOUT
00007  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00008  * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the
00009  * accompanying file 'COPYING'.
00010  */
00011 #include "loader.h"
00012 #include "document.h"
00013 #include "feeddetector.h"
00014 
00015 #include <kio/job.h>
00016 #include <kprocess.h>
00017 #include <kstaticdeleter.h>
00018 #include <kurl.h>
00019 #include <kdebug.h>
00020 
00021 #include <qdom.h>
00022 #include <qbuffer.h>
00023 #include <qregexp.h>
00024 #include <qstring.h>
00025 #include <qstringlist.h>
00026 #include <qtimer.h>
00027 
00028 using namespace RSS;
00029 
00030 DataRetriever::DataRetriever()
00031 {
00032 }
00033 
00034 DataRetriever::~DataRetriever()
00035 {
00036 }
00037 
00038 class FileRetriever::Private
00039 {
00040     public:
00041         
00042    Private()
00043       : buffer(NULL),
00044         lastError(0), job(NULL)
00045    {
00046    }
00047 
00048    ~Private()
00049    {
00050       delete buffer;
00051    }
00052 
00053    QBuffer *buffer;
00054    int lastError;
00055    KIO::Job *job;
00056    static KStaticDeleter<QString> userAgentsd;
00057    static QString* userAgent;
00058 };
00059 
00060 KStaticDeleter<QString> FileRetriever::Private::userAgentsd;
00061 QString* FileRetriever::Private::userAgent = 0L;
00062 FileRetriever::FileRetriever()
00063    : d(new Private)
00064 {
00065 }
00066 
00067 FileRetriever::~FileRetriever()
00068 {
00069    delete d;
00070 }
00071 
00072 bool FileRetriever::m_useCache = true;
00073 
00074 QString FileRetriever::userAgent()
00075 {
00076     if (Private::userAgent == 0L)
00077         FileRetriever::Private::userAgentsd.setObject(Private::userAgent, new QString);
00078     return *Private::userAgent;
00079 }
00080 
00081 void FileRetriever::setUserAgent(const QString &ua)
00082 {
00083     if (Private::userAgent == 0L)
00084         FileRetriever::Private::userAgentsd.setObject(Private::userAgent, new QString);
00085     (*Private::userAgent) = ua;
00086 }
00087 
00088 void FileRetriever::setUseCache(bool enabled)
00089 {
00090     m_useCache = enabled;
00091 }
00092 
00093 void FileRetriever::retrieveData(const KURL &url)
00094 {
00095    if (d->buffer)
00096       return;
00097 
00098    d->buffer = new QBuffer;
00099    d->buffer->open(IO_WriteOnly);
00100 
00101    KURL u=url;
00102 
00103    if (u.protocol()=="feed")
00104        u.setProtocol("http");
00105 
00106    d->job = KIO::get(u, false, false);
00107    d->job->addMetaData("cache", m_useCache ? "refresh" : "reload");
00108 
00109    QString ua = userAgent();
00110    if (!ua.isEmpty())
00111       d->job->addMetaData("UserAgent", ua);
00112 
00113 
00114    QTimer::singleShot(1000*90, this, SLOT(slotTimeout()));
00115 
00116    connect(d->job, SIGNAL(data(KIO::Job *, const QByteArray &)),
00117                 SLOT(slotData(KIO::Job *, const QByteArray &)));
00118    connect(d->job, SIGNAL(result(KIO::Job *)), SLOT(slotResult(KIO::Job *)));
00119    connect(d->job, SIGNAL(permanentRedirection(KIO::Job *, const KURL &, const KURL &)),
00120                 SLOT(slotPermanentRedirection(KIO::Job *, const KURL &, const KURL &)));
00121 }
00122 
00123 void FileRetriever::slotTimeout()
00124 {
00125     abort();
00126 
00127     delete d->buffer;
00128     d->buffer = NULL;
00129 
00130     d->lastError = KIO::ERR_SERVER_TIMEOUT;
00131 
00132     emit dataRetrieved(QByteArray(), false);
00133 }
00134 
00135 int FileRetriever::errorCode() const
00136 {
00137    return d->lastError;
00138 }
00139 
00140 void FileRetriever::slotData(KIO::Job *, const QByteArray &data)
00141 {
00142    d->buffer->writeBlock(data.data(), data.size());
00143 }
00144 
00145 void FileRetriever::slotResult(KIO::Job *job)
00146 {
00147    QByteArray data = d->buffer->buffer();
00148    data.detach();
00149 
00150    delete d->buffer;
00151    d->buffer = NULL;
00152 
00153    d->lastError = job->error();
00154    emit dataRetrieved(data, d->lastError == 0);
00155 }
00156 
00157 void FileRetriever::slotPermanentRedirection(KIO::Job *, const KURL &, const KURL &newUrl)
00158 {
00159    emit permanentRedirection(newUrl);
00160 }
00161 
00162 void FileRetriever::abort()
00163 {
00164     if (d->job)
00165     {
00166         d->job->kill(true);
00167         d->job = NULL;
00168     }
00169 }
00170 
00171 struct OutputRetriever::Private
00172 {
00173    Private() : process(NULL),
00174       buffer(NULL),
00175       lastError(0)
00176    {
00177    }
00178 
00179    ~Private()
00180    {
00181       delete process;
00182       delete buffer;
00183    }
00184 
00185    KShellProcess *process;
00186    QBuffer *buffer;
00187    int lastError;
00188 };
00189 
00190 OutputRetriever::OutputRetriever() :
00191    d(new Private)
00192 {
00193 }
00194 
00195 OutputRetriever::~OutputRetriever()
00196 {
00197    delete d;
00198 }
00199 
00200 void OutputRetriever::retrieveData(const KURL &url)
00201 {
00202    // Ignore subsequent calls if we didn't finish the previous job yet.
00203    if (d->buffer || d->process)
00204       return;
00205 
00206    d->buffer = new QBuffer;
00207    d->buffer->open(IO_WriteOnly);
00208 
00209    d->process = new KShellProcess();
00210    connect(d->process, SIGNAL(processExited(KProcess *)),
00211                        SLOT(slotExited(KProcess *)));
00212    connect(d->process, SIGNAL(receivedStdout(KProcess *, char *, int)),
00213                        SLOT(slotOutput(KProcess *, char *, int)));
00214    *d->process << url.path();
00215    d->process->start(KProcess::NotifyOnExit, KProcess::Stdout);
00216 }
00217 
00218 int OutputRetriever::errorCode() const
00219 {
00220    return d->lastError;
00221 }
00222 
00223 void OutputRetriever::slotOutput(KProcess *, char *data, int length)
00224 {
00225    d->buffer->writeBlock(data, length);
00226 }
00227 
00228 void OutputRetriever::slotExited(KProcess *p)
00229 {
00230    if (!p->normalExit())
00231       d->lastError = p->exitStatus();
00232 
00233    QByteArray data = d->buffer->buffer();
00234    data.detach();
00235 
00236    delete d->buffer;
00237    d->buffer = NULL;
00238 
00239    delete d->process;
00240    d->process = NULL;
00241 
00242    emit dataRetrieved(data, p->normalExit() && p->exitStatus() == 0);
00243 }
00244 
00245 struct Loader::Private
00246 {
00247    Private() : retriever(NULL),
00248       lastError(0)
00249    {
00250    }
00251 
00252    ~Private()
00253    {
00254       delete retriever;
00255    }
00256 
00257    DataRetriever *retriever;
00258    int lastError;
00259    KURL discoveredFeedURL;
00260    KURL url;
00261 };
00262 
00263 Loader *Loader::create()
00264 {
00265    return new Loader;
00266 }
00267 
00268 Loader *Loader::create(QObject *object, const char *slot)
00269 {
00270    Loader *loader = create();
00271    connect(loader, SIGNAL(loadingComplete(Loader *, Document, Status)),
00272            object, slot);
00273    return loader;
00274 }
00275 
00276 Loader::Loader() : d(new Private)
00277 {
00278 }
00279 
00280 Loader::~Loader()
00281 {
00282     delete d;
00283 }
00284 
00285 void Loader::loadFrom(const KURL &url, DataRetriever *retriever)
00286 {
00287    if (d->retriever != NULL)
00288       return;
00289 
00290    d->url=url;
00291    d->retriever = retriever;
00292 
00293    connect(d->retriever, SIGNAL(dataRetrieved(const QByteArray &, bool)),
00294            this, SLOT(slotRetrieverDone(const QByteArray &, bool)));
00295 
00296    d->retriever->retrieveData(url);
00297 }
00298 
00299 int Loader::errorCode() const
00300 {
00301    return d->lastError;
00302 }
00303 
00304 void Loader::abort()
00305 {
00306     if (d && d->retriever)
00307     {
00308         d->retriever->abort();
00309         delete d->retriever;
00310         d->retriever=NULL;
00311     }
00312     emit loadingComplete(this, QDomDocument(), Aborted);
00313     delete this;
00314 }
00315 
00316 const KURL &Loader::discoveredFeedURL() const
00317 {
00318    return d->discoveredFeedURL;
00319 }
00320 
00321 void Loader::slotRetrieverDone(const QByteArray &data, bool success)
00322 {
00323    d->lastError = d->retriever->errorCode();
00324 
00325    delete d->retriever;
00326    d->retriever = NULL;
00327 
00328    Document rssDoc;
00329    Status status = Success;
00330 
00331    if (success) {
00332       QDomDocument doc;
00333 
00334       /* Some servers insert whitespace before the <?xml...?> declaration.
00335        * QDom doesn't tolerate that (and it's right, that's invalid XML),
00336        * so we strip that.
00337        */
00338 
00339       const char *charData = data.data();
00340       int len = data.count();
00341 
00342       while (len && QChar(*charData).isSpace()) {
00343          --len;
00344          ++charData;
00345       }
00346 
00347       if ( len > 3 && QChar(*charData) == QChar(0357) ) { // 0357 0273 0277
00348               len -= 3;
00349               charData += 3;
00350       }
00351       QByteArray tmpData;
00352       tmpData.setRawData(charData, len);
00353 
00354       if (doc.setContent(tmpData))
00355       {
00356          rssDoc = Document(doc);
00357          if (!rssDoc.isValid())
00358          {
00359             discoverFeeds(tmpData);
00360             status = ParseError;
00361          }
00362       }
00363       else
00364       {
00365          discoverFeeds(tmpData);
00366          status = ParseError;
00367       }
00368 
00369       tmpData.resetRawData(charData, len);
00370    } else
00371       status = RetrieveError;
00372 
00373    emit loadingComplete(this, rssDoc, status);
00374 
00375    delete this;
00376 }
00377 
00378 void Loader::discoverFeeds(const QByteArray &data)
00379 {
00380     QString str = QString(data).simplifyWhiteSpace();
00381     
00382     QStringList feeds; 
00383     
00384     FeedDetectorEntryList list = FeedDetector::extractFromLinkTags(str); 
00385     
00386     for (FeedDetectorEntryList::ConstIterator it = list.begin(); it != list.end(); ++it)
00387     {
00388         feeds += (*it).url();
00389     }  
00390     
00391     if (list.isEmpty())
00392         feeds = FeedDetector::extractBruteForce(str);
00393         
00394     QString feed = feeds.first();
00395     QString host = d->url.host();
00396     KURL testURL;
00397     // loop through, prefer feeds on same host
00398     QStringList::Iterator end( feeds.end() );
00399     for ( QStringList::Iterator it = feeds.begin(); it != end; ++it) 
00400     {
00401         testURL=*it;
00402         if (testURL.host() == host)
00403         {
00404             feed = *it;
00405             break;
00406         }
00407     }
00408 
00409     d->discoveredFeedURL = feed.isNull() ? QString() : FeedDetector::fixRelativeURL(feed, d->url); 
00410 }
00411 
00412 #include "loader.moc"
00413 // vim:noet:ts=4
KDE Home | KDE Accessibility Home | Description of Access Keys