http://xml.apache.org/ http://www.apache.org/ http://www.w3.org/

Home

Readme
Release Info

Installation
Download
Build

FAQs
Samples
API Docs

DOM C++ Binding
Programming
Migration Guide

Feedback
Bug-Reporting
PDF Document

CVS Repository
Mail Archive

API Docs for SAX and DOM
 

Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

AbstractDOMParser.hpp

Go to the documentation of this file.
00001 /*
00002  * The Apache Software License, Version 1.1
00003  *
00004  * Copyright (c) 2002 The Apache Software Foundation.  All rights
00005  * reserved.
00006  *
00007  * Redistribution and use in source and binary forms, with or without
00008  * modification, are permitted provided that the following conditions
00009  * are met:
00010  *
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer.
00013  *
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in
00016  *    the documentation and/or other materials provided with the
00017  *    distribution.
00018  *
00019  * 3. The end-user documentation included with the redistribution,
00020  *    if any, must include the following acknowledgment:
00021  *       "This product includes software developed by the
00022  *        Apache Software Foundation (http://www.apache.org/)."
00023  *    Alternately, this acknowledgment may appear in the software itself,
00024  *    if and wherever such third-party acknowledgments normally appear.
00025  *
00026  * 4. The names "Xerces" and "Apache Software Foundation" must
00027  *    not be used to endorse or promote products derived from this
00028  *    software without prior written permission. For written
00029  *    permission, please contact apache\@apache.org.
00030  *
00031  * 5. Products derived from this software may not be called "Apache",
00032  *    nor may "Apache" appear in their name, without prior written
00033  *    permission of the Apache Software Foundation.
00034  *
00035  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
00036  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00037  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00038  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
00039  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00040  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00041  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
00042  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00043  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
00044  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00045  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00046  * SUCH DAMAGE.
00047  * ====================================================================
00048  *
00049  * This software consists of voluntary contributions made by many
00050  * individuals on behalf of the Apache Software Foundation, and was
00051  * originally based on software copyright (c) 2001, International
00052  * Business Machines, Inc., http://www.ibm.com .  For more information
00053  * on the Apache Software Foundation, please see
00054  * <http://www.apache.org/>.
00055  */
00056 
00057 /*
00058  * $Id: AbstractDOMParser.hpp,v 1.11 2002/08/14 15:20:38 knoaman Exp $
00059  *
00060  */
00061 #if !defined(ABSTRACTDOMPARSER_HPP)
00062 #define ABSTRACTDOMPARSER_HPP
00063 
00064 
00065 #include <xercesc/dom/DOMDocument.hpp>
00066 #include <xercesc/framework/XMLDocumentHandler.hpp>
00067 #include <xercesc/framework/XMLErrorReporter.hpp>
00068 #include <xercesc/framework/XMLEntityHandler.hpp>
00069 #include <xercesc/util/ValueStackOf.hpp>
00070 #include <xercesc/validators/DTD/DocTypeHandler.hpp>
00071 #include <xercesc/dom/DOMDocumentType.hpp>
00072 #include <xercesc/validators/DTD/DTDElementDecl.hpp>
00073 #include <xercesc/framework/XMLBufferMgr.hpp>
00074 
// Forward declarations. Within this header these types are only used through
// pointers or references (or as the element type of a pointer-held
// RefVectorOf), so their full definitions are not required here.
00075 class XMLPScanToken;
00076 class XMLScanner;
00077 class XMLValidator;
00078 class DOMDocumentImpl;
00079 class DOMDocumentTypeImpl;
00080 class DOMElement;
00081 
00082 
// AbstractDOMParser: base class that declares the shared state and the
// callback entry points of a DOM-building parser. It derives from the four
// handler interfaces XMLDocumentHandler, XMLErrorReporter, XMLEntityHandler
// and DocTypeHandler. The only constructor declared here is protected, so
// objects are created through derived classes.
//
// NOTE(review): this listing appears to have had the original API doc
// comments stripped (see the gaps in the embedded line numbers). Remarks
// below about members whose definitions are not in this header are marked
// as assumptions to be confirmed against the implementation file.
00087 class  AbstractDOMParser :
00088 
00089     public XMLDocumentHandler
00090     , public XMLErrorReporter
00091     , public XMLEntityHandler
00092     , public DocTypeHandler
00093 {
00094 public :
00095     // -----------------------------------------------------------------------
00096     //  Class types
00097     // -----------------------------------------------------------------------
00100 
          // Validation scheme selector, used as the parameter/return type of
          // setValidationScheme() and getValidationScheme().
          // Presumably: never validate / always validate / validate only when
          // the document supplies a grammar -- TODO confirm against the scanner.
00108     enum ValSchemes
00109     {
00110         Val_Never
00111         , Val_Always
00112         , Val_Auto
00113     };
00115 
00116 
00117     // -----------------------------------------------------------------------
00118     //  Constructors and Destructor
00119     // -----------------------------------------------------------------------
00122 
          // Virtual destructor: allows a derived parser to be destroyed through
          // a pointer to AbstractDOMParser.
00126     virtual ~AbstractDOMParser();
00127 
00129 
00130     // -----------------------------------------------------------------------
00131     //  Utility methods
00132     // -----------------------------------------------------------------------
00133 
00136 
          // NOTE(review): defined out of line; presumably returns the parser to
          // its pre-parse state -- confirm in the implementation file.
00141     void reset();
00142 
          // NOTE(review): defined out of line; presumably transfers ownership of
          // the current document to the caller (see the fDocumentAdoptedByUser
          // description in the private section) -- confirm.
00156     DOMDocument* adoptDocument();
00157 
00159 
00160 
00161     // -----------------------------------------------------------------------
00162     //  Getter methods
00163     // -----------------------------------------------------------------------
          // getCreateEntityReferenceNodes(), getIncludeIgnorableWhitespace() and
          // getCreateCommentNodes() are defined inline after the class and simply
          // return the matching private flag. The other getters in this section
          // are defined out of line.
00164 
00167 
00179     DOMDocument* getDocument();
00180 
00188     const XMLValidator& getValidator() const;
00189 
00197     ValSchemes getValidationScheme() const;
00198 
00209     bool getDoSchema() const;
00210 
00221     bool getValidationSchemaFullChecking() const;
00222 
00234     int getErrorCount() const;
00235 
00246     bool getDoNamespaces() const;
00247 
00260     bool getExitOnFirstFatalError() const;
00261 
00272     bool getValidationConstraintFatal() const;
00273 
00283     bool  getCreateEntityReferenceNodes()const;
00284 
00295     bool getIncludeIgnorableWhitespace() const;
00296 
          // NOTE(review): the two schema-location getters return a non-const
          // XMLCh* from a const method; who owns the returned buffer is not
          // visible from this header -- confirm before modifying or freeing it.
00316     XMLCh* getExternalSchemaLocation() const;
00317 
00337     XMLCh* getExternalNoNamespaceSchemaLocation() const;
00338 
00350     bool getLoadExternalDTD() const;
00351 
00360     bool  getCreateCommentNodes()const;
00361 
00363 
00364 
00365     // -----------------------------------------------------------------------
00366     //  Setter methods
00367     // -----------------------------------------------------------------------
          // setCreateEntityReferenceNodes(), setIncludeIgnorableWhitespace() and
          // setCreateCommentNodes() are defined inline after the class and simply
          // store their argument in the matching private flag. The other setters
          // in this section are defined out of line.
00368 
00371 
00386     void setDoNamespaces(const bool newState);
00387 
00404     void setExitOnFirstFatalError(const bool newState);
00405 
00425     void setValidationConstraintFatal(const bool newState);
00426 
00445     void setCreateEntityReferenceNodes(const bool create);
00446 
00468     void setIncludeIgnorableWhitespace(const bool include);
00469 
00486     void setValidationScheme(const ValSchemes newScheme);
00487 
00503     void setDoSchema(const bool newState);
00504 
00521     void setValidationSchemaFullChecking(const bool schemaFullChecking);
00522 
00542 
          // Each schema-location setter is overloaded for an XMLCh string and
          // for a char string.
00543     void setExternalSchemaLocation(const XMLCh* const schemaLocation);
00544 
00553     void setExternalSchemaLocation(const char* const schemaLocation);
00554 
00569     void setExternalNoNamespaceSchemaLocation(const XMLCh* const noNamespaceSchemaLocation);
00570 
00579     void setExternalNoNamespaceSchemaLocation(const char* const noNamespaceSchemaLocation);
00580 
00597     void setLoadExternalDTD(const bool newState);
00598 
00609     void setCreateCommentNodes(const bool create);
00610 
00612 
00613 
00614     // -----------------------------------------------------------------------
00615     //  Parsing methods
00616     // -----------------------------------------------------------------------
          // parse() is overloaded for an InputSource and for a system id given
          // either as an XMLCh string or as a char string; parseFirst() has the
          // same three overloads plus an XMLPScanToken to fill.
          // NOTE(review): parseFirst()/parseNext()/parseReset() presumably form
          // a progressive-parse API driven by the XMLPScanToken -- confirm.
00617 
00620 
00636     void parse(const InputSource& source);
00637 
00654     void parse(const XMLCh* const systemId);
00655 
00671     void parse(const char* const systemId);
00672 
00699     bool parseFirst
00700     (
00701         const   XMLCh* const    systemId
00702         ,       XMLPScanToken&  toFill
00703     );
00704 
00732     bool parseFirst
00733     (
00734         const   char* const     systemId
00735         ,       XMLPScanToken&  toFill
00736     );
00737 
00765     bool parseFirst
00766     (
00767         const   InputSource&    source
00768         ,       XMLPScanToken&  toFill
00769     );
00770 
00793     bool parseNext(XMLPScanToken& token);
00794 
00820     void parseReset(XMLPScanToken& token);
00821 
00823 
00824     // -----------------------------------------------------------------------
00825     //  Implementation of the XMLDocumentHandler interface.
00826     // -----------------------------------------------------------------------
          // All callbacks in this section are declared virtual, so derived
          // parsers may override them.
          // NOTE(review): presumably invoked by the scanner as document content
          // is encountered -- the caller is not visible from this header.
00827 
00830 
00843     virtual void docCharacters
00844     (
00845         const   XMLCh* const    chars
00846         , const unsigned int    length
00847         , const bool            cdataSection
00848     );
00849 
00858     virtual void docComment
00859     (
00860         const   XMLCh* const    comment
00861     );
00862 
00875     virtual void docPI
00876     (
00877         const   XMLCh* const    target
00878         , const XMLCh* const    data
00879     );
00880 
00885     virtual void endDocument();
00886 
00903     virtual void endElement
00904     (
00905         const   XMLElementDecl& elemDecl
00906         , const unsigned int    urlId
00907         , const bool            isRoot
00908         , const XMLCh* const    elemPrefix
00909     );
00910 
00919     virtual void endEntityReference
00920     (
00921         const   XMLEntityDecl&  entDecl
00922     );
00923 
00942     virtual void ignorableWhitespace
00943     (
00944         const   XMLCh* const    chars
00945         , const unsigned int    length
00946         , const bool            cdataSection
00947     );
00948 
00955     virtual void resetDocument();
00956 
00961     virtual void startDocument();
00962 
00990     virtual void startElement
00991     (
00992         const   XMLElementDecl&         elemDecl
00993         , const unsigned int            urlId
00994         , const XMLCh* const            elemPrefix
00995         , const RefVectorOf<XMLAttr>&   attrList
00996         , const unsigned int            attrCount
00997         , const bool                    isEmpty
00998         , const bool                    isRoot
00999     );
01000 
01010     virtual void startEntityReference
01011     (
01012         const   XMLEntityDecl&  entDecl
01013     );
01014 
01033     virtual void XMLDecl
01034     (
01035         const   XMLCh* const    versionStr
01036         , const XMLCh* const    encodingStr
01037         , const XMLCh* const    standaloneStr
01038         , const XMLCh* const    actualEncStr
01039     );
01041 
01042 
01043     // -----------------------------------------------------------------------
01044     //  Implementation of the deprecated DocTypeHandler interface.
01045     // -----------------------------------------------------------------------
01048     virtual void attDef
01049     (
01050         const   DTDElementDecl&     elemDecl
01051         , const DTDAttDef&          attDef
01052         , const bool                ignoring
01053     );
01054 
01055     virtual void doctypeComment
01056     (
01057         const   XMLCh* const    comment
01058     );
01059 
          // NOTE(review): hasExtSubset has a default argument on a virtual
          // function. Default arguments are chosen from the static type used at
          // the call site, not the dynamic type, so any override should repeat
          // the same default to avoid surprises.
01060     virtual void doctypeDecl
01061     (
01062         const   DTDElementDecl& elemDecl
01063         , const XMLCh* const    publicId
01064         , const XMLCh* const    systemId
01065         , const bool            hasIntSubset
01066         , const bool            hasExtSubset = false
01067     );
01068 
01069     virtual void doctypePI
01070     (
01071         const   XMLCh* const    target
01072         , const XMLCh* const    data
01073     );
01074 
01075     virtual void doctypeWhitespace
01076     (
01077         const   XMLCh* const    chars
01078         , const unsigned int    length
01079     );
01080 
01081     virtual void elementDecl
01082     (
01083         const   DTDElementDecl& decl
01084         , const bool            isIgnored
01085     );
01086 
01087     virtual void endAttList
01088     (
01089         const   DTDElementDecl& elemDecl
01090     );
01091 
01092     virtual void endIntSubset();
01093 
01094     virtual void endExtSubset();
01095 
01096     virtual void entityDecl
01097     (
01098         const   DTDEntityDecl&  entityDecl
01099         , const bool            isPEDecl
01100         , const bool            isIgnored
01101     );
01102 
01103     virtual void resetDocType();
01104 
01105     virtual void notationDecl
01106     (
01107         const   XMLNotationDecl&    notDecl
01108         , const bool                isIgnored
01109     );
01110 
01111     virtual void startAttList
01112     (
01113         const   DTDElementDecl& elemDecl
01114     );
01115 
01116     virtual void startIntSubset();
01117 
01118     virtual void startExtSubset();
01119 
01120     virtual void TextDecl
01121     (
01122         const   XMLCh* const    versionStr
01123         , const XMLCh* const    encodingStr
01124     );
01125 
01126 
01128 
01129 
01130     // -----------------------------------------------------------------------
01131     //  Deprecated Methods
01132     // -----------------------------------------------------------------------
          // getExpandEntityReferences()/setExpandEntityReferences() are defined
          // inline after the class as the logical inverse of the
          // fCreateEntityReferenceNodes flag. getDoValidation()/setDoValidation()
          // are defined out of line.
01135 
01145     bool getDoValidation() const;
01146 
01160     void setDoValidation(const bool newState);
01161 
01175     bool getExpandEntityReferences() const;
01176 
01193     void setExpandEntityReferences(const bool expand);
01194 
01196 
01197 protected :
01198     // -----------------------------------------------------------------------
01199     //  Protected Constructor Methods
01200     // -----------------------------------------------------------------------
01203     /** Construct an AbstractDOMParser, with an optional validator
01204       *
01205       * Constructor with an instance of validator class to use for
01206       * validation. If you don't provide a validator, a default one will
01207       * be created for you in the scanner.
01208       *
01209       * @param valToAdopt Pointer to the validator instance to use. The
01210       *                   parser is responsible for freeing the memory.
01211       */
01212     AbstractDOMParser(XMLValidator* const valToAdopt = 0);
01213 
01215 
01216     // -----------------------------------------------------------------------
01217     //  Protected getter methods
01218     // -----------------------------------------------------------------------
01221     /** Get the current DOM node
01222       *
01223       * This provides derived classes with access to the current node, i.e.
01224       * the node to which new nodes are being added.
01225       */
01226     DOMNode* getCurrentNode();
01227 
          // Returns the fScanner pointer (defined inline after the class).
01232     XMLScanner* getScanner() const;
01233 
          // Returns the fParseInProgress flag (defined inline after the class).
01239     bool getParseInProgress() const;
01240 
01242 
01243 
01244     // -----------------------------------------------------------------------
01245     //  Protected setter methods
01246     // -----------------------------------------------------------------------
          // All three setters are defined inline after the class and store their
          // argument directly in fCurrentNode, fDocument and fParseInProgress
          // respectively.
01247 
01250 
01258     void setCurrentNode(DOMNode* toSet);
01259 
01266     void setDocument(DOMDocument* toSet);
01267 
01274     void setParseInProgress(const bool toSet);
01276 
01277     // -----------------------------------------------------------------------
01278     //  Protected Helper methods
01279     // -----------------------------------------------------------------------
          // NOTE(review): both helpers are defined out of line. createElementNSNode
          // is virtual, presumably so derived parsers can supply their own element
          // node type; resetPool's effect is not visible from this header -- confirm.
01282     virtual DOMElement* createElementNSNode(const XMLCh *fNamespaceURI,
01283                                               const XMLCh *qualifiedName);
01284 
01285     void resetPool();
01287 
01288 
01289 private :
01290     // -----------------------------------------------------------------------
01291     //  Private data members
01292     //
01293     //  fCurrentNode
01294     //  fCurrentParent
01295     //      Used to track the current node during nested element events. Since
01296     //      the tree must be built from a set of disjoint callbacks, we need
01297     //      these to keep up with where we currently are.
01298     //
01299     //  fCurrentEntity
01300     //      Used to track the current entity decl.  If a text decl is seen later on,
01301     //      it is used to update the encoding and version information.
01302     //
01303     //  fDocument
01304     //      The root document object, filled with the document contents.
01305     //
01306     //  fCreateEntityReferenceNodes
01307     //      Indicates whether entity reference nodes should be created.
01308     //
01309     //  fIncludeIgnorableWhitespace
01310     //      Indicates whether ignorable whitespace should be added to
01311     //      the DOM tree for validating parsers.
01312     //
01313     //  fScanner
01314     //      The scanner used for this parser. This is created during the
01315     //      constructor.
01316     //
01317     //  fNodeStack
01318     //      Used to track previous parent nodes during nested element events.
01319     //
01320     //  fParseInProgress
01321     //      Used to prevent multiple entrance to the parser while it's doing
01322     //      a parse.
01323     //
01324     //  fWithinElement
01325     //      A flag to indicate that the parser is within at least one level
01326     //      of element processing.
01327     //
01328     //  fDocumentType
01329     //      Used to store and update the documentType variable information
01330     //      in fDocument
01331     //
01332     //  fDocumentVector
01333     //      Store all the previous fDocument(s) (thus not the current fDocument)
01334     //      created in this parser.  It is destroyed when the parser is destructed.
01335     //
01336     //  fCreateCommentNodes
01337     //      Indicates whether comment nodes should be created.
01338     //
01339     //  fDocumentAdoptedByUser
01340     //      The DOMDocument ownership has been transferred to application
01341     //      If set to true, the parser does not own the document anymore
01342     //      and thus will not release its memory.
01343     //
01344     //  fInternalSubset
01345     //      Buffer for storing the internal subset information.
01346     //      Once complete (after DOCTYPE is finished scanning), send
01347     //      it to DocumentType Node
          //
          //  fBufMgr
          //      An XMLBufferMgr held by value.
          //      NOTE(review): presumably the manager that supplies the buffer
          //      fInternalSubset refers to -- confirm in the constructor. If so,
          //      fBufMgr must stay declared before fInternalSubset, because
          //      members are initialized in declaration order.
01348     // -----------------------------------------------------------------------
01349     bool                          fCreateEntityReferenceNodes;
01350     bool                          fIncludeIgnorableWhitespace;
01351     bool                          fWithinElement;
01352     bool                          fParseInProgress;
01353     XMLBufferMgr                  fBufMgr;
01354     XMLScanner*                   fScanner;
01355     DOMNode*                      fCurrentParent;
01356     DOMNode*                      fCurrentNode;
01357     DOMEntity*                    fCurrentEntity;
01358     DOMDocumentImpl*              fDocument;
01359     ValueStackOf<DOMNode*>*       fNodeStack;
01360     DOMDocumentTypeImpl*          fDocumentType;
01361     RefVectorOf<DOMDocumentImpl>* fDocumentVector;
01362     bool                          fCreateCommentNodes;
01363     bool                          fDocumentAdoptedByUser;
          // Reference member: it must be bound in the constructor's initializer
          // list, and it prevents the compiler from generating a usable copy
          // assignment operator for this class.
01364     XMLBuffer&                    fInternalSubset;
01365 };
01366 
01367 
01368 
01369 // ---------------------------------------------------------------------------
01370 //  AbstractDOMParser: Getter methods
01371 // ---------------------------------------------------------------------------
// Deprecated getter (declared under "Deprecated Methods" in the class):
// returns the logical inverse of fCreateEntityReferenceNodes, i.e. true when
// entity reference nodes are NOT being created.
01372 inline bool AbstractDOMParser::getExpandEntityReferences() const
01373 {
01374     return !fCreateEntityReferenceNodes;
01375 }
// Returns the fCreateEntityReferenceNodes flag, which indicates whether
// entity reference nodes should be created.
01376 inline bool AbstractDOMParser::getCreateEntityReferenceNodes() const
01377 {
01378     return fCreateEntityReferenceNodes;
01379 }
01380 
// Returns the fIncludeIgnorableWhitespace flag, which indicates whether
// ignorable whitespace should be added to the DOM tree for validating parsers.
01381 inline bool AbstractDOMParser::getIncludeIgnorableWhitespace() const
01382 {
01383     return fIncludeIgnorableWhitespace;
01384 }
01385 
// Protected getter: returns the fParseInProgress flag, which is used to
// prevent multiple entrance to the parser while a parse is running.
01386 inline bool AbstractDOMParser::getParseInProgress() const
01387 {
01388     return fParseInProgress;
01389 }
01390 
// Protected getter: returns the fScanner pointer held by this parser. The
// pointer is returned as-is; no ownership is transferred by this function.
01391 inline XMLScanner* AbstractDOMParser::getScanner() const
01392 {
01393     return fScanner;
01394 }
01395 
// Returns the fCreateCommentNodes flag, which indicates whether comment
// nodes should be created.
01396 inline bool AbstractDOMParser::getCreateCommentNodes() const
01397 {
01398     return fCreateCommentNodes;
01399 }
01400 
01401 // ---------------------------------------------------------------------------
01402 //  AbstractDOMParser: Setter methods
01403 // ---------------------------------------------------------------------------
// Deprecated setter (declared under "Deprecated Methods" in the class):
// stores the logical inverse of `expand` in fCreateEntityReferenceNodes, so
// passing true turns entity reference node creation off.
01404 inline void AbstractDOMParser::setExpandEntityReferences(const bool expand)
01405 {
01406     fCreateEntityReferenceNodes = !expand;
01407 }
01408 
// Stores `create` in fCreateEntityReferenceNodes, the flag that indicates
// whether entity reference nodes should be created.
01409 inline void AbstractDOMParser::setCreateEntityReferenceNodes(const bool create)
01410 {
01411     fCreateEntityReferenceNodes = create;
01412 }
01413 
// Stores `include` in fIncludeIgnorableWhitespace, the flag that indicates
// whether ignorable whitespace should be added to the DOM tree for
// validating parsers.
01414 inline void AbstractDOMParser::setIncludeIgnorableWhitespace(const bool include)
01415 {
01416     fIncludeIgnorableWhitespace = include;
01417 }
01418 
// Stores `create` in fCreateCommentNodes, the flag that indicates whether
// comment nodes should be created.
01419 inline void AbstractDOMParser::setCreateCommentNodes(const bool create)
01420 {
01421     fCreateCommentNodes = create;
01422 }
01423 
01424 
01425 // ---------------------------------------------------------------------------
01426 //  AbstractDOMParser: Protected getter methods
01427 // ---------------------------------------------------------------------------
// Protected getter: returns fCurrentNode, the node tracked as "current"
// while the tree is being built from the parser callbacks.
01428 inline DOMNode* AbstractDOMParser::getCurrentNode()
01429 {
01430     return fCurrentNode;
01431 }
01432 
01433 
01434 // ---------------------------------------------------------------------------
01435 //  AbstractDOMParser: Protected setter methods
01436 // ---------------------------------------------------------------------------
// Protected setter: replaces fCurrentNode with toSet. Only the pointer is
// stored; fCurrentParent and fNodeStack are not touched here.
01437 inline void AbstractDOMParser::setCurrentNode(DOMNode* toSet)
01438 {
01439     fCurrentNode = toSet;
01440 }
01441 
// Protected setter: stores toSet in fDocument, converting it to
// DOMDocumentImpl* with a C-style cast. No check is made that toSet really
// points to a DOMDocumentImpl, and the previously stored pointer is simply
// overwritten.
// NOTE(review): this header only forward-declares DOMDocumentImpl. If the
// full class definition is not visible at this point in the translation
// unit, the C-style cast is performed as a reinterpret_cast (no pointer
// adjustment) -- confirm that this is safe for DOMDocumentImpl's
// inheritance layout.
01442 inline void AbstractDOMParser::setDocument(DOMDocument* toSet)
01443 {
01444     fDocument = (DOMDocumentImpl *)toSet;
01445 }
01446 
// Protected setter: stores toSet in fParseInProgress, the flag used to
// prevent multiple entrance to the parser while a parse is running.
01447 inline void AbstractDOMParser::setParseInProgress(const bool toSet)
01448 {
01449     fParseInProgress = toSet;
01450 }
01451 
01452 #endif
01453 
01454 
01455 


Copyright © 2000 The Apache Software Foundation. All Rights Reserved.