casa  $Rev:20696$
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
TBParser.h
Go to the documentation of this file.
00001 //# TBParser.h: Parses the XMLDriver-generated XML into data in a TBTable.
00002 //# Copyright (C) 2005
00003 //# Associated Universities, Inc. Washington DC, USA.
00004 //#
00005 //# This library is free software; you can redistribute it and/or modify it
00006 //# under the terms of the GNU Library General Public License as published by
00007 //# the Free Software Foundation; either version 2 of the License, or (at your
00008 //# option) any later version.
00009 //#
00010 //# This library is distributed in the hope that it will be useful, but WITHOUT
00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00013 //# License for more details.
00014 //#
00015 //# You should have received a copy of the GNU Library General Public License
00016 //# along with this library; if not, write to the Free Software Foundation,
00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00018 //#
00019 //# Correspondence concerning AIPS++ should be addressed as follows:
00020 //#        Internet email: aips2-request@nrao.edu.
00021 //#        Postal address: AIPS++ Project Office
00022 //#                        National Radio Astronomy Observatory
00023 //#                        520 Edgemont Road
00024 //#                        Charlottesville, VA 22903-2475 USA
00025 //#
00026 //# $Id: $
00027 #ifndef TBPARSER_H_
00028 #define TBPARSER_H_
00029 
00030 #include <vector>
00031 #include <sstream>
00032 
00033 #include <xercesc/dom/DOM.hpp>
00034 #include <xercesc/sax2/DefaultHandler.hpp>
00035 #include <xercesc/sax2/Attributes.hpp>
00036 #include <xercesc/sax2/SAX2XMLReader.hpp>
00037 
00038 #include <casaqt/QtBrowser/TBConstants.h>
00039 
00040 #include <casa/BasicSL/String.h>
00041 
00042 #include <casa/namespace.h>
00043 using namespace xercesc;
00044 using namespace std;
00045 
00046 namespace casa {
00047 
00048 //# Forward Declarations
00049 class TBField;
00050 class TBKeyword;
00051 class XMLtoken;
00052 class TableParams;
00053 class TBData;
00054 
00055 // <summary>
00056 // Parses the XMLDriver-generated XML into data in a TBTable.
00057 // </summary>
00058 //
00059 // <synopsis>
00060 // TBParser is an abstract superclass for any implementing subclass to parse
00061 // a String containing XML.  A TBParser keeps a reference to the table
00062 // parameters so that the table data can be directly imported.  Note: the
00063 // TBParser is not used for the "Direct" table driver, which is the default.
00064 // Currently the user is unable to select an "XML" table driver, which means
00065 // TBParsers are not used.
00066 // </synopsis>
00067 
00068 class TBParser {
00069 public:
00070     // Constructor which takes a pointer to the TableParams so that references
00071     // to the table parameters can be stored in this parser.
00072     TBParser(TableParams* tp);
00073 
00074     virtual ~TBParser();
00075 
00076     // Returns a pointer to the String table data held in the data member.
00077     vector<vector<String>*>* getData() { return &data; }
00078     
00079     // Sets whether the TBParser should print debug information or not.
00080     void setPrintDebug(bool pdb);
00081         
00082     
00083     // Pure virtual; any subclass must implement the parse() method.  Parses the
00084     // given String into the table parameters and returns a Result indicating
00085     // whether the parsing succeeded or not.  If parsedata is true (the default),
00086     // the table data is parsed, otherwise just table meta-data like keywords.
00087     virtual Result parse(String* xml, bool parsedata = true) = 0;
00088 
00089 protected:
00090     // Is true if this table allows for the insertion of rows, false otherwise.
00091     bool& insertRow;
00092 
00093     // Is true if this table allows for the deletion of rows, false otherwise.
00094     bool& removeRow;
00095     // Holds the table data as Strings; returned by getData().  Unlike data2,
00096     // fields, keywords, etc., this is a value member, not a reference.
00097     vector<vector<String>*> data;
00098 
00099     // Holds the "real" table data, stored as TBData pointers.
00100     vector<vector<TBData*>*>& data2;
00101     
00102     // Holds the table fields.
00103     vector<TBField*>& fields;
00104 
00105     // Holds the table keywords.
00106     vector<TBKeyword*>& keywords;
00107 
00108     // Holds the list of the number of rows for each subtable.
00109     vector<int>& subtableRows;
00110 
00111     // Holds the total number of rows in the table.
00112     int& totalRows;
00113 
00114     // Holds the number of rows currently loaded in the table.
00115     int& loadedRows;
00116 
00117     // Is true if debug information should be printed, false otherwise.
00118     bool printdebug;
00119 };
00120 
00121 // <summary>
00122 // TBParser subclass that uses a "home" parsing method.
00123 // </summary>
00124 //
00125 // <synopsis>
00126 // TBHomeParser is a subclass of TBParser that implements all the parsing
00127 // methods itself using String methods.  It is somewhat slow and its use is
00128 // not recommended.
00129 // </synopsis>
00130 
00131 class TBHomeParser : public TBParser {
00132 public:
00133     // Constructor that takes the table parameters.
00134     TBHomeParser(TableParams* tp);
00135 
00136     virtual ~TBHomeParser();
00137 
00138     
00139     // Implements TBParser::parse().  Parses the String into XMLtokens and then
00140     // parses the table information from the XMLtokens.
00141     Result parse(String* xml, bool parsedata = true);
00142 
00143 private:
00144     // All parsed XMLtokens that had a tag name of TBConstants::XML_FIELD.
00145     vector<XMLtoken*> xfields;
00146 
00147     // All parsed XMLtokens that had a tag name of TBConstants::XML_KEYWORD.
00148     vector<XMLtoken*> xkeywords;
00149     // All parsed XMLtokens that had a tag name of TBConstants::XML_COLUMNKW.
00150     // NOTE(review): <map> is not #included directly here; confirm it arrives transitively.
00151     map<String, vector<XMLtoken*>*> xcolkeywords;
00152 
00153     
00154     // Recursively parses an XMLtoken from the given String.  The level
00155     // parameter is used to properly add tabs to the debug information.
00156     XMLtoken* parseToken(String* xml, int level);
00157 
00158     // Parses XML attributes from the given String into the given token.  The
00159     // level parameter is used to properly add tabs to the debug information.
00160     void parseAttributes(XMLtoken* token, String* attrPtr, int level);
00161 
00162     // Parses XML content (<tag>content</tag>) from the given String into the
00163     // given token.  The level parameter is used to properly add tabs to the
00164     // debug information.
00165     void parseContent(XMLtoken* token, String* contentPtr, int level);
00166 
00167     // Parses the table information out of the given XMLtoken tree.  Table data
00168     // is parsed only if parsedata is true; otherwise just meta-information like
00169     // keywords.  NOTE(review): bool result presumably means success; confirm.
00170     bool parseXMLtable(XMLtoken* t, bool parsedata);
00171 };
00172 
00173 // <summary>
00174 // TBParser subclass that uses a DOM parser in the XERCES library.
00175 // </summary>
00176 //
00177 // <synopsis>
00178 // TBXercesDOMParser is a subclass of TBParser that implements all the parsing
00179 // methods using a XERCES DOM parser.  Although the actual parsing happens
00180 // quickly, deciphering table data from the parsed XML is somewhat slow and
00181 // thus the use of TBXercesDOMParser is not recommended.
00182 // </synopsis>
00183 
00184 class TBXercesDOMParser : public TBParser {
00185 public:
00186     // Constructor that takes the table parameters (as a TableParams pointer).
00187     TBXercesDOMParser(TableParams* tp);
00188 
00189     virtual ~TBXercesDOMParser();
00190 
00191     
00192     // Implements TBParser::parse().  The String is parsed into DOMElements and
00193     // then the table information is parsed from the DOMElements.
00194     Result parse(String* xml, bool parsedata = true);
00195 
00196 private:
00197     // First-level parsing method: takes the top-level element and parses it,
00198     // returning a Result.
00199     Result parseXML(const DOMElement* element, bool parsedata);
00200 
00201     // Second-level parsing method: takes the TABLE element and parses the
00202     // table out of it, returning a Result.
00203     Result parseTable(const DOMElement* element, bool parsedata);
00204 
00205     // Third-level parsing method: takes the TABLEDATA element and parses the
00206     // table data out of it.  Has no parsedata flag, unlike the levels above.
00207     Result parseTableData(const DOMElement* element);
00208 };
00209 
00210 // <summary>
00211 // TBParser subclass that uses a SAX parser in the XERCES library.
00212 // </summary>
00213 //
00214 // <synopsis>
00215 // TBXercesSAXParser is a subclass of TBParser that implements all the parsing
00216 // methods using a XERCES SAX parser.  If XML parsing is required, the
00217 // TBXercesSAXParser is recommended for its (relative) speed.
00218 // TBXercesSAXParser also derives from xercesc::DefaultHandler since SAX uses
00219 // event-driven parsing.
00220 // </synopsis>
00221 
00222 class TBXercesSAXParser : public TBParser, public DefaultHandler {
00223 public:
00224     // Constructor that takes the table parameters.
00225     TBXercesSAXParser(TableParams* tp);
00226 
00227     virtual ~TBXercesSAXParser();
00228 
00229     
00230     // Implements TBParser::parse().  Parses the String into the table data
00231     // serially using event-driven SAX parsing.
00232     Result parse(String* xml, bool parsedata = true);
00233 
00234     // Implements DefaultHandler::startDocument().
00235     void startDocument();
00236 
00237     // Implements DefaultHandler::endDocument().
00238     void endDocument();
00239 
00240     // Implements DefaultHandler::startElement().
00241     void startElement(const XMLCh* const uri, const XMLCh* const localname,
00242                       const XMLCh* const qname, const Attributes& attrs);
00243 
00244     // Implements DefaultHandler::endElement().
00245     void endElement(const XMLCh* const uri, const XMLCh* const localname,
00246                     const XMLCh* const qname);
00247     // Implements DefaultHandler::characters().  NOTE(review): Xerces-C++ 3.x
00248     // declares the length parameter as XMLSize_t; confirm this still overrides.
00249     void characters(const XMLCh* const chars, const unsigned int length);
00250 
00251 private:
00252     // SAX reader.
00253     SAX2XMLReader* reader;
00254 
00255     // Flag indicating whether the parsing is currently in a <TD> tag or not.
00256     bool inTD;
00257 
00258     // The current row of table data being parsed.
00259     vector<String>* row;
00260 
00261     // Keeps all non-XML or extra text.
00262     stringstream extraText;
00263 
00264     // Indicates whether the parsing is valid or not.
00265     bool valid;
00266 
00267     // Keeps all parsed column keywords, keyed by int (presumably column index; TODO confirm).
00268     map<int, vector<TBKeyword*>*> colkws;
00269 
00270     // Is true if the table data should be parsed, false otherwise.
00271     bool parsedata;
00272 };
00273 
00274 }
00275 
00276 #endif /* TBPARSER_H_ */