00001 //# TBParser.h: Parses the XMLDriver-generated XML into data in a TBTable. 00002 //# Copyright (C) 2005 00003 //# Associated Universities, Inc. Washington DC, USA. 00004 //# 00005 //# This library is free software; you can redistribute it and/or modify it 00006 //# under the terms of the GNU Library General Public License as published by 00007 //# the Free Software Foundation; either version 2 of the License, or (at your 00008 //# option) any later version. 00009 //# 00010 //# This library is distributed in the hope that it will be useful, but WITHOUT 00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 00012 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 00013 //# License for more details. 00014 //# 00015 //# You should have received a copy of the GNU Library General Public License 00016 //# along with this library; if not, write to the Free Software Foundation, 00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA. 00018 //# 00019 //# Correspondence concerning AIPS++ should be addressed as follows: 00020 //# Internet email: aips2-request@nrao.edu. 00021 //# Postal address: AIPS++ Project Office 00022 //# National Radio Astronomy Observatory 00023 //# 520 Edgemont Road 00024 //# Charlottesville, VA 22903-2475 USA 00025 //# 00026 //# $Id: $ 00027 #ifndef TBPARSER_H_ 00028 #define TBPARSER_H_ 00029 00030 #include <xercesc/dom/DOM.hpp> 00031 #include <xercesc/sax2/DefaultHandler.hpp> 00032 #include <xercesc/sax2/Attributes.hpp> 00033 #include <xercesc/sax2/SAX2XMLReader.hpp> 00034 00035 #include <casaqt/QtBrowser/TBConstants.h> 00036 00037 #include <casa/BasicSL/String.h> 00038 00039 #include <vector> 00040 #include <sstream> 00041 #include <map> 00042 00043 using namespace xercesc; 00044 00045 namespace casa { 00046 00047 //# Forward Declarations 00048 class TBField; 00049 class TBKeyword; 00050 class XMLtoken; 00051 class TableParams; 00052 class TBData; 00053 00054 // <summary> 00055 // Parses the XMLDriver-generated XML into data in a TBTable. 00056 // </summary> 00057 // 00058 // <synopsis> 00059 // TBParser is an abstract superclass for any implementing subclass to parse 00060 // a String containing XML. A TBParser keeps a reference to the table 00061 // parameters so that the table data can be directly imported. Note: the 00062 // TBParser is not used for the "Direct" table driver, which is the default. 00063 // Currently the user is unable to selected an "XML" table driver, which means 00064 // TBParsers are not used. 00065 // </synopsis> 00066 00067 class TBParser { 00068 public: 00069 // Constructor which takes a TableParams argument to store references to 00070 // the table parameters. 00071 TBParser(TableParams* tp); 00072 00073 virtual ~TBParser(); 00074 00075 00076 std::vector<std::vector<String>*>* getData() { return &data; } 00077 00078 // Set whether the TBParser should print debug information or not. 00079 void setPrintDebug(bool pdb); 00080 00081 00082 // Any subclass must implement the parse() method. Parses the given String 00083 // into the table parameters and returns a Result indicating whether the 00084 // parsing succeeded or not. If parsedata is true, the table data is 00085 // parsed, otherwise just table meta-data like keywords is parsed. 00086 virtual Result parse(String* xml, bool parsedata = true) = 0; 00087 00088 protected: 00089 // Is true if this table allows for the insertion of rows, false otherwise. 00090 bool& insertRow; 00091 00092 // Is true if this table allows for the deletion of rows, false otherwise. 00093 bool& removeRow; 00094 00095 // Holds the table data. 00096 std::vector<std::vector<String>*> data; 00097 00098 // Holds the "real" table data. 00099 std::vector<std::vector<TBData*>*>& data2; 00100 00101 // Holds the table fields. 00102 std::vector<TBField*>& fields; 00103 00104 // Holds the table keywords. 00105 std::vector<TBKeyword*>& keywords; 00106 00107 // Holds the list of the number of rows for each subtable. 00108 std::vector<int>& subtableRows; 00109 00110 // Holds the total number of rows in the table. 00111 int& totalRows; 00112 00113 // Holds the number of rows currently loaded in the table. 00114 int& loadedRows; 00115 00116 // Is true if debug information should be printed, false otherwise. 00117 bool printdebug; 00118 }; 00119 00120 // <summary> 00121 // TBParser subclass that uses a "home" parsing method. 00122 // </summary> 00123 // 00124 // <synopsis> 00125 // TBHomeParser is a subclass of TBParser that implements all the parsing 00126 // methods itself using String methods. It is somewhat slow and its use is 00127 // not recommended. 00128 // </synopsis> 00129 00130 class TBHomeParser : public TBParser { 00131 public: 00132 // Constructor that take the table parameters. 00133 TBHomeParser(TableParams* tp); 00134 00135 virtual ~TBHomeParser(); 00136 00137 00138 // Implements TBParser::parse(). Parses the String into XMLtokens and then 00139 // parses the table information from the XMLtokens. 00140 Result parse(String* xml, bool parsedata = true); 00141 00142 private: 00143 // All parsed XMLtokens that had a tag name of TBConstants::XML_FIELD. 00144 std::vector<XMLtoken*> xfields; 00145 00146 // All parsed XMLtokens that had a tag name of TBConstants::XML_KEYWORD. 00147 std::vector<XMLtoken*> xkeywords; 00148 00149 // All parsed XMLtokens that had a tag name of TBConstants::XML_COLUMNKW. 00150 std::map<String, std::vector<XMLtoken*>*> xcolkeywords; 00151 00152 00153 // Recursively parses a XMLtoken from the given String. The level 00154 // parameter is used to properly add tabs to the debug information. 00155 XMLtoken* parseToken(String* xml, int level); 00156 00157 // Parses XML attributes from the given String into the given token. The 00158 // level parameter is used to properly add tabs to the debug information. 00159 void parseAttributes(XMLtoken* token, String* attrPtr, int level); 00160 00161 // Parses XML content (<tag>content</tag>) from the given String into the 00162 // given token. The level parameter is used to properly add tabs to the 00163 // debug information. 00164 void parseContent(XMLtoken* token, String* contentPtr, int level); 00165 00166 // Given an XMLtoken tree, parse the table information from it. If 00167 // parsedata is true the table data is parsed, otherwise just the 00168 // meta-information like keywords is parsed. 00169 bool parseXMLtable(XMLtoken* t, bool parsedata); 00170 }; 00171 00172 // <summary> 00173 // TBParser subclass that uses a DOM parser in the XERCES library. 00174 // </summary> 00175 // 00176 // <synopsis> 00177 // TBXercesDOMParser is a subclass of TBParser that implements all the parsing 00178 // methods using a XERCES DOM parser. Although the actual parsing happens 00179 // quickly, deciphering table data from the parsed XML is somewhat slow and 00180 // thus the use of TBXercesDOMParser is not recommended. 00181 // </synopsis> 00182 00183 class TBXercesDOMParser : public TBParser { 00184 public: 00185 // Constructor that takes the table parameters. 00186 TBXercesDOMParser(TableParams* tp); 00187 00188 virtual ~TBXercesDOMParser(); 00189 00190 00191 // Implements TBParser::parse(). The String is parsed into DOMElements and 00192 // then the table information is parsed from the DOMElements. 00193 Result parse(String* xml, bool parsedata = true); 00194 00195 private: 00196 // First level parsing method that takes the top-level element and 00197 // parses it. 00198 Result parseXML(const DOMElement* element, bool parsedata); 00199 00200 // Second level parsing method that takes the TABLE element and parses 00201 // the table out of it. 00202 Result parseTable(const DOMElement* element, bool parsedata); 00203 00204 // Third level parsing method that takes the TABLEDATA element and 00205 // parses the table data out of it. 00206 Result parseTableData(const DOMElement* element); 00207 }; 00208 00209 // <summary> 00210 // TBParser subclass that uses a SAX parser in the XERCES library. 00211 // </summary> 00212 // 00213 // <synopsis> 00214 // TBXercesSAXParser is a subclass of TBParser that implements all the parsing 00215 // methods using a XERCES SAX parser. If XML parsing is required, the 00216 // TBXercesSAXParser is recommended for its (relative) speed. 00217 // TBXercesSAXParser also implements xerces::DefaultHandler since SAX uses 00218 // event-driven parsing. 00219 // </synopsis> 00220 00221 class TBXercesSAXParser : public TBParser, public DefaultHandler { 00222 public: 00223 // Constructor that takes the table parameters. 00224 TBXercesSAXParser(TableParams* tp); 00225 00226 virtual ~TBXercesSAXParser(); 00227 00228 00229 // Implements TBParser::parse(). Parses the String into the table data 00230 // serially using event-driven SAX parsing. 00231 Result parse(String* xml, bool parsedata = true); 00232 00233 // Implements DefaultHandler::startDocument(). 00234 void startDocument(); 00235 00236 // Implements DefaultHandler::endDocument(). 00237 void endDocument(); 00238 00239 // Implements DefaultHandler::startElement(). 00240 void startElement(const XMLCh* const uri, const XMLCh* const localname, 00241 const XMLCh* const qname, const Attributes& attrs); 00242 00243 // Implements DefaultHandler::endElement(). 00244 void endElement(const XMLCh* const uri, const XMLCh* const localname, 00245 const XMLCh* const qname); 00246 00247 // Implements DefaultHandler::characters(). 00248 void characters(const XMLCh* const chars, const unsigned int length); 00249 00250 private: 00251 // SAX reader. 00252 SAX2XMLReader* reader; 00253 00254 // Flag indicating whether the parsing is currently in a <TD> tag or not. 00255 bool inTD; 00256 00257 // The current row of table data being parsed. 00258 std::vector<String>* row; 00259 00260 // Keeps all non-XML or extra text. 00261 stringstream extraText; 00262 00263 // Indicates whether the parsing is valid or not. 00264 bool valid; 00265 00266 // Keep all parsed column keywords. 00267 std::map<int, std::vector<TBKeyword*>*> colkws; 00268 00269 // Is true if the table data should be parsed, false otherwise. 00270 bool parsedata; 00271 }; 00272 00273 } 00274 00275 #endif /* TBPARSER_H_ */