00001 //# ColumnsIndexArray.h: Index to an array column in a table 00002 //# Copyright (C) 2001,2002 00003 //# Associated Universities, Inc. Washington DC, USA. 00004 //# 00005 //# This library is free software; you can redistribute it and/or modify it 00006 //# under the terms of the GNU Library General Public License as published by 00007 //# the Free Software Foundation; either version 2 of the License, or (at your 00008 //# option) any later version. 00009 //# 00010 //# This library is distributed in the hope that it will be useful, but WITHOUT 00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 00012 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 00013 //# License for more details. 00014 //# 00015 //# You should have received a copy of the GNU Library General Public License 00016 //# along with this library; if not, write to the Free Software Foundation, 00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA. 00018 //# 00019 //# Correspondence concerning AIPS++ should be addressed as follows: 00020 //# Internet email: aips2-request@nrao.edu. 00021 //# Postal address: AIPS++ Project Office 00022 //# National Radio Astronomy Observatory 00023 //# 520 Edgemont Road 00024 //# Charlottesville, VA 22903-2475 USA 00025 //# 00026 //# $Id$ 00027 00028 #ifndef TABLES_COLUMNSINDEXARRAY_H 00029 #define TABLES_COLUMNSINDEXARRAY_H 00030 00031 00032 //# Includes 00033 #include <casacore/casa/aips.h> 00034 #include <casacore/tables/Tables/Table.h> 00035 #include <casacore/casa/Arrays/Vector.h> 00036 #include <casacore/casa/Containers/Block.h> 00037 #include <casacore/casa/Containers/Record.h> 00038 00039 namespace casacore { //# NAMESPACE CASACORE - BEGIN 00040 00041 //# Forward Declarations 00042 class String; 00043 class TableColumn; 00044 00045 00046 // <summary> 00047 // Index to an array column in a table. 00048 // </summary> 00049 00050 // <use visibility=export> 00051 00052 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="tColumnsIndexArray.cc" demos=""> 00053 // </reviewed> 00054 00055 // <prerequisite> 00056 // <li> <linkto class=Table>Table</linkto> 00057 // <li> <linkto class=Record>Record</linkto> 00058 // <li> <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> 00059 // </prerequisite> 00060 00061 // <synopsis> 00062 // This class makes it possible to use transient indices on top 00063 // of an array column in a table in order to speed up the process of 00064 // finding rows based on a given key or key range. 00065 // It is similar to class <linkto class=ColumnsIndex>ColumnsIndex</linkto> 00066 // which is meant for one or more scalar columns. 00067 // <p> 00068 // When constructing a <src>ColumnsIndexArray</src> object, one has to define 00069 // which column forms the key for this index on the given 00070 // <src>table</src> object. 00071 // Not every data type is supported; only uChar, Short, Int, uInt, and 00072 // String array columns are supported. 00073 // The column can contain arrays of any shape and it can also contain 00074 // empty cells. The class will probably mostly be used for vectors, as 00075 // they seem to be the most logical way to hold multiple keys. 00076 // <br>The data in the given column will be read, sorted, 00077 // and stored in memory. When looking up a key or key range, the class 00078 // will use a fast binary search on the data held in memory. 00079 // <p> 00080 // The <src>ColumnsIndexArray</src> object contains a 00081 // <linkto class=Record>Record</linkto> object which can be used 00082 // to define the key to be looked up. The record contains a field for 00083 // the column in the index (with the same name and data type). 00084 // The fastest way to fill the key is by creating a 00085 // <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> object for 00086 // the field in the record (see the example) and fill it as needed. 00087 // However, one can also use the <src>Record::define</src> function, 00088 // but that is slower. 00089 // <br> 00090 // A second record is available to define the upper key 00091 // in case a key range has to be looked up. The keys can be accessed 00092 // using the various <src>accessKey</src> functions. 00093 // <p> 00094 // When a key is defined, the <src>getRowNumbers</src> function can be 00095 // used to find the table rows containing the given key (range). 00096 // Function <src>getRowNumber</src> can be used to lookup a single key 00097 // if all keys in the index are unique (which can be tested with the 00098 // <src>isUnique</src> function). 00099 // <p> 00100 // Instead of using the internal records holding the keys, one can also 00101 // pass its own Record object to <src>getRowNumbers</src>. 00102 // However, it will be slower. 00103 // <p> 00104 // After an index is created, it is possible to change the data 00105 // in the underlying columns. However, the <src>ColumnsIndexArray</src> can 00106 // not detect if the column data have changed. It can only detect if 00107 // the number of rows has changed. If the column data have changed, 00108 // the user has to use the <src>setChanged</src> function to indicate 00109 // that the column has changed. 00110 // <br>If data have changed, the entire index will be recreated by 00111 // rereading and resorting the data. This will be deferred 00112 // until the next key lookup. 00113 // </synopsis> 00114 00115 // <example> 00116 // Suppose one has table with a column NAME containing vectors. 00117 // <srcblock> 00118 // // Open the table and make an index for the column. 00119 // Table tab("my.tab") 00120 // ColumnsIndexArray colInx(tab, "NAME"); 00121 // // Make a RecordFieldPtr for the NAME field in the index key record. 00122 // // Its data type has to match the data type of the column. 00123 // RecordFieldPtr<String> nameFld(colInx.accessKey(), "NAME"); 00124 // // Find the row for a given name. 00125 // Bool found; 00126 // // Fill the key field and get the row number. 00127 // // NAME is a unique key, so only one row number matches. 00128 // // Otherwise function getRowNumbers had to be used. 00129 // *nameFld = "MYNAME"; 00130 // uInt rownr = colInx.getRowNumber (found); 00131 // if (!found) { 00132 // cout << "Name MYNAME is unknown" << endl; 00133 // } 00134 // // Now get a range of names and return the row numbers in ascending order. 00135 // // This uses the fact that the 'unique' argument also sorts the data. 00136 // RecordFieldPtr<String> nameUpp(colInx.accessUpperKey(), "NAME"); 00137 // *nameFld = "LOWER"; 00138 // *nameUpp = "UPPER"; 00139 // Vector<uInt> rownrs = colInx.getRowNumbers (True, True, True); 00140 // </srcblock> 00141 00142 // <motivation> 00143 // Bob Garwood needed such a class. 00144 // </motivation> 00145 00146 00147 class ColumnsIndexArray 00148 { 00149 public: 00150 // Create an index on the given table for the given column. 00151 // The column can be a scalar or an array column. 00152 // If <src>noSort==True</src>, the table is already in order of that 00153 // column and the sort step will not be done. 00154 // It only supports String and integer columns. 00155 ColumnsIndexArray (const Table&, const String& columnName); 00156 00157 // Copy constructor (copy semantics). 00158 ColumnsIndexArray (const ColumnsIndexArray& that); 00159 00160 ~ColumnsIndexArray(); 00161 00162 // Assignment (copy semantics). 00163 ColumnsIndexArray& operator= (const ColumnsIndexArray& that); 00164 00165 // Are all keys in the index unique? 00166 Bool isUnique() const; 00167 00168 // Return the names of the columns forming the index. 00169 const String& columnName() const; 00170 00171 // Get the table for which this index is created. 00172 const Table& table() const; 00173 00174 // Something has changed in the table, so the index has to be recreated. 00175 // The 2nd version indicates that a specific column has changed, 00176 // so only that column might need to be reread. If that column is not 00177 // part of the index, nothing will be done. 00178 // <br>Note that the class itself is keeping track if the number of 00179 // rows in the table changes. 00180 // <group> 00181 void setChanged(); 00182 void setChanged (const String& columnName); 00183 // </group> 00184 00185 // Access the key values. 00186 // These functions allow you to create RecordFieldPtr<T> objects 00187 // for each field in the key. In this way you can quickly fill in 00188 // the key. 00189 // <br>The records have a fixed type, so you cannot add or delete fields. 00190 // <br>Note that <src>accessKey</src> and <src>accessLowerKey</src> 00191 // are synonyms; they return the same underlying record. 00192 // <group> 00193 Record& accessKey(); 00194 Record& accessLowerKey(); 00195 Record& accessUpperKey(); 00196 // </group> 00197 00198 // Find the row number matching the key. All keys have to be unique, 00199 // otherwise an exception is thrown. 00200 // If no match is found, <src>found</src> is set to False. 00201 // The 2nd version makes it possible to pass in your own Record 00202 // instead of using the internal record via the <src>accessKey</src> 00203 // functions. Note that the given Record will be copied to the internal 00204 // record, thus overwrites it. 00205 // <group> 00206 uInt getRowNumber (Bool& found); 00207 uInt getRowNumber (Bool& found, const Record& key); 00208 // </group> 00209 00210 // Find the row numbers matching the key. It should be used instead 00211 // of <src>getRowNumber</src> if the same key can exist multiple times. 00212 // The 2nd version makes it possible to pass in your own Record 00213 // instead of using the internal record via the <src>accessKey</src> 00214 // functions. Note that the given Record will be copied to the internal 00215 // record, thus overwrites it. 00216 // <br>A row can contain multiple equal values. In such a case the 00217 // same row number can occur multiple times in the output vector, 00218 // unless <src>unique</src> is set to True. Note that making the row 00219 // numbers unique implies a sort, so it can also be used to get the 00220 // row numbers in ascending order. 00221 // <group> 00222 Vector<uInt> getRowNumbers (Bool unique=False); 00223 Vector<uInt> getRowNumbers (const Record& key, Bool unique=False); 00224 // </group> 00225 00226 // Find the row numbers matching the key range. The boolean arguments 00227 // tell if the lower and upper key are part of the range. 00228 // The 2nd version makes it possible to pass in your own Records 00229 // instead of using the internal records via the 00230 // <src>accessLower/UpperKey</src> functions. 00231 // Note that the given Records will be copied to the internal 00232 // records, thus overwrite them. 00233 // <br>A row can contain multiple matching values. In such a case the 00234 // same row number can occur multiple times in the output vector, 00235 // unless <src>unique</src> is set to True. Note that making the row 00236 // numbers unique implies a sort, so it can also be used to get the 00237 // row numbers in ascending order. 00238 // <group> 00239 Vector<uInt> getRowNumbers (Bool lowerInclusive, Bool upperInclusive, 00240 Bool unique=False); 00241 Vector<uInt> getRowNumbers (const Record& lower, const Record& upper, 00242 Bool lowerInclusive, Bool upperInclusive, 00243 Bool unique=False); 00244 // </group> 00245 00246 protected: 00247 // Copy that object to this. 00248 void copy (const ColumnsIndexArray& that); 00249 00250 // Delete all data in the object. 00251 void deleteObjects(); 00252 00253 // Add a column to the record description for the keys. 00254 // If the switch <src>arrayPossible</src> is True, the column can 00255 // be an array. Otherwise it has to be a scalar. 00256 void addColumnToDesc (RecordDesc& description, 00257 const TableColumn& column); 00258 00259 // Make the various internal <src>RecordFieldPtr</src> objects. 00260 void makeObjects (const RecordDesc& description); 00261 00262 // Read the data of the columns forming the index, sort them and 00263 // form the index. 00264 void readData(); 00265 00266 // Do a binary search on <src>itsUniqueIndexArray</src> for the key in 00267 // <src>fieldPtrs</src>. 00268 // If the key is found, <src>found</src> is set to True and the index 00269 // in <src>itsUniqueIndexArray</src> is returned. 00270 // If not found, <src>found</src> is set to False and the index 00271 // of the next higher key is returned. 00272 uInt bsearch (Bool& found, void* fieldPtr) const; 00273 00274 // Compare the key in <src>fieldPtr</src> with the given index entry. 00275 // -1 is returned when less, 0 when equal, 1 when greater. 00276 static Int compare (void* fieldPtr, 00277 void* dataPtr, 00278 Int dataType, 00279 Int index); 00280 00281 // Fill the row numbers vector for the given start till end in the 00282 // <src>itsUniqueIndexArray</src> vector (end is not inclusive). 00283 // If <src>unique</src> is True, the row numbers will be made unique. 00284 void fillRowNumbers (Vector<uInt>& rows, uInt start, uInt end, 00285 Bool unique) const; 00286 00287 // Get the data if the column is an array. 00288 // <group> 00289 void getArray (Vector<uChar>& result, const String& name); 00290 void getArray (Vector<Short>& result, const String& name); 00291 void getArray (Vector<Int>& result, const String& name); 00292 void getArray (Vector<uInt>& result, const String& name); 00293 void getArray (Vector<String>& result, const String& name); 00294 // </group> 00295 00296 // Fill the rownrs belonging to each array value. 00297 void fillRownrs (uInt npts, const Block<uInt>& nrel); 00298 00299 private: 00300 Table itsTable; 00301 uInt itsNrrow; 00302 Record* itsLowerKeyPtr; 00303 Record* itsUpperKeyPtr; 00304 Int itsDataType; 00305 void* itsDataVector; 00306 void* itsData; //# pointer to data in itsDataVector 00307 //# The following 2 blocks are actually blocks of RecordFieldPtr<T>*. 00308 //# They are used for fast access to the records. 00309 void* itsLowerField; 00310 void* itsUpperField; 00311 Bool itsChanged; 00312 Vector<uInt> itsDataIndex; //# Row numbers of all keys 00313 //# Indices in itsDataIndex for each unique key 00314 Vector<uInt> itsUniqueIndex; 00315 Block<uInt> itsRownrs; //# rownr for each value 00316 uInt* itsDataInx; //# pointer to data in itsDataIndex 00317 uInt* itsUniqueInx; //# pointer to data in itsUniqueIndex 00318 }; 00319 00320 00321 inline Bool ColumnsIndexArray::isUnique() const 00322 { 00323 return (itsDataIndex.nelements() == itsUniqueIndex.nelements()); 00324 } 00325 inline const Table& ColumnsIndexArray::table() const 00326 { 00327 return itsTable; 00328 } 00329 inline Record& ColumnsIndexArray::accessKey() 00330 { 00331 return *itsLowerKeyPtr; 00332 } 00333 inline Record& ColumnsIndexArray::accessLowerKey() 00334 { 00335 return *itsLowerKeyPtr; 00336 } 00337 inline Record& ColumnsIndexArray::accessUpperKey() 00338 { 00339 return *itsUpperKeyPtr; 00340 } 00341 00342 00343 00344 } //# NAMESPACE CASACORE - END 00345 00346 #endif