00001 //# ColumnsIndex.h: Index to one or more columns in a table 00002 //# Copyright (C) 1998,1999,2001,2002 00003 //# Associated Universities, Inc. Washington DC, USA. 00004 //# 00005 //# This library is free software; you can redistribute it and/or modify it 00006 //# under the terms of the GNU Library General Public License as published by 00007 //# the Free Software Foundation; either version 2 of the License, or (at your 00008 //# option) any later version. 00009 //# 00010 //# This library is distributed in the hope that it will be useful, but WITHOUT 00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 00012 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 00013 //# License for more details. 00014 //# 00015 //# You should have received a copy of the GNU Library General Public License 00016 //# along with this library; if not, write to the Free Software Foundation, 00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA. 00018 //# 00019 //# Correspondence concerning AIPS++ should be addressed as follows: 00020 //# Internet email: aips2-request@nrao.edu. 00021 //# Postal address: AIPS++ Project Office 00022 //# National Radio Astronomy Observatory 00023 //# 520 Edgemont Road 00024 //# Charlottesville, VA 22903-2475 USA 00025 //# 00026 //# $Id$ 00027 00028 #ifndef TABLES_COLUMNSINDEX_H 00029 #define TABLES_COLUMNSINDEX_H 00030 00031 00032 //# Includes 00033 #include <casacore/casa/aips.h> 00034 #include <casacore/tables/Tables/Table.h> 00035 #include <casacore/casa/Arrays/Vector.h> 00036 #include <casacore/casa/Containers/Block.h> 00037 #include <casacore/casa/Containers/Record.h> 00038 00039 namespace casacore { //# NAMESPACE CASACORE - BEGIN 00040 00041 //# Forward Declarations 00042 class String; 00043 class TableColumn; 00044 template<typename T> class RecordFieldPtr; 00045 00046 // <summary> 00047 // Index to one or more columns in a table. 00048 // </summary> 00049 00050 // <use visibility=export> 00051 00052 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="tColumnsIndex.cc" demos=""> 00053 // </reviewed> 00054 00055 // <prerequisite> 00056 // <li> <linkto class=Table>Table</linkto> 00057 // <li> <linkto class=Record>Record</linkto> 00058 // <li> <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> 00059 // </prerequisite> 00060 00061 // <synopsis> 00062 // This class makes it possible to use transient indices on top 00063 // of tables in order to speed up the process of finding rows 00064 // based on a given key or key range. 00065 // When constructing a <src>ColumnsIndex</src> object, one has to define 00066 // which columns form the key for this index on the given 00067 // <src>table</src> object. 00068 // Only scalar columns are supported. The data in the given columns 00069 // will be read, sorted (if needed), and stored in memory. 00070 // When looking up a key or key range, the class will use a fast binary 00071 // search on the data held in memory. 00072 // <p> 00073 // The <src>ColumnsIndex</src> object contains a 00074 // <linkto class=Record>Record</linkto> object which can be used 00075 // to define the key to be looked up. The record contains a field for 00076 // each column in the index (with the same name and data type). 00077 // The fastest way to fill the key is by creating a 00078 // <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> object for 00079 // each field in the record (see the example) and fill it as needed. 00080 // However, one can also use the <src>Record::define</src> function, 00081 // but that is slower. 00082 // <br> 00083 // A second record is available to define the upper key 00084 // when a key range has to be looked up. The keys can be accessed 00085 // using the various <src>accessKey</src> functions. 00086 // <p> 00087 // When a key is defined, the <src>getRowNumbers</src> function can be 00088 // used to find the table rows containing the given key (range). 00089 // Function <src>getRowNumber</src> can be used if all keys in the index 00090 // are unique (which can be tested with the <src>isUnique</src> function). 00091 // <p> 00092 // Instead of using the internal records holding the keys, one can also 00093 // pass its own Record object to <src>getRowNumbers</src>. 00094 // However, it will be slower. 00095 // <p> 00096 // When constructing the object, the user can supply his own compare 00097 // function. The default compare function compares each field of the 00098 // key in the normal way. A user's compare function makes it possible 00099 // to compare in a special way. E.g. one could use near instead of == 00100 // on floating point fields. Another example (which is shown in one 00101 // of the examples below) makes it possible to find a key in an 00102 // index consisting of a time and width. 00103 // <p> 00104 // After an index is created, it is possible to change the data 00105 // in the underlying columns. However, the <src>ColumnsIndex</src> can 00106 // not detect if the column data have changed. It can only detect if 00107 // the number of rows has changed. If the column data have changed, 00108 // the user has to use the <src>setChanged</src> function to indicate 00109 // that all columns or a particular column has changed. 00110 // <br>If data have changed, the entire index will be recreated by 00111 // rereading and optionally resorting the data. This will be deferred 00112 // until the next key lookup. 00113 // </synopsis> 00114 00115 // <example> 00116 // Suppose one has an antenna table with key ANTENNA. 00117 // <srcblock> 00118 // // Open the table and make an index for column ANTENNA. 00119 // Table tab("antenna.tab") 00120 // ColumnsIndex colInx(tab, "ANTENNA"); 00121 // // Make a RecordFieldPtr for the ANTENNA field in the index key record. 00122 // // Its data type has to match the data type of the column. 00123 // RecordFieldPtr<Int> antFld(colInx.accessKey(), "ANTENNA"); 00124 // // Now loop in some way and find the row for the antenna 00125 // // involved in that loop. 00126 // Bool found; 00127 // while (...) { 00128 // // Fill the key field and get the row number. 00129 // // ANTENNA is a unique key, so only one row number matches. 00130 // // Otherwise function getRowNumbers had to be used. 00131 // *antFld = antenna; 00132 // uInt antRownr = colInx.getRowNumber (found); 00133 // if (!found) { 00134 // cout << "Antenna " << antenna << " is unknown" << endl; 00135 // } else { 00136 // // antRownr can now be used to get data from that row in 00137 // // the antenna table. 00138 // } 00139 // } 00140 // </srcblock> 00141 // 00142 // The following example shows how multiple keys can be used and how 00143 // a search on a range can be done. 00144 // <srcblock> 00145 // Table tab("sometable") 00146 // // Note that TIME is the main key. 00147 // // Also note that stringToVector (in ArrayUtil.h) is a handy 00148 // // way to convert a String to a Vector<String>. 00149 // ColumnsIndex colInx(tab, stringToVector("TIME,ANTENNA")); 00150 // // Make a RecordFieldPtr for the fields in lower and upper key records. 00151 // RecordFieldPtr<Double> timeLow(colInx.accessLowerKey(), "TIME"); 00152 // RecordFieldPtr<Int> antLow(colInx.accessLowerKey(), "ANTENNA"); 00153 // RecordFieldPtr<Double> timeUpp(colInx.accessUpperKey(), "TIME"); 00154 // RecordFieldPtr<Int> antUpp(colInx.accessUpperKey(), "ANTENNA"); 00155 // while (...) { 00156 // // Fill the key fields. 00157 // *timeLow = ...; 00158 // *antLow = ...; 00159 // *timeUpp = ...; 00160 // *antUpp = ...; 00161 // // Find the row numbers for keys between low and upp (inclusive). 00162 // Vector<uInt> rows = colInx.getRowNumbers (True, True); 00163 // } 00164 // </srcblock> 00165 // 00166 // The following example shows how a specific compare function 00167 // could look like. A function like this will actually be used in the 00168 // calibration software. 00169 // <br> 00170 // The table for which the index is built, has rows with the TIME as its key. 00171 // However, each row is valid for a given interval, where TIME gives 00172 // the middle of the interval and WIDTH the length of the interval. 00173 // This means that the compare function has to test whether the key 00174 // is part of the interval. 00175 // <srcblock> 00176 // Int myCompare (const Block<void*>& fieldPtrs, 00177 // const Block<void*>& dataPtrs, 00178 // const Block<Int>& dataTypes, 00179 // Int index) 00180 // { 00181 // // Assert (for performance only in debug mode) that the correct 00182 // // fields are used. 00183 // DebugAssert (dataTypes.nelements() == 2, AipsError); 00184 // DebugAssert (dataTypes[0] == TpDouble && dataTypes[1] == TpDouble, 00185 // AipsError); 00186 // // Now get the key to be looked up 00187 // // (an awfully looking cast has to be used). 00188 // const Double key = *(*(const RecordFieldPtr<Double>*)(fieldPtrs[0])); 00189 // // Get the time and width of the entry to be compared. 00190 // const Double time = ((const Double*)(dataPtrs[0]))[index]; 00191 // const Double width = ((const Double*)(dataPtrs[1]))[index]; 00192 // const Double start = time - width/2; 00193 // const Double end = time + width/2; 00194 // // Test if the key is before, after, or in the interval 00195 // // (representing less, greater, equal). 00196 // if (key < start) { 00197 // return -1; 00198 // } else if (key > end) { 00199 // return 1; 00200 // } 00201 // return 0; 00202 // } 00203 // 00204 // // Now use this compare function in an actual index. 00205 // Table tab("sometable") 00206 // ColumnsIndex colInx(tab, stringToVector("TIME,WIDTH"), myCompare); 00207 // // Make a RecordFieldPtr for the TIME field in the key record. 00208 // // Note that although the WIDTH is part of the index, it is 00209 // // not an actual key. So it does not need to be filled in. 00210 // RecordFieldPtr<Double> time(colInx.accessLowerKey(), "TIME"); 00211 // Bool found; 00212 // while (...) { 00213 // // Fill the key field. 00214 // *time = ...; 00215 // // Find the row number for this time. 00216 // uInt rownr = colInx.getRowNumber (found); 00217 // } 00218 // </srcblock> 00219 // </example> 00220 00221 // <motivation> 00222 // The calibration software needs to lookup keys in calibration tables 00223 // very frequently. This class makes that process much easier and faster. 00224 // </motivation> 00225 00226 00227 class ColumnsIndex 00228 { 00229 public: 00230 // Define the signature of a comparison function. 00231 // The first block contains pointers to <src>RecordFieldPtr<T></src> 00232 // objects holding the key to be looked up. 00233 // The second block contains pointers to the column data. 00234 // The <src>index</src> argument gives the index in the column data. 00235 // The third block contains data types of those blocks (TpBool, etc.). 00236 // The function should return -1 if key is less than data, 00237 // 0 if equal, 1 if greater. 00238 // <br>An example above shows how a compare function can be used. 00239 typedef Int Compare (const Block<void*>& fieldPtrs, 00240 const Block<void*>& dataPtrs, 00241 const Block<Int>& dataTypes, 00242 Int index); 00243 00244 // Create an index on the given table for the given column. 00245 // The column has to be a scalar column. 00246 // If <src>noSort==True</src>, the table is already in order of that 00247 // column and the sort step will not be done. 00248 // The default compare function is provided by this class. It simply 00249 // compares each field in the key. 00250 ColumnsIndex (const Table&, const String& columnName, 00251 Compare* compareFunction = 0, Bool noSort = False); 00252 00253 // Create an index on the given table for the given columns, thus 00254 // the key is formed by multiple columns. 00255 // The columns have to be scalar columns. 00256 // If <src>noSort==True</src>, the table is already in order of those 00257 // columns and the sort step will not be done. 00258 // The default compare function is provided by this class. It simply 00259 // compares each field in the key. 00260 ColumnsIndex (const Table&, const Vector<String>& columnNames, 00261 Compare* compareFunction = 0, Bool noSort = False); 00262 00263 // Copy constructor (copy semantics). 00264 ColumnsIndex (const ColumnsIndex& that); 00265 00266 ~ColumnsIndex(); 00267 00268 // Assignment (copy semantics). 00269 ColumnsIndex& operator= (const ColumnsIndex& that); 00270 00271 // Are all keys in the index unique? 00272 Bool isUnique() const; 00273 00274 // Return the names of the columns forming the index. 00275 Vector<String> columnNames() const; 00276 00277 // Get the table for which this index is created. 00278 const Table& table() const; 00279 00280 // Something has changed in the table, so the index has to be recreated. 00281 // The 2nd version indicates that a specific column has changed, 00282 // so only that column is reread. If that column is not part of the 00283 // index, nothing will be done. 00284 // <br>Note that the class itself is keeping track if the number of 00285 // rows in the table changes. 00286 // <group> 00287 void setChanged(); 00288 void setChanged (const String& columnName); 00289 // </group> 00290 00291 // Access the key values. 00292 // These functions allow you to create RecordFieldPtr<T> objects 00293 // for each field in the key. In this way you can quickly fill in 00294 // the key. 00295 // <br>The records have a fixed type, so you cannot add or delete fields. 00296 // <group> 00297 Record& accessKey(); 00298 Record& accessLowerKey(); 00299 Record& accessUpperKey(); 00300 // </group> 00301 00302 // Find the row number matching the key. All keys have to be unique, 00303 // otherwise an exception is thrown. 00304 // If no match is found, <src>found</src> is set to False. 00305 // The 2nd version makes it possible to pass in your own Record 00306 // instead of using the internal record via the <src>accessKey</src> 00307 // functions. Note that the given Record will be copied to the internal 00308 // record, thus overwrites it. 00309 // <group> 00310 uInt getRowNumber (Bool& found); 00311 uInt getRowNumber (Bool& found, const Record& key); 00312 // </group> 00313 00314 // Find the row numbers matching the key. It should be used instead 00315 // of <src>getRowNumber</src> if the same key can exist multiple times. 00316 // The 2nd version makes it possible to pass in your own Record 00317 // instead of using the internal record via the <src>accessKey</src> 00318 // functions. Note that the given Record will be copied to the internal 00319 // record, thus overwrites it. 00320 // <group> 00321 Vector<uInt> getRowNumbers(); 00322 Vector<uInt> getRowNumbers (const Record& key); 00323 // </group> 00324 00325 // Find the row numbers matching the key range. The boolean arguments 00326 // tell if the lower and upper key are part of the range. 00327 // The 2nd version makes it possible to pass in your own Records 00328 // instead of using the internal records via the 00329 // <src>accessLower/UpperKey</src> functions. 00330 // Note that the given Records will be copied to the internal 00331 // records, thus overwrite them. 00332 // <group> 00333 Vector<uInt> getRowNumbers (Bool lowerInclusive, Bool upperInclusive); 00334 Vector<uInt> getRowNumbers (const Record& lower, const Record& upper, 00335 Bool lowerInclusive, Bool upperInclusive); 00336 // </group> 00337 00338 // Fill the internal key field from the corresponding external key. 00339 // The data type may differ. 00340 static void copyKeyField (void* field, int dtype, const Record& key); 00341 00342 protected: 00343 // Copy that object to this. 00344 void copy (const ColumnsIndex& that); 00345 00346 // Delete all data in the object. 00347 void deleteObjects(); 00348 00349 // Add a column to the record description for the keys. 00350 void addColumnToDesc (RecordDesc& description, 00351 const TableColumn& column); 00352 00353 // Create the various members in the object. 00354 void create (const Table& table, const Vector<String>& columnNames, 00355 Compare* compareFunction, Bool noSort); 00356 00357 // Make the various internal <src>RecordFieldPtr</src> objects. 00358 void makeObjects (const RecordDesc& description); 00359 00360 // Read the data of the columns forming the index, sort them and 00361 // form the index. 00362 void readData(); 00363 00364 // Do a binary search on <src>itsUniqueIndex</src> for the key in 00365 // <src>fieldPtrs</src>. 00366 // If the key is found, <src>found</src> is set to True and the index 00367 // in <src>itsUniqueIndex</src> is returned. 00368 // If not found, <src>found</src> is set to False and the index 00369 // of the next higher key is returned. 00370 uInt bsearch (Bool& found, const Block<void*>& fieldPtrs) const; 00371 00372 // Compare the key in <src>fieldPtrs</src> with the given index entry. 00373 // -1 is returned when less, 0 when equal, 1 when greater. 00374 static Int compare (const Block<void*>& fieldPtrs, 00375 const Block<void*>& dataPtrs, 00376 const Block<Int>& dataTypes, 00377 Int index); 00378 00379 // Fill the row numbers vector for the given start till end in the 00380 // <src>itsUniqueIndex</src> vector (end is not inclusive). 00381 void fillRowNumbers (Vector<uInt>& rows, uInt start, uInt end) const; 00382 00383 private: 00384 // Fill the internal key fields from the corresponding external key. 00385 void copyKey (Block<void*> fields, const Record& key); 00386 00387 // Fill the internal key field from the corresponding external key. 00388 // The data type may differ. 00389 template <typename T> 00390 static void copyKeyField (RecordFieldPtr<T>& field, const Record& key) 00391 { 00392 key.get (field.name(), *field); 00393 } 00394 00395 Table itsTable; 00396 uInt itsNrrow; 00397 Record* itsLowerKeyPtr; 00398 Record* itsUpperKeyPtr; 00399 Block<Int> itsDataTypes; 00400 Block<void*> itsDataVectors; 00401 Block<void*> itsData; //# pointer to data in itsDataVectors 00402 //# The following 2 blocks are actually blocks of RecordFieldPtr<T>*. 00403 //# They are used for fast access to the records. 00404 Block<void*> itsLowerFields; 00405 Block<void*> itsUpperFields; 00406 Block<Bool> itsColumnChanged; 00407 Bool itsChanged; 00408 Bool itsNoSort; //# True = sort is not needed 00409 Compare* itsCompare; //# Compare function 00410 Vector<uInt> itsDataIndex; //# Row numbers of all keys 00411 //# Indices in itsDataIndex for each unique key 00412 Vector<uInt> itsUniqueIndex; 00413 uInt* itsDataInx; //# pointer to data in itsDataIndex 00414 uInt* itsUniqueInx; //# pointer to data in itsUniqueIndex 00415 }; 00416 00417 00418 inline Bool ColumnsIndex::isUnique() const 00419 { 00420 return (itsDataIndex.nelements() == itsUniqueIndex.nelements()); 00421 } 00422 inline const Table& ColumnsIndex::table() const 00423 { 00424 return itsTable; 00425 } 00426 inline Record& ColumnsIndex::accessKey() 00427 { 00428 return *itsLowerKeyPtr; 00429 } 00430 inline Record& ColumnsIndex::accessLowerKey() 00431 { 00432 return *itsLowerKeyPtr; 00433 } 00434 inline Record& ColumnsIndex::accessUpperKey() 00435 { 00436 return *itsUpperKeyPtr; 00437 } 00438 00439 00440 } //# NAMESPACE CASACORE - END 00441 00442 #endif