ColumnsIndex.h

Go to the documentation of this file.
00001 //# ColumnsIndex.h: Index to one or more columns in a table
00002 //# Copyright (C) 1998,1999,2001,2002
00003 //# Associated Universities, Inc. Washington DC, USA.
00004 //#
00005 //# This library is free software; you can redistribute it and/or modify it
00006 //# under the terms of the GNU Library General Public License as published by
00007 //# the Free Software Foundation; either version 2 of the License, or (at your
00008 //# option) any later version.
00009 //#
00010 //# This library is distributed in the hope that it will be useful, but WITHOUT
00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00013 //# License for more details.
00014 //#
00015 //# You should have received a copy of the GNU Library General Public License
00016 //# along with this library; if not, write to the Free Software Foundation,
00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00018 //#
00019 //# Correspondence concerning AIPS++ should be addressed as follows:
00020 //#        Internet email: aips2-request@nrao.edu.
00021 //#        Postal address: AIPS++ Project Office
00022 //#                        National Radio Astronomy Observatory
00023 //#                        520 Edgemont Road
00024 //#                        Charlottesville, VA 22903-2475 USA
00025 //#
00026 //# $Id$
00027 
00028 #ifndef TABLES_COLUMNSINDEX_H
00029 #define TABLES_COLUMNSINDEX_H
00030 
00031 
00032 //# Includes
00033 #include <casacore/casa/aips.h>
00034 #include <casacore/tables/Tables/Table.h>
00035 #include <casacore/casa/Arrays/Vector.h>
00036 #include <casacore/casa/Containers/Block.h>
00037 #include <casacore/casa/Containers/Record.h>
00038 
00039 namespace casacore { //# NAMESPACE CASACORE - BEGIN
00040 
00041 //# Forward Declarations
00042 class String;
00043 class TableColumn;
00044 template<typename T> class RecordFieldPtr;
00045 
00046 // <summary>
00047 // Index to one or more columns in a table.
00048 // </summary>
00049 
00050 // <use visibility=export>
00051 
00052 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="tColumnsIndex.cc" demos="">
00053 // </reviewed>
00054 
00055 // <prerequisite>
00056 //   <li> <linkto class=Table>Table</linkto>
00057 //   <li> <linkto class=Record>Record</linkto>
00058 //   <li> <linkto class=RecordFieldPtr>RecordFieldPtr</linkto>
00059 // </prerequisite>
00060 
00061 // <synopsis>
00062 // This class makes it possible to use transient indices on top
00063 // of tables in order to speed up the process of finding rows
00064 // based on a given key or key range.
00065 // When constructing a <src>ColumnsIndex</src> object, one has to define
00066 // which columns form the key for this index on the given
00067 // <src>table</src> object.
00068 // Only scalar columns are supported. The data in the given columns
00069 // will be read, sorted (if needed), and stored in memory.
00070 // When looking up a key or key range, the class will use a fast binary
00071 // search on the data held in memory.
00072 // <p>
00073 // The <src>ColumnsIndex</src> object contains a
00074 // <linkto class=Record>Record</linkto> object which can be used
00075 // to define the key to be looked up. The record contains a field for
00076 // each column in the index (with the same name and data type).
00077 // The fastest way to fill the key is by creating a
00078 // <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> object for
00079 // each field in the record (see the example) and fill it as needed.
00080 // However, one can also use the <src>Record::define</src> function,
00081 // but that is slower.
00082 // <br>
00083 // A second record is available to define the upper key
00084 // when a key range has to be looked up. The keys can be accessed
00085 // using the various <src>accessKey</src> functions.
00086 // <p>
00087 // When a key is defined, the <src>getRowNumbers</src> function can be
00088 // used to find the table rows containing the given key (range).
00089 // Function <src>getRowNumber</src> can be used if all keys in the index
00090 // are unique (which can be tested with the <src>isUnique</src> function).
00091 // <p>
00092 // Instead of using the internal records holding the keys, one can also
00093 // pass its own Record object to <src>getRowNumbers</src>.
00094 // However, it will be slower.
00095 // <p>
00096 // When constructing the object, the user can supply his own compare
00097 // function. The default compare function compares each field of the
00098 // key in the normal way. A user's compare function makes it possible
00099 // to compare in a special way. E.g. one could use near instead of ==
00100 // on floating point fields. Another example (which is shown in one
00101 // of the examples below) makes it possible to find a key in an
00102 // index consisting of a time and width.
00103 // <p>
00104 // After an index is created, it is possible to change the data
00105 // in the underlying columns. However, the <src>ColumnsIndex</src> can
00106 // not detect if the column data have changed. It can only detect if
00107 // the number of rows has changed. If the column data have changed,
00108 // the user has to use the <src>setChanged</src> function to indicate
00109 // that all columns or a particular column has changed.
00110 // <br>If data have changed, the entire index will be recreated by
00111 // rereading and optionally resorting the data. This will be deferred
00112 // until the next key lookup.
00113 // </synopsis>
00114 
00115 // <example>
00116 // Suppose one has an antenna table with key ANTENNA.
00117 // <srcblock>
00118 // // Open the table and make an index for column ANTENNA.
00119 // Table tab("antenna.tab")
00120 // ColumnsIndex colInx(tab, "ANTENNA");
00121 // // Make a RecordFieldPtr for the ANTENNA field in the index key record.
00122 // // Its data type has to match the data type of the column.
00123 // RecordFieldPtr<Int> antFld(colInx.accessKey(), "ANTENNA");
00124 // // Now loop in some way and find the row for the antenna
00125 // // involved in that loop.
00126 // Bool found;
00127 // while (...) {
00128 //     // Fill the key field and get the row number.
00129 //     // ANTENNA is a unique key, so only one row number matches.
00130 //     // Otherwise function getRowNumbers had to be used.
00131 //     *antFld = antenna;
00132 //     uInt antRownr = colInx.getRowNumber (found);
00133 //     if (!found) {
00134 //         cout << "Antenna " << antenna << " is unknown" << endl;
00135 //     } else {
00136 //         // antRownr can now be used to get data from that row in
00137 //         // the antenna table.
00138 //     }
00139 // }
00140 // </srcblock>
00141 //
00142 // The following example shows how multiple keys can be used and how
00143 // a search on a range can be done.
00144 // <srcblock>
00145 // Table tab("sometable")
00146 // // Note that TIME is the main key.
00147 // // Also note that stringToVector (in ArrayUtil.h) is a handy
00148 // // way to convert a String to a Vector<String>.
00149 // ColumnsIndex colInx(tab, stringToVector("TIME,ANTENNA"));
00150 // // Make a RecordFieldPtr for the fields in lower and upper key records.
00151 // RecordFieldPtr<Double> timeLow(colInx.accessLowerKey(), "TIME");
00152 // RecordFieldPtr<Int> antLow(colInx.accessLowerKey(), "ANTENNA");
00153 // RecordFieldPtr<Double> timeUpp(colInx.accessUpperKey(), "TIME");
00154 // RecordFieldPtr<Int> antUpp(colInx.accessUpperKey(), "ANTENNA");
00155 // while (...) {
00156 //     // Fill the key fields.
00157 //     *timeLow = ...;
00158 //     *antLow = ...;
00159 //     *timeUpp = ...;
00160 //     *antUpp = ...;
00161 //     // Find the row numbers for keys between low and upp (inclusive).
00162 //     Vector<uInt> rows = colInx.getRowNumbers (True, True);
00163 // }
00164 // </srcblock>
00165 //
00166 // The following example shows how a specific compare function
00167 // could look like. A function like this will actually be used in the
00168 // calibration software.
00169 // <br>
00170 // The table for which the index is built, has rows with the TIME as its key.
00171 // However, each row is valid for a given interval, where TIME gives
00172 // the middle of the interval and WIDTH the length of the interval.
00173 // This means that the compare function has to test whether the key
00174 // is part of the interval.
00175 // <srcblock>
00176 // Int myCompare (const Block<void*>& fieldPtrs,
00177 //                const Block<void*>& dataPtrs,
00178 //                const Block<Int>& dataTypes,
00179 //                Int index)
00180 // {
00181 //   // Assert (for performance only in debug mode) that the correct
00182 //   // fields are used.
00183 //   DebugAssert (dataTypes.nelements() == 2, AipsError);
00184 //   DebugAssert (dataTypes[0] == TpDouble  &&  dataTypes[1] == TpDouble,
00185 //                AipsError);
00186 //   // Now get the key to be looked up
00187 //   // (an awfully looking cast has to be used).
00188 //   const Double key = *(*(const RecordFieldPtr<Double>*)(fieldPtrs[0]));
00189 //   // Get the time and width of the entry to be compared.
00190 //   const Double time = ((const Double*)(dataPtrs[0]))[index];
00191 //   const Double width = ((const Double*)(dataPtrs[1]))[index];
00192 //   const Double start = time - width/2;
00193 //   const Double end = time + width/2;
00194 //   // Test if the key is before, after, or in the interval
00195 //   // (representing less, greater, equal).
00196 //   if (key < start) {
00197 //     return -1;
00198 //   } else if (key > end) {
00199 //     return 1;
00200 //   }
00201 //   return 0;
00202 // }
00203 //
00204 // // Now use this compare function in an actual index.
00205 // Table tab("sometable")
00206 // ColumnsIndex colInx(tab, stringToVector("TIME,WIDTH"), myCompare);
00207 // // Make a RecordFieldPtr for the TIME field in the key record.
00208 // // Note that although the WIDTH is part of the index, it is
00209 // // not an actual key. So it does not need to be filled in.
00210 // RecordFieldPtr<Double> time(colInx.accessLowerKey(), "TIME");
00211 // Bool found;
00212 // while (...) {
00213 //     // Fill the key field.
00214 //     *time = ...;
00215 //     // Find the row number for this time.
00216 //     uInt rownr = colInx.getRowNumber (found);
00217 // }
00218 // </srcblock>
00219 // </example>
00220 
00221 // <motivation>
00222 // The calibration software needs to lookup keys in calibration tables
00223 // very frequently. This class makes that process much easier and faster.
00224 // </motivation>
00225 
00226 
00227 class ColumnsIndex
00228 {
00229 public:
00230     // Define the signature of a comparison function.
00231     // The first block contains pointers to <src>RecordFieldPtr<T></src>
00232     // objects holding the key to be looked up.
00233     // The second block contains pointers to the column data.
00234     // The <src>index</src> argument gives the index in the column data.
00235     // The third block contains data types of those blocks (TpBool, etc.).
00236     // The function should return -1 if key is less than data,
00237     // 0 if equal, 1 if greater.
00238     // <br>An example above shows how a compare function can be used.
00239     typedef Int Compare (const Block<void*>& fieldPtrs,
00240                          const Block<void*>& dataPtrs,
00241                          const Block<Int>& dataTypes,
00242                          Int index);
00243 
00244     // Create an index on the given table for the given column.
00245     // The column has to be a scalar column.
00246     // If <src>noSort==True</src>, the table is already in order of that
00247     // column and the sort step will not be done.
00248     // The default compare function is provided by this class. It simply
00249     // compares each field in the key.
00250     ColumnsIndex (const Table&, const String& columnName,
00251                   Compare* compareFunction = 0, Bool noSort = False);
00252 
00253     // Create an index on the given table for the given columns, thus
00254     // the key is formed by multiple columns.
00255     // The columns have to be scalar columns.
00256     // If <src>noSort==True</src>, the table is already in order of those
00257     // columns and the sort step will not be done.
00258     // The default compare function is provided by this class. It simply
00259     // compares each field in the key.
00260     ColumnsIndex (const Table&, const Vector<String>& columnNames,
00261                   Compare* compareFunction = 0, Bool noSort = False);
00262 
00263     // Copy constructor (copy semantics).
00264     ColumnsIndex (const ColumnsIndex& that);
00265 
00266     ~ColumnsIndex();
00267 
00268     // Assignment (copy semantics).
00269     ColumnsIndex& operator= (const ColumnsIndex& that);
00270 
00271     // Are all keys in the index unique?
00272     Bool isUnique() const;
00273 
00274     // Return the names of the columns forming the index.
00275     Vector<String> columnNames() const;
00276 
00277     // Get the table for which this index is created.
00278     const Table& table() const;
00279 
00280     // Something has changed in the table, so the index has to be recreated.
00281     // The 2nd version indicates that a specific column has changed,
00282     // so only that column is reread. If that column is not part of the
00283     // index, nothing will be done.
00284     // <br>Note that the class itself is keeping track if the number of
00285     // rows in the table changes.
00286     // <group>
00287     void setChanged();
00288     void setChanged (const String& columnName);
00289     // </group>
00290 
00291     // Access the key values.
00292     // These functions allow you to create RecordFieldPtr<T> objects
00293     // for each field in the key. In this way you can quickly fill in
00294     // the key.
00295     // <br>The records have a fixed type, so you cannot add or delete fields.
00296     // <group>
00297     Record& accessKey();
00298     Record& accessLowerKey();
00299     Record& accessUpperKey();
00300     // </group>
00301 
00302     // Find the row number matching the key. All keys have to be unique,
00303     // otherwise an exception is thrown.
00304     // If no match is found, <src>found</src> is set to False.
00305     // The 2nd version makes it possible to pass in your own Record
00306     // instead of using the internal record via the <src>accessKey</src>
00307     // functions. Note that the given Record will be copied to the internal
00308     // record, thus overwrites it.
00309     // <group>
00310     uInt getRowNumber (Bool& found);
00311     uInt getRowNumber (Bool& found, const Record& key);
00312     // </group>
00313 
00314     // Find the row numbers matching the key. It should be used instead
00315     // of <src>getRowNumber</src> if the same key can exist multiple times.
00316     // The 2nd version makes it possible to pass in your own Record
00317     // instead of using the internal record via the <src>accessKey</src>
00318     // functions. Note that the given Record will be copied to the internal
00319     // record, thus overwrites it.
00320     // <group>
00321     Vector<uInt> getRowNumbers();
00322     Vector<uInt> getRowNumbers (const Record& key);
00323     // </group>
00324 
00325     // Find the row numbers matching the key range. The boolean arguments
00326     // tell if the lower and upper key are part of the range.
00327     // The 2nd version makes it possible to pass in your own Records
00328     // instead of using the internal records via the
00329     // <src>accessLower/UpperKey</src> functions.
00330     // Note that the given Records will be copied to the internal
00331     // records, thus overwrite them.
00332     // <group>
00333     Vector<uInt> getRowNumbers (Bool lowerInclusive, Bool upperInclusive);
00334     Vector<uInt> getRowNumbers (const Record& lower, const Record& upper,
00335                                 Bool lowerInclusive, Bool upperInclusive);
00336     // </group>
00337 
00338     // Fill the internal key field from the corresponding external key.
00339     // The data type may differ.
00340     static void copyKeyField (void* field, int dtype, const Record& key);
00341 
00342 protected:
00343     // Copy that object to this.
00344     void copy (const ColumnsIndex& that);
00345 
00346     // Delete all data in the object.
00347     void deleteObjects();
00348 
00349     // Add a column to the record description for the keys.
00350     void addColumnToDesc (RecordDesc& description,
00351                           const TableColumn& column);
00352 
00353     // Create the various members in the object.
00354     void create (const Table& table, const Vector<String>& columnNames,
00355                  Compare* compareFunction, Bool noSort);
00356 
00357     // Make the various internal <src>RecordFieldPtr</src> objects.
00358     void makeObjects (const RecordDesc& description);
00359 
00360     // Read the data of the columns forming the index, sort them and
00361     // form the index.
00362     void readData();
00363 
00364     // Do a binary search on <src>itsUniqueIndex</src> for the key in
00365     // <src>fieldPtrs</src>.
00366     // If the key is found, <src>found</src> is set to True and the index
00367     // in <src>itsUniqueIndex</src> is returned.
00368     // If not found, <src>found</src> is set to False and the index
00369     // of the next higher key is returned.
00370     uInt bsearch (Bool& found, const Block<void*>& fieldPtrs) const;
00371 
00372     // Compare the key in <src>fieldPtrs</src> with the given index entry.
00373     // -1 is returned when less, 0 when equal, 1 when greater.
00374     static Int compare (const Block<void*>& fieldPtrs,
00375                         const Block<void*>& dataPtrs,
00376                         const Block<Int>& dataTypes,
00377                         Int index);
00378 
00379     // Fill the row numbers vector for the given start till end in the
00380     // <src>itsUniqueIndex</src> vector (end is not inclusive).
00381     void fillRowNumbers (Vector<uInt>& rows, uInt start, uInt end) const;
00382 
00383 private:
00384     // Fill the internal key fields from the corresponding external key.
00385     void copyKey (Block<void*> fields, const Record& key);
00386 
00387     // Fill the internal key field from the corresponding external key.
00388     // The data type may differ.
00389     template <typename T>
00390     static void copyKeyField (RecordFieldPtr<T>& field, const Record& key)
00391     {
00392       key.get (field.name(), *field);
00393     }
00394 
00395     Table  itsTable;
00396     uInt   itsNrrow;
00397     Record* itsLowerKeyPtr;
00398     Record* itsUpperKeyPtr;
00399     Block<Int>   itsDataTypes;
00400     Block<void*> itsDataVectors;
00401     Block<void*> itsData;              //# pointer to data in itsDataVectors
00402     //# The following 2 blocks are actually blocks of RecordFieldPtr<T>*.
00403     //# They are used for fast access to the records.
00404     Block<void*> itsLowerFields;
00405     Block<void*> itsUpperFields;
00406     Block<Bool>  itsColumnChanged;
00407     Bool         itsChanged;
00408     Bool         itsNoSort;            //# True = sort is not needed
00409     Compare*     itsCompare;           //# Compare function
00410     Vector<uInt> itsDataIndex;         //# Row numbers of all keys
00411     //# Indices in itsDataIndex for each unique key
00412     Vector<uInt> itsUniqueIndex;
00413     uInt*        itsDataInx;           //# pointer to data in itsDataIndex
00414     uInt*        itsUniqueInx;         //# pointer to data in itsUniqueIndex
00415 };
00416 
00417 
00418 inline Bool ColumnsIndex::isUnique() const
00419 {
00420     return (itsDataIndex.nelements() == itsUniqueIndex.nelements());
00421 }
00422 inline const Table& ColumnsIndex::table() const
00423 {
00424     return itsTable;
00425 }
00426 inline Record& ColumnsIndex::accessKey()
00427 {
00428     return *itsLowerKeyPtr;
00429 }
00430 inline Record& ColumnsIndex::accessLowerKey()
00431 {
00432     return *itsLowerKeyPtr;
00433 }
00434 inline Record& ColumnsIndex::accessUpperKey()
00435 {
00436     return *itsUpperKeyPtr;
00437 }
00438 
00439 
00440 } //# NAMESPACE CASACORE - END
00441 
00442 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated on 31 Aug 2016 for casa by  doxygen 1.6.1