ColumnsIndexArray.h

Go to the documentation of this file.
00001 //# ColumnsIndexArray.h: Index to an array column in a table
00002 //# Copyright (C) 2001,2002
00003 //# Associated Universities, Inc. Washington DC, USA.
00004 //#
00005 //# This library is free software; you can redistribute it and/or modify it
00006 //# under the terms of the GNU Library General Public License as published by
00007 //# the Free Software Foundation; either version 2 of the License, or (at your
00008 //# option) any later version.
00009 //#
00010 //# This library is distributed in the hope that it will be useful, but WITHOUT
00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00013 //# License for more details.
00014 //#
00015 //# You should have received a copy of the GNU Library General Public License
00016 //# along with this library; if not, write to the Free Software Foundation,
00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00018 //#
00019 //# Correspondence concerning AIPS++ should be addressed as follows:
00020 //#        Internet email: aips2-request@nrao.edu.
00021 //#        Postal address: AIPS++ Project Office
00022 //#                        National Radio Astronomy Observatory
00023 //#                        520 Edgemont Road
00024 //#                        Charlottesville, VA 22903-2475 USA
00025 //#
00026 //# $Id$
00027 
00028 #ifndef TABLES_COLUMNSINDEXARRAY_H
00029 #define TABLES_COLUMNSINDEXARRAY_H
00030 
00031 
00032 //# Includes
00033 #include <casacore/casa/aips.h>
00034 #include <casacore/tables/Tables/Table.h>
00035 #include <casacore/casa/Arrays/Vector.h>
00036 #include <casacore/casa/Containers/Block.h>
00037 #include <casacore/casa/Containers/Record.h>
00038 
00039 namespace casacore { //# NAMESPACE CASACORE - BEGIN
00040 
00041 //# Forward Declarations
00042 class String;
00043 class TableColumn;
00044 
00045 
00046 // <summary>
00047 // Index to an array column in a table.
00048 // </summary>
00049 
00050 // <use visibility=export>
00051 
00052 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="tColumnsIndexArray.cc" demos="">
00053 // </reviewed>
00054 
00055 // <prerequisite>
00056 //   <li> <linkto class=Table>Table</linkto>
00057 //   <li> <linkto class=Record>Record</linkto>
00058 //   <li> <linkto class=RecordFieldPtr>RecordFieldPtr</linkto>
00059 // </prerequisite>
00060 
00061 // <synopsis>
00062 // This class makes it possible to use transient indices on top
00063 // of an array column in a table in order to speed up the process of
00064 // finding rows based on a given key or key range.
00065 // It is similar to class <linkto class=ColumnsIndex>ColumnsIndex</linkto>
00066 // which is meant for one or more scalar columns.
00067 // <p>
00068 // When constructing a <src>ColumnsIndexArray</src> object, one has to define
00069 // which column forms the key for this index on the given
00070 // <src>table</src> object. 
00071 // Not every data type is supported; only uChar, Short, Int, uInt, and
00072 // String array columns are supported.
00073 // The column can contain arrays of any shape and it can also contain
00074 // empty cells. The class will probably mostly be used for vectors, as
00075 // they seem to be the most logical way to hold multiple keys.
00076 // <br>The data in the given column will be read, sorted,
00077 // and stored in memory. When looking up a key or key range, the class
00078 // will use a fast binary search on the data held in memory.
00079 // <p>
00080 // The <src>ColumnsIndexArray</src> object contains a
00081 // <linkto class=Record>Record</linkto> object which can be used
00082 // to define the key to be looked up. The record contains a field for
00083 // the column in the index (with the same name and data type).
00084 // The fastest way to fill the key is by creating a
00085 // <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> object for
00086 // the field in the record (see the example) and fill it as needed.
00087 // However, one can also use the <src>Record::define</src> function,
00088 // but that is slower.
00089 // <br>
00090 // A second record is available to define the upper key
00091 // in case a key range has to be looked up. The keys can be accessed
00092 // using the various <src>accessKey</src> functions.
00093 // <p>
00094 // When a key is defined, the <src>getRowNumbers</src> function can be
00095 // used to find the table rows containing the given key (range).
00096 // Function <src>getRowNumber</src> can be used to lookup a single key
00097 // if all keys in the index are unique (which can be tested with the
00098 // <src>isUnique</src> function).
00099 // <p>
00100 // Instead of using the internal records holding the keys, one can also
00101 // pass one's own Record object to <src>getRowNumbers</src>.
00102 // However, that will be slower.
00103 // <p>
00104 // After an index is created, it is possible to change the data
00105 // in the underlying column. However, the <src>ColumnsIndexArray</src>
00106 // cannot detect if the column data have changed. It can only detect if
00107 // the number of rows has changed. If the column data have changed,
00108 // the user has to use the <src>setChanged</src> function to indicate
00109 // that the column has changed.
00110 // <br>If data have changed, the entire index will be recreated by
00111 // rereading and resorting the data. This will be deferred
00112 // until the next key lookup.
00113 // </synopsis>
00114 
00115 // <example>
00116 // Suppose one has a table with a column NAME containing vectors.
00117 // <srcblock>
00118 // // Open the table and make an index for the column.
00119 // Table tab("my.tab");
00120 // ColumnsIndexArray colInx(tab, "NAME");
00121 // // Make a RecordFieldPtr for the NAME field in the index key record.
00122 // // Its data type has to match the data type of the column.
00123 // RecordFieldPtr<String> nameFld(colInx.accessKey(), "NAME");
00124 // // Find the row for a given name.
00125 // Bool found;
00126 // // Fill the key field and get the row number.
00127 // // NAME is a unique key, so only one row number matches.
00128 // // Otherwise function getRowNumbers had to be used.
00129 // *nameFld = "MYNAME";
00130 // uInt rownr = colInx.getRowNumber (found);
00131 // if (!found) {
00132 //     cout << "Name MYNAME is unknown" << endl;
00133 // }
00134 // // Now get a range of names and return the row numbers in ascending order.
00135 // // This uses the fact that the 'unique' argument also sorts the data.
00136 // RecordFieldPtr<String> nameUpp(colInx.accessUpperKey(), "NAME");
00137 // *nameFld = "LOWER";
00138 // *nameUpp = "UPPER";
00139 // Vector<uInt> rownrs = colInx.getRowNumbers (True, True, True);
00140 // </srcblock>
00141 // </example>
00142 // <motivation>
00143 // Bob Garwood needed such a class.
00144 // </motivation>
00145 
00146 
00147 class ColumnsIndexArray
00148 {
00149 public:
00150   // Create an index on the given table for the given column.
00151   // The column has to be an array column; as stated in the class
00152   // synopsis only uChar, Short, Int, uInt, and String arrays are
00153   // supported. The column data are read and sorted to build the index;
00154   // this constructor has no argument to skip the sort step.
00155   ColumnsIndexArray (const Table&, const String& columnName);
00156 
00157   // Copy constructor (copy semantics).
00158   ColumnsIndexArray (const ColumnsIndexArray& that);
00159 
00160   ~ColumnsIndexArray();
00161 
00162   // Assignment (copy semantics).
00163   ColumnsIndexArray& operator= (const ColumnsIndexArray& that);
00164 
00165   // Are all keys in the index unique?
00166   Bool isUnique() const;
00167 
00168   // Return the name of the column forming the index.
00169   const String& columnName() const;
00170 
00171   // Get the table for which this index is created.
00172   const Table& table() const;
00173 
00174   // Something has changed in the table, so the index has to be recreated.
00175   // The 2nd version indicates that a specific column has changed,
00176   // so only that column might need to be reread. If that column is not
00177   // the column forming the index, nothing will be done.
00178   // <br>Note that the class itself keeps track of whether the number of
00179   // rows in the table changes.
00180   // <group>
00181   void setChanged();
00182   void setChanged (const String& columnName);
00183   // </group>
00184 
00185   // Access the key values.
00186   // These functions allow you to create RecordFieldPtr<T> objects
00187   // for each field in the key. In this way you can quickly fill in
00188   // the key.
00189   // <br>The records have a fixed type, so you cannot add or delete fields.
00190   // <br>Note that <src>accessKey</src> and <src>accessLowerKey</src>
00191   // are synonyms; they return the same underlying record.
00192   // <group>
00193   Record& accessKey();
00194   Record& accessLowerKey();
00195   Record& accessUpperKey();
00196   // </group>
00197 
00198   // Find the row number matching the key. All keys have to be unique,
00199   // otherwise an exception is thrown.
00200   // If no match is found, <src>found</src> is set to False.
00201   // The 2nd version makes it possible to pass in your own Record
00202   // instead of using the internal record via the <src>accessKey</src>
00203   // functions. Note that the given Record will be copied to the internal
00204   // record, thus overwriting it.
00205   // <group>
00206   uInt getRowNumber (Bool& found);
00207   uInt getRowNumber (Bool& found, const Record& key);
00208   // </group>
00209 
00210   // Find the row numbers matching the key. It should be used instead
00211   // of <src>getRowNumber</src> if the same key can exist multiple times.
00212   // The 2nd version makes it possible to pass in your own Record
00213   // instead of using the internal record via the <src>accessKey</src>
00214   // functions. Note that the given Record will be copied to the internal
00215   // record, thus overwriting it.
00216   // <br>A row can contain multiple equal values. In such a case the
00217   // same row number can occur multiple times in the output vector,
00218   // unless <src>unique</src> is set to True. Note that making the row
00219   // numbers unique implies a sort, so it can also be used to get the
00220   // row numbers in ascending order.
00221   // <group>
00222   Vector<uInt> getRowNumbers (Bool unique=False);
00223   Vector<uInt> getRowNumbers (const Record& key, Bool unique=False);
00224   // </group>
00225 
00226   // Find the row numbers matching the key range. The boolean arguments
00227   // tell if the lower and upper key are part of the range.
00228   // The 2nd version makes it possible to pass in your own Records
00229   // instead of using the internal records via the
00230   // <src>accessLower/UpperKey</src> functions.
00231   // Note that the given Records will be copied to the internal
00232   // records, thus overwriting them.
00233   // <br>A row can contain multiple matching values. In such a case the
00234   // same row number can occur multiple times in the output vector,
00235   // unless <src>unique</src> is set to True. Note that making the row
00236   // numbers unique implies a sort, so it can also be used to get the
00237   // row numbers in ascending order.
00238   // <group>
00239   Vector<uInt> getRowNumbers (Bool lowerInclusive, Bool upperInclusive,
00240                               Bool unique=False);
00241   Vector<uInt> getRowNumbers (const Record& lower, const Record& upper,
00242                               Bool lowerInclusive, Bool upperInclusive,
00243                               Bool unique=False);
00244   // </group>
00245 
00246 protected:
00247   // Copy that object to this.
00248   void copy (const ColumnsIndexArray& that);
00249 
00250   // Delete all data in the object.
00251   void deleteObjects();
00252 
00253   // Add a column to the record description for the keys.
00254   // The column is passed as a TableColumn object; this function takes
00255   // no switch to select between scalar and array columns.
00256   void addColumnToDesc (RecordDesc& description,
00257                         const TableColumn& column);
00258 
00259   // Make the various internal <src>RecordFieldPtr</src> objects.
00260   void makeObjects (const RecordDesc& description);
00261 
00262   // Read the data of the column forming the index, sort them and
00263   // form the index.
00264   void readData();
00265 
00266   // Do a binary search on <src>itsUniqueIndex</src> for the key in
00267   // <src>fieldPtr</src>.
00268   // If the key is found, <src>found</src> is set to True and the index
00269   // in <src>itsUniqueIndex</src> is returned.
00270   // If not found, <src>found</src> is set to False and the index
00271   // of the next higher key is returned.
00272   uInt bsearch (Bool& found, void* fieldPtr) const;
00273 
00274   // Compare the key in <src>fieldPtr</src> with the given index entry.
00275   // -1 is returned when less, 0 when equal, 1 when greater.
00276   static Int compare (void* fieldPtr,
00277                       void* dataPtr,
00278                       Int dataType,
00279                       Int index);
00280 
00281   // Fill the row numbers vector for the given start till end in the
00282   // <src>itsUniqueIndex</src> vector (end is not inclusive).
00283   // If <src>unique</src> is True, the row numbers will be made unique.
00284   void fillRowNumbers (Vector<uInt>& rows, uInt start, uInt end,
00285                        Bool unique) const;
00286 
00287   // Get the data if the column is an array.
00288   // <group>
00289   void getArray (Vector<uChar>& result, const String& name);
00290   void getArray (Vector<Short>& result, const String& name);
00291   void getArray (Vector<Int>& result, const String& name);
00292   void getArray (Vector<uInt>& result, const String& name);
00293   void getArray (Vector<String>& result, const String& name);
00294   // </group>
00295 
00296   // Fill the rownrs belonging to each array value.
00297   void fillRownrs (uInt npts, const Block<uInt>& nrel);
00298 
00299 private:
00300   Table  itsTable;              //# table for which the index is created
00301   uInt   itsNrrow;              //# presumably the row count the index was built for (TODO confirm)
00302   Record* itsLowerKeyPtr;       //# record returned by accessKey/accessLowerKey
00303   Record* itsUpperKeyPtr;       //# record returned by accessUpperKey
00304   Int     itsDataType;
00305   void*   itsDataVector;
00306   void*   itsData;              //# pointer to data in itsDataVector
00307   //# The following 2 members are declared as plain void*; presumably each
00308   //# points to a RecordFieldPtr<T> used for fast access to the key records.
00309   void*   itsLowerField;
00310   void*   itsUpperField;
00311   Bool         itsChanged;           //# presumably set via setChanged (TODO confirm)
00312   Vector<uInt> itsDataIndex;         //# Row numbers of all keys
00313   //# Indices in itsDataIndex for each unique key
00314   Vector<uInt> itsUniqueIndex;
00315   Block<uInt>  itsRownrs;            //# rownr for each value
00316   uInt*        itsDataInx;           //# pointer to data in itsDataIndex
00317   uInt*        itsUniqueInx;         //# pointer to data in itsUniqueIndex
00318 };
00319 
00320 
00321 inline Bool ColumnsIndexArray::isUnique() const
00322 {
00323     return itsUniqueIndex.nelements() == itsDataIndex.nelements();  // unique when both index vectors hold equally many entries
00324 }
00325 inline const Table& ColumnsIndexArray::table() const
00326 {
00327     return this->itsTable;  // the stored table, handed out by const reference
00328 }
00329 inline Record& ColumnsIndexArray::accessKey()
00330 {
00331     return *(this->itsLowerKeyPtr);  // same record as accessLowerKey()
00332 }
00333 inline Record& ColumnsIndexArray::accessLowerKey()
00334 {
00335     return accessKey();  // synonym: accessKey() yields the lower key record
00336 }
00337 inline Record& ColumnsIndexArray::accessUpperKey()
00338 {
00339     return *(this->itsUpperKeyPtr);  // record holding the upper key of a range
00340 }
00341 
00342 
00343 
00344 } //# NAMESPACE CASACORE - END
00345 
00346 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated on 31 Aug 2016 for casa by  doxygen 1.6.1