Tables.h

Go to the documentation of this file.
00001 //# Tables.h: The Tables module - Casacore data storage
00002 //# Copyright (C) 1994-2010
00003 //# Associated Universities, Inc. Washington DC, USA.
00004 //#
00005 //# This library is free software; you can redistribute it and/or modify it
00006 //# under the terms of the GNU Library General Public License as published by
00007 //# the Free Software Foundation; either version 2 of the License, or (at your
00008 //# option) any later version.
00009 //#
00010 //# This library is distributed in the hope that it will be useful, but WITHOUT
00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00013 //# License for more details.
00014 //#
00015 //# You should have received a copy of the GNU Library General Public License
00016 //# along with this library; if not, write to the Free Software Foundation,
00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00018 //#
00019 //# Correspondence concerning AIPS++ should be addressed as follows:
00020 //#        Internet email: aips2-request@nrao.edu.
00021 //#        Postal address: AIPS++ Project Office
00022 //#                        National Radio Astronomy Observatory
00023 //#                        520 Edgemont Road
00024 //#                        Charlottesville, VA 22903-2475 USA
00025 //#
00026 //# $Id$
00027 
00028 #ifndef TABLES_TABLES_H
00029 #define TABLES_TABLES_H
00030 
00031 //# Includes
00032 //#   table description
00033 #include <casacore/casa/aips.h>
00034 #include <casacore/tables/Tables/TableDesc.h>
00035 #include <casacore/tables/Tables/ColumnDesc.h>
00036 #include <casacore/tables/Tables/ScaColDesc.h>
00037 #include <casacore/tables/Tables/ArrColDesc.h>
00038 #include <casacore/tables/Tables/ScaRecordColDesc.h>
00039 
00040 //#   table access
00041 #include <casacore/tables/Tables/Table.h>
00042 #include <casacore/tables/Tables/TableLock.h>
00043 #include <casacore/tables/Tables/SetupNewTab.h>
00044 #include <casacore/tables/Tables/ScalarColumn.h>
00045 #include <casacore/tables/Tables/ArrayColumn.h>
00046 #include <casacore/tables/Tables/TableRow.h>
00047 #include <casacore/tables/Tables/TableCopy.h>
00048 #include <casacore/casa/Arrays/Array.h>
00049 #include <casacore/casa/Arrays/Slicer.h>
00050 #include <casacore/casa/Arrays/Slice.h>
00051 
00052 //#   keywords
00053 #include <casacore/tables/Tables/TableRecord.h>
00054 #include <casacore/casa/Containers/RecordField.h>
00055 
00056 //#   table lookup
00057 #include <casacore/tables/Tables/ColumnsIndex.h>
00058 #include <casacore/tables/Tables/ColumnsIndexArray.h>
00059 
00060 //#   table vectors
00061 #include <casacore/tables/Tables/TableVector.h>
00062 #include <casacore/tables/Tables/TabVecMath.h>
00063 #include <casacore/tables/Tables/TabVecLogic.h>
00064 
00065 //#   data managers
00066 #include <casacore/tables/DataMan.h>
00067 
00068 //#   table expressions (for selection of rows)
00069 #include <casacore/tables/TaQL.h>
00070 
00071 
00072 namespace casacore { //# NAMESPACE CASACORE - BEGIN
00073 
00074 // <module>
00075 
00076 // <summary>
00077 // Tables are the data storage mechanism for Casacore
00078 // </summary>
00079 
00080 // <use visibility=export>
00081 
00082 // <reviewed reviewer="jhorstko" date="1994/08/30" tests="" demos="">
00083 // </reviewed>
00084 
00085 // <prerequisite>
00086 //    <li> <linkto class="Record:description">Record</linkto> class
00087 // </prerequisite>
00088 
00089 // <etymology>
00090 // "Table" is a formal term from relational database theory: 
00091 //   <em> "The organizing principle in a relational database is the TABLE,
00092 //    a rectangular, row/column arrangement of data values."</em>
00093 // Casacore tables are extensions to traditional tables, but are similar
00094 // enough that we use the same name.  There is also a strong resemblance
00095 // between the uses of Casacore tables, and FITS binary tables, which
00096 // provides another reason to use "Tables" to describe the Casacore data
00097 // storage mechanism.
00098 // </etymology>
00099 
00100 // <synopsis> 
00101 // Tables are the fundamental storage mechanism for Casacore. This document
00102 // explains <A HREF="#Tables:motivation">why</A> they had to be made,
00103 // <A HREF="#Tables:properties">what</A> their properties are, and 
00104 // <A HREF="#Tables:open">how</A> to use them. The last subject is
00105 // discussed and illustrated in a sequence of sections:
00106 // <UL>
00107 //  <LI> <A HREF="#Tables:open">opening</A> an existing table,
00108 //  <LI> <A HREF="#Tables:read">reading</A> from a table,
00109 //  <LI> <A HREF="#Tables:creation">creating</A> a new table,
00110 //  <LI> <A HREF="#Tables:write">writing</A> into a table,
00111 //  <LI> <A HREF="#Tables:row-access">accessing rows</A> in a table,
00112 //  <LI> <A HREF="#Tables:select and sort">selection and sorting</A>
00113 //       (see also <A HREF="../notes/199.html">Table Query Language</A>),
00114 //  <LI> <A HREF="#Tables:concatenation">concatenating similar tables</A>,
00115 //  <LI> <A HREF="#Tables:iterate">iterating</A> through a table,
00116 //  <LI> <A HREF="#Tables:LockSync">locking/synchronization</A>
00117 //       for concurrent access,
00118 //  <LI> <A HREF="#Tables:KeyLookup">indexing</A> a table for faster lookup,
00119 //  <LI> <A HREF="#Tables:vectors">vector operations</A> on a column,
00120 //  <LI> <A HREF="#Tables:performance">performance and robustness</A>
00121 //       considerations with some information on
00122 //       <A HREF="#Tables:iotracing">IO tracing</A>.
00123 // </UL>
00124 // A few <A HREF="#Tables:applications">applications</A> exist to inspect
00125 // and manipulate a table.
00126 
00127 
00128 // <ANCHOR NAME="Tables:motivation">
00129 // <motivation></ANCHOR>
00130 //
00131 // The Casacore tables are mainly based upon the ideas of Allen Farris,
00132 // as laid out in the
00133 // <A HREF="http://aips2.cv.nrao.edu/aips++/docs/reference/Database.ps.gz">
00134 // AIPS++ Database document</A>, from where the following paragraph is taken:
00135 // 
00136 // <p>
00137 // Traditional relational database tables have two features that
00138 // decisively limit their applicability to scientific data.  First, an item of
00139 // data in a column of a table must be atomic -- it must have no internal
00140 // structure.  A consequence of this restriction is that relational
00141 // databases are unable to deal with arrays of data items.  Second, an
00142 // item of data in a column of a table must not have any direct or
00143 // implied linkages to other items of data or data aggregates.  This
00144 // restriction makes it difficult to model complex relationships between
00145 // collections of data.  While these restrictions may make it easy to
00146 // define a mathematically complete set of data manipulation operations,
00147 // they are simply intolerable in a scientific data-handling context.
00148 // Multi-dimensional arrays are frequently the most natural modes in
00149 // which to discuss and think about scientific data.  In addition,
00150 // scientific data often requires complex calibration operations that
00151 // must draw on large bodies of data about equipment and its performance
00152 // in various states.  The restrictions imposed by the relational model
00153 // make it very difficult to deal with complex problems of this nature.
00154 // </p>
00155 // 
00156 // In response to these limitations, and other needs, the Casacore tables were
00157 // designed.
00158 // </motivation>
00159 
00160 // <ANCHOR NAME="Tables:properties">
00161 // <h3>Table Properties</h3></ANCHOR>
00162 //
00163 // Casacore tables have the following properties:
00164 // <ul>
00165 //  <li> A table consists of a number of rows and columns.
00166 //       <A HREF="#Tables:keywords">Keyword/value pairs</A> may be defined
00167 //       for the table as a whole and for individual columns. A keyword/value
00168 //       pair for a column could, for instance, define its unit.
00169 //  <li> Each table has a <A HREF="#Tables:Table Description">description</A>
00170 //       which specifies the number and type of columns, and maybe initial
00171 //       keyword sets and default values for the columns. 
00172 //  <li> A cell in a column may contain
00173 //       <UL>
00174 //        <LI> a scalar;
00175 //        <LI> a "direct" array -- which must have the same shape in all
00176 //             cells of a column, is usually small, and is stored in the
00177 //             table itself; or
00178 //        <LI> an "indirect" array -- which may have different shapes in
00179 //             different cells of the same column, is arbitrarily large,
00180 //             and is stored in a separate file.
00181 //       </UL>
00182 //  <li> A column may be
00183 //       <UL>
00184 //        <LI> "filled" -- containing actual data, or
00185 //        <LI> "virtual" -- containing a recipe telling how the data will
00186 //             be generated dynamically
00187 //       </UL>
00188 //  <li> Only the standard Casacore data types can be used in filled
00189 //       columns, be they scalars or arrays:  Bool, uChar, Short, uShort,
00190 //       Int, uInt, float, double, Complex, DComplex and String.
00191 //       Furthermore scalars containing
00192 //       <linkto class=TableRecord>record</linkto> values are possible.
00193 //  <li> A column can have a default value, which will automatically be stored
00194 //       in a cell of the column, when a row is added to the table.
00195 //  <li> <A HREF="#Tables:Data Managers">Data managers</A> handle the
00196 //       reading, writing and generation of data. Each column in a table can
00197 //       be assigned its own data manager, which allows for optimization of
00198 //       the data storage per column. The choice of data manager determines
00199 //       whether a column is filled or virtual.
00200 //  <li> Table data are stored in a canonical format, so they can be read
00201 //       on any machine. To avoid needless swapping of bytes, the data can
00202 //       be stored in big endian (as used on e.g. SUN) or little endian
00203 //       (as used on Intel PC-s) canonical format. 
00204 //       By default it uses the format specified in the aipsrc variable
00205 //       <code>table.endianformat</code> which defaults to
00206 //       <code>Table::LocalEndian</code> (thus the endian format of the
00207 //       machine being used).
00208 //  <li> The SQL-like
00209 //       <a href="../notes/199.html">Table Query Language</a> (TaQL)
00210 //       can be used to do operations on tables like
00211 //       select, sort, update, insert, delete, and create.
00212 // </ul>
00213 //
00214 // Tables can be in one of three forms:
00215 // <ul>
00216 // <li> A plain table is a table stored on disk.
00217 //      It can be shared by multiple processes.
00218 // <li> A memory table is a table held in memory.
00219 //      It is a process specific table, thus not sharable.
00220 //      The <linkto class=Table>Table::copy</linkto> function can be used
00221 //      to turn a memory table into a plain table.
00222 // <li> A reference table is a table referencing a plain or memory table.
00223 //      It is the result of a selection or sort on another table.
00224 //      A reference table references the data in the other table, thus
00225 //      changing data in a reference table means that the data in the
00226 //      original table are changed.
00227 //      The <linkto class=Table>Table::deepCopy</linkto> function can be
00228 //      used to turn a reference table into a plain table.
00229 // </ul>
00230 // Concurrent access from different processes to the same plain table is
00231 // fully supported by means of a <A HREF="#Tables:LockSync">
00232 // locking/synchronization</A> mechanism. Concurrent access over NFS is also
00233 // supported.
00234 // <p>
00235 // A (somewhat primitive) mechanism is available to do a
00236 // <A HREF="#Tables:KeyLookup">table lookup</A> based on the contents
00237 // of a key. In the future this might be replaced by a proper B+-tree index
00238 // mechanism.
00239 
00240 // <ANCHOR NAME="Tables:open">
00241 // <h3>Opening an Existing Table</h3></ANCHOR>
00242 //
00243 // To open an existing table you just create a
00244 // <linkto class="Table:description">Table</linkto> object giving
00245 // the name of the table, like:
00246 //
00247 // <srcblock>
00248 //     Table readonly_table ("tableName");
00249 //     // or
00250 //     Table read_and_write_table ("tableName", Table::Update);
00251 // </srcblock>
00252 //
00253 // The constructor option determines whether the table will be opened as
00254 // readonly or as read/write. A readonly table file must be opened 
00255 // as readonly, otherwise an exception is thrown. The functions
00256 // <linkto class="Table">Table::isWritable(...)</linkto>
00257 // can be used to determine if a table is writable.
00258 //
00259 // When the table is opened, the data managers are reinstantiated
00260 // according to their definition at table creation.
00261 
00262 // <ANCHOR NAME="Tables:read">
00263 // <h3>Reading from a Table</h3></ANCHOR>
00264 //
00265 // You can read data from a table column with the "get" functions
00266 // in the classes
00267 // <linkto class="ScalarColumn:description">ScalarColumn&lt;T&gt;</linkto>
00268 // and
00269 // <linkto class="ArrayColumn:description">ArrayColumn&lt;T&gt;</linkto>.
00270 // For scalars of a standard data type (i.e. Bool, uChar, Int, Short,
00271 // uShort, uInt, float, double, Complex, DComplex and String) you could
00272 // instead use 
00273 // <linkto class="TableColumn">TableColumn::getScalar(...)</linkto> or
00274 // <linkto class="TableColumn">TableColumn::asXXX(...)</linkto>.
00275 // These functions offer an extra: they do automatic data type promotion;
00276 // so that you can, for example, get a double value from a float column.
00277 //
00278 // These "get" functions are used in the same way as the simple "put"
00279 // functions described in <A HREF="#Tables:write">Writing into a Table</A>.
00280 // <p>
00281 // <linkto class="ScalarColumn:description">ScalarColumn&lt;T&gt;</linkto>
00282 // can be constructed for a non-writable column. However, an exception
00283 // is thrown if the put function is used for it.
00284 // The same is true for
00285 // <linkto class="ArrayColumn:description">ArrayColumn&lt;T&gt;</linkto> and
00286 // <linkto class="TableColumn:description">TableColumn</linkto>.
00287 // <p>
00288 // A typical program could look like:
00289 // <srcblock>
00290 // #include <casacore/tables/Tables/Table.h>
00291 // #include <casacore/tables/Tables/ScalarColumn.h>
00292 // #include <casacore/tables/Tables/ArrayColumn.h>
00293 // #include <casacore/casa/Arrays/Vector.h>
00294 // #include <casacore/casa/Arrays/Slicer.h>
00295 // #include <casacore/casa/Arrays/ArrayMath.h>
00296 // #include <iostream>
00297 // 
00298 // main()
00299 // {
00300 //     // Open the table (readonly).
00301 //     Table tab ("some.name");
00302 //
00303 //     // Construct the various column objects.
00304 //     // Their data type has to match the data type in the table description.
00305 //     ScalarColumn<Int> acCol (tab, "ac");
00306 //     ArrayColumn<Float> arr2Col (tab, "arr2");
00307 //
00308 //     // Loop through all rows in the table.
00309 //     uInt nrow = tab.nrow();
00310 //     for (uInt i=0; i<nrow; i++) {
00311 //         // Read the row for both columns.
00312 //         cout << "Column ac in row i = " << acCol(i) << endl;
00313 //         Array<Float> array = arr2Col.get (i);
00314 //     }
00315 //
00316 //     // Show the entire column ac,
00317 //     // and show the 10th element of arr2 in each row.
00318 //     cout << acCol.getColumn();
00319 //     cout << arr2Col.getColumn (Slicer(Slice(10)));
00320 // }
00321 // </srcblock>
00322 
00323 // <ANCHOR NAME="Tables:creation">
00324 // <h3>Creating a Table</h3></ANCHOR>
00325 //
00326 // The creation of a table is a multi-step process:
00327 // <ol>
00328 //  <li>
00329 //   Create a <A HREF="#Tables:Table Description">table description</A>.
00330 //  <li>
00331 //   Create a <linkto class="SetupNewTable:description">SetupNewTable</linkto>
00332 //   object with the name of the new table.
00333 //  <li>
00334 //   Create the necessary <A HREF="#Tables:Data Managers">data managers</A>.
00335 //  <li>
00336 //   Bind each column to the appropriate data manager.
00337 //   The system will bind unbound columns to data managers which
00338 //   are created internally using the default data manager name
00339 //   defined in the column description.
00340 //  <li>
00341 //   Define the shape of direct columns (if that was not already done in the
00342 //   column description).
00343 //  <li>
00344 //   Create the <linkto class="Table:description">Table</linkto>
00345 //   object from the SetupNewTable object. Here, a final check is performed
00346 //   and the necessary files are created.
00347 // </ol>
00348 // The recipe above is meant for the creation of a plain table, but the
00349 // creation of a memory table is exactly the same. The only difference
00350 // is that in the call to construct the Table object the Table::Memory
00351 // type has to be given. Note that in the SetupNewTable object the columns
00352 // can be bound to any data manager. <src>MemoryTable</src> will rebind
00353 // stored columns to the <linkto class=MemoryStMan>MemoryStMan</linkto>
00354 // storage manager, but virtual column bindings are not changed.
00355 
00356 //
00357 // The following example shows how you can create a table. An example
00358 // specifically illustrating the creation of the
00359 // <A HREF="#Tables:Table Description">table description</A> is given
00360 // in that section. Other sections discuss the access to the table.
00361 //
00362 // <srcblock>
00363 // #include <casacore/tables/Tables/TableDesc.h>
00364 // #include <casacore/tables/Tables/SetupNewTab.h>
00365 // #include <casacore/tables/Tables/Table.h>
00366 // #include <casacore/tables/Tables/ScaColDesc.h>
00367 // #include <casacore/tables/Tables/ScaRecordColDesc.h>
00368 // #include <casacore/tables/Tables/ArrColDesc.h>
00369 // #include <casacore/tables/DataMan/StandardStMan.h>
00370 // #include <casacore/tables/DataMan/IncrementalStMan.h>
00371 // 
00372 // main()
00373 // {
00374 //     // Step 1 -- Build the table description.
00375 //     TableDesc td("tTableDesc", "1", TableDesc::Scratch);
00376 //     td.comment() = "A test of class SetupNewTable";
00377 //     td.addColumn (ScalarColumnDesc<Int> ("ab" ,"Comment for column ab"));
00378 //     td.addColumn (ScalarColumnDesc<Int> ("ac"));
00379 //     td.addColumn (ScalarColumnDesc<uInt> ("ad","comment for ad"));
00380 //     td.addColumn (ScalarColumnDesc<Float> ("ae"));
00381 //     td.addColumn (ScalarRecordColumnDesc ("arec"));
00382 //     td.addColumn (ArrayColumnDesc<Float> ("arr1",3,ColumnDesc::Direct));
00383 //     td.addColumn (ArrayColumnDesc<Float> ("arr2",0));
00384 //     td.addColumn (ArrayColumnDesc<Float> ("arr3",0,ColumnDesc::Direct));
00385 // 
00386 //     // Step 2 -- Setup a new table from the description.
00387 //     SetupNewTable newtab("newtab.data", td, Table::New);
00388 //
00389 //     // Step 3 -- Create storage managers for it.
00390 //     StandardStMan stmanStand_1;
00391 //     StandardStMan stmanStand_2;
00392 //     IncrementalStMan stmanIncr;
00393 // 
00394 //     // Step 4 -- First, bind all columns to the first storage
00395 //     // manager. Then, bind a few columns to another storage manager
00396 //     // (which will overwrite the previous bindings).
00397 //     newtab.bindAll (stmanStand_1);
00398 //     newtab.bindColumn ("ab", stmanStand_2);
00399 //     newtab.bindColumn ("ae", stmanIncr);
00400 //     newtab.bindColumn ("arr3", stmanIncr);
00401 // 
00402 //     // Step 5 -- Define the shape of the direct columns.
00403 //     // (this could have been done in the column description).
00404 //     newtab.setShapeColumn( "arr1", IPosition(3,2,3,4));
00405 //     newtab.setShapeColumn( "arr3", IPosition(3,3,4,5));
00406 // 
00407 //     // Step 6 -- Finally, create the table consisting of 10 rows.
00408 //     Table tab(newtab, 10);
00409 // 
00410 //     // Now we can fill the table, which is shown in a next section.
00411 //     // The Table destructor will flush the table to the files.
00412 // }
00413 // </srcblock>
00414 // To create a table in memory, only step 6 has to be modified slightly to:
00415 // <srcblock>
00416 //     Table tab(newtab, Table::Memory, 10);
00417 // </srcblock>
00418 
00419 // <ANCHOR NAME="Tables:write">
00420 // <h3>Writing into a Table</h3></ANCHOR>
00421 //
00422 // Once a table has been created or has been opened for read/write,
00423 // you want to write data into it. Before doing that you may have
00424 // to add one or more rows to the table.
00425 // <note role=tip> If a table was created with a given number of rows, you
00426 // do not need to add rows; you may not even be able to do so.
00427 // </note>
00428 //
00429 // When adding new rows to the table, either via the
00430 // <linkto class="Table">Table(...) constructor</linkto>
00431 // or via the
00432 // <linkto class="Table">Table::addRow(...)</linkto>
00433 // function, you can choose to have those rows initialized with the
00434 // default values given in the description.
00435 //
00436 // To actually write the data into the table you need the classes
00437 // <linkto class="ScalarColumn:description">ScalarColumn&lt;T&gt;</linkto> and
00438 // <linkto class="ArrayColumn:description">ArrayColumn&lt;T&gt;</linkto>.
00439 // For each column you can construct one or
00440 // more of these objects. Their put(...) functions
00441 // let you write a value at a time or the entire column in one go.
00442 // For arrays you can "put" subsections of the arrays.
00443 //
00444 // As an alternative for scalars of a standard data type (i.e. Bool,
00445 // uChar, Int, Short, uShort, uInt, float, double, Complex, DComplex
00446 // and String) you could use the functions
00447 // <linkto class="TableColumn">TableColumn::putScalar(...)</linkto>.
00448 // These functions offer an extra: automatic data type promotion; so that
00449 // you can, for example, put a float value in a double column.
00450 //
00451 // A typical program could look like:
00452 // <srcblock>
00453 // #include <casacore/tables/Tables/TableDesc.h>
00454 // #include <casacore/tables/Tables/SetupNewTab.h>
00455 // #include <casacore/tables/Tables/Table.h>
00456 // #include <casacore/tables/Tables/ScaColDesc.h>
00457 // #include <casacore/tables/Tables/ArrColDesc.h>
00458 // #include <casacore/tables/Tables/ScalarColumn.h>
00459 // #include <casacore/tables/Tables/ArrayColumn.h>
00460 // #include <casacore/casa/Arrays/Vector.h>
00461 // #include <casacore/casa/Arrays/Slicer.h>
00462 // #include <casacore/casa/Arrays/ArrayMath.h>
00463 // #include <iostream>
00464 // 
00465 // main()
00466 // {
00467 //     // First build the table description.
00468 //     TableDesc td("tTableDesc", "1", TableDesc::Scratch);
00469 //     td.comment() = "A test of class SetupNewTable";
00470 //     td.addColumn (ScalarColumnDesc<Int> ("ac"));
00471 //     td.addColumn (ArrayColumnDesc<Float> ("arr2",0));
00472 // 
00473 //     // Setup a new table from the description,
00474 //     // and create the (still empty) table.
00475 //     // Note that since we do not explicitly bind columns to
00476 //     // data managers, all columns will be bound to the default
00477 //     // standard storage manager StandardStMan.
00478 //     SetupNewTable newtab("newtab.data", td, Table::New);
00479 //     Table tab(newtab);
00480 //
00481 //     // Construct the various column objects.
00482 //     // Their data type has to match the data type in the description.
00483 //     ScalarColumn<Int> ac (tab, "ac");
00484 //     ArrayColumn<Float> arr2 (tab, "arr2");
00485 //     Vector<Float> vec2(100);
00486 //
00487 //     // Write the data into the columns.
00488 //     // In each cell arr2 will be a vector of length 100.
00489 //     // Since its shape is not set explicitly, it is done implicitly.
00490 //     for (uInt i=0; i<10; i++) {
00491 //         tab.addRow();               // First add a row.
00492 //         ac.put (i, i+10);           // value is i+10 in row i
00493 //         indgen (vec2, float(i+20)); // vec2 gets i+20, i+21, ..., i+119
00494 //         arr2.put (i, vec2); 
00495 //     }
00496 //
00497 //     // Finally, show the entire column ac,
00498 //     // and show the 10th element of arr2.
00499 //     cout << ac.getColumn();
00500 //     cout << arr2.getColumn (Slicer(Slice(10)));
00501 //
00502 //     // The Table destructor writes the table.
00503 // }
00504 // </srcblock>
00505 //
00506 // In this example we added rows in the for loop, but we could also have
00507 // created 10 rows straightaway by constructing the Table object as:
00508 // <srcblock>
00509 //     Table tab(newtab, 10);
00510 // </srcblock>
00511 // in which case we would not include
00512 // <srcblock>
00513 //     tab.addRow()
00514 // </srcblock>
00515 //
00516 // The classes 
00517 // <linkto class="TableColumn:description">TableColumn</linkto>,
00518 // <linkto class="ScalarColumn:description">ScalarColumn&lt;T&gt;</linkto>, and
00519 // <linkto class="ArrayColumn:description">ArrayColumn&lt;T&gt;</linkto>
00520 // contain several functions to put values into a single cell or into the
00521 // whole column. This may look confusing, but is actually quite simple.
00522 // The functions can be divided in two groups:
00523 // <ol>
00524 //  <li>
00525 //   Put the given value into the column cell(s).
00526 //   <ul>
00527 //    <li>
00528 //     The simplest put functions,
00529 //     <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto> and
00530 //     <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>,
00531 //     put a value into the given column cell. For convenience, there is an
00532 //     <linkto class="ArrayColumn">ArrayColumn::putSlice(...)</linkto>
00533 //     to put only a part of the array.
00534 //    <li>
00535 //     <linkto class="ScalarColumn">ScalarColumn::fillColumn(...)</linkto> and
00536 //     <linkto class="ArrayColumn">ArrayColumn::fillColumn(...)</linkto>
00537 //     fill an entire column by putting the given value into all the cells
00538 //     of the column.
00539 //    <li>
00540 //     The simplest putColumn functions,
00541 //     <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto> and
00542 //     <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>,
00543 //     put an array of values into the column. There is a special
00544 //     <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>
00545 //     version which puts only a part of the arrays.
00546 //   </ul>
00547 //
00548 //  <li>
00549 //   Copy values from another column to this column.<BR>
00550 //   These functions have the advantage that the
00551 //   data type of the input and/or output column can be unknown.
00552 //   The generic TableColumn objects can be used for this purpose.
00553 //   The put(Column) function checks the data types and, if possible,
00554 //   converts them. If the conversion is not possible, it throws an
00555 //   exception.
00556 //   <ul>
00557 //    <li>
00558 //     The put functions copy the value in a cell of the input column
00559 //     to a cell in the output column. The row numbers of the cells
00560 //     in the columns can be different.
00561 //    <li>
00562 //     The putColumn functions copy the entire contents of the input column
00563 //     to the output column. The lengths of the columns must be equal.
00564 //   </ul>
00565 //   Each class has its own set of these functions.
00566 //   <ul>
00567 //    <li>
00568 //     <linkto class="TableColumn">TableColumn::put(...)</linkto> and
00569 //     <linkto class="TableColumn">TableColumn::putColumn(...)</linkto>
00570 //     are the most generic. They can be
00571 //     used if the data types of both input and output column are unknown.
00572 //     Note that these functions are virtual.
00573 //    <li>
00574 //     <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto>,
00575 //     <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>,
00576 //     <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto>, and
00577 //     <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>
00578 //     are less generic and therefore potentially more efficient.
00579 //     The most efficient variants are the ones taking a
00580 //     Scalar/ArrayColumn&lt;T&gt;, because they require no data type
00581 //     conversion.
00582 //   </ul>
00583 // </ol>
00584 
00585 // <ANCHOR NAME="Tables:row-access">
00586 // <h3>Accessing rows in a Table</h3></ANCHOR>
00587 //
00588 // Apart from accessing a table column-wise as described in the
00589 // previous two sections, it is also possible to access a table row-wise.
00590 // The <linkto class=TableRow>TableRow</linkto> class makes it possible
00591 // to access multiple fields in a table row as a whole. Note that like the
00592 // XXColumn classes described above, there is also an ROTableRow class
00593 // for access to readonly tables.
00594 // <p>
00595 // On construction of a TableRow object it has to be specified which
00596 // fields (i.e. columns) are part of the row. For these fields a
00597 // fixed structured <linkto class=TableRecord>TableRecord</linkto>
00598 // object is constructed as part of the TableRow object. The TableRow::get
00599 // function will fill this record with the table data for the given row.
00600 // The user has access to the record and can use
00601 // <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> objects for
00602 // speedier access to the record.
00603 // <p>
00604 // The class could be used as shown in the following example.
00605 // <srcblock>
00606 // // Open the table as readonly and define a row object to contain
00607 // // the given columns.
00608 // // Note that the function stringToVector is a very convenient
00609 // // way to construct a Vector<String>.
00610 // // Show the description of the fields in the row.
00611 // Table table("Some.table");
00612 // ROTableRow row (table, stringToVector("col1,col2,col3"));
00613 // cout << row.record().description();
00614 // // Since the structure of the record is known, the RecordFieldPtr
00615 // // objects could be used to allow for easy and fast access to
00616 // // the record which is refilled for each get.
00617 // RORecordFieldPtr<String> col1(row.record(), "col1");
00618 // RORecordFieldPtr<Double> col2(row.record(), "col2");
00619 // RORecordFieldPtr<Array<Int> > col3(row.record(), "col3");
00620 // for (uInt i=0; i<table.nrow(); i++) {
00621 //     row.get (i);
00622 //     someString = *col1;
00623 //     somedouble = *col2;
00624 //     someArrayInt = *col3;
00625 // }
00626 // </srcblock>
00627 // The description of TableRow contains some more extensive examples.
00628 
00629 // <ANCHOR NAME="Tables:select and sort">
00630 // <h3>Table Selection and Sorting</h3></ANCHOR>
00631 //
00632 // The result of a select and sort of a table is another table,
00633 // which references the original table. This means that an update
00634 // of a sorted or selected table results in the update of the original
00635 // table. The result is, however, a table in itself, so all table
00636 // functions (including select and sort) can be used with it.
00637 // Note that a true copy of such a reference table can be made with
00638 // the <linkto class=Table>Table::deepCopy</linkto> function.
00639 // <p>
00640 // Rows or columns can be selected from a table. Columns can be selected
00641 // by the
00642 // <linkto class="Table">Table::project(...)</linkto>
00643 // function, while rows can be selected by the various
00644 // <linkto class="Table">Table operator()</linkto> functions.
00645 // Usually a row is selected by giving a select expression with
00646 // <linkto class="TableExprNode:description">TableExprNode</linkto>
00647 // objects. These objects represent the various nodes
00648 // in an expression, e.g. a constant, a column, or a subexpression.
00649 // The Table function
00650 // <linkto class="Table">Table::col(...)</linkto>
00651 // creates a TableExprNode object for a column. The function
00652 // <linkto class="Table">Table::key(...)</linkto>
00653 // does the same for a keyword by reading
00654 // the keyword value and storing it as a constant in an expression node.
00655 // All column nodes in an expression must belong to the same table,
00656 // otherwise an exception is thrown.
00657 // In the following example we select all rows with RA>10:
00658 // <srcblock>
00659 //    #include <casacore/tables/Tables/ExprNode.h>
00660 //    Table table ("Table.name");
00661 //    Table result = table (table.col("RA") > 10);
00662 // </srcblock>
00663 // while in the next one we select rows with RA and DEC in the given 
00664 // intervals:
00665 // <srcblock>
00666 //    Table result = table (table.col("RA") > 10
00667 //                       && table.col("RA") < 14
00668 //                       && table.col("DEC") >= -10
00669 //                       && table.col("DEC") <= 10);
00670 // </srcblock>
00671 // The following operators can be used to form arbitrarily
00672 // complex expressions:
00673 // <ul>
00674 //  <li> Relational operators ==, !=, >, >=, < and <=.
00675 //  <li> Logical operators &&, || and !.
00676 //  <li> Arithmetic operators +, -, *, /, %, and unary + and -.
00677 //  <li> Bit operators ^, &, |, and unary ~.
00678 //  <li> Operator() to take a subsection of an array.
00679 // </ul>
00680 // Many functions (like sin, max, conj) can be used in an expression.
00681 // Class <linkto class=TableExprNode>TableExprNode</linkto> shows
00682 // the available functions.
00683 // E.g.
00684 // <srcblock>
00685 //    Table result = table (sin (table.col("RA")) > 0.5);
00686 // </srcblock>
00687 // Function <src>in</src> can be used to select from a set of values.
00688 // A value set can be constructed using class
00689 // <linkto class=TableExprNodeSet>TableExprNodeSet</linkto>.
00690 // <srcblock>
00691 //    TableExprNodeSet set;
00692 //    set.add (TableExprNodeSetElem ("abc"));
00693 //    set.add (TableExprNodeSetElem ("defg"));
00694 //    set.add (TableExprNodeSetElem ("h"));
00695 //    Table result = table (table.col("NAME").in (set));
00696 // </srcblock>
00697 // This selects rows with a NAME equal to <src>abc</src>,
00698 // <src>defg</src>, or <src>h</src>.
00699 //
00700 // <p>
00701 // You can sort a table on one or more columns containing scalars.
00702 // In this example we simply sort on column RA (default is ascending):
00703 // <srcblock>
00704 //    Table table ("Table.name");
00705 //    Table result = table.sort ("RA");
00706 // </srcblock>
00707 // Multiple
00708 // <linkto class="Table">Table::sort(...)</linkto>
00709 // functions exist which allow for more flexible control over the sort order.
00710 // In the next example we sort first on RA in descending order
00711 // and then on DEC in ascending order:
00712 // <srcblock>
00713 //    Table table ("Table.name");
00714 //    Block<String> sortKeys(2);
00715 //    Block<int>    sortOrders(2);
00716 //    sortKeys(0)   = "RA";
00717 //    sortOrders(0) = Sort::Descending;
00718 //    sortKeys(1)   = "DEC";
00719 //    sortOrders(1) = Sort::Ascending;
00720 //    Table result = table.sort (sortKeys, sortOrders);
00721 // </srcblock>
00722 //
00723 // Tables stemming from the same root, can be combined in several
00724 // ways with the help of the various logical
00725 // <linkto class="Table">Table operators</linkto> (operator|, etc.).
00726 
00727 // <h4>Table Query Language</h4>
00728 // The selection and sorting mechanism described above can only be used
00729 // in a hard-coded way in a C++ program.
00730 // There is, however, another way. Strings containing selection and
00731 // sorting commands can be used.
00732 // The syntax of these commands is based on SQL and is described in the
00733 // <a href="../notes/199.html">Table Query Language</a> (TaQL) note 199.
00734 // The language supports UDFs (User Defined Functions) in dynamically
00735 // loadable libraries as explained in the note.
00736 // <br>A TaQL command can be executed with the static function
00737 // <src>tableCommand</src> defined in class
00738 // <linkto class=TableParse>TableParse</linkto>.
00739 
00740 // <ANCHOR NAME="Tables:concatenation">
00741 // <h3>Table Concatenation</h3></ANCHOR>
00742 // Tables with identical descriptions can be concatenated in a virtual way
00743 // using the Table concatenation constructor. Such a Table object behaves
00744 // as any other Table object, thus any operation can be performed on it.
00745 // An identical description means that the number of columns, the column names,
00746 // and the data types of the columns must be the same. The columns do not
00747 // need to be ordered in the same way nor to be stored in the same way.
00748 // <br>Note that if tables have different column names, it is possible
00749 // to form a projection (as described in the previous section) first
00750 // to make them appear identical.
00751 //
00752 // Sometimes a MeasurementSet is partitioned, for instance in chunks of
00753 // one hour. All those chunks can be virtually concatenated this way.
00754 // Note that all tables in the concatenation will be opened, thus one might
00755 // run out of file descriptors if there are many chunks.
00756 //
00757 // Similar to reference tables, it is possible to make a concatenated Table
00758 // persistent by using the <src>rename</src> function. It will not copy the
00759 // data; only the names of the tables used are written.
00760 //
00761 // The keywords of a concatenated table are taken from the first table.
00762 // It is possible to change or add keywords, but that is not persistent,
00763 // not even if the concatenated table is made persistent.
00764 // <br>The keywords holding subtables can be handled in a special way.
00765 // Normally the subtables of the first table are used as the subtables of the
00766 // concatenation, but it is possible to concatenate subtables as well by
00767 // giving their names in the constructor.
00768 // In this way the, say, SYSCAL subtable of a MeasurementSet can be
00769 // concatenated as well.
00770 // <srcblock>
00771 //   // Create virtual concatenation of ms0 and ms1.
00772 //   Block<String> names(2);
00773 //   names[0] = "ms0";
00774 //   names[1] = "ms1";
00775 //   // Also concatenate their SYSCAL subtables.
00776 //   Block<String> subNames(1, "SYSCAL");
00777 //   Table concTab (names, subNames);
00778 // </srcblock>
00779 
00780 // <ANCHOR NAME="Tables:iterate">
00781 // <h3>Table Iterators</h3></ANCHOR>
00782 //
00783 // You can iterate through a table in an arbitrary order by getting
00784 // a subset of the table consisting of the rows in which the iteration
00785 // columns have the same value.
00786 // An iterator object is created by constructing a
00787 // <linkto class="TableIterator:description">TableIterator</linkto>
00788 // object with the appropriate column names.
00789 //
00790 // In the next example we define an iteration on the columns Time and
00791 // Baseline. Each iteration step returns a table subset in which Time and
00792 // Baseline have the same value.
00793 //
00794 // <srcblock>
00795 //    // Iterate over Time and Baseline (by default in ascending order).
00796 //    // Time is the main iteration order, thus the first column specified.
00797 //    Table t;
00798 //    Table tab ("UV_Table.data");
00799 //    Block<String> iv0(2);
00800 //    iv0[0] = "Time";
00801 //    iv0[1] = "Baseline";
00802 //    //
00803 //    // Create the iterator. This will prepare the first subtable.
00804 //    TableIterator iter(tab, iv0);
00805 //    Int nr = 0;
00806 //    while (!iter.pastEnd()) {
00807 //        // Get the first subtable.
00808 //        // This will contain rows with equal Time and Baseline.
00809 //        t = iter.table();
00810 //        cout << t.nrow() << " ";
00811 //        nr++;
00812 //        // Prepare the next subtable with the next Time,Baseline value.
00813 //        iter.next();
00814 //    }
00815 //    cout << endl << nr << " iteration steps" << endl;
00816 // </srcblock>
00817 //
00818 // You can define more than one iterator on the same table; they operate
00819 // independently.
00820 //
00821 // Note that the result of each iteration step is a table in itself which
00822 // references the original table, just as in the case of a sort or select.
00823 // This means that the resulting table can be used again in a sort, select,
00824 // iteration, etc.
00825 
00826 // <ANCHOR NAME="Tables:vectors">
00827 // <h3>Table Vectors</h3></ANCHOR>
00828 //
00829 // A table vector makes it possible to treat a column in a table
00830 // as a vector. Almost all operators and functions defined for normal
00831 // vectors, are also defined for table vectors. So it is, for instance,
00832 // possible to add a constant to a table vector. This has the effect
00833 // that the underlying column gets changed.
00834 //
00835 // You can use the templated class
00836 // <linkto class="TableVector:description">TableVector</linkto>
00837 // to make a scalar column appear as a (table) vector.
00838 // Columns containing arrays or tables are not supported.
00839 // The data type of the TableVector object must match the
00840 // data type of the column.
00841 // A table vector can also hold a normal vector so that (temporary)
00842 // results of table vector operations can be handled.
00843 //
00844 // In the following example we double the data in column COL1 and
00845 // store the result in a temporary table vector.
00846 // <srcblock>
00847 //    // Create a table vector for column COL1.
00848 //    // Note that if the table is readonly, putting data in the table vector
00849 //    // results in an exception.
00850 //    Table tab ("Table.data");
00851 //    TableVector<Int> tabvec(tab, "COL1");
00852 //    // Multiply it by a constant. Result is kept in a Vector in memory.
00853 //    TableVector<Int> temp = 2 * tabvec;
00854 // </srcblock>
00855 //
00856 // In the next example we double the data in COL1 and put the result back
00857 // in the column.
00858 // <srcblock>
00859 //    // Create a table vector for column COL1.
00860 //    // It has to be a TableVector to be able to change the column.
00861 //    Table tab ("Table.data", Table::Update);
00862 //    TableVector<Int> tabvec(tab, "COL1");
00863 //    // Multiply it by a constant.
00864 //    tabvec *= 2;
00865 // </srcblock>
00866 
00867 // <ANCHOR NAME="Tables:keywords">
00868 // <h3>Table Keywords</h3></ANCHOR>
00869 //
00870 // Any number of keyword/value pairs may be attached to the table as a whole,
00871 // or to any individual column. They may be freely added, retrieved,
00872 // re-assigned, or deleted. They are, in essence, a self-resizing list of
00873 // values (any of the primitive types) indexed by Strings (the keyword).
00874 //
00875 // A table keyword/value pair might be
00876 // <srcblock>
00877 //      Observer = Grote Reber
00878 //      Date = 10 october 1942
00879 // </srcblock>
00880 // Column keyword/value pairs might be
00881 // <srcblock>
00882 //      Units = mJy
00883 //      Reference Pixel = 320
00884 // </srcblock>
00885 // The class 
00886 // <linkto class="TableRecord:description">TableRecord</linkto>
00887 // represents the keywords in a table.
00888 // It is (indirectly) derived from the standard record classes in the class
00889 // <linkto class="Record:description">Record</linkto>.
00890 
00891 // <ANCHOR NAME="Tables:Table Description">
00892 // <h3>Table Description</h3></ANCHOR>
00893 //
00894 // A table contains a description of itself, which defines the layout of the
00895 // columns and the keyword sets for the table and for the individual columns.
00896 // It may also define initial keyword sets and default values for the columns.
00897 // Such a default value is automatically stored in a cell in the table column,
00898 // whenever a row is added to the table.
00899 //
00900 // The creation of the table descriptor is the first step in the creation of
00901 // a new table. The description is part of the table itself, but may also
00902 // exist in a separate file. This is useful if you need to create a number
00903 // of tables with the same structure; in other circumstances it probably
00904 // should be avoided.
00905 //
00906 // The public classes to set up a table description are:
00907 // <ul>
00908 //  <li> <linkto class="TableDesc:description">TableDesc</linkto>
00909 //       -- holds the table description.
00910 //  <li> <linkto class="ColumnDesc:description">ColumnDesc</linkto>
00911 //       -- holds a generic column description.
00912 //  <li> <linkto class="ScalarColumnDesc:description">ScalarColumnDesc&lt;T&gt;
00913 //       </linkto>
00914 //       -- defines a column containing a scalar value.
00915 //  <li> <linkto class="ScalarRecordColumnDesc:description">ScalarRecordColumnDesc
00916 //       </linkto>
00917 //       -- defines a column containing a scalar record value.
00918 //  <li> <linkto class="ArrayColumnDesc:description">ArrayColumnDesc&lt;T&gt;
00919 //       </linkto>
00920 //       -- defines a column containing an (in)direct array.
00921 // </ul>
00922 //
00923 // Here follows a typical example of the construction of a table
00924 // description. For more specialized things -- like the definition of a
00925 // default data manager -- we refer to the descriptions of the above
00926 // mentioned classes.
00927 //
00928 // <srcblock>
00929 // #include <casacore/tables/Tables/TableDesc.h>
00930 // #include <casacore/tables/Tables/ScaColDesc.h>
00931 // #include <casacore/tables/Tables/ArrColDesc.h>
00932 // #include <casacore/tables/Tables/ScaRecordColDesc.h>
00933 // #include <casacore/tables/Tables/TableRecord.h>
00934 // #include <casacore/casa/Arrays/IPosition.h>
00935 // #include <casacore/casa/Arrays/Vector.h>
00936 //
00937 // main()
00938 // {
00939 //     // Create a new table description
00940 //     // Define a comment for the table description.
00941 //     // Define some keywords.
00942 //     ColumnDesc colDesc1, colDesc2;
00943 //     TableDesc td("tTableDesc", "1", TableDesc::New);
00944 //     td.comment() = "A test of class TableDesc";
00945 //     td.rwKeywordSet().define ("ra", float(3.14));
00946 //     td.rwKeywordSet().define ("equinox", double(1950));
00947 //     td.rwKeywordSet().define ("aa", Int(1));
00948 //
00949 //     // Define an integer column ab.
00950 //     td.addColumn (ScalarColumnDesc<Int> ("ab", "Comment for column ab"));
00951 //
00952 //     // Add a scalar integer column ac, define keywords for it
00953 //     // and define a default value 0.
00954 //     // Overwrite the value of keyword unit.
00955 //     ScalarColumnDesc<Int> acColumn("ac");
00956 //     acColumn.rwKeywordSet().define ("scale", Complex(0,0));
00957 //     acColumn.rwKeywordSet().define ("unit", "");
00958 //     acColumn.setDefault (0);
00959 //     td.addColumn (acColumn);
00960 //     td.rwColumnDesc("ac").rwKeywordSet().define ("unit", "DEG");
00961 //
00962 //     // Add a scalar string column ad and define its comment string.
00963 //     td.addColumn (ScalarColumnDesc<String> ("ad","comment for ad"));
00964 //
00965 //     // Now define array columns.
00966 //     // This one is indirect and has no dimensionality mentioned yet.
00967 //     td.addColumn (ArrayColumnDesc<Complex> ("Arr1","comment for Arr1"));
00968 //     // This one is indirect and has 3-dim arrays.
00969 //     td.addColumn (ArrayColumnDesc<Int> ("A2r1","comment for Arr1",3));
00970 //     // This one is direct and has 2-dim arrays with axes length 4 and 7.
00971 //     td.addColumn (ArrayColumnDesc<uInt> ("Arr3","comment for Arr1",
00972 //                                          IPosition(2,4,7),
00973 //                                          ColumnDesc::Direct));
00974 //
00975 //     // Add columns containing records.
00976 //     td.addColumn (ScalarRecordColumnDesc ("Rec1"));
00977 // }
00978 // </srcblock>
00979 
00980 // <ANCHOR NAME="Tables:Data Managers">
00981 // <h3>Data Managers</h3></ANCHOR>
00982 //
00983 // Data managers take care of the actual access to the data in a column.
00984 // There are two kinds of data managers:
00985 // <ol>
00986 //  <li> <A HREF="#Tables:storage managers">Storage managers</A> --
00987 //   which store the data as such. They can only handle the standard
00988 //   data types (Bool,...,String) as discussed in the section about the
00989 //   <A HREF="#Tables:properties">table properties</A>.
00990 //  <li> <A HREF="#Tables:virtual column engines">Virtual column engines</A>
00991 //   -- which manipulate the data.
00992 //   An engine could be a simple thing like scaling the data (as done
00993 //   in classic AIPS to reduce data storage), but it could also be an
00994 //   elaborate thing like applying corrections on-the-fly.
00995 //   <br>An engine must be used to store data objects with a non-standard type.
00996 //   It has to break down the object into items with standard data types
00997 //   which can be stored with a storage manager.
00998 // </ol>
00999 // In general the user of a table does not need to be aware which
01000 // data managers are being used underneath. Only when the table is created
01001 // data managers have to be bound to the columns. Thereafter it is
01002 // completely transparent.
01003 //
01004 // Data managers need to be registered, so they can be found when a table is
01005 // opened. All data managers mentioned below are part of the system and
01006 // pre-registered.
01007 // It is, however, also possible to load data managers on demand. If a data
01008 // manager is not registered, an attempt is made to load a shared library named
01009 // after the part of the data manager name (in lowercase) before a dot or left arrow.
01010 // The dot makes it possible to have multiple data managers in a shared library,
01011 // while the left arrow is meant for templated data manager classes.
01012 // <br>E.g. if <src>BitFlagsEngine<uChar></src> was not registered, the shared
01013 // library <src>libbitflagsengine.so</src> (or .dylib) will be loaded. If
01014 // successful, its function <src>register_bitflagsengine()</src> will be
01015 // executed which should register the data manager(s). Thereafter it is known
01016 // and will be used. For example in a file Register.h and Register.cc:
01017 // <srcblock>
01018 //   // Declare in .h file as C function, so no name mangling is done.
01019 //   extern "C" {
01020 //     void register_bitflagsengine();
01021 //   }
01022 //   // Implement in .cc file.
01023 //   void register_bitflagsengine()
01024 //   {
01025 //     BitFlagsEngine<uChar>::registerClass();
01026 //     BitFlagsEngine<Short>::registerClass();
01027 //     BitFlagsEngine<Int>::registerClass();
01028 //   }
01029 // </srcblock>
01030 // There are several functions that can give information about which data managers
01031 // are used for which columns and to obtain the characteristics and properties
01032 // of them. Class RODataManAccessor and derived classes can be used for it
01033 // as well as the functions <src>dataManagerInfo</src> and
01034 // <src>showStructure</src> in class Table.
01035 
01036 // <ANCHOR NAME="Tables:storage managers">
01037 // <h3>Storage Managers</h3></ANCHOR>
01038 //
01039 // Storage managers are used to store the data contained in the column cells.
01040 // At table construction time the binding of columns to storage managers is done.
01041 // <br>Each storage manager uses one or more files (usually called table.fi_xxx
01042 // where i is a sequence number and _xxx is some kind of extension).
01043 // Typically several files are used to store the data of the columns of a table.
01044 // <br>In order to reduce the number of files (and to support large block sizes),
01045 // it is possible to have a single container file (a MultiFile) containing all
01046 // data files used by the storage managers. Such a file is called table.mf.
01047 // Note that the program <em>lsmf</em> can be used to see which
01048 // files are contained in a MultiFile. The program <em>tomf</em> can
01049 // convert the files in a MultiFile to regular files.
01050 // <br>At table creation time it is decided if a MultiFile will be used. It
01051 // can be done by means of the StorageOption object given to the SetupNewTable
01052 // constructor and/or by the aipsrc variables:
01053 // <ul>
01054 //  <li> <src>table.storage.option</src> which can have the value
01055 //       'multifile', 'sepfile' (meaning separate files), or 'default'.
01056 //       Currently the default is to use separate files.
01057 //  <li> <src>table.storage.blocksize</src> defines the block size to be
01058 //       used by a MultiFile. If 0 is given, the file system's block size
01059 //       will be used.
01060 // </ul>
01061 // Almost all standard storage managers support the MultiFile.
01062 // The exception is StManAipsIO, because it is hardly ever used.
01063 //
01064 // Several storage managers exist, each with its own storage characteristics.
01065 // The default and preferred storage manager is <src>StandardStMan</src>.
01066 // Other storage managers should only be used if they pay off in
01067 // file space (like <src>IncrementalStMan</src> for slowly varying data)
01068 // or access speed (like the tiled storage managers for large data arrays).
01069 // <br>The storage managers store the data in a big or little endian
01070 // canonical format. The format can be specified when the table is created.
01071 // By default it uses the endian format as specified in the aipsrc variable
01072 // <code>table.endianformat</code> which can have the value local, big,
01073 // or little. The default is local.
01074 // <ol>
01075 //  <li>
01076 //   <linkto class="StandardStMan:description">StandardStMan</linkto>
01077 //   stores all the values in so-called buckets (equally sized chunks
01078 //   in the file). It requires little memory.
01079 //   <br>It replaces the old <src>StManAipsIO</src>.
01080 //
01081 //  <li>
01082 //   <linkto class="IncrementalStMan:description">IncrementalStMan</linkto>
01083 //   uses a storage mechanism resembling "incremental backups". A value
01084 //   is only stored if it is different from the previous row. It is
01085 //   very well suited for slowly varying data.
01086 //   <br>The class <linkto class="ROIncrementalStManAccessor:description">
01087 //   ROIncrementalStManAccessor</linkto> can be used to tune the
01088 //   behaviour of the <src>IncrementalStMan</src>. It contains functions
01089 //   to deal with the cache size and to show the behaviour of the cache.
01090 //
01091 //  <li>
01092 //   The <a href="#Tables:TiledStMan">Tiled Storage Managers</a>
01093 //   store the data as a tiled hypercube allowing for more or less equally
01094 //   efficient data access along all main axes. It can be used for
01095 //   UV-data as well as for image data.
01096 //
01097 //  <li>
01098 //   <linkto class="StManAipsIO:description">StManAipsIO</linkto>
01099 //   uses <src>AipsIO</src> to store the data in the columns.
01100 //   It supports all table functionality, but its I/O is probably not
01101 //   as efficient as other storage managers. It also requires that
01102 //   a large part of the table fits in memory.
01103 //   <br>It should not be used anymore, because it uses a lot of memory
01104 //   for larger tables and because it is not very robust in case an
01105 //   application or system crashes.
01106 //
01107 //  <li>
01108 //   <linkto class="MemoryStMan:description">MemoryStMan</linkto>
01109 //   holds the data in memory. It means that data 'stored' with this
01110 //   storage manager are NOT persistent.
01111 //   <br>This storage manager is primarily meant for tables held in
01112 //   memory, but it can also be useful for temporary columns in
01113 //   normal tables. Note, however, that if a table is accessed
01114 //   concurrently from multiple processes, MemoryStMan data cannot be
01115 //   synchronized.
01116 // </ol>
01117 //
01118 // The storage manager framework makes it possible to support arbitrary files
01119 // as tables. This has been used in a case where a file is filled
01120 // by the data acquisition system of a telescope. The file is simultaneously
01121 // used as a table using a dedicated storage manager. The table
01122 // system and storage manager provide a sync function to synchronize
01123 // the processes, i.e. to make the table system aware of changes
01124 // in the file size (thus in the table size) by the filling process.
01125 //
01126 // <note role=tip>
01127 // Not all data managers support all the table functionality. So, the choice
01128 // of a data manager can greatly influence the type of operations you can do
01129 // on the table as a whole.
01130 // For example, if a column uses the tiled storage manager,
01131 // it is not possible to delete rows from the table, because that storage
01132 // manager will not support deletion of rows.
01133 // However, it is always possible to delete all columns of a data
01134 // manager in one single call.
01135 // </note>
01136 
01137 // <ANCHOR NAME="Tables:TiledStMan">
01138 // <h3>Tiled Storage Manager</h3></ANCHOR>
01139 // The Tiled Storage Managers allow one to store the data of
01140 // one or more columns in a tiled way. Tiling means
01141 // that the data are stored without a preferred order to make access
01142 // along the different main axes equally efficient. This is done by
01143 // storing the data in so-called tiles (i.e. equally shaped subsets of an
01144 // array) to increase data locality. The user can define the tile shape
01145 // to optimize for the most frequently used access.
01146 // <p>
01147 // The Tiled Storage Manager has the following properties:
01148 // <ul>
01149 //  <li> There can be more than one Tiled Storage Manager in
01150 //       a table; each with its own (unique) name.
01151 //  <li> Each Tiled Storage Manager can store an
01152 //       N-dimensional so-called hypercolumn.
01153 //       Elaborate hypercolumns can be defined using
01154 //       <linkto file="TableDesc.h#defineHypercolumn">
01155 //       TableDesc::defineHypercolumn</linkto>.
01156 //       <br>Note that defining a hypercolumn is only necessary if it
01157 //       contains multiple columns or if the TiledDataStMan is used.
01158 //       It means that in practice it is hardly ever needed to define a
01159 //       hypercolumn.
01160 //       <br>A hypercolumn consists of up to three types of columns:
01161 //       <dl>
01162 //        <dt> Data columns
01163 //        <dd> contain the data to be stored in a tiled way. This will
01164 //             be done in tiled hypercubes.
01165 //             There must be at least one data column.
01166 //             <br> For example: a table contains UV-data with
01167 //                  data columns "Visibility" and "Weight".
01168 //        <dt> Coordinate columns
01169 //        <dd> define the world coordinates of the pixels in the data columns.
01170 //             Coordinate columns are optional, but if given there must
01171 //             be N coordinate columns for an N-dimensional hypercolumn.
01172 //             <br>
01173 //             For example: the data in the example above is 4-dimensional
01174 //             and has coordinate columns "Time", "Baseline", "Frequency",
01175 //             and "Polarization".
01176 //        <dt> Id columns
01177 //        <dd> are needed if TiledDataStMan is used.
01178 //             Different rows in the data columns can be stored in different
01179 //             hypercubes. The values in the id column(s) uniquely identify
01180 //             the hypercube a row is stored in.
01181 //             <br>
01182 //             For example: the line and continuum data in a MeasurementSet
01183 //             table need to be stored in 2 different hypercubes (because
01184 //             their shapes are different (see below)). A column containing
01185 //             the type (line or continuum) has to be used as an id column.
01186 //       </dl>
01187 //  <li> If multiple data columns are used, the shape of their data
01188 //       must be conforming in each individual row.
01189 //       If data in different rows have different shapes, they must be
01190 //       stored in different hypercubes, because a hypercube can only hold
01191 //       data with conforming shapes.
01192 //       <br>
01193 //       Thus in the example above, rows with line data will have conforming
01194 //       shapes and can be stored in one hypercube. The continuum data
01195 //       will have another shape and can be stored in another hypercube.
01196 //       <br>
01197 //       The storage manager keeps track of the mapping of rows to/from
01198 //       hypercubes.
01199 //  <li> Each hypercube can be tiled in its own way. It is not required
01200 //       that an integer number of tiles fits in the hypercube. The last
01201 //       tiles will be padded as needed.
01202 //  <li> The last axis of a hypercube can be extensible. This means that
01203 //       the size of that axis does not need to be defined when the
01204 //       hypercube is defined in the storage manager. Instead, the hypercube
01205 //       can be extended when another chunk of data has to be stored.
01206 //       This can be very useful in, for example, a (quasi-)realtime
01207 //       environment where the size of the time axis is not known.
01208 //  <li> If coordinate columns are defined, they describe the coordinates
01209 //       of the axes of the hypercubes. Each hypercube has its own set of
01210 //       coordinates.
01211 //  <li> Data and id columns have to be stored with the Tiled
01212 //       Storage Manager. However, coordinate columns do not need to be
01213 //       stored with the Tiled Storage Manager.
01214 //       Especially in the case where the coordinates for a hypercube axis
01215 //       are varying (i.e. dependent on other axes), another storage manager
01216 //       has to be used (because the Tiled Storage Manager can only
01217 //       hold constant coordinates).
01218 // </ul>
01219 // <p>
01220 // The following Tiled Storage Managers are available:
01221 // <dl>
01222 //  <dt> <linkto class=TiledShapeStMan:description>TiledShapeStMan</linkto>
01223 //  <dd> can be seen as a specialization of <src>TiledDataStMan</src>
01224 //       by using the array shape as the id value.
01225 //       Similarly to <src>TiledDataStMan</src> it can maintain multiple
01226 //       hypercubes and store multiple rows in a hypercube, but it is
01227 //       easier to use, because the special <src>addHypercube</src> and
01228 //       <src>extendHypercube</src> functions are not needed.
01229 //       A hypercube is automatically added when a new array shape is
01230 //       encountered.
01231 //       <br>
01232 //       This storage manager could be used for a table with a column
01233 //       containing line and continuum data, which will result
01234 //       in 2 hypercubes.
01235 //  <dt> <linkto class=TiledCellStMan:description>TiledCellStMan</linkto>
01236 //  <dd> creates (automatically) a new hypercube for each row.
01237 //       Thus each row of the hypercolumn is stored in a separate hypercube.
01238 //       Note that the row number serves as the id value. So an id column
01239 //       is not needed, although there are multiple hypercubes.
01240 //       <br>
01241 //       This storage manager is meant for tables where the data arrays
01242 //       in the different rows are not accessed together. One can think
01243 //       of a column containing images. Each row contains an image and
01244 //       only one image is shown at a time.
01245 //  <dt> <linkto class=TiledColumnStMan:description>TiledColumnStMan</linkto>
01246 //  <dd> creates one hypercube for the entire hypercolumn. Thus all cells
01247 //       in the hypercube have to have the same shape and therefore this
01248 //       storage manager is only possible if all columns in the hypercolumn
01249 //       have the attribute FixedShape.
01250 //       <br>
01251 //       This storage manager could be used for a table with a column
01252 //       containing images for the Stokes parameters I, Q, U, and V.
01253 //       By storing them in one hypercube, it is possible to retrieve
01254 //       the 4 Stokes values for a subset of the image or for an individual
01255 //       pixel in a very efficient way.
01256 //  <dt> <linkto class=TiledDataStMan:description>TiledDataStMan</linkto>
01257 //  <dd> allows one to control the creation and extension of hypercubes.
01258 //       This is done by means of the class
01259 //       <linkto class=TiledDataStManAccessor:description>
01260 //       TiledDataStManAccessor</linkto>.
01261 //       It makes it possible to store, say, row 0-9 in hypercube A,
01262 //       row 10-34 in hypercube B, row 35-54 in hypercube A again, etc.
01263 //       <br>
01264 //       The drawback of this storage manager is that its hypercubes are not
01265 //       automatically extended when adding new rows. The special functions
01266 //       <src>addHypercube</src> and <src>extendHypercube</src> have to be
01267 //       used making it somewhat tedious to use.
01268 //       Therefore this storage manager may become obsolete in the near future.
01269 // </dl>
01270 // The Tiled Storage Managers have 3 ways to access and cache the data.
01271 // Class <linkto class=TSMOption>TSMOption</linkto> can be used to setup an
01272 // access choice and use it in a Table constructor.
01273 // <ul>
01274 //  <li> The old way (the only way until January 2010) uses a cache
01275 //       of its own to keep tiles that might need to be reused. It will always
01276 //       access entire tiles, even if only a small part is needed.
01277 //       It is possible to define a maximum cache size. The description of class
01278 //       <linkto class=ROTiledStManAccessor>ROTiledStManAccessor</linkto>
01279 //       contains a discussion about the effect of defining a maximum cache
01280 //       size.
01281 //  <li> Memory-mapping the data files. In this way the operating system
01282 //       takes care of the IO and caching. However, the limited address space
01283 //       may preclude using it for large tables on 32-bit systems.
01284 //  <li> Use buffered IO and let the kernel's file cache take care of caching.
01285 //       It will access the data in chunks of the given buffer size, so the
01286 //       entire tile does not need to be accessed if only a small part is
01287 //       needed.
01288 // </ul>
01289 // Apart from reading, all access ways described above can also handle writing
01290 // and extending tables. They create fully equal files. Both little and big
01291 // endian data can be read or written.
01292 
01293 // <ANCHOR NAME="Tables:virtual column engines">
01294 // <h3>Virtual Column Engines</h3></ANCHOR>
01295 //
01296 // Virtual column engines are used to implement the virtual (i.e.
01297 // calculated-on-the-fly) columns. The Table system provides
01298 // an abstract base class (or "interface class")
01299 // <linkto class="VirtualColumnEngine:description">VirtualColumnEngine</linkto>
01300 // that specifies the protocol for these engines.
01301 // The programmer must derive a concrete class to implement
01302 // the application-specific virtual column.
01303 // <p>
01304 // For example: the programmer
01305 // needs a column in a table which is the difference between two other
01306 // columns.  (Perhaps these two other columns are updated periodically
01307 // during the execution of a program.)  A good way to handle this would
01308 // be to have a virtual column in the table, and write a virtual column
01309 // engine which knows how to calculate the difference between corresponding
01310 // cells of the two other columns. So the result is that accessing a
01311 // particular cell of the virtual column invokes the virtual column engine,
01312 // which then gets the values from the other two columns, and returns their
01313 // difference. This particular example could be done using 
01314 // <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto>.
01315 // <p>
01316 // Several virtual column engines exist:
01317 // <ol>
01318 //  <li> The class
01319 //   <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto>
01320 //   makes it possible to define a column as an arbitrary expression of
01321 //   other columns. It uses the <a href="../notes/199.html">TaQL</a>
01322 //   CALC command. The virtual column can be a scalar or an array and
01323 //   can have one of the standard data types supported by the Table System.
01324 //  <li> The class
01325 //   <linkto class="BitFlagsEngine:description">BitFlagsEngine</linkto>
01326 //   maps an integer bit flags column to a Bool column. A read and write mask
01327 //   can be defined telling which bits to take into account when mapping
01328 //   to and from Bool (thus when reading or writing the Bool).
01329 //  <li> The class
01330 //   <linkto class="CompressFloat:description">CompressFloat</linkto>
01331 //   compresses a single precision floating point array by scaling the
01332 //   values to shorts (16-bit integer).
01333 //  <li> The class
01334 //   <linkto class="CompressComplex:description">CompressComplex</linkto>
01335 //   compresses a single precision complex array by scaling the
01336 //   values to shorts (16-bit integer). In fact, the 2 parts of the complex
01337 //   number are combined into a 32-bit integer.
01338 //  <li> The class
01339 //   <linkto class="CompressComplexSD:description">CompressComplexSD</linkto>
01340 //   does the same as CompressComplex, but optimizes for the case where the
01341 //   imaginary part is zero (which is often the case for Single Dish data).
01342 //  <li> The double templated class
01343 //   <linkto class="ScaledArrayEngine:description">ScaledArrayEngine</linkto>
01344 //   scales the data in an array from, for example,
01345 //   float to short before putting it.
01346 //  <li> The double templated class
01347 //   <linkto class="MappedArrayEngine:description">MappedArrayEngine</linkto>
01348 //   converts the data from one data type to another. Sometimes it might be
01349 //   needed to store the residual data in an MS in double precision.
01350 //   Because the imaging task can only handle single precision, this engine
01351 //   can be used to map the data from double to single precision.
01352 //  <li> The double templated class
01353 //   <linkto class="RetypedArrayEngine:description">RetypedArrayEngine</linkto>
01354 //   converts the data from one data type to another with the possibility
01355 //   to reduce the number of dimensions. For example, it can be used to
01356 //   store a 2-d array of StokesVector objects as a 3-d array of floats
01357 //   by treating the 4 data elements as an extra array axis. If the
01358 //   StokesVector class is simple, it can be done very efficiently.
01359 //  <li> The class
01360 //   <linkto class="ForwardColumnEngine:description">
01361 //   ForwardColumnEngine</linkto>
01362 //   forwards the gets and puts on a row in a column to the same row
01363 //   in a column with the same name in another table. This provides
01364 //   a virtual copy of the referenced column.
01365 //  <li> The class
01366 //   <linkto class="ForwardColumnIndexedRowEngine:description">
01367 //   ForwardColumnIndexedRowEngine</linkto>
01368 //   is similar to <src>ForwardColumnEngine</src>.
01369 //   However, instead of forwarding it to the same row it uses
01370 //   a column to map its row number to a row number in the referenced
01371 //   table. In this way multiple rows can share the same data.
01372 //   This data manager only allows for get operations.
01373 //  <li> The calibration module has implemented a virtual column engine
01374 //   to do on-the-fly calibration in a transparent way.
01375 // </ol>
01376 // To handle arbitrary data types the templated abstract base class
01377 // <linkto class="VSCEngine:description">VSCEngine</linkto>
01378 // has been written. An example of how to use this class can be
01379 // found in the demo program <src>dVSCEngine.cc</src>.
01380 
01381 // <ANCHOR NAME="Tables:LockSync">
01382 // <h3>Table locking and synchronization</h3></ANCHOR>
01383 //
01384 // Multiple concurrent readers and writers (also via NFS) of a
01385 // table are supported by means of a locking/synchronization mechanism.
01386 // This mechanism is not very sophisticated in the sense that it is
01387 // very coarsely grained. When locking, the entire table gets locked.
01388 // A special lock file is used to lock the table. This lock file also
01389 // contains some synchronization data.
01390 // <p>
01391 // Six ways of locking are supported (see class
01392 // <linkto class=TableLock>TableLock</linkto>):
01393 // <dl>
01394 //  <dt> TableLock::PermanentLocking(Wait)
01395 //  <dd> locks the table permanently (from open till close). This means
01396 //       that one writer OR multiple readers are possible.
01397 //  <dt> TableLock::AutoLocking
01398 //  <dd> does the locking automatically. This is the default mode.
01399 //       This mode makes it possible that a table is shared amongst
01400 //       processes without the user needing to write any special code.
01401 //       It also means that a lock is only released when needed.
01402 //  <dt> TableLock::AutoNoReadLocking
01403 //  <dd> is similar to AutoLocking. However, no lock is acquired when
01404 //       reading the table making it possible to read the table while
01405 //       another process holds a write-lock. It also means that for read
01406 //       purposes no automatic synchronization is done when the table is
01407 //       updated in another process.
01408 //       Explicit synchronization can be done by means of the function
01409 //       <src>Table::resync</src>.
01410 //  <dt> TableLock::UserLocking
01411 //  <dd> requires that the programmer explicitly acquires and releases
01412 //       a lock on the table. This makes some kind of transaction
01413 //       processing possible. E.g. set a write lock, add a row,
01414 //       write all data into the row and release the lock.
01415 //       The Table functions <src>lock</src> and <src>unlock</src>
01416 //       have to be used to acquire and release a (read or write) lock.
01417 //  <dt> TableLock::UserNoReadLocking
01418 //  <dd> is similar to UserLocking. However, similarly to AutoNoReadLocking
01419 //       no lock is needed to read the table.
01420 //  <dt> TableLock::NoLocking
01421 //  <dd> does not use table locking. It is the responsibility of the
01422 //       user to ensure that no concurrent access is done on the same
01423 //       bucket or tile in a storage manager, otherwise a table might
01424 //       get corrupted.
01425 //       <br>This mode is always used if Casacore is built with
01426 //       -DAIPS_TABLE_NOLOCKING.
01427 // </dl>
01428 // Synchronization of the processes accessing the same table is done
01429 // by means of the lock file. When a lock is released, the storage
01430 // managers flush their data into the table files. Some synchronization data
01431 // is written into the lock file telling the new number of table rows
01432 // and telling which storage managers have written data.
01433 // This information is read when another process acquires the lock
01434 // and is used to determine which storage managers have to refresh
01435 // their internal caches.
01436 // <br>Note that for the NoReadLocking modes (see above) explicit
01437 // synchronization might be needed using <src>Table::resync</src>.
01438 // <p>
01439 // The function <src>Table::hasDataChanged</src> can be used to check
01440 // if a table is (being) changed by another process. In this way
01441 // a program can react to it. E.g. the table browser can refresh its
01442 // screen when the underlying table is changed.
01443 // <p>
01444 // In general the default locking option will do.
01445 // From the above it should be clear that heavy concurrent access
01446 // results in a lot of flushing, thus will have a negative impact on
01447 // performance. If uninterrupted access to a table is needed,
01448 // the <src>PermanentLocking</src> option should be used.
01449 // If transaction-like processing is done (e.g. updating a table
01450 // containing an observation catalogue), the <src>UserLocking</src>
01451 // option is probably best.
01452 // <p>
01453 // Creation or deletion of a table is not possible if that table
01454 // is still open in another process. The function
01455 // <src>Table::isMultiUsed()</src> can be used to check if a table
01456 // is open in other processes.
01457 // <br>
01458 // The function <src>deleteTable</src> should be used to delete
01459 // a table. Before deleting the table it ensures that it is writable
01460 // and that it is not open in the current or another process.
01461 // <p>
01462 // The following example wants to read the table uninterrupted, thus it uses
01463 // the <src>PermanentLocking</src> option. It also wants to wait
01464 // until the lock is actually acquired.
01465 // Note that the destructor closes the table and releases the lock.
01466 // <srcblock>
01467 // // Open the table (readonly).
01468 // // Acquire a permanent (read) lock.
01469 // // It waits until the lock is acquired.
01470 // Table tab ("some.name",
01471 //            TableLock(TableLock::PermanentLockingWait));
01472 // </srcblock>
01473 //
01474 // The following example uses the automatic locking.
01475 // It tells the system to check about every 20 seconds if another
01476 // process wants access to the table.
01477 // <srcblock>
01478 // // Open the table (readonly).
01479 // Table tab ("some.name",
01480 //            TableLock(TableLock::AutoLocking, 20));
01481 // </srcblock>
01482 //
01483 // The following example gets data (say from a GUI) and writes it
01484 // as a row into the table. To lock the table as little as possible,
01485 // the lock is acquired just before writing and released immediately
01486 // thereafter.
01487 // <srcblock>
01488 // // Open the table (writable).
01489 // Table tab ("some.name",
01490 //            TableLock(TableLock::UserLocking),
01491 //            Table::Update);
01492 // while (True) {
01493 //     get input data
01494 //     tab.lock();     // Acquire a write lock and wait for it.
01495 //     tab.addRow();
01496 //     write data into the row
01497 //     tab.unlock();   // Release the lock.
01498 // }
01499 // </srcblock>
01500 //
01501 // The following example deletes a table if it is not used in
01502 // another process.
01503 // <srcblock>
01504 // Table tab ("some.name");
01505 // if (! tab.isMultiUsed()) {
01506 //     tab.markForDelete();
01507 // }
01508 // </srcblock>
01509 
01510 // <ANCHOR NAME="Tables:KeyLookup">
01511 // <h3>Table lookup based on a key</h3></ANCHOR>
01512 //
01513 // Class <linkto class=ColumnsIndex>ColumnsIndex</linkto> offers the
01514 // user a means to find the rows matching a given key or key range.
01515 // It is a somewhat primitive replacement of a B-tree index and in the
01516 // future it may be replaced by a proper B+-tree implementation.
01517 // <p>
01518 // The <src>ColumnsIndex</src> class makes it possible to build an
01519 // in-core index on one or more columns. Looking up a key or key range
01520 // is done using a binary search on that index. It returns a vector
01521 // containing the row numbers of the rows matching the key (range).
01522 // <p>
01523 // The class is not capable of tracing changes in the underlying column(s).
01524 // It detects a change in the number of rows and updates the index
01525 // accordingly. However, it has to be told explicitly when a value
01526 // in the underlying column(s) changes.
01527 // <p>
01528 // The following example shows how the class can be used.
01529 // <example>
01530 // Suppose one has an antenna table with key ANTENNA.
01531 // <srcblock>
01532 // // Open the table and make an index for column ANTENNA.
01533 // Table tab("antenna.tab");
01534 // ColumnsIndex colInx(tab, "ANTENNA");
01535 // // Make a RecordFieldPtr for the ANTENNA field in the index key record.
01536 // // Its data type has to match the data type of the column.
01537 // RecordFieldPtr<Int> antFld(colInx.accessKey(), "ANTENNA");
01538 // // Now loop in some way and find the row for the antenna
01539 // // involved in that loop.
01540 // Bool found;
01541 // while (...) {
01542 //     // Fill the key field and get the row number.
01543 //     // ANTENNA is a unique key, so only one row number matches.
01544 //     // Otherwise function getRowNumbers had to be used.
01545 //     *antFld = antenna;
01546 //     uInt antRownr = colInx.getRowNumber (found);
01547 //     if (!found) {
01548 //         cout << "Antenna " << antenna << " is unknown" << endl;
01549 //     } else {
01550 //         // antRownr can now be used to get data from that row in
01551 //         // the antenna table.
01552 //     }
01553 // }
01554 // </srcblock>
01555 // </example>
01556 // <linkto class=ColumnsIndex>ColumnsIndex</linkto> itself contains a more
01557 // advanced example. It shows how to use a private compare function
01558 // to adjust the lookup if the index does not contain single
01559 // key values, but intervals instead. This is useful if a row in
01560 // a (sub)table is valid for, say, a time range instead of a single
01561 // timestamp.
01562 
01563 // <ANCHOR NAME="Tables:performance">
01564 // <h3>Performance and robustness considerations</h3></ANCHOR>
01565 //
01566 // The Table System resembles a database system, but it is not as robust.
01567 // It lacks the transaction and logging facilities common to data base systems.
01568 // It means that in case of a crash data might be lost.
01569 // To reduce the risk of data loss to
01570 // a minimum, it is advisable to regularly do a <tt>flush</tt>, optionally
01571 // with an <tt>fsync</tt> to ensure that all data are really written.
01572 // However, that can degrade the performance because it involves extra writes.
01573 // So one should find the right balance between robustness and performance.
01574 //
01575 // To get a good feeling for the performance issues, it is important to
01576 // understand some of the internals of the Table System.
01577 // <br>The storage managers drive the performance. All storage managers use
01578 // buckets (called tiles for the TiledStMan) which contain the data.
01579 // All IO is done by bucket. The bucket/tile size is defined when creating
01580 // the storage manager objects. Sometimes the default will do, but usually
01581 // it is better to set it explicitly.
01582 //
01583 // It is best to do a flush when a tile is full.
01584 // For example: <br>
01585 // When creating a MeasurementSet containing N antennae (thus N*(N-1)/2 baselines
01586 // or N*(N+1)/2 if auto-correlations are stored as well) it makes sense to
01587 // store, say, N/2 rows in a tile and do a flush each time all baselines
01588 // are written. In that way tiles are fully filled when doing the flush, so
01589 // no extra IO is involved.
01590 // <br>Here is some code showing this when creating a MeasurementSet.
01591 // The code should speak for itself.
01592 // <srcblock>
01593 // MS* createMS (const String& msName, int nrchan, int nrant)
01594 // {
01595 //   // Get the MS main default table description.
01596 //   TableDesc td = MS::requiredTableDesc();
01597 //   // Add the data column and its unit.
01598 //   MS::addColumnToDesc(td, MS::DATA, 2);
01599 //   td.rwColumnDesc(MS::columnName(MS::DATA)).rwKeywordSet().
01600 //                                                 define("UNIT","Jy");
01601 //   // Store the DATA and FLAG column in two separate files.
01602 //   // In this way accessing FLAG only is much cheaper than
01603 //   // when combining DATA and FLAG.
01604 //   // All data have the same shape, thus use TiledColumnStMan.
01605 //   // Also store UVW with TiledColumnStMan.
01606 //   Vector<String> tsmNames(1);
01607 //   tsmNames[0] = MS::columnName(MS::DATA);
01608 //   td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq));
01609 //   td.defineHypercolumn("TiledData", 3, tsmNames);
01610 //   tsmNames[0] = MS::columnName(MS::FLAG);
01611 //   td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq));
01612 //   td.defineHypercolumn("TiledFlag", 3, tsmNames);
01613 //   tsmNames[0] = MS::columnName(MS::UVW);
01614 //   td.defineHypercolumn("TiledUVW", 2, tsmNames);
01615 //   // Setup the new table.
01616 //   SetupNewTable newTab(msName, td, Table::New);
01617 //   // Most columns vary slowly and use the IncrStMan.
01618 //   IncrementalStMan incrStMan("ISMData");
01619 //   // A few columns use the StandardStMan (set an appropriate bucket size).
01620 //   StandardStMan    stanStMan("SSMData", 32768);
01621 //   // Store all pol and freq and some rows in a single tile.
01622 //   // autocorrelations are written, thus in total there are
01623 //   // nrant*(nrant+1)/2 baselines. Ensure a baseline takes up an
01624 //   // integer number of tiles.
01625 //   TiledColumnStMan tiledData("TiledData",
01626 //                              IPosition(3,4,nrchan,(nrant+1)/2));
01627 //   TiledColumnStMan tiledFlag("TiledFlag",
01628 //                              IPosition(3,4,nrchan,8*(nrant+1)/2));
01629 //   TiledColumnStMan tiledUVW("TiledUVW",
01630 //                             IPosition(2,3,nrant*(nrant+1)/2));
01631 //   newTab.bindAll (incrStMan);
01632 //   newTab.bindColumn(MS::columnName(MS::ANTENNA1),stanStMan);
01633 //   newTab.bindColumn(MS::columnName(MS::ANTENNA2),stanStMan);
01634 //   newTab.bindColumn(MS::columnName(MS::DATA),tiledData);
01635 //   newTab.bindColumn(MS::columnName(MS::FLAG),tiledFlag);
01636 //   newTab.bindColumn(MS::columnName(MS::UVW),tiledUVW);
01637 //   // Create the MS and its subtables.
01638 //   // Get access to its columns.
01639 //   MS* msp = new MeasurementSet(newTab);
01640 //   // Create all subtables.
01641 //   // Do this after the creation of optional subtables,
01642 //   // so the MS will know about those optional subtables.
01643 //   msp->createDefaultSubtables (Table::New);
01644 //   return msp;
01645 // }
01646 // </srcblock>
01647 
01648 // <h4>Some more performance considerations</h4>
01649 // Which storage managers to use and how to use them depends heavily on
01650 // the type of data and the access patterns to the data. Here follow some
01651 // guidelines:
01652 // <ol>
01653 //  <li> Scalar data can be stored with the StandardStMan (SSM) or
01654 //       IncrementalStMan (ISM). For slowly varying data (e.g. the TIME column
01655 //       in a MeasurementSet) it is best to use the ISM. Otherwise the SSM.
01656 //       Note that very long strings (longer than the bucketsize) can only
01657 //       be stored with the SSM.
01658 //  <li> Any number of storage managers can be used. In fact, each column
01659 //       can have a storage manager of its own resulting in column-wise
01660 //       stored data which is more and more used in data base systems.
01661 //       In that way a query or sort on that column is very fast, because
01662 //       the buckets to read only contain data of that column.
01663 //       In practice one can decide to combine a few frequently used columns
01664 //       in a storage manager.
01665 //  <li> Array data can be stored with any column manager. Small fixed size
01666 //       arrays can be stored directly with the SSM
01667 //       (or ISM if not changing much).
01668 //       However, they can also be stored with a TiledStMan (TSM) as shown
01669 //       for the UVW column in the example above.
01670 //       <br> Large arrays should usually be stored with a TSM. However,
01671 //       if it must be possible to change the shape of an array after it
01672 //       was stored, the SSM (or ISM) must be used. Note that in that
01673 //       case a lot of disk space can be wasted, because the SSM and ISM
01674 //       store the array data at the end of the file if the array got
01675 //       bigger and do not reuse the old space. The only way to
01676 //       reclaim it is by making a deep copy of the entire table.
01677 //  <li> If an array is stored with a TSM, it is important to decide
01678 //       which TSM to use.
01679 //       <ol>
01680 //        <li> The TiledColumnStMan is the most efficient, but only suitable
01681 //         for arrays having the same shape in the entire column.
01682 //        <li> The TiledShapeStMan is suitable for columns where the arrays
01683 //         can have a few shapes.
01684 //        <li> The TiledCellStMan is suitable for columns where the arrays
01685 //         can have many different shapes.
01686 //       </ol>
01687 //       This is discussed in more detail
01688 //       <a href="#Tables:TiledStMan">above</a>.
01689 //  <li> If storing an array with a TSM, it can be very important to
01690 //       choose the right tile shape. Not only does this define the size
01691 //       of a tile, but it also defines if access in other directions
01692 //       than the natural direction can be fast. It is also discussed in
01693 //       more detail <a href="#Tables:TiledStMan">above</a>.
01694 //  <li> Columns can be combined in a single TiledStMan. For instance, combining DATA
01695 //       and FLAG is advantageous if FLAG is always used with DATA. However, if FLAG
01696 //       is used on its own (e.g. in combination with CORRECTED_DATA), it is better
01697 //       to separate them, otherwise tiles containing FLAG also contain DATA making the
01698 //       tiles much bigger, thus more expensive to access.
01699 // </ol>
01700 //
01701 // <ANCHOR NAME="Tables:iotracing">
01702 // <h4>IO Tracing</h4></ANCHOR>
01703 //
01704 // Several forms of tracing can be done to see how the Table I/O performs.
01705 // <ul>
01706 //  <li> On Linux/UNIX systems the <src>strace</src> command can be used to
01707 //       collect trace information about the physical IO.
01708 //  <li> The function <src>showCacheStatistics</src> in class
01709 //       TiledStManAccessor can be used to show the number of actual reads
01710 //       and writes and the percentage of cache hits.
01711 //  <li> The software has some options to trace the operations done on
01712 //       tables. It is possible to specify the columns and/or the operations
01713 //       to be traced. The following <src>aipsrc</src> variables can be used.
01714 //   <ul>
01715 //    <li> <src>table.trace.filename</src> specifies the file to write the
01716 //         trace output to. If not given or empty, no tracing will be done.
01717 //         The file name can contain environment variables or a tilde.
01718 //    <li> <src>table.trace.operation</src> specifies the operations to be
01719 //         traced. It is a string containing s, r, and/or w where
01720 //         s means tracing RefTable construction (selection/sort),
01721 //         r means column reads, and w means column writes.
01722 //         If empty, only the high level table operations (open, create, close)
01723 //         will be traced.
01724 //    <li> <src>table.trace.columntype</src> specifies the types of columns to
01725 //         be traced. It is a string containing the characters s, a, and/or r.
01726 //         s means all scalar columns, a all array columns, and r all record
01727 //         columns. If empty and if <src>table.trace.column</src> is empty,
01728 //         its default value is a.
01729 //    <li> <src>table.trace.column</src> specifies names of columns to be
01730 //         traced. Its value can be one or more glob-like patterns separated
01731 //         by commas without any whitespace. The default is empty.
01732 //         For example:
01733 // <srcblock>
01734 //    table.trace.column: *DATA,FLAG,WEIGHT*
01735 // </srcblock>
01736 //         to trace all DATA, the FLAG, and all WEIGHT columns.
01737 //   </ul>
01738 //       The trace output is a text file with the following columns
01739 //       separated by a space.
01740 //   <ul>
01741 //    <li> The UTC time the trace line was written (with msec accuracy).
01742 //    <li> The operation: n(ew), o(pen), c(lose), t(able), r(ead), w(rite),
01743 //         s(election/sort/iter), p(rojection).
01744 //         t means an arbitrary table operation as given in the name column.
01745 //    <li> The table-id (as t=i) given at table creation (new) or open.
01746 //    <li> The table name, column name, or table operation
01747 //         (as <src>*oper*</src>).
01748 //         <src>*reftable*</src> means that the operation is on a RefTable
01749 //         (thus result of selection, sort, projection, or iteration).
01750 //    <li> The row or rows to access (* means all rows).
01751 //         Multiple rows are given as a series of ranges like s:e:i,s:e:i,...
01752 //         where e and i are only given if applicable (default i is 1).
01753 //         Note that e is inclusive and defaults to s.
01754 //    <li> The optional array shape to access (none means scalar).
01755 //         In case multiple rows are accessed, the last shape value is the
01756 //         number of rows.
01757 //    <li> The optional slice of the array in each row as [start][end][stride].
01758 //   </ul>
01759 //       Shape, start, end, and stride are given in Fortran-order as
01760 //       [n1,n2,...].
01761 // </ul>
01762 
01763 // <ANCHOR NAME="Tables:applications">
01764 // <h4>Applications to inspect/manipulate a table</h4></ANCHOR>
01765 // <ul>
01766 //  <li><em>showtable</em> shows the structure of a table. It can show:
01767 //   <ul>
01768 //    <li> the columns and their format (optionally sorted on name)
01769 //    <li> the data managers used to store the column data
01770 //    <li> the table and/or column keywords and their values
01771 //    <li> recursively the same info of the subtables
01772 //   </ul>
01773 //  <li><em>showtablelock</em> shows if a table is locked or opened and by
01774 //      which process.
01775 //  <li><em>lsmf</em> shows the virtual files contained in a MultiFile.
01776 //  <li><em>tomf</em> copies the given files to a MultiFile.
01777 //  <li><em>taql</em> can be used to query a table using the
01778 //       <a href="../notes/199.html">Table Query Language</a> (TaQL).
01779 // </ul>
01780 //
01781 // </synopsis>
01782 // </module>
01783 
01784 
01785 
01786 } //# NAMESPACE CASACORE - END
01787 
01788 #endif