00001 //# Tables.h: The Tables module - Casacore data storage 00002 //# Copyright (C) 1994-2010 00003 //# Associated Universities, Inc. Washington DC, USA. 00004 //# 00005 //# This library is free software; you can redistribute it and/or modify it 00006 //# under the terms of the GNU Library General Public License as published by 00007 //# the Free Software Foundation; either version 2 of the License, or (at your 00008 //# option) any later version. 00009 //# 00010 //# This library is distributed in the hope that it will be useful, but WITHOUT 00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 00012 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 00013 //# License for more details. 00014 //# 00015 //# You should have received a copy of the GNU Library General Public License 00016 //# along with this library; if not, write to the Free Software Foundation, 00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA. 00018 //# 00019 //# Correspondence concerning AIPS++ should be addressed as follows: 00020 //# Internet email: aips2-request@nrao.edu. 
00021 //# Postal address: AIPS++ Project Office 00022 //# National Radio Astronomy Observatory 00023 //# 520 Edgemont Road 00024 //# Charlottesville, VA 22903-2475 USA 00025 //# 00026 //# $Id$ 00027 00028 #ifndef TABLES_TABLES_H 00029 #define TABLES_TABLES_H 00030 00031 //# Includes 00032 //# table description 00033 #include <casacore/casa/aips.h> 00034 #include <casacore/tables/Tables/TableDesc.h> 00035 #include <casacore/tables/Tables/ColumnDesc.h> 00036 #include <casacore/tables/Tables/ScaColDesc.h> 00037 #include <casacore/tables/Tables/ArrColDesc.h> 00038 #include <casacore/tables/Tables/ScaRecordColDesc.h> 00039 00040 //# table access 00041 #include <casacore/tables/Tables/Table.h> 00042 #include <casacore/tables/Tables/TableLock.h> 00043 #include <casacore/tables/Tables/SetupNewTab.h> 00044 #include <casacore/tables/Tables/ScalarColumn.h> 00045 #include <casacore/tables/Tables/ArrayColumn.h> 00046 #include <casacore/tables/Tables/TableRow.h> 00047 #include <casacore/tables/Tables/TableCopy.h> 00048 #include <casacore/casa/Arrays/Array.h> 00049 #include <casacore/casa/Arrays/Slicer.h> 00050 #include <casacore/casa/Arrays/Slice.h> 00051 00052 //# keywords 00053 #include <casacore/tables/Tables/TableRecord.h> 00054 #include <casacore/casa/Containers/RecordField.h> 00055 00056 //# table lookup 00057 #include <casacore/tables/Tables/ColumnsIndex.h> 00058 #include <casacore/tables/Tables/ColumnsIndexArray.h> 00059 00060 //# table vectors 00061 #include <casacore/tables/Tables/TableVector.h> 00062 #include <casacore/tables/Tables/TabVecMath.h> 00063 #include <casacore/tables/Tables/TabVecLogic.h> 00064 00065 //# data managers 00066 #include <casacore/tables/DataMan.h> 00067 00068 //# table expressions (for selection of rows) 00069 #include <casacore/tables/TaQL.h> 00070 00071 00072 namespace casacore { //# NAMESPACE CASACORE - BEGIN 00073 00074 // <module> 00075 00076 // <summary> 00077 // Tables are the data storage mechanism for Casacore 00078 // </summary> 00079 
00080 // <use visibility=export> 00081 00082 // <reviewed reviewer="jhorstko" date="1994/08/30" tests="" demos=""> 00083 // </reviewed> 00084 00085 // <prerequisite> 00086 // <li> <linkto class="Record:description">Record</linkto> class 00087 // </prerequisite> 00088 00089 // <etymology> 00090 // "Table" is a formal term from relational database theory: 00091 // <em> "The organizing principle in a relational database is the TABLE, 00092 // a rectangular, row/column arrangement of data values."</em> 00093 // Casacore tables are extensions to traditional tables, but are similar 00094 // enough that we use the same name. There is also a strong resemblance 00095 // between the uses of Casacore tables, and FITS binary tables, which 00096 // provides another reason to use "Tables" to describe the Casacore data 00097 // storage mechanism. 00098 // </etymology> 00099 00100 // <synopsis> 00101 // Tables are the fundamental storage mechanism for Casacore. This document 00102 // explains <A HREF="#Tables:motivation">why</A> they had to be made, 00103 // <A HREF="#Tables:properties">what</A> their properties are, and 00104 // <A HREF="#Tables:open">how</A> to use them. 
The last subject is 00105 // discussed and illustrated in a sequence of sections: 00106 // <UL> 00107 // <LI> <A HREF="#Tables:open">opening</A> an existing table, 00108 // <LI> <A HREF="#Tables:read">reading</A> from a table, 00109 // <LI> <A HREF="#Tables:creation">creating</A> a new table, 00110 // <LI> <A HREF="#Tables:write">writing</A> into a table, 00111 // <LI> <A HREF="#Tables:row-access">accessing rows</A> in a table, 00112 // <LI> <A HREF="#Tables:select and sort">selection and sorting</A> 00113 // (see also <A HREF="../notes/199.html">Table Query Language</A>), 00114 // <LI> <A HREF="#Tables:concatenation">concatenating similar tables</A>, 00115 // <LI> <A HREF="#Tables:iterate">iterating</A> through a table, 00116 // <LI> <A HREF="#Tables:LockSync">locking/synchronization</A> 00117 // for concurrent access, 00118 // <LI> <A HREF="#Tables:KeyLookup">indexing</A> a table for faster lookup, 00119 // <LI> <A HREF="#Tables:vectors">vector operations</A> on a column, 00120 // <LI> <A HREF="#Tables:performance">performance and robustness</A> 00121 // considerations with some information on 00122 // <A HREF="#Tables:iotracing">IO tracing</A>. 00123 // </UL> 00124 // A few <A HREF="#Tables:applications">applications</A> exist to inspect 00125 // and manipulate a table. 00126 00127 00128 // <ANCHOR NAME="Tables:motivation"> 00129 // <motivation></ANCHOR> 00130 // 00131 // The Casacore tables are mainly based upon the ideas of Allen Farris, 00132 // as laid out in the 00133 // <A HREF="http://aips2.cv.nrao.edu/aips++/docs/reference/Database.ps.gz"> 00134 // AIPS++ Database document</A>, from where the following paragraph is taken: 00135 // 00136 // <p> 00137 // Traditional relational database tables have two features that 00138 // decisively limit their applicability to scientific data. First, an item of 00139 // data in a column of a table must be atomic -- it must have no internal 00140 // structure. 
A consequence of this restriction is that relational 00141 // databases are unable to deal with arrays of data items. Second, an 00142 // item of data in a column of a table must not have any direct or 00143 // implied linkages to other items of data or data aggregates. This 00144 // restriction makes it difficult to model complex relationships between 00145 // collections of data. While these restrictions may make it easy to 00146 // define a mathematically complete set of data manipulation operations, 00147 // they are simply intolerable in a scientific data-handling context. 00148 // Multi-dimensional arrays are frequently the most natural modes in 00149 // which to discuss and think about scientific data. In addition, 00150 // scientific data often requires complex calibration operations that 00151 // must draw on large bodies of data about equipment and its performance 00152 // in various states. The restrictions imposed by the relational model 00153 // make it very difficult to deal with complex problems of this nature. 00154 // <p> 00155 // 00156 // In response to these limitations, and other needs, the Casacore tables were 00157 // designed. 00158 // </motivation> 00159 00160 // <ANCHOR NAME="Tables:properties"> 00161 // <h3>Table Properties</h3></ANCHOR> 00162 // 00163 // Casacore tables have the following properties: 00164 // <ul> 00165 // <li> A table consists of a number of rows and columns. 00166 // <A HREF="#Tables:keywords">Keyword/value pairs</A> may be defined 00167 // for the table as a whole and for individual columns. A keyword/value 00168 // pair for a column could, for instance, define its unit. 00169 // <li> Each table has a <A HREF="#Tables:Table Description">description</A> 00170 // which specifies the number and type of columns, and maybe initial 00171 // keyword sets and default values for the columns. 
00172 // <li> A cell in a column may contain 00173 // <UL> 00174 // <LI> a scalar; 00175 // <LI> a "direct" array -- which must have the same shape in all 00176 // cells of a column, is usually small, and is stored in the 00177 // table itself; or 00178 // <LI> an "indirect" array -- which may have different shapes in 00179 // different cells of the same column, is arbitrarily large, 00180 // and is stored in a separate file. 00181 // </UL> 00182 // <li> A column may be 00183 // <UL> 00184 // <LI> "filled" -- containing actual data, or 00185 // <LI> "virtual" -- containing a recipe telling how the data will 00186 // be generated dynamically 00187 // </UL> 00188 // <li> Only the standard Casacore data types can be used in filled 00189 // columns, be they scalars or arrays: Bool, uChar, Short, uShort, 00190 // Int, uInt, float, double, Complex, DComplex and String. 00191 // Furthermore scalars containing 00192 // <linkto class=TableRecord>record</linkto> values are possible. 00193 // <li> A column can have a default value, which will automatically be stored 00194 // in a cell of the column, when a row is added to the table. 00195 // <li> <A HREF="#Tables:Data Managers">Data managers</A> handle the 00196 // reading, writing and generation of data. Each column in a table can 00197 // be assigned its own data manager, which allows for optimization of 00198 // the data storage per column. The choice of data manager determines 00199 // whether a column is filled or virtual. 00200 // <li> Table data are stored in a canonical format, so they can be read 00201 // on any machine. To avoid needless swapping of bytes, the data can 00202 // be stored in big endian (as used on e.g. SUN) or little endian 00203 // (as used on Intel PC-s) canonical format. 
00204 // By default it uses the format specified in the aipsrc variable 00205 // <code>table.endianformat</code> which defaults to 00206 // <code>Table::LocalEndian</code> (thus the endian format of the 00207 // machine being used). 00208 // <li> The SQL-like 00209 // <a href="../notes/199.html">Table Query Language</a> (TaQL) 00210 // can be used to do operations on tables like 00211 // select, sort, update, insert, delete, and create. 00212 // </ul> 00213 // 00214 // Tables can be in one of three forms: 00215 // <ul> 00216 // <li> A plain table is a table stored on disk. 00217 // It can be shared by multiple processes. 00218 // <li> A memory table is a table held in memory. 00219 // It is a process specific table, thus not sharable. 00220 // The <linkto class=Table>Table::copy</linkto> function can be used 00221 // to turn a memory table into a plain table. 00222 // <li> A reference table is a table referencing a plain or memory table. 00223 // It is the result of a selection or sort on another table. 00224 // A reference table references the data in the other table, thus 00225 // changing data in a reference table means that the data in the 00226 // original table are changed. 00227 // The <linkto class=Table>Table::deepCopy</linkto> function can be 00228 // used to turn a reference table into a plain table. 00229 // </ul> 00230 // Concurrent access from different processes to the same plain table is 00231 // fully supported by means of a <A HREF="#Tables:LockSync"> 00232 // locking/synchronization</A> mechanism. Concurrent access over NFS is also 00233 // supported. 00234 // <p> 00235 // A (somewhat primitive) mechanism is available to do a 00236 // <A HREF="#Tables:KeyLookup">table lookup</A> based on the contents 00237 // of a key. In the future this might be replaced by a proper B+-tree index 00238 // mechanism. 
00239 00240 // <ANCHOR NAME="Tables:open"> 00241 // <h3>Opening an Existing Table</h3></ANCHOR> 00242 // 00243 // To open an existing table you just create a 00244 // <linkto class="Table:description">Table</linkto> object giving 00245 // the name of the table, like: 00246 // 00247 // <srcblock> 00248 // Table readonly_table ("tableName"); 00249 // // or 00250 // Table read_and_write_table ("tableName", Table::Update); 00251 // </srcblock> 00252 // 00253 // The constructor option determines whether the table will be opened as 00254 // readonly or as read/write. A readonly table file must be opened 00255 // as readonly, otherwise an exception is thrown. The functions 00256 // <linkto class="Table">Table::isWritable(...)</linkto> 00257 // can be used to determine if a table is writable. 00258 // 00259 // When the table is opened, the data managers are reinstantiated 00260 // according to their definition at table creation. 00261 00262 // <ANCHOR NAME="Tables:read"> 00263 // <h3>Reading from a Table</h3></ANCHOR> 00264 // 00265 // You can read data from a table column with the "get" functions 00266 // in the classes 00267 // <linkto class="ScalarColumn:description">ScalarColumn<T></linkto> 00268 // and 00269 // <linkto class="ArrayColumn:description">ArrayColumn<T></linkto>. 00270 // For scalars of a standard data type (i.e. Bool, uChar, Int, Short, 00271 // uShort, uInt, float, double, Complex, DComplex and String) you could 00272 // instead use 00273 // <linkto class="TableColumn">TableColumn::getScalar(...)</linkto> or 00274 // <linkto class="TableColumn">TableColumn::asXXX(...)</linkto>. 00275 // These functions offer an extra: they do automatic data type promotion; 00276 // so that you can, for example, get a double value from a float column. 00277 // 00278 // These "get" functions are used in the same way as the simple "put" 00279 // functions described in the section on <A HREF="#Tables:write">writing</A> into a table. 
00280 // <p> 00281 // <linkto class="ScalarColumn:description">ScalarColumn<T></linkto> 00282 // can be constructed for a non-writable column. However, an exception 00283 // is thrown if the put function is used for it. 00284 // The same is true for 00285 // <linkto class="ArrayColumn:description">ArrayColumn<T></linkto> and 00286 // <linkto class="TableColumn:description">TableColumn</linkto>. 00287 // <p> 00288 // A typical program could look like: 00289 // <srcblock> 00290 // #include <casacore/tables/Tables/Table.h> 00291 // #include <casacore/tables/Tables/ScalarColumn.h> 00292 // #include <casacore/tables/Tables/ArrayColumn.h> 00293 // #include <casacore/casa/Arrays/Vector.h> 00294 // #include <casacore/casa/Arrays/Slicer.h> 00295 // #include <casacore/casa/Arrays/ArrayMath.h> 00296 // #include <iostream> 00297 // 00298 // main() 00299 // { 00300 // // Open the table (readonly). 00301 // Table tab ("some.name"); 00302 // 00303 // // Construct the various column objects. 00304 // // Their data type has to match the data type in the table description. 00305 // ScalarColumn<Int> acCol (tab, "ac"); 00306 // ArrayColumn<Float> arr2Col (tab, "arr2"); 00307 // 00308 // // Loop through all rows in the table. 00309 // uInt nrow = tab.nrow(); 00310 // for (uInt i=0; i<nrow; i++) { 00311 // // Read the row for both columns. 00312 // cout << "Column ac in row i = " << acCol(i) << endl; 00313 // Array<Float> array = arr2Col.get (i); 00314 // } 00315 // 00316 // // Show the entire column ac, 00317 // // and show the 10th element of arr2 in each row. 00318 // cout << acCol.getColumn(); 00319 // cout << arr2Col.getColumn (Slicer(Slice(10))); 00320 // } 00321 // </srcblock> 00322 00323 // <ANCHOR NAME="Tables:creation"> 00324 // <h3>Creating a Table</h3></ANCHOR> 00325 // 00326 // The creation of a table is a multi-step process: 00327 // <ol> 00328 // <li> 00329 // Create a <A HREF="#Tables:Table Description">table description</A>. 
00330 // <li> 00331 // Create a <linkto class="SetupNewTable:description">SetupNewTable</linkto> 00332 // object with the name of the new table. 00333 // <li> 00334 // Create the necessary <A HREF="#Tables:Data Managers">data managers</A>. 00335 // <li> 00336 // Bind each column to the appropriate data manager. 00337 // The system will bind unbound columns to data managers which 00338 // are created internally using the default data manager name 00339 // defined in the column description. 00340 // <li> 00341 // Define the shape of direct columns (if that was not already done in the 00342 // column description). 00343 // <li> 00344 // Create the <linkto class="Table:description">Table</linkto> 00345 // object from the SetupNewTable object. Here, a final check is performed 00346 // and the necessary files are created. 00347 // </ol> 00348 // The recipe above is meant for the creation of a plain table, but the 00349 // creation of a memory table is exactly the same. The only difference 00350 // is that in the call to construct the Table object the Table::Memory 00351 // type has to be given. Note that in the SetupNewTable object the columns 00352 // can be bound to any data manager. <src>MemoryTable</src> will rebind 00353 // stored columns to the <linkto class=MemoryStMan>MemoryStMan</linkto> 00354 // storage manager, but virtual column bindings are not changed. 00355 00356 // 00357 // The following example shows how you can create a table. An example 00358 // specifically illustrating the creation of the 00359 // <A HREF="#Tables:Table Description">table description</A> is given 00360 // in that section. Other sections discuss the access to the table. 
00361 // 00362 // <srcblock> 00363 // #include <casacore/tables/Tables/TableDesc.h> 00364 // #include <casacore/tables/Tables/SetupNewTab.h> 00365 // #include <casacore/tables/Tables/Table.h> 00366 // #include <casacore/tables/Tables/ScaColDesc.h> 00367 // #include <casacore/tables/Tables/ScaRecordColDesc.h> 00368 // #include <casacore/tables/Tables/ArrColDesc.h> 00369 // #include <casacore/tables/Tables/StandardStMan.h> 00370 // #include <casacore/tables/Tables/IncrementalStMan.h> 00371 // 00372 // main() 00373 // { 00374 // // Step1 -- Build the table description. 00375 // TableDesc td("tTableDesc", "1", TableDesc::Scratch); 00376 // td.comment() = "A test of class SetupNewTable"; 00377 // td.addColumn (ScalarColumnDesc<Int> ("ab" ,"Comment for column ab")); 00378 // td.addColumn (ScalarColumnDesc<Int> ("ac")); 00379 // td.addColumn (ScalarColumnDesc<uInt> ("ad","comment for ad")); 00380 // td.addColumn (ScalarColumnDesc<Float> ("ae")); 00381 // td.addColumn (ScalarRecordColumnDesc ("arec")); 00382 // td.addColumn (ArrayColumnDesc<Float> ("arr1",3,ColumnDesc::Direct)); 00383 // td.addColumn (ArrayColumnDesc<Float> ("arr2",0)); 00384 // td.addColumn (ArrayColumnDesc<Float> ("arr3",0,ColumnDesc::Direct)); 00385 // 00386 // // Step 2 -- Setup a new table from the description. 00387 // SetupNewTable newtab("newtab.data", td, Table::New); 00388 // 00389 // // Step 3 -- Create storage managers for it. 00390 // StandardStMan stmanStand_1; 00391 // StandardStMan stmanStand_2; 00392 // IncrementalStMan stmanIncr; 00393 // 00394 // // Step 4 -- First, bind all columns to the first storage 00395 // // manager. Then, bind a few columns to another storage manager 00396 // // (which will overwrite the previous bindings). 00397 // newtab.bindAll (stmanStand_1); 00398 // newtab.bindColumn ("ab", stmanStand_2); 00399 // newtab.bindColumn ("ae", stmanIncr); 00400 // newtab.bindColumn ("arr3", stmanIncr); 00401 // 00402 // // Step 5 -- Define the shape of the direct columns. 
00403 // // (this could have been done in the column description). 00404 // newtab.setShapeColumn( "arr1", IPosition(3,2,3,4)); 00405 // newtab.setShapeColumn( "arr3", IPosition(3,3,4,5)); 00406 // 00407 // // Step 6 -- Finally, create the table consisting of 10 rows. 00408 // Table tab(newtab, 10); 00409 // 00410 // // Now we can fill the table, which is shown in a next section. 00411 // // The Table destructor will flush the table to the files. 00412 // } 00413 // </srcblock> 00414 // To create a table in memory, only step 6 has to be modified slightly to: 00415 // <srcblock> 00416 // Table tab(newtab, Table::Memory, 10); 00417 // </srcblock> 00418 00419 // <ANCHOR NAME="Tables:write"> 00420 // <h3>Writing into a Table</h3></ANCHOR> 00421 // 00422 // Once a table has been created or has been opened for read/write, 00423 // you want to write data into it. Before doing that you may have 00424 // to add one or more rows to the table. 00425 // <note role=tip> If a table was created with a given number of rows, you 00426 // do not need to add rows; you may not even be able to do so. 00427 // </note> 00428 // 00429 // When adding new rows to the table, either via the 00430 // <linkto class="Table">Table(...) constructor</linkto> 00431 // or via the 00432 // <linkto class="Table">Table::addRow(...)</linkto> 00433 // function, you can choose to have those rows initialized with the 00434 // default values given in the description. 00435 // 00436 // To actually write the data into the table you need the classes 00437 // <linkto class="ScalarColumn:description">ScalarColumn<T></linkto> and 00438 // <linkto class="ArrayColumn:description">ArrayColumn<T></linkto>. 00439 // For each column you can construct one or 00440 // more of these objects. Their put(...) functions 00441 // let you write a value at a time or the entire column in one go. 00442 // For arrays you can "put" subsections of the arrays. 
00443 // 00444 // As an alternative for scalars of a standard data type (i.e. Bool, 00445 // uChar, Int, Short, uShort, uInt, float, double, Complex, DComplex 00446 // and String) you could use the functions 00447 // <linkto class="TableColumn">TableColumn::putScalar(...)</linkto>. 00448 // These functions offer an extra: automatic data type promotion; so that 00449 // you can, for example, put a float value in a double column. 00450 // 00451 // A typical program could look like: 00452 // <srcblock> 00453 // #include <casacore/tables/Tables/TableDesc.h> 00454 // #include <casacore/tables/Tables/SetupNewTab.h> 00455 // #include <casacore/tables/Tables/Table.h> 00456 // #include <casacore/tables/Tables/ScaColDesc.h> 00457 // #include <casacore/tables/Tables/ArrColDesc.h> 00458 // #include <casacore/tables/Tables/ScalarColumn.h> 00459 // #include <casacore/tables/Tables/ArrayColumn.h> 00460 // #include <casacore/casa/Arrays/Vector.h> 00461 // #include <casacore/casa/Arrays/Slicer.h> 00462 // #include <casacore/casa/Arrays/ArrayMath.h> 00463 // #include <iostream> 00464 // 00465 // main() 00466 // { 00467 // // First build the table description. 00468 // TableDesc td("tTableDesc", "1", TableDesc::Scratch); 00469 // td.comment() = "A test of class SetupNewTable"; 00470 // td.addColumn (ScalarColumnDesc<Int> ("ac")); 00471 // td.addColumn (ArrayColumnDesc<Float> ("arr2",0)); 00472 // 00473 // // Setup a new table from the description, 00474 // // and create the (still empty) table. 00475 // // Note that since we do not explicitly bind columns to 00476 // // data managers, all columns will be bound to the default 00477 // // standard storage manager StandardStMan. 00478 // SetupNewTable newtab("newtab.data", td, Table::New); 00479 // Table tab(newtab); 00480 // 00481 // // Construct the various column objects. 00482 // // Their data type has to match the data type in the description. 
00483 // ScalarColumn<Int> ac (tab, "ac"); 00484 // ArrayColumn<Float> arr2 (tab, "arr2"); 00485 // Vector<Float> vec2(100); 00486 // 00487 // // Write the data into the columns. 00488 // // In each cell arr2 will be a vector of length 100. 00489 // // Since its shape is not set explicitly, it is done implicitly. 00490 // for (uInt i=0; i<10; i++) { 00491 // tab.addRow(); // First add a row. 00492 // ac.put (i, i+10); // value is i+10 in row i 00493 // indgen (vec2, float(i+20)); // vec2 gets i+20, i+21, ..., i+119 00494 // arr2.put (i, vec2); 00495 // } 00496 // 00497 // // Finally, show the entire column ac, 00498 // // and show the 10th element of arr2. 00499 // cout << ac.getColumn(); 00500 // cout << arr2.getColumn (Slicer(Slice(10))); 00501 // 00502 // // The Table destructor writes the table. 00503 // } 00504 // </srcblock> 00505 // 00506 // In this example we added rows in the for loop, but we could also have 00507 // created 10 rows straightaway by constructing the Table object as: 00508 // <srcblock> 00509 // Table tab(newtab, 10); 00510 // </srcblock> 00511 // in which case we would not include 00512 // <srcblock> 00513 // tab.addRow() 00514 // </srcblock> 00515 // 00516 // The classes 00517 // <linkto class="TableColumn:description">TableColumn</linkto>, 00518 // <linkto class="ScalarColumn:description">ScalarColumn<T></linkto>, and 00519 // <linkto class="ArrayColumn:description">ArrayColumn<T></linkto> 00520 // contain several functions to put values into a single cell or into the 00521 // whole column. This may look confusing, but is actually quite simple. 00522 // The functions can be divided in two groups: 00523 // <ol> 00524 // <li> 00525 // Put the given value into the column cell(s). 00526 // <ul> 00527 // <li> 00528 // The simplest put functions, 00529 // <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto> and 00530 // <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>, 00531 // put a value into the given column cell. 
For convenience, there is an 00532 // <linkto class="ArrayColumn">ArrayColumn::putSlice(...)</linkto> 00533 // to put only a part of the array. 00534 // <li> 00535 // <linkto class="ScalarColumn">ScalarColumn::fillColumn(...)</linkto> and 00536 // <linkto class="ArrayColumn">ArrayColumn::fillColumn(...)</linkto> 00537 // fill an entire column by putting the given value into all the cells 00538 // of the column. 00539 // <li> 00540 // The simplest putColumn functions, 00541 // <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto> and 00542 // <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>, 00543 // put an array of values into the column. There is a special 00544 // <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto> 00545 // version which puts only a part of the arrays. 00546 // </ul> 00547 // 00548 // <li> 00549 // Copy values from another column to this column.<BR> 00550 // These functions have the advantage that the 00551 // data type of the input and/or output column can be unknown. 00552 // The generic TableColumn objects can be used for this purpose. 00553 // The put(Column) function checks the data types and, if possible, 00554 // converts them. If the conversion is not possible, it throws an 00555 // exception. 00556 // <ul> 00557 // <li> 00558 // The put functions copy the value in a cell of the input column 00559 // to a cell in the output column. The row numbers of the cells 00560 // in the columns can be different. 00561 // <li> 00562 // The putColumn functions copy the entire contents of the input column 00563 // to the output column. The lengths of the columns must be equal. 00564 // </ul> 00565 // Each class has its own set of these functions. 00566 // <ul> 00567 // <li> 00568 // <linkto class="TableColumn">TableColumn::put(...)</linkto> and 00569 // <linkto class="TableColumn">TableColumn::putColumn(...)</linkto> 00570 // are the most generic. 
They can be 00571 // used if the data types of both input and output column are unknown. 00572 // Note that these functions are virtual. 00573 // <li> 00574 // <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto>, 00575 // <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>, 00576 // <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto>, and 00577 // <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto> 00578 // are less generic and therefore potentially more efficient. 00579 // The most efficient variants are the ones taking a 00580 // Scalar/ArrayColumn<T>, because they require no data type 00581 // conversion. 00582 // </ul> 00583 // </ol> 00584 00585 // <ANCHOR NAME="Tables:row-access"> 00586 // <h3>Accessing rows in a Table</h3></ANCHOR> 00587 // 00588 // Apart from accessing a table column-wise as described in the 00589 // previous two sections, it is also possible to access a table row-wise. 00590 // The <linkto class=TableRow>TableRow</linkto> class makes it possible 00591 // to access multiple fields in a table row as a whole. Note that like the 00592 // XXColumn classes described above, there is also an ROTableRow class 00593 // for access to readonly tables. 00594 // <p> 00595 // On construction of a TableRow object it has to be specified which 00596 // fields (i.e. columns) are part of the row. For these fields a 00597 // fixed structured <linkto class=TableRecord>TableRecord</linkto> 00598 // object is constructed as part of the TableRow object. The TableRow::get 00599 // function will fill this record with the table data for the given row. 00600 // The user has access to the record and can use 00601 // <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> objects for 00602 // speedier access to the record. 00603 // <p> 00604 // The class could be used as shown in the following example. 00605 // <srcblock> 00606 // // Open the table as readonly and define a row object to contain 00607 // // the given columns. 
00608 // // Note that the function stringToVector is a very convenient 00609 // // way to construct a Vector<String>. 00610 // // Show the description of the fields in the row. 00611 // Table table("Some.table"); 00612 // ROTableRow row (table, stringToVector("col1,col2,col3")); 00613 // cout << row.record().description(); 00614 // // Since the structure of the record is known, the RecordFieldPtr 00615 // // objects could be used to allow for easy and fast access to 00616 // // the record which is refilled for each get. 00617 // RORecordFieldPtr<String> col1(row.record(), "col1"); 00618 // RORecordFieldPtr<Double> col2(row.record(), "col2"); 00619 // RORecordFieldPtr<Array<Int> > col3(row.record(), "col3"); 00620 // for (uInt i=0; i<table.nrow(); i++) { 00621 // row.get (i); 00622 // someString = *col1; 00623 // somedouble = *col2; 00624 // someArrayInt = *col3; 00625 // } 00626 // </srcblock> 00627 // The description of TableRow contains some more extensive examples. 00628 00629 // <ANCHOR NAME="Tables:select and sort"> 00630 // <h3>Table Selection and Sorting</h3></ANCHOR> 00631 // 00632 // The result of a select and sort of a table is another table, 00633 // which references the original table. This means that an update 00634 // of a sorted or selected table results in the update of the original 00635 // table. The result is, however, a table in itself, so all table 00636 // functions (including select and sort) can be used with it. 00637 // Note that a true copy of such a reference table can be made with 00638 // the <linkto class=Table>Table::deepCopy</linkto> function. 00639 // <p> 00640 // Rows or columns can be selected from a table. Columns can be selected 00641 // by the 00642 // <linkto class="Table">Table::project(...)</linkto> 00643 // function, while rows can be selected by the various 00644 // <linkto class="Table">Table operator()</linkto> functions. 
00645 // Usually a row is selected by giving a select expression with 00646 // <linkto class="TableExprNode:description">TableExprNode</linkto> 00647 // objects. These objects represent the various nodes 00648 // in an expression, e.g. a constant, a column, or a subexpression. 00649 // The Table function 00650 // <linkto class="Table">Table::col(...)</linkto> 00651 // creates a TableExprNode object for a column. The function 00652 // <linkto class="Table">Table::key(...)</linkto> 00653 // does the same for a keyword by reading 00654 // the keyword value and storing it as a constant in an expression node. 00655 // All column nodes in an expression must belong to the same table, 00656 // otherwise an exception is thrown. 00657 // In the following example we select all rows with RA>10: 00658 // <srcblock> 00659 // #include <casacore/tables/Tables/ExprNode.h> 00660 // Table table ("Table.name"); 00661 // Table result = table (table.col("RA") > 10); 00662 // </srcblock> 00663 // while in the next one we select rows with RA and DEC in the given 00664 // intervals: 00665 // <srcblock> 00666 // Table result = table (table.col("RA") > 10 00667 // && table.col("RA") < 14 00668 // && table.col("DEC") >= -10 00669 // && table.col("DEC") <= 10); 00670 // </srcblock> 00671 // The following operators can be used to form arbitrarily 00672 // complex expressions: 00673 // <ul> 00674 // <li> Relational operators ==, !=, >, >=, < and <=. 00675 // <li> Logical operators &&, || and !. 00676 // <li> Arithmetic operators +, -, *, /, %, and unary + and -. 00677 // <li> Bit operators ^, &, |, and unary ~. 00678 // <li> Operator() to take a subsection of an array. 00679 // </ul> 00680 // Many functions (like sin, max, conj) can be used in an expression. 00681 // Class <linkto class=TableExprNode>TableExprNode</linkto> shows 00682 // the available functions. 00683 // E.g. 
00684 // <srcblock> 00685 // Table result = table (sin (table.col("RA")) > 0.5); 00686 // </srcblock> 00687 // Function <src>in</src> can be used to select from a set of values. 00688 // A value set can be constructed using class 00689 // <linkto class=TableExprNodeSet>TableExprNodeSet</linkto>. 00690 // <srcblock> 00691 // TableExprNodeSet set; 00692 // set.add (TableExprNodeSetElem ("abc")); 00693 // set.add (TableExprNodeSetElem ("defg")); 00694 // set.add (TableExprNodeSetElem ("h")); 00695 // Table result = table (table.col("NAME").in (set)); 00696 // </srcblock> 00697 // selects rows with a NAME equal to <src>abc</src>, 00698 // <src>defg</src>, or <src>h</src>. 00699 // 00700 // <p> 00701 // You can sort a table on one or more columns containing scalars. 00702 // In this example we simply sort on column RA (default is ascending): 00703 // <srcblock> 00704 // Table table ("Table.name"); 00705 // Table result = table.sort ("RA"); 00706 // </srcblock> 00707 // Multiple 00708 // <linkto class="Table">Table::sort(...)</linkto> 00709 // functions exist which allow for more flexible control over the sort order. 00710 // In the next example we sort first on RA in descending order 00711 // and then on DEC in ascending order: 00712 // <srcblock> 00713 // Table table ("Table.name"); 00714 // Block<String> sortKeys(2); 00715 // Block<int> sortOrders(2); 00716 // sortKeys(0) = "RA"; 00717 // sortOrders(0) = Sort::Descending; 00718 // sortKeys(1) = "DEC"; 00719 // sortOrders(1) = Sort::Ascending; 00720 // Table result = table.sort (sortKeys, sortOrders); 00721 // </srcblock> 00722 // 00723 // Tables stemming from the same root can be combined in several 00724 // ways with the help of the various logical 00725 // <linkto class="Table">Table operators</linkto> (operator|, etc.). 00726 00727 // <h4>Table Query Language</h4> 00728 // The selection and sorting mechanism described above can only be used 00729 // in a hard-coded way in a C++ program.
00730 // There is, however, another way. Strings containing selection and 00731 // sorting commands can be used. 00732 // The syntax of these commands is based on SQL and is described in the 00733 // <a href="../notes/199.html">Table Query Language</a> (TaQL) note 199. 00734 // The language supports UDFs (User Defined Functions) in dynamically 00735 // loadable libraries as explained in the note. 00736 // <br>A TaQL command can be executed with the static function 00737 // <src>tableCommand</src> defined in class 00738 // <linkto class=TableParse>TableParse</linkto>. 00739 00740 // <ANCHOR NAME="Tables:concatenation"> 00741 // <h3>Table Concatenation</h3></ANCHOR> 00742 // Tables with identical descriptions can be concatenated in a virtual way 00743 // using the Table concatenation constructor. Such a Table object behaves 00744 // as any other Table object, thus any operation can be performed on it. 00745 // An identical description means that the number of columns, the column names, 00746 // and the data types of the columns must be the same. The columns do not 00747 // need to be ordered in the same way nor to be stored in the same way. 00748 // <br>Note that if tables have different column names, it is possible 00749 // to form a projection (as described in the previous section) first 00750 // to make them appear identical. 00751 // 00752 // Sometimes a MeasurementSet is partitioned, for instance in chunks of 00753 // one hour. All those chunks can be virtually concatenated this way. 00754 // Note that all tables in the concatenation will be opened, thus one might 00755 // run out of file descriptors if there are many chunks. 00756 // 00757 // Similar to reference tables, it is possible to make a concatenated Table 00758 // persistent by using the <src>rename</src> function. It will not copy the 00759 // data; only the names of the tables used are written. 00760 // 00761 // The keywords of a concatenated table are taken from the first table.
00762 // It is possible to change or add keywords, but that is not persistent, 00763 // not even if the concatenated table is made persistent. 00764 // <br>The keywords holding subtables can be handled in a special way. 00765 // Normally the subtables of the first table are used as the subtables of the 00766 // concatenation, but it is possible to concatenate subtables as well by 00767 // giving their names in the constructor. 00768 // In this way, say, the SYSCAL subtable of a MeasurementSet can be 00769 // concatenated as well. 00770 // <srcblock> 00771 // // Create virtual concatenation of ms0 and ms1. 00772 // Block<String> names(2); 00773 // names[0] = "ms0"; 00774 // names[1] = "ms1"; 00775 // // Also concatenate their SYSCAL subtables. 00776 // Block<String> subNames(1, "SYSCAL"); 00777 // Table concTab (names, subNames); 00778 // </srcblock> 00779 00780 // <ANCHOR NAME="Tables:iterate"> 00781 // <h3>Table Iterators</h3></ANCHOR> 00782 // 00783 // You can iterate through a table in an arbitrary order by getting 00784 // a subset of the table consisting of the rows in which the iteration 00785 // columns have the same value. 00786 // An iterator object is created by constructing a 00787 // <linkto class="TableIterator:description">TableIterator</linkto> 00788 // object with the appropriate column names. 00789 // 00790 // In the next example we define an iteration on the columns Time and 00791 // Baseline. Each iteration step returns a table subset in which Time and 00792 // Baseline have the same value. 00793 // 00794 // <srcblock> 00795 // // Iterate over Time and Baseline (by default in ascending order). 00796 // // Time is the main iteration order, thus the first column specified. 00797 // Table t; 00798 // Table tab ("UV_Table.data"); 00799 // Block<String> iv0(2); 00800 // iv0[0] = "Time"; 00801 // iv0[1] = "Baseline"; 00802 // // 00803 // // Create the iterator. This will prepare the first subtable.
00804 // TableIterator iter(tab, iv0); 00805 // Int nr = 0; 00806 // while (!iter.pastEnd()) { 00807 // // Get the first subtable. 00808 // // This will contain rows with equal Time and Baseline. 00809 // t = iter.table(); 00810 // cout << t.nrow() << " "; 00811 // nr++; 00812 // // Prepare the next subtable with the next Time,Baseline value. 00813 // iter.next(); 00814 // } 00815 // cout << endl << nr << " iteration steps" << endl; 00816 // </srcblock> 00817 // 00818 // You can define more than one iterator on the same table; they operate 00819 // independently. 00820 // 00821 // Note that the result of each iteration step is a table in itself which 00822 // references the original table, just as in the case of a sort or select. 00823 // This means that the resulting table can be used again in a sort, select, 00824 // iteration, etc.. 00825 00826 // <ANCHOR NAME="Tables:vectors"> 00827 // <h3>Table Vectors</h3></ANCHOR> 00828 // 00829 // A table vector makes it possible to treat a column in a table 00830 // as a vector. Almost all operators and functions defined for normal 00831 // vectors, are also defined for table vectors. So it is, for instance, 00832 // possible to add a constant to a table vector. This has the effect 00833 // that the underlying column gets changed. 00834 // 00835 // You can use the templated class 00836 // <linkto class="TableVector:description">TableVector</linkto> 00837 // to make a scalar column appear as a (table) vector. 00838 // Columns containing arrays or tables are not supported. 00839 // The data type of the TableVector object must match the 00840 // data type of the column. 00841 // A table vector can also hold a normal vector so that (temporary) 00842 // results of table vector operations can be handled. 00843 // 00844 // In the following example we double the data in column COL1 and 00845 // store the result in a temporary table vector. 00846 // <srcblock> 00847 // // Create a table vector for column COL1. 
00848 // // Note that if the table is readonly, putting data in the table vector 00849 // // results in an exception. 00850 // Table tab ("Table.data"); 00851 // TableVector<Int> tabvec(tab, "COL1"); 00852 // // Multiply it by a constant. Result is kept in a Vector in memory. 00853 // TableVector<Int> temp = 2 * tabvec; 00854 // </srcblock> 00855 // 00856 // In the next example we double the data in COL1 and put the result back 00857 // in the column. 00858 // <srcblock> 00859 // // Create a table vector for column COL1. 00860 // // It has to be a TableVector to be able to change the column. 00861 // Table tab ("Table.data", Table::Update); 00862 // TableVector<Int> tabvec(tab, "COL1"); 00863 // // Multiply it by a constant. 00864 // tabvec *= 2; 00865 // </srcblock> 00866 00867 // <ANCHOR NAME="Tables:keywords"> 00868 // <h3>Table Keywords</h3></ANCHOR> 00869 // 00870 // Any number of keyword/value pairs may be attached to the table as a whole, 00871 // or to any individual column. They may be freely added, retrieved, 00872 // re-assigned, or deleted. They are, in essence, a self-resizing list of 00873 // values (any of the primitive types) indexed by Strings (the keyword). 00874 // 00875 // A table keyword/value pair might be 00876 // <srcblock> 00877 // Observer = Grote Reber 00878 // Date = 10 october 1942 00879 // </srcblock> 00880 // Column keyword/value pairs might be 00881 // <srcblock> 00882 // Units = mJy 00883 // Reference Pixel = 320 00884 // </srcblock> 00885 // The class 00886 // <linkto class="TableRecord:description">TableRecord</linkto> 00887 // represents the keywords in a table. 
00888 // It is (indirectly) derived from the standard record classes in the class 00889 // <linkto class="Record:description">Record</linkto>. 00890 00891 // <ANCHOR NAME="Tables:Table Description"> 00892 // <h3>Table Description</h3></ANCHOR> 00893 // 00894 // A table contains a description of itself, which defines the layout of the 00895 // columns and the keyword sets for the table and for the individual columns. 00896 // It may also define initial keyword sets and default values for the columns. 00897 // Such a default value is automatically stored in a cell in the table column, 00898 // whenever a row is added to the table. 00899 // 00900 // The creation of the table descriptor is the first step in the creation of 00901 // a new table. The description is part of the table itself, but may also 00902 // exist in a separate file. This is useful if you need to create a number 00903 // of tables with the same structure; in other circumstances it probably 00904 // should be avoided. 00905 // 00906 // The public classes to set up a table description are: 00907 // <ul> 00908 // <li> <linkto class="TableDesc:description">TableDesc</linkto> 00909 // -- holds the table description. 00910 // <li> <linkto class="ColumnDesc:description">ColumnDesc</linkto> 00911 // -- holds a generic column description. 00912 // <li> <linkto class="ScalarColumnDesc:description">ScalarColumnDesc<T> 00913 // </linkto> 00914 // -- defines a column containing a scalar value. 00915 // <li> <linkto class="ScalarRecordColumnDesc:description">ScalarRecordColumnDesc 00916 // </linkto> 00917 // -- defines a column containing a scalar record value. 00918 // <li> <linkto class="ArrayColumnDesc:description">ArrayColumnDesc<T> 00919 // </linkto> 00920 // -- defines a column containing an (in)direct array. 00921 // </ul> 00922 // 00923 // Here follows a typical example of the construction of a table 00924 // description.
For more specialized things -- like the definition of a 00925 // default data manager -- we refer to the descriptions of the above 00926 // mentioned classes. 00927 // 00928 // <srcblock> 00929 // #include <casacore/tables/Tables/TableDesc.h> 00930 // #include <casacore/tables/Tables/ScaColDesc.h> 00931 // #include <casacore/tables/Tables/ArrColDesc.h> 00932 // #include <casacore/tables/Tables/ScaRecordColDesc.h> 00933 // #include <casacore/tables/Tables/TableRecord.h> 00934 // #include <casacore/casa/Arrays/IPosition.h> 00935 // #include <casacore/casa/Arrays/Vector.h> 00936 // 00937 // main() 00938 // { 00939 // // Create a new table description 00940 // // Define a comment for the table description. 00941 // // Define some keywords. 00942 // ColumnDesc colDesc1, colDesc2; 00943 // TableDesc td("tTableDesc", "1", TableDesc::New); 00944 // td.comment() = "A test of class TableDesc"; 00945 // td.rwKeywordSet().define ("ra", float(3.14)); 00946 // td.rwKeywordSet().define ("equinox", double(1950)); 00947 // td.rwKeywordSet().define ("aa", Int(1)); 00948 // 00949 // // Define an integer column ab. 00950 // td.addColumn (ScalarColumnDesc<Int> ("ab", "Comment for column ab")); 00951 // 00952 // // Add a scalar integer column ac, define keywords for it 00953 // // and define a default value 0. 00954 // // Overwrite the value of keyword unit. 00955 // ScalarColumnDesc<Int> acColumn("ac"); 00956 // acColumn.rwKeywordSet().define ("scale", Complex(0,0)); 00957 // acColumn.rwKeywordSet().define ("unit", ""); 00958 // acColumn.setDefault (0); 00959 // td.addColumn (acColumn); 00960 // td.rwColumnDesc("ac").rwKeywordSet().define ("unit", "DEG"); 00961 // 00962 // // Add a scalar string column ad and define its comment string. 00963 // td.addColumn (ScalarColumnDesc<String> ("ad","comment for ad")); 00964 // 00965 // // Now define array columns. 00966 // // This one is indirect and has no dimensionality mentioned yet.
00967 // td.addColumn (ArrayColumnDesc<Complex> ("Arr1","comment for Arr1")); 00968 // // This one is indirect and has 3-dim arrays. 00969 // td.addColumn (ArrayColumnDesc<Int> ("Arr2","comment for Arr2",3)); 00970 // // This one is direct and has 2-dim arrays with axes length 4 and 7. 00971 // td.addColumn (ArrayColumnDesc<uInt> ("Arr3","comment for Arr3", 00972 // IPosition(2,4,7), 00973 // ColumnDesc::Direct)); 00974 // 00975 // // Add a column containing records. 00976 // td.addColumn (ScalarRecordColumnDesc ("Rec1")); 00977 // } 00978 // </srcblock> 00979 00980 // <ANCHOR NAME="Tables:Data Managers"> 00981 // <h3>Data Managers</h3></ANCHOR> 00982 // 00983 // Data managers take care of the actual access to the data in a column. 00984 // There are two kinds of data managers: 00985 // <ol> 00986 // <li> <A HREF="#Tables:storage managers">Storage managers</A> -- 00987 // which store the data as such. They can only handle the standard 00988 // data types (Bool,...,String) as discussed in the section about the 00989 // <A HREF="#Tables:properties">table properties</A>. 00990 // <li> <A HREF="#Tables:virtual column engines">Virtual column engines</A> 00991 // -- which manipulate the data. 00992 // An engine could be a simple thing like scaling the data (as done 00993 // in classic AIPS to reduce data storage), but it could also be an 00994 // elaborate thing like applying corrections on-the-fly. 00995 // <br>An engine must be used to store data objects with a non-standard type. 00996 // It has to break down the object into items with standard data types 00997 // which can be stored with a storage manager. 00998 // </ol> 00999 // In general the user of a table does not need to be aware which 01000 // data managers are being used underneath. Only when the table is created 01001 // data managers have to be bound to the columns. Thereafter it is 01002 // completely transparent.
01003 // 01004 // Data managers need to be registered, so they can be found when a table is 01005 // opened. All data managers mentioned below are part of the system and 01006 // pre-registered. 01007 // It is, however, also possible to load data managers on demand. If a data 01008 // manager is not registered, an attempt is made to load a shared library named 01009 // after the part of the data manager name (in lowercase) before a dot or left arrow. 01010 // The dot makes it possible to have multiple data managers in a shared library, 01011 // while the left arrow is meant for templated data manager classes. 01012 // <br>E.g. if <src>BitFlagsEngine<uChar></src> was not registered, the shared 01013 // library <src>libbitflagsengine.so</src> (or .dylib) will be loaded. If 01014 // successful, its function <src>register_bitflagsengine()</src> will be 01015 // executed which should register the data manager(s). Thereafter it is known 01016 // and will be used. For example in a file Register.h and Register.cc: 01017 // <srcblock> 01018 // // Declare in .h file as C function, so no name mangling is done. 01019 // extern "C" { 01020 // void register_bitflagsengine(); 01021 // } 01022 // // Implement in .cc file. 01023 // void register_bitflagsengine() 01024 // { 01025 // BitFlagsEngine<uChar>::registerClass(); 01026 // BitFlagsEngine<Short>::registerClass(); 01027 // BitFlagsEngine<Int>::registerClass(); 01028 // } 01029 // </srcblock> 01030 // There are several functions that can give information which data managers 01031 // are used for which columns and to obtain the characteristics and properties 01032 // of them. Class RODataManAccessor and derived classes can be used for it 01033 // as well as the functions <src>dataManagerInfo</src> and 01034 // <src>showStructure</src> in class Table. 01035 01036 // <ANCHOR NAME="Tables:storage managers"> 01037 // <h3>Storage Managers</h3></ANCHOR> 01038 // 01039 // Storage managers are used to store the data contained in the column cells.
01040 // At table construction time the binding of columns to storage managers is done. 01041 // <br>Each storage manager uses one or more files (usually called table.fi_xxx 01042 // where i is a sequence number and _xxx is some kind of extension). 01043 // Typically several files are used to store the data of the columns of a table. 01044 // <br>In order to reduce the number of files (and to support large block sizes), 01045 // it is possible to have a single container file (a MultiFile) containing all 01046 // data files used by the storage managers. Such a file is called table.mf. 01047 // Note that the program <em>lsmf</em> can be used to see which 01048 // files are contained in a MultiFile. The program <em>tomf</em> can 01049 // convert the files in a MultiFile to regular files. 01050 // <br>At table creation time it is decided if a MultiFile will be used. It 01051 // can be done by means of the StorageOption object given to the SetupNewTable 01052 // constructor and/or by the aipsrc variables: 01053 // <ul> 01054 // <li> <src>table.storage.option</src> which can have the value 01055 // 'multifile', 'sepfile' (meaning separate files), or 'default'. 01056 // Currently the default is to use separate files. 01057 // <li> <src>table.storage.blocksize</src> defines the block size to be 01058 // used by a MultiFile. If 0 is given, the file system's block size 01059 // will be used. 01060 // </ul> 01061 // Almost all standard storage managers support the MultiFile. 01062 // The exception is StManAipsIO, because it is hardly ever used. 01063 // 01064 // Several storage managers exist, each with its own storage characteristics. 01065 // The default and preferred storage manager is <src>StandardStMan</src>. 01066 // Other storage managers should only be used if they pay off in 01067 // file space (like <src>IncrementalStMan</src> for slowly varying data) 01068 // or access speed (like the tiled storage managers for large data arrays).
01069 // <br>The storage managers store the data in a big or little endian 01070 // canonical format. The format can be specified when the table is created. 01071 // By default it uses the endian format as specified in the aipsrc variable 01072 // <code>table.endianformat</code> which can have the value local, big, 01073 // or little. The default is local. 01074 // <ol> 01075 // <li> 01076 // <linkto class="StandardStMan:description">StandardStMan</linkto> 01077 // stores all the values in so-called buckets (equally sized chunks 01078 // in the file). It requires little memory. 01079 // <br>It replaces the old <src>StManAipsIO</src>. 01080 // 01081 // <li> 01082 // <linkto class="IncrementalStMan:description">IncrementalStMan</linkto> 01083 // uses a storage mechanism resembling "incremental backups". A value 01084 // is only stored if it is different from the previous row. It is 01085 // very well suited for slowly varying data. 01086 // <br>The class <linkto class="ROIncrementalStManAccessor:description"> 01087 // ROIncrementalStManAccessor</linkto> can be used to tune the 01088 // behaviour of the <src>IncrementalStMan</src>. It contains functions 01089 // to deal with the cache size and to show the behaviour of the cache. 01090 // 01091 // <li> 01092 // The <a href="#Tables:TiledStMan">Tiled Storage Managers</a> 01093 // store the data as a tiled hypercube allowing for more or less equally 01094 // efficient data access along all main axes. It can be used for 01095 // UV-data as well as for image data. 01096 // 01097 // <li> 01098 // <linkto class="StManAipsIO:description">StManAipsIO</linkto> 01099 // uses <src>AipsIO</src> to store the data in the columns. 01100 // It supports all table functionality, but its I/O is probably not 01101 // as efficient as other storage managers. It also requires that 01102 // a large part of the table fits in memory. 
01103 // <br>It should not be used anymore, because it uses a lot of memory 01104 // for larger tables and because it is not very robust in case an 01105 // application or system crashes. 01106 // 01107 // <li> 01108 // <linkto class="MemoryStMan:description">MemoryStMan</linkto> 01109 // holds the data in memory. It means that data 'stored' with this 01110 // storage manager are NOT persistent. 01111 // <br>This storage manager is primarily meant for tables held in 01112 // memory, but it can also be useful for temporary columns in 01113 // normal tables. Note, however, that if a table is accessed 01114 // concurrently from multiple processes, MemoryStMan data cannot be 01115 // synchronized. 01116 // </ol> 01117 // 01118 // The storage manager framework makes it possible to support arbitrary files 01119 // as tables. This has been used in a case where a file is filled 01120 // by the data acquisition system of a telescope. The file is simultaneously 01121 // used as a table using a dedicated storage manager. The table 01122 // system and storage manager provide a sync function to synchronize 01123 // the processes, i.e. to make the table system aware of changes 01124 // in the file size (thus in the table size) by the filling process. 01125 // 01126 // <note role=tip> 01127 // Not all data managers support all the table functionality. So, the choice 01128 // of a data manager can greatly influence the type of operations you can do 01129 // on the table as a whole. 01130 // For example, if a column uses the tiled storage manager, 01131 // it is not possible to delete rows from the table, because that storage 01132 // manager will not support deletion of rows. 01133 // However, it is always possible to delete all columns of a data 01134 // manager in one single call. 
01135 // </note> 01136 01137 // <ANCHOR NAME="Tables:TiledStMan"> 01138 // <h3>Tiled Storage Manager</h3></ANCHOR> 01139 // The Tiled Storage Managers allow one to store the data of 01140 // one or more columns in a tiled way. Tiling means 01141 // that the data are stored without a preferred order to make access 01142 // along the different main axes equally efficient. This is done by 01143 // storing the data in so-called tiles (i.e. equally shaped subsets of an 01144 // array) to increase data locality. The user can define the tile shape 01145 // to optimize for the most frequently used access. 01146 // <p> 01147 // The Tiled Storage Manager has the following properties: 01148 // <ul> 01149 // <li> There can be more than one Tiled Storage Manager in 01150 // a table, each with its own (unique) name. 01151 // <li> Each Tiled Storage Manager can store an 01152 // N-dimensional so-called hypercolumn. 01153 // Elaborate hypercolumns can be defined using 01154 // <linkto file="TableDesc.h#defineHypercolumn"> 01155 // TableDesc::defineHypercolumn</linkto>. 01156 // <br>Note that defining a hypercolumn is only necessary if it 01157 // contains multiple columns or if the TiledDataStMan is used. 01158 // It means that in practice it is hardly ever needed to define a 01159 // hypercolumn. 01160 // <br>A hypercolumn consists of up to three types of columns: 01161 // <dl> 01162 // <dt> Data columns 01163 // <dd> contain the data to be stored in a tiled way. This will 01164 // be done in tiled hypercubes. 01165 // There must be at least one data column. 01166 // <br> For example: a table contains UV-data with 01167 // data columns "Visibility" and "Weight". 01168 // <dt> Coordinate columns 01169 // <dd> define the world coordinates of the pixels in the data columns. 01170 // Coordinate columns are optional, but if given there must 01171 // be N coordinate columns for an N-dimensional hypercolumn.
01172 // <br> 01173 // For example: the data in the example above is 4-dimensional 01174 // and has coordinate columns "Time", "Baseline", "Frequency", 01175 // and "Polarization". 01176 // <dt> Id columns 01177 // <dd> are needed if TiledDataStMan is used. 01178 // Different rows in the data columns can be stored in different 01179 // hypercubes. The values in the id column(s) uniquely identify 01180 // the hypercube a row is stored in. 01181 // <br> 01182 // For example: the line and continuum data in a MeasurementSet 01183 // table need to be stored in 2 different hypercubes (because 01184 // their shapes are different (see below)). A column containing 01185 // the type (line or continuum) has to be used as an id column. 01186 // </dl> 01187 // <li> If multiple data columns are used, the shape of their data 01188 // must be conforming in each individual row. 01189 // If data in different rows have different shapes, they must be 01190 // stored in different hypercubes, because a hypercube can only hold 01191 // data with conforming shapes. 01192 // <br> 01193 // Thus in the example above, rows with line data will have conforming 01194 // shapes and can be stored in one hypercube. The continuum data 01195 // will have another shape and can be stored in another hypercube. 01196 // <br> 01197 // The storage manager keeps track of the mapping of rows to/from 01198 // hypercubes. 01199 // <li> Each hypercube can be tiled in its own way. It is not required 01200 // that an integer number of tiles fits in the hypercube. The last 01201 // tiles will be padded as needed. 01202 // <li> The last axis of a hypercube can be extensible. This means that 01203 // the size of that axis does not need to be defined when the 01204 // hypercube is defined in the storage manager. Instead, the hypercube 01205 // can be extended when another chunk of data has to be stored. 
01206 // This can be very useful in, for example, a (quasi-)realtime 01207 // environment where the size of the time axis is not known. 01208 // <li> If coordinate columns are defined, they describe the coordinates 01209 // of the axes of the hypercubes. Each hypercube has its own set of 01210 // coordinates. 01211 // <li> Data and id columns have to be stored with the Tiled 01212 // Storage Manager. However, coordinate columns do not need to be 01213 // stored with the Tiled Storage Manager. 01214 // Especially in the case where the coordinates for a hypercube axis 01215 // are varying (i.e. dependent on other axes), another storage manager 01216 // has to be used (because the Tiled Storage Manager can only 01217 // hold constant coordinates). 01218 // </ul> 01219 // <p> 01220 // The following Tiled Storage Managers are available: 01221 // <dl> 01222 // <dt> <linkto class=TiledShapeStMan:description>TiledShapeStMan</linkto> 01223 // <dd> can be seen as a specialization of <src>TiledDataStMan</src> 01224 // by using the array shape as the id value. 01225 // Similarly to <src>TiledDataStMan</src> it can maintain multiple 01226 // hypercubes and store multiple rows in a hypercube, but it is 01227 // easier to use, because the special <src>addHypercube</src> and 01228 // <src>extendHypercube</src> functions are not needed. 01229 // A hypercube is automatically added when a new array shape is 01230 // encountered. 01231 // <br> 01232 // This storage manager could be used for a table with a column 01233 // containing line and continuum data, which will result 01234 // in 2 hypercubes. 01235 // <dt> <linkto class=TiledCellStMan:description>TiledCellStMan</linkto> 01236 // <dd> creates (automatically) a new hypercube for each row. 01237 // Thus each row of the hypercolumn is stored in a separate hypercube. 01238 // Note that the row number serves as the id value. So an id column 01239 // is not needed, although there are multiple hypercubes.
01240 // <br> 01241 // This storage manager is meant for tables where the data arrays 01242 // in the different rows are not accessed together. One can think 01243 // of a column containing images. Each row contains an image and 01244 // only one image is shown at a time. 01245 // <dt> <linkto class=TiledColumnStMan:description>TiledColumnStMan</linkto> 01246 // <dd> creates one hypercube for the entire hypercolumn. Thus all cells 01247 // in the hypercube have to have the same shape and therefore this 01248 // storage manager is only possible if all columns in the hypercolumn 01249 // have the attribute FixedShape. 01250 // <br> 01251 // This storage manager could be used for a table with a column 01252 // containing images for the Stokes parameters I, Q, U, and V. 01253 // By storing them in one hypercube, it is possible to retrieve 01254 // the 4 Stokes values for a subset of the image or for an individual 01255 // pixel in a very efficient way. 01256 // <dt> <linkto class=TiledDataStMan:description>TiledDataStMan</linkto> 01257 // <dd> allows one to control the creation and extension of hypercubes. 01258 // This is done by means of the class 01259 // <linkto class=TiledDataStManAccessor:description> 01260 // TiledDataStManAccessor</linkto>. 01261 // It makes it possible to store, say, row 0-9 in hypercube A, 01262 // row 10-34 in hypercube B, row 35-54 in hypercube A again, etc.. 01263 // <br> 01264 // The drawback of this storage manager is that its hypercubes are not 01265 // automatically extended when adding new rows. The special functions 01266 // <src>addHypercube</src> and <src>extendHypercube</src> have to be 01267 // used making it somewhat tedious to use. 01268 // Therefore this storage manager may become obsolete in the near future. 01269 // </dl> 01270 // The Tiled Storage Managers have 3 ways to access and cache the data. 
01271 // Class <linkto class=TSMOption>TSMOption</linkto> can be used to setup an 01272 // access choice and use it in a Table constructor. 01273 // <ul> 01274 // <li> The old way (the only way until January 2010) uses a cache 01275 // of its own to keep tiles that might need to be reused. It will always 01276 // access entire tiles, even if only a small part is needed. 01277 // It is possible to define a maximum cache size. The description of class 01278 // <linkto class=ROTiledStManAccessor>ROTiledStManAccessor</linkto> 01279 // contains a discussion about the effect of defining a maximum cache 01280 // size. 01281 // <li> Memory-mapping the data files. In this way the operating system 01282 // takes care of the IO and caching. However, the limited address space 01283 // may preclude using it for large tables on 32-bit systems. 01284 // <li> Use buffered IO and let the kernel's file cache take care of caching. 01285 // It will access the data in chunks of the given buffer size, so the 01286 // entire tile does not need to be accessed if only a small part is 01287 // needed. 01288 // </ul> 01289 // Apart from reading, all access ways described above can also handle writing 01290 // and extending tables. They create fully equal files. Both little and big 01291 // endian data can be read or written. 01292 01293 // <ANCHOR NAME="Tables:virtual column engines"> 01294 // <h3>Virtual Column Engines</h3></ANCHOR> 01295 // 01296 // Virtual column engines are used to implement the virtual (i.e. 01297 // calculated-on-the-fly) columns. The Table system provides 01298 // an abstract base class (or "interface class") 01299 // <linkto class="VirtualColumnEngine:description">VirtualColumnEngine</linkto> 01300 // that specifies the protocol for these engines. 01301 // The programmer must derive a concrete class to implement 01302 // the application-specific virtual column. 
01303 // <p> 01304 // For example: the programmer 01305 // needs a column in a table which is the difference between two other 01306 // columns. (Perhaps these two other columns are updated periodically 01307 // during the execution of a program.) A good way to handle this would 01308 // be to have a virtual column in the table, and write a virtual column 01309 // engine which knows how to calculate the difference between corresponding 01310 // cells of the two other columns. So the result is that accessing a 01311 // particular cell of the virtual column invokes the virtual column engine, 01312 // which then gets the values from the other two columns, and returns their 01313 // difference. This particular example could be done using 01314 // <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto>. 01315 // <p> 01316 // Several virtual column engines exist: 01317 // <ol> 01318 // <li> The class 01319 // <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto> 01320 // makes it possible to define a column as an arbitrary expression of 01321 // other columns. It uses the <a href="../notes/199.html">TaQL</a> 01322 // CALC command. The virtual column can be a scalar or an array and 01323 // can have one of the standard data types supported by the Table System. 01324 // <li> The class 01325 // <linkto class="BitFlagsEngine:description">BitFlagsEngine</linkto> 01326 // maps an integer bit flags column to a Bool column. A read and write mask 01327 // can be defined telling which bits to take into account when mapping 01328 // to and from Bool (thus when reading or writing the Bool). 01329 // <li> The class 01330 // <linkto class="CompressFloat:description">CompressFloat</linkto> 01331 // compresses a single precision floating point array by scaling the 01332 // values to shorts (16-bit integer). 
01333 // <li> The class 01334 // <linkto class="CompressComplex:description">CompressComplex</linkto> 01335 // compresses a single precision complex array by scaling the 01336 // values to shorts (16-bit integer). In fact, the 2 parts of the complex 01337 // number are combined into a 32-bit integer. 01338 // <li> The class 01339 // <linkto class="CompressComplexSD:description">CompressComplexSD</linkto> 01340 // does the same as CompressComplex, but optimizes for the case where the 01341 // imaginary part is zero (which is often the case for Single Dish data). 01342 // <li> The double templated class 01343 // <linkto class="ScaledArrayEngine:description">ScaledArrayEngine</linkto> 01344 // scales the data in an array from, for example, 01345 // float to short before putting it. 01346 // <li> The double templated class 01347 // <linkto class="MappedArrayEngine:description">MappedArrayEngine</linkto> 01348 // converts the data from one data type to another. Sometimes it might be 01349 // needed to store the residual data in an MS in double precision. 01350 // Because the imaging task can only handle single precision, this engine 01351 // can be used to map the data from double to single precision. 01352 // <li> The double templated class 01353 // <linkto class="RetypedArrayEngine:description">RetypedArrayEngine</linkto> 01354 // converts the data from one data type to another with the possibility 01355 // to reduce the number of dimensions. For example, it can be used to 01356 // store a 2-d array of StokesVector objects as a 3-d array of floats 01357 // by treating the 4 data elements as an extra array axis. If the 01358 // StokesVector class is simple, it can be done very efficiently. 01359 // <li> The class 01360 // <linkto class="ForwardColumnEngine:description"> 01361 // ForwardColumnEngine</linkto> 01362 // forwards the gets and puts on a row in a column to the same row 01363 // in a column with the same name in another table.
This provides 01364 // a virtual copy of the referenced column. 01365 // <li> The class 01366 // <linkto class="ForwardColumnIndexedRowEngine:description"> 01367 // ForwardColumnIndexedRowEngine</linkto> 01368 // is similar to <src>ForwardColumnEngine</src>. 01369 // However, instead of forwarding it to the same row it uses 01370 // a column to map its row number to a row number in the referenced 01371 // table. In this way multiple rows can share the same data. 01372 // This data manager only allows for get operations. 01373 // <li> The calibration module has implemented a virtual column engine 01374 // to do on-the-fly calibration in a transparent way. 01375 // </ol> 01376 // To handle arbitrary data types the templated abstract base class 01377 // <linkto class="VSCEngine:description">VSCEngine</linkto> 01378 // has been written. An example of how to use this class can be 01379 // found in the demo program <src>dVSCEngine.cc</src>. 01380 01381 // <ANCHOR NAME="Tables:LockSync"> 01382 // <h3>Table locking and synchronization</h3></ANCHOR> 01383 // 01384 // Multiple concurrent readers and writers (also via NFS) of a 01385 // table are supported by means of a locking/synchronization mechanism. 01386 // This mechanism is not very sophisticated in the sense that it is 01387 // very coarsely grained. When locking, the entire table gets locked. 01388 // A special lock file is used to lock the table. This lock file also 01389 // contains some synchronization data. 01390 // <p> 01391 // Six ways of locking are supported (see class 01392 // <linkto class=TableLock>TableLock</linkto>): 01393 // <dl> 01394 // <dt> TableLock::PermanentLocking(Wait) 01395 // <dd> locks the table permanently (from open till close). This means 01396 // that one writer OR multiple readers are possible. 01397 // <dt> TableLock::AutoLocking 01398 // <dd> does the locking automatically. This is the default mode.
01399 // This mode makes it possible that a table is shared amongst 01400 // processes without the user needing to write any special code. 01401 // It also means that a lock is only released when needed. 01402 // <dt> TableLock::AutoNoReadLocking 01403 // <dd> is similar to AutoLocking. However, no lock is acquired when 01404 // reading the table making it possible to read the table while 01405 // another process holds a write-lock. It also means that for read 01406 // purposes no automatic synchronization is done when the table is 01407 // updated in another process. 01408 // Explicit synchronization can be done by means of the function 01409 // <src>Table::resync</src>. 01410 // <dt> TableLock::UserLocking 01411 // <dd> requires that the programmer explicitly acquires and releases 01412 // a lock on the table. This makes some kind of transaction 01413 // processing possible. E.g. set a write lock, add a row, 01414 // write all data into the row and release the lock. 01415 // The Table functions <src>lock</src> and <src>unlock</src> 01416 // have to be used to acquire and release a (read or write) lock. 01417 // <dt> TableLock::UserNoReadLocking 01418 // <dd> is similar to UserLocking. However, similarly to AutoNoReadLocking 01419 // no lock is needed to read the table. 01420 // <dt> TableLock::NoLocking 01421 // <dd> does not use table locking. It is the responsibility of the 01422 // user to ensure that no concurrent access is done on the same 01423 // bucket or tile in a storage manager, otherwise a table might 01424 // get corrupted. 01425 // <br>This mode is always used if Casacore is built with 01426 // -DAIPS_TABLE_NOLOCKING. 01427 // </dl> 01428 // Synchronization of the processes accessing the same table is done 01429 // by means of the lock file. When a lock is released, the storage 01430 // managers flush their data into the table files. 
Some synchronization data 01431 // is written into the lock file telling the new number of table rows 01432 // and telling which storage managers have written data. 01433 // This information is read when another process acquires the lock 01434 // and is used to determine which storage managers have to refresh 01435 // their internal caches. 01436 // <br>Note that for the NoReadLocking modes (see above) explicit 01437 // synchronization might be needed using <src>Table::resync</src>. 01438 // <p> 01439 // The function <src>Table::hasDataChanged</src> can be used to check 01440 // if a table is (being) changed by another process. In this way 01441 // a program can react to it. E.g. the table browser can refresh its 01442 // screen when the underlying table is changed. 01443 // <p> 01444 // In general the default locking option will do. 01445 // From the above it should be clear that heavy concurrent access 01446 // results in a lot of flushing, thus will have a negative impact on 01447 // performance. If uninterrupted access to a table is needed, 01448 // the <src>PermanentLocking</src> option should be used. 01449 // If transaction-like processing is done (e.g. updating a table 01450 // containing an observation catalogue), the <src>UserLocking</src> 01451 // option is probably best. 01452 // <p> 01453 // Creation or deletion of a table is not possible if that table 01454 // is still open in another process. The function 01455 // <src>Table::isMultiUsed()</src> can be used to check if a table 01456 // is open in other processes. 01457 // <br> 01458 // The function <src>deleteTable</src> should be used to delete 01459 // a table. Before deleting the table it ensures that it is writable 01460 // and that it is not open in the current or another process. 01461 // <p> 01462 // The following example wants to read the table uninterrupted, thus it uses 01463 // the <src>PermanentLocking</src> option. It also wants to wait 01464 // until the lock is actually acquired.
01465 // Note that the destructor closes the table and releases the lock. 01466 // <srcblock> 01467 // // Open the table (readonly). 01468 // // Acquire a permanent (read) lock. 01469 // // It waits until the lock is acquired. 01470 // Table tab ("some.name", 01471 // TableLock(TableLock::PermanentLockingWait)); 01472 // </srcblock> 01473 // 01474 // The following example uses the automatic locking. 01475 // It tells the system to check about every 20 seconds if another 01476 // process wants access to the table. 01477 // <srcblock> 01478 // // Open the table (readonly). 01479 // Table tab ("some.name", 01480 // TableLock(TableLock::AutoLocking, 20)); 01481 // </srcblock> 01482 // 01483 // The following example gets data (say from a GUI) and writes it 01484 // as a row into the table. To lock the table as little as possible, 01485 // the lock is acquired just before writing and released immediately 01486 // thereafter. 01487 // <srcblock> 01488 // // Open the table (writable). 01489 // Table tab ("some.name", 01490 // TableLock(TableLock::UserLocking), 01491 // Table::Update); 01492 // while (True) { 01493 // get input data 01494 // tab.lock(); // Acquire a write lock and wait for it. 01495 // tab.addRow(); 01496 // write data into the row 01497 // tab.unlock(); // Release the lock. 01498 // } 01499 // </srcblock> 01500 // 01501 // The following example deletes a table if it is not used in 01502 // another process. 01503 // <srcblock> 01504 // Table tab ("some.name"); 01505 // if (! tab.isMultiUsed()) { 01506 // tab.markForDelete(); 01507 // } 01508 // </srcblock> 01509 01510 // <ANCHOR NAME="Tables:KeyLookup"> 01511 // <h3>Table lookup based on a key</h3></ANCHOR> 01512 // 01513 // Class <linkto class=ColumnsIndex>ColumnsIndex</linkto> offers the 01514 // user a means to find the rows matching a given key or key range.
01515 // It is a somewhat primitive replacement of a B-tree index and in the 01516 // future it may be replaced by a proper B+-tree implementation. 01517 // <p> 01518 // The <src>ColumnsIndex</src> class makes it possible to build an 01519 // in-core index on one or more columns. Looking up a key or key range 01520 // is done using a binary search on that index. It returns a vector 01521 // containing the row numbers of the rows matching the key (range). 01522 // <p> 01523 // The class is not capable of tracking changes in the underlying column(s). 01524 // It detects a change in the number of rows and updates the index 01525 // accordingly. However, it has to be told explicitly when a value 01526 // in the underlying column(s) changes. 01527 // <p> 01528 // The following example shows how the class can be used. 01529 // <example> 01530 // Suppose one has an antenna table with key ANTENNA. 01531 // <srcblock> 01532 // // Open the table and make an index for column ANTENNA. 01533 // Table tab("antenna.tab"); 01534 // ColumnsIndex colInx(tab, "ANTENNA"); 01535 // // Make a RecordFieldPtr for the ANTENNA field in the index key record. 01536 // // Its data type has to match the data type of the column. 01537 // RecordFieldPtr<Int> antFld(colInx.accessKey(), "ANTENNA"); 01538 // // Now loop in some way and find the row for the antenna 01539 // // involved in that loop. 01540 // Bool found; 01541 // while (...) { 01542 // // Fill the key field and get the row number. 01543 // // ANTENNA is a unique key, so only one row number matches. 01544 // // Otherwise function getRowNumbers would have to be used. 01545 // *antFld = antenna; 01546 // uInt antRownr = colInx.getRowNumber (found); 01547 // if (!found) { 01548 // cout << "Antenna " << antenna << " is unknown" << endl; 01549 // } else { 01550 // // antRownr can now be used to get data from that row in 01551 // // the antenna table.
01552 // } 01553 // } 01554 // </srcblock> 01555 // </example> 01556 // <linkto class=ColumnsIndex>ColumnsIndex</linkto> itself contains a more 01557 // advanced example. It shows how to use a private compare function 01558 // to adjust the lookup if the index does not contain single 01559 // key values, but intervals instead. This is useful if a row in 01560 // a (sub)table is valid for, say, a time range instead of a single 01561 // timestamp. 01562 01563 // <ANCHOR NAME="Tables:performance"> 01564 // <h3>Performance and robustness considerations</h3></ANCHOR> 01565 // 01566 // The Table System resembles a database system, but it is not as robust. 01567 // It lacks the transaction and logging facilities common to data base systems. 01568 // It means that in case of a crash data might be lost. 01569 // To reduce the risk of data loss to 01570 // a minimum, it is advisable to regularly do a <tt>flush</tt>, optionally 01571 // with an <tt>fsync</tt> to ensure that all data are really written. 01572 // However, that can degrade the performance because it involves extra writes. 01573 // So one should find the right balance between robustness and performance. 01574 // 01575 // To get a good feeling for the performance issues, it is important to 01576 // understand some of the internals of the Table System. 01577 // <br>The storage managers drive the performance. All storage managers use 01578 // buckets (called tiles for the TiledStMan) which contain the data. 01579 // All IO is done by bucket. The bucket/tile size is defined when creating 01580 // the storage manager objects. Sometimes the default will do, but usually 01581 // it is better to set it explicitly. 01582 // 01583 // It is best to do a flush when a tile is full. 
01584 // For example: <br> 01585 // When creating a MeasurementSet containing N antennae (thus N*(N-1)/2 baselines 01586 // or N*(N+1)/2 if auto-correlations are stored as well) it makes sense to 01587 // store, say, N/2 rows in a tile and do a flush each time all baselines 01588 // are written. In that way tiles are fully filled when doing the flush, so 01589 // no extra IO is involved. 01590 // <br>Here is some code showing this when creating a MeasurementSet. 01591 // The code should speak for itself. 01592 // <srcblock> 01593 // MS* createMS (const String& msName, int nrchan, int nrant) 01594 // { 01595 // // Get the MS main default table description. 01596 // TableDesc td = MS::requiredTableDesc(); 01597 // // Add the data column and its unit. 01598 // MS::addColumnToDesc(td, MS::DATA, 2); 01599 // td.rwColumnDesc(MS::columnName(MS::DATA)).rwKeywordSet(). 01600 // define("UNIT","Jy"); 01601 // // Store the DATA and FLAG column in two separate files. 01602 // // In this way accessing FLAG only is much cheaper than 01603 // // when combining DATA and FLAG. 01604 // // All data have the same shape, thus use TiledColumnStMan. 01605 // // Also store UVW with TiledColumnStMan. 01606 // Vector<String> tsmNames(1); 01607 // tsmNames[0] = MS::columnName(MS::DATA); 01608 // td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq)); 01609 // td.defineHypercolumn("TiledData", 3, tsmNames); 01610 // tsmNames[0] = MS::columnName(MS::FLAG); 01611 // td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq)); 01612 // td.defineHypercolumn("TiledFlag", 3, tsmNames); 01613 // tsmNames[0] = MS::columnName(MS::UVW); 01614 // td.defineHypercolumn("TiledUVW", 2, tsmNames); 01615 // // Set up the new table. 01616 // SetupNewTable newTab(msName, td, Table::New); 01617 // // Most columns vary slowly and use the IncrementalStMan. 01618 // IncrementalStMan incrStMan("ISMData"); 01619 // // A few columns use the StandardStMan (set an appropriate bucket size).
01620 // StandardStMan stanStMan("SSMData", 32768); 01621 // // Store all pol and freq and some rows in a single tile. 01622 // // Autocorrelations are written, thus in total there are 01623 // // nrant*(nrant+1)/2 baselines. Ensure a baseline takes up an 01624 // // integer number of tiles. 01625 // TiledColumnStMan tiledData("TiledData", 01626 // IPosition(3,4,nrchan,(nrant+1)/2)); 01627 // TiledColumnStMan tiledFlag("TiledFlag", 01628 // IPosition(3,4,nrchan,8*(nrant+1)/2)); 01629 // TiledColumnStMan tiledUVW("TiledUVW", 01630 // IPosition(2,3,nrant*(nrant+1)/2)); 01631 // newTab.bindAll (incrStMan); 01632 // newTab.bindColumn(MS::columnName(MS::ANTENNA1),stanStMan); 01633 // newTab.bindColumn(MS::columnName(MS::ANTENNA2),stanStMan); 01634 // newTab.bindColumn(MS::columnName(MS::DATA),tiledData); 01635 // newTab.bindColumn(MS::columnName(MS::FLAG),tiledFlag); 01636 // newTab.bindColumn(MS::columnName(MS::UVW),tiledUVW); 01637 // // Create the MS and its subtables. 01638 // // Get access to its columns. 01639 // MS* msp = new MeasurementSet(newTab); 01640 // // Create all subtables. 01641 // // Do this after the creation of optional subtables, 01642 // // so the MS will know about those optional subtables. 01643 // msp->createDefaultSubtables (Table::New); 01644 // return msp; 01645 // } 01646 // </srcblock> 01647 01648 // <h4>Some more performance considerations</h4> 01649 // Which storage managers to use and how to use them depends heavily on 01650 // the type of data and the access patterns to the data. Here follow some 01651 // guidelines: 01652 // <ol> 01653 // <li> Scalar data can be stored with the StandardStMan (SSM) or 01654 // IncrementalStMan (ISM). For slowly varying data (e.g. the TIME column 01655 // in a MeasurementSet) it is best to use the ISM. Otherwise use the SSM. 01656 // Note that very long strings (longer than the bucketsize) can only 01657 // be stored with the SSM. 01658 // <li> Any number of storage managers can be used.
In fact, each column 01659 // can have a storage manager of its own resulting in column-wise 01660 // stored data which is more and more used in data base systems. 01661 // In that way a query or sort on that column is very fast, because 01662 // the buckets to read only contain data of that column. 01663 // In practice one can decide to combine a few frequently used columns 01664 // in a storage manager. 01665 // <li> Array data can be stored with any column manager. Small fixed size 01666 // arrays can be stored directly with the SSM 01667 // (or ISM if not changing much). 01668 // However, they can also be stored with a TiledStMan (TSM) as shown 01669 // for the UVW column in the example above. 01670 // <br> Large arrays should usually be stored with a TSM. However, 01671 // if it must be possible to change the shape of an array after it 01672 // was stored, the SSM (or ISM) must be used. Note that in that 01673 // case a lot of disk space can be wasted, because the SSM and ISM 01674 // store the array data at the end of the file if the array got 01675 // bigger and do not reuse the old space. The only way to 01676 // reclaim it is by making a deep copy of the entire table. 01677 // <li> If an array is stored with a TSM, it is important to decide 01678 // which TSM to use. 01679 // <ol> 01680 // <li> The TiledColumnStMan is the most efficient, but only suitable 01681 // for arrays having the same shape in the entire column. 01682 // <li> The TiledShapeStMan is suitable for columns where the arrays 01683 // can have a few shapes. 01684 // <li> The TiledCellStMan is suitable for columns where the arrays 01685 // can have many different shapes. 01686 // </ol> 01687 // This is discussed in more detail 01688 // <a href="#Tables:TiledStMan">above</a>. 01689 // <li> If storing an array with a TSM, it can be very important to 01690 // choose the right tile shape. 
Not only does this define the size 01691 // of a tile, but it also defines if access in other directions 01692 // than the natural direction can be fast. It is also discussed in 01693 // more detail <a href="#Tables:TiledStMan">above</a>. 01694 // <li> Columns can be combined in a single TiledStMan. For instance, combining DATA 01695 // and FLAG is advantageous if FLAG is always used with DATA. However, if FLAG 01696 // is used on its own (e.g. in combination with CORRECTED_DATA), it is better 01697 // to separate them, otherwise tiles containing FLAG also contain DATA making the 01698 // tiles much bigger, thus more expensive to access. 01699 // </ol> 01700 // 01701 // <ANCHOR NAME="Tables:iotracing"> 01702 // <h4>IO Tracing</h4></ANCHOR> 01703 // 01704 // Several forms of tracing can be done to see how the Table I/O performs. 01705 // <ul> 01706 // <li> On Linux/UNIX systems the <src>strace</src> command can be used to 01707 // collect trace information about the physical IO. 01708 // <li> The function <src>showCacheStatistics</src> in class 01709 // TiledStManAccessor can be used to show the number of actual reads 01710 // and writes and the percentage of cache hits. 01711 // <li> The software has some options to trace the operations done on 01712 // tables. It is possible to specify the columns and/or the operations 01713 // to be traced. The following <src>aipsrc</src> variables can be used. 01714 // <ul> 01715 // <li> <src>table.trace.filename</src> specifies the file to write the 01716 // trace output to. If not given or empty, no tracing will be done. 01717 // The file name can contain environment variables or a tilde. 01718 // <li> <src>table.trace.operation</src> specifies the operations to be 01719 // traced. It is a string containing s, r, and/or w where 01720 // s means tracing RefTable construction (selection/sort), 01721 // r means column reads, and w means column writes. 
01722 // If empty, only the high level table operations (open, create, close) 01723 // will be traced. 01724 // <li> <src>table.trace.columntype</src> specifies the types of columns to 01725 // be traced. It is a string containing the characters s, a, and/or r. 01726 // s means all scalar columns, a all array columns, and r all record 01727 // columns. If empty and if <src>table.trace.column</src> is empty, 01728 // its default value is a. 01729 // <li> <src>table.trace.column</src> specifies names of columns to be 01730 // traced. Its value can be one or more glob-like patterns separated 01731 // by commas without any whitespace. The default is empty. 01732 // For example: 01733 // <srcblock> 01734 // table.trace.column: *DATA,FLAG,WEIGHT* 01735 // </srcblock> 01736 // to trace all DATA, the FLAG, and all WEIGHT columns. 01737 // </ul> 01738 // The trace output is a text file with the following columns 01739 // separated by a space. 01740 // <ul> 01741 // <li> The UTC time the trace line was written (with msec accuracy). 01742 // <li> The operation: n(ew), o(pen), c(lose), t(able), r(ead), w(rite), 01743 // s(election/sort/iter), p(rojection). 01744 // t means an arbitrary table operation as given in the name column. 01745 // <li> The table-id (as t=i) given at table creation (new) or open. 01746 // <li> The table name, column name, or table operation 01747 // (as <src>*oper*</src>). 01748 // <src>*reftable*</src> means that the operation is on a RefTable 01749 // (thus result of selection, sort, projection, or iteration). 01750 // <li> The row or rows to access (* means all rows). 01751 // Multiple rows are given as a series of ranges like s:e:i,s:e:i,... 01752 // where e and i are only given if applicable (default i is 1). 01753 // Note that e is inclusive and defaults to s. 01754 // <li> The optional array shape to access (none means scalar). 01755 // In case multiple rows are accessed, the last shape value is the 01756 // number of rows. 
01757 // <li> The optional slice of the array in each row as [start][end][stride]. 01758 // </ul> 01759 // Shape, start, end, and stride are given in Fortran-order as 01760 // [n1,n2,...]. 01761 // </ul> 01762 01763 // <ANCHOR NAME="Tables:applications"> 01764 // <h4>Applications to inspect/manipulate a table</h4></ANCHOR> 01765 // <ul> 01766 // <li><em>showtable</em> shows the structure of a table. It can show: 01767 // <ul> 01768 // <li> the columns and their format (optionally sorted on name) 01769 // <li> the data managers used to store the column data 01770 // <li> the table and/or column keywords and their values 01771 // <li> recursively the same info of the subtables 01772 // </ul> 01773 // <li><em>showtablelock</em> shows if a table is locked or opened and by 01774 // which process. 01775 // <li><em>lsmf</em> shows the virtual files contained in a MultiFile. 01776 // <li><em>tomf</em> copies the given files to a MultiFile. 01777 // <li><em>taql</em> can be used to query a table using the 01778 // <a href="../notes/199.html">Table Query Language</a> (TaQL). 01779 // </ul> 01780 // 01781 // </synopsis> 01782 // </module> 01783 01784 01785 01786 } //# NAMESPACE CASACORE - END 01787 01788 #endif