00001 //# SSMBase.h: Base class of the Standard Storage Manager 00002 //# Copyright (C) 2000,2001,2002 00003 //# Associated Universities, Inc. Washington DC, USA. 00004 //# 00005 //# This library is free software; you can redistribute it and/or modify it 00006 //# under the terms of the GNU Library General Public License as published by 00007 //# the Free Software Foundation; either version 2 of the License, or (at your 00008 //# option) any later version. 00009 //# 00010 //# This library is distributed in the hope that it will be useful, but WITHOUT 00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 00012 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 00013 //# License for more details. 00014 //# 00015 //# You should have received a copy of the GNU Library General Public License 00016 //# along with this library; if not, write to the Free Software Foundation, 00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA. 00018 //# 00019 //# Correspondence concerning AIPS++ should be addressed as follows: 00020 //# Internet email: aips2-request@nrao.edu. 
00021 //# Postal address: AIPS++ Project Office 00022 //# National Radio Astronomy Observatory 00023 //# 520 Edgemont Road 00024 //# Charlottesville, VA 22903-2475 USA 00025 //# 00026 //# $Id$ 00027 00028 #ifndef TABLES_SSMBASE_H 00029 #define TABLES_SSMBASE_H 00030 00031 00032 //# Includes 00033 #include <casacore/casa/aips.h> 00034 #include <casacore/tables/DataMan/DataManager.h> 00035 #include <casacore/casa/Containers/Block.h> 00036 00037 namespace casacore { //# NAMESPACE CASACORE - BEGIN 00038 00039 //# Forward declarations 00040 class BucketCache; 00041 class BucketFile; 00042 class StManArrayFile; 00043 class SSMIndex; 00044 class SSMColumn; 00045 class SSMStringHandler; 00046 00047 // <summary> 00048 // Base class of the Standard Storage Manager 00049 // </summary> 00050 00051 // <use visibility=local> 00052 00053 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="tStandardStMan.cc"> 00054 // </reviewed> 00055 00056 // <prerequisite> 00057 //# Classes you should understand before using this one. 00058 // <li> <linkto class=StandardStMan>StandardStMan</linkto> 00059 // <li> <linkto class=SSMColumn>SSMColumn</linkto> 00060 // </prerequisite> 00061 00062 // <etymology> 00063 // SSMBase is the base class of the Standard Storage Manager. 00064 // </etymology> 00065 00066 // <synopsis> 00067 // The global principles of this class are described in 00068 // <linkto class="StandardStMan:description">StandardStMan</linkto>. 00069 // <p> 00070 // The Standard Storage Manager divides the data file in equally sized 00071 // chunks called buckets. There are 3 types of buckets: 00072 // <ul> 00073 // <li> Data buckets containing the fixed length data (scalars and 00074 // direct arrays of data type Int, Float, Bool, etc.). 00075 // For variable shaped data (strings and indirect arrays) they 00076 // contain references to the actual data position in the 00077 // string buckets or in an external file. 
00078 // <li> String buckets containing strings and arrays of strings. 00079 // <li> Index buckets containing the index info for the data buckets. 00080 // </ul> 00081 // Bucket access is handled by class 00082 // <linkto class=BucketCache>BucketCache</linkto>. 00083 // It also keeps a list of free buckets. A bucket is freed when it is 00084 // not needed anymore (e.g. all data from it are deleted). 00085 // <p> 00086 // Data buckets form the main part of the SSM. The data can be viewed as 00087 // a few streams of buckets, where each stream contains the data of 00088 // a given number of columns. Each stream has an 00089 // <linkto class=SSMIndex>SSMIndex</linkto> object describing the 00090 // number of rows stored in each data bucket of the stream. 00091 // The SSM starts with a single bucket stream (holding all columns), 00092 // but when columns are added, new bucket streams might be created. 00093 // <p> 00094 // For example, we have an SSM with a bucket size of 100 bytes. 00095 // There are 5 Int columns (A,B,C,D,E) each taking 4 bytes per row. 00096 // Column A, B, C, and D are stored in bucket stream 1, while column 00097 // E is stored in bucket stream 2. So in stream 1 each bucket can hold 00098 // 6 rows, while in stream 2 each bucket can hold 25 rows. 00099 // For a 100 row table it will result in 17+4 data buckets. 00100 // <p> 00101 // A few classes collaborate to make it work: 00102 // <ul> 00103 // <li> Each bucket stream has an <linkto class=SSMIndex>SSMIndex</linkto> 00104 // object to map row number to bucket number. 00105 // Note that in principle each bucket in a stream contains the same 00106 // number of rows. However, when a row is deleted it is removed 00107 // from its bucket shifting the remainder to the left. Data in the 00108 // next buckets is not shifted, so that bucket now has one row less. 00109 // <li> For each column SSMBase knows to which bucket stream it belongs 00110 // and at which offset the column starts in a bucket. 
00111 // Note that column data in a bucket are adjacent, which is done 00112 // to make it easier to use the 00113 // <linkto class=ColumnCache>ColumnCache</linkto> object in SSMColumn 00114 // and to be able to efficiently store Bool values as bits. 00115 // <li> Each column has an <linkto class=SSMColumn>SSMColumn</linkto> 00116 // object knowing how many bits each data cell takes in a bucket. 00117 // The SSMColumn objects handle all access to data in the columns 00118 // (using SSMBase and SSMIndex). 00119 // </ul> 00120 // <p> 00121 // String buckets are used by class 00122 // <linkto class=SSMStringHandler>SSMStringHandler</linkto> to 00123 // store scalar strings and fixed and variable shaped arrays of strings. 00124 // The bucketnr, offset, and length of such string (arrays) are stored 00125 // in the data buckets. 00126 // <br> 00127 // Indirect arrays of other data types are also stored indirectly 00128 // and their offset is stored in the data buckets. Such arrays are 00129 // handled by class <linkto class=StIndArray>StIndArray</linkto> 00130 // which uses an extra file to store the arrays. 00131 // <p> 00132 // Index buckets are used by SSMBase to make the SSMIndex data persistent. 00133 // It uses alternately 2 sets of index buckets. In that way there is 00134 // always an index available in case the system crashes. 00135 // If possible 2 halves of a single bucket are used alternately, otherwise 00136 // separate buckets are used. 00137 // </synopsis> 00138 00139 // <motivation> 00140 // The public interface of SSMBase is quite large, because the other 00141 // internal SSM classes need these functions. To have a class with a 00142 // minimal interface for the normal user, class <src>StandardStMan</src> 00143 // is derived from it. 00144 // <br>StandardStMan needs an isA- instead of hasA-relation to be 00145 // able to bind columns to it in class <linkto class=SetupNewTable> 00146 // SetupNewTable</linkto>. 
00147 // </motivation> 00148 00149 // <todo asof="$DATE:$"> 00150 //# A List of bugs, limitations, extensions or planned refinements. 00151 // <li> Remove AipsIO argument from open and close. 00152 // <li> When only 1 bucket in use addcolumn can check if there's enough 00153 // room to fit the new column (so rearange the bucket) in the free 00154 // row space. 00155 // </todo> 00156 00157 00158 class SSMBase: public DataManager 00159 { 00160 public: 00161 // Create a Standard storage manager with default name SSM. 00162 explicit SSMBase (Int aBucketSize=0, 00163 uInt aCacheSize=1); 00164 00165 // Create a Standard storage manager with the given name. 00166 explicit SSMBase (const String& aDataManName, 00167 Int aBucketSize=0, 00168 uInt aCacheSize=1); 00169 00170 // Create a Standard storage manager with the given name. 00171 // The specifications are part of the record (as created by dataManagerSpec). 00172 SSMBase (const String& aDataManName, 00173 const Record& spec); 00174 00175 ~SSMBase(); 00176 00177 // Clone this object. 00178 // It does not clone SSMColumn objects possibly used. 00179 // The caller has to delete the newly created object. 00180 virtual DataManager* clone() const; 00181 00182 // Get the type name of the data manager (i.e. StandardStMan). 00183 virtual String dataManagerType() const; 00184 00185 // Get the name given to the storage manager (in the constructor). 00186 virtual String dataManagerName() const; 00187 00188 // Record a record containing data manager specifications. 00189 virtual Record dataManagerSpec() const; 00190 00191 // Get data manager properties that can be modified. 00192 // It is only ActualCacheSize (the actual cache size in buckets). 00193 // It is a subset of the data manager specification. 00194 virtual Record getProperties() const; 00195 00196 // Modify data manager properties. 00197 // Only ActualCacheSize can be used. It is similar to function setCacheSize 00198 // with <src>canExceedNrBuckets=False</src>. 
00199 virtual void setProperties (const Record& spec); 00200 00201 // Get the version of the class. 00202 uInt getVersion() const; 00203 00204 // Set the cache size (in buckets). 00205 // If <src>canExceedNrBuckets=True</src>, the given cache size can be 00206 // larger than the nr of buckets in the file. In this way the cache can 00207 // be made large enough for a future file extension. 00208 // Otherwise, it is limited to the actual number of buckets. This is useful 00209 // if one wants the entire file to be cached. 00210 void setCacheSize (uInt aCacheSize, Bool canExceedNrBuckets=True); 00211 00212 // Get the current cache size (in buckets). 00213 uInt getCacheSize() const; 00214 00215 // Clear the cache used by this storage manager. 00216 // It will flush the cache as needed and remove all buckets from it. 00217 void clearCache(); 00218 00219 // Show the statistics of all caches used. 00220 virtual void showCacheStatistics (ostream& anOs) const; 00221 00222 // Show statistics of all indices used. 00223 void showIndexStatistics (ostream & anOs) const; 00224 00225 // Show statistics of the Base offsets/index etc. 00226 void showBaseStatistics (ostream & anOs) const; 00227 00228 // Get the bucket size. 00229 uInt getBucketSize() const; 00230 00231 // Get the number of rows in this storage manager. 00232 uInt getNRow() const; 00233 00234 // The storage manager can add rows. 00235 virtual Bool canAddRow() const; 00236 00237 // The storage manager can delete rows. 00238 virtual Bool canRemoveRow() const; 00239 00240 // The storage manager can add columns. 00241 virtual Bool canAddColumn() const; 00242 00243 // The storage manager can delete columns. 00244 virtual Bool canRemoveColumn() const; 00245 00246 // Make the object from the type name string. 00247 // This function gets registered in the DataManager "constructor" map. 00248 // The caller has to delete the object. 
00249 static DataManager* makeObject (const String& aDataManType, 00250 const Record& spec); 00251 00252 // Get access to the given column. 00253 SSMColumn& getColumn (uInt aColNr); 00254 00255 // Get access to the given Index. 00256 SSMIndex& getIndex (uInt anIdxNr); 00257 00258 // Make the current bucket in the cache dirty (i.e. something has been 00259 // changed in it and it needs to be written when removed from the cache). 00260 // (used by SSMColumn::putValue). 00261 void setBucketDirty(); 00262 00263 // Open (if needed) the file for indirect arrays with the given mode. 00264 // Return a pointer to the object. 00265 StManArrayFile* openArrayFile (ByteIO::OpenOption anOpt); 00266 00267 // Find the bucket containing the column and row and return the pointer 00268 // to the beginning of the column data in that bucket. 00269 // It also fills in the start and end row for the column data. 00270 char* find (uInt aRowNr, uInt aColNr, 00271 uInt& aStartRow, uInt& anEndRow); 00272 00273 // Add a new bucket and get its bucket number. 00274 uInt getNewBucket(); 00275 00276 // Read the bucket (if needed) and return the pointer to it. 00277 char* getBucket (uInt aBucketNr); 00278 00279 // Remove a bucket from the bucket cache. 00280 void removeBucket (uInt aBucketNr); 00281 00282 // Get rows per bucket for the given column. 00283 uInt getRowsPerBucket (uInt aColumn) const; 00284 00285 // Return a pointer to the (one and only) StringHandler object. 00286 SSMStringHandler* getStringHandler(); 00287 00288 // <group> 00289 // Callbacks for BucketCache access. 00290 static char* readCallBack (void* anOwner, const char* aBucketStorage); 00291 static void writeCallBack (void* anOwner, char* aBucketStorage, 00292 const char* aBucket); 00293 static void deleteCallBack (void*, char* aBucket); 00294 static char* initCallBack (void* anOwner); 00295 // </group> 00296 00297 private: 00298 // Copy constructor (only meant for clone function). 
00299 SSMBase (const SSMBase& that); 00300 00301 // Assignment cannot be used. 00302 SSMBase& operator= (const SSMBase& that); 00303 00304 // (Re)create the index, file, and cache object. 00305 // It is used when all rows are deleted from the table. 00306 void recreate(); 00307 00308 // The data manager supports use of MultiFile. 00309 virtual Bool hasMultiFileSupport() const; 00310 00311 // Flush and optionally fsync the data. 00312 // It returns a True status if it had to flush (i.e. if data have changed). 00313 virtual Bool flush (AipsIO&, Bool doFsync); 00314 00315 // Let the storage manager create files as needed for a new table. 00316 // This allows a column with an indirect array to create its file. 00317 virtual void create (uInt aNrRows); 00318 00319 // Open the storage manager file for an existing table, read in 00320 // the data, and let the SSMColumn objects read their data. 00321 virtual void open (uInt aRowNr, AipsIO&); 00322 00323 // Resync the storage manager with the new file contents. 00324 // This is done by clearing the cache. 00325 virtual void resync (uInt aRowNr); 00326 00327 // Reopen the storage manager files for read/write. 00328 virtual void reopenRW(); 00329 00330 // The data manager will be deleted (because all its columns are 00331 // requested to be deleted). 00332 // So clean up the things needed (e.g. delete files). 00333 virtual void deleteManager(); 00334 00335 // Let the storage manager initialize itself (upon creation). 00336 // It determines the bucket size and fills the index. 00337 void init(); 00338 00339 // Determine and set the bucket size. 00340 // It returns the number of rows per bucket. 00341 uInt setBucketSize(); 00342 00343 // Get the number of indices in use. 00344 uInt getNrIndices() const; 00345 00346 // Add rows to the storage manager. 00347 // Per column it extends number of rows. 00348 virtual void addRow (uInt aNrRows); 00349 00350 // Delete a row from all columns. 
00351 virtual void removeRow (uInt aRowNr); 00352 00353 // Do the final addition of a column. 00354 virtual void addColumn (DataManagerColumn*); 00355 00356 // Remove a column from the data file. 00357 virtual void removeColumn (DataManagerColumn*); 00358 00359 // Create a column in the storage manager on behalf of a table column. 00360 // The caller has to delete the newly created object. 00361 // <group> 00362 // Create a scalar column. 00363 virtual DataManagerColumn* makeScalarColumn (const String& aName, 00364 int aDataType, 00365 const String& aDataTypeID); 00366 // Create a direct array column. 00367 virtual DataManagerColumn* makeDirArrColumn (const String& aName, 00368 int aDataType, 00369 const String& aDataTypeID); 00370 // Create an indirect array column. 00371 virtual DataManagerColumn* makeIndArrColumn (const String& aName, 00372 int aDataType, 00373 const String& aDataTypeID); 00374 // </group> 00375 00376 // Get the cache object. 00377 // This will construct the cache object if not present yet. 00378 // The cache object will be deleted by the destructor. 00379 BucketCache& getCache(); 00380 00381 // Construct the cache object (if not constructed yet). 00382 void makeCache(); 00383 00384 // Read the header. 00385 void readHeader(); 00386 00387 // Read the index from its buckets. 00388 void readIndexBuckets(); 00389 00390 // Write the header and the indices. 00391 void writeIndex(); 00392 00393 00394 //# Declare member variables. 00395 // Name of data manager. 00396 String itsDataManName; 00397 00398 // The file containing the indirect arrays. 00399 StManArrayFile* itsIosFile; 00400 00401 // The number of rows in the columns. 00402 uInt itsNrRows; 00403 00404 // Column offset 00405 Block<uInt> itsColumnOffset; 00406 00407 // Row Index ID containing all the columns in a bucket 00408 Block<uInt> itsColIndexMap; 00409 00410 // Will contain all indices 00411 PtrBlock<SSMIndex*> itsPtrIndex; 00412 00413 // The cache with the SSM buckets. 
00414 BucketCache* itsCache; 00415 00416 // The file containing all data. 00417 BucketFile* itsFile; 00418 00419 // String handler class 00420 SSMStringHandler* itsStringHandler; 00421 00422 // The persistent cache size. 00423 uInt itsPersCacheSize; 00424 00425 // The actual cache size. 00426 uInt itsCacheSize; 00427 00428 // The initial number of buckets in the cache. 00429 uInt itsNrBuckets; 00430 00431 // Nr of buckets needed for index. 00432 uInt itsNrIdxBuckets; 00433 00434 // Number of the first index bucket 00435 Int itsFirstIdxBucket; 00436 00437 // Offset of index in first bucket. 00438 // If >0, the index fits in a single bucket. 00439 uInt itsIdxBucketOffset; 00440 00441 // Number of the first String Bucket 00442 Int itsLastStringBucket; 00443 00444 // length of index memoryblock 00445 uInt itsIndexLength; 00446 00447 // The nr of free buckets. 00448 uInt itsFreeBucketsNr; 00449 00450 // The first free bucket. 00451 Int itsFirstFreeBucket; 00452 00453 // The bucket size. 00454 uInt itsBucketSize; 00455 uInt itsBucketRows; 00456 00457 // The assembly of all columns. 00458 PtrBlock<SSMColumn*> itsPtrColumn; 00459 00460 // Has the data changed since the last flush? 
00461 Bool isDataChanged; 00462 }; 00463 00464 00465 inline uInt SSMBase::getNrIndices() const 00466 { 00467 return itsPtrIndex.nelements(); 00468 } 00469 00470 inline uInt SSMBase::getCacheSize() const 00471 { 00472 return itsCacheSize; 00473 } 00474 00475 inline uInt SSMBase::getNRow() const 00476 { 00477 return itsNrRows; 00478 } 00479 00480 inline uInt SSMBase::getBucketSize() const 00481 { 00482 return itsBucketSize; 00483 } 00484 00485 inline BucketCache& SSMBase::getCache() 00486 { 00487 if (itsCache == 0) { 00488 makeCache(); 00489 } 00490 return *itsCache; 00491 } 00492 00493 inline SSMColumn& SSMBase::getColumn (uInt aColNr) 00494 { 00495 return *(itsPtrColumn[aColNr]); 00496 } 00497 00498 inline SSMIndex& SSMBase::getIndex (uInt anIdxNr) 00499 { 00500 return *(itsPtrIndex[anIdxNr]); 00501 } 00502 00503 inline SSMStringHandler* SSMBase::getStringHandler() 00504 { 00505 return itsStringHandler; 00506 } 00507 00508 00509 00510 } //# NAMESPACE CASACORE - END 00511 00512 #endif