//# MultiFileBase.h: Abstract base class to combine multiple files in a single one
//# Copyright (C) 2014
//# Associated Universities, Inc. Washington DC, USA.
//#
//# This library is free software; you can redistribute it and/or modify it
//# under the terms of the GNU Library General Public License as published by
//# the Free Software Foundation; either version 2 of the License, or (at your
//# option) any later version.
//#
//# This library is distributed in the hope that it will be useful, but WITHOUT
//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
//# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
//# License for more details.
//#
//# You should have received a copy of the GNU Library General Public License
//# along with this library; if not, write to the Free Software Foundation,
//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
//#
//# Correspondence concerning AIPS++ should be addressed as follows:
//#        Internet email: aips2-request@nrao.edu.
//#        Postal address: AIPS++ Project Office
//#                        National Radio Astronomy Observatory
//#                        520 Edgemont Road
//#                        Charlottesville, VA 22903-2475 USA
//#
//# $Id: RegularFileIO.h 20551 2009-03-25 00:11:33Z Malte.Marquarding $

#ifndef CASA_MULTIFILEBASE_H
#define CASA_MULTIFILEBASE_H

//# Includes
#include <casacore/casa/aips.h>
#include <casacore/casa/IO/ByteIO.h>
#include <casacore/casa/BasicSL/String.h>
#include <casacore/casa/Utilities/CountedPtr.h>
#include <casacore/casa/vector.h>
#include <casacore/casa/ostream.h>


namespace casacore { //# NAMESPACE CASACORE - BEGIN

//# Forward declaration.
00043 class AipsIO; 00044 class HDF5Group; 00045 class HDF5DataSet; 00046 00047 00048 // <summary> 00049 // Helper class for MultiFileBase containing info per internal file 00050 // </summary> 00051 // <use visibility=local> 00052 struct MultiFileInfo { 00053 explicit MultiFileInfo (Int64 bufSize=0); 00054 vector<Int64> blockNrs; // physical blocknrs for this logical file 00055 vector<char> buffer; // buffer holding a data block 00056 Int64 curBlock; // the data block held in buffer (<0 is none) 00057 Int64 fsize; // file size (in bytes) 00058 String name; // the virtual file name 00059 Bool dirty; // has data in buffer been changed? 00060 CountedPtr<HDF5Group> group; 00061 CountedPtr<HDF5DataSet> dataSet; 00062 }; 00063 void operator<< (ostream&, const MultiFileInfo&); 00064 void operator<< (AipsIO&, const MultiFileInfo&); 00065 void operator>> (AipsIO&, MultiFileInfo&); 00066 00067 00068 // <summary> 00069 // Abstract base class to combine multiple files in a single one. 00070 // </summary> 00071 00072 // <use visibility=export> 00073 00074 // <reviewed reviewer="" date="" tests="tMultiFile" demos=""> 00075 // </reviewed> 00076 00077 // <synopsis> 00078 // This class is a container file holding multiple virtual files. It is 00079 // primarily meant as a container file for the storage manager files of a 00080 // table to reduce the number of files used (especially for Lustre) and to 00081 // reduce the number of open files (especially when concatenating tables). 00082 // <br>A secondary goal is offering the ability to use an IO buffer size 00083 // that matches the file system well (large buffer size for e.g. ZFS). 00084 // 00085 // The SetupNewTable constructor has a StorageOption argument to define 00086 // if a MultiFile has to be used and if so, the buffer size to use. 00087 // It is also possible to specify that through aipsrc variables. 00088 // 00089 // A virtual file is spread over multiple (fixed size) data blocks in the 00090 // MultiFile. 
A data block is never shared by multiple files. 00091 // For each virtual file MultiFile keeps a MultiFileInfo object telling 00092 // the file size and the blocks numbers used for the file. When flushing 00093 // the MultiFile, this meta info is written into a header block and, 00094 // if needed, continuation blocks. On open and resync, it is read back. 00095 // <br> 00096 // 00097 // A virtual file is represented by an MFFileIO object, which is derived 00098 // from ByteIO and as such part of the casacore IO framework. It makes it 00099 // possible for applications to access a virtual file in the same way as 00100 // a regular file. 00101 // 00102 // It is possible to delete a virtual file. Its blocks will be added to 00103 // the free block list (which is also stored in the meta info). 00104 // </synopsis> 00105 00106 // <example> 00107 // In principle it is possible to use the MultiFile functions directly. 00108 // However, in general it is much easier to use an MFFileIO object 00109 // per virtual file as shown below. 00110 // <srcblock> 00111 // // Create a new MultiFile using a block size of 1 MB. 00112 // MultiFile mfile("file.mf', ByteIO::New, 1048576); 00113 // // Create a virtual file in it. 00114 // MFFileIO mf1(mfile, "mf1", ByteIO::New); 00115 // // Use it (for example) as the sink of AipsIO. 00116 // AipsIO stream (&mf1); 00117 // // Write values. 00118 // stream << (Int)10; 00119 // stream << True; 00120 // // Seek to beginning of file and read data in. 00121 // stream.setpos (0); 00122 // Int vali; 00123 // Bool valb; 00124 // stream >> vali >> valb; 00125 // </srcblock> 00126 // </example> 00127 00128 // <todo> 00129 // <li> write headers at alternating file positions (for robustness) 00130 // <li> possibly write headers entirely at the end if larger than blocksize 00131 // </todo> 00132 00133 00134 class MultiFileBase 00135 { 00136 public: 00137 // Open or create a MultiFileBase with the given name. 
00138 // Upon creation the block size can be given. If 0, it uses the block size 00139 // of the file system the file is on. 00140 MultiFileBase (const String& name, Int blockSize=0); 00141 00142 // The destructor flushes and closes the file. 00143 virtual ~MultiFileBase(); 00144 00145 // Return the file id of a file in the MultiFileBase object. 00146 // If the name is unknown, an exception is thrown if throwExcp is set. 00147 // Otherwise it returns -1. 00148 Int fileId (const String& name, Bool throwExcp=True) const; 00149 00150 // Add a file to the MultiFileBase object. It returns the file id. 00151 // Only the base name of the given file name is used. In this way the 00152 // MultiFileBase container file can be moved. 00153 Int addFile (const String& name); 00154 00155 // Delete a file. It adds its blocks to the free block list. 00156 void deleteFile (Int fileId); 00157 00158 // Read a block at the given offset. It returns the actual size read. 00159 Int64 read (Int fileId, void* buffer, Int64 size, Int64 offset); 00160 00161 // Write a block at the given offset. It returns the actual size written. 00162 Int64 write (Int fileId, const void* buffer, Int64 size, Int64 offset); 00163 00164 // Flush the file by writing all dirty data and all header info. 00165 void flush(); 00166 00167 // Resync with another process by clearing the buffers and rereading 00168 // the header. The header is only read if its counter has changed. 00169 void resync(); 00170 00171 // Reopen the underlying file for read/write access. 00172 // Nothing will be done if the file is writable already. 00173 // Otherwise it will be reopened and an exception will be thrown 00174 // if it is not possible to reopen it for read/write access. 00175 virtual void reopenRW() = 0; 00176 00177 // Fsync the file (i.e., force the data to be physically written). 00178 virtual void fsync() = 0; 00179 00180 // Get the file name of the MultiFileBase. 
00181 String fileName() const 00182 { return itsName; } 00183 00184 // Is the file writable? 00185 Bool isWritable() const 00186 { return itsWritable; } 00187 00188 // Get the block size used. 00189 Int64 blockSize() const 00190 { return itsBlockSize; } 00191 00192 // Get the nr of virtual files. 00193 uInt nfile() const; 00194 00195 // Get the total nr of data blocks used. 00196 Int64 size() const 00197 { return itsNrBlock; } 00198 00199 // Get the info object (for test purposes mainly). 00200 const vector<MultiFileInfo>& info() const 00201 { return itsInfo; } 00202 00203 // Get the free blocks (for test purposes mainly). 00204 const vector<Int64>& freeBlocks() const 00205 { return itsFreeBlocks; } 00206 00207 private: 00208 void writeDirty (MultiFileInfo& info) 00209 { 00210 writeBlock (info, info.curBlock, &(info.buffer[0])); 00211 info.dirty = False; 00212 } 00213 00214 // Do the class-specific actions on adding a file. 00215 virtual void doAddFile (MultiFileInfo&) = 0; 00216 // Do the class-specific actions on deleting a file. 00217 virtual void doDeleteFile (MultiFileInfo&) = 0; 00218 // Flush the file itself. 00219 virtual void flushFile() = 0; 00220 // Flush and close the file. 00221 virtual void close() = 0; 00222 // Write the header info. 00223 virtual void writeHeader() = 0; 00224 // Read the header info. If always==False, the info is only read if the 00225 // header counter has changed. 00226 virtual void readHeader (Bool always=True) = 0; 00227 // Extend the virtual file to fit lastblk. 00228 virtual void extend (MultiFileInfo& info, Int64 lastblk) = 0; 00229 // Write a data block. 00230 virtual void writeBlock (MultiFileInfo& info, Int64 blknr, 00231 const void* buffer) = 0; 00232 // Read a data block. 00233 virtual void readBlock (MultiFileInfo& info, Int64 blknr, 00234 void* buffer) = 0; 00235 00236 protected: 00237 // Set the flags and blockSize for a new MultiFile/HDF5. 
00238 void setNewFile(); 00239 00240 //# Data members 00241 String itsName; 00242 Int64 itsBlockSize; // The blocksize used 00243 Int64 itsNrBlock; // The total nr of blocks actually used 00244 Int64 itsHdrCounter; // Counter of header changes 00245 vector<MultiFileInfo> itsInfo; 00246 Bool itsWritable; // Is the file writable? 00247 Bool itsChanged; // Has header info changed since last flush? 00248 vector<Int64> itsFreeBlocks; 00249 }; 00250 00251 00252 } //# NAMESPACE CASACORE - END 00253 00254 #endif