MultiFileBase.h

Go to the documentation of this file.
00001 //# MultiFileBase.h: Abstract base class to combine multiple files in a single one
00002 //# Copyright (C) 2014
00003 //# Associated Universities, Inc. Washington DC, USA.
00004 //#
00005 //# This library is free software; you can redistribute it and/or modify it
00006 //# under the terms of the GNU Library General Public License as published by
00007 //# the Free Software Foundation; either version 2 of the License, or (at your
00008 //# option) any later version.
00009 //#
00010 //# This library is distributed in the hope that it will be useful, but WITHOUT
00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00013 //# License for more details.
00014 //#
00015 //# You should have received a copy of the GNU Library General Public License
00016 //# along with this library; if not, write to the Free Software Foundation,
00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00018 //#
00019 //# Correspondence concerning AIPS++ should be addressed as follows:
00020 //#        Internet email: aips2-request@nrao.edu.
00021 //#        Postal address: AIPS++ Project Office
00022 //#                        National Radio Astronomy Observatory
00023 //#                        520 Edgemont Road
00024 //#                        Charlottesville, VA 22903-2475 USA
00025 //#
00026 //# $Id: RegularFileIO.h 20551 2009-03-25 00:11:33Z Malte.Marquarding $
00027 
00028 #ifndef CASA_MULTIFILEBASE_H
00029 #define CASA_MULTIFILEBASE_H
00030 
00031 //# Includes
00032 #include <casacore/casa/aips.h>
00033 #include <casacore/casa/IO/ByteIO.h>
00034 #include <casacore/casa/BasicSL/String.h>
00035 #include <casacore/casa/Utilities/CountedPtr.h>
00036 #include <casacore/casa/vector.h>
00037 #include <casacore/casa/ostream.h>
00038 
00039 
00040 namespace casacore { //# NAMESPACE CASACORE - BEGIN
00041 
00042   //# Forward declaration.
00043   class AipsIO;
00044   class HDF5Group;
00045   class HDF5DataSet;
00046 
00047 
00048   // <summary>
00049   // Helper class for MultiFileBase containing info per internal file
00050   // </summary>
00051   // <use visibility=local>
00052   struct MultiFileInfo {
00053     explicit MultiFileInfo (Int64 bufSize=0);  // bufSize: presumably the size (in bytes) of buffer; confirm in the .cc
00054     vector<Int64> blockNrs;     // physical blocknrs for this logical file
00055     vector<char>  buffer;       // buffer holding a data block
00056     Int64         curBlock;     // the data block held in buffer (<0 is none)
00057     Int64         fsize;        // file size (in bytes)
00058     String        name;         // the virtual file name
00059     Bool          dirty;        // has data in buffer been changed?
00060     CountedPtr<HDF5Group> group;      // HDF5 group; presumably only set by an HDF5-based implementation (confirm)
00061     CountedPtr<HDF5DataSet> dataSet;  // HDF5 data set; presumably only set by an HDF5-based implementation (confirm)
00062   };
00063   void operator<< (ostream&, const MultiFileInfo&);  // output of a MultiFileInfo to an ostream; returns void, so calls cannot be chained
00064   void operator<< (AipsIO&, const MultiFileInfo&);   // output of a MultiFileInfo to AipsIO; returns void, so calls cannot be chained
00065   void operator>> (AipsIO&, MultiFileInfo&);         // input of a MultiFileInfo from AipsIO; returns void, so calls cannot be chained
00066 
00067 
00068   // <summary> 
00069   // Abstract base class to combine multiple files in a single one.
00070   // </summary>
00071 
00072   // <use visibility=export>
00073 
00074   // <reviewed reviewer="" date="" tests="tMultiFile" demos="">
00075   // </reviewed>
00076 
00077   // <synopsis> 
00078   // This class is a container file holding multiple virtual files. It is
00079   // primarily meant as a container file for the storage manager files of a
00080   // table to reduce the number of files used (especially for Lustre) and to
00081   // reduce the number of open files (especially when concatenating tables).
00082   // <br>A secondary goal is offering the ability to use an IO buffer size
00083   // that matches the file system well (large buffer size for e.g. ZFS).
00084   //
00085   // The SetupNewTable constructor has a StorageOption argument to define
00086   // if a MultiFile has to be used and if so, the buffer size to use.
00087   // It is also possible to specify that through aipsrc variables.
00088   //
00089   // A virtual file is spread over multiple (fixed size) data blocks in the
00090   // MultiFile. A data block is never shared by multiple files.
00091   // For each virtual file MultiFile keeps a MultiFileInfo object telling
00092   // the file size and the block numbers used for the file. When flushing
00093   // the MultiFile, this meta info is written into a header block and,
00094   // if needed, continuation blocks. On open and resync, it is read back.
00095   // <br>
00096   //
00097   // A virtual file is represented by an MFFileIO object, which is derived
00098   // from ByteIO and as such part of the casacore IO framework. It makes it
00099   // possible for applications to access a virtual file in the same way as
00100   // a regular file.
00101   //
00102   // It is possible to delete a virtual file. Its blocks will be added to
00103   // the free block list (which is also stored in the meta info).
00104   // </synopsis>
00105 
00106   // <example>
00107   // In principle it is possible to use the MultiFile functions directly.
00108   // However, in general it is much easier to use an MFFileIO object
00109   // per virtual file as shown below.
00110   // <srcblock>
00111   //    // Create a new MultiFile using a block size of 1 MB.
00112   //    MultiFile mfile("file.mf", ByteIO::New, 1048576);
00113   //    // Create a virtual file in it.
00114   //    MFFileIO mf1(mfile, "mf1", ByteIO::New);
00115   //    // Use it (for example) as the sink of AipsIO.
00116   //    AipsIO stream (&mf1);
00117   //    // Write values.
00118   //    stream << (Int)10;
00119   //    stream << True;
00120   //    // Seek to beginning of file and read data in.
00121   //    stream.setpos (0);
00122   //    Int vali;
00123   //    Bool valb;
00124   //    stream >> vali >> valb;
00125   // </srcblock>
00126   // </example>
00127 
00128   // <todo>
00129   //  <li> write headers at alternating file positions (for robustness)
00130   //  <li> possibly write headers entirely at the end if larger than blocksize
00131   // </todo>
00132 
00133 
00134   class MultiFileBase
00135   {
00136   public:
00137     // Open or create a MultiFileBase with the given name.
00138     // Upon creation the block size can be given. If 0, it uses the block size
00139     // of the file system the file is on.
00140     MultiFileBase (const String& name, Int blockSize=0);
00141 
00142     // The destructor flushes and closes the file.
00143     virtual ~MultiFileBase();
00144 
00145     // Return the file id of a file in the MultiFileBase object.
00146     // If the name is unknown, an exception is thrown if throwExcp is set.
00147     // Otherwise it returns -1.
00148     Int fileId (const String& name, Bool throwExcp=True) const;
00149 
00150     // Add a file to the MultiFileBase object. It returns the file id.
00151     // Only the base name of the given file name is used. In this way the
00152     // MultiFileBase container file can be moved.
00153     Int addFile (const String& name);
00154 
00155     // Delete a file. It adds its blocks to the free block list.
00156     void deleteFile (Int fileId);
00157 
00158     // Read a block at the given offset. It returns the actual size read.
00159     Int64 read (Int fileId, void* buffer, Int64 size, Int64 offset);
00160 
00161     // Write a block at the given offset. It returns the actual size written.
00162     Int64 write (Int fileId, const void* buffer, Int64 size, Int64 offset);
00163 
00164     // Flush the file by writing all dirty data and all header info.
00165     void flush();
00166 
00167     // Resync with another process by clearing the buffers and rereading
00168     // the header. The header is only read if its counter has changed.
00169     void resync();
00170 
00171     // Reopen the underlying file for read/write access.
00172     // Nothing will be done if the file is writable already.
00173     // Otherwise it will be reopened and an exception will be thrown
00174     // if it is not possible to reopen it for read/write access.
00175     virtual void reopenRW() = 0;
00176 
00177     // Fsync the file (i.e., force the data to be physically written).
00178     virtual void fsync() = 0;
00179 
00180     // Get the file name of the MultiFileBase.
00181     String fileName() const
00182       { return itsName; }
00183 
00184     // Is the file writable?
00185     Bool isWritable() const
00186       { return itsWritable; }
00187 
00188     // Get the block size used.
00189     Int64 blockSize() const
00190       { return itsBlockSize; }
00191 
00192     // Get the nr of virtual files.
00193     uInt nfile() const;
00194 
00195     // Get the total nr of data blocks used.
00196     Int64 size() const
00197       { return itsNrBlock; }
00198 
00199     // Get the info object (for test purposes mainly).
00200     const vector<MultiFileInfo>& info() const
00201       { return itsInfo; }
00202 
00203     // Get the free blocks (for test purposes mainly).
00204     const vector<Int64>& freeBlocks() const
00205       { return itsFreeBlocks; }
00206 
00207   private:
00208     void writeDirty (MultiFileInfo& info)  // Write the block held in info.buffer (block info.curBlock) and clear the dirty flag.
00209     {
00210       writeBlock (info, info.curBlock, &(info.buffer[0]));  // NOTE(review): buffer[0] assumes a non-empty buffer; confirm callers guarantee this
00211       info.dirty = False;
00212     }
00213 
00214     // Do the class-specific actions on adding a file.
00215     virtual void doAddFile (MultiFileInfo&) = 0;
00216     // Do the class-specific actions on deleting a file.
00217     virtual void doDeleteFile (MultiFileInfo&) = 0;
00218     // Flush the file itself.
00219     virtual void flushFile() = 0;
00220     // Flush and close the file.
00221     virtual void close() = 0;
00222     // Write the header info.
00223     virtual void writeHeader() = 0;
00224     // Read the header info. If always==False, the info is only read if the
00225     // header counter has changed.
00226     virtual void readHeader (Bool always=True) = 0;
00227     // Extend the virtual file to fit lastblk.
00228     virtual void extend (MultiFileInfo& info, Int64 lastblk) = 0;
00229     // Write a data block.
00230     virtual void writeBlock (MultiFileInfo& info, Int64 blknr,
00231                              const void* buffer) = 0;
00232     // Read a data block.
00233     virtual void readBlock (MultiFileInfo& info, Int64 blknr,
00234                             void* buffer) = 0;
00235 
00236   protected:
00237     // Set the flags and blockSize for a new MultiFile/HDF5.
00238     void setNewFile();
00239 
00240     //# Data members
00241     String itsName;       // The file name (as returned by fileName())
00242     Int64  itsBlockSize;  // The blocksize used
00243     Int64  itsNrBlock;    // The total nr of blocks actually used
00244     Int64  itsHdrCounter; // Counter of header changes
00245     vector<MultiFileInfo> itsInfo;     // Info objects for the virtual files (as returned by info())
00246     Bool                  itsWritable; // Is the file writable?
00247     Bool                  itsChanged; // Has header info changed since last flush?
00248     vector<Int64>         itsFreeBlocks; // The free blocks (as returned by freeBlocks())
00249   };
00250 
00251 
00252 } //# NAMESPACE CASACORE - END
00253 
00254 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated on 31 Aug 2016 for casa by  doxygen 1.6.1