HashMap.h

Go to the documentation of this file.
00001 //# HashMap.h: this defines HashMap, which is a hashed associative array
00002 //# Copyright (C) 1995,1996,1999,2000,2001
00003 //# Associated Universities, Inc. Washington DC, USA.
00004 //#
00005 //# This library is free software; you can redistribute it and/or modify it
00006 //# under the terms of the GNU Library General Public License as published by
00007 //# the Free Software Foundation; either version 2 of the License, or (at your
00008 //# option) any later version.
00009 //#
00010 //# This library is distributed in the hope that it will be useful, but WITHOUT
00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00013 //# License for more details.
00014 //#
00015 //# You should have received a copy of the GNU Library General Public License
00016 //# along with this library; if not, write to the Free Software Foundation,
00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00018 //#
00019 //# Correspondence concerning AIPS++ should be addressed as follows:
00020 //#        Internet email: aips2-request@nrao.edu.
00021 //#        Postal address: AIPS++ Project Office
00022 //#                        National Radio Astronomy Observatory
00023 //#                        520 Edgemont Road
00024 //#                        Charlottesville, VA 22903-2475 USA
00025 //#
00026 //# $Id$
00027 
00028 #ifndef CASA_HASHMAP_H
00029 #define CASA_HASHMAP_H
00030 
00031 //# Includes
00032 #include <casacore/casa/aips.h>
00033 #include <casacore/casa/Containers/Block.h>
00034 #include <casacore/casa/Containers/List.h>
00035 #include <casacore/casa/Containers/OrderedPair.h>
00036 #include <casacore/casa/Exceptions/Error.h>
00037 
00038 namespace casacore { //# NAMESPACE CASACORE - BEGIN
00039 
00040 //# Forward Declarations
00041 template<class key,class val> class ConstHashMapIter;
00042 extern void throw_invalid_hashmapiter_error();
00043 extern void throw_hashmapiter_init_error();
00044 
00045 // <summary>
00046 //     Hash functions for standard types
00047 // </summary>
00048 //
00049 // <synopsis>
00050 // These are the declarations for the standard hash functions
00051 // used by <linkto class=HashMap>HashMap</linkto>. In general, a function
00052 // such as these is defined for each type that is to be used as
00053 // a key in <linkto class=HashMap>HashMap</linkto>.
00054 //
00055 // These hash functions map the key type to any integer. This
00056 // integer is then used by <linkto class=HashMap>HashMap</linkto> to
00057 // select a bucket in the hash table.
00058 // </synopsis>
00059 //
00060 // <group name=hashfunc>
00061 uInt hashFunc(const String &);
00062 uInt hashFunc(const float &);
00063 uInt hashFunc(const double &);
00064 uInt hashFunc(const int &);
00065 uInt hashFunc(const unsigned &);
00066 //</group>
00067 
00068 
00069 // <summary>
00070 //     Specify the default values for HashMap keys
00071 // </summary>
00072 //
00073 // <synopsis>
00074 // These are the declarations for a set of functions which provide
00075 // the default values for types which are used as keys in
00076 // <linkto class=HashMap>HashMap</linkto>.
00077 // </synopsis>
00078 //
00079 // <group name=defaulthashvalue>
00080 const Int &defaultHashValue(const Int *);
00081 const uInt &defaultHashValue(const uInt *);
00082 const Long &defaultHashValue(const Long *);
00083 const uLong &defaultHashValue(const uLong *);
00084 const Float &defaultHashValue(const Float *);
00085 const Double &defaultHashValue(const Double *);
00086 const lDouble &defaultHashValue(const lDouble *);
00087 // </group>
00088 
00089 // <summary>
00090 //     Hash function with state
00091 // </summary>
00092 // <use visibility=export>
00093 // <reviewed reviewer="" date="yyyy/mm/dd" tests="" demos="">
00094 //
00095 // <etymology>
00096 //      This is basically a way of specifying a hash function, but
00097 //      it is implemented as a class. Thus it is called HashClass,
00098 //      similar to "hash function".
00099 // </etymology>
00100 //
00101 // <synopsis>
00102 //      This class is used to specify a hash function. Sometimes a hash
00103 //      function may require state, it may be useful to create a
00104 //      hierarchy of hash functions, or it may be useful to create a class
00105 //      which provides for hashing as well as other functionality. This
00106 //      class can be used as a base class for any of these purposed. This
00107 //      class is intended for parameterization of
00108 //      <linkto class=HashMap>HashMap</linkto>.
00109 //
00110 //      The hash function maps the key type to any integer. This
00111 //      integer is then used by <linkto class=HashMap>HashMap</linkto> to
00112 //      select a bucket in the hash table.
00113 // </synopsis>
00114 //
00115 // <example>
00116 //      If one wished to make a HashClass for integers, something like the
00117 //      following might be done:
00118 //      <srcblock>
00119 //      class IntHash : public HashClass<Int> {
00120 //      public:
00121 //        uInt hash(const Int &v) const { return (uInt) v; }
00122 //        uInt hash(const Int &v) { return (uInt) v; }
00123 //        HashClass<Int> *clone() const { return new IntHash; }
00124 //        IntHash() : HashClass<Int>() { }
00125 //      };
00126 //      </srcblock>
00127 // </example>
00128 //
00129 // <motivation>
00130 //      There may be occasions when it is more convenient to use a class
00131 //      instead of a single function. This base class provides a starting
00132 //      point plus, and <src>HashMap<k,v></src> has the necessary hooks to
00133 //      make use of classes derived from this class.
00134 // </motivation>
00135 //
00136 template<class key> class HashClass {
00137 public:
00138     //
00139     // This function maps elements of <src>key</src> type to any integer.
00140     // This integer is then used by <linkto class=HashMap>HashMap</linkto> to
00141     // select a bucket in the hash table.
00142     //
00143     virtual uInt hash(const key &) = 0;
00144 
00145     //
00146     // This function is used to make a <em>deep copy</em>. This means that
00147     // the copy, which this function returns, contains all derived information.
00148     //
00149     virtual HashClass<key> *clone() const = 0;
00150 
00151     HashClass();
00152     virtual ~HashClass();
00153 };
00154 
00155 
00156 // <summary>
00157 //     Associative Array with a hash table implementation
00158 // </summary>
00159 // <use visibility=export>
00160 // <reviewed reviewer="" date="yyyy/mm/dd" tests="" demos="">
00161 //
00162 // <prerequisite>
00163 //   <li> basic concepts behind hash tables
00164 // </prerequisite>
00165 //
00166 // <etymology>
00167 //   This is an associative array, also known as a map, and it is implemented
00168 //   using a hash table, so it is called HashMap.
00169 // </etymology>
00170 //
00171 // <synopsis>
00172 //   This class is an implementation of an associative array. This is a common
00173 //   data structure which associates a key of one type with a value of the same
00174 //   or different type. Essentially it is an (unordered) array which is indexed
00175 //   by an arbitrary type of index, e.g. strings.
00176 //
00177 //   This class has two template type parameters. The first is the type of the
00178 //   key and the second is the type of the value. Thus the associative array
00179 //   is a mapping from the domain, any valid object of the key type, to the
00180 //   range, any valid object of the value type. This is a <em>complete</em>
00181 //   map which means that every element in the domain maps to one and only
00182 //   one element in the range. Those elements which have not been set by the
00183 //   user of this class map to a default value which is set at construction
00184 //   time.
00185 //
00186 //   One of the important features of this class which must be understood
00187 //   is the load factor. This factor indicates the average number of items
00188 //   in the buckets of the hash table which are currently in use. The goal
00189 //   is to have the hash function greatly reduce the number of item which
00190 //   must be searched, i.e. to have a limited number of items in each bucket.
00191 //   The load factor determines this. Thus a load factor of 1000 or 0 is a
00192 //   poor choice. The default load factor is 4 which should generally be
00193 //   fine. The load factor is set with <src>setMaxLoad()</src> and retrieved
00194 //   with <src>maxLoad()</src>.
00195 //
00196 //   For this class to be used,
00197 //   three things must be defined:
00198 //   <ul>
00199 //      <li> a specialization of the <src>hash()</src> templated function for
00200 //              the key type or a class derived from <src>HashClass<key></src>.
00201 //              Either of which can be used to implement the hash function for
00202 //              a particular type.
00203 //      <li> an equality operator ( '==' ) for the key
00204 //      <li> a default constructor or a specialization of
00205 //              <src>defaultHashValue()</src> for the key type
00206 //   </ul>
00207 //
00208 //   The implementation of this hash map is derived from work on a proposed
00209 //   addition to the Standard Template Library by Javier Barreiro, Robert Fraley
00210 //   and <a href="http://www.cs.rpi.edu/~musser/">David R. Musser</a>. The information
00211 //   which is available includes:
00212 //   <ul>
00213 //        <li> <a href="ftp://ftp.cs.rpi.edu/pub/stl/hashrationale.ps.Z">
00214 //             rationale for hash map addition to the STL </a>
00215 //        <li> <a href="ftp://ftp.cs.rpi.edu/pub/stl/hashdoc.ps.Z">
00216 //             hash map addition proposal</a>
00217 //        <li> <a href="ftp://ftp.cs.rpi.edu/pub/stl/hashimp2.Z">
00218 //             preliminary implementation</a>
00219 //   </ul>
00220 //   each of these sources was utilized in the development of this set of classes,
00221 //   and in particular, the preliminary implementation was the source for the hashing
00222 //   algorithm used in this set of classes.
00223 // </synopsis>
00224 //
00225 // <example>
00226 //    <srcblock>
00227 //    #include <casacore/casa/Containers/HashMap.h>
00228 //    #include <casacore/casa/BasicSL/String.h>
00229 //    #include <casacore/casa/iostream.h>
00230 //   
00231 //    main() {
00232 //      HashMap<String,Int> hash;
00233 //    
00234 //      hash("one") = 1;                // sets the value of key "one" to "1"
00235 //      hash.define("two",2);           // defines a mapping from key "two" to "2"
00236 //      hash("three") = 3;
00237 //      hash.define("four",4);
00238 //      hash("five") = 5;
00239 //      hash.define("six",6);
00240 //    
00241 //      HashMapIter<String,Int> iter(hash);
00242 //      for ( iter.toStart(); ! iter.atEnd(); iter++ )
00243 //          cout << iter.getVal() << ": " << iter.getKey() << endl;
00244 //    
00245 //      cout << endl << "Diagnostics" << endl << 
00246 //                "========================" << endl;
00247 //      cout << "number defined:    " << hash.ndefined() << endl;
00248 //      cout << "buckets used:      " << hash.usedBuckets() << endl;
00249 //      cout << "buckets available: " << hash.availableBuckets() << endl;
00250 //      cout << "buckets allocated: " << hash.allocBuckets() << endl;
00251 //      cout << "loading:           " << hash.loading() << endl;
00252 //      cout << "size (in bytes):   " << hash.totalSize() << endl;
00253 //    
00254 //    }
00255 //    </srcblock>
00256 // </example>
00257 //
00258 // <motivation>
00259 //    There are a couple of reasons why this class was built:
00260 //         <ul>
00261 //             <li> use of a hash table can be more efficient
00262 //             <li> there are a couple of Map classes currently:
00263 //               <ul>
00264 //                 <li> <linkto class=OrderedMap>OrderedMap</linkto>
00265 //                 <li> <linkto class=SimpleOrderedMap>SimpleOrderedMap</linkto>
00266 //               </ul>
00267 //               <src>OrderedMap</src> is derived from a map base class,
00268 //               <linkto class=Map><src>Map</src></linkto> while
00269 //               <src>SimpleOrderedMap</src> is not. This collection of classes
00270 //               has resulted in confusion for the users. It is hoped that this
00271 //               class can in time replace these other "map" classes by
00272 //               satisfying the performance demands of
00273 //               <src>SimpleOrderedMap</src> while still meeting the constraints
00274 //               of the other map classes.
00275 //         </ul>
00276 // </motivation>
00277 //
00278 // <templating arg=key>
00279 //    <li> equality operator (operator==)
00280 //    <li> function hashFunc() or HashClass derived class provided
00281 //    <li> default constructor or defaultHashValue() specialization provided or
00282 //         default value provided at time of construction
00283 // </templating>
00284 // <templating arg=val>
00285 //    <li> copy constructor
00286 // </templating>
00287 //
00288 // <thrown>
00289 //    <li> AipsError
00290 // </thrown>
00291 //
00292 // <todo asof="yyyy/mm/dd">
00293 //   <li> add this feature
00294 //   <li> fix this bug
00295 //   <li> start discussion of this possible extension
00296 // </todo>
00297 template<class key, class val> class HashMap {
00298 friend class ConstHashMapIter<key,val>;
00299 private:
00300     enum HashMap_Constants { defaultSize_ = 131, defaultMaxLoad_ = 4 };
00301 public:
00302     static float defaultMaxLoad() { return float(defaultMaxLoad_); }
00303     static uInt  defaultSize() { return uInt(defaultSize_); }
00304 
00305     // Signature of the hash functions
00306     typedef uInt (*Func)(const key&);
00307     //
00308     // Copy constructor with copy semantics
00309     //
00310     HashMap(const HashMap &);
00311 
00312     //
00313     // Default constructor (and variation) which allows for
00314     // specifying:
00315     //   <ul>
00316     //     <li> a default value, <src>dflt</src>
00317     //     <li> the initial number of buckets, <src>size</src>
00318     //     <li> the hash function, <src>newfunc</src>
00319     //     <li> the maximum load factor, <src>maxlf</src>
00320     //   </ul>
00321     //
00322     // This is a pair because the hash function can either be
00323     // a simple function or a class derived from
00324     // <linkto class=HashClass><src>HashClass</src></linkto>.
00325     // <group>
00326     HashMap(const val &dflt = defaultHashValue((const val*)(0)),
00327             uInt size = uInt(defaultSize_), 
00328             Func newfunc = hashFunc,
00329             float maxlf = float(defaultMaxLoad_))
00330       : total_(size),
00331         used_(size),
00332         filled_(0),
00333         defs_(0),
00334         maxLoad_(maxlf),
00335         blk(size, static_cast<List<OrderedPair<key,val> >*>(0)),
00336         func(newfunc),
00337         hashClass(0),
00338         dfltVal(dflt)
00339       { }
00340 
00341     HashMap(const val &dflt, uInt size, const HashClass<key> &newfunc,
00342             float maxlf = float(defaultMaxLoad_))
00343       : total_(size),
00344         used_(size),
00345         filled_(0),
00346         defs_(0), 
00347         maxLoad_(maxlf),
00348         blk(size, static_cast<List<OrderedPair<key,val> >*>(0)),
00349         func(0),
00350         hashClass(newfunc.clone()),
00351         dfltVal(dflt)
00352       { }
00353     // </group>
00354 
00355 
00356     //
00357     // Constructor which allows for specifying:
00358     //   <ul>
00359     //     <li> default value, <src>dflt</src>
00360     //     <li> hash function, <src>newfunc</src>
00361     //     <li> maximum load factor, <src>maxlf</src>
00362     //   </ul>
00363     // This is provided because often the user will not be interested
00364     // in specifying the initial number of buckets since the number is
00365     // increased as needed to maintain the max load factor.
00366     //
00367     // This is a pair because the hash function can either be
00368     // a simple function or a class derived from
00369     // <linkto class=HashClass><src>HashClass</src></linkto>.
00370     // <group>
00371     HashMap(const val &dflt, Func newfunc, float maxlf = float(defaultMaxLoad_))
00372       : total_(uInt(defaultSize())), used_(uInt(defaultSize())),
00373         filled_(0), defs_(0), maxLoad_(maxlf),
00374         blk(uInt(defaultSize()), static_cast<List<OrderedPair<key,val> >*>(0)),
00375         func(newfunc), hashClass(0), dfltVal(dflt)
00376     { }
00377 
00378     HashMap(const val &dflt, const HashClass<key> &newfunc,
00379             float maxlf = float(defaultMaxLoad_)) 
00380       : total_(defaultSize()), used_(defaultSize()),
00381         filled_(0), defs_(0), maxLoad_(maxlf), 
00382         blk(uInt(defaultSize_), static_cast<List<OrderedPair<key,val> >*>(0)), func(0),
00383         hashClass(newfunc.clone()), dfltVal(dflt)
00384     { }
00385     // </group>
00386 
00387     //
00388     // This copies the right hand side of the assignment. Assignment is done
00389     // with <em>copy semantics</em>. This means that all the state is copied.
00390     //
00391     HashMap<key,val> &operator=(const HashMap<key,val> &);
00392 
00393     //
00394     // Retrieve values from the map, possibly for later assignment.
00395     // It is important to realize that for the <em>non-const</em> version
00396     // accessing the key causes an entry to be created in the map if it
00397     // didn't already exist. The "value" for this new entry is the
00398     // default value. "isDefined()" should be used if this behavior is
00399     // not desired.
00400     // <group>
00401     const val &operator() (const key &ky) const;
00402     val &operator() (const key &ky);
00403     // </group>
00404 
00405     //
00406     // Define a complete mapping.
00407     //
00408     val &define(const key &k, const val &v);
00409     //
00410     // Remove a user defined mapping from <src>k</src> to a value.
00411     // After this, the value which <src>k</src> maps to reverts to
00412     // the default value.
00413     //
00414     void remove(const key &k);
00415     //
00416     // Check to see if a user defined mapping exists for
00417     // <src>k</src>. This does <em>not</em> modify the map.
00418     //
00419     Bool isDefined(const key &k) const;
00420     //
00421     // Retrieve the default value.
00422     // <group>
00423     const val &defaultVal() const { return dfltVal; }
00424     val &defaultVal() { return dfltVal; }
00425     // </group>
00426 
00427     //
00428     // Remove all user defined mapping.
00429     //
00430     void clear() { freeTable(); }
00431     //
00432     //  Get or set the maximum load factor.
00433     //
00434     // <group>
00435     float maxLoad() const { return maxLoad_; }
00436     void setMaxLoad( float new_max ) { maxLoad_ = new_max; }
00437     // </group>
00438 
00439     // Total number of buckets, i.e. the number the
00440     // hashing mechanism thinks it has. This is the total
00441     // number of buckets used for calculations in the hashing
00442     // mechanism. This may be smaller than <src>allocBuckets()</src>
00443     // because more underlying storage may be allocated than the
00444     // hashing mechanism needs.
00445     uInt totalBuckets() const { return total_; }
00446     // Number of buckets available, i.e. those which
00447     // the hashing mechanism allows itself to use. This
00448     // may be smaller than <src>totalBuckets()</src> because
00449     // the hashing mechanism can restrict itself to some subset
00450     // of the buckets available.
00451     uInt availableBuckets() const { return used_; }
00452     // Number of buckets currently populated by a bucket list.
00453     uInt usedBuckets() const { return filled_; }
00454     // Number of buckets which have been allocated, i.e. the
00455     // total number which have currently been allocated. This
00456     // is the number of buckets created. It may be bigger than
00457     // <src>totalBuckets()</src> because more memory can be
00458     // allocated than the hashing mechanism needs.
00459     uInt allocBuckets() const { return blk.nelements(); }
00460 
00461     // Number of mappings which have been defined by the user.
00462     uInt ndefined() const { return defs_; }
00463 
00464     // Current hash table loading factor.
00465     float loading() const { return ndefined() ? (float) ndefined() / (float) availableBuckets() : 0.0; }
00466     // Have any mappings been defined by the user.
00467     Bool empty() const { return ndefined() == 0 ? True : False; }
00468     // This returns a Block which has the number of elements in each bucket
00469     // of the table.
00470     Block<uInt> distribution() const;
00471     // Total size of this HashMap minus dynamically allocated memory for
00472     // key or val.
00473     uInt totalSize() const;
00474 
00475     //
00476     // dtor
00477     //
00478     virtual ~HashMap();
00479 
00480     enum {HashMapVersion = 1};
00481 
00482 protected:
00483     // Call the hash function.
00484     uInt hash(const key &k) const {
00485         uInt off = func ? (*func)(k) % totalBuckets() :
00486                 hashClass ? hashClass->hash(k) % totalBuckets() : 0;
00487         return off >= availableBuckets() ? off - (totalBuckets() >> 1) : off;
00488     }
00489     //
00490     // delete the contents of the hash table
00491     //
00492     void freeTable();
00493     //
00494     // enlarge the hash table. Returns the bucket which is being
00495     // moved...
00496     //
00497     uInt enlarge();
00498     //
00499     // populate bucket "to". Returns the bucket which is being
00500     // moved...
00501     //
00502     uInt populate( uInt to );
00503 
00504 private:
00505     // Total Slots
00506     uInt total_;
00507     // Slots Being Used
00508     uInt used_;
00509     // Slots with At Least One Value in the Bucket
00510     uInt filled_;
00511     // Number of Defined Mappings
00512     uInt defs_;
00513     // Maximum load
00514     float maxLoad_;
00515     PtrBlock<List<OrderedPair<key,val> >*> blk;
00516     Func func;
00517     HashClass<key> *hashClass;
00518     val dfltVal;
00519 };
00520 
00521 
00522 } //# NAMESPACE CASACORE - END
00523 
00524 #ifndef CASACORE_NO_AUTO_TEMPLATES
00525 #include <casacore/casa/Containers/HashMap.tcc>
00526 #endif //# CASACORE_NO_AUTO_TEMPLATES
00527 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated on 31 Aug 2016 for casa by  doxygen 1.6.1