00001 //# HashMap.h: this defines HashMap, which is a hashed associative array 00002 //# Copyright (C) 1995,1996,1999,2000,2001 00003 //# Associated Universities, Inc. Washington DC, USA. 00004 //# 00005 //# This library is free software; you can redistribute it and/or modify it 00006 //# under the terms of the GNU Library General Public License as published by 00007 //# the Free Software Foundation; either version 2 of the License, or (at your 00008 //# option) any later version. 00009 //# 00010 //# This library is distributed in the hope that it will be useful, but WITHOUT 00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 00012 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 00013 //# License for more details. 00014 //# 00015 //# You should have received a copy of the GNU Library General Public License 00016 //# along with this library; if not, write to the Free Software Foundation, 00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA. 00018 //# 00019 //# Correspondence concerning AIPS++ should be addressed as follows: 00020 //# Internet email: aips2-request@nrao.edu. 00021 //# Postal address: AIPS++ Project Office 00022 //# National Radio Astronomy Observatory 00023 //# 520 Edgemont Road 00024 //# Charlottesville, VA 22903-2475 USA 00025 //# 00026 //# $Id$ 00027 00028 #ifndef CASA_HASHMAP_H 00029 #define CASA_HASHMAP_H 00030 00031 //# Includes 00032 #include <casacore/casa/aips.h> 00033 #include <casacore/casa/Containers/Block.h> 00034 #include <casacore/casa/Containers/List.h> 00035 #include <casacore/casa/Containers/OrderedPair.h> 00036 #include <casacore/casa/Exceptions/Error.h> 00037 00038 namespace casacore { //# NAMESPACE CASACORE - BEGIN 00039 00040 //# Forward Declarations 00041 template<class key,class val> class ConstHashMapIter; 00042 extern void throw_invalid_hashmapiter_error(); 00043 extern void throw_hashmapiter_init_error(); 00044 00045 // <summary> 00046 // Hash functions for standard types 00047 // </summary> 00048 // 00049 // <synopsis> 00050 // These are the declarations for the standard hash functions 00051 // used by <linkto class=HashMap>HashMap</linkto>. In general, a function 00052 // such as these is defined for each type that is to be used as 00053 // a key in <linkto class=HashMap>HashMap</linkto>. 00054 // 00055 // These hash functions map the key type to any integer. This 00056 // integer is then used by <linkto class=HashMap>HashMap</linkto> to 00057 // select a bucket in the hash table. 00058 // </synopsis> 00059 // 00060 // <group name=hashfunc> 00061 uInt hashFunc(const String &); 00062 uInt hashFunc(const float &); 00063 uInt hashFunc(const double &); 00064 uInt hashFunc(const int &); 00065 uInt hashFunc(const unsigned &); 00066 //</group> 00067 00068 00069 // <summary> 00070 // Specify the default values for HashMap keys 00071 // </summary> 00072 // 00073 // <synopsis> 00074 // These are the declarations for a set of functions which provide 00075 // the default values for types which are used as keys in 00076 // <linkto class=HashMap>HashMap</linkto>. 00077 // </synopsis> 00078 // 00079 // <group name=defaulthashvalue> 00080 const Int &defaultHashValue(const Int *); 00081 const uInt &defaultHashValue(const uInt *); 00082 const Long &defaultHashValue(const Long *); 00083 const uLong &defaultHashValue(const uLong *); 00084 const Float &defaultHashValue(const Float *); 00085 const Double &defaultHashValue(const Double *); 00086 const lDouble &defaultHashValue(const lDouble *); 00087 // </group> 00088 00089 // <summary> 00090 // Hash function with state 00091 // </summary> 00092 // <use visibility=export> 00093 // <reviewed reviewer="" date="yyyy/mm/dd" tests="" demos=""> 00094 // 00095 // <etymology> 00096 // This is basically a way of specifying a hash function, but 00097 // it is implemented as a class. Thus it is called HashClass, 00098 // similar to "hash function". 00099 // </etymology> 00100 // 00101 // <synopsis> 00102 // This class is used to specify a hash function. Sometimes a hash 00103 // function may require state, it may be useful to create a 00104 // hierarchy of hash functions, or it may be useful to create a class 00105 // which provides for hashing as well as other functionality. This 00106 // class can be used as a base class for any of these purposed. This 00107 // class is intended for parameterization of 00108 // <linkto class=HashMap>HashMap</linkto>. 00109 // 00110 // The hash function maps the key type to any integer. This 00111 // integer is then used by <linkto class=HashMap>HashMap</linkto> to 00112 // select a bucket in the hash table. 00113 // </synopsis> 00114 // 00115 // <example> 00116 // If one wished to make a HashClass for integers, something like the 00117 // following might be done: 00118 // <srcblock> 00119 // class IntHash : public HashClass<Int> { 00120 // public: 00121 // uInt hash(const Int &v) const { return (uInt) v; } 00122 // uInt hash(const Int &v) { return (uInt) v; } 00123 // HashClass<Int> *clone() const { return new IntHash; } 00124 // IntHash() : HashClass<Int>() { } 00125 // }; 00126 // </srcblock> 00127 // </example> 00128 // 00129 // <motivation> 00130 // There may be occasions when it is more convenient to use a class 00131 // instead of a single function. This base class provides a starting 00132 // point plus, and <src>HashMap<k,v></src> has the necessary hooks to 00133 // make use of classes derived from this class. 00134 // </motivation> 00135 // 00136 template<class key> class HashClass { 00137 public: 00138 // 00139 // This function maps elements of <src>key</src> type to any integer. 00140 // This integer is then used by <linkto class=HashMap>HashMap</linkto> to 00141 // select a bucket in the hash table. 00142 // 00143 virtual uInt hash(const key &) = 0; 00144 00145 // 00146 // This function is used to make a <em>deep copy</em>. This means that 00147 // the copy, which this function returns, contains all derived information. 00148 // 00149 virtual HashClass<key> *clone() const = 0; 00150 00151 HashClass(); 00152 virtual ~HashClass(); 00153 }; 00154 00155 00156 // <summary> 00157 // Associative Array with a hash table implementation 00158 // </summary> 00159 // <use visibility=export> 00160 // <reviewed reviewer="" date="yyyy/mm/dd" tests="" demos=""> 00161 // 00162 // <prerequisite> 00163 // <li> basic concepts behind hash tables 00164 // </prerequisite> 00165 // 00166 // <etymology> 00167 // This is an associative array, also known as a map, and it is implemented 00168 // using a hash table, so it is called HashMap. 00169 // </etymology> 00170 // 00171 // <synopsis> 00172 // This class is an implementation of an associative array. This is a common 00173 // data structure which associates a key of one type with a value of the same 00174 // or different type. Essentially it is an (unordered) array which is indexed 00175 // by an arbitrary type of index, e.g. strings. 00176 // 00177 // This class has two template type parameters. The first is the type of the 00178 // key and the second is the type of the value. Thus the associative array 00179 // is a mapping from the domain, any valid object of the key type, to the 00180 // range, any valid object of the value type. This is a <em>complete</em> 00181 // map which means that every element in the domain maps to one and only 00182 // one element in the range. Those elements which have not been set by the 00183 // user of this class map to a default value which is set at construction 00184 // time. 00185 // 00186 // One of the important features of this class which must be understood 00187 // is the load factor. This factor indicates the average number of items 00188 // in the buckets of the hash table which are currently in use. The goal 00189 // is to have the hash function greatly reduce the number of item which 00190 // must be searched, i.e. to have a limited number of items in each bucket. 00191 // The load factor determines this. Thus a load factor of 1000 or 0 is a 00192 // poor choice. The default load factor is 4 which should generally be 00193 // fine. The load factor is set with <src>setMaxLoad()</src> and retrieved 00194 // with <src>maxLoad()</src>. 00195 // 00196 // For this class to be used, 00197 // three things must be defined: 00198 // <ul> 00199 // <li> a specialization of the <src>hash()</src> templated function for 00200 // the key type or a class derived from <src>HashClass<key></src>. 00201 // Either of which can be used to implement the hash function for 00202 // a particular type. 00203 // <li> an equality operator ( '==' ) for the key 00204 // <li> a default constructor or a specialization of 00205 // <src>defaultHashValue()</src> for the key type 00206 // </ul> 00207 // 00208 // The implementation of this hash map is derived from work on a proposed 00209 // addition to the Standard Template Library by Javier Barreiro, Robert Fraley 00210 // and <a href="http://www.cs.rpi.edu/~musser/">David R. Musser</a>. The information 00211 // which is available includes: 00212 // <ul> 00213 // <li> <a href="ftp://ftp.cs.rpi.edu/pub/stl/hashrationale.ps.Z"> 00214 // rationale for hash map addition to the STL </a> 00215 // <li> <a href="ftp://ftp.cs.rpi.edu/pub/stl/hashdoc.ps.Z"> 00216 // hash map addition proposal</a> 00217 // <li> <a href="ftp://ftp.cs.rpi.edu/pub/stl/hashimp2.Z"> 00218 // preliminary implementation</a> 00219 // </ul> 00220 // each of these sources was utilized in the development of this set of classes, 00221 // and in particular, the preliminary implementation was the source for the hashing 00222 // algorithm used in this set of classes. 00223 // </synopsis> 00224 // 00225 // <example> 00226 // <srcblock> 00227 // #include <casacore/casa/Containers/HashMap.h> 00228 // #include <casacore/casa/BasicSL/String.h> 00229 // #include <casacore/casa/iostream.h> 00230 // 00231 // main() { 00232 // HashMap<String,Int> hash; 00233 // 00234 // hash("one") = 1; // sets the value of key "one" to "1" 00235 // hash.define("two",2); // defines a mapping from key "two" to "2" 00236 // hash("three") = 3; 00237 // hash.define("four",4); 00238 // hash("five") = 5; 00239 // hash.define("six",6); 00240 // 00241 // HashMapIter<String,Int> iter(hash); 00242 // for ( iter.toStart(); ! iter.atEnd(); iter++ ) 00243 // cout << iter.getVal() << ": " << iter.getKey() << endl; 00244 // 00245 // cout << endl << "Diagnostics" << endl << 00246 // "========================" << endl; 00247 // cout << "number defined: " << hash.ndefined() << endl; 00248 // cout << "buckets used: " << hash.usedBuckets() << endl; 00249 // cout << "buckets available: " << hash.availableBuckets() << endl; 00250 // cout << "buckets allocated: " << hash.allocBuckets() << endl; 00251 // cout << "loading: " << hash.loading() << endl; 00252 // cout << "size (in bytes): " << hash.totalSize() << endl; 00253 // 00254 // } 00255 // </srcblock> 00256 // </example> 00257 // 00258 // <motivation> 00259 // There are a couple of reasons why this class was built: 00260 // <ul> 00261 // <li> use of a hash table can be more efficient 00262 // <li> there are a couple of Map classes currently: 00263 // <ul> 00264 // <li> <linkto class=OrderedMap>OrderedMap</linkto> 00265 // <li> <linkto class=SimpleOrderedMap>SimpleOrderedMap</linkto> 00266 // </ul> 00267 // <src>OrderedMap</src> is derived from a map base class, 00268 // <linkto class=Map><src>Map</src></linkto> while 00269 // <src>SimpleOrderedMap</src> is not. This collection of classes 00270 // has resulted in confusion for the users. It is hoped that this 00271 // class can in time replace these other "map" classes by 00272 // satisfying the performance demands of 00273 // <src>SimpleOrderedMap</src> while still meeting the constraints 00274 // of the other map classes. 00275 // </ul> 00276 // </motivation> 00277 // 00278 // <templating arg=key> 00279 // <li> equality operator (operator==) 00280 // <li> function hashFunc() or HashClass derived class provided 00281 // <li> default constructor or defaultHashValue() specialization provided or 00282 // default value provided at time of construction 00283 // </templating> 00284 // <templating arg=val> 00285 // <li> copy constructor 00286 // </templating> 00287 // 00288 // <thrown> 00289 // <li> AipsError 00290 // </thrown> 00291 // 00292 // <todo asof="yyyy/mm/dd"> 00293 // <li> add this feature 00294 // <li> fix this bug 00295 // <li> start discussion of this possible extension 00296 // </todo> 00297 template<class key, class val> class HashMap { 00298 friend class ConstHashMapIter<key,val>; 00299 private: 00300 enum HashMap_Constants { defaultSize_ = 131, defaultMaxLoad_ = 4 }; 00301 public: 00302 static float defaultMaxLoad() { return float(defaultMaxLoad_); } 00303 static uInt defaultSize() { return uInt(defaultSize_); } 00304 00305 // Signature of the hash functions 00306 typedef uInt (*Func)(const key&); 00307 // 00308 // Copy constructor with copy semantics 00309 // 00310 HashMap(const HashMap &); 00311 00312 // 00313 // Default constructor (and variation) which allows for 00314 // specifying: 00315 // <ul> 00316 // <li> a default value, <src>dflt</src> 00317 // <li> the initial number of buckets, <src>size</src> 00318 // <li> the hash function, <src>newfunc</src> 00319 // <li> the maximum load factor, <src>maxlf</src> 00320 // </ul> 00321 // 00322 // This is a pair because the hash function can either be 00323 // a simple function or a class derived from 00324 // <linkto class=HashClass><src>HashClass</src></linkto>. 00325 // <group> 00326 HashMap(const val &dflt = defaultHashValue((const val*)(0)), 00327 uInt size = uInt(defaultSize_), 00328 Func newfunc = hashFunc, 00329 float maxlf = float(defaultMaxLoad_)) 00330 : total_(size), 00331 used_(size), 00332 filled_(0), 00333 defs_(0), 00334 maxLoad_(maxlf), 00335 blk(size, static_cast<List<OrderedPair<key,val> >*>(0)), 00336 func(newfunc), 00337 hashClass(0), 00338 dfltVal(dflt) 00339 { } 00340 00341 HashMap(const val &dflt, uInt size, const HashClass<key> &newfunc, 00342 float maxlf = float(defaultMaxLoad_)) 00343 : total_(size), 00344 used_(size), 00345 filled_(0), 00346 defs_(0), 00347 maxLoad_(maxlf), 00348 blk(size, static_cast<List<OrderedPair<key,val> >*>(0)), 00349 func(0), 00350 hashClass(newfunc.clone()), 00351 dfltVal(dflt) 00352 { } 00353 // </group> 00354 00355 00356 // 00357 // Constructor which allows for specifying: 00358 // <ul> 00359 // <li> default value, <src>dflt</src> 00360 // <li> hash function, <src>newfunc</src> 00361 // <li> maximum load factor, <src>maxlf</src> 00362 // </ul> 00363 // This is provided because often the user will not be interested 00364 // in specifying the initial number of buckets since the number is 00365 // increased as needed to maintain the max load factor. 00366 // 00367 // This is a pair because the hash function can either be 00368 // a simple function or a class derived from 00369 // <linkto class=HashClass><src>HashClass</src></linkto>. 00370 // <group> 00371 HashMap(const val &dflt, Func newfunc, float maxlf = float(defaultMaxLoad_)) 00372 : total_(uInt(defaultSize())), used_(uInt(defaultSize())), 00373 filled_(0), defs_(0), maxLoad_(maxlf), 00374 blk(uInt(defaultSize()), static_cast<List<OrderedPair<key,val> >*>(0)), 00375 func(newfunc), hashClass(0), dfltVal(dflt) 00376 { } 00377 00378 HashMap(const val &dflt, const HashClass<key> &newfunc, 00379 float maxlf = float(defaultMaxLoad_)) 00380 : total_(defaultSize()), used_(defaultSize()), 00381 filled_(0), defs_(0), maxLoad_(maxlf), 00382 blk(uInt(defaultSize_), static_cast<List<OrderedPair<key,val> >*>(0)), func(0), 00383 hashClass(newfunc.clone()), dfltVal(dflt) 00384 { } 00385 // </group> 00386 00387 // 00388 // This copies the right hand side of the assignment. Assignment is done 00389 // with <em>copy semantics</em>. This means that all the state is copied. 00390 // 00391 HashMap<key,val> &operator=(const HashMap<key,val> &); 00392 00393 // 00394 // Retrieve values from the map, possibly for later assignment. 00395 // It is important to realize that for the <em>non-const</em> version 00396 // accessing the key causes an entry to be created in the map if it 00397 // didn't already exist. The "value" for this new entry is the 00398 // default value. "isDefined()" should be used if this behavior is 00399 // not desired. 00400 // <group> 00401 const val &operator() (const key &ky) const; 00402 val &operator() (const key &ky); 00403 // </group> 00404 00405 // 00406 // Define a complete mapping. 00407 // 00408 val &define(const key &k, const val &v); 00409 // 00410 // Remove a user defined mapping from <src>k</src> to a value. 00411 // After this, the value which <src>k</src> maps to reverts to 00412 // the default value. 00413 // 00414 void remove(const key &k); 00415 // 00416 // Check to see if a user defined mapping exists for 00417 // <src>k</src>. This does <em>not</em> modify the map. 00418 // 00419 Bool isDefined(const key &k) const; 00420 // 00421 // Retrieve the default value. 00422 // <group> 00423 const val &defaultVal() const { return dfltVal; } 00424 val &defaultVal() { return dfltVal; } 00425 // </group> 00426 00427 // 00428 // Remove all user defined mapping. 00429 // 00430 void clear() { freeTable(); } 00431 // 00432 // Get or set the maximum load factor. 00433 // 00434 // <group> 00435 float maxLoad() const { return maxLoad_; } 00436 void setMaxLoad( float new_max ) { maxLoad_ = new_max; } 00437 // </group> 00438 00439 // Total number of buckets, i.e. the number the 00440 // hashing mechanism thinks it has. This is the total 00441 // number of buckets used for calculations in the hashing 00442 // mechanism. This may be smaller than <src>allocBuckets()</src> 00443 // because more underlying storage may be allocated than the 00444 // hashing mechanism needs. 00445 uInt totalBuckets() const { return total_; } 00446 // Number of buckets available, i.e. those which 00447 // the hashing mechanism allows itself to use. This 00448 // may be smaller than <src>totalBuckets()</src> because 00449 // the hashing mechanism can restrict itself to some subset 00450 // of the buckets available. 00451 uInt availableBuckets() const { return used_; } 00452 // Number of buckets currently populated by a bucket list. 00453 uInt usedBuckets() const { return filled_; } 00454 // Number of buckets which have been allocated, i.e. the 00455 // total number which have currently been allocated. This 00456 // is the number of buckets created. It may be bigger than 00457 // <src>totalBuckets()</src> because more memory can be 00458 // allocated than the hashing mechanism needs. 00459 uInt allocBuckets() const { return blk.nelements(); } 00460 00461 // Number of mappings which have been defined by the user. 00462 uInt ndefined() const { return defs_; } 00463 00464 // Current hash table loading factor. 00465 float loading() const { return ndefined() ? (float) ndefined() / (float) availableBuckets() : 0.0; } 00466 // Have any mappings been defined by the user. 00467 Bool empty() const { return ndefined() == 0 ? True : False; } 00468 // This returns a Block which has the number of elements in each bucket 00469 // of the table. 00470 Block<uInt> distribution() const; 00471 // Total size of this HashMap minus dynamically allocated memory for 00472 // key or val. 00473 uInt totalSize() const; 00474 00475 // 00476 // dtor 00477 // 00478 virtual ~HashMap(); 00479 00480 enum {HashMapVersion = 1}; 00481 00482 protected: 00483 // Call the hash function. 00484 uInt hash(const key &k) const { 00485 uInt off = func ? (*func)(k) % totalBuckets() : 00486 hashClass ? hashClass->hash(k) % totalBuckets() : 0; 00487 return off >= availableBuckets() ? off - (totalBuckets() >> 1) : off; 00488 } 00489 // 00490 // delete the contents of the hash table 00491 // 00492 void freeTable(); 00493 // 00494 // enlarge the hash table. Returns the bucket which is being 00495 // moved... 00496 // 00497 uInt enlarge(); 00498 // 00499 // populate bucket "to". Returns the bucket which is being 00500 // moved... 00501 // 00502 uInt populate( uInt to ); 00503 00504 private: 00505 // Total Slots 00506 uInt total_; 00507 // Slots Being Used 00508 uInt used_; 00509 // Slots with At Least One Value in the Bucket 00510 uInt filled_; 00511 // Number of Defined Mappings 00512 uInt defs_; 00513 // Maximum load 00514 float maxLoad_; 00515 PtrBlock<List<OrderedPair<key,val> >*> blk; 00516 Func func; 00517 HashClass<key> *hashClass; 00518 val dfltVal; 00519 }; 00520 00521 00522 } //# NAMESPACE CASACORE - END 00523 00524 #ifndef CASACORE_NO_AUTO_TEMPLATES 00525 #include <casacore/casa/Containers/HashMap.tcc> 00526 #endif //# CASACORE_NO_AUTO_TEMPLATES 00527 #endif