casa
$Rev:20696$
|
00001 //# HashMap.h: this defines HashMap, which is a hashed associative array 00002 //# Copyright (C) 1995,1996,1999,2000,2001 00003 //# Associated Universities, Inc. Washington DC, USA. 00004 //# 00005 //# This library is free software; you can redistribute it and/or modify it 00006 //# under the terms of the GNU Library General Public License as published by 00007 //# the Free Software Foundation; either version 2 of the License, or (at your 00008 //# option) any later version. 00009 //# 00010 //# This library is distributed in the hope that it will be useful, but WITHOUT 00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 00012 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 00013 //# License for more details. 00014 //# 00015 //# You should have received a copy of the GNU Library General Public License 00016 //# along with this library; if not, write to the Free Software Foundation, 00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA. 00018 //# 00019 //# Correspondence concerning AIPS++ should be addressed as follows: 00020 //# Internet email: aips2-request@nrao.edu. 00021 //# Postal address: AIPS++ Project Office 00022 //# National Radio Astronomy Observatory 00023 //# 520 Edgemont Road 00024 //# Charlottesville, VA 22903-2475 USA 00025 //# 00026 //# $Id: HashMap.h 20551 2009-03-25 00:11:33Z Malte.Marquarding $ 00027 00028 #ifndef CASA_HASHMAP_H 00029 #define CASA_HASHMAP_H 00030 00031 //# Includes 00032 #include <casa/Containers/Block.h> 00033 #include <casa/Containers/List.h> 00034 #include <casa/Containers/OrderedPair.h> 00035 #include <casa/Exceptions/Error.h> 00036 00037 namespace casa { //# NAMESPACE CASA - BEGIN 00038 00039 //# Forward Declarations 00040 template<class key,class val> class ConstHashMapIter; 00041 extern void throw_invalid_hashmapiter_error(); 00042 extern void throw_hashmapiter_init_error(); 00043 00044 // <summary> 00045 // Hash functions for standard types 00046 // </summary> 00047 // 00048 // <synopsis> 00049 // These are the declarations for the standard hash functions 00050 // used by <linkto class=HashMap>HashMap</linkto>. In general, a function 00051 // such as these is defined for each type that is to be used as 00052 // a key in <linkto class=HashMap>HashMap</linkto>. 00053 // 00054 // These hash functions map the key type to any integer. This 00055 // integer is then used by <linkto class=HashMap>HashMap</linkto> to 00056 // select a bucket in the hash table. 00057 // </synopsis> 00058 // 00059 // <group name=hashfunc> 00060 uInt hashFunc(const String &); 00061 uInt hashFunc(const float &); 00062 uInt hashFunc(const double &); 00063 uInt hashFunc(const int &); 00064 uInt hashFunc(const unsigned &); 00065 //</group> 00066 00067 00068 // <summary> 00069 // Specify the default values for HashMap keys 00070 // </summary> 00071 // 00072 // <synopsis> 00073 // These are the declarations for a set of functions which provide 00074 // the default values for types which are used as keys in 00075 // <linkto class=HashMap>HashMap</linkto>. 00076 // </synopsis> 00077 // 00078 // <group name=defaulthashvalue> 00079 const Int &defaultHashValue(const Int *); 00080 const uInt &defaultHashValue(const uInt *); 00081 const Long &defaultHashValue(const Long *); 00082 const uLong &defaultHashValue(const uLong *); 00083 const Float &defaultHashValue(const Float *); 00084 const Double &defaultHashValue(const Double *); 00085 const lDouble &defaultHashValue(const lDouble *); 00086 // </group> 00087 00088 // <summary> 00089 // Hash function with state 00090 // </summary> 00091 // <use visibility=export> 00092 // <reviewed reviewer="" date="yyyy/mm/dd" tests="" demos=""> 00093 // 00094 // <etymology> 00095 // This is basically a way of specifying a hash function, but 00096 // it is implemented as a class. Thus it is called HashClass, 00097 // similar to "hash function". 00098 // </etymology> 00099 // 00100 // <synopsis> 00101 // This class is used to specify a hash function. Sometimes a hash 00102 // function may require state, it may be useful to create a 00103 // hierarchy of hash functions, or it may be useful to create a class 00104 // which provides for hashing as well as other functionality. This 00105 // class can be used as a base class for any of these purposed. This 00106 // class is intended for parameterization of 00107 // <linkto class=HashMap>HashMap</linkto>. 00108 // 00109 // The hash function maps the key type to any integer. This 00110 // integer is then used by <linkto class=HashMap>HashMap</linkto> to 00111 // select a bucket in the hash table. 00112 // </synopsis> 00113 // 00114 // <example> 00115 // If one wished to make a HashClass for integers, something like the 00116 // following might be done: 00117 // <srcblock> 00118 // class IntHash : public HashClass<Int> { 00119 // public: 00120 // uInt hash(const Int &v) const { return (uInt) v; } 00121 // uInt hash(const Int &v) { return (uInt) v; } 00122 // HashClass<Int> *clone() const { return new IntHash; } 00123 // IntHash() : HashClass<Int>() { } 00124 // }; 00125 // </srcblock> 00126 // </example> 00127 // 00128 // <motivation> 00129 // There may be occasions when it is more convenient to use a class 00130 // instead of a single function. This base class provides a starting 00131 // point plus, and <src>HashMap<k,v></src> has the necessary hooks to 00132 // make use of classes derived from this class. 00133 // </motivation> 00134 // 00135 template<class key> class HashClass { 00136 public: 00137 // 00138 // This function maps elements of <src>key</src> type to any integer. 00139 // This integer is then used by <linkto class=HashMap>HashMap</linkto> to 00140 // select a bucket in the hash table. 00141 // 00142 virtual uInt hash(const key &) = 0; 00143 00144 // 00145 // This function is used to make a <em>deep copy</em>. This means that 00146 // the copy, which this function returns, contains all derived information. 00147 // 00148 virtual HashClass<key> *clone() const = 0; 00149 00150 HashClass(); 00151 virtual ~HashClass(); 00152 }; 00153 00154 00155 // <summary> 00156 // Associative Array with a hash table implementation 00157 // </summary> 00158 // <use visibility=export> 00159 // <reviewed reviewer="" date="yyyy/mm/dd" tests="" demos=""> 00160 // 00161 // <prerequisite> 00162 // <li> basic concepts behind hash tables 00163 // </prerequisite> 00164 // 00165 // <etymology> 00166 // This is an associative array, also known as a map, and it is implemented 00167 // using a hash table, so it is called HashMap. 00168 // </etymology> 00169 // 00170 // <synopsis> 00171 // This class is an implementation of an associative array. This is a common 00172 // data structure which associates a key of one type with a value of the same 00173 // or different type. Essentially it is an (unordered) array which is indexed 00174 // by an arbitrary type of index, e.g. strings. 00175 // 00176 // This class has two template type parameters. The first is the type of the 00177 // key and the second is the type of the value. Thus the associative array 00178 // is a mapping from the domain, any valid object of the key type, to the 00179 // range, any valid object of the value type. This is a <em>complete</em> 00180 // map which means that every element in the domain maps to one and only 00181 // one element in the range. Those elements which have not been set by the 00182 // user of this class map to a default value which is set at construction 00183 // time. 00184 // 00185 // One of the important features of this class which must be understood 00186 // is the load factor. This factor indicates the average number of items 00187 // in the buckets of the hash table which are currently in use. The goal 00188 // is to have the hash function greatly reduce the number of item which 00189 // must be searched, i.e. to have a limited number of items in each bucket. 00190 // The load factor determines this. Thus a load factor of 1000 or 0 is a 00191 // poor choice. The default load factor is 4 which should generally be 00192 // fine. The load factor is set with <src>setMaxLoad()</src> and retrieved 00193 // with <src>maxLoad()</src>. 00194 // 00195 // For this class to be used, 00196 // three things must be defined: 00197 // <ul> 00198 // <li> a specialization of the <src>hash()</src> templated function for 00199 // the key type or a class derived from <src>HashClass<key></src>. 00200 // Either of which can be used to implement the hash function for 00201 // a particular type. 00202 // <li> an equality operator ( '==' ) for the key 00203 // <li> a default constructor or a specialization of 00204 // <src>defaultHashValue()</src> for the key type 00205 // </ul> 00206 // 00207 // The implementation of this hash map is derived from work on a proposed 00208 // addition to the Standard Template Library by Javier Barreiro, Robert Fraley 00209 // and <a href="http://www.cs.rpi.edu/~musser/">David R. Musser</a>. The information 00210 // which is available includes: 00211 // <ul> 00212 // <li> <a href="ftp://ftp.cs.rpi.edu/pub/stl/hashrationale.ps.Z"> 00213 // rationale for hash map addition to the STL </a> 00214 // <li> <a href="ftp://ftp.cs.rpi.edu/pub/stl/hashdoc.ps.Z"> 00215 // hash map addition proposal</a> 00216 // <li> <a href="ftp://ftp.cs.rpi.edu/pub/stl/hashimp2.Z"> 00217 // preliminary implementation</a> 00218 // </ul> 00219 // each of these sources was utilized in the development of this set of classes, 00220 // and in particular, the preliminary implementation was the source for the hashing 00221 // algorithm used in this set of classes. 00222 // </synopsis> 00223 // 00224 // <example> 00225 // <srcblock> 00226 // #include <casa/Containers/HashMap.h> 00227 // #include <casa/BasicSL/String.h> 00228 // #include <casa/iostream.h> 00229 // 00230 // main() { 00231 // HashMap<String,Int> hash; 00232 // 00233 // hash("one") = 1; // sets the value of key "one" to "1" 00234 // hash.define("two",2); // defines a mapping from key "two" to "2" 00235 // hash("three") = 3; 00236 // hash.define("four",4); 00237 // hash("five") = 5; 00238 // hash.define("six",6); 00239 // 00240 // HashMapIter<String,Int> iter(hash); 00241 // for ( iter.toStart(); ! iter.atEnd(); iter++ ) 00242 // cout << iter.getVal() << ": " << iter.getKey() << endl; 00243 // 00244 // cout << endl << "Diagnostics" << endl << 00245 // "========================" << endl; 00246 // cout << "number defined: " << hash.ndefined() << endl; 00247 // cout << "buckets used: " << hash.usedBuckets() << endl; 00248 // cout << "buckets available: " << hash.availableBuckets() << endl; 00249 // cout << "buckets allocated: " << hash.allocBuckets() << endl; 00250 // cout << "loading: " << hash.loading() << endl; 00251 // cout << "size (in bytes): " << hash.totalSize() << endl; 00252 // 00253 // } 00254 // </srcblock> 00255 // </example> 00256 // 00257 // <motivation> 00258 // There are a couple of reasons why this class was built: 00259 // <ul> 00260 // <li> use of a hash table can be more efficient 00261 // <li> there are a couple of Map classes currently: 00262 // <ul> 00263 // <li> <linkto class=OrderedMap>OrderedMap</linkto> 00264 // <li> <linkto class=SimpleOrderedMap>SimpleOrderedMap</linkto> 00265 // </ul> 00266 // <src>OrderedMap</src> is derived from a map base class, 00267 // <linkto class=Map><src>Map</src></linkto> while 00268 // <src>SimpleOrderedMap</src> is not. This collection of classes 00269 // has resulted in confusion for the users. It is hoped that this 00270 // class can in time replace these other "map" classes by 00271 // satisfying the performance demands of 00272 // <src>SimpleOrderedMap</src> while still meeting the constraints 00273 // of the other map classes. 00274 // </ul> 00275 // </motivation> 00276 // 00277 // <templating arg=key> 00278 // <li> equality operator (operator==) 00279 // <li> function hashFunc() or HashClass derived class provided 00280 // <li> default constructor or defaultHashValue() specialization provided or 00281 // default value provided at time of construction 00282 // </templating> 00283 // <templating arg=val> 00284 // <li> copy constructor 00285 // </templating> 00286 // 00287 // <thrown> 00288 // <li> AipsError 00289 // </thrown> 00290 // 00291 // <todo asof="yyyy/mm/dd"> 00292 // <li> add this feature 00293 // <li> fix this bug 00294 // <li> start discussion of this possible extension 00295 // </todo> 00296 template<class key, class val> class HashMap { 00297 friend class ConstHashMapIter<key,val>; 00298 private: 00299 enum HashMap_Constants { defaultSize_ = 131, defaultMaxLoad_ = 4 }; 00300 public: 00301 static float defaultMaxLoad() { return float(defaultMaxLoad_); } 00302 static uInt defaultSize() { return uInt(defaultSize_); } 00303 00304 // Signature of the hash functions 00305 typedef uInt (*Func)(const key&); 00306 // 00307 // Copy constructor with copy semantics 00308 // 00309 HashMap(const HashMap &); 00310 00311 // 00312 // Default constructor (and variation) which allows for 00313 // specifying: 00314 // <ul> 00315 // <li> a default value, <src>dflt</src> 00316 // <li> the initial number of buckets, <src>size</src> 00317 // <li> the hash function, <src>newfunc</src> 00318 // <li> the maximum load factor, <src>maxlf</src> 00319 // </ul> 00320 // 00321 // This is a pair because the hash function can either be 00322 // a simple function or a class derived from 00323 // <linkto class=HashClass><src>HashClass</src></linkto>. 00324 // <group> 00325 HashMap(const val &dflt = defaultHashValue((const val*)(0)), 00326 uInt size = uInt(defaultSize_), 00327 Func newfunc = hashFunc, 00328 float maxlf = float(defaultMaxLoad_)) 00329 : total_(size), 00330 used_(size), 00331 filled_(0), 00332 defs_(0), 00333 maxLoad_(maxlf), 00334 blk(size, static_cast<List<OrderedPair<key,val> >*>(0)), 00335 func(newfunc), 00336 hashClass(0), 00337 dfltVal(dflt) 00338 { } 00339 00340 HashMap(const val &dflt, uInt size, const HashClass<key> &newfunc, 00341 float maxlf = float(defaultMaxLoad_)) 00342 : total_(size), 00343 used_(size), 00344 filled_(0), 00345 defs_(0), 00346 maxLoad_(maxlf), 00347 blk(size, static_cast<List<OrderedPair<key,val> >*>(0)), 00348 func(0), 00349 hashClass(newfunc.clone()), 00350 dfltVal(dflt) 00351 { } 00352 // </group> 00353 00354 00355 // 00356 // Constructor which allows for specifying: 00357 // <ul> 00358 // <li> default value, <src>dflt</src> 00359 // <li> hash function, <src>newfunc</src> 00360 // <li> maximum load factor, <src>maxlf</src> 00361 // </ul> 00362 // This is provided because often the user will not be interested 00363 // in specifying the initial number of buckets since the number is 00364 // increased as needed to maintain the max load factor. 00365 // 00366 // This is a pair because the hash function can either be 00367 // a simple function or a class derived from 00368 // <linkto class=HashClass><src>HashClass</src></linkto>. 00369 // <group> 00370 HashMap(const val &dflt, Func newfunc, float maxlf = float(defaultMaxLoad_)) 00371 : total_(uInt(defaultSize())), used_(uInt(defaultSize())), 00372 filled_(0), defs_(0), maxLoad_(maxlf), 00373 blk(uInt(defaultSize()), static_cast<List<OrderedPair<key,val> >*>(0)), 00374 func(newfunc), hashClass(0), dfltVal(dflt) 00375 { } 00376 00377 HashMap(const val &dflt, const HashClass<key> &newfunc, 00378 float maxlf = float(defaultMaxLoad_)) 00379 : total_(defaultSize()), used_(defaultSize()), 00380 filled_(0), defs_(0), maxLoad_(maxlf), 00381 blk(uInt(defaultSize_), static_cast<List<OrderedPair<key,val> >*>(0)), func(0), 00382 hashClass(newfunc.clone()), dfltVal(dflt) 00383 { } 00384 // </group> 00385 00386 // 00387 // This copies the right hand side of the assignment. Assignment is done 00388 // with <em>copy semantics</em>. This means that all the state is copied. 00389 // 00390 HashMap<key,val> &operator=(const HashMap<key,val> &); 00391 00392 // 00393 // Retrieve values from the map, possibly for later assignment. 00394 // It is important to realize that for the <em>non-const</em> version 00395 // accessing the key causes an entry to be created in the map if it 00396 // didn't already exist. The "value" for this new entry is the 00397 // default value. "isDefined()" should be used if this behavior is 00398 // not desired. 00399 // <group> 00400 const val &operator() (const key &ky) const; 00401 val &operator() (const key &ky); 00402 // </group> 00403 00404 // 00405 // Define a complete mapping. 00406 // 00407 val &define(const key &k, const val &v); 00408 // 00409 // Remove a user defined mapping from <src>k</src> to a value. 00410 // After this, the value which <src>k</src> maps to reverts to 00411 // the default value. 00412 // 00413 void remove(const key &k); 00414 // 00415 // Check to see if a user defined mapping exists for 00416 // <src>k</src>. This does <em>not</em> modify the map. 00417 // 00418 Bool isDefined(const key &k) const; 00419 // 00420 // Retrieve the default value. 00421 // <group> 00422 const val &defaultVal() const { return dfltVal; } 00423 val &defaultVal() { return dfltVal; } 00424 // </group> 00425 00426 // 00427 // Remove all user defined mapping. 00428 // 00429 void clear() { freeTable(); } 00430 // 00431 // Get or set the maximum load factor. 00432 // 00433 // <group> 00434 float maxLoad() const { return maxLoad_; } 00435 void setMaxLoad( float new_max ) { maxLoad_ = new_max; } 00436 // </group> 00437 00438 // Total number of buckets, i.e. the number the 00439 // hashing mechanism thinks it has. This is the total 00440 // number of buckets used for calculations in the hashing 00441 // mechanism. This may be smaller than <src>allocBuckets()</src> 00442 // because more underlying storage may be allocated than the 00443 // hashing mechanism needs. 00444 uInt totalBuckets() const { return total_; } 00445 // Number of buckets available, i.e. those which 00446 // the hashing mechanism allows itself to use. This 00447 // may be smaller than <src>totalBuckets()</src> because 00448 // the hashing mechanism can restrict itself to some subset 00449 // of the buckets available. 00450 uInt availableBuckets() const { return used_; } 00451 // Number of buckets currently populated by a bucket list. 00452 uInt usedBuckets() const { return filled_; } 00453 // Number of buckets which have been allocated, i.e. the 00454 // total number which have currently been allocated. This 00455 // is the number of buckets created. It may be bigger than 00456 // <src>totalBuckets()</src> because more memory can be 00457 // allocated than the hashing mechanism needs. 00458 uInt allocBuckets() const { return blk.nelements(); } 00459 00460 // Number of mappings which have been defined by the user. 00461 uInt ndefined() const { return defs_; } 00462 00463 // Current hash table loading factor. 00464 float loading() const { return ndefined() ? (float) ndefined() / (float) availableBuckets() : 0.0; } 00465 // Have any mappings been defined by the user. 00466 Bool empty() const { return ndefined() == 0 ? True : False; } 00467 // This returns a Block which has the number of elements in each bucket 00468 // of the table. 00469 Block<uInt> distribution() const; 00470 // Total size of this HashMap minus dynamically allocated memory for 00471 // key or val. 00472 uInt totalSize() const; 00473 00474 // 00475 // dtor 00476 // 00477 virtual ~HashMap(); 00478 00479 enum {HashMapVersion = 1}; 00480 00481 protected: 00482 // Call the hash function. 00483 uInt hash(const key &k) const { 00484 uInt off = func ? (*func)(k) % totalBuckets() : 00485 hashClass ? hashClass->hash(k) % totalBuckets() : 0; 00486 return off >= availableBuckets() ? off - (totalBuckets() >> 1) : off; 00487 } 00488 // 00489 // delete the contents of the hash table 00490 // 00491 void freeTable(); 00492 // 00493 // enlarge the hash table. Returns the bucket which is being 00494 // moved... 00495 // 00496 uInt enlarge(); 00497 // 00498 // populate bucket "to". Returns the bucket which is being 00499 // moved... 00500 // 00501 uInt populate( uInt to ); 00502 00503 private: 00504 // Total Slots 00505 uInt total_; 00506 // Slots Being Used 00507 uInt used_; 00508 // Slots with At Least One Value in the Bucket 00509 uInt filled_; 00510 // Number of Defined Mappings 00511 uInt defs_; 00512 // Maximum load 00513 float maxLoad_; 00514 PtrBlock<List<OrderedPair<key,val> >*> blk; 00515 Func func; 00516 HashClass<key> *hashClass; 00517 val dfltVal; 00518 }; 00519 00520 00521 } //# NAMESPACE CASA - END 00522 00523 #ifndef CASACORE_NO_AUTO_TEMPLATES 00524 #include <casa/Containers/HashMap.tcc> 00525 #endif //# CASACORE_NO_AUTO_TEMPLATES 00526 #endif