casa  $Rev:20696$
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
HashMap.h
Go to the documentation of this file.
00001 //# HashMap.h: this defines HashMap, which is a hashed associative array
00002 //# Copyright (C) 1995,1996,1999,2000,2001
00003 //# Associated Universities, Inc. Washington DC, USA.
00004 //#
00005 //# This library is free software; you can redistribute it and/or modify it
00006 //# under the terms of the GNU Library General Public License as published by
00007 //# the Free Software Foundation; either version 2 of the License, or (at your
00008 //# option) any later version.
00009 //#
00010 //# This library is distributed in the hope that it will be useful, but WITHOUT
00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00013 //# License for more details.
00014 //#
00015 //# You should have received a copy of the GNU Library General Public License
00016 //# along with this library; if not, write to the Free Software Foundation,
00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00018 //#
00019 //# Correspondence concerning AIPS++ should be addressed as follows:
00020 //#        Internet email: aips2-request@nrao.edu.
00021 //#        Postal address: AIPS++ Project Office
00022 //#                        National Radio Astronomy Observatory
00023 //#                        520 Edgemont Road
00024 //#                        Charlottesville, VA 22903-2475 USA
00025 //#
00026 //# $Id: HashMap.h 20551 2009-03-25 00:11:33Z Malte.Marquarding $
00027 
00028 #ifndef CASA_HASHMAP_H
00029 #define CASA_HASHMAP_H
00030 
00031 //# Includes
00032 #include <casa/Containers/Block.h>
00033 #include <casa/Containers/List.h>
00034 #include <casa/Containers/OrderedPair.h>
00035 #include <casa/Exceptions/Error.h>
00036 
00037 namespace casa { //# NAMESPACE CASA - BEGIN
00038 
00039 //# Forward Declarations
00040 template<class key,class val> class ConstHashMapIter;
00041 extern void throw_invalid_hashmapiter_error();
00042 extern void throw_hashmapiter_init_error();
00043 
00044 // <summary>
00045 //     Hash functions for standard types
00046 // </summary>
00047 //
00048 // <synopsis>
00049 // These are the declarations for the standard hash functions
00050 // used by <linkto class=HashMap>HashMap</linkto>. In general, a function
00051 // such as these is defined for each type that is to be used as
00052 // a key in <linkto class=HashMap>HashMap</linkto>.
00053 //
00054 // These hash functions map the key type to any integer. This
00055 // integer is then used by <linkto class=HashMap>HashMap</linkto> to
00056 // select a bucket in the hash table.
00057 // </synopsis>
00058 //
00059 // <group name=hashfunc>
00060 uInt hashFunc(const String &);
00061 uInt hashFunc(const float &);
00062 uInt hashFunc(const double &);
00063 uInt hashFunc(const int &);
00064 uInt hashFunc(const unsigned &);
00065 //</group>
00066 
00067 
00068 // <summary>
00069 //     Specify the default values for HashMap keys
00070 // </summary>
00071 //
00072 // <synopsis>
00073 // These are the declarations for a set of functions which provide
00074 // the default values for types which are used as keys in
00075 // <linkto class=HashMap>HashMap</linkto>.
00076 // </synopsis>
00077 //
00078 // <group name=defaulthashvalue>
00079 const Int &defaultHashValue(const Int *);
00080 const uInt &defaultHashValue(const uInt *);
00081 const Long &defaultHashValue(const Long *);
00082 const uLong &defaultHashValue(const uLong *);
00083 const Float &defaultHashValue(const Float *);
00084 const Double &defaultHashValue(const Double *);
00085 const lDouble &defaultHashValue(const lDouble *);
00086 // </group>
00087 
00088 // <summary>
00089 //     Hash function with state
00090 // </summary>
00091 // <use visibility=export>
00092 // <reviewed reviewer="" date="yyyy/mm/dd" tests="" demos="">
00093 //
00094 // <etymology>
00095 //      This is basically a way of specifying a hash function, but
00096 //      it is implemented as a class. Thus it is called HashClass,
00097 //      similar to "hash function".
00098 // </etymology>
00099 //
00100 // <synopsis>
00101 //      This class is used to specify a hash function. Sometimes a hash
00102 //      function may require state, it may be useful to create a
00103 //      hierarchy of hash functions, or it may be useful to create a class
00104 //      which provides for hashing as well as other functionality. This
00105 //      class can be used as a base class for any of these purposed. This
00106 //      class is intended for parameterization of
00107 //      <linkto class=HashMap>HashMap</linkto>.
00108 //
00109 //      The hash function maps the key type to any integer. This
00110 //      integer is then used by <linkto class=HashMap>HashMap</linkto> to
00111 //      select a bucket in the hash table.
00112 // </synopsis>
00113 //
00114 // <example>
00115 //      If one wished to make a HashClass for integers, something like the
00116 //      following might be done:
00117 //      <srcblock>
00118 //      class IntHash : public HashClass<Int> {
00119 //      public:
00120 //        uInt hash(const Int &v) const { return (uInt) v; }
00121 //        uInt hash(const Int &v) { return (uInt) v; }
00122 //        HashClass<Int> *clone() const { return new IntHash; }
00123 //        IntHash() : HashClass<Int>() { }
00124 //      };
00125 //      </srcblock>
00126 // </example>
00127 //
00128 // <motivation>
00129 //      There may be occasions when it is more convenient to use a class
00130 //      instead of a single function. This base class provides a starting
00131 //      point plus, and <src>HashMap<k,v></src> has the necessary hooks to
00132 //      make use of classes derived from this class.
00133 // </motivation>
00134 //
00135 template<class key> class HashClass {
00136 public:
00137     //
00138     // This function maps elements of <src>key</src> type to any integer.
00139     // This integer is then used by <linkto class=HashMap>HashMap</linkto> to
00140     // select a bucket in the hash table.
00141     //
00142     virtual uInt hash(const key &) = 0;
00143 
00144     //
00145     // This function is used to make a <em>deep copy</em>. This means that
00146     // the copy, which this function returns, contains all derived information.
00147     //
00148     virtual HashClass<key> *clone() const = 0;
00149 
00150     HashClass();
00151     virtual ~HashClass();
00152 };
00153 
00154 
00155 // <summary>
00156 //     Associative Array with a hash table implementation
00157 // </summary>
00158 // <use visibility=export>
00159 // <reviewed reviewer="" date="yyyy/mm/dd" tests="" demos="">
00160 //
00161 // <prerequisite>
00162 //   <li> basic concepts behind hash tables
00163 // </prerequisite>
00164 //
00165 // <etymology>
00166 //   This is an associative array, also known as a map, and it is implemented
00167 //   using a hash table, so it is called HashMap.
00168 // </etymology>
00169 //
00170 // <synopsis>
00171 //   This class is an implementation of an associative array. This is a common
00172 //   data structure which associates a key of one type with a value of the same
00173 //   or different type. Essentially it is an (unordered) array which is indexed
00174 //   by an arbitrary type of index, e.g. strings.
00175 //
00176 //   This class has two template type parameters. The first is the type of the
00177 //   key and the second is the type of the value. Thus the associative array
00178 //   is a mapping from the domain, any valid object of the key type, to the
00179 //   range, any valid object of the value type. This is a <em>complete</em>
00180 //   map which means that every element in the domain maps to one and only
00181 //   one element in the range. Those elements which have not been set by the
00182 //   user of this class map to a default value which is set at construction
00183 //   time.
00184 //
00185 //   One of the important features of this class which must be understood
00186 //   is the load factor. This factor indicates the average number of items
00187 //   in the buckets of the hash table which are currently in use. The goal
00188 //   is to have the hash function greatly reduce the number of item which
00189 //   must be searched, i.e. to have a limited number of items in each bucket.
00190 //   The load factor determines this. Thus a load factor of 1000 or 0 is a
00191 //   poor choice. The default load factor is 4 which should generally be
00192 //   fine. The load factor is set with <src>setMaxLoad()</src> and retrieved
00193 //   with <src>maxLoad()</src>.
00194 //
00195 //   For this class to be used,
00196 //   three things must be defined:
00197 //   <ul>
00198 //      <li> a specialization of the <src>hash()</src> templated function for
00199 //              the key type or a class derived from <src>HashClass<key></src>.
00200 //              Either of which can be used to implement the hash function for
00201 //              a particular type.
00202 //      <li> an equality operator ( '==' ) for the key
00203 //      <li> a default constructor or a specialization of
00204 //              <src>defaultHashValue()</src> for the key type
00205 //   </ul>
00206 //
00207 //   The implementation of this hash map is derived from work on a proposed
00208 //   addition to the Standard Template Library by Javier Barreiro, Robert Fraley
00209 //   and <a href="http://www.cs.rpi.edu/~musser/">David R. Musser</a>. The information
00210 //   which is available includes:
00211 //   <ul>
00212 //        <li> <a href="ftp://ftp.cs.rpi.edu/pub/stl/hashrationale.ps.Z">
00213 //             rationale for hash map addition to the STL </a>
00214 //        <li> <a href="ftp://ftp.cs.rpi.edu/pub/stl/hashdoc.ps.Z">
00215 //             hash map addition proposal</a>
00216 //        <li> <a href="ftp://ftp.cs.rpi.edu/pub/stl/hashimp2.Z">
00217 //             preliminary implementation</a>
00218 //   </ul>
00219 //   each of these sources was utilized in the development of this set of classes,
00220 //   and in particular, the preliminary implementation was the source for the hashing
00221 //   algorithm used in this set of classes.
00222 // </synopsis>
00223 //
00224 // <example>
00225 //    <srcblock>
00226 //    #include <casa/Containers/HashMap.h>
00227 //    #include <casa/BasicSL/String.h>
00228 //    #include <casa/iostream.h>
00229 //   
00230 //    main() {
00231 //      HashMap<String,Int> hash;
00232 //    
00233 //      hash("one") = 1;                // sets the value of key "one" to "1"
00234 //      hash.define("two",2);           // defines a mapping from key "two" to "2"
00235 //      hash("three") = 3;
00236 //      hash.define("four",4);
00237 //      hash("five") = 5;
00238 //      hash.define("six",6);
00239 //    
00240 //      HashMapIter<String,Int> iter(hash);
00241 //      for ( iter.toStart(); ! iter.atEnd(); iter++ )
00242 //          cout << iter.getVal() << ": " << iter.getKey() << endl;
00243 //    
00244 //      cout << endl << "Diagnostics" << endl << 
00245 //                "========================" << endl;
00246 //      cout << "number defined:    " << hash.ndefined() << endl;
00247 //      cout << "buckets used:      " << hash.usedBuckets() << endl;
00248 //      cout << "buckets available: " << hash.availableBuckets() << endl;
00249 //      cout << "buckets allocated: " << hash.allocBuckets() << endl;
00250 //      cout << "loading:           " << hash.loading() << endl;
00251 //      cout << "size (in bytes):   " << hash.totalSize() << endl;
00252 //    
00253 //    }
00254 //    </srcblock>
00255 // </example>
00256 //
00257 // <motivation>
00258 //    There are a couple of reasons why this class was built:
00259 //         <ul>
00260 //             <li> use of a hash table can be more efficient
00261 //             <li> there are a couple of Map classes currently:
00262 //               <ul>
00263 //                 <li> <linkto class=OrderedMap>OrderedMap</linkto>
00264 //                 <li> <linkto class=SimpleOrderedMap>SimpleOrderedMap</linkto>
00265 //               </ul>
00266 //               <src>OrderedMap</src> is derived from a map base class,
00267 //               <linkto class=Map><src>Map</src></linkto> while
00268 //               <src>SimpleOrderedMap</src> is not. This collection of classes
00269 //               has resulted in confusion for the users. It is hoped that this
00270 //               class can in time replace these other "map" classes by
00271 //               satisfying the performance demands of
00272 //               <src>SimpleOrderedMap</src> while still meeting the constraints
00273 //               of the other map classes.
00274 //         </ul>
00275 // </motivation>
00276 //
00277 // <templating arg=key>
00278 //    <li> equality operator (operator==)
00279 //    <li> function hashFunc() or HashClass derived class provided
00280 //    <li> default constructor or defaultHashValue() specialization provided or
00281 //         default value provided at time of construction
00282 // </templating>
00283 // <templating arg=val>
00284 //    <li> copy constructor
00285 // </templating>
00286 //
00287 // <thrown>
00288 //    <li> AipsError
00289 // </thrown>
00290 //
00291 // <todo asof="yyyy/mm/dd">
00292 //   <li> add this feature
00293 //   <li> fix this bug
00294 //   <li> start discussion of this possible extension
00295 // </todo>
00296 template<class key, class val> class HashMap {
00297 friend class ConstHashMapIter<key,val>;
00298 private:
00299     enum HashMap_Constants { defaultSize_ = 131, defaultMaxLoad_ = 4 };
00300 public:
00301     static float defaultMaxLoad() { return float(defaultMaxLoad_); }
00302     static uInt  defaultSize() { return uInt(defaultSize_); }
00303 
00304     // Signature of the hash functions
00305     typedef uInt (*Func)(const key&);
00306     //
00307     // Copy constructor with copy semantics
00308     //
00309     HashMap(const HashMap &);
00310 
00311     //
00312     // Default constructor (and variation) which allows for
00313     // specifying:
00314     //   <ul>
00315     //     <li> a default value, <src>dflt</src>
00316     //     <li> the initial number of buckets, <src>size</src>
00317     //     <li> the hash function, <src>newfunc</src>
00318     //     <li> the maximum load factor, <src>maxlf</src>
00319     //   </ul>
00320     //
00321     // This is a pair because the hash function can either be
00322     // a simple function or a class derived from
00323     // <linkto class=HashClass><src>HashClass</src></linkto>.
00324     // <group>
00325     HashMap(const val &dflt = defaultHashValue((const val*)(0)),
00326             uInt size = uInt(defaultSize_), 
00327             Func newfunc = hashFunc,
00328             float maxlf = float(defaultMaxLoad_))
00329       : total_(size),
00330         used_(size),
00331         filled_(0),
00332         defs_(0),
00333         maxLoad_(maxlf),
00334         blk(size, static_cast<List<OrderedPair<key,val> >*>(0)),
00335         func(newfunc),
00336         hashClass(0),
00337         dfltVal(dflt)
00338       { }
00339 
00340     HashMap(const val &dflt, uInt size, const HashClass<key> &newfunc,
00341             float maxlf = float(defaultMaxLoad_))
00342       : total_(size),
00343         used_(size),
00344         filled_(0),
00345         defs_(0), 
00346         maxLoad_(maxlf),
00347         blk(size, static_cast<List<OrderedPair<key,val> >*>(0)),
00348         func(0),
00349         hashClass(newfunc.clone()),
00350         dfltVal(dflt)
00351       { }
00352     // </group>
00353 
00354 
00355     //
00356     // Constructor which allows for specifying:
00357     //   <ul>
00358     //     <li> default value, <src>dflt</src>
00359     //     <li> hash function, <src>newfunc</src>
00360     //     <li> maximum load factor, <src>maxlf</src>
00361     //   </ul>
00362     // This is provided because often the user will not be interested
00363     // in specifying the initial number of buckets since the number is
00364     // increased as needed to maintain the max load factor.
00365     //
00366     // This is a pair because the hash function can either be
00367     // a simple function or a class derived from
00368     // <linkto class=HashClass><src>HashClass</src></linkto>.
00369     // <group>
00370     HashMap(const val &dflt, Func newfunc, float maxlf = float(defaultMaxLoad_))
00371       : total_(uInt(defaultSize())), used_(uInt(defaultSize())),
00372         filled_(0), defs_(0), maxLoad_(maxlf),
00373         blk(uInt(defaultSize()), static_cast<List<OrderedPair<key,val> >*>(0)),
00374         func(newfunc), hashClass(0), dfltVal(dflt)
00375     { }
00376 
00377     HashMap(const val &dflt, const HashClass<key> &newfunc,
00378             float maxlf = float(defaultMaxLoad_)) 
00379       : total_(defaultSize()), used_(defaultSize()),
00380         filled_(0), defs_(0), maxLoad_(maxlf), 
00381         blk(uInt(defaultSize_), static_cast<List<OrderedPair<key,val> >*>(0)), func(0),
00382         hashClass(newfunc.clone()), dfltVal(dflt)
00383     { }
00384     // </group>
00385 
00386     //
00387     // This copies the right hand side of the assignment. Assignment is done
00388     // with <em>copy semantics</em>. This means that all the state is copied.
00389     //
00390     HashMap<key,val> &operator=(const HashMap<key,val> &);
00391 
00392     //
00393     // Retrieve values from the map, possibly for later assignment.
00394     // It is important to realize that for the <em>non-const</em> version
00395     // accessing the key causes an entry to be created in the map if it
00396     // didn't already exist. The "value" for this new entry is the
00397     // default value. "isDefined()" should be used if this behavior is
00398     // not desired.
00399     // <group>
00400     const val &operator() (const key &ky) const;
00401     val &operator() (const key &ky);
00402     // </group>
00403 
00404     //
00405     // Define a complete mapping.
00406     //
00407     val &define(const key &k, const val &v);
00408     //
00409     // Remove a user defined mapping from <src>k</src> to a value.
00410     // After this, the value which <src>k</src> maps to reverts to
00411     // the default value.
00412     //
00413     void remove(const key &k);
00414     //
00415     // Check to see if a user defined mapping exists for
00416     // <src>k</src>. This does <em>not</em> modify the map.
00417     //
00418     Bool isDefined(const key &k) const;
00419     //
00420     // Retrieve the default value.
00421     // <group>
00422     const val &defaultVal() const { return dfltVal; }
00423     val &defaultVal() { return dfltVal; }
00424     // </group>
00425 
00426     //
00427     // Remove all user defined mapping.
00428     //
00429     void clear() { freeTable(); }
00430     //
00431     //  Get or set the maximum load factor.
00432     //
00433     // <group>
00434     float maxLoad() const { return maxLoad_; }
00435     void setMaxLoad( float new_max ) { maxLoad_ = new_max; }
00436     // </group>
00437 
00438     // Total number of buckets, i.e. the number the
00439     // hashing mechanism thinks it has. This is the total
00440     // number of buckets used for calculations in the hashing
00441     // mechanism. This may be smaller than <src>allocBuckets()</src>
00442     // because more underlying storage may be allocated than the
00443     // hashing mechanism needs.
00444     uInt totalBuckets() const { return total_; }
00445     // Number of buckets available, i.e. those which
00446     // the hashing mechanism allows itself to use. This
00447     // may be smaller than <src>totalBuckets()</src> because
00448     // the hashing mechanism can restrict itself to some subset
00449     // of the buckets available.
00450     uInt availableBuckets() const { return used_; }
00451     // Number of buckets currently populated by a bucket list.
00452     uInt usedBuckets() const { return filled_; }
00453     // Number of buckets which have been allocated, i.e. the
00454     // total number which have currently been allocated. This
00455     // is the number of buckets created. It may be bigger than
00456     // <src>totalBuckets()</src> because more memory can be
00457     // allocated than the hashing mechanism needs.
00458     uInt allocBuckets() const { return blk.nelements(); }
00459 
00460     // Number of mappings which have been defined by the user.
00461     uInt ndefined() const { return defs_; }
00462 
00463     // Current hash table loading factor.
00464     float loading() const { return ndefined() ? (float) ndefined() / (float) availableBuckets() : 0.0; }
00465     // Have any mappings been defined by the user.
00466     Bool empty() const { return ndefined() == 0 ? True : False; }
00467     // This returns a Block which has the number of elements in each bucket
00468     // of the table.
00469     Block<uInt> distribution() const;
00470     // Total size of this HashMap minus dynamically allocated memory for
00471     // key or val.
00472     uInt totalSize() const;
00473 
00474     //
00475     // dtor
00476     //
00477     virtual ~HashMap();
00478 
00479     enum {HashMapVersion = 1};
00480 
00481 protected:
00482     // Call the hash function.
00483     uInt hash(const key &k) const {
00484         uInt off = func ? (*func)(k) % totalBuckets() :
00485                 hashClass ? hashClass->hash(k) % totalBuckets() : 0;
00486         return off >= availableBuckets() ? off - (totalBuckets() >> 1) : off;
00487     }
00488     //
00489     // delete the contents of the hash table
00490     //
00491     void freeTable();
00492     //
00493     // enlarge the hash table. Returns the bucket which is being
00494     // moved...
00495     //
00496     uInt enlarge();
00497     //
00498     // populate bucket "to". Returns the bucket which is being
00499     // moved...
00500     //
00501     uInt populate( uInt to );
00502 
00503 private:
00504     // Total Slots
00505     uInt total_;
00506     // Slots Being Used
00507     uInt used_;
00508     // Slots with At Least One Value in the Bucket
00509     uInt filled_;
00510     // Number of Defined Mappings
00511     uInt defs_;
00512     // Maximum load
00513     float maxLoad_;
00514     PtrBlock<List<OrderedPair<key,val> >*> blk;
00515     Func func;
00516     HashClass<key> *hashClass;
00517     val dfltVal;
00518 };
00519 
00520 
00521 } //# NAMESPACE CASA - END
00522 
00523 #ifndef CASACORE_NO_AUTO_TEMPLATES
00524 #include <casa/Containers/HashMap.tcc>
00525 #endif //# CASACORE_NO_AUTO_TEMPLATES
00526 #endif