casa  $Rev:20696$
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Sort.h
Go to the documentation of this file.
00001 //# Sort.h: Sort objects on one or more keys
00002 //# Copyright (C) 1995,1996,1997,1998,1999,2000,2001
00003 //# Associated Universities, Inc. Washington DC, USA.
00004 //#
00005 //# This library is free software; you can redistribute it and/or modify it
00006 //# under the terms of the GNU Library General Public License as published by
00007 //# the Free Software Foundation; either version 2 of the License, or (at your
00008 //# option) any later version.
00009 //#
00010 //# This library is distributed in the hope that it will be useful, but WITHOUT
00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00013 //# License for more details.
00014 //#
00015 //# You should have received a copy of the GNU Library General Public License
00016 //# along with this library; if not, write to the Free Software Foundation,
00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00018 //#
00019 //# Correspondence concerning AIPS++ should be addressed as follows:
00020 //#        Internet email: aips2-request@nrao.edu.
00021 //#        Postal address: AIPS++ Project Office
00022 //#                        National Radio Astronomy Observatory
00023 //#                        520 Edgemont Road
00024 //#                        Charlottesville, VA 22903-2475 USA
00025 //#
00026 //#
00027 //# $Id: Sort.h 21089 2011-05-19 03:51:34Z gervandiepen $
00028 
00029 #ifndef CASA_SORT_H
00030 #define CASA_SORT_H
00031 
00032 //# Includes
00033 #include <casa/aips.h>
00034 #include <casa/Utilities/ValType.h>
00035 #include <casa/Containers/Block.h>
00036 #include <casa/Utilities/Compare.h>
00037 #include <casa/Utilities/CountedPtr.h>
00038 
00039 namespace casa { //# NAMESPACE CASA - BEGIN
00040 
00041 //# Forward Declarations
00042 template<class T> class Vector;
00043 
00044 // <summary> Define a Sort key </summary>
00045 // <use visibility=local>
00046 // <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1">
00047 // </reviewed>
00048 
00049 // <synopsis>
00050 // SortKey is a helper class for the <linkto class=Sort>Sort</linkto> class.
00051 // It holds the following information about a sort key:
00052 // <ul>
00053 //  <li> Address of the data array containing the sort key;
00054 //  <li> A CountedPtr to a comparison object to be used
00055 //       (of a class derived from the abstract base class BaseCompare).
00056 //  <li> Increment for the next data point -- this lets you specify a
00057 //       stride for keys embedded in a struct;
00058 //  <li> Sort order -- ascending or descending;
00059 // </ul>
00060 // </synopsis> 
00061 
00062 class SortKey
00063 {
00064 public:
00065     friend class Sort;
00066 
00067     // Define a sort key in a given data array using the indicated
00068     // comparison object, stride and sort order.
00069     SortKey (const void* data, const CountedPtr<BaseCompare>&,
00070              uInt increment, int order);
00071 
00072     // Copy constructor (copy semantics).
00073     SortKey (const SortKey&);
00074 
00075     ~SortKey();
00076 
00077     // Assignment (copy semantics).
00078     SortKey& operator= (const SortKey&);
00079 
00080     // Try if GenSort can be used for this single key.
00081     // If it succeeds, it returns the resulting number of elements.
00082     // Otherwise it returns 0.
00083     uInt tryGenSort (Vector<uInt>& indexVector, uInt nrrec, int opt) const;
00084 
00085 protected:
00086     // sort order; -1 = ascending, 1 = descending
00087     int               order_p;
00088     // address of first data point
00089     const void*       data_p;
00090     // increment for next data point
00091     uInt              incr_p;
00092     // comparison object; use CountedPtr for memory management
00093     CountedPtr<BaseCompare> ccmpObj_p;
00094     // comparison object; use raw ponter for performance
00095     BaseCompare* cmpObj_p;
00096 };
00097 
00098 
00099 
00100 // <summary> Sort on one or more keys, ascending and/or descending </summary>
00101 // <use visibility=export>
00102 // <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1">
00103 // </reviewed>
00104 
00105 // <synopsis>
00106 // <src>Sort</src> lets you sort data on one or more keys in a mix of
00107 // <src>Sort::ascending</src> and <src>Sort::descending</src> order.
00108 // Duplicates can be skipped by giving the option
00109 // <src>Sort::NoDuplicates</src>. Only in this case the number of output
00110 // elements can be different from the number of input elements.
00111 // <br>The <src>unique</src> function offers another way of getting
00112 // unique values.
00113 // <p>
00114 // Class <src>Sort</src> does not sort the data themselves, but
00115 // returns an index to them. This gives more flexibility and
00116 // allows the sort to be stable; but it is slower.
00117 // <br>Very fast sorting of the data themselves can be done with the
00118 // functions in class <linkto class=GenSort>GenSort</linkto>.
00119 // If sorting on a single key with a standard data type is done,
00120 // Sort will use GenSortIndirect to speed up the sort.
00121 // <br>
00122 // Three sort algorithms are provided:
00123 // <DL>
00124 //  <DT> <src>Sort::InsSort</src>
00125 //  <DD> Insertion sort has O(n*n) behaviour, thus is very slow.  It
00126 //       will only be very fast if the array is already (almost) in the
00127 //       right order.
00128 //  <DT> <src>Sort::QuickSort</src>
00129 //  <DD> Care has been taken to solve the well-known quicksort problems
00130 //       like "array already in order" or "many equal elements".  The
00131 //       behaviour is O(n*log(n)) in all the cases tested, even in
00132 //       degenerated cases where the SUN Solaris qsort algorithm is O(n*n).
00133 //  <DT> <src>Sort::HeapSort</src>
00134 //  <DD> Heapsort has O(n*log(n)) behaviour. Its speed is lower than
00135 //       that of QuickSort, so QuickSort is the default algorithm.
00136 // </DL>
00137 // All sort algorithms are <em>stable</em>, which means that the original
00138 // order is kept when keys are equal.
00139 //
00140 // The sort is a four step process:
00141 // <ol>
00142 //  <li> Construct the <src>Sort</src> object.
00143 //  <li> Define the sort keys. The function <src>sortKey</src> must be
00144 //       called for each sort key (the most significant one first).
00145 //       The comparison object can be passed in directly, or a 
00146 //       <linkto group="DataType.h#DataType">basic data type</linkto>
00147 //       can be given. In the latter case the appropriate ObjCompare
00148 //       comparison object will be created.
00149 //  <li> Sort the data. The function <src>sort</src> returns an index
00150 //       array, which is allocated when needed.
00151 //  <li> Destruct the <src>Sort</src> object (usually done automatically)
00152 //       and delete the index array.
00153 // </ol>
00154 // The data can be in a single array of structs, in separate arrays, or
00155 // in a mix of those. Of course, all arrays must have the same length.
00156 // The data can be passed to the <src>Sort</src> constructor and/or to the
00157 // <src>sortKey</src> function. If passed to the <src>Sort</src> constructor,
00158 // the offset of the data item in the data array must be given to
00159 // <src>sortKey</src>.
00160 // </synopsis>
00161 
00162 // <example>
00163 // In the first example we sort the data contained in two "parallel"
00164 // arrays, <src>idata</src> and <src>ddata</src>, both of length
00165 // <src>nrdata</src>.
00166 // <srcblock>
00167 //    Sort sort;
00168 //    sort.sortKey (idata, TpInt);                       // define 1st sort key
00169 //    sort.sortKey (ddata, TpDouble,0,Sort::Descending); // define 2nd sort key
00170 //    Vector<uInt> inx;
00171 //    sort.sort (inx, nrdata);
00172 //    for (uInt i=0; i<nrdata; i++) {                    // show sorted data
00173 //        cout << idata[inx[i]] << " " << ddata[inx[i]] << endl;
00174 //    }
00175 // </srcblock>
00176 // Now <src>nr</src> contains the nr of records (=<src>nrdata</src>)
00177 // and <src>inx</src> an array of (sorted) indices.
00178 //
00179 // In the second example we sort the data stored in an array of structs
00180 // on the double (ascending) and the string (descending). We can pass
00181 // the data to the <src>Sort</src> constructor, and the offsets of the
00182 // struct elements to the <src>sortKey</src> function.
00183 // <srcblock>
00184 //    struct Ts {
00185 //         String as;
00186 //         double ad;
00187 //    }
00188 //    Vector<uInt> inx;
00189 //    Sort sort (tsarr, sizeof(Ts));
00190 //    sort.sortKey ((char*)&tsarr[0].ad - (char*)tsarr, TpDouble);
00191 //    sort.sortKey ((char*)&tsarr[0].as - (char*)tsarr, TpString,
00192 //                                                       Sort::Descending);
00193 //    sort.sort (inx, nrts);
00194 // </srcblock>
00195 // Note that the first argument in function <src>sortKey</src> gives
00196 // the offset of the variable in the struct.
00197 //
00198 // Alternatively, and probably slightly easier, we could pass the data
00199 // to the <src>sortKey</src> function and use an increment:
00200 // <srcblock>
00201 //    struct Ts {
00202 //         String as;
00203 //         double ad;
00204 //    }
00205 //    Vector<uInt> inx;
00206 //    Sort sort;
00207 //    sort.sortKey (&tsarr[0].ad, TpDouble, sizeof(Ts));
00208 //    sort.sortKey (&tsarr[0].as, TpString, sizeof(Ts), Sort::Descending);
00209 //    sort.sort (inx, nrts);
00210 // </srcblock>
00211 //
00212 // Finally, we could provide a comparison object for the struct.
00213 // <srcblock>
00214 //    struct Ts {
00215 //         String as;
00216 //         double ad;
00217 //    }
00218 //    class MyCompare: public BaseCompare {
00219 //      virtual int comp (const void* val1, const void* val2) const
00220 //      {
00221 //        const Ts& t1 = *(Ts*)val1;
00222 //        const Ts& t2 = *(Ts*)val2;
00223 //        if (t1.ad < t2.ad) return -1;
00224 //        if (t1.ad > t2.ad) return 1;
00225 //        if (t1.as < t2.as) return 1;    // string must be descending
00226 //        if (t1.as > t2.as) return -1;
00227 //        return 0;
00228 //      }
00229 //    };
00230 //    Vector<uInt> inx;
00231 //    Sort sort;
00232 //    sort.sortKey (tsarr, compareTs, sizeof(Ts)); 
00233 //    sort.sort (inx, nrts);
00234 // </srcblock>
00235 
00236 class Sort
00237 {
00238 public:
00239     // Enumerate the sort options:
00240     enum Option {HeapSort=1,               // use Heapsort algorithm
00241                  InsSort=2,                // use insertion sort algorithm
00242                  QuickSort=4,              // use Quicksort algorithm
00243                  NoDuplicates=8};          // skip data with equal sort keys
00244 
00245     // Enumerate the sort order:
00246     enum Order {Ascending=-1,
00247                 Descending=1};
00248 
00249     // The default constructor can be used when the data is only passed
00250     // in via function <src>sortKey</src>.
00251     Sort();
00252 
00253     // Construct a Sort object for the given data array with elements
00254     // of <src>elementSize</src> bytes.  This data array will be used
00255     // when an offset is given to the <src>sortKey</src> functions.
00256     // You can still pass additional data arrays to the
00257     // <src>sortKey</src> functions.
00258     Sort (const void* data, uInt elementSize);
00259 
00260     // Copy constructor (copy semantics).
00261     Sort (const Sort&);
00262 
00263     ~Sort();
00264 
00265     // Assignment (copy semantics).
00266     Sort& operator= (const Sort&);
00267 
00268     // Define a sort key (the most significant key should be defined first).
00269     // The key contains:
00270     // <ul>
00271     // <li> A pointer to the start of the data array. --- When structs are
00272     //   sorted on an element in the struct, the pointer must point to
00273     //   that element in the first struct.
00274     // <li> A pointer to the comparison object to be used. --- The
00275     //   comparison object can be specified in two ways:
00276     //   <ul>
00277     //   <li> by giving a
00278     //     <linkto group="DataType.h#DataType">basic data type</linkto>,
00279     //     in which case the appropriate comparison object will be
00280     //     created automatically, or
00281     //   <li> by a CountedPtr of a comparison object.
00282     //     You may want to use the templated comparison classes
00283     //     <linkto class=ObjCompare>ObjCompare</linkto>(),
00284     //     but you are free to use any other class derived from BaseCompare
00285     //     that implements the <src>comp</src> function.
00286     //   </ul>
00287     // <li> The increment from one data element to the next. --- When
00288     //   structs are sorted on an element in the struct, the increment
00289     //   should be the size of the struct. If the comparison object is
00290     //   automatically created using the data type specified, the default
00291     //   increment is the size of the data type.
00292     // <li> The sort order. --- <src>Ascending</src> (default) or 
00293     //   <src>Descending</src>;
00294     // </ul>
00295     //
00296     // When the data array has been passed to the Sort constructor,
00297     // the data pointer and the increment arguments can be replaced by a
00298     // single argument: the offset of the key in each element of the array.
00299     //
00300     // <group>
00301     void sortKey (const void* data, DataType, uInt increment = 0,
00302                   Order = Ascending);
00303     void sortKey (const void* data, const CountedPtr<BaseCompare>&,
00304                   uInt increment, Order = Ascending);
00305     void sortKey (uInt offset, DataType, Order = Ascending);
00306     void sortKey (uInt offset, const CountedPtr<BaseCompare>&,
00307                   Order = Ascending);
00308     // </group>
00309 
00310     // Sort the data array of <src>nrrec</src> records.
00311     // The result is an array of indices giving the requested order.
00312     // It returns the number of resulting records. The indices array
00313     // is resized to that number.
00314     uInt sort (Vector<uInt>& indexVector, uInt nrrec,
00315                int options = QuickSort) const;
00316 
00317     // Get all unique records in a sorted array. The array order is
00318     // given in the indexVector (as possibly returned by the sort function).
00319     // The default indexVector is 0..nrrec-1.
00320     // The index of each first unique record is returned in the uniqueVector.
00321     // They are indices in the supplied indexVector, so
00322     // <src>data[indexVector(uniqueVector(i))]</src>
00323     // is giving the i-th unique record.
00324     // Note that the records indexed by <src>indexVector(uniqueVector(i))</src>
00325     // till <src>indexVector(uniqueVector(i+1))</src> are all the same.
00326     // <br>
00327     // It returns the number of unique records. The unique array
00328     // is resized to that number.
00329     // <group>
00330     uInt unique (Vector<uInt>& uniqueVector, uInt nrrec) const;
00331     uInt unique (Vector<uInt>& uniqueVector,
00332                  const Vector<uInt>& indexVector) const;
00333     // </group>
00334 
00335 private:
00336     // Copy that Sort object to this.
00337     void copy (const Sort& that);
00338 
00339     // Add a sort key giving a data type or the sort key.
00340     // <group>
00341     void addKey (const void* data, DataType, uInt nr, int options);
00342     void addKey (SortKey*);
00343     // </group>
00344 
00345     // Do an insertion sort, optionally skipping duplicates.
00346     // <group>
00347     uInt insSort (uInt nr, uInt* indices) const;
00348     uInt insSortNoDup (uInt nr, uInt* indices) const;
00349     // </group>
00350 
00351     // Do a quicksort, optionally skipping duplicates
00352     // (qkSort is the actual quicksort function).
00353     // <group>
00354     uInt quickSort (uInt nr, uInt* indices) const;
00355     uInt quickSortNoDup (uInt nr, uInt* indices) const;
00356     void qkSort (Int nr, uInt* indices) const;
00357     // </group>
00358 
00359     // Do a heapsort, optionally skipping duplicates.
00360     // <group>
00361     uInt heapSort (uInt nr, uInt* indices) const;
00362     uInt heapSortNoDup (uInt nr, uInt* indices) const;
00363     // </group>
00364 
00365     // Siftdown algorithm for heapsort.
00366     void siftDown (Int low, Int up, uInt* indices) const;
00367 
00368     // Compare the keys of 2 records.
00369     int compare (uInt index1, uInt index2) const;
00370 
00371     // Swap 2 indices.
00372     inline void swap (Int index1, Int index2, uInt* indices) const;
00373 
00374 
00375     PtrBlock<SortKey*> keys_p;                    //# keys to sort on
00376     uInt               nrkey_p;                   //# #sort-keys
00377     const void*        data_p;                    //# pointer to data records
00378     uInt               size_p;                    //# size of data record
00379 };
00380 
00381 
00382 
00383 inline void Sort::swap (Int i, Int j, uInt* inx) const
00384 {
00385     uInt t = inx[i];
00386     inx[i] = inx[j];
00387     inx[j] = t;
00388 }
00389 
00390 
00391 
00392 } //# NAMESPACE CASA - END
00393 
00394 #endif