Sort.h

Go to the documentation of this file.
00001 //# Sort.h: Sort objects on one or more keys
00002 //# Copyright (C) 1995,1996,1997,1998,1999,2000,2001
00003 //# Associated Universities, Inc. Washington DC, USA.
00004 //#
00005 //# This library is free software; you can redistribute it and/or modify it
00006 //# under the terms of the GNU Library General Public License as published by
00007 //# the Free Software Foundation; either version 2 of the License, or (at your
00008 //# option) any later version.
00009 //#
00010 //# This library is distributed in the hope that it will be useful, but WITHOUT
00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00013 //# License for more details.
00014 //#
00015 //# You should have received a copy of the GNU Library General Public License
00016 //# along with this library; if not, write to the Free Software Foundation,
00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00018 //#
00019 //# Correspondence concerning AIPS++ should be addressed as follows:
00020 //#        Internet email: aips2-request@nrao.edu.
00021 //#        Postal address: AIPS++ Project Office
00022 //#                        National Radio Astronomy Observatory
00023 //#                        520 Edgemont Road
00024 //#                        Charlottesville, VA 22903-2475 USA
00025 //#
00026 //#
00027 //# $Id$
00028 
00029 #ifndef CASA_SORT_H
00030 #define CASA_SORT_H
00031 
00032 //# Includes
00033 #include <casa/aips.h>
00034 #include <casa/Utilities/ValType.h>
00035 #include <casa/Containers/Block.h>
00036 #include <casa/Utilities/Compare.h>
00037 
00038 namespace casa { //# NAMESPACE CASA - BEGIN
00039 
00040 //# Forward Declarations
00041 class AipsIO;
00042 template<class T> class Vector;
00043 
00044 // <summary> Define a Sort key </summary>
00045 // <use visibility=local>
00046 // <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1">
00047 // </reviewed>
00048 
00049 // <synopsis>
00050 // SortKey is a helper class for the <linkto class=Sort>Sort</linkto> class.
00051 // It holds the following information about a sort key:
00052 // <ul>
00053 //  <li> Address of the data array containing the sort key;
00054 //  <li> Address of the comparison function to be used -- this must be
00055 //       a function with the signature 
00056 //       <linkto group="Compare.h#ObjCompareFunc">ObjCompareFunc</linkto>;
00057 //  <li> Increment for the next data point -- this lets you specify a
00058 //       stride for keys embedded in a struct;
00059 //  <li> Sort order -- ascending or descending;
00060 // </ul>
00061 // </synopsis> 
00062 
00063 class SortKey
00064 {
00065 public:
00066     friend class Sort;
00067 
00068     // Define a sort key in a given data array using the indicated
00069     // comparison function, stride and sort order.
00070     SortKey (const void* data, ObjCompareFunc*, uInt increment, int order);
00071 
00072     // Copy constructor (copy semantics).
00073     SortKey (const SortKey&);
00074 
00075     ~SortKey();
00076 
00077     // Assignment (copy semantics).
00078     SortKey& operator= (const SortKey&);
00079 
00080     // Try if GenSort can be used for this single key.
00081     // If it succeeds, it returns the resulting number of elements.
00082     // Otherwise it returns 0.
00083     uInt tryGenSort (Vector<uInt>& indexVector, uInt nrrec, int opt) const;
00084 
00085 protected:
00086     // sort order; -1 = ascending, 1 = descending
00087     int               order_p;
00088     // address of first data point
00089     const void*       data_p;
00090     // increment for next data point
00091     uInt              incr_p;
00092     // ptr to comparison routine
00093     ObjCompareFunc*   cmpFunc_p;
00094 };
00095 
00096 
00097 
00098 // <summary> Sort on one or more keys, ascending and/or descending </summary>
00099 // <use visibility=export>
00100 // <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1">
00101 // </reviewed>
00102 
00103 // <synopsis>
00104 // <src>Sort</src> lets you sort data on one or more keys in a mix of
00105 // <src>Sort::ascending</src> and <src>Sort::descending</src> order.
00106 // Duplicates can be skipped by giving the option
00107 // <src>Sort::NoDuplicates</src>. Only in this case the number of output
00108 // elements can be different from the number of input elements.
00109 // <br>The <src>unique</src> offers another way of getting unique values,
00110 // <p>
00111 // Class <src>Sort</src> does not sort the data themselves, but
00112 // returns an index to them. This gives more flexibility and
00113 // allows the sort to be stable; but it is slower.
00114 // Very fast sorting of the data themselves can be done with the
00115 // functions in class <linkto class=GenSort>GenSort</linkto>.
00116 // <br>
00117 // Three sort algorithms are provided:
00118 // <DL>
00119 //  <DT> <src>Sort::InsSort</src>
00120 //  <DD> Insertion sort has O(n*n) behaviour, thus is very slow.  It
00121 //       will only be very fast when the array is already (almost) in the
00122 //       right order.
00123 //  <DT> <src>Sort::QuickSort</src>
00124 //  <DD> Care has been taken to solve the well-known quicksort problems
00125 //       like "array already in order" or "many equal elements".  The
00126 //       behaviour is O(n*log(n)) in all the cases tested, even in
00127 //       degenerated cases where the SUN Solaris qsort algorithm is O(n*n).
00128 //  <DT> <src>Sort::HeapSort</src>
00129 //  <DD> Heapsort has O(n*log(n)) behaviour. Its speed is higher than
00130 //       that of QuickSort, so it is the default algorithm.
00131 // </DL>
00132 // All sort algorithms are <em>stable</em>, which means that the original
00133 // order is kept when keys are equal.
00134 //
00135 // The sort is a four step process:
00136 // <ol>
00137 //  <li> Construct the <src>Sort</src> object.
00138 //  <li> Define the sort keys. The function <src>sortKey</src> must be
00139 //       called for each sort key (the most significant one first).
00140 //       The comparison function can be passed in directly, or a 
00141 //       <linkto group="DataType.h#DataType">basic data type</linkto>
00142 //       can be given. In the latter case the appropriate comparison
00143 //       function will be selected.
00144 //  <li> Sort the data. The function <src>sort</src> returns an index
00145 //       array, which is allocated when needed.
00146 //  <li> Destruct the <src>Sort</src> object (usually done automatically)
00147 //       and delete the index array.
00148 // </ol>
00149 // The data can be in a single array of structs, in separate arrays, or
00150 // in a mix of those. Of course, all arrays must have the same length.
00151 // The data can be passed to the <src>Sort</src> constructor and/or to the
00152 // <src>sortKey</src> function. If passed to the <src>Sort</src> constructor,
00153 // the offset of the data item in the data array must be given to
00154 // <src>sortKey</src>.
00155 // </synopsis>
00156 
00157 // <example>
00158 // In the first example we sort the data contained in two "parallel"
00159 // arrays, <src>idata</src> and <src>ddata</src>, both of length
00160 // <src>nrdata</src>.
00161 // <srcblock>
00162 //    Sort sort;
00163 //    sort.sortKey (idata, TpInt);                       // define 1st sort key
00164 //    sort.sortKey (ddata, TpDouble,0,Sort::Descending); // define 2nd sort key
00165 //    uInt* inx=0;
00166 //    sort.sort (nrdata, inx);
00167 //    for (uInt i=0; i<nrdata; i++) {                    // show sorted data
00168 //        cout << idata[inx[i]] << " " << ddata[inx[i]] << endl;
00169 //    }
00170 //    delete inx;
00171 // </srcblock>
00172 // Now <src>nr</src> contains the nr of records (=<src>nrdata</src>)
00173 // and <src>inx</src> an array of (sorted) indices.
00174 // The index array <src>inx</src> has to be deleted by the user.
00175 //
00176 // In the second example we sort the data stored in an array of structs
00177 // on the double (ascending) and the string (descending). We can pass
00178 // the data to the <src>Sort</src> constructor, and the offsets of the
00179 // struct elements to the <src>sortKey</src> function.
00180 // <srcblock>
00181 //    struct Ts {
00182 //         String as;
00183 //         double ad;
00184 //    }
00185 //    uInt* inx=0;
00186 //    Sort sort (tsarr, sizeof(Ts));
00187 //    sort.sortKey ((char*)&tsarr[0].ad - (char*)tsarr, TpDouble);
00188 //    sort.sortKey ((char*)&tsarr[0].as - (char*)tsarr, TpString,
00189 //                                                       Sort::Descending);
00190 //    sort.sort (nrts, inx);
00191 // </srcblock>
00192 // Note that the first argument in function <src>sortKey</src> gives
00193 // the offset of the variable in the struct.
00194 //
00195 // Alternatively, and probably slightly easier, we could pass the data
00196 // to the <src>sortKey</src> function:
00197 // <srcblock>
00198 //    struct Ts {
00199 //         String as;
00200 //         double ad;
00201 //    }
00202 //    uInt* inx=0;
00203 //    Sort sort;
00204 //    sort.sortKey (&tsarr[0].ad, TpDouble, sizeof(Ts));
00205 //    sort.sortKey (&tsarr[0].as, TpString, sizeof(Ts), Sort::Descending);
00206 //    sort.sort (nrts, inx);
00207 // </srcblock>
00208 //
00209 // Finally, we could provide a comparison function for the struct.
00210 // (The function is shown inline, but should be implemented out-of-line.)
00211 // This method is not recommended, because it means more work for the user.
00212 // Moreover, the descending sort on the string has to be coded in the
00213 // comparison function, which is not very flexible.
00214 // <srcblock>
00215 //    struct Ts {
00216 //         String as;
00217 //         double ad;
00218 //    }
00219 //    int compareTs (const void* val1, const void* val2)
00220 //    {
00221 //        const Ts& t1 = *(Ts*)val1;
00222 //        const Ts& t2 = *(Ts*)val2;
00223 //        if (t1.ad < t2.ad) return -1;
00224 //        if (t1.ad > t2.ad) return 1;
00225 //        if (t1.as < t2.as) return 1;    // string must be descending
00226 //        if (t1.as > t2.as) return -1;
00227 //        return 0;
00228 //    }
00229 //    uInt* inx=0;
00230 //    Sort sort;
00231 //    sort.sortKey (tsarr, compareTs, sizeof(Ts)); 
00232 //    sort.sort (nrts, inx);
00233 // </srcblock>
00234 //
00235 // The last example illustrates the use of the
00236 // <src>Sort::NoDuplicates</src> flag and an input index array.
00237 // First we remove duplicate strings, and then sort the result.
00238 // <srcblock>
00239 //    struct Ts {
00240 //         String as;
00241 //         double ad;
00242 //    }
00243 //    uInt* inxNoDup=0;
00244 //    Sort sort;
00245 //    sort.sortKey (&tsarr[0].as, TpString, sizeof(Ts));
00246 //    uInt nrout = sort.sort (nrts, inxNoDup, Sort::NoDuplicates);
00247 //    Sort sort2;
00248 //    sort2.sortKey (&tsarr[0].ad, TpDouble, sizeof(Ts));
00249 //    sort2.sortKey (&tsarr[0].as, TpString, sizeof(Ts), Sort::Descending);
00250 //    uInt* inx=0;
00251 //    sort2.sort (nrout, inxNoDup, inx);
00252 // </srcblock>
00253 // </example>
00254 
00255 
00256 class Sort
00257 {
00258 public:
00259     // Enumerate the sort options:
00260     enum Option {HeapSort=1,               // use Heapsort algorithm
00261                  InsSort=2,                // use insertion sort algorithm
00262                  QuickSort=4,              // use Quicksort algorithm
00263                  NoDuplicates=8};          // skip data with equal sort keys
00264 
00265     // Enumerate the sort order:
00266     enum Order {Ascending=-1,
00267                 Descending=1};
00268 
00269     // The default constructor can be used when the data is only passed
00270     // in via function <src>sortKey</src>.
00271     Sort();
00272 
00273     // Construct a Sort object for the given data array with elements
00274     // of <src>elementSize</src> bytes.  This data array will be used
00275     // when an offset is given to the <src>sortKey</src> functions.
00276     // You can still pass additional data arrays to the
00277     // <src>sortKey</src> functions.
00278     Sort (const void* data, uInt elementSize);
00279 
00280     // Copy constructor (copy semantics).
00281     Sort (const Sort&);
00282 
00283     ~Sort();
00284 
00285     // Assignment (copy semantics).
00286     Sort& operator= (const Sort&);
00287 
00288     // Define a sort key (the most significant key should be defined first).
00289     // The key contains:
00290     // <ul>
00291     // <li> A pointer to the start of the data array. --- When structs are
00292     //   sorted on an element in the struct, the pointer must point to
00293     //   that element in the first struct.
00294     // <li> A pointer to the comparison function to be used. --- The
00295     //   comparison function can be specified in two ways:
00296     //   <ul>
00297     //   <li> by giving a
00298     //     <linkto group="DataType.h#DataType">basic data type</linkto>,
00299     //     in which case the appropriate comparison function will be
00300     //     selected automatically, or
00301     //   <li> by pointing directly to a comparison function with the
00302     //     signature
00303     //     <linkto group="Compare.h#ObjCompareFunc">ObjCompareFunc</linkto>.
00304     //     You may want to use the templated comparison function
00305     //     <linkto class=ObjCompare>ObjCompare::compare</linkto>(),
00306     //     but you are free to use any other function that can be called as:
00307     //     <ul>
00308     //     <li> int (const void* value1, const void* value2)
00309     //     </ul>
00310     //     and returns:
00311     //     <ul>
00312     //     <li> -1  if value1 &lt; value2
00313     //     <li>  1  if value1 &gt; value2
00314     //     <li>  0  if value1 == value2.
00315     //     </ul>
00316     //   </ul>
00317     // <li> The increment from one data element to the next. --- When
00318     //   structs are sorted on an element in the struct, the increment
00319     //   should be the size of the struct. When the comparison function is
00320     //   automatically determined from the data type specified, the default
00321     //   increment is the size of the data type.
00322     // <li> The sort order. --- <src>Ascending</src> (default) or 
00323     //   <src>Descending</src>;
00324     // </ul>
00325     //
00326     // When the data array has been passed to the Sort constructor,
00327     // the data pointer and the increment arguments can be replaced by a
00328     // single argument: the offset of the key in each element of the array.
00329     //
00330     // <group>
00331     void sortKey (const void* data, DataType, uInt increment = 0,
00332                   Order = Ascending);
00333     void sortKey (const void* data, ObjCompareFunc*, uInt increment,
00334                   Order = Ascending);
00335     void sortKey (uInt offset, DataType, Order = Ascending);
00336     void sortKey (uInt offset, ObjCompareFunc*, Order = Ascending);
00337     // </group>
00338 
00339     // Sort the data array of <src>nrrec</src> records.
00340     // The result is an array of indices giving the requested order.
00341     // It returns the number of resulting records. The indices array
00342     // is resized to that number.
00343     uInt sort (Vector<uInt>& indexVector, uInt nrrec,
00344                int options = HeapSort) const;
00345 
00346     // Get all unique records in a sorted array. The array order is
00347     // given in the indexVector (as possibly returned by the sort function).
00348     // The default indexVector is 0..nrrec-1.
00349     // The index of each first unique record is returned in the uniqueVector.
00350     // They are indices in the supplied indexVector, so
00351     // <src>data[indexVector(uniqueVector(i))]</src>
00352     // is giving the i-th unique record.
00353     // Note that the records indexed by <src>indexVector(uniqueVector(i))</src>
00354     // till <src>indexVector(uniqueVector(i+1))</src> are all the same.
00355     // <br>
00356     // It returns the number of unique records. The unique array
00357     // is resized to that number.
00358     // <group>
00359     uInt unique (Vector<uInt>& uniqueVector, uInt nrrec) const;
00360     uInt unique (Vector<uInt>& uniqueVector,
00361                  const Vector<uInt>& indexVector) const;
00362     // </group>
00363 
00364 private:
00365     // Copy that Sort object to this.
00366     void copy (const Sort& that);
00367 
00368     // Add a sort key giving a data type or a compare function.
00369     // <group>
00370     void addKey (const void* data, DataType, uInt nr, int options);
00371     void addKey (const void* data, ObjCompareFunc*, uInt nr, int options);
00372     // </group>
00373 
00374     // Do an insertion sort, optionally skipping duplicates.
00375     // <group>
00376     uInt insSort (uInt nr, uInt* indices) const;
00377     uInt insSortNoDup (uInt nr, uInt* indices) const;
00378     // </group>
00379 
00380     // Do a quicksort, optionally skipping duplicates
00381     // (qkSort is the actual quicksort function).
00382     // <group>
00383     uInt quickSort (uInt nr, uInt* indices) const;
00384     uInt quickSortNoDup (uInt nr, uInt* indices) const;
00385     void qkSort (Int nr, uInt* indices) const;
00386     // </group>
00387 
00388     // Do a heapsort, optionally skipping duplicates.
00389     // <group>
00390     uInt heapSort (uInt nr, uInt* indices) const;
00391     uInt heapSortNoDup (uInt nr, uInt* indices) const;
00392     // </group>
00393 
00394     // Siftdown algorithm for heapsort.
00395     void siftDown (Int low, Int up, uInt* indices) const;
00396 
00397     // Compare the keys of 2 records.
00398     int compare (uInt index1, uInt index2) const;
00399 
00400     // Swap 2 indices.
00401     inline void swap (Int index1, Int index2, uInt* indices) const;
00402 
00403 
00404     Block<void*>    keys_p;                       //# keys to sort on
00405     uInt            nrkey_p;                      //# #sort-keys
00406     const void*     data_p;                       //# pointer to data records
00407     uInt            size_p;                       //# size of data record
00408 };
00409 
00410 
00411 
00412 inline void Sort::swap (Int i, Int j, uInt* inx) const
00413 {
00414     uInt t = inx[i];
00415     inx[i] = inx[j];
00416     inx[j] = t;
00417 }
00418 
00419 
00420 
00421 } //# NAMESPACE CASA - END
00422 
00423 #endif

Generated on Mon Sep 1 22:34:00 2008 for NRAOCASA by  doxygen 1.5.1