casa
$Rev:20696$
|
00001 //# Sort.h: Sort objects on one or more keys 00002 //# Copyright (C) 1995,1996,1997,1998,1999,2000,2001 00003 //# Associated Universities, Inc. Washington DC, USA. 00004 //# 00005 //# This library is free software; you can redistribute it and/or modify it 00006 //# under the terms of the GNU Library General Public License as published by 00007 //# the Free Software Foundation; either version 2 of the License, or (at your 00008 //# option) any later version. 00009 //# 00010 //# This library is distributed in the hope that it will be useful, but WITHOUT 00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 00012 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 00013 //# License for more details. 00014 //# 00015 //# You should have received a copy of the GNU Library General Public License 00016 //# along with this library; if not, write to the Free Software Foundation, 00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA. 00018 //# 00019 //# Correspondence concerning AIPS++ should be addressed as follows: 00020 //# Internet email: aips2-request@nrao.edu. 00021 //# Postal address: AIPS++ Project Office 00022 //# National Radio Astronomy Observatory 00023 //# 520 Edgemont Road 00024 //# Charlottesville, VA 22903-2475 USA 00025 //# 00026 //# 00027 //# $Id: Sort.h 21089 2011-05-19 03:51:34Z gervandiepen $ 00028 00029 #ifndef CASA_SORT_H 00030 #define CASA_SORT_H 00031 00032 //# Includes 00033 #include <casa/aips.h> 00034 #include <casa/Utilities/ValType.h> 00035 #include <casa/Containers/Block.h> 00036 #include <casa/Utilities/Compare.h> 00037 #include <casa/Utilities/CountedPtr.h> 00038 00039 namespace casa { //# NAMESPACE CASA - BEGIN 00040 00041 //# Forward Declarations 00042 template<class T> class Vector; 00043 00044 // <summary> Define a Sort key </summary> 00045 // <use visibility=local> 00046 // <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1"> 00047 // </reviewed> 00048 00049 // <synopsis> 00050 // SortKey is a helper class for the <linkto class=Sort>Sort</linkto> class. 00051 // It holds the following information about a sort key: 00052 // <ul> 00053 // <li> Address of the data array containing the sort key; 00054 // <li> A CountedPtr to a comparison object to be used 00055 // (of a class derived from the abstract base class BaseCompare). 00056 // <li> Increment for the next data point -- this lets you specify a 00057 // stride for keys embedded in a struct; 00058 // <li> Sort order -- ascending or descending; 00059 // </ul> 00060 // </synopsis> 00061 00062 class SortKey 00063 { 00064 public: 00065 friend class Sort; 00066 00067 // Define a sort key in a given data array using the indicated 00068 // comparison object, stride and sort order. 00069 SortKey (const void* data, const CountedPtr<BaseCompare>&, 00070 uInt increment, int order); 00071 00072 // Copy constructor (copy semantics). 00073 SortKey (const SortKey&); 00074 00075 ~SortKey(); 00076 00077 // Assignment (copy semantics). 00078 SortKey& operator= (const SortKey&); 00079 00080 // Try if GenSort can be used for this single key. 00081 // If it succeeds, it returns the resulting number of elements. 00082 // Otherwise it returns 0. 00083 uInt tryGenSort (Vector<uInt>& indexVector, uInt nrrec, int opt) const; 00084 00085 protected: 00086 // sort order; -1 = ascending, 1 = descending 00087 int order_p; 00088 // address of first data point 00089 const void* data_p; 00090 // increment for next data point 00091 uInt incr_p; 00092 // comparison object; use CountedPtr for memory management 00093 CountedPtr<BaseCompare> ccmpObj_p; 00094 // comparison object; use raw ponter for performance 00095 BaseCompare* cmpObj_p; 00096 }; 00097 00098 00099 00100 // <summary> Sort on one or more keys, ascending and/or descending </summary> 00101 // <use visibility=export> 00102 // <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1"> 00103 // </reviewed> 00104 00105 // <synopsis> 00106 // <src>Sort</src> lets you sort data on one or more keys in a mix of 00107 // <src>Sort::ascending</src> and <src>Sort::descending</src> order. 00108 // Duplicates can be skipped by giving the option 00109 // <src>Sort::NoDuplicates</src>. Only in this case the number of output 00110 // elements can be different from the number of input elements. 00111 // <br>The <src>unique</src> function offers another way of getting 00112 // unique values. 00113 // <p> 00114 // Class <src>Sort</src> does not sort the data themselves, but 00115 // returns an index to them. This gives more flexibility and 00116 // allows the sort to be stable; but it is slower. 00117 // <br>Very fast sorting of the data themselves can be done with the 00118 // functions in class <linkto class=GenSort>GenSort</linkto>. 00119 // If sorting on a single key with a standard data type is done, 00120 // Sort will use GenSortIndirect to speed up the sort. 00121 // <br> 00122 // Three sort algorithms are provided: 00123 // <DL> 00124 // <DT> <src>Sort::InsSort</src> 00125 // <DD> Insertion sort has O(n*n) behaviour, thus is very slow. It 00126 // will only be very fast if the array is already (almost) in the 00127 // right order. 00128 // <DT> <src>Sort::QuickSort</src> 00129 // <DD> Care has been taken to solve the well-known quicksort problems 00130 // like "array already in order" or "many equal elements". The 00131 // behaviour is O(n*log(n)) in all the cases tested, even in 00132 // degenerated cases where the SUN Solaris qsort algorithm is O(n*n). 00133 // <DT> <src>Sort::HeapSort</src> 00134 // <DD> Heapsort has O(n*log(n)) behaviour. Its speed is lower than 00135 // that of QuickSort, so QuickSort is the default algorithm. 00136 // </DL> 00137 // All sort algorithms are <em>stable</em>, which means that the original 00138 // order is kept when keys are equal. 00139 // 00140 // The sort is a four step process: 00141 // <ol> 00142 // <li> Construct the <src>Sort</src> object. 00143 // <li> Define the sort keys. The function <src>sortKey</src> must be 00144 // called for each sort key (the most significant one first). 00145 // The comparison object can be passed in directly, or a 00146 // <linkto group="DataType.h#DataType">basic data type</linkto> 00147 // can be given. In the latter case the appropriate ObjCompare 00148 // comparison object will be created. 00149 // <li> Sort the data. The function <src>sort</src> returns an index 00150 // array, which is allocated when needed. 00151 // <li> Destruct the <src>Sort</src> object (usually done automatically) 00152 // and delete the index array. 00153 // </ol> 00154 // The data can be in a single array of structs, in separate arrays, or 00155 // in a mix of those. Of course, all arrays must have the same length. 00156 // The data can be passed to the <src>Sort</src> constructor and/or to the 00157 // <src>sortKey</src> function. If passed to the <src>Sort</src> constructor, 00158 // the offset of the data item in the data array must be given to 00159 // <src>sortKey</src>. 00160 // </synopsis> 00161 00162 // <example> 00163 // In the first example we sort the data contained in two "parallel" 00164 // arrays, <src>idata</src> and <src>ddata</src>, both of length 00165 // <src>nrdata</src>. 00166 // <srcblock> 00167 // Sort sort; 00168 // sort.sortKey (idata, TpInt); // define 1st sort key 00169 // sort.sortKey (ddata, TpDouble,0,Sort::Descending); // define 2nd sort key 00170 // Vector<uInt> inx; 00171 // sort.sort (inx, nrdata); 00172 // for (uInt i=0; i<nrdata; i++) { // show sorted data 00173 // cout << idata[inx[i]] << " " << ddata[inx[i]] << endl; 00174 // } 00175 // </srcblock> 00176 // Now <src>nr</src> contains the nr of records (=<src>nrdata</src>) 00177 // and <src>inx</src> an array of (sorted) indices. 00178 // 00179 // In the second example we sort the data stored in an array of structs 00180 // on the double (ascending) and the string (descending). We can pass 00181 // the data to the <src>Sort</src> constructor, and the offsets of the 00182 // struct elements to the <src>sortKey</src> function. 00183 // <srcblock> 00184 // struct Ts { 00185 // String as; 00186 // double ad; 00187 // } 00188 // Vector<uInt> inx; 00189 // Sort sort (tsarr, sizeof(Ts)); 00190 // sort.sortKey ((char*)&tsarr[0].ad - (char*)tsarr, TpDouble); 00191 // sort.sortKey ((char*)&tsarr[0].as - (char*)tsarr, TpString, 00192 // Sort::Descending); 00193 // sort.sort (inx, nrts); 00194 // </srcblock> 00195 // Note that the first argument in function <src>sortKey</src> gives 00196 // the offset of the variable in the struct. 00197 // 00198 // Alternatively, and probably slightly easier, we could pass the data 00199 // to the <src>sortKey</src> function and use an increment: 00200 // <srcblock> 00201 // struct Ts { 00202 // String as; 00203 // double ad; 00204 // } 00205 // Vector<uInt> inx; 00206 // Sort sort; 00207 // sort.sortKey (&tsarr[0].ad, TpDouble, sizeof(Ts)); 00208 // sort.sortKey (&tsarr[0].as, TpString, sizeof(Ts), Sort::Descending); 00209 // sort.sort (inx, nrts); 00210 // </srcblock> 00211 // 00212 // Finally, we could provide a comparison object for the struct. 00213 // <srcblock> 00214 // struct Ts { 00215 // String as; 00216 // double ad; 00217 // } 00218 // class MyCompare: public BaseCompare { 00219 // virtual int comp (const void* val1, const void* val2) const 00220 // { 00221 // const Ts& t1 = *(Ts*)val1; 00222 // const Ts& t2 = *(Ts*)val2; 00223 // if (t1.ad < t2.ad) return -1; 00224 // if (t1.ad > t2.ad) return 1; 00225 // if (t1.as < t2.as) return 1; // string must be descending 00226 // if (t1.as > t2.as) return -1; 00227 // return 0; 00228 // } 00229 // }; 00230 // Vector<uInt> inx; 00231 // Sort sort; 00232 // sort.sortKey (tsarr, compareTs, sizeof(Ts)); 00233 // sort.sort (inx, nrts); 00234 // </srcblock> 00235 00236 class Sort 00237 { 00238 public: 00239 // Enumerate the sort options: 00240 enum Option {HeapSort=1, // use Heapsort algorithm 00241 InsSort=2, // use insertion sort algorithm 00242 QuickSort=4, // use Quicksort algorithm 00243 NoDuplicates=8}; // skip data with equal sort keys 00244 00245 // Enumerate the sort order: 00246 enum Order {Ascending=-1, 00247 Descending=1}; 00248 00249 // The default constructor can be used when the data is only passed 00250 // in via function <src>sortKey</src>. 00251 Sort(); 00252 00253 // Construct a Sort object for the given data array with elements 00254 // of <src>elementSize</src> bytes. This data array will be used 00255 // when an offset is given to the <src>sortKey</src> functions. 00256 // You can still pass additional data arrays to the 00257 // <src>sortKey</src> functions. 00258 Sort (const void* data, uInt elementSize); 00259 00260 // Copy constructor (copy semantics). 00261 Sort (const Sort&); 00262 00263 ~Sort(); 00264 00265 // Assignment (copy semantics). 00266 Sort& operator= (const Sort&); 00267 00268 // Define a sort key (the most significant key should be defined first). 00269 // The key contains: 00270 // <ul> 00271 // <li> A pointer to the start of the data array. --- When structs are 00272 // sorted on an element in the struct, the pointer must point to 00273 // that element in the first struct. 00274 // <li> A pointer to the comparison object to be used. --- The 00275 // comparison object can be specified in two ways: 00276 // <ul> 00277 // <li> by giving a 00278 // <linkto group="DataType.h#DataType">basic data type</linkto>, 00279 // in which case the appropriate comparison object will be 00280 // created automatically, or 00281 // <li> by a CountedPtr of a comparison object. 00282 // You may want to use the templated comparison classes 00283 // <linkto class=ObjCompare>ObjCompare</linkto>(), 00284 // but you are free to use any other class derived from BaseCompare 00285 // that implements the <src>comp</src> function. 00286 // </ul> 00287 // <li> The increment from one data element to the next. --- When 00288 // structs are sorted on an element in the struct, the increment 00289 // should be the size of the struct. If the comparison object is 00290 // automatically created using the data type specified, the default 00291 // increment is the size of the data type. 00292 // <li> The sort order. --- <src>Ascending</src> (default) or 00293 // <src>Descending</src>; 00294 // </ul> 00295 // 00296 // When the data array has been passed to the Sort constructor, 00297 // the data pointer and the increment arguments can be replaced by a 00298 // single argument: the offset of the key in each element of the array. 00299 // 00300 // <group> 00301 void sortKey (const void* data, DataType, uInt increment = 0, 00302 Order = Ascending); 00303 void sortKey (const void* data, const CountedPtr<BaseCompare>&, 00304 uInt increment, Order = Ascending); 00305 void sortKey (uInt offset, DataType, Order = Ascending); 00306 void sortKey (uInt offset, const CountedPtr<BaseCompare>&, 00307 Order = Ascending); 00308 // </group> 00309 00310 // Sort the data array of <src>nrrec</src> records. 00311 // The result is an array of indices giving the requested order. 00312 // It returns the number of resulting records. The indices array 00313 // is resized to that number. 00314 uInt sort (Vector<uInt>& indexVector, uInt nrrec, 00315 int options = QuickSort) const; 00316 00317 // Get all unique records in a sorted array. The array order is 00318 // given in the indexVector (as possibly returned by the sort function). 00319 // The default indexVector is 0..nrrec-1. 00320 // The index of each first unique record is returned in the uniqueVector. 00321 // They are indices in the supplied indexVector, so 00322 // <src>data[indexVector(uniqueVector(i))]</src> 00323 // is giving the i-th unique record. 00324 // Note that the records indexed by <src>indexVector(uniqueVector(i))</src> 00325 // till <src>indexVector(uniqueVector(i+1))</src> are all the same. 00326 // <br> 00327 // It returns the number of unique records. The unique array 00328 // is resized to that number. 00329 // <group> 00330 uInt unique (Vector<uInt>& uniqueVector, uInt nrrec) const; 00331 uInt unique (Vector<uInt>& uniqueVector, 00332 const Vector<uInt>& indexVector) const; 00333 // </group> 00334 00335 private: 00336 // Copy that Sort object to this. 00337 void copy (const Sort& that); 00338 00339 // Add a sort key giving a data type or the sort key. 00340 // <group> 00341 void addKey (const void* data, DataType, uInt nr, int options); 00342 void addKey (SortKey*); 00343 // </group> 00344 00345 // Do an insertion sort, optionally skipping duplicates. 00346 // <group> 00347 uInt insSort (uInt nr, uInt* indices) const; 00348 uInt insSortNoDup (uInt nr, uInt* indices) const; 00349 // </group> 00350 00351 // Do a quicksort, optionally skipping duplicates 00352 // (qkSort is the actual quicksort function). 00353 // <group> 00354 uInt quickSort (uInt nr, uInt* indices) const; 00355 uInt quickSortNoDup (uInt nr, uInt* indices) const; 00356 void qkSort (Int nr, uInt* indices) const; 00357 // </group> 00358 00359 // Do a heapsort, optionally skipping duplicates. 00360 // <group> 00361 uInt heapSort (uInt nr, uInt* indices) const; 00362 uInt heapSortNoDup (uInt nr, uInt* indices) const; 00363 // </group> 00364 00365 // Siftdown algorithm for heapsort. 00366 void siftDown (Int low, Int up, uInt* indices) const; 00367 00368 // Compare the keys of 2 records. 00369 int compare (uInt index1, uInt index2) const; 00370 00371 // Swap 2 indices. 00372 inline void swap (Int index1, Int index2, uInt* indices) const; 00373 00374 00375 PtrBlock<SortKey*> keys_p; //# keys to sort on 00376 uInt nrkey_p; //# #sort-keys 00377 const void* data_p; //# pointer to data records 00378 uInt size_p; //# size of data record 00379 }; 00380 00381 00382 00383 inline void Sort::swap (Int i, Int j, uInt* inx) const 00384 { 00385 uInt t = inx[i]; 00386 inx[i] = inx[j]; 00387 inx[j] = t; 00388 } 00389 00390 00391 00392 } //# NAMESPACE CASA - END 00393 00394 #endif