00001 //# Sort.h: Sort objects on one or more keys 00002 //# Copyright (C) 1995,1996,1997,1998,1999,2000,2001 00003 //# Associated Universities, Inc. Washington DC, USA. 00004 //# 00005 //# This library is free software; you can redistribute it and/or modify it 00006 //# under the terms of the GNU Library General Public License as published by 00007 //# the Free Software Foundation; either version 2 of the License, or (at your 00008 //# option) any later version. 00009 //# 00010 //# This library is distributed in the hope that it will be useful, but WITHOUT 00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 00012 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 00013 //# License for more details. 00014 //# 00015 //# You should have received a copy of the GNU Library General Public License 00016 //# along with this library; if not, write to the Free Software Foundation, 00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA. 00018 //# 00019 //# Correspondence concerning AIPS++ should be addressed as follows: 00020 //# Internet email: aips2-request@nrao.edu. 00021 //# Postal address: AIPS++ Project Office 00022 //# National Radio Astronomy Observatory 00023 //# 520 Edgemont Road 00024 //# Charlottesville, VA 22903-2475 USA 00025 //# 00026 //# 00027 //# $Id$ 00028 00029 #ifndef CASA_SORT_H 00030 #define CASA_SORT_H 00031 00032 //# Includes 00033 #include <casa/aips.h> 00034 #include <casa/Utilities/ValType.h> 00035 #include <casa/Containers/Block.h> 00036 #include <casa/Utilities/Compare.h> 00037 00038 namespace casa { //# NAMESPACE CASA - BEGIN 00039 00040 //# Forward Declarations 00041 class AipsIO; 00042 template<class T> class Vector; 00043 00044 // <summary> Define a Sort key </summary> 00045 // <use visibility=local> 00046 // <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1"> 00047 // </reviewed> 00048 00049 // <synopsis> 00050 // SortKey is a helper class for the <linkto class=Sort>Sort</linkto> class. 00051 // It holds the following information about a sort key: 00052 // <ul> 00053 // <li> Address of the data array containing the sort key; 00054 // <li> Address of the comparison function to be used -- this must be 00055 // a function with the signature 00056 // <linkto group="Compare.h#ObjCompareFunc">ObjCompareFunc</linkto>; 00057 // <li> Increment for the next data point -- this lets you specify a 00058 // stride for keys embedded in a struct; 00059 // <li> Sort order -- ascending or descending; 00060 // </ul> 00061 // </synopsis> 00062 00063 class SortKey 00064 { 00065 public: 00066 friend class Sort; 00067 00068 // Define a sort key in a given data array using the indicated 00069 // comparison function, stride and sort order. 00070 SortKey (const void* data, ObjCompareFunc*, uInt increment, int order); 00071 00072 // Copy constructor (copy semantics). 00073 SortKey (const SortKey&); 00074 00075 ~SortKey(); 00076 00077 // Assignment (copy semantics). 00078 SortKey& operator= (const SortKey&); 00079 00080 // Try if GenSort can be used for this single key. 00081 // If it succeeds, it returns the resulting number of elements. 00082 // Otherwise it returns 0. 00083 uInt tryGenSort (Vector<uInt>& indexVector, uInt nrrec, int opt) const; 00084 00085 protected: 00086 // sort order; -1 = ascending, 1 = descending 00087 int order_p; 00088 // address of first data point 00089 const void* data_p; 00090 // increment for next data point 00091 uInt incr_p; 00092 // ptr to comparison routine 00093 ObjCompareFunc* cmpFunc_p; 00094 }; 00095 00096 00097 00098 // <summary> Sort on one or more keys, ascending and/or descending </summary> 00099 // <use visibility=export> 00100 // <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1"> 00101 // </reviewed> 00102 00103 // <synopsis> 00104 // <src>Sort</src> lets you sort data on one or more keys in a mix of 00105 // <src>Sort::ascending</src> and <src>Sort::descending</src> order. 00106 // Duplicates can be skipped by giving the option 00107 // <src>Sort::NoDuplicates</src>. Only in this case the number of output 00108 // elements can be different from the number of input elements. 00109 // <br>The <src>unique</src> offers another way of getting unique values, 00110 // <p> 00111 // Class <src>Sort</src> does not sort the data themselves, but 00112 // returns an index to them. This gives more flexibility and 00113 // allows the sort to be stable; but it is slower. 00114 // Very fast sorting of the data themselves can be done with the 00115 // functions in class <linkto class=GenSort>GenSort</linkto>. 00116 // <br> 00117 // Three sort algorithms are provided: 00118 // <DL> 00119 // <DT> <src>Sort::InsSort</src> 00120 // <DD> Insertion sort has O(n*n) behaviour, thus is very slow. It 00121 // will only be very fast when the array is already (almost) in the 00122 // right order. 00123 // <DT> <src>Sort::QuickSort</src> 00124 // <DD> Care has been taken to solve the well-known quicksort problems 00125 // like "array already in order" or "many equal elements". The 00126 // behaviour is O(n*log(n)) in all the cases tested, even in 00127 // degenerated cases where the SUN Solaris qsort algorithm is O(n*n). 00128 // <DT> <src>Sort::HeapSort</src> 00129 // <DD> Heapsort has O(n*log(n)) behaviour. Its speed is higher than 00130 // that of QuickSort, so it is the default algorithm. 00131 // </DL> 00132 // All sort algorithms are <em>stable</em>, which means that the original 00133 // order is kept when keys are equal. 00134 // 00135 // The sort is a four step process: 00136 // <ol> 00137 // <li> Construct the <src>Sort</src> object. 00138 // <li> Define the sort keys. The function <src>sortKey</src> must be 00139 // called for each sort key (the most significant one first). 00140 // The comparison function can be passed in directly, or a 00141 // <linkto group="DataType.h#DataType">basic data type</linkto> 00142 // can be given. In the latter case the appropriate comparison 00143 // function will be selected. 00144 // <li> Sort the data. The function <src>sort</src> returns an index 00145 // array, which is allocated when needed. 00146 // <li> Destruct the <src>Sort</src> object (usually done automatically) 00147 // and delete the index array. 00148 // </ol> 00149 // The data can be in a single array of structs, in separate arrays, or 00150 // in a mix of those. Of course, all arrays must have the same length. 00151 // The data can be passed to the <src>Sort</src> constructor and/or to the 00152 // <src>sortKey</src> function. If passed to the <src>Sort</src> constructor, 00153 // the offset of the data item in the data array must be given to 00154 // <src>sortKey</src>. 00155 // </synopsis> 00156 00157 // <example> 00158 // In the first example we sort the data contained in two "parallel" 00159 // arrays, <src>idata</src> and <src>ddata</src>, both of length 00160 // <src>nrdata</src>. 00161 // <srcblock> 00162 // Sort sort; 00163 // sort.sortKey (idata, TpInt); // define 1st sort key 00164 // sort.sortKey (ddata, TpDouble,0,Sort::Descending); // define 2nd sort key 00165 // uInt* inx=0; 00166 // sort.sort (nrdata, inx); 00167 // for (uInt i=0; i<nrdata; i++) { // show sorted data 00168 // cout << idata[inx[i]] << " " << ddata[inx[i]] << endl; 00169 // } 00170 // delete inx; 00171 // </srcblock> 00172 // Now <src>nr</src> contains the nr of records (=<src>nrdata</src>) 00173 // and <src>inx</src> an array of (sorted) indices. 00174 // The index array <src>inx</src> has to be deleted by the user. 00175 // 00176 // In the second example we sort the data stored in an array of structs 00177 // on the double (ascending) and the string (descending). We can pass 00178 // the data to the <src>Sort</src> constructor, and the offsets of the 00179 // struct elements to the <src>sortKey</src> function. 00180 // <srcblock> 00181 // struct Ts { 00182 // String as; 00183 // double ad; 00184 // } 00185 // uInt* inx=0; 00186 // Sort sort (tsarr, sizeof(Ts)); 00187 // sort.sortKey ((char*)&tsarr[0].ad - (char*)tsarr, TpDouble); 00188 // sort.sortKey ((char*)&tsarr[0].as - (char*)tsarr, TpString, 00189 // Sort::Descending); 00190 // sort.sort (nrts, inx); 00191 // </srcblock> 00192 // Note that the first argument in function <src>sortKey</src> gives 00193 // the offset of the variable in the struct. 00194 // 00195 // Alternatively, and probably slightly easier, we could pass the data 00196 // to the <src>sortKey</src> function: 00197 // <srcblock> 00198 // struct Ts { 00199 // String as; 00200 // double ad; 00201 // } 00202 // uInt* inx=0; 00203 // Sort sort; 00204 // sort.sortKey (&tsarr[0].ad, TpDouble, sizeof(Ts)); 00205 // sort.sortKey (&tsarr[0].as, TpString, sizeof(Ts), Sort::Descending); 00206 // sort.sort (nrts, inx); 00207 // </srcblock> 00208 // 00209 // Finally, we could provide a comparison function for the struct. 00210 // (The function is shown inline, but should be implemented out-of-line.) 00211 // This method is not recommended, because it means more work for the user. 00212 // Moreover, the descending sort on the string has to be coded in the 00213 // comparison function, which is not very flexible. 00214 // <srcblock> 00215 // struct Ts { 00216 // String as; 00217 // double ad; 00218 // } 00219 // int compareTs (const void* val1, const void* val2) 00220 // { 00221 // const Ts& t1 = *(Ts*)val1; 00222 // const Ts& t2 = *(Ts*)val2; 00223 // if (t1.ad < t2.ad) return -1; 00224 // if (t1.ad > t2.ad) return 1; 00225 // if (t1.as < t2.as) return 1; // string must be descending 00226 // if (t1.as > t2.as) return -1; 00227 // return 0; 00228 // } 00229 // uInt* inx=0; 00230 // Sort sort; 00231 // sort.sortKey (tsarr, compareTs, sizeof(Ts)); 00232 // sort.sort (nrts, inx); 00233 // </srcblock> 00234 // 00235 // The last example illustrates the use of the 00236 // <src>Sort::NoDuplicates</src> flag and an input index array. 00237 // First we remove duplicate strings, and then sort the result. 00238 // <srcblock> 00239 // struct Ts { 00240 // String as; 00241 // double ad; 00242 // } 00243 // uInt* inxNoDup=0; 00244 // Sort sort; 00245 // sort.sortKey (&tsarr[0].as, TpString, sizeof(Ts)); 00246 // uInt nrout = sort.sort (nrts, inxNoDup, Sort::NoDuplicates); 00247 // Sort sort2; 00248 // sort2.sortKey (&tsarr[0].ad, TpDouble, sizeof(Ts)); 00249 // sort2.sortKey (&tsarr[0].as, TpString, sizeof(Ts), Sort::Descending); 00250 // uInt* inx=0; 00251 // sort2.sort (nrout, inxNoDup, inx); 00252 // </srcblock> 00253 // </example> 00254 00255 00256 class Sort 00257 { 00258 public: 00259 // Enumerate the sort options: 00260 enum Option {HeapSort=1, // use Heapsort algorithm 00261 InsSort=2, // use insertion sort algorithm 00262 QuickSort=4, // use Quicksort algorithm 00263 NoDuplicates=8}; // skip data with equal sort keys 00264 00265 // Enumerate the sort order: 00266 enum Order {Ascending=-1, 00267 Descending=1}; 00268 00269 // The default constructor can be used when the data is only passed 00270 // in via function <src>sortKey</src>. 00271 Sort(); 00272 00273 // Construct a Sort object for the given data array with elements 00274 // of <src>elementSize</src> bytes. This data array will be used 00275 // when an offset is given to the <src>sortKey</src> functions. 00276 // You can still pass additional data arrays to the 00277 // <src>sortKey</src> functions. 00278 Sort (const void* data, uInt elementSize); 00279 00280 // Copy constructor (copy semantics). 00281 Sort (const Sort&); 00282 00283 ~Sort(); 00284 00285 // Assignment (copy semantics). 00286 Sort& operator= (const Sort&); 00287 00288 // Define a sort key (the most significant key should be defined first). 00289 // The key contains: 00290 // <ul> 00291 // <li> A pointer to the start of the data array. --- When structs are 00292 // sorted on an element in the struct, the pointer must point to 00293 // that element in the first struct. 00294 // <li> A pointer to the comparison function to be used. --- The 00295 // comparison function can be specified in two ways: 00296 // <ul> 00297 // <li> by giving a 00298 // <linkto group="DataType.h#DataType">basic data type</linkto>, 00299 // in which case the appropriate comparison function will be 00300 // selected automatically, or 00301 // <li> by pointing directly to a comparison function with the 00302 // signature 00303 // <linkto group="Compare.h#ObjCompareFunc">ObjCompareFunc</linkto>. 00304 // You may want to use the templated comparison function 00305 // <linkto class=ObjCompare>ObjCompare::compare</linkto>(), 00306 // but you are free to use any other function that can be called as: 00307 // <ul> 00308 // <li> int (const void* value1, const void* value2) 00309 // </ul> 00310 // and returns: 00311 // <ul> 00312 // <li> -1 if value1 < value2 00313 // <li> 1 if value1 > value2 00314 // <li> 0 if value1 == value2. 00315 // </ul> 00316 // </ul> 00317 // <li> The increment from one data element to the next. --- When 00318 // structs are sorted on an element in the struct, the increment 00319 // should be the size of the struct. When the comparison function is 00320 // automatically determined from the data type specified, the default 00321 // increment is the size of the data type. 00322 // <li> The sort order. --- <src>Ascending</src> (default) or 00323 // <src>Descending</src>; 00324 // </ul> 00325 // 00326 // When the data array has been passed to the Sort constructor, 00327 // the data pointer and the increment arguments can be replaced by a 00328 // single argument: the offset of the key in each element of the array. 00329 // 00330 // <group> 00331 void sortKey (const void* data, DataType, uInt increment = 0, 00332 Order = Ascending); 00333 void sortKey (const void* data, ObjCompareFunc*, uInt increment, 00334 Order = Ascending); 00335 void sortKey (uInt offset, DataType, Order = Ascending); 00336 void sortKey (uInt offset, ObjCompareFunc*, Order = Ascending); 00337 // </group> 00338 00339 // Sort the data array of <src>nrrec</src> records. 00340 // The result is an array of indices giving the requested order. 00341 // It returns the number of resulting records. The indices array 00342 // is resized to that number. 00343 uInt sort (Vector<uInt>& indexVector, uInt nrrec, 00344 int options = HeapSort) const; 00345 00346 // Get all unique records in a sorted array. The array order is 00347 // given in the indexVector (as possibly returned by the sort function). 00348 // The default indexVector is 0..nrrec-1. 00349 // The index of each first unique record is returned in the uniqueVector. 00350 // They are indices in the supplied indexVector, so 00351 // <src>data[indexVector(uniqueVector(i))]</src> 00352 // is giving the i-th unique record. 00353 // Note that the records indexed by <src>indexVector(uniqueVector(i))</src> 00354 // till <src>indexVector(uniqueVector(i+1))</src> are all the same. 00355 // <br> 00356 // It returns the number of unique records. The unique array 00357 // is resized to that number. 00358 // <group> 00359 uInt unique (Vector<uInt>& uniqueVector, uInt nrrec) const; 00360 uInt unique (Vector<uInt>& uniqueVector, 00361 const Vector<uInt>& indexVector) const; 00362 // </group> 00363 00364 private: 00365 // Copy that Sort object to this. 00366 void copy (const Sort& that); 00367 00368 // Add a sort key giving a data type or a compare function. 00369 // <group> 00370 void addKey (const void* data, DataType, uInt nr, int options); 00371 void addKey (const void* data, ObjCompareFunc*, uInt nr, int options); 00372 // </group> 00373 00374 // Do an insertion sort, optionally skipping duplicates. 00375 // <group> 00376 uInt insSort (uInt nr, uInt* indices) const; 00377 uInt insSortNoDup (uInt nr, uInt* indices) const; 00378 // </group> 00379 00380 // Do a quicksort, optionally skipping duplicates 00381 // (qkSort is the actual quicksort function). 00382 // <group> 00383 uInt quickSort (uInt nr, uInt* indices) const; 00384 uInt quickSortNoDup (uInt nr, uInt* indices) const; 00385 void qkSort (Int nr, uInt* indices) const; 00386 // </group> 00387 00388 // Do a heapsort, optionally skipping duplicates. 00389 // <group> 00390 uInt heapSort (uInt nr, uInt* indices) const; 00391 uInt heapSortNoDup (uInt nr, uInt* indices) const; 00392 // </group> 00393 00394 // Siftdown algorithm for heapsort. 00395 void siftDown (Int low, Int up, uInt* indices) const; 00396 00397 // Compare the keys of 2 records. 00398 int compare (uInt index1, uInt index2) const; 00399 00400 // Swap 2 indices. 00401 inline void swap (Int index1, Int index2, uInt* indices) const; 00402 00403 00404 Block<void*> keys_p; //# keys to sort on 00405 uInt nrkey_p; //# #sort-keys 00406 const void* data_p; //# pointer to data records 00407 uInt size_p; //# size of data record 00408 }; 00409 00410 00411 00412 inline void Sort::swap (Int i, Int j, uInt* inx) const 00413 { 00414 uInt t = inx[i]; 00415 inx[i] = inx[j]; 00416 inx[j] = t; 00417 } 00418 00419 00420 00421 } //# NAMESPACE CASA - END 00422 00423 #endif
1.5.1