casa  5.7.0-16
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Sort.h
Go to the documentation of this file.
1 //# Sort.h: Sort objects on one or more keys
2 //# Copyright (C) 1995,1996,1997,1998,1999,2000,2001
3 //# Associated Universities, Inc. Washington DC, USA.
4 //#
5 //# This library is free software; you can redistribute it and/or modify it
6 //# under the terms of the GNU Library General Public License as published by
7 //# the Free Software Foundation; either version 2 of the License, or (at your
8 //# option) any later version.
9 //#
10 //# This library is distributed in the hope that it will be useful, but WITHOUT
11 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 //# License for more details.
14 //#
15 //# You should have received a copy of the GNU Library General Public License
16 //# along with this library; if not, write to the Free Software Foundation,
17 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 //#
19 //# Correspondence concerning AIPS++ should be addressed as follows:
20 //# Internet email: aips2-request@nrao.edu.
21 //# Postal address: AIPS++ Project Office
22 //# National Radio Astronomy Observatory
23 //# 520 Edgemont Road
24 //# Charlottesville, VA 22903-2475 USA
25 //#
26 //#
27 //# $Id$
28 
29 #ifndef CASA_SORT_H
30 #define CASA_SORT_H
31 
32 //# Includes
33 #include <casacore/casa/aips.h>
38 
39 namespace casacore { //# NAMESPACE CASACORE - BEGIN
40 
41 //# Forward Declarations
42 template<class T> class Vector;
43 
44 // <summary> Define a Sort key </summary>
45 // <use visibility=local>
46 // <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1">
47 // </reviewed>
48 
49 // <synopsis>
50 // SortKey is a helper class for the <linkto class=Sort>Sort</linkto> class.
51 // It holds the following information about a sort key:
52 // <ul>
53 // <li> Address of the data array containing the sort key;
54 // <li> A CountedPtr to a comparison object to be used
55 // (of a class derived from the abstract base class BaseCompare).
56 // <li> Increment for the next data point -- this lets you specify a
57 // stride for keys embedded in a struct;
58 // <li> Sort order -- ascending or descending;
59 // </ul>
60 // </synopsis>
61 
62 class SortKey
63 {
64 public:
65  friend class Sort;
66 
67  // Define a sort key in a given data array using the indicated
68  // comparison object, stride and sort order.
69  SortKey (const void* data, const CountedPtr<BaseCompare>&,
70  uInt increment, int order);
71 
72  // Copy constructor (copy semantics).
73  SortKey (const SortKey&);
74 
75  ~SortKey();
76 
77  // Assignment (copy semantics).
78  SortKey& operator= (const SortKey&);
79 
80  // Try if GenSort can be used for this single key.
81  // If it succeeds, it returns the resulting number of elements.
82  // Otherwise it returns 0.
83  uInt tryGenSort (Vector<uInt>& indexVector, uInt nrrec, int opt) const;
84 
85  // Get the sort order.
86  int order() const
87  { return order_p; }
88 
89 protected:
90  // sort order; -1 = ascending, 1 = descending
91  int order_p;
92  // address of first data point
93  const void* data_p;
94  // increment for next data point
96  // comparison object; use CountedPtr for memory management
98  // comparison object; use raw pointer for performance
100 };
101 
102 
103 
104 // <summary> Sort on one or more keys, ascending and/or descending </summary>
105 // <use visibility=export>
106 // <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1">
107 // </reviewed>
108 
109 // <synopsis>
110 // <src>Sort</src> lets you sort data on one or more keys in a mix of
111 // <src>Sort::ascending</src> and <src>Sort::descending</src> order.
112 // Duplicates can be skipped by giving the option
113 // <src>Sort::NoDuplicates</src>. Only in this case the number of output
114 // elements can be different from the number of input elements.
115 // <br>The <src>unique</src> function offers another way of getting
116 // unique values.
117 // <p>
118 // Class <src>Sort</src> does not sort the data themselves, but
119 // returns an index to them. This gives more flexibility and
120 // allows the sort to be stable; but it is slower.
121 // <br>Very fast sorting of the data themselves can be done with the
122 // functions in class <linkto class=GenSort>GenSort</linkto>.
123 // If sorting on a single key with a standard data type is done,
124 // Sort will use GenSortIndirect to speed up the sort.
125 // <br>
126 // Four sort algorithms are provided:
127 // <DL>
128 // <DT> <src>Sort::ParSort</src>
129 // <DD> The parallel merge sort is the fastest if it can use multiple threads.
130 // For a single thread it has O(n*log(n)) behaviour, but is slower
131 // than quicksort.
132 // A drawback is that it needs an extra index array to do the merge.
133 // <DT> <src>Sort::InsSort</src>
134 // <DD> Insertion sort has O(n*n) behaviour, thus is very slow for large
135 // arrays. However, it is the fastest method for small arrays
136 // (< 50 elements) and for arrays already (almost) in the right order.
137 // <DT> <src>Sort::QuickSort</src>
138 // <DD> Care has been taken to solve the well-known quicksort problems
139 // like "array already in order" or "many equal elements". The
140 // behaviour is O(n*log(n)) in all the cases tested, even in
141 // degenerated cases where the SUN Solaris qsort algorithm is O(n*n).
142 // <DT> <src>Sort::HeapSort</src>
143 // <DD> Heapsort has O(n*log(n)) behaviour. Its speed is lower than
144 // that of QuickSort, so QuickSort is the default algorithm.
145 // </DL>
146 // The default is to use QuickSort for small arrays or if only a single
147 // thread can be used. Otherwise ParSort is the default.
148 //
149 // All sort algorithms are <em>stable</em>, which means that the original
150 // order is kept when keys are equal.
151 //
152 // The sort is a four step process:
153 // <ol>
154 // <li> Construct the <src>Sort</src> object.
155 // <li> Define the sort keys. The function <src>sortKey</src> must be
156 // called for each sort key (the most significant one first).
157 // The comparison object can be passed in directly, or a
158 // <linkto group="DataType.h#DataType">basic data type</linkto>
159 // can be given. In the latter case the appropriate ObjCompare
160 // comparison object will be created.
161 // <li> Sort the data. The function <src>sort</src> returns an index
162 // array, which is allocated when needed.
163 // <li> Destruct the <src>Sort</src> object (usually done automatically)
164 // and delete the index array.
165 // </ol>
166 // The data can be in a single array of structs, in separate arrays, or
167 // in a mix of those. Of course, all arrays must have the same length.
168 // The data can be passed to the <src>Sort</src> constructor and/or to the
169 // <src>sortKey</src> function. If passed to the <src>Sort</src> constructor,
170 // the offset of the data item in the data array must be given to
171 // <src>sortKey</src>.
172 // </synopsis>
173 
174 // <example>
175 // In the first example we sort the data contained in two "parallel"
176 // arrays, <src>idata</src> and <src>ddata</src>, both of length
177 // <src>nrdata</src>.
178 // <srcblock>
179 // Sort sort;
180 // sort.sortKey (idata, TpInt); // define 1st sort key
181 // sort.sortKey (ddata, TpDouble,0,Sort::Descending); // define 2nd sort key
182 // Vector<uInt> inx;
183 // sort.sort (inx, nrdata);
184 // for (uInt i=0; i<nrdata; i++) { // show sorted data
185 // cout << idata[inx[i]] << " " << ddata[inx[i]] << endl;
186 // }
187 // </srcblock>
188 // Now <src>nr</src> contains the nr of records (=<src>nrdata</src>)
189 // and <src>inx</src> an array of (sorted) indices.
190 //
191 // In the second example we sort the data stored in an array of structs
192 // on the double (ascending) and the string (descending). We can pass
193 // the data to the <src>Sort</src> constructor, and the offsets of the
194 // struct elements to the <src>sortKey</src> function.
195 // <srcblock>
196 // struct Ts {
197 // String as;
198 // double ad;
199 // }
200 // Vector<uInt> inx;
201 // Sort sort (tsarr, sizeof(Ts));
202 // sort.sortKey ((char*)&tsarr[0].ad - (char*)tsarr, TpDouble);
203 // sort.sortKey ((char*)&tsarr[0].as - (char*)tsarr, TpString,
204 // Sort::Descending);
205 // sort.sort (inx, nrts);
206 // </srcblock>
207 // Note that the first argument in function <src>sortKey</src> gives
208 // the offset of the variable in the struct.
209 //
210 // Alternatively, and probably slightly easier, we could pass the data
211 // to the <src>sortKey</src> function and use an increment:
212 // <srcblock>
213 // struct Ts {
214 // String as;
215 // double ad;
216 // }
217 // Vector<uInt> inx;
218 // Sort sort;
219 // sort.sortKey (&tsarr[0].ad, TpDouble, sizeof(Ts));
220 // sort.sortKey (&tsarr[0].as, TpString, sizeof(Ts), Sort::Descending);
221 // sort.sort (inx, nrts);
222 // </srcblock>
223 //
224 // Finally, we could provide a comparison object for the struct.
225 // <srcblock>
226 // struct Ts {
227 // String as;
228 // double ad;
229 // }
230 // class MyCompare: public BaseCompare {
231 // virtual int comp (const void* val1, const void* val2) const
232 // {
233 // const Ts& t1 = *(Ts*)val1;
234 // const Ts& t2 = *(Ts*)val2;
235 // if (t1.ad < t2.ad) return -1;
236 // if (t1.ad > t2.ad) return 1;
237 // if (t1.as < t2.as) return 1; // string must be descending
238 // if (t1.as > t2.as) return -1;
239 // return 0;
240 // }
241 // };
242 // Vector<uInt> inx;
243 // Sort sort;
244 // sort.sortKey (tsarr, compareTs, sizeof(Ts));
245 // sort.sort (inx, nrts);
246 // </srcblock>
247 
248 class Sort
249 {
250 public:
251  // Enumerate the sort options:
252  enum Option {DefaultSort=0, // ParSort, but QuickSort for small array
253  HeapSort=1, // use Heapsort algorithm
254  InsSort=2, // use insertion sort algorithm
255  QuickSort=4, // use Quicksort algorithm
256  ParSort=8, // use parallel merge sort algorithm
257  NoDuplicates=16}; // skip data with equal sort keys
258 
259  // Enumerate the sort order:
260  enum Order {Ascending=-1,
262 
263  // The default constructor can be used when the data is only passed
264  // in via function <src>sortKey</src>.
265  Sort();
266 
267  // Construct a Sort object for the given data array with elements
268  // of <src>elementSize</src> bytes. This data array will be used
269  // when an offset is given to the <src>sortKey</src> functions.
270  // You can still pass additional data arrays to the
271  // <src>sortKey</src> functions.
272  Sort (const void* data, uInt elementSize);
273 
274  // Copy constructor (copy semantics).
275  Sort (const Sort&);
276 
277  ~Sort();
278 
279  // Assignment (copy semantics).
280  Sort& operator= (const Sort&);
281 
282  // Define a sort key (the most significant key should be defined first).
283  // The key contains:
284  // <ul>
285  // <li> A pointer to the start of the data array. --- When structs are
286  // sorted on an element in the struct, the pointer must point to
287  // that element in the first struct.
288  // <li> A pointer to the comparison object to be used. --- The
289  // comparison object can be specified in two ways:
290  // <ul>
291  // <li> by giving a
292  // <linkto group="DataType.h#DataType">basic data type</linkto>,
293  // in which case the appropriate comparison object will be
294  // created automatically, or
295  // <li> by a CountedPtr of a comparison object.
296  // You may want to use the templated comparison classes
297  // <linkto class=ObjCompare>ObjCompare</linkto>(),
298  // but you are free to use any other class derived from BaseCompare
299  // that implements the <src>comp</src> function.
300  // </ul>
301  // <li> The increment from one data element to the next. --- When
302  // structs are sorted on an element in the struct, the increment
303  // should be the size of the struct. If the comparison object is
304  // automatically created using the data type specified, the default
305  // increment is the size of the data type.
306  // <li> The sort order. --- <src>Ascending</src> (default) or
307  // <src>Descending</src>;
308  // </ul>
309  //
310  // When the data array has been passed to the Sort constructor,
311  // the data pointer and the increment arguments can be replaced by a
312  // single argument: the offset of the key in each element of the array.
313  //
314  // <group>
315  void sortKey (const void* data, DataType, uInt increment = 0,
316  Order = Ascending);
317  void sortKey (const void* data, const CountedPtr<BaseCompare>&,
318  uInt increment, Order = Ascending);
319  void sortKey (uInt offset, DataType, Order = Ascending);
320  void sortKey (uInt offset, const CountedPtr<BaseCompare>&,
321  Order = Ascending);
322  // </group>
323 
324  // Sort the data array of <src>nrrec</src> records.
325  // The result is an array of indices giving the requested order.
326  // It returns the number of resulting records. The indices array
327  // is resized to that number.
328  // <br> By default it'll try if the faster GenSortIndirect can be used
329  // if a sort on a single key is used.
330  uInt sort (Vector<uInt>& indexVector, uInt nrrec,
331  int options = DefaultSort, Bool tryGenSort = True) const;
332 
333  // Get all unique records in a sorted array. The array order is
334  // given in the indexVector (as possibly returned by the sort function).
335  // The default indexVector is 0..nrrec-1.
336  // The index of each first unique record is returned in the uniqueVector.
337  // They are indices in the supplied indexVector, so
338  // <src>data[indexVector(uniqueVector(i))]</src>
339  // is giving the i-th unique record.
340  // Note that the records indexed by <src>indexVector(uniqueVector(i))</src>
341  // till <src>indexVector(uniqueVector(i+1))</src> are all the same.
342  // <br>
343  // It returns the number of unique records. The unique array
344  // is resized to that number.
345  // <group>
346  uInt unique (Vector<uInt>& uniqueVector, uInt nrrec) const;
347  uInt unique (Vector<uInt>& uniqueVector,
348  const Vector<uInt>& indexVector) const;
349  // </group>
350 
351 private:
352  // Copy that Sort object to this.
353  void copy (const Sort& that);
354 
355  // Add a sort key giving a data type or the sort key.
356  // <group>
357  void addKey (const void* data, DataType, uInt nr, int options);
358  void addKey (SortKey*);
359  // </group>
360 
361  // Do an insertion sort, optionally skipping duplicates.
362  // <group>
363  uInt insSort (uInt nr, uInt* indices) const;
364  uInt insSortNoDup (uInt nr, uInt* indices) const;
365  // </group>
366 
367  // Do a merge sort, if possible in parallel using OpenMP.
368  // Note that the env.var. OMP_NUM_TRHEADS sets the maximum nr of threads
369  // to use. It defaults to the number of cores.
370  uInt parSort (int nthr, uInt nrrec, uInt* inx) const;
371  void merge (uInt* inx, uInt* tmp, uInt size, uInt* index,
372  uInt nparts) const;
373 
374  // Do a quicksort, optionally skipping duplicates
375  // (qkSort is the actual quicksort function).
376  // <group>
377  uInt quickSort (uInt nr, uInt* indices) const;
378  uInt quickSortNoDup (uInt nr, uInt* indices) const;
379  void qkSort (Int nr, uInt* indices) const;
380  // </group>
381 
382  // Do a heapsort, optionally skipping duplicates.
383  // <group>
384  uInt heapSort (uInt nr, uInt* indices) const;
385  uInt heapSortNoDup (uInt nr, uInt* indices) const;
386  // </group>
387 
388  // Siftdown algorithm for heapsort.
389  void siftDown (Int low, Int up, uInt* indices) const;
390 
391  // Compare the keys of 2 records.
392  int compare (uInt index1, uInt index2) const;
393 
394  // Swap 2 indices.
395  inline void swap (Int index1, Int index2, uInt* indices) const;
396 
397 
398  PtrBlock<SortKey*> keys_p; //# keys to sort on
399  uInt nrkey_p; //# #sort-keys
400  const void* data_p; //# pointer to data records
401  uInt size_p; //# size of data record
402  int order_p; //# -1=asc 0=mixed 1=desc
403 };
404 
405 
406 
407 inline void Sort::swap (Int i, Int j, uInt* inx) const
408 {
409  uInt t = inx[i];
410  inx[i] = inx[j];
411  inx[j] = t;
412 }
413 
414 
415 
416 } //# NAMESPACE CASACORE - END
417 
418 #endif
int Int
Definition: aipstype.h:50
std::vector< double > Vector
Definition: ds9context.h:24
uInt quickSort(uInt nr, uInt *indices) const
Do a quicksort, optionally skipping duplicates (qkSort is the actual quicksort function).
uInt size_p
Definition: Sort.h:401
uInt heapSortNoDup(uInt nr, uInt *indices) const
int order_p
Definition: Sort.h:402
Order
Enumerate the sort order:
Definition: Sort.h:260
Define a Sort key.
Definition: Sort.h:62
void qkSort(Int nr, uInt *indices) const
int compare(uInt index1, uInt index2) const
Compare the keys of 2 records.
void merge(uInt *inx, uInt *tmp, uInt size, uInt *index, uInt nparts) const
size_t size() const
uInt heapSort(uInt nr, uInt *indices) const
Do a heapsort, optionally skipping duplicates.
ABSTRACT CLASSES Deliberately vague to be general enough to allow for many different types of data
Definition: PlotData.h:48
Options options
int order_p
sort order; -1 = ascending, 1 = descending
Definition: Sort.h:91
Sort on one or more keys, ascending and/or descending.
Definition: Sort.h:248
int order() const
Get the sort order.
Definition: Sort.h:86
Referenced counted pointer for constant data.
Definition: VisModelData.h:42
void swap(Int index1, Int index2, uInt *indices) const
Swap 2 indices.
Definition: Sort.h:407
Option
Enumerate the sort options:
Definition: Sort.h:252
uInt parSort(int nthr, uInt nrrec, uInt *inx) const
Do a merge sort, if possible in parallel using OpenMP.
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
SortKey(const void *data, const CountedPtr< BaseCompare > &, uInt increment, int order)
Define a sort key in a given data array using the indicated comparison object, stride and sort order...
void copy(const Sort &that)
Copy that Sort object to this.
A drop-in replacement for Block&lt;T*&gt;.
Definition: WProjectFT.h:54
uInt quickSortNoDup(uInt nr, uInt *indices) const
uInt nrkey_p
Definition: Sort.h:399
SortKey & operator=(const SortKey &)
Assignment (copy semantics).
CountedPtr< BaseCompare > ccmpObj_p
comparison object; use CountedPtr for memory management
Definition: Sort.h:97
abstract base class for comparing two objects
Definition: Compare.h:64
BaseCompare * cmpObj_p
comparison object; use raw pointer for performance
Definition: Sort.h:99
Sort()
The default constructor can be used when the data is only passed in via function sortKey.
uInt insSortNoDup(uInt nr, uInt *indices) const
const void * data_p
Definition: Sort.h:400
void addKey(const void *data, DataType, uInt nr, int options)
Add a sort key giving a data type or the sort key.
uInt unique(Vector< uInt > &uniqueVector, uInt nrrec) const
Get all unique records in a sorted array.
uInt incr_p
increment for next data point
Definition: Sort.h:95
void sortKey(const void *data, DataType, uInt increment=0, Order=Ascending)
Define a sort key (the most significant key should be defined first).
const void * data_p
address of first data point
Definition: Sort.h:93
void siftDown(Int low, Int up, uInt *indices) const
Siftdown algorithm for heapsort.
PtrBlock< SortKey * > keys_p
Definition: Sort.h:398
Sort & operator=(const Sort &)
Assignment (copy semantics).
const Bool True
Definition: aipstype.h:43
uInt sort(Vector< uInt > &indexVector, uInt nrrec, int options=DefaultSort, Bool tryGenSort=True) const
Sort the data array of nrrec records.
uInt tryGenSort(Vector< uInt > &indexVector, uInt nrrec, int opt) const
Try if GenSort can be used for this single key.
unsigned int uInt
Definition: aipstype.h:51
uInt insSort(uInt nr, uInt *indices) const
Do an insertion sort, optionally skipping duplicates.
#define casacore
&lt;X11/Intrinsic.h&gt; #defines true, false, casacore::Bool, and String.
Definition: X11Intrinsic.h:42