00001 //# Sort.h: Sort objects on one or more keys 00002 //# Copyright (C) 1995,1996,1997,1998,1999,2000,2001 00003 //# Associated Universities, Inc. Washington DC, USA. 00004 //# 00005 //# This library is free software; you can redistribute it and/or modify it 00006 //# under the terms of the GNU Library General Public License as published by 00007 //# the Free Software Foundation; either version 2 of the License, or (at your 00008 //# option) any later version. 00009 //# 00010 //# This library is distributed in the hope that it will be useful, but WITHOUT 00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 00012 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 00013 //# License for more details. 00014 //# 00015 //# You should have received a copy of the GNU Library General Public License 00016 //# along with this library; if not, write to the Free Software Foundation, 00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA. 00018 //# 00019 //# Correspondence concerning AIPS++ should be addressed as follows: 00020 //# Internet email: aips2-request@nrao.edu. 00021 //# Postal address: AIPS++ Project Office 00022 //# National Radio Astronomy Observatory 00023 //# 520 Edgemont Road 00024 //# Charlottesville, VA 22903-2475 USA 00025 //# 00026 //# 00027 //# $Id$ 00028 00029 #ifndef CASA_SORT_H 00030 #define CASA_SORT_H 00031 00032 //# Includes 00033 #include <casacore/casa/aips.h> 00034 #include <casacore/casa/Utilities/ValType.h> 00035 #include <casacore/casa/Containers/Block.h> 00036 #include <casacore/casa/Utilities/Compare.h> 00037 #include <casacore/casa/Utilities/CountedPtr.h> 00038 00039 namespace casacore { //# NAMESPACE CASACORE - BEGIN 00040 00041 //# Forward Declarations 00042 template<class T> class Vector; 00043 00044 // <summary> Define a Sort key </summary> 00045 // <use visibility=local> 00046 // <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1"> 00047 // </reviewed> 00048 00049 // <synopsis> 00050 // SortKey is a helper class for the <linkto class=Sort>Sort</linkto> class. 00051 // It holds the following information about a sort key: 00052 // <ul> 00053 // <li> Address of the data array containing the sort key; 00054 // <li> A CountedPtr to a comparison object to be used 00055 // (of a class derived from the abstract base class BaseCompare). 00056 // <li> Increment for the next data point -- this lets you specify a 00057 // stride for keys embedded in a struct; 00058 // <li> Sort order -- ascending or descending; 00059 // </ul> 00060 // </synopsis> 00061 00062 class SortKey 00063 { 00064 public: 00065 friend class Sort; 00066 00067 // Define a sort key in a given data array using the indicated 00068 // comparison object, stride and sort order. 00069 SortKey (const void* data, const CountedPtr<BaseCompare>&, 00070 uInt increment, int order); 00071 00072 // Copy constructor (copy semantics). 00073 SortKey (const SortKey&); 00074 00075 ~SortKey(); 00076 00077 // Assignment (copy semantics). 00078 SortKey& operator= (const SortKey&); 00079 00080 // Try if GenSort can be used for this single key. 00081 // If it succeeds, it returns the resulting number of elements. 00082 // Otherwise it returns 0. 00083 uInt tryGenSort (Vector<uInt>& indexVector, uInt nrrec, int opt) const; 00084 00085 // Get the sort order. 00086 int order() const 00087 { return order_p; } 00088 00089 protected: 00090 // sort order; -1 = ascending, 1 = descending 00091 int order_p; 00092 // address of first data point 00093 const void* data_p; 00094 // increment for next data point 00095 uInt incr_p; 00096 // comparison object; use CountedPtr for memory management 00097 CountedPtr<BaseCompare> ccmpObj_p; 00098 // comparison object; use raw pointer for performance 00099 BaseCompare* cmpObj_p; 00100 }; 00101 00102 00103 00104 // <summary> Sort on one or more keys, ascending and/or descending </summary> 00105 // <use visibility=export> 00106 // <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1"> 00107 // </reviewed> 00108 00109 // <synopsis> 00110 // <src>Sort</src> lets you sort data on one or more keys in a mix of 00111 // <src>Sort::ascending</src> and <src>Sort::descending</src> order. 00112 // Duplicates can be skipped by giving the option 00113 // <src>Sort::NoDuplicates</src>. Only in this case the number of output 00114 // elements can be different from the number of input elements. 00115 // <br>The <src>unique</src> function offers another way of getting 00116 // unique values. 00117 // <p> 00118 // Class <src>Sort</src> does not sort the data themselves, but 00119 // returns an index to them. This gives more flexibility and 00120 // allows the sort to be stable; but it is slower. 00121 // <br>Very fast sorting of the data themselves can be done with the 00122 // functions in class <linkto class=GenSort>GenSort</linkto>. 00123 // If sorting on a single key with a standard data type is done, 00124 // Sort will use GenSortIndirect to speed up the sort. 00125 // <br> 00126 // Four sort algorithms are provided: 00127 // <DL> 00128 // <DT> <src>Sort::ParSort</src> 00129 // <DD> The parallel merge sort is the fastest if it can use multiple threads. 00130 // For a single thread it has O(n*log(n)) behaviour, but is slower 00131 // than quicksort. 00132 // A drawback is that it needs an extra index array to do the merge. 00133 // <DT> <src>Sort::InsSort</src> 00134 // <DD> Insertion sort has O(n*n) behaviour, thus is very slow for large 00135 // arrays. However, it is the fastest method for small arrays 00136 // (< 50 elements) and for arrays already (almost) in the right order. 00137 // <DT> <src>Sort::QuickSort</src> 00138 // <DD> Care has been taken to solve the well-known quicksort problems 00139 // like "array already in order" or "many equal elements". The 00140 // behaviour is O(n*log(n)) in all the cases tested, even in 00141 // degenerated cases where the SUN Solaris qsort algorithm is O(n*n). 00142 // <DT> <src>Sort::HeapSort</src> 00143 // <DD> Heapsort has O(n*log(n)) behaviour. Its speed is lower than 00144 // that of QuickSort, so QuickSort is the default algorithm. 00145 // </DL> 00146 // The default is to use QuickSort for small arrays or if only a single 00147 // thread can be used. Otherwise ParSort is the default. 00148 // 00149 // All sort algorithms are <em>stable</em>, which means that the original 00150 // order is kept when keys are equal. 00151 // 00152 // The sort is a four step process: 00153 // <ol> 00154 // <li> Construct the <src>Sort</src> object. 00155 // <li> Define the sort keys. The function <src>sortKey</src> must be 00156 // called for each sort key (the most significant one first). 00157 // The comparison object can be passed in directly, or a 00158 // <linkto group="DataType.h#DataType">basic data type</linkto> 00159 // can be given. In the latter case the appropriate ObjCompare 00160 // comparison object will be created. 00161 // <li> Sort the data. The function <src>sort</src> returns an index 00162 // array, which is allocated when needed. 00163 // <li> Destruct the <src>Sort</src> object (usually done automatically) 00164 // and delete the index array. 00165 // </ol> 00166 // The data can be in a single array of structs, in separate arrays, or 00167 // in a mix of those. Of course, all arrays must have the same length. 00168 // The data can be passed to the <src>Sort</src> constructor and/or to the 00169 // <src>sortKey</src> function. If passed to the <src>Sort</src> constructor, 00170 // the offset of the data item in the data array must be given to 00171 // <src>sortKey</src>. 00172 // </synopsis> 00173 00174 // <example> 00175 // In the first example we sort the data contained in two "parallel" 00176 // arrays, <src>idata</src> and <src>ddata</src>, both of length 00177 // <src>nrdata</src>. 00178 // <srcblock> 00179 // Sort sort; 00180 // sort.sortKey (idata, TpInt); // define 1st sort key 00181 // sort.sortKey (ddata, TpDouble,0,Sort::Descending); // define 2nd sort key 00182 // Vector<uInt> inx; 00183 // sort.sort (inx, nrdata); 00184 // for (uInt i=0; i<nrdata; i++) { // show sorted data 00185 // cout << idata[inx[i]] << " " << ddata[inx[i]] << endl; 00186 // } 00187 // </srcblock> 00188 // Now <src>nr</src> contains the nr of records (=<src>nrdata</src>) 00189 // and <src>inx</src> an array of (sorted) indices. 00190 // 00191 // In the second example we sort the data stored in an array of structs 00192 // on the double (ascending) and the string (descending). We can pass 00193 // the data to the <src>Sort</src> constructor, and the offsets of the 00194 // struct elements to the <src>sortKey</src> function. 00195 // <srcblock> 00196 // struct Ts { 00197 // String as; 00198 // double ad; 00199 // } 00200 // Vector<uInt> inx; 00201 // Sort sort (tsarr, sizeof(Ts)); 00202 // sort.sortKey ((char*)&tsarr[0].ad - (char*)tsarr, TpDouble); 00203 // sort.sortKey ((char*)&tsarr[0].as - (char*)tsarr, TpString, 00204 // Sort::Descending); 00205 // sort.sort (inx, nrts); 00206 // </srcblock> 00207 // Note that the first argument in function <src>sortKey</src> gives 00208 // the offset of the variable in the struct. 00209 // 00210 // Alternatively, and probably slightly easier, we could pass the data 00211 // to the <src>sortKey</src> function and use an increment: 00212 // <srcblock> 00213 // struct Ts { 00214 // String as; 00215 // double ad; 00216 // } 00217 // Vector<uInt> inx; 00218 // Sort sort; 00219 // sort.sortKey (&tsarr[0].ad, TpDouble, sizeof(Ts)); 00220 // sort.sortKey (&tsarr[0].as, TpString, sizeof(Ts), Sort::Descending); 00221 // sort.sort (inx, nrts); 00222 // </srcblock> 00223 // 00224 // Finally, we could provide a comparison object for the struct. 00225 // <srcblock> 00226 // struct Ts { 00227 // String as; 00228 // double ad; 00229 // } 00230 // class MyCompare: public BaseCompare { 00231 // virtual int comp (const void* val1, const void* val2) const 00232 // { 00233 // const Ts& t1 = *(Ts*)val1; 00234 // const Ts& t2 = *(Ts*)val2; 00235 // if (t1.ad < t2.ad) return -1; 00236 // if (t1.ad > t2.ad) return 1; 00237 // if (t1.as < t2.as) return 1; // string must be descending 00238 // if (t1.as > t2.as) return -1; 00239 // return 0; 00240 // } 00241 // }; 00242 // Vector<uInt> inx; 00243 // Sort sort; 00244 // sort.sortKey (tsarr, compareTs, sizeof(Ts)); 00245 // sort.sort (inx, nrts); 00246 // </srcblock> 00247 00248 class Sort 00249 { 00250 public: 00251 // Enumerate the sort options: 00252 enum Option {DefaultSort=0, // ParSort, but QuickSort for small array 00253 HeapSort=1, // use Heapsort algorithm 00254 InsSort=2, // use insertion sort algorithm 00255 QuickSort=4, // use Quicksort algorithm 00256 ParSort=8, // use parallel merge sort algorithm 00257 NoDuplicates=16}; // skip data with equal sort keys 00258 00259 // Enumerate the sort order: 00260 enum Order {Ascending=-1, 00261 Descending=1}; 00262 00263 // The default constructor can be used when the data is only passed 00264 // in via function <src>sortKey</src>. 00265 Sort(); 00266 00267 // Construct a Sort object for the given data array with elements 00268 // of <src>elementSize</src> bytes. This data array will be used 00269 // when an offset is given to the <src>sortKey</src> functions. 00270 // You can still pass additional data arrays to the 00271 // <src>sortKey</src> functions. 00272 Sort (const void* data, uInt elementSize); 00273 00274 // Copy constructor (copy semantics). 00275 Sort (const Sort&); 00276 00277 ~Sort(); 00278 00279 // Assignment (copy semantics). 00280 Sort& operator= (const Sort&); 00281 00282 // Define a sort key (the most significant key should be defined first). 00283 // The key contains: 00284 // <ul> 00285 // <li> A pointer to the start of the data array. --- When structs are 00286 // sorted on an element in the struct, the pointer must point to 00287 // that element in the first struct. 00288 // <li> A pointer to the comparison object to be used. --- The 00289 // comparison object can be specified in two ways: 00290 // <ul> 00291 // <li> by giving a 00292 // <linkto group="DataType.h#DataType">basic data type</linkto>, 00293 // in which case the appropriate comparison object will be 00294 // created automatically, or 00295 // <li> by a CountedPtr of a comparison object. 00296 // You may want to use the templated comparison classes 00297 // <linkto class=ObjCompare>ObjCompare</linkto>(), 00298 // but you are free to use any other class derived from BaseCompare 00299 // that implements the <src>comp</src> function. 00300 // </ul> 00301 // <li> The increment from one data element to the next. --- When 00302 // structs are sorted on an element in the struct, the increment 00303 // should be the size of the struct. If the comparison object is 00304 // automatically created using the data type specified, the default 00305 // increment is the size of the data type. 00306 // <li> The sort order. --- <src>Ascending</src> (default) or 00307 // <src>Descending</src>; 00308 // </ul> 00309 // 00310 // When the data array has been passed to the Sort constructor, 00311 // the data pointer and the increment arguments can be replaced by a 00312 // single argument: the offset of the key in each element of the array. 00313 // 00314 // <group> 00315 void sortKey (const void* data, DataType, uInt increment = 0, 00316 Order = Ascending); 00317 void sortKey (const void* data, const CountedPtr<BaseCompare>&, 00318 uInt increment, Order = Ascending); 00319 void sortKey (uInt offset, DataType, Order = Ascending); 00320 void sortKey (uInt offset, const CountedPtr<BaseCompare>&, 00321 Order = Ascending); 00322 // </group> 00323 00324 // Sort the data array of <src>nrrec</src> records. 00325 // The result is an array of indices giving the requested order. 00326 // It returns the number of resulting records. The indices array 00327 // is resized to that number. 00328 // <br> By default it'll try if the faster GenSortIndirect can be used 00329 // if a sort on a single key is used. 00330 uInt sort (Vector<uInt>& indexVector, uInt nrrec, 00331 int options = DefaultSort, Bool tryGenSort = True) const; 00332 00333 // Get all unique records in a sorted array. The array order is 00334 // given in the indexVector (as possibly returned by the sort function). 00335 // The default indexVector is 0..nrrec-1. 00336 // The index of each first unique record is returned in the uniqueVector. 00337 // They are indices in the supplied indexVector, so 00338 // <src>data[indexVector(uniqueVector(i))]</src> 00339 // is giving the i-th unique record. 00340 // Note that the records indexed by <src>indexVector(uniqueVector(i))</src> 00341 // till <src>indexVector(uniqueVector(i+1))</src> are all the same. 00342 // <br> 00343 // It returns the number of unique records. The unique array 00344 // is resized to that number. 00345 // <group> 00346 uInt unique (Vector<uInt>& uniqueVector, uInt nrrec) const; 00347 uInt unique (Vector<uInt>& uniqueVector, 00348 const Vector<uInt>& indexVector) const; 00349 // </group> 00350 00351 private: 00352 // Copy that Sort object to this. 00353 void copy (const Sort& that); 00354 00355 // Add a sort key giving a data type or the sort key. 00356 // <group> 00357 void addKey (const void* data, DataType, uInt nr, int options); 00358 void addKey (SortKey*); 00359 // </group> 00360 00361 // Do an insertion sort, optionally skipping duplicates. 00362 // <group> 00363 uInt insSort (uInt nr, uInt* indices) const; 00364 uInt insSortNoDup (uInt nr, uInt* indices) const; 00365 // </group> 00366 00367 // Do a merge sort, if possible in parallel using OpenMP. 00368 // Note that the env.var. OMP_NUM_TRHEADS sets the maximum nr of threads 00369 // to use. It defaults to the number of cores. 00370 uInt parSort (int nthr, uInt nrrec, uInt* inx) const; 00371 void merge (uInt* inx, uInt* tmp, uInt size, uInt* index, 00372 uInt nparts) const; 00373 00374 // Do a quicksort, optionally skipping duplicates 00375 // (qkSort is the actual quicksort function). 00376 // <group> 00377 uInt quickSort (uInt nr, uInt* indices) const; 00378 uInt quickSortNoDup (uInt nr, uInt* indices) const; 00379 void qkSort (Int nr, uInt* indices) const; 00380 // </group> 00381 00382 // Do a heapsort, optionally skipping duplicates. 00383 // <group> 00384 uInt heapSort (uInt nr, uInt* indices) const; 00385 uInt heapSortNoDup (uInt nr, uInt* indices) const; 00386 // </group> 00387 00388 // Siftdown algorithm for heapsort. 00389 void siftDown (Int low, Int up, uInt* indices) const; 00390 00391 // Compare the keys of 2 records. 00392 int compare (uInt index1, uInt index2) const; 00393 00394 // Swap 2 indices. 00395 inline void swap (Int index1, Int index2, uInt* indices) const; 00396 00397 00398 PtrBlock<SortKey*> keys_p; //# keys to sort on 00399 uInt nrkey_p; //# #sort-keys 00400 const void* data_p; //# pointer to data records 00401 uInt size_p; //# size of data record 00402 int order_p; //# -1=asc 0=mixed 1=desc 00403 }; 00404 00405 00406 00407 inline void Sort::swap (Int i, Int j, uInt* inx) const 00408 { 00409 uInt t = inx[i]; 00410 inx[i] = inx[j]; 00411 inx[j] = t; 00412 } 00413 00414 00415 00416 } //# NAMESPACE CASACORE - END 00417 00418 #endif