GenSort.h

Go to the documentation of this file.
00001 //# GenSort.h: General sort functions
00002 //# Copyright (C) 1993,1994,1995,1996,1997,1999
00003 //# Associated Universities, Inc. Washington DC, USA.
00004 //#
00005 //# This library is free software; you can redistribute it and/or modify it
00006 //# under the terms of the GNU Library General Public License as published by
00007 //# the Free Software Foundation; either version 2 of the License, or (at your
00008 //# option) any later version.
00009 //#
00010 //# This library is distributed in the hope that it will be useful, but WITHOUT
00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00013 //# License for more details.
00014 //#
00015 //# You should have received a copy of the GNU Library General Public License
00016 //# along with this library; if not, write to the Free Software Foundation,
00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00018 //#
00019 //# Correspondence concerning AIPS++ should be addressed as follows:
00020 //#        Internet email: aips2-request@nrao.edu.
00021 //#        Postal address: AIPS++ Project Office
00022 //#                        National Radio Astronomy Observatory
00023 //#                        520 Edgemont Road
00024 //#                        Charlottesville, VA 22903-2475 USA
00025 //#
00026 //# $Id$
00027 
00028 #ifndef CASA_GENSORT_H
00029 #define CASA_GENSORT_H
00030 
00031 #include <casacore/casa/aips.h>
00032 #include <casacore/casa/Utilities/Sort.h>
00033 
00034 namespace casacore { //# NAMESPACE CASACORE - BEGIN
00035 
00036 //# Forward declarations.
00037 template<class T> class Array;
00038 template<class T> class Vector;
00039 template<class T> class Block;
00040 
00041 // <summary> General in-place sort functions </summary>
00042 // <use visibility=local>
00043 // <reviewed reviewer="Friso Olnon" date="1995/03/16" tests="tGenSort" demos="">
00044 
00045 // <synopsis> 
00046 //
00047 // The static member functions of this templated class are highly optimized
00048 // sort functions.  They do an in-place sort of an array of values.  The
00049 // functions are templated, so they can in principle be used with any
00050 // data type. However, if used with non-builtin data types, their
00051 // class must provide certain functions (see <em>Template Type Argument
00052 // Requirements</em>).
00053 //
00054 // If it is impossible or too expensive to define these functions, the
00055 // <linkto class=Sort>Sort</linkto> class can be used instead. This sorts
00056 // indirectly using an index array. Instead of the functions mentioned
00057 // above it requires a comparison routine.
00058 //
00059 // The <src>GenSort</src> functions can sort:
00060 // <ul>
00061 //  <li> C-arrays of values;
00062 //  <li> <src>Array</src>s of values -- the array can have any shape
00063 //         and the increment can be >1;
00064 //  <li> <src>Block</src>s of values -- there is a special function to
00065 //         sort less elements than the size of the <src>Block</src>.
00066 // </ul>
00067 //
00068 // The sort order can be specified in the order field:
00069 // <dl>
00070 //   <dt> <src>Sort::Ascending</src> (default),
00071 //   <dt> <src>Sort::Descending</src>.
00072 // </dl>
00073 //
00074 // Previously the sort algorithm to use could be given in the options field.
00075 // <dl>
00076 //   <dt> <src>Sort::QuickSort</src> (default)
00077 //   <dd> is the fastest. It is about 4-6 times faster
00078 //     than the qsort function on the SUN. No worst case has been
00079 //     found, even not for cases where qsort is terribly slow.
00080 //   <dt> <src>Sort::HeapSort</src>
00081 //   <dd> is about twice as slow as quicksort.
00082 //     It has the advantage that the worst case is always o(n*log(n)),
00083 //     while quicksort can have hypothetical inputs with o(n*n).
00084 //   <dt> <src>Sort::InsSort</src>
00085 //   <dd> is o(n*n) for random inputs. It is, however, the
00086 //     only stable sort (i.e. equal values remain in the original order).
00087 // </dl>
00088 // However, these options are not used anymore because the sort now always
00089 // uses a merge sort that is equally fast for random input and much faster for
00090 // degenerated cases like an already ordered or reversely ordered array.
00091 // Furthermore, merge sort is always stable and will be parallelized if OpenMP
00092 // support is enabled giving a 6-fold speedup on 8 cores.
00093 // <br><src>Sort::NoDuplicates</src> in the options field indicates that
00094 // duplicate values will be removed (only the first occurrance is kept).
00095 // <br>The previous sort functionality is still available through the functions
00096 // quickSort, heapSort, and insSort.
00097 // <p>
00098 // All the sort functions return the number of values sorted as their
00099 // function value; when duplicate values have been removed, the number of
00100 // unique valuess will be returned.
00101 // <p>
00102 // The class also provides a function to find the k-th largest value
00103 // in an array of values. This uses a stripped-down version of quicksort
00104 // and is at least 6 times faster than a full quicksort.
00105 // </synopsis> 
00106 
00107 // <templating arg=T>
00108 //   <li> <src>operator=</src> to assign when swapping elements
00109 //   <li> <src>operator<</src>, <src>operator></src> and
00110 //        <src>operator==</src>  to compare elements
00111 //   <li> default constructor to allocate a temporary
00112 // </templating>
00113 
00114 template<class T> class GenSort
00115 {
00116 public:
00117 
00118     // Sort a C-array containing <src>nr</src> <src>T</src>-type objects.
00119     // The sort is done in place and is always stable (thus equal keys keep
00120     // their original order). It returns the number of values, which
00121     // can be different if a NoDuplicates sort is done.
00122     // <br>Insertion sort is used for short arrays (<50 elements). Otherwise,
00123     // a merge sort is used which will be parallelized if casacore is built
00124     // with OpenMP support.
00125     // <group>
00126     static uInt sort (T*, uInt nr, Sort::Order = Sort::Ascending,
00127                       int options = 0);
00128 
00129     static uInt sort (Array<T>&, Sort::Order = Sort::Ascending,
00130                       int options = 0);
00131 
00132     static uInt sort (Block<T>&, uInt nr, Sort::Order = Sort::Ascending,
00133                       int options = 0);
00134     // <group>
00135   
00136     // Find the k-th largest value.
00137     // <br>Note: it does a partial quicksort, thus the data array gets changed.
00138     static T kthLargest (T* data, uInt nr, uInt k);
00139 
00140     // Sort C-array using quicksort.
00141     static uInt quickSort (T*, uInt nr, Sort::Order = Sort::Ascending,
00142                            int options = 0);
00143     // Sort C-array using heapsort.
00144     static uInt heapSort   (T*, uInt nr, Sort::Order = Sort::Ascending,
00145                            int options = 0);
00146     // Sort C-array using insertion sort.
00147     static uInt insSort    (T*, uInt nr, Sort::Order = Sort::Ascending,
00148                            int options = 0);
00149     // Sort C-array using parallel merge sort (using OpenMP).
00150     // By default OpenMP determines the number of threads that can be used.
00151     static uInt parSort    (T*, uInt nr, Sort::Order = Sort::Ascending,
00152                             int options = 0, int nthread = 0);
00153 
00154     // Swap 2 elements in array.
00155     static inline void swap (T&, T&);
00156 
00157     // Reverse the elements in <src>res</src> and put them into <src>data</src>.
00158     // Care is taken if both pointers reference the same data.
00159     static void reverse (T* data, const T* res, uInt nrrec);
00160 
00161 private:
00162     // The<src>data</src> buffer is divided in <src>nparts</src> parts.
00163     // In each part the values are in ascending order.
00164     // The index tells the nr of elements in each part.
00165     // Recursively each two subsequent parts are merged until only part is left
00166     // (giving the sorted array). Alternately <src>data</src> and <src>tmp</src>
00167     // are used for the merge result. The pointer containing the final result
00168     // is returned.
00169     // <br>If possible, merging the parts is done in parallel (using OpenMP).
00170     static T* merge (T* data, T* tmp, uInt nrrec, uInt* index,
00171                      uInt nparts);
00172 
00173     // Quicksort in ascending order.
00174     static void quickSortAsc (T*, Int, Bool multiThread=False, Int rec_lim=128);
00175 
00176     // Heapsort in ascending order.
00177     static void heapSortAsc (T*, Int);
00178     // Helper function for ascending heapsort.
00179     static void heapAscSiftDown (Int, Int, T*);
00180 
00181     // Insertion sort in ascending order.
00182     static uInt insSortAsc (T*, Int, int option);
00183     // Insertion sort in ascending order allowing duplicates.
00184     // This is also used by quicksort for its last steps.
00185     static uInt insSortAscDup (T*, Int);
00186     // Insertion sort in ascending order allowing no duplicates.
00187     // This is also used by the other sort algorithms to skip duplicates.
00188     static uInt insSortAscNoDup (T*, Int);
00189 };
00190 
00191 
00192 // <summary> General indirect sort functions </summary>
00193 // <use visibility=local>
00194 // <reviewed reviewer="" date="" tests="" demos="">
00195 
00196 // <synopsis> 
00197 // This class is similar to <linkto class=GenSort>GenSort</linkto>.
00198 // The only difference is that the functions in this class sort
00199 // indirectly instead of in-place.
00200 // They return the result of the sort as an sorted vector of indices
00201 // This is slower, because an extra indirection is involved in each
00202 // comparison. However, this sort allows to sort const data.
00203 // Another advantage is that this sort is always stable (i.e. equal
00204 // values are kept in their original order).
00205 
00206 template<class T> class GenSortIndirect
00207 {
00208 public:
00209 
00210     // Sort a C-array containing <src>nr</src> <src>T</src>-type objects.
00211     // The resulting index vector gives the sorted indices.
00212     static uInt sort (Vector<uInt>& indexVector, const T* data, uInt nr,
00213                       Sort::Order = Sort::Ascending,
00214                       int options = Sort::QuickSort);
00215 
00216     // Sort a C-array containing <src>nr</src> <src>T</src>-type objects.
00217     // The resulting index vector gives the sorted indices.
00218     static uInt sort (Vector<uInt>& indexVector, const Array<T>& data,
00219                       Sort::Order = Sort::Ascending,
00220                       int options = Sort::QuickSort);
00221 
00222     // Sort a C-array containing <src>nr</src> <src>T</src>-type objects.
00223     // The resulting index vector gives the sorted indices.
00224     static uInt sort (Vector<uInt>& indexVector, const Block<T>& data, uInt nr,
00225                       Sort::Order = Sort::Ascending,
00226                       int options = Sort::QuickSort);
00227 
00228     // Find the index of the k-th largest value.
00229     static uInt kthLargest (T* data, uInt nr, uInt k);
00230 
00231     // Sort container using quicksort.
00232     // The argument <src>inx</src> gives the index defining the order of the
00233     // values in the data array. Its length must be at least <src>nr</src>
00234     // and it must be filled with the index values of the data.
00235     // Usually this is 0..nr, but it could contain a selection of the data.
00236     static uInt quickSort (uInt* inx, const T* data,
00237                            uInt nr, Sort::Order, int options);
00238     // Sort container using heapsort.
00239     static uInt heapSort (uInt* inx, const T* data,
00240                           uInt nr, Sort::Order, int options);
00241     // Sort container using insertion sort.
00242     static uInt insSort (uInt* inx, const T* data,
00243                          uInt nr, Sort::Order, int options);
00244     // Sort container using parallel merge sort (using OpenMP).
00245     // By default the maximum number of threads is used.
00246     static uInt parSort (uInt* inx, const T* data,
00247                          uInt nr, Sort::Order, int options, int nthreads=0);
00248 
00249 private:
00250     // Swap 2 indices.
00251     static inline void swapInx (uInt& index1, uInt& index2);
00252 
00253     // The<src>data</src> buffer is divided in <src>nparts</src> parts.
00254     // In each part the values are in ascending order.
00255     // The index tells the nr of elements in each part.
00256     // Recursively each two subsequent parts are merged until only part is left
00257     // (giving the sorted array). Alternately <src>data</src> and <src>tmp</src>
00258     // are used for the merge result. The pointer containing the final result
00259     // is returned.
00260     // <br>If possible, merging the parts is done in parallel (using OpenMP).
00261     static uInt* merge (const T* data, uInt* inx, uInt* tmp, uInt nrrec,
00262                         uInt* index, uInt nparts);
00263 
00264     // Check if 2 values are in ascending order.
00265     // When equal, the order is correct if index1<index2.
00266     static inline int isAscending (const T* data, Int index1, Int index2);
00267 
00268 
00269     // Quicksort in ascending order.
00270     static void quickSortAsc (uInt* inx, const T*, Int nr,
00271                               Bool multiThread=False, Int rec_lim=128);
00272 
00273     // Heapsort in ascending order.
00274     static void heapSortAsc (uInt* inx, const T*, Int nr);
00275     // Helper function for ascending heapsort.
00276     static void heapAscSiftDown (uInt* inx, Int, Int, const T*);
00277 
00278     // Insertion sort in ascending order.
00279     static uInt insSortAsc (uInt* inx, const T*, Int nr, int option);
00280     // Insertion sort in ascending order allowing duplicates.
00281     // This is also used by quicksort for its last steps.
00282     static uInt insSortAscDup (uInt* inx, const T*, Int nr);
00283     // Insertion sort in ascending order allowing no duplicates.
00284     // This is also used by the other sort algorithms to skip duplicates.
00285     static uInt insSortAscNoDup (uInt* inx, const T*, Int nr);
00286 };
00287 
00288 
00289 // <summary> Global in-place sort functions </summary>
00290 
00291 // The following global functions are easier to use than the static
00292 // <linkto class=GenSort>GenSort</linkto> member functions.
00293 // They do an in-place sort of data, thus the data themselves are moved
00294 // ending up in the requested order.
00295 // <p>
00296 // The default sorting method is QuickSort, which is the fastest.
00297 // However, there are pathological cases where it can be slow.
00298 // HeapSort is about twice a slow, but its speed is guaranteed.
00299 // InsSort (insertion sort) can be very, very slow, but it is the only
00300 // stable sort method (i.e. equal values are kept in their original order).
00301 // However, <linkto name=genSortIndirect> indirect sorting methods </linkto>
00302 // are available to make QuickSort and HeapSort stable.
00303 // <p>
00304 // All sort methods have an option to skip duplicate values. This is the
00305 // only case where the returned number of values can be less than the
00306 // original number of values.
00307 
00308 // <group name=genSortInPlace>
00309 
00310 template<class T>
00311 inline
00312 uInt genSort (T* data, uInt nr,
00313               Sort::Order order = Sort::Ascending, int options=0)
00314   { return GenSort<T>::sort (data, nr, order, options); }
00315 
00316 template<class T>
00317 inline
00318 uInt genSort (Array<T>& data,
00319               Sort::Order order = Sort::Ascending, int options=0)
00320   { return GenSort<T>::sort (data, order, options); }
00321 
00322 template<class T>
00323 inline
00324 uInt genSort (Block<T>& data,
00325               Sort::Order order = Sort::Ascending, int options=0)
00326   { return GenSort<T>::sort (data, data.nelements(), order, options); }
00327 
00328 template<class T>
00329 inline
00330 uInt genSort (Block<T>& data, uInt nr,
00331               Sort::Order order = Sort::Ascending, int options=0)
00332   { return GenSort<T>::sort (data, nr, order, options); }
00333 // </group>
00334 
00335 
00336 // <summary> Global indirect sort functions </summary>
00337 
00338 // The following global functions easier to use than the static
00339 // <linkto class=GenSortIndirect>GenSortIndirect</linkto> member functions.
00340 // They do an indirect sort of data, thus the data themselves are not moved.
00341 // Rather an index vector is returned giving the sorted data indices.
00342 // <p>
00343 // The sorting method used is merge sort, which is always stable.
00344 // It is the fastest, especially if it can use multiple threads.
00345 // <p>
00346 // Unlike the <linkto name=genSortInPlace> in-place sorting methods
00347 // </linkto>, all indirect sorting methods are stable (i.e. equal
00348 // values are left in their original order).
00349 // <p>
00350 // All sort methods have an option to skip duplicate values. This is the
00351 // only case where the returned number of values can be less than the
00352 // original number of values.
00353 
00354 // <group name=genSortIndirect>
00355 
00356 template<class T>
00357 inline
00358 uInt genSort (Vector<uInt>& indexVector, const T* data, uInt nr,
00359               Sort::Order order = Sort::Ascending, int options=0)
00360   { return GenSortIndirect<T>::sort (indexVector, data, nr, order, options); }
00361 
00362 template<class T>
00363 inline
00364 uInt genSort (Vector<uInt>& indexVector, const Array<T>& data,
00365               Sort::Order order = Sort::Ascending, int options=0)
00366   { return GenSortIndirect<T>::sort (indexVector, data, order, options); }
00367 
00368 template<class T>
00369 inline
00370 uInt genSort (Vector<uInt>& indexVector, const Block<T>& data,
00371               Sort::Order order = Sort::Ascending, int options=0)
00372   { return GenSortIndirect<T>::sort (indexVector, data, data.nelements(),
00373                                      order, options); }
00374 
00375 template<class T>
00376 inline
00377 uInt genSort (Vector<uInt>& indexVector, const Block<T>& data, uInt nr,
00378               Sort::Order order = Sort::Ascending, int options=0)
00379   { return GenSortIndirect<T>::sort (indexVector, data, nr, order, options); }
00380 // </group>
00381 
00382 
00383 
00384 // Implement inline member functions.
00385 
00386 template<class T>
00387 inline void GenSort<T>::swap (T& l, T& r)
00388 {
00389     T t = l;
00390     l = r;
00391     r = t;
00392 }
00393 template<class T>
00394 inline void GenSortIndirect<T>::swapInx (uInt& i, uInt& j)
00395 {
00396     uInt t = i;
00397     i = j;
00398     j = t;
00399 }
00400 template<class T>
00401 inline int GenSortIndirect<T>::isAscending (const T* data, Int i, Int j)
00402 {
00403     return (data[i] > data[j]  ||  (data[i] == data[j]  &&  i > j));
00404 }
00405 
00406 
00407 
00408 } //# NAMESPACE CASACORE - END
00409 
00410 #ifndef CASACORE_NO_AUTO_TEMPLATES
00411 #include <casacore/casa/Utilities/GenSort.tcc>
00412 #endif //# CASACORE_NO_AUTO_TEMPLATES
00413 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated on 31 Aug 2016 for casa by  doxygen 1.6.1