Sort.h

Go to the documentation of this file.
00001 //# Sort.h: Sort objects on one or more keys
00002 //# Copyright (C) 1995,1996,1997,1998,1999,2000,2001
00003 //# Associated Universities, Inc. Washington DC, USA.
00004 //#
00005 //# This library is free software; you can redistribute it and/or modify it
00006 //# under the terms of the GNU Library General Public License as published by
00007 //# the Free Software Foundation; either version 2 of the License, or (at your
00008 //# option) any later version.
00009 //#
00010 //# This library is distributed in the hope that it will be useful, but WITHOUT
00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00013 //# License for more details.
00014 //#
00015 //# You should have received a copy of the GNU Library General Public License
00016 //# along with this library; if not, write to the Free Software Foundation,
00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00018 //#
00019 //# Correspondence concerning AIPS++ should be addressed as follows:
00020 //#        Internet email: aips2-request@nrao.edu.
00021 //#        Postal address: AIPS++ Project Office
00022 //#                        National Radio Astronomy Observatory
00023 //#                        520 Edgemont Road
00024 //#                        Charlottesville, VA 22903-2475 USA
00025 //#
00026 //#
00027 //# $Id$
00028 
00029 #ifndef CASA_SORT_H
00030 #define CASA_SORT_H
00031 
00032 //# Includes
00033 #include <casacore/casa/aips.h>
00034 #include <casacore/casa/Utilities/ValType.h>
00035 #include <casacore/casa/Containers/Block.h>
00036 #include <casacore/casa/Utilities/Compare.h>
00037 #include <casacore/casa/Utilities/CountedPtr.h>
00038 
00039 namespace casacore { //# NAMESPACE CASACORE - BEGIN
00040 
00041 //# Forward Declarations
00042 template<class T> class Vector;
00043 
00044 // <summary> Define a Sort key </summary>
00045 // <use visibility=local>
00046 // <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1">
00047 // </reviewed>
00048 
00049 // <synopsis>
00050 // SortKey is a helper class for the <linkto class=Sort>Sort</linkto> class.
00051 // It holds the following information about a sort key:
00052 // <ul>
00053 //  <li> Address of the data array containing the sort key;
00054 //  <li> A CountedPtr to a comparison object to be used
00055 //       (of a class derived from the abstract base class BaseCompare).
00056 //  <li> Increment for the next data point -- this lets you specify a
00057 //       stride for keys embedded in a struct;
00058 //  <li> Sort order -- ascending or descending;
00059 // </ul>
00060 // </synopsis> 
00061 
00062 class SortKey
00063 {
00064 public:
00065     friend class Sort;
00066 
00067     // Define a sort key in a given data array using the indicated
00068     // comparison object, stride and sort order.
00069     SortKey (const void* data, const CountedPtr<BaseCompare>&,
00070              uInt increment, int order);
00071 
00072     // Copy constructor (copy semantics).
00073     SortKey (const SortKey&);
00074 
00075     ~SortKey();
00076 
00077     // Assignment (copy semantics).
00078     SortKey& operator= (const SortKey&);
00079 
00080     // Try if GenSort can be used for this single key.
00081     // If it succeeds, it returns the resulting number of elements.
00082     // Otherwise it returns 0.
00083     uInt tryGenSort (Vector<uInt>& indexVector, uInt nrrec, int opt) const;
00084 
00085     // Get the sort order.
00086     int order() const
00087       { return order_p; }
00088 
00089 protected:
00090     // sort order; -1 = ascending, 1 = descending
00091     int               order_p;
00092     // address of first data point
00093     const void*       data_p;
00094     // increment for next data point
00095     uInt              incr_p;
00096     // comparison object; use CountedPtr for memory management
00097     CountedPtr<BaseCompare> ccmpObj_p;
00098     // comparison object; use raw pointer for performance
00099     BaseCompare* cmpObj_p;
00100 };
00101 
00102 
00103 
00104 // <summary> Sort on one or more keys, ascending and/or descending </summary>
00105 // <use visibility=export>
00106 // <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1">
00107 // </reviewed>
00108 
00109 // <synopsis>
00110 // <src>Sort</src> lets you sort data on one or more keys in a mix of
00111 // <src>Sort::ascending</src> and <src>Sort::descending</src> order.
00112 // Duplicates can be skipped by giving the option
00113 // <src>Sort::NoDuplicates</src>. Only in this case the number of output
00114 // elements can be different from the number of input elements.
00115 // <br>The <src>unique</src> function offers another way of getting
00116 // unique values.
00117 // <p>
00118 // Class <src>Sort</src> does not sort the data themselves, but
00119 // returns an index to them. This gives more flexibility and
00120 // allows the sort to be stable; but it is slower.
00121 // <br>Very fast sorting of the data themselves can be done with the
00122 // functions in class <linkto class=GenSort>GenSort</linkto>.
00123 // If sorting on a single key with a standard data type is done,
00124 // Sort will use GenSortIndirect to speed up the sort.
00125 // <br>
00126 // Four sort algorithms are provided:
00127 // <DL>
00128 //  <DT> <src>Sort::ParSort</src>
00129 //  <DD> The parallel merge sort is the fastest if it can use multiple threads.
00130 //       For a single thread it has O(n*log(n)) behaviour, but is slower
00131 //       than quicksort.
00132 //       A drawback is that it needs an extra index array to do the merge.
00133 //  <DT> <src>Sort::InsSort</src>
00134 //  <DD> Insertion sort has O(n*n) behaviour, thus is very slow for large
00135 //       arrays. However, it is the fastest method for small arrays
00136 //       (< 50 elements) and for arrays already (almost) in the right order.
00137 //  <DT> <src>Sort::QuickSort</src>
00138 //  <DD> Care has been taken to solve the well-known quicksort problems
00139 //       like "array already in order" or "many equal elements".  The
00140 //       behaviour is O(n*log(n)) in all the cases tested, even in
00141 //       degenerated cases where the SUN Solaris qsort algorithm is O(n*n).
00142 //  <DT> <src>Sort::HeapSort</src>
00143 //  <DD> Heapsort has O(n*log(n)) behaviour. Its speed is lower than
00144 //       that of QuickSort, so QuickSort is the default algorithm.
00145 // </DL>
00146 // The default is to use QuickSort for small arrays or if only a single
00147 // thread can be used. Otherwise ParSort is the default.
00148 // 
00149 // All sort algorithms are <em>stable</em>, which means that the original
00150 // order is kept when keys are equal.
00151 //
00152 // The sort is a four step process:
00153 // <ol>
00154 //  <li> Construct the <src>Sort</src> object.
00155 //  <li> Define the sort keys. The function <src>sortKey</src> must be
00156 //       called for each sort key (the most significant one first).
00157 //       The comparison object can be passed in directly, or a 
00158 //       <linkto group="DataType.h#DataType">basic data type</linkto>
00159 //       can be given. In the latter case the appropriate ObjCompare
00160 //       comparison object will be created.
00161 //  <li> Sort the data. The function <src>sort</src> returns an index
00162 //       array, which is allocated when needed.
00163 //  <li> Destruct the <src>Sort</src> object (usually done automatically)
00164 //       and delete the index array.
00165 // </ol>
00166 // The data can be in a single array of structs, in separate arrays, or
00167 // in a mix of those. Of course, all arrays must have the same length.
00168 // The data can be passed to the <src>Sort</src> constructor and/or to the
00169 // <src>sortKey</src> function. If passed to the <src>Sort</src> constructor,
00170 // the offset of the data item in the data array must be given to
00171 // <src>sortKey</src>.
00172 // </synopsis>
00173 
00174 // <example>
00175 // In the first example we sort the data contained in two "parallel"
00176 // arrays, <src>idata</src> and <src>ddata</src>, both of length
00177 // <src>nrdata</src>.
00178 // <srcblock>
00179 //    Sort sort;
00180 //    sort.sortKey (idata, TpInt);                       // define 1st sort key
00181 //    sort.sortKey (ddata, TpDouble,0,Sort::Descending); // define 2nd sort key
00182 //    Vector<uInt> inx;
00183 //    sort.sort (inx, nrdata);
00184 //    for (uInt i=0; i<nrdata; i++) {                    // show sorted data
00185 //        cout << idata[inx[i]] << " " << ddata[inx[i]] << endl;
00186 //    }
00187 // </srcblock>
00188 // Now <src>nr</src> contains the nr of records (=<src>nrdata</src>)
00189 // and <src>inx</src> an array of (sorted) indices.
00190 //
00191 // In the second example we sort the data stored in an array of structs
00192 // on the double (ascending) and the string (descending). We can pass
00193 // the data to the <src>Sort</src> constructor, and the offsets of the
00194 // struct elements to the <src>sortKey</src> function.
00195 // <srcblock>
00196 //    struct Ts {
00197 //         String as;
00198 //         double ad;
00199 //    }
00200 //    Vector<uInt> inx;
00201 //    Sort sort (tsarr, sizeof(Ts));
00202 //    sort.sortKey ((char*)&tsarr[0].ad - (char*)tsarr, TpDouble);
00203 //    sort.sortKey ((char*)&tsarr[0].as - (char*)tsarr, TpString,
00204 //                                                       Sort::Descending);
00205 //    sort.sort (inx, nrts);
00206 // </srcblock>
00207 // Note that the first argument in function <src>sortKey</src> gives
00208 // the offset of the variable in the struct.
00209 //
00210 // Alternatively, and probably slightly easier, we could pass the data
00211 // to the <src>sortKey</src> function and use an increment:
00212 // <srcblock>
00213 //    struct Ts {
00214 //         String as;
00215 //         double ad;
00216 //    }
00217 //    Vector<uInt> inx;
00218 //    Sort sort;
00219 //    sort.sortKey (&tsarr[0].ad, TpDouble, sizeof(Ts));
00220 //    sort.sortKey (&tsarr[0].as, TpString, sizeof(Ts), Sort::Descending);
00221 //    sort.sort (inx, nrts);
00222 // </srcblock>
00223 //
00224 // Finally, we could provide a comparison object for the struct.
00225 // <srcblock>
00226 //    struct Ts {
00227 //         String as;
00228 //         double ad;
00229 //    }
00230 //    class MyCompare: public BaseCompare {
00231 //      virtual int comp (const void* val1, const void* val2) const
00232 //      {
00233 //        const Ts& t1 = *(Ts*)val1;
00234 //        const Ts& t2 = *(Ts*)val2;
00235 //        if (t1.ad < t2.ad) return -1;
00236 //        if (t1.ad > t2.ad) return 1;
00237 //        if (t1.as < t2.as) return 1;    // string must be descending
00238 //        if (t1.as > t2.as) return -1;
00239 //        return 0;
00240 //      }
00241 //    };
00242 //    Vector<uInt> inx;
00243 //    Sort sort;
00244 //    sort.sortKey (tsarr, compareTs, sizeof(Ts)); 
00245 //    sort.sort (inx, nrts);
00246 // </srcblock>
00247 
00248 class Sort
00249 {
00250 public:
00251     // Enumerate the sort options:
00252     enum Option {DefaultSort=0,     // ParSort, but QuickSort for small array
00253                  HeapSort=1,        // use Heapsort algorithm
00254                  InsSort=2,         // use insertion sort algorithm
00255                  QuickSort=4,       // use Quicksort algorithm
00256                  ParSort=8,         // use parallel merge sort algorithm
00257                  NoDuplicates=16};  // skip data with equal sort keys
00258 
00259     // Enumerate the sort order:
00260     enum Order {Ascending=-1,
00261                 Descending=1};
00262 
00263     // The default constructor can be used when the data is only passed
00264     // in via function <src>sortKey</src>.
00265     Sort();
00266 
00267     // Construct a Sort object for the given data array with elements
00268     // of <src>elementSize</src> bytes.  This data array will be used
00269     // when an offset is given to the <src>sortKey</src> functions.
00270     // You can still pass additional data arrays to the
00271     // <src>sortKey</src> functions.
00272     Sort (const void* data, uInt elementSize);
00273 
00274     // Copy constructor (copy semantics).
00275     Sort (const Sort&);
00276 
00277     ~Sort();
00278 
00279     // Assignment (copy semantics).
00280     Sort& operator= (const Sort&);
00281 
00282     // Define a sort key (the most significant key should be defined first).
00283     // The key contains:
00284     // <ul>
00285     // <li> A pointer to the start of the data array. --- When structs are
00286     //   sorted on an element in the struct, the pointer must point to
00287     //   that element in the first struct.
00288     // <li> A pointer to the comparison object to be used. --- The
00289     //   comparison object can be specified in two ways:
00290     //   <ul>
00291     //   <li> by giving a
00292     //     <linkto group="DataType.h#DataType">basic data type</linkto>,
00293     //     in which case the appropriate comparison object will be
00294     //     created automatically, or
00295     //   <li> by a CountedPtr of a comparison object.
00296     //     You may want to use the templated comparison classes
00297     //     <linkto class=ObjCompare>ObjCompare</linkto>(),
00298     //     but you are free to use any other class derived from BaseCompare
00299     //     that implements the <src>comp</src> function.
00300     //   </ul>
00301     // <li> The increment from one data element to the next. --- When
00302     //   structs are sorted on an element in the struct, the increment
00303     //   should be the size of the struct. If the comparison object is
00304     //   automatically created using the data type specified, the default
00305     //   increment is the size of the data type.
00306     // <li> The sort order. --- <src>Ascending</src> (default) or 
00307     //   <src>Descending</src>;
00308     // </ul>
00309     //
00310     // When the data array has been passed to the Sort constructor,
00311     // the data pointer and the increment arguments can be replaced by a
00312     // single argument: the offset of the key in each element of the array.
00313     //
00314     // <group>
00315     void sortKey (const void* data, DataType, uInt increment = 0,
00316                   Order = Ascending);
00317     void sortKey (const void* data, const CountedPtr<BaseCompare>&,
00318                   uInt increment, Order = Ascending);
00319     void sortKey (uInt offset, DataType, Order = Ascending);
00320     void sortKey (uInt offset, const CountedPtr<BaseCompare>&,
00321                   Order = Ascending);
00322     // </group>
00323 
00324     // Sort the data array of <src>nrrec</src> records.
00325     // The result is an array of indices giving the requested order.
00326     // It returns the number of resulting records. The indices array
00327     // is resized to that number.
00328     // <br> By default it'll try if the faster GenSortIndirect can be used
00329     // if a sort on a single key is used.
00330     uInt sort (Vector<uInt>& indexVector, uInt nrrec,
00331                int options = DefaultSort, Bool tryGenSort = True) const;
00332 
00333     // Get all unique records in a sorted array. The array order is
00334     // given in the indexVector (as possibly returned by the sort function).
00335     // The default indexVector is 0..nrrec-1.
00336     // The index of each first unique record is returned in the uniqueVector.
00337     // They are indices in the supplied indexVector, so
00338     // <src>data[indexVector(uniqueVector(i))]</src>
00339     // is giving the i-th unique record.
00340     // Note that the records indexed by <src>indexVector(uniqueVector(i))</src>
00341     // till <src>indexVector(uniqueVector(i+1))</src> are all the same.
00342     // <br>
00343     // It returns the number of unique records. The unique array
00344     // is resized to that number.
00345     // <group>
00346     uInt unique (Vector<uInt>& uniqueVector, uInt nrrec) const;
00347     uInt unique (Vector<uInt>& uniqueVector,
00348                  const Vector<uInt>& indexVector) const;
00349     // </group>
00350 
00351 private:
00352     // Copy that Sort object to this.
00353     void copy (const Sort& that);
00354 
00355     // Add a sort key giving a data type or the sort key.
00356     // <group>
00357     void addKey (const void* data, DataType, uInt nr, int options);
00358     void addKey (SortKey*);
00359     // </group>
00360 
00361     // Do an insertion sort, optionally skipping duplicates.
00362     // <group>
00363     uInt insSort (uInt nr, uInt* indices) const;
00364     uInt insSortNoDup (uInt nr, uInt* indices) const;
00365     // </group>
00366 
00367     // Do a merge sort, if possible in parallel using OpenMP.
00368     // Note that the env.var. OMP_NUM_TRHEADS sets the maximum nr of threads
00369     // to use. It defaults to the number of cores.
00370     uInt parSort (int nthr, uInt nrrec, uInt* inx) const;
00371     void merge (uInt* inx, uInt* tmp, uInt size, uInt* index,
00372                 uInt nparts) const;
00373 
00374     // Do a quicksort, optionally skipping duplicates
00375     // (qkSort is the actual quicksort function).
00376     // <group>
00377     uInt quickSort (uInt nr, uInt* indices) const;
00378     uInt quickSortNoDup (uInt nr, uInt* indices) const;
00379     void qkSort (Int nr, uInt* indices) const;
00380     // </group>
00381 
00382     // Do a heapsort, optionally skipping duplicates.
00383     // <group>
00384     uInt heapSort (uInt nr, uInt* indices) const;
00385     uInt heapSortNoDup (uInt nr, uInt* indices) const;
00386     // </group>
00387 
00388     // Siftdown algorithm for heapsort.
00389     void siftDown (Int low, Int up, uInt* indices) const;
00390 
00391     // Compare the keys of 2 records.
00392     int compare (uInt index1, uInt index2) const;
00393 
00394     // Swap 2 indices.
00395     inline void swap (Int index1, Int index2, uInt* indices) const;
00396 
00397 
00398     PtrBlock<SortKey*> keys_p;                    //# keys to sort on
00399     uInt               nrkey_p;                   //# #sort-keys
00400     const void*        data_p;                    //# pointer to data records
00401     uInt               size_p;                    //# size of data record
00402     int                order_p;                   //# -1=asc 0=mixed 1=desc
00403 };
00404 
00405 
00406 
00407 inline void Sort::swap (Int i, Int j, uInt* inx) const
00408 {
00409     uInt t = inx[i];
00410     inx[i] = inx[j];
00411     inx[j] = t;
00412 }
00413 
00414 
00415 
00416 } //# NAMESPACE CASACORE - END
00417 
00418 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated on 31 Aug 2016 for casa by  doxygen 1.6.1