ClassicalStatistics.h

Go to the documentation of this file.
00001 //# Copyright (C) 2000,2001
00002 //# Associated Universities, Inc. Washington DC, USA.
00003 //#
00004 //# This library is free software; you can redistribute it and/or modify it
00005 //# under the terms of the GNU Library General Public License as published by
00006 //# the Free Software Foundation; either version 2 of the License, or (at your
00007 //# option) any later version.
00008 //#
00009 //# This library is distributed in the hope that it will be useful, but WITHOUT
00010 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00011 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00012 //# License for more details.
00013 //#
00014 //# You should have received a copy of the GNU Library General Public License
00015 //# along with this library; if not, write to the Free Software Foundation,
00016 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00017 //#
00018 //# Correspondence concerning AIPS++ should be addressed as follows:
00019 //#        Internet email: aips2-request@nrao.edu.
00020 //#        Postal address: AIPS++ Project Office
00021 //#                        National Radio Astronomy Observatory
00022 //#                        520 Edgemont Road
00023 //#                        Charlottesville, VA 22903-2475 USA
00024 //#
00025 //# $Id: Array.h 21545 2015-01-22 19:36:35Z gervandiepen $
00026 
00027 #ifndef SCIMATH_CLASSICALSTATS_H
00028 #define SCIMATH_CLASSICALSTATS_H
00029 
00030 #include <casacore/casa/aips.h>
00031 
00032 #include <casacore/scimath/Mathematics/StatisticsAlgorithm.h>
00033 
00034 #include <casacore/scimath/Mathematics/StatisticsTypes.h>
00035 #include <casacore/scimath/Mathematics/StatisticsUtilities.h>
00036 
00037 #include <set>
00038 #include <vector>
00039 #include <utility>
00040 
00041 namespace casacore {
00042 
00043 template <class T> class PtrHolder;
00044 
00045 // Class to calculate statistics in a "classical" sense, ie using accumulators with no
00046 // special filtering beyond optional range filtering etc.
00047 //
00048 // setCalculateAsAdded() allows one to specify if statistics should be calculated and updated
00049 // upon each call to set/addData(). If False, statistics will be calculated only when
00050 // getStatistic(), getStatistics(), or similar methods are called. Setting this value to True
00051 // allows the caller to not have to keep all the data accessible at once. Note however, that all
00052 // data must be simultaneously accessible if quantile (eg median) calculations are desired.
00053 
00054 // I attempted to write this class using the Composite design pattern, with eg the
00055 // _unweightedStats() and _weightedStats() methods in their own class, but for reasons I
00056 // don't understand, that impacted performance significantly. So I'm using the current
00057 // architecture, which I know is a bit of a maintenance nightmare.
00058 
00059 template <class AccumType, class DataIterator, class MaskIterator=const Bool*, class WeightsIterator=DataIterator> 
00060 class ClassicalStatistics
00061     : public StatisticsAlgorithm<AccumType, DataIterator, MaskIterator, WeightsIterator> {
00062 public:
00063 
00064     ClassicalStatistics();
00065 
00066     // copy semantics
00067     ClassicalStatistics(const ClassicalStatistics<AccumType, DataIterator, MaskIterator, WeightsIterator>& cs);
00068 
00069     virtual ~ClassicalStatistics();
00070 
00071     // copy semantics
00072     ClassicalStatistics<AccumType, DataIterator, MaskIterator, WeightsIterator>& operator=(
00073         const ClassicalStatistics<AccumType, DataIterator, MaskIterator, WeightsIterator>& other
00074     );
00075 
00076     // Get the algorithm that this object uses for computing stats; always StatisticsData::CLASSICAL here.
00077     virtual StatisticsData::ALGORITHM algorithm() const {
00078         return StatisticsData::CLASSICAL;
00079     }
00080 
00081     // <group>
00082     // In the following group of methods, if the size of the composite dataset
00083     // is smaller than
00084     // <src>binningThreshholdSizeBytes</src>, the composite dataset
00085     // will be (perhaps partially) sorted and persisted in memory during the
00086     // call. In that case, and if <src>persistSortedArray</src> is True, this
00087     // sorted array will remain in memory after the call and will be used on
00088     // subsequent calls of this method when <src>binningThreshholdSizeBytes</src>
00089     // is greater than the size of the composite dataset. If
00090     // <src>persistSortedArray</src> is False, the sorted array will not be
00091     // stored after this call completes and so any subsequent calls for which the
00092     // dataset size is less than <src>binningThreshholdSizeBytes</src>, the
00093     // dataset will be sorted from scratch. Values which are not included due to
00094     // non-unity strides, are not included in any specified ranges, are masked,
00095     // or have associated weights of zero are not considered as dataset members
00096     // for quantile computations.
00097     // If one has a priori information regarding the number of points (npts) and/or
00098     // the minimum and maximum values of the data set, these can be supplied to
00099     // improve performance. Note however, that if these values are not correct, the
00100     // resulting median
00101     // and/or quantile values will also not be correct (although see the following notes regarding
00102     // max/min). Note that if this object has already had getStatistics()
00103     // called, and the min and max were calculated, there is no need to pass these values in
00104     // as they have been stored internally and used (although passing them in shouldn't hurt
00105     // anything). If provided, npts, the number of points falling in the specified ranges which are
00106     // not masked and have weights > 0, should be exactly correct. <src>min</src> can be less than
00107     // the true minimum, and <src>max</src> can be greater than the true maximum, but for best
00108     // performance, these should be as close to the actual min and max as possible.
00109     // In order for quantile computations to occur over multiple datasets, all datasets
00110     // must be available. This means that if setCalculateAsAdded()
00111     // was previously called by passing in a value of True, these methods will throw
00112     // an exception as the previous call indicates that there is no guarantee that
00113     // all datasets will be available. If one uses a data provider (by having called
00114     // setDataProvider()), then this should not be an issue.
00115 
00116     // get the median of the distribution.
00117     // For a dataset with an odd number of good points, the median is just the value
00118     // at index int(N/2) in the equivalent sorted dataset, where N is the number of points.
00119     // For a dataset with an even number of points, the median is the mean of the values at
00120     // indices int(N/2)-1 and int(N/2) in the sorted dataset.
00121     // <src>nBins</src> is the number of bins, per histogram, to use to bin the data. More
00122     // bins decrease the likelihood that multiple passes of the data set will be necessary, but
00123     // also increase the amount of memory used. If nBins is set to less than 1,000, it is
00124     // automatically increased to 1,000; there should be no reason to ever set nBins to be
00125     // this small.
00126     virtual AccumType getMedian(
00127         CountedPtr<uInt64> knownNpts=NULL, CountedPtr<AccumType> knownMin=NULL,
00128         CountedPtr<AccumType> knownMax=NULL, uInt binningThreshholdSizeBytes=4096*4096,
00129         Bool persistSortedArray=False, uInt64 nBins=10000
00130     );
00131 
00132     // If one needs to compute both the median and quantile values, it is better to call
00133     // getMedianAndQuantiles() rather than getMedian() and getQuantiles() separately, as the
00134     // first will scan large data sets fewer times than calling the separate methods.
00135     // The return value is the median; the quantiles are returned in the <src>quantiles</src> map.
00136     // Values in the <src>fractions</src> set represent the locations in the CDF and should be
00137     // between 0 and 1, exclusive.
00138     virtual AccumType getMedianAndQuantiles(
00139         std::map<Double, AccumType>& quantiles, const std::set<Double>& fractions,
00140         CountedPtr<uInt64> knownNpts=NULL, CountedPtr<AccumType> knownMin=NULL,
00141         CountedPtr<AccumType> knownMax=NULL,
00142         uInt binningThreshholdSizeBytes=4096*4096, Bool persistSortedArray=False,
00143         uInt64 nBins=10000
00144     );
00145 
00146     // get the median of the absolute deviation about the median of the data.
00147     virtual AccumType getMedianAbsDevMed(
00148         CountedPtr<uInt64> knownNpts=NULL,
00149         CountedPtr<AccumType> knownMin=NULL, CountedPtr<AccumType> knownMax=NULL,
00150         uInt binningThreshholdSizeBytes=4096*4096, Bool persistSortedArray=False,
00151         uInt64 nBins=10000
00152     );
00153 
00154     // Get the specified quantiles. <src>fractions</src> must be between 0 and 1,
00155     // noninclusive.
00156     virtual std::map<Double, AccumType> getQuantiles(
00157         const std::set<Double>& fractions, CountedPtr<uInt64> knownNpts=NULL,
00158         CountedPtr<AccumType> knownMin=NULL, CountedPtr<AccumType> knownMax=NULL,
00159         uInt binningThreshholdSizeBytes=4096*4096, Bool persistSortedArray=False,
00160         uInt64 nBins=10000
00161     );
00162 
00163     // </group>
00164 
00165     // scan the dataset(s) that have been added, and find the min and max.
00166     // This method may be called even if setStatsToCalculate has been called and
00167     // MAX and MIN have been excluded. If setCalculateAsAdded(True) has previously been
00168     // called after this object has been (re)initialized, an exception will be thrown.
00169     virtual void getMinMax(AccumType& mymin, AccumType& mymax);
00170 
00171     // scan the dataset(s) that have been added, and find the number of good points.
00172     // This method may be called even if setStatsToCalculate has been called and
00173     // NPTS has been excluded. If setCalculateAsAdded(True) has previously been
00174     // called after this object has been (re)initialized, an exception will be thrown.
00175     virtual uInt64 getNPts();
00176 
00177     // see base class description
00178     virtual std::pair<Int64, Int64> getStatisticIndex(StatisticsData::STATS stat);
00179 
00180     // Has any data been added to this object? Returns the _hasData flag, which is False if the
00181     // object has been reset and no data have been added afterward.
00182     Bool hasData() const  { return _hasData; }
00183 
00184     // reset object to initial state. Clears all private fields including data,
00185     // accumulators, etc.
00186     virtual void reset();
00187 
00188     // Should statistics be updated with calls to addData or should they only be calculated
00189     // upon calls to getStatistics etc? Beware that calling this will automatically reinitialize
00190     // the object, so that it will contain no references to data et al. after this method has
00191     // been called.
00192     virtual void setCalculateAsAdded(Bool c);
00193 
00194     // An exception will be thrown if setCalculateAsAdded(True) has been called.
00195     void setDataProvider(StatsDataProvider<AccumType, DataIterator, MaskIterator, WeightsIterator> *dataProvider);
00196 
00197     void setStatsToCalculate(std::set<StatisticsData::STATS>& stats);
00198 
00199 protected:
00200 
00201     // <group>
00202     // scan through the data set to determine the number of good (unmasked, weight > 0,
00203     // within range) points. The first with no mask, no
00204     // ranges, and no weights is trivial with npts = nr in this class, but is implemented here
00205     // so that derived classes may override it.
00206     inline virtual void _accumNpts(
00207         uInt64& npts,
00208         const DataIterator& dataBegin, Int64 nr, uInt dataStride
00209     ) const;
00210 
00211     virtual void _accumNpts(
00212         uInt64& npts,
00213         const DataIterator& dataBegin, Int64 nr, uInt dataStride,
00214         const DataRanges& ranges, Bool isInclude
00215     ) const;
00216 
00217     virtual void _accumNpts(
00218         uInt64& npts,
00219         const DataIterator& dataBegin, Int64 nr, uInt dataStride,
00220         const MaskIterator& maskBegin, uInt maskStride
00221     ) const;
00222 
00223     virtual void _accumNpts(
00224         uInt64& npts,
00225         const DataIterator& dataBegin, Int64 nr, uInt dataStride,
00226         const MaskIterator& maskBegin, uInt maskStride, const DataRanges& ranges,
00227         Bool isInclude
00228     ) const;
00229 
00230     virtual void _accumNpts(
00231         uInt64& npts,
00232         const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
00233         Int64 nr, uInt dataStride
00234     ) const;
00235 
00236     virtual void _accumNpts(
00237         uInt64& npts,
00238         const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
00239         Int64 nr, uInt dataStride, const DataRanges& ranges, Bool isInclude
00240     ) const;
00241 
00242     virtual void _accumNpts(
00243         uInt64& npts,
00244         const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
00245         Int64 nr, uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
00246         const DataRanges& ranges, Bool isInclude
00247     ) const;
00248 
00249     virtual void _accumNpts(
00250             uInt64& npts,
00251         const DataIterator& dataBegin, const WeightsIterator& weightBegin,
00252         Int64 nr, uInt dataStride, const MaskIterator& maskBegin, uInt maskStride
00253     ) const;
00254     // </group>
00255 
00256     // <group>
00257     inline void _accumulate(
00258         StatsData<AccumType>& stats, const AccumType& datum,
00259         const LocationType& location
00260     );
00261  
00262     inline void _accumulate(
00263         StatsData<AccumType>& stats, const AccumType& datum,
00264         const AccumType& weight, const LocationType& location
00265     );
00266     // </group>
00267 
00268     void _addData();
00269 
00270     void _clearData();
00271 
00272     void _clearStats();
00273 
00274     // scan dataset(s) to find min and max
00275     void _doMinMax(AccumType& vmin, AccumType& vmax);
00276 
00277     // <group>
00278     // Get the counts of data within the specified histogram bins. The number of
00279     // arrays within binCounts will be equal to the number of histograms in <src>binDesc</src>.
00280     // Each array within <src>binCounts</src> will have the same number of elements as the
00281     // number of bins in its corresponding histogram in <src>binDesc</src>.
00282     virtual void _findBins(
00283         vector<vector<uInt64> >& binCounts,
00284         vector<CountedPtr<AccumType> >& sameVal, vector<Bool>& allSame,
00285         const DataIterator& dataBegin, Int64 nr, uInt dataStride,
00286         const vector<typename StatisticsUtilities<AccumType>::BinDesc>& binDesc,
00287         const vector<AccumType>& maxLimit
00288     ) const;
00289 
00290     virtual void _findBins(
00291         vector<vector<uInt64> >& binCounts,
00292         vector<CountedPtr<AccumType> >& sameVal, vector<Bool>& allSame,
00293         const DataIterator& dataBegin, Int64 nr, uInt dataStride,
00294         const DataRanges& ranges, Bool isInclude,
00295         const vector<typename StatisticsUtilities<AccumType>::BinDesc>& binDesc, const vector<AccumType>& maxLimit
00296     ) const;
00297 
00298     virtual void _findBins(
00299         vector<vector<uInt64> >& binCounts,
00300         vector<CountedPtr<AccumType> >& sameVal, vector<Bool>& allSame,
00301         const DataIterator& dataBegin, Int64 nr, uInt dataStride,
00302         const MaskIterator& maskBegin, uInt maskStride,
00303         const vector<typename StatisticsUtilities<AccumType>::BinDesc>& binDesc, const vector<AccumType>& maxLimit
00304     ) const;
00305 
00306     virtual void _findBins(
00307         vector<vector<uInt64> >& binCounts,
00308         vector<CountedPtr<AccumType> >& sameVal, vector<Bool>& allSame,
00309         const DataIterator& dataBegin, Int64 nr, uInt dataStride,
00310         const MaskIterator& maskBegin, uInt maskStride, const DataRanges& ranges,
00311         Bool isInclude,
00312         const vector<typename StatisticsUtilities<AccumType>::BinDesc>& binDesc, const vector<AccumType>& maxLimit
00313     ) const;
00314 
00315     virtual void _findBins(
00316         vector<vector<uInt64> >& binCounts,
00317         vector<CountedPtr<AccumType> >& sameVal, vector<Bool>& allSame,
00318         const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
00319         Int64 nr, uInt dataStride,
00320         const vector<typename StatisticsUtilities<AccumType>::BinDesc>& binDesc, const vector<AccumType>& maxLimit
00321     ) const ;
00322 
00323     virtual void _findBins(
00324         vector<vector<uInt64> >& binCounts,
00325         vector<CountedPtr<AccumType> >& sameVal, vector<Bool>& allSame,
00326         const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
00327         Int64 nr, uInt dataStride, const DataRanges& ranges, Bool isInclude,
00328         const vector<typename StatisticsUtilities<AccumType>::BinDesc>& binDesc, const vector<AccumType>& maxLimit
00329     ) const;
00330 
00331     virtual void _findBins(
00332         vector<vector<uInt64> >& binCounts,
00333         vector<CountedPtr<AccumType> >& sameVal, vector<Bool>& allSame,
00334         const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
00335         Int64 nr, uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
00336         const DataRanges& ranges, Bool isInclude,
00337         const vector<typename StatisticsUtilities<AccumType>::BinDesc>& binDesc, const vector<AccumType>& maxLimit
00338     ) const;
00339 
00340     virtual void _findBins(
00341         vector<vector<uInt64> >& binCounts,
00342         vector<CountedPtr<AccumType> >& sameVal, vector<Bool>& allSame,
00343         const DataIterator& dataBegin, const WeightsIterator& weightBegin,
00344         Int64 nr, uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
00345         const vector<typename StatisticsUtilities<AccumType>::BinDesc>& binDesc, const vector<AccumType>& maxLimit
00346     ) const;
00347     // </group>
00348     // Accessor for the _doMaxMin flag (presumably whether max/min are to be computed -- confirm in the implementation).
00349     Bool _getDoMaxMin() const { return _doMaxMin; }
00350     // Value of _idataset (presumably the current dataset index); returned as Int64 so it is not collapsed to a Bool.
00351     Int64 _getIDataset() const { return _idataset; }
00352 
00353     virtual StatsData<AccumType> _getInitialStats() const;
00354     
00355     AccumType _getStatistic(StatisticsData::STATS stat);
00356 
00357     StatsData<AccumType> _getStatistics();
00358 
00359     // Retrieve the stats structure (this class returns its own _statsData member). Virtual so
00360     // that derived classes can maintain their own StatsData structs.
00361     inline virtual StatsData<AccumType>& _getStatsData() { return _statsData; }
00362     // const overload of the accessor above
00363     inline virtual const StatsData<AccumType>& _getStatsData() const { return _statsData; }
00364     
00365     // <group>
00366     virtual void _minMax(
00367         CountedPtr<AccumType>& mymin, CountedPtr<AccumType>& mymax,
00368         const DataIterator& dataBegin, Int64 nr, uInt dataStride
00369     ) const;
00370 
00371     virtual void _minMax(
00372         CountedPtr<AccumType>& mymin, CountedPtr<AccumType>& mymax,
00373         const DataIterator& dataBegin, Int64 nr, uInt dataStride,
00374         const DataRanges& ranges, Bool isInclude
00375     ) const;
00376 
00377     virtual void _minMax(
00378         CountedPtr<AccumType>& mymin, CountedPtr<AccumType>& mymax,
00379         const DataIterator& dataBegin, Int64 nr, uInt dataStride,
00380         const MaskIterator& maskBegin, uInt maskStride
00381     ) const;
00382 
00383     virtual void _minMax(
00384         CountedPtr<AccumType>& mymin, CountedPtr<AccumType>& mymax,
00385         const DataIterator& dataBegin, Int64 nr, uInt dataStride,
00386         const MaskIterator& maskBegin, uInt maskStride, const DataRanges& ranges,
00387         Bool isInclude
00388     ) const;
00389 
00390     virtual void _minMax(
00391         CountedPtr<AccumType>& mymin, CountedPtr<AccumType>& mymax,
00392         const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
00393         Int64 nr, uInt dataStride
00394     ) const;
00395 
00396     virtual void _minMax(
00397         CountedPtr<AccumType>& mymin, CountedPtr<AccumType>& mymax,
00398         const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
00399         Int64 nr, uInt dataStride, const DataRanges& ranges, Bool isInclude
00400     ) const;
00401 
00402     virtual void _minMax(
00403         CountedPtr<AccumType>& mymin, CountedPtr<AccumType>& mymax,
00404         const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
00405         Int64 nr, uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
00406         const DataRanges& ranges, Bool isInclude
00407     ) const;
00408 
00409     virtual void _minMax(
00410         CountedPtr<AccumType>& mymin, CountedPtr<AccumType>& mymax,
00411         const DataIterator& dataBegin, const WeightsIterator& weightBegin,
00412         Int64 nr, uInt dataStride, const MaskIterator& maskBegin, uInt maskStride
00413     ) const;
00414     // </group>
00415 
00416     //<group>
00417     // populate an unsorted array with valid data.
00418     // no weights, no mask, no ranges
00419     virtual void _populateArray(
00420         vector<AccumType>& ary, const DataIterator& dataBegin, Int64 nr, uInt dataStride
00421     ) const;
00422 
00423     // ranges
00424     virtual void _populateArray(
00425         vector<AccumType>& ary, const DataIterator& dataBegin, Int64 nr,
00426         uInt dataStride, const DataRanges& ranges, Bool isInclude
00427     ) const;
00428 
00429     virtual void _populateArray(
00430         vector<AccumType>& ary, const DataIterator& dataBegin,
00431         Int64 nr, uInt dataStride, const MaskIterator& maskBegin,
00432         uInt maskStride
00433     ) const;
00434 
00435     // mask and ranges
00436     virtual void _populateArray(
00437         vector<AccumType>& ary, const DataIterator& dataBegin, Int64 nr,
00438         uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
00439         const DataRanges& ranges, Bool isInclude
00440     ) const;
00441 
00442     // weights
00443     virtual void _populateArray(
00444         vector<AccumType>& ary, const DataIterator& dataBegin,
00445         const WeightsIterator& weightsBegin, Int64 nr, uInt dataStride
00446     ) const;
00447 
00448     // weights and ranges
00449     virtual void _populateArray(
00450         vector<AccumType>& ary, const DataIterator& dataBegin,
00451         const WeightsIterator& weightsBegin, Int64 nr, uInt dataStride,
00452         const DataRanges& ranges, Bool isInclude
00453     ) const;
00454 
00455     // weights and mask
00456     virtual void _populateArray(
00457         vector<AccumType>& ary, const DataIterator& dataBegin,
00458         const WeightsIterator& weightBegin, Int64 nr, uInt dataStride,
00459         const MaskIterator& maskBegin, uInt maskStride
00460     ) const;
00461 
00462     // weights, mask, ranges
00463     virtual void _populateArray(
00464         vector<AccumType>& ary, const DataIterator& dataBegin, const WeightsIterator& weightBegin,
00465         Int64 nr, uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
00466         const DataRanges& ranges, Bool isInclude
00467     ) const;
00468     // </group>
00469 
00470     // <group>
00471     // Create a vector of unsorted arrays, one array for each bin defined by <src>includeLimits</src>.
00472     // <src>includeLimits</src> should be non-overlapping and should be given in ascending order (the
00473     // algorithm used assumes this). Once the sum of the lengths of all arrays equals <src>maxCount</src>
00474     // the method will return with no further processing.
00475     // no weights, no mask, no ranges
00476     virtual void _populateArrays(
00477         vector<vector<AccumType> >& arys, uInt64& currentCount, const DataIterator& dataBegin, Int64 nr, uInt dataStride,
00478         const vector<std::pair<AccumType, AccumType> > &includeLimits, uInt64 maxCount
00479     ) const;
00480 
00481     // ranges
00482     virtual void _populateArrays(
00483         vector<vector<AccumType> >& arys, uInt64& currentCount, const DataIterator& dataBegin, Int64 nr,
00484         uInt dataStride, const DataRanges& ranges, Bool isInclude,
00485         const vector<std::pair<AccumType, AccumType> > &includeLimits, uInt64 maxCount
00486     ) const;
00487 
00488     virtual void _populateArrays(
00489         vector<vector<AccumType> >& arys, uInt64& currentCount, const DataIterator& dataBegin,
00490         Int64 nr, uInt dataStride, const MaskIterator& maskBegin,
00491         uInt maskStride,
00492         const vector<std::pair<AccumType, AccumType> > &includeLimits, uInt64 maxCount
00493     ) const;
00494 
00495     // mask and ranges
00496     virtual void _populateArrays(
00497         vector<vector<AccumType> >& arys, uInt64& currentCount, const DataIterator& dataBegin, Int64 nr,
00498         uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
00499         const DataRanges& ranges, Bool isInclude,
00500         const vector<std::pair<AccumType, AccumType> > &includeLimits, uInt64 maxCount
00501     ) const;
00502 
00503     // weights
00504     virtual void _populateArrays(
00505         vector<vector<AccumType> >& arys, uInt64& currentCount, const DataIterator& dataBegin,
00506         const WeightsIterator& weightsBegin, Int64 nr, uInt dataStride,
00507         const vector<std::pair<AccumType, AccumType> > &includeLimits, uInt64 maxCount
00508     ) const;
00509 
00510     // weights and ranges
00511     virtual void _populateArrays(
00512         vector<vector<AccumType> >& arys, uInt64& currentCount, const DataIterator& dataBegin,
00513         const WeightsIterator& weightsBegin, Int64 nr, uInt dataStride,
00514         const DataRanges& ranges, Bool isInclude,
00515         const vector<std::pair<AccumType, AccumType> > &includeLimits, uInt64 maxCount
00516     ) const;
00517 
00518     // weights and mask
00519     virtual void _populateArrays(
00520         vector<vector<AccumType> >& arys, uInt64& currentCount, const DataIterator& dataBegin,
00521         const WeightsIterator& weightBegin, Int64 nr, uInt dataStride,
00522         const MaskIterator& maskBegin, uInt maskStride,
00523         const vector<std::pair<AccumType, AccumType> > &includeLimits, uInt64 maxCount
00524     ) const;
00525 
00526     // weights, mask, ranges
00527     virtual void _populateArrays(
00528         vector<vector<AccumType> >& arys, uInt64& currentCount, const DataIterator& dataBegin, const WeightsIterator& weightBegin,
00529         Int64 nr, uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
00530         const DataRanges& ranges, Bool isInclude,
00531         const vector<std::pair<AccumType, AccumType> > &includeLimits, uInt64 maxCount
00532     ) const;
00533     // </group>
00534 
00535     // <group>
00536     // no weights, no mask, no ranges
00537     virtual Bool _populateTestArray(
00538         vector<AccumType>& ary, const DataIterator& dataBegin,
00539         Int64 nr, uInt dataStride, uInt maxElements
00540     ) const;
00541 
00542     // ranges
00543     virtual Bool _populateTestArray(
00544         vector<AccumType>& ary, const DataIterator& dataBegin, Int64 nr,
00545         uInt dataStride, const DataRanges& ranges, Bool isInclude,
00546         uInt maxElements
00547     ) const;
00548 
00549     // mask
00550     virtual Bool _populateTestArray(
00551         vector<AccumType>& ary, const DataIterator& dataBegin,
00552         Int64 nr, uInt dataStride, const MaskIterator& maskBegin,
00553         uInt maskStride, uInt maxElements
00554     ) const;
00555 
00556     // mask and ranges
00557     virtual Bool _populateTestArray(
00558         vector<AccumType>& ary, const DataIterator& dataBegin, Int64 nr,
00559         uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
00560         const DataRanges& ranges, Bool isInclude, uInt maxElements
00561     ) const;
00562 
00563     // weights
00564     virtual Bool _populateTestArray(
00565         vector<AccumType>& ary, const DataIterator& dataBegin,
00566         const WeightsIterator& weightBegin, Int64 nr, uInt dataStride,
00567         uInt maxElements
00568     ) const;
00569 
00570     // weights and ranges
00571     virtual Bool _populateTestArray(
00572         vector<AccumType>& ary, const DataIterator& dataBegin,
00573         const WeightsIterator& weightsBegin, Int64 nr, uInt dataStride,
00574         const DataRanges& ranges, Bool isInclude, uInt maxElements
00575     ) const;
00576 
00577     // weights and mask
00578     virtual Bool _populateTestArray(
00579         vector<AccumType>& ary, const DataIterator& dataBegin,
00580         const WeightsIterator& weightBegin, Int64 nr,
00581         uInt dataStride, const MaskIterator& maskBegin,
00582         uInt maskStride, uInt maxElements
00583     ) const;
00584 
00585     // weights, mask, ranges
00586     virtual Bool _populateTestArray(
00587         vector<AccumType>& ary, const DataIterator& dataBegin, const WeightsIterator& weightBegin,
00588         Int64 nr, uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
00589         const DataRanges& ranges, Bool isInclude,
00590         uInt maxElements
00591     ) const;
00592     // </group>
00593    
00594     // <group>
00595     // no weights, no mask, no ranges
00596     virtual void _unweightedStats(
00597         StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
00598         const DataIterator& dataBegin, Int64 nr, uInt dataStride
00599     );
00600 
00601     // no weights, no mask
00602     virtual void _unweightedStats(
00603         StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
00604         const DataIterator& dataBegin, Int64 nr, uInt dataStride,
00605         const DataRanges& ranges, Bool isInclude
00606     );
00607 
00608     virtual void _unweightedStats(
00609         StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
00610         const DataIterator& dataBegin, Int64 nr, uInt dataStride,
00611         const MaskIterator& maskBegin, uInt maskStride
00612     );
00613 
00614     virtual void _unweightedStats(
00615         StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
00616         const DataIterator& dataBegin, Int64 nr, uInt dataStride,
00617         const MaskIterator& maskBegin, uInt maskStride,
00618         const DataRanges& ranges, Bool isInclude
00619     );
00620 
00621     // </group>
00622     virtual void _updateDataProviderMaxMin(
00623         const StatsData<AccumType>& threadStats
00624     );
00625 
00626     // <group>
00627     // has weights, but no mask, no ranges
00628     virtual void _weightedStats(
00629         StatsData<AccumType>& stats, LocationType& location,
00630         const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
00631         Int64 nr, uInt dataStride
00632     );
00633 
00634     virtual void _weightedStats(
00635         StatsData<AccumType>& stats, LocationType& location,
00636         const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
00637         Int64 nr, uInt dataStride, const DataRanges& ranges, Bool isInclude
00638     );
00639 
          // has weights, has mask, no ranges
          // Note the weights parameter is spelled <src>weightBegin</src> in this
          // declaration, while the overloads without a mask spell it <src>weightsBegin</src>.
00640     virtual void _weightedStats(
00641         StatsData<AccumType>& stats, LocationType& location,
00642         const DataIterator& dataBegin, const WeightsIterator& weightBegin,
00643         Int64 nr, uInt dataStride, const MaskIterator& maskBegin, uInt maskStride
00644     );
00645 
          // has weights, has mask, has ranges
          // Note the weights parameter is spelled <src>weightBegin</src> in this
          // declaration, while the overloads without a mask spell it <src>weightsBegin</src>.
00646     virtual void _weightedStats(
00647         StatsData<AccumType>& stats, LocationType& location,
00648         const DataIterator& dataBegin, const WeightsIterator& weightBegin,
00649         Int64 nr, uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
00650         const DataRanges& ranges, Bool isInclude
00651     );
00652     // </group>
00653 
00654 private:
          // Statistics storage and bookkeeping state.
          // NOTE(review): meanings are inferred from the names only -- <src>_idataset</src>
          // presumably indexes the dataset currently being processed, and the Bool flags
          // presumably record what has to be computed and whether data have been added.
          // Confirm in the .tcc.
00655     StatsData<AccumType> _statsData;
00656     Int64 _idataset;
00657     Bool _calculateAsAdded, _doMaxMin, _doMedAbsDevMed, _mustAccumulate,
00658         _hasData;
00659 
00660     // mutables, used to mitigate repeated code
          // Being declared <src>mutable</src>, these may be modified from const member
          // functions.
          // NOTE(review): the std::map members are keyed by uInt, presumably the dataset
          // index -- confirm. Also, <map> is not among this header's direct #includes,
          // so std::map presumably arrives through another included header -- confirm.
00661     mutable typename vector<DataIterator>::const_iterator _dend, _diter;
00662     mutable vector<Int64>::const_iterator _citer;
00663     mutable vector<uInt>::const_iterator _dsiter;
00664     mutable std::map<uInt, MaskIterator> _masks;
00665     mutable uInt _maskStride;
00666     mutable std::map<uInt, WeightsIterator> _weights;
00667     mutable std::map<uInt, DataRanges> _ranges;
00668     mutable std::map<uInt, Bool> _isIncludeRanges;
00669     mutable Bool _hasMask, _hasRanges, _hasWeights, _myIsInclude;
00670     mutable DataRanges _myRanges;
00671     mutable MaskIterator _myMask;
00672     mutable DataIterator _myData;
00673     mutable WeightsIterator _myWeights;
00674     mutable uInt _dataCount, _myStride;
00675     mutable uInt64 _myCount;
00676 
          // Class-wide constants; their values are defined outside this declaration.
          // NOTE(review): the names suggest a padding used to keep per-thread data on
          // separate cache lines and a block size for splitting work -- confirm in the .tcc.
00677     static const uInt CACHE_PADDING;
00678     static const uInt BLOCK_SIZE;
00679 
00680     // Tally the number of data points that fall into each bin provided by <src>binDesc</src>.
00681     // Any points that are less than binDesc.minLimit or greater than
00682     // binDesc.minLimit + binDesc.nBins*binDesc.binWidth are not included in the counts. A data
00683     // point that falls exactly on a bin boundary is considered to be in the higher index bin.
00684     // <src>sameVal</src> will be non-null if all the good values in the histogram range are the
00685     // same. In that case, the value held will be the value of each of those data points.
          // NOTE(review): <src>binDesc</src> and <src>sameVal</src> are vectors, as is the
          // result; presumably each has one entry per histogram -- confirm in the .tcc.
00686     vector<vector<uInt64> > _binCounts(
00687         vector<CountedPtr<AccumType> >& sameVal,
00688         const vector<typename StatisticsUtilities<AccumType>::BinDesc>& binDesc
00689     );
00690 
          // <src>bins</src>, <src>sameVal</src> and <src>allSame</src> are non-const
          // references; the iterators are taken by value; <src>binDesc</src> and
          // <src>maxLimit</src> are read-only.
          // NOTE(review): presumably the per-chunk worker behind _binCounts(), binning
          // <src>count</src> values starting at the given iterators -- confirm in the .tcc.
00691     void _computeBins(
00692         vector<vector<uInt64> >& bins, vector<CountedPtr<AccumType> >& sameVal,
00693         vector<Bool>& allSame, DataIterator dataIter, MaskIterator maskIter,
00694         WeightsIterator weightsIter, uInt64 count,
00695         const vector<typename StatisticsUtilities<AccumType>::BinDesc>& binDesc,
00696         const vector<AccumType>& maxLimit
00697     );
00698 
          // <src>ary</src> is a non-const reference; the iterators are taken by value.
          // NOTE(review): presumably adds to <src>ary</src> the good values among the
          // <src>dataCount</src> points starting at <src>dataIter</src> -- confirm in the .tcc.
00699     void _computeDataArray(
00700         vector<AccumType>& ary, DataIterator dataIter,
00701         MaskIterator maskIter, WeightsIterator weightsIter,
00702         uInt64 dataCount
00703     );
00704 
          // <src>arys</src> and <src>currentCount</src> are non-const references;
          // <src>includeLimits</src> is read-only.
          // NOTE(review): presumably the multi-array counterpart of _computeDataArray(),
          // with one output array per <src>includeLimits</src> pair and
          // <src>currentCount</src> tracked against <src>maxCount</src> -- confirm in the .tcc.
00705     void _computeDataArrays(
00706         vector<vector<AccumType> >& arys, uInt64& currentCount,
00707         DataIterator dataIter, MaskIterator maskIter,
00708         WeightsIterator weightsIter, uInt64 dataCount,
00709         const vector<std::pair<AccumType, AccumType> >& includeLimits,
00710         uInt64 maxCount
00711     );
00712 
          // <src>mymax</src> and <src>mymin</src> are CountedPtr objects taken by
          // non-const reference.
          // NOTE(review): presumably updated with the extrema of the good values among
          // the <src>dataCount</src> points starting at <src>dataIter</src>, and
          // presumably null until a good value has been seen -- confirm in the .tcc.
00713     void _computeMinMax(
00714         CountedPtr<AccumType>& mymax, CountedPtr<AccumType>& mymin,
00715         DataIterator dataIter, MaskIterator maskIter,
00716         WeightsIterator weightsIter, uInt64 dataCount
00717     );
00718 
          // <src>stats</src>, <src>ngood</src> and <src>location</src> are non-const
          // references; the iterators are taken by value.
          // NOTE(review): presumably dispatches to the appropriate _unweightedStats() or
          // _weightedStats() overload for <src>count</src> values -- confirm in the .tcc.
00719     void _computeStats(
00720         StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
00721         DataIterator dataIter, MaskIterator maskIter,
00722         WeightsIterator weightsIter, uInt64 count
00723     );
00724 
00725     // Convert <src>myArray</src> in place by taking the absolute value of the difference of
          // the vector and <src>median</src>. Static: no object state is involved.
00726     static void _convertToAbsDevMedArray(vector<AccumType>& myArray, AccumType median);
00727 
00728     // Create an unsorted array of the complete data set. If <src>includeLimits</src> is specified,
00729     // only points within those limits (including min but excluding max, as per definition of bins),
00730     // are included.
          // NOTE(review): this declaration takes no <src>includeLimits</src> parameter; the
          // limits described above presumably apply to _createDataArrays(), declared
          // immediately after this one -- confirm.
00731     void _createDataArray(
00732         vector<AccumType>& array
00733     );
00734 
          // <src>arrays</src> is a non-const reference; <src>includeLimits</src> is read-only.
          // NOTE(review): presumably fills one unsorted array per <src>includeLimits</src>
          // pair, with <src>maxCount</src> bounding the number of values gathered --
          // confirm in the .tcc.
00735     void _createDataArrays(
00736         vector<vector<AccumType> >& arrays,
00737         const vector<std::pair<AccumType, AccumType> > &includeLimits,
00738         uInt64 maxCount
00739     );
00740     // Extract data from multiple histograms given by <src>binDesc</src>.
00741     // <src>dataIndices</src> represent the indices of the sorted arrays of values to
00742     // extract. There should be exactly one set of data indices to extract for each
00743     // supplied histogram. The data indices are relative to the minimum value of the minimum
00744     // bin in their respective histograms. The ordering of the maps in the returned vector represents
00745     // the ordering of histograms in <src>binDesc</src>. <src>binDesc</src> should contain
00746     // non-overlapping histograms and the histograms should be specified in ascending order.
00747     vector<std::map<uInt64, AccumType> > _dataFromMultipleBins(
00748         const vector<typename StatisticsUtilities<AccumType>::BinDesc>& binDesc, uInt64 maxArraySize,
00749         const vector<std::set<uInt64> >& dataIndices, uInt64 nBins
00750     );
00751 
          // Returns a vector of index-to-value maps, the same return type as
          // _dataFromMultipleBins(). All vector arguments are read-only.
          // NOTE(review): presumably <src>binNpts</src>, <src>binLimits</src> and
          // <src>dataIndices</src> each hold one entry per bin, in matching order --
          // confirm in the .tcc.
00752     vector<std::map<uInt64, AccumType> > _dataFromSingleBins(
00753         const vector<uInt64>& binNpts, uInt64 maxArraySize,
00754         const vector<std::pair<AccumType, AccumType> >& binLimits,
00755         const vector<std::set<uInt64> >& dataIndices, uInt64 nBins
00756     );
00757 
          // NOTE(review): presumably scans the data sets to count the good points and
          // returns that count (the _medianIndices() comment mentions scanning the data
          // sets to determine npts) -- confirm in the .tcc.
00758     Int64 _doNpts();
00759 
00760     // increment the relevant loop counters
          // NOTE(review): <src>includeIDataset</src> presumably controls whether
          // <src>_idataset</src> is advanced as well; the meaning of the Bool result is
          // not visible in this header -- confirm in the .tcc.
00761     Bool _increment(Bool includeIDataset);
00762 
00763     // increment thread-based iterators
          // The three iterators and <src>offset</src> are taken by non-const reference;
          // the member function itself is const.
          // NOTE(review): the size of the step, and how <src>nthreads</src> enters it,
          // are not visible in this header -- confirm in the .tcc.
00764     void _incrementThreadIters(
00765         DataIterator& dataIter, MaskIterator& maskIter,
00766         WeightsIterator& weightsIter, uInt64& offset, uInt nthreads
00767     ) const;
00768 
00769     // get the values for the specified indices in the sorted array of all good data
          // Returns a map from <src>uInt64</src> to <src>AccumType</src>.
          // NOTE(review): the keys are presumably the requested <src>dataIndices</src>, and
          // <src>knownNpts</src>, <src>knownMin</src> and <src>knownMax</src> are CountedPtr
          // objects that presumably may be null when the quantity is unknown -- confirm.
00770     std::map<uInt64, AccumType> _indicesToValues(
00771         CountedPtr<uInt64> knownNpts, CountedPtr<AccumType> knownMin,
00772         CountedPtr<AccumType> knownMax, uInt64 maxArraySize,
00773         const std::set<uInt64>& dataIndices, Bool persistSortedArray,
00774         uInt64 nBins
00775     );
00776     
          // NOTE(review): presumably (re)initialises the mutable iterator members of
          // this class before a pass over the data -- confirm in the .tcc.
00777     void _initIterators();
00778 
          // NOTE(review): presumably sets the mutable per-dataset loop variables of this
          // class (the <src>_my*</src> members) for the current dataset -- confirm in the .tcc.
00779     void _initLoopVars();
00780 
          // Every argument except <src>nThreadsMax</src> is a non-const reference, and the
          // member function itself is const. PtrHolder is only forward-declared at the
          // top of this header.
          // NOTE(review): presumably fills the reference arguments with the per-thread
          // setup for a parallel pass -- confirm in the .tcc.
00781     void _initThreadVars(
00782         uInt& nBlocks, uInt64& extra, uInt& nthreads, PtrHolder<DataIterator>& dataIter,
00783         PtrHolder<MaskIterator>& maskIter, PtrHolder<WeightsIterator>& weightsIter,
00784         PtrHolder<uInt64>& offset, uInt nThreadsMax
00785     ) const;
00786 
00787     // Determine by scanning the dataset if the number of good points is smaller than
00788     // <src>maxArraySize</src>. If so, <src>arrayToSort</src> will contain the unsorted
00789     // data values. If not, this vector will be empty.
          // NOTE(review): <src>maxArraySize</src> is declared <src>uInt</src> here but
          // <src>uInt64</src> in _indicesToValues() and _valuesFromSortedArray(); check
          // whether the narrower type is intended.
00790     Bool _isNptsSmallerThan(vector<AccumType>& arrayToSort, uInt maxArraySize);
00791 
00792     // If <src>allowPad</src> is True, then pad the lower side of the lowest bin and the
00793     // higher side of the highest bin so that minData and maxData do not fall on the edge
00794     // of their respective bins. If False, no padding so that minData and maxData are also
00795     // exactly the histogram abscissa limits.
          // <src>bins</src> is a non-const reference. Static: no object state is involved.
          // NOTE(review): <src>maxBins</src> presumably bounds the number of bins
          // created -- confirm in the .tcc.
00796     static void _makeBins(
00797         typename StatisticsUtilities<AccumType>::BinDesc& bins, AccumType minData, AccumType maxData, uInt maxBins,
00798         Bool allowPad
00799     );
00800 
          // <src>bins</src>, <src>sameVal</src> and <src>allSame</src> are non-const
          // references; the <src>t*</src> PtrHolder arguments are read-only. Static: no
          // object state is involved.
          // NOTE(review): presumably folds the per-thread results held in the
          // <src>t*</src> arguments into the combined outputs -- confirm in the .tcc.
00801     static void _mergeResults(
00802         vector<vector<uInt64> >& bins, vector<CountedPtr<AccumType> >& sameVal,
00803         vector<Bool>& allSame, const PtrHolder<vector<vector<uInt64> > >& tBins,
00804         const PtrHolder<vector<CountedPtr<AccumType> > >& tSameVal,
00805         const PtrHolder<vector<Bool> >& tAllSame, uInt nThreadsMax
00806     );
00807 
00808     // Get the index (for odd npts) or indices (for even npts) of the median of the sorted array.
00809     // If knownNpts is not null, it will be used and must be correct. If it is null, the value of
00810     // _npts will be used if it has been previously calculated. If not, the data sets will
00811     // be scanned to determine npts.
          // The result is a std::set, so it holds one or two distinct indices as described above.
00812     std::set<uInt64> _medianIndices(CountedPtr<uInt64> knownNpts);
00813 
          // NOTE(review): presumably the maximum number of threads that may be used for
          // a parallel pass -- confirm in the .tcc. Const member function.
00814     uInt _nThreadsMax() const;
00815 
          // NOTE(review): presumably the index of the calling thread, used to select
          // per-thread storage -- confirm in the .tcc. Const member function.
00816     uInt _threadIdx() const;
00817 
00818     // Get values from sorted array if the array is small enough to be held in
00819     // memory. Note that this is the array containing all good data, not data in
00820     // just a single bin representing a subset of good data.
00821     // Returns True if the data were successfully retrieved.
00822     // If True is returned, the values map will contain a map of index to value.
          // NOTE(review): <src>maxArraySize</src> is presumably the size limit that
          // decides "small enough", and <src>persistSortedArray</src> presumably asks for
          // the sorted array to be kept for later calls -- confirm in the .tcc.
00823     Bool _valuesFromSortedArray(
00824         std::map<uInt64, AccumType>& values, CountedPtr<uInt64> knownNpts,
00825         const std::set<uInt64>& indices, uInt64 maxArraySize,
00826         Bool persistSortedArray
00827     );
00828 };
00829 
00830 }
00831 
00832 #ifndef CASACORE_NO_AUTO_TEMPLATES
00833 #include <casacore/scimath/Mathematics/ClassicalStatistics.tcc>
00834 #endif //# CASACORE_NO_AUTO_TEMPLATES
00835 
00836 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated on 31 Aug 2016 for casa by  doxygen 1.6.1