FitToHalfStatistics.h

Go to the documentation of this file.
00001 //# Copyright (C) 2000,2001
00002 //# Associated Universities, Inc. Washington DC, USA.
00003 //#
00004 //# This library is free software; you can redistribute it and/or modify it
00005 //# under the terms of the GNU Library General Public License as published by
00006 //# the Free Software Foundation; either version 2 of the License, or (at your
00007 //# option) any later version.
00008 //#
00009 //# This library is distributed in the hope that it will be useful, but WITHOUT
00010 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00011 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00012 //# License for more details.
00013 //#
00014 //# You should have received a copy of the GNU Library General Public License
00015 //# along with this library; if not, write to the Free Software Foundation,
00016 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00017 //#
00018 //# Correspondence concerning AIPS++ should be addressed as follows:
00019 //#        Internet email: aips2-request@nrao.edu.
00020 //#        Postal address: AIPS++ Project Office
00021 //#                        National Radio Astronomy Observatory
00022 //#                        520 Edgemont Road
00023 //#                        Charlottesville, VA 22903-2475 USA
00024 //#
00025 //# $Id: Array.h 21545 2015-01-22 19:36:35Z gervandiepen $
00026 
00027 #ifndef SCIMATH_FITTOHALFSTATISTICS_H
00028 #define SCIMATH_FITTOHALFSTATISTICS_H
00029 
00030 #include <casacore/casa/aips.h>
00031 
00032 #include <casacore/scimath/Mathematics/ConstrainedRangeStatistics.h>
00033 #include <casacore/scimath/Mathematics/FitToHalfStatisticsData.h>
00034 
00035 namespace casacore {
00036 
00037 // Class to calculate statistics using the so-called fit to half algorithm. In this
00038 // algorithm, a center value is specified, and only the points on one side of it are included:
00039 // either those greater than or equal to this value, or those less than or equal to it. Furthermore,
00040 // each of the included points is reflected about the center value, and these virtual points are added
00041 // to the included points; the union of the included real points and the virtual points is used for computing statistics.
00042 // The specified center point is therefore the mean and median of the resulting
00043 // distribution, and the total number of points is exactly twice the number of real
00044 // data points that are included.
00045 
00046 template <class AccumType, class DataIterator, class MaskIterator=const Bool *, class WeightsIterator=DataIterator>
00047 class FitToHalfStatistics
00048     : public ConstrainedRangeStatistics<CASA_STATP> {
00049 public:
00050 
00051     const static AccumType TWO; // only declared here; its value is defined outside this class body
00052 
00053     // <src>value</src> is only used if <src>center</src>=CVALUE. Defaults: center=CMEAN, useData=LE_CENTER, value=0.
00054     FitToHalfStatistics(
00055         FitToHalfStatisticsData::CENTER center=FitToHalfStatisticsData::CMEAN,
00056         FitToHalfStatisticsData::USE_DATA useData=FitToHalfStatisticsData::LE_CENTER,
00057         AccumType value=0
00058     );
00059 
00060     virtual ~FitToHalfStatistics();
00061 
00062     // copy semantics
00063     FitToHalfStatistics<CASA_STATP>& operator=(
00064         const FitToHalfStatistics<CASA_STATP>& other
00065     );
00066 
00067     // get the algorithm that this object uses for computing stats (always StatisticsData::FITTOHALF)
00068     virtual StatisticsData::ALGORITHM algorithm() const {
00069         return StatisticsData::FITTOHALF;
00070     };
00071 
00072     // The median is just the center value, so none of the parameters to this method are used.
00073     AccumType getMedian(
00074         CountedPtr<uInt64> knownNpts=NULL, CountedPtr<AccumType> knownMin=NULL,
00075         CountedPtr<AccumType> knownMax=NULL, uInt binningThreshholdSizeBytes=4096*4096,
00076         Bool persistSortedArray=False, uInt64 nBins=10000
00077     );
00078 
00079     // <group>
00080     // In the following group of methods, if the size of the composite dataset
00081     // is smaller than
00082     // <src>binningThreshholdSizeBytes</src>, the composite dataset
00083     // will be (perhaps partially) sorted and persisted in memory during the
00084     // call. In that case, and if <src>persistSortedArray</src> is True, this
00085     // sorted array will remain in memory after the call and will be used on
00086     // subsequent calls of this method when <src>binningThreshholdSizeBytes</src>
00087     // is greater than the size of the composite dataset. If
00088     // <src>persistSortedArray</src> is False, the sorted array will not be
00089     // stored after this call completes and so, on any subsequent call for which the
00090     // dataset size is less than <src>binningThreshholdSizeBytes</src>, the
00091     // dataset will be sorted from scratch. Values which are not included due to
00092     // non-unity strides, are not included in any specified ranges, are masked,
00093     // or have associated weights of zero are not considered as dataset members
00094     // for quantile computations.
00095     // If one has a priori information regarding
00096     // the number of points (npts) and/or the minimum and maximum values of the data
00097     // set, these can be supplied to improve performance. Note however, that if these
00098     // values are not correct, the resulting median
00099     // and/or quantile values will also not be correct (although see the following notes regarding
00100     // max/min). Note that if this object has already had getStatistics()
00101     // called, and the min and max were calculated, there is no need to pass these values in
00102     // as they have been stored internally and used (although passing them in shouldn't hurt
00103     // anything). If provided, <src>knownNpts</src>, the number of points falling in the specified ranges which are
00104     // not masked and have weights > 0, should be exactly correct. <src>knownMin</src> can be less than
00105     // the true minimum, and <src>knownMax</src> can be greater than the true maximum, but for best
00106     // performance, these should be as close to the actual min and max as possible.
00107 
00108     AccumType getMedianAndQuantiles(
00109         std::map<Double, AccumType>& quantiles, const std::set<Double>& fractions,
00110         CountedPtr<uInt64> knownNpts=NULL, CountedPtr<AccumType> knownMin=NULL,
00111         CountedPtr<AccumType> knownMax=NULL,
00112         uInt binningThreshholdSizeBytes=4096*4096, Bool persistSortedArray=False,
00113         uInt64 nBins=10000
00114     );
00115 
00116     // get the median of the absolute deviation about the median of the data.
00117     AccumType getMedianAbsDevMed(
00118         CountedPtr<uInt64> knownNpts=NULL,
00119         CountedPtr<AccumType> knownMin=NULL, CountedPtr<AccumType> knownMax=NULL,
00120         uInt binningThreshholdSizeBytes=4096*4096, Bool persistSortedArray=False,
00121         uInt64 nBins=10000
00122     );
00123 
00124     // Get the specified quantiles. <src>fractions</src> must be between 0 and 1,
00125     // noninclusive.
00126     std::map<Double, AccumType> getQuantiles(
00127         const std::set<Double>& fractions, CountedPtr<uInt64> knownNpts=NULL,
00128         CountedPtr<AccumType> knownMin=NULL, CountedPtr<AccumType> knownMax=NULL,
00129         uInt binningThreshholdSizeBytes=4096*4096, Bool persistSortedArray=False,
00130         uInt64 nBins=10000
00131     );
00132     // </group>
00133 
00134     // scan the dataset(s) that have been added, and find the min and max.
00135     // This method may be called even if setStatsToCalculate has been called and
00136     // MAX and MIN have been excluded. Results are returned through <src>mymin</src> and <src>mymax</src>.
00137     virtual void getMinMax(AccumType& mymin, AccumType& mymax);
00138 
00139     // scan the dataset(s) that have been added, and find the number of good points.
00140     // This method may be called even if setStatsToCalculate has been called and
00141     // NPTS has been excluded. If setCalculateAsAdded(True) has previously been
00142     // called after this object has been (re)initialized, an exception will be thrown.
00143     uInt64 getNPts();
00144 
00145     // reset object to initial state. Clears all private fields including data,
00146     // accumulators, global range. NOTE(review): the original text said this "does not affect the
00147     // fence factor (_f)", but no such member is declared in this class; confirm what survives a reset.
00148     virtual void reset();
00149 
00150     // This class does not allow statistics to be calculated as datasets are added, so
00151     // an exception will be thrown if <src>c</src> is True.
00152     void setCalculateAsAdded(Bool c);
00153 
00154 protected:
00155 
00156     virtual void _clearData();
00157 
00158     virtual StatsData<AccumType> _getInitialStats() const;
00159 
00160     StatsData<AccumType> _getStatistics();
00161 
00162     inline StatsData<AccumType>& _getStatsData() { return _statsData; } // mutable access to _statsData
00163 
00164     inline const StatsData<AccumType>& _getStatsData() const { return _statsData; } // read-only access to _statsData
00165 
00166     // <group>
00167     // no weights, no mask, no ranges
00168     void _unweightedStats(
00169         StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
00170         const DataIterator& dataBegin, Int64 nr, uInt dataStride
00171     );
00172 
00173     // no weights, no mask, has ranges
00174     void _unweightedStats(
00175         StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
00176         const DataIterator& dataBegin, Int64 nr, uInt dataStride,
00177         const DataRanges& ranges, Bool isInclude
00178     );
00179 
00180     void _unweightedStats( // no weights, no ranges, has mask
00181         StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
00182         const DataIterator& dataBegin, Int64 nr, uInt dataStride,
00183         const MaskIterator& maskBegin, uInt maskStride
00184     );
00185 
00186     void _unweightedStats( // no weights, has mask and ranges
00187         StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
00188         const DataIterator& dataBegin, Int64 nr, uInt dataStride,
00189         const MaskIterator& maskBegin, uInt maskStride,
00190         const DataRanges& ranges, Bool isInclude
00191     );
00192     // </group>
00193 
00194     void _updateDataProviderMaxMin( // NOTE(review): presumably folds per-thread max/min into the data provider's; confirm in the .tcc
00195         const StatsData<AccumType>& threadStats
00196     );  
00197 
00198     
00199     // <group>
00200     // has weights, but no mask, no ranges
00201     void _weightedStats(
00202         StatsData<AccumType>& stats, LocationType& location,
00203         const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
00204         Int64 nr, uInt dataStride
00205     );
00206 
00207     void _weightedStats( // has weights and ranges, but no mask
00208         StatsData<AccumType>& stats, LocationType& location,
00209         const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
00210         Int64 nr, uInt dataStride, const DataRanges& ranges, Bool isInclude
00211     );
00212 
00213     void _weightedStats( // has weights and mask, but no ranges
00214         StatsData<AccumType>& stats, LocationType& location,
00215         const DataIterator& dataBegin, const WeightsIterator& weightBegin,
00216         Int64 nr, uInt dataStride, const MaskIterator& maskBegin, uInt maskStride
00217     );
00218 
00219     void _weightedStats( // has weights, mask, and ranges
00220         StatsData<AccumType>& stats, LocationType& location,
00221         const DataIterator& dataBegin, const WeightsIterator& weightBegin,
00222         Int64 nr, uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
00223         const DataRanges& ranges, Bool isInclude
00224     );
00225     // </group>
00226 
00227 private:
00228     FitToHalfStatisticsData::CENTER _centerType;
00229     Bool _useLower; // NOTE(review): presumably True when the data at or below the center are used (LE_CENTER); confirm in the .tcc
00230     AccumType _centerValue;
00231     StatsData<AccumType> _statsData; // exposed to subclasses through the _getStatsData() accessors
00232     Bool _doMedAbsDevMed, _rangeIsSet;
00233     // these are the max and min for the real (i.e. not reflected) portion of the dataset
00234     CountedPtr<AccumType> _realMax, _realMin;
00235 
00236     void _getRealMinMax( // realMin/realMax are non-const references (presumably outputs); knownMin/knownMax are passed by value
00237             CountedPtr<AccumType>& realMin, CountedPtr<AccumType>& realMax,
00238         CountedPtr<AccumType> knownMin, CountedPtr<AccumType> knownMax
00239     );
00240 
00241     void _setRange();
00242 };
00243 
00244 }
00245 
00246 #ifndef CASACORE_NO_AUTO_TEMPLATES
00247 #include <casacore/scimath/Mathematics/FitToHalfStatistics.tcc>
00248 #endif //# CASACORE_NO_AUTO_TEMPLATES
00249 
00250 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated on 31 Aug 2016 for casa by  doxygen 1.6.1