LineFindingUtils.h

Go to the documentation of this file.
00001 //# --------------------------------------------------------------------
00002 //# LineFindingUtils.h: this defines utility functions of line finding
00003 //# --------------------------------------------------------------------
00004 //# Copyright (C) 2015
00005 //# National Astronomical Observatory of Japan
00006 //#
00007 //# This library is free software; you can redistribute it and/or modify it
00008 //# under the terms of the GNU Library General Public License as published by
00009 //# the Free Software Foundation; either version 2 of the License, or (at your
00010 //# option) any later version.
00011 //#
00012 //# This library is distributed in the hope that it will be useful, but WITHOUT
00013 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00014 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00015 //# License for more details.
00016 //#
00017 //# You should have received a copy of the GNU Library General Public License
00018 //# along with this library; if not, write to the Free Software Foundation,
00019 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00020 //#
00021 //# Correspondence concerning AIPS++ should be addressed as follows:
00022 //#        Internet email: aips2-request@nrao.edu.
00023 //#        Postal address: AIPS++ Project Office
00024 //#                        National Radio Astronomy Observatory
00025 //#                        520 Edgemont Road
00026 //#                        Charlottesville, VA 22903-2475 USA
00027 //#
00028 //# $Id$
00029 #ifndef _CASA_LINE_FINDING_UTILS_H_
00030 #define _CASA_LINE_FINDING_UTILS_H_
00031 
00032 #include <iostream>
00033 #include <string>
00034 #include <list>
00035 #include <sstream>
00036 
00037 #include <casa/aipstype.h>
00038 #include <casa/Arrays/Vector.h>
00039 
00040 namespace casa { //# NAMESPACE CASA - BEGIN
00041 
00042 class LineFinderUtils {
00043 public:
00044         /*
00045          * Bin data and mask arrays.
00046          *
00047          * @param[in] num_in : the number of elements in in_data and in_mask
00048          * @param[in] in_data : an input data array to bin
00049          * @param[in] in_mask : an input mask array to bin
00050          * @param[in] bin_size : the number of channels to bin
00051          * @param[in] num_out : the number of elements in out_data and out_mask
00052          * @param[out] out_data : an array to store binned data
00053          * @param[out] out_mask : an array to store binned mask
00054          * @param[in] offset : the channel index in in_data and in_mask
00055          *         to start binning. offset should be 0 if keepsize=false.
00056          * @param[in] keepsize : if true, keep array size after binning.
00057          *         otherwise, the output arrays will have (num_in-offset+1)/bin_size channel.
00058          *         num_out should be equal to num_in when keepsize=true.
00059          */
00060         template<typename DataType>
00061         static size_t binDataAndMask(size_t const num_in,
00062                         DataType const in_data[/*num_in*/], bool const in_mask[/*num_in*/],
00063                         size_t const bin_size, size_t const num_out,
00064                         DataType out_data[/*num_out*/], bool out_mask[/*num_out*/],
00065                         size_t const offset = 0, bool const keepsize = false);
00066 
00067         /*
00068          * Calculate median absolute deviation (MAD) of a data array
00069          * Processes mad[i] = data[i] - (median of valid elements in data)
00070          *
00071          * @param[in] num_data : the number of elements in @a in_data ,
00072          *         @a in_mask , and @a mad.
00073          * @param[in] in_data : data array to calculate MAD
00074          * @param[in] in_mask : a boolean mask array. the elements
00075          *         with mask=false will be ignored in calculating median.
00076          * @param[out] mad : an array of MAD values
00077          */
00078         static void calculateMAD(size_t const num_data,
00079                         float const in_data[/*num_data*/], bool const in_mask[/*num_data*/],
00080                         float mad[/*num_data*/]);
00081 
00082         /*
00083          * Count the number of True elements in a boolean array.
00084          *
00085          * @param[in] num_data : the number of elements in @a data
00086          * @param[in] data : a boolean array
00087          * @return the number of elements with true in @a data
00088          */
00089         inline static size_t countTrue(size_t num_data,
00090                         bool const data[/*num_data*/]) {
00091                 size_t ntrue = 0;
00092                 static_assert(static_cast<uint8_t>(true)==1, "cast of bool failed");
00093                 static_assert(static_cast<uint8_t>(false)==0, "cast of bool failed");
00094                 uint8_t const *data8 = reinterpret_cast<uint8_t const *>(data);
00095                 static_assert(sizeof(data[0])==sizeof(data8[0]),
00096                                 "bool and uint8_t has different size");
00097                 for (size_t i = 0; i < num_data; ++i) {
00098                         ntrue += data8[i];
00099                 }
00100                 return ntrue;
00101         }
00102         ;
00103 
00104         /*
00105          * create mask by a threshold value
00106          * out_mask[i] = in_mask[i] && (in_data[i] >= threshold)
00107          *
00108          * @param[in] num_data : the number of elements in @a in_data ,
00109          *         @a in_mask , and @a out_mask
00110          * @param[in] in_data : a data array
00111          * @param[in] in_mask : a boolean mask array
00112          * @param[in] threshold : a threshold value to compare with @a in_data
00113          * @param[out] out_mask : an output mask array
00114          */
00115         static void createMaskByAThreshold(size_t const num_data,
00116                         float const in_data[/*num_data*/], bool const in_mask[/*num_data*/],
00117                         float const threshold, bool out_mask[/*num_data*/]);
00118         /*
00119          create mask by threshold value array
00120          out_mask[i] = in_mask[i] && (in_data[i] >= threshold_array[i])
00121          */
00122         /*     template <typename DataType>  */
00123         /*     static void createMaskByThresholds(size_t const num_data, */
00124         /*                                     DataType const in_data[/\*num_data*\/], */
00125         /*                                     bool const in_mask[/\*num_data*\/], */
00126         /*                                     DataType const threshold_array[/\*num_data*\/], */
00127         /*                                     bool out_mask[/\*num_data*\/]); */
00128 
00129         /*
00130          * create sign array by a threshold value
00131          * sign[i] = +1 (in_data[i] > threshold)
00132          *         = 0  (in_data[i] == threshold)
00133          *         = -1 (in_data[i] < threshold)
00134          *
00135          * @param[in] num_data : the number of elements of @a in_data and @a sign
00136          * @param[in] in_data : an input data array to calculate sign
00137          * @param[in] threshold : a threshold to calculate sign
00138          * @param[out] sign : an output array
00139          */
00140         template<typename DataType>
00141         inline static void createSignByAThreshold(size_t const num_data,
00142                         DataType const in_data[/*num_data*/], DataType const threshold,
00143                         int8_t sign[/*num_data*/]) {
00144                 for (size_t i = 0; i < num_data; ++i) {
00145                         sign[i] = signCompare(in_data[i], threshold);
00146                 }
00147         }
00148         ;
00149 
00150         /*
00151          * create sign array by data and threshold arrays
00152          * sign[i] = +1 (in_data[i] > threshold_array[i])
00153          *         = 0  (in_data[i] == threshold_array[i])
00154          *         = -1 (in_data[i] < threshold_array[i])
00155          *
00156          * @param[in] num_data : the number of elements of @a in_data ,
00157          *         @a threshod_array, and @a sign
00158          * @param[in] in_data : an input data array to calculate sign
00159          * @param[in] threshold_array : a threshold array to calculate sign
00160          * @param[out] sign : an output array
00161          */
00162         template<typename DataType>
00163         inline static void createSignByThresholds(size_t const num_data,
00164                         DataType const in_data[/*num_data*/],
00165                         DataType const threshold_array[/*num_data*/],
00166                         int8_t sign[/*num_data*/]) {
00167                 for (size_t i = 0; i < num_data; ++i) {
00168                         sign[i] = signCompare(in_data[i], threshold_array[i]);
00169                 }
00170         }
00171         ;
00172 
00173         /*
00174          * Convert channel idxs of line ranges of binned array to the ones before binning
00175          *
00176          * @param[in] bin_size : the number of channels binned
00177          * @param[in] offset : the offset channel idx
00178          * @param[in,out] range_list : the list of line channel ranges to convert
00179          */
00180         static void deBinRanges(size_t const bin_size, size_t const offset,
00181                         std::list<std::pair<size_t, size_t>>& range_list);
00182 
00183         /*
00184          * Extend for line wing
00185          * Line ranges are extended while sign has the same value as the range
00186          * AND mask=true.
00187          *
00188          * @param[in] num_sign : the number of elements in sign and mask array
00189          * @param[in] sign : an array with the values either -1, 0, or 1.
00190          *         It indicates how far a line could be extended.
00191          * @param[in] mask : a boolean mask. The line range will be truncated
00192          *         at the channel, mask=false.
00193          * @param[in,out] range_list : a list of line channel ranges
00194          */
00195         static void extendRangeBySign(size_t num_sign,
00196                         int8_t const sign[/*num_sign*/], bool const mask[/*num_sign*/],
00197                         std::list<std::pair<size_t, size_t>>& range_list);
00198 
00199         /*
00200          * Get median of an sorted array.
00201          * @param[in] num_data: the number of elements from which median is calculated
00202          * @param[in] data: values should be sorted in ascending order.
00203          * @param[in] Must neighther have Inf nor Nan in elements 0-num_data-1.
00204          *
00205          * When the number of elements is odd the function returns data[num_data/2].
00206          * Otherwise, average of middle two elements, i.e, (data[num_data/2]+data[num_data/2-1])/2
00207          */
00208         template<typename DataType>
00209         inline static DataType getMedianOfSorted(size_t const num_data,
00210                         DataType const data[/*num_data*/]) {
00211                 return (data[num_data / 2] + data[num_data / 2 - ((num_data + 1) % 2)])
00212                                 / static_cast<DataType>(2);
00213         }
00214         ;
00215 
00216         /*
00217          * Get median of an array taking mask into account.
00218          *
00219          * @param[in] num_data : the number of elements in data and mask arrays
00220          * @param[in] data : data array to calculate median from
00221          * @param[in] mask : mask array. Only elements with mask[i]=true is
00222          *         taken into account to calculate median
00223          * @param[in] fraction : a fraction of valid channels to calculate median
00224          *         lower data are used.
00225          */
00226         static float maskedMedian(size_t num_data, float const* data,
00227                         bool const mask[/*num_data*/], float fraction = 1.0);
00228 
00229         /*
00230          * Convert boolean channel mask to channel index ranges.
00231          * For example:
00232          *    mask = {F, T, T, F, T} -> [[1,2], [4,4]]
00233          * @param[in] num_mask : the number of elements in mask
00234          * @param[in] mask : a boolean array of mask
00235          * @param[out] out_range : a list of line channel range
00236          */
00237         static void maskToRangesList(size_t const num_mask,
00238                         bool const mask[/*num_mask*/],
00239                         std::list<std::pair<size_t, size_t>>& out_range);
00240 
00241         /*
00242          * Merge line ranges if the ranges are separated only by flagged
00243          * channels, i.e., mask=false.
00244          *
00245          * @param[in] num_mask : the number of elements in mask
00246          * @param[in] mask : a boolean array to store mask
00247          * @param[in] maxgap : the maximum separation of line channels to merge
00248          * @param[in,out] range_list : a list of line channel ranges
00249          *
00250          */
00251         static void mergeGapByFalse(size_t const num_mask,
00252                         bool const mask[/*num_mask*/], size_t const maxgap,
00253                         std::list<std::pair<size_t, size_t>>& range_list);
00254 
00255         /*
00256          * Merge line ranges if the ranges are overlapped.
00257          *
00258          * @param[in] range_list : a list of line range to modify
00259          *         The list should be sorted in ascending order of idx
00260          *         for both for first and second elements of pairs.
00261          */
00262         static void mergeOverlappingRanges(
00263                         std::list<std::pair<size_t, size_t>>& range_list);
00264 
00265         /*
00266          * Merge line ranges if the ranges two lists are overlapped.
00267          *
00268          * @param[in,out] to : a list of line range to merge
00269          * @param[in] from : a list of line range to merge
00270          * Both lists should be sorted in ascending order of idx
00271          * for both for first and second elements of pairs.
00272          */
00273         static void mergeOverlapInTwoLists(std::list<std::pair<size_t, size_t>>& to,
00274                         std::list<std::pair<size_t, size_t>>& from);
00275 
00276         /*
00277          * Merge line ranges if the gap between lines are smaller than
00278          * a certain fraction of narrower line.
00279          * @param[in] fraction : the fraction to narrower line
00280          * @param[in] maxwidth :
00281          * @param[in] range_list : a list of line range to modify
00282          *         The list should be sorted in ascending order of idx
00283          *         for both for first and second elements of pairs.
00284          */
00285         static void mergeSmallGapByFraction(double const fraction,
00286                         size_t const maxwidth,
00287                         std::list<std::pair<size_t, size_t>>& range_list);
00288 
00289         /*
00290          * Remove line ranges larger than maxwidth from the list.
00291          *
00292          * @param[in] maxwidth : maximum line width allowed
00293          * @param[in] range_list : a list of line range to test
00294          */
00295         static void rejectWideRange(size_t const maxwidth,
00296                         std::list<std::pair<size_t, size_t>>& range_list);
00297 
00298         /*
00299          * Remove line ranges smaller than minwidth from the list.
00300          *
00301          * @param[in] minwidth : minimum line width allowed
00302          * @param[in] range_list : a list of line range to test
00303          */
00304         static void rejectNarrowRange(size_t const minwidth,
00305                         std::list<std::pair<size_t, size_t>>& range_list);
00306         /*     template <typename DataType> */
00307         /*       static void splitLines(DataType const& in_data, bool const& in_mask, */
00308         /*                           std::list<std::pair<size_t,size_t>> const& in_range, */
00309         /*                           std::list<std::pair<size_t,size_t>>& out_range); */
00310 
00311         inline static string FormatLineString(std::list<std::pair<size_t,size_t>> &line_list) {
00312           ostringstream oss;
00313           oss << "number of Lines = " << line_list.size() << endl;
00314           for (std::list<std::pair<size_t,size_t>>::iterator iter = line_list.begin();
00315                iter!=line_list.end(); ++iter) {
00316             oss << "- [ " << (*iter).first << ", " << (*iter).second << " ] (width: " << (*iter).second-(*iter).first+1 << ")" << endl;
00317           }
00318           return oss.str();
00319         };
00320 
00321 private:
00322         /*
00323          * Merge a line range to a list of line range taking into account the overlap.
00324          */
00325         static size_t mergeARangeToList(
00326                         std::list<std::pair<size_t, size_t>>& range_list,
00327                         std::pair<size_t, size_t>& new_range, size_t const cursor = 0);
00328 
00329         /*
00330          * Compare data and threshold and returns
00331          * 1 if data > threshold
00332          * 0 if data == threshold
00333          * -1 if data < threshold
00334          */
00335         template<typename DataType>
00336         inline static int8_t signCompare(DataType const& data,
00337                         DataType const& threshold) {
00338                 if (data > threshold)
00339                         return 1;
00340                 else if (data < threshold)
00341                         return -1;
00342                 else
00343                         return 0;
00344         }
00345         ;
00346 
00347 };
00348 
00349 } //# NAMESPACE CASA - END
00350 
00351 #endif /* _CASA_LINE_FINDING_UTILS_H_ */