00001 //# -------------------------------------------------------------------- 00002 //# LineFindingUtils.h: this defines utility functions of line finding 00003 //# -------------------------------------------------------------------- 00004 //# Copyright (C) 2015 00005 //# National Astronomical Observatory of Japan 00006 //# 00007 //# This library is free software; you can redistribute it and/or modify it 00008 //# under the terms of the GNU Library General Public License as published by 00009 //# the Free Software Foundation; either version 2 of the License, or (at your 00010 //# option) any later version. 00011 //# 00012 //# This library is distributed in the hope that it will be useful, but WITHOUT 00013 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 00014 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 00015 //# License for more details. 00016 //# 00017 //# You should have received a copy of the GNU Library General Public License 00018 //# along with this library; if not, write to the Free Software Foundation, 00019 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA. 00020 //# 00021 //# Correspondence concerning AIPS++ should be addressed as follows: 00022 //# Internet email: aips2-request@nrao.edu. 00023 //# Postal address: AIPS++ Project Office 00024 //# National Radio Astronomy Observatory 00025 //# 520 Edgemont Road 00026 //# Charlottesville, VA 22903-2475 USA 00027 //# 00028 //# $Id$ 00029 #ifndef _CASA_LINE_FINDING_UTILS_H_ 00030 #define _CASA_LINE_FINDING_UTILS_H_ 00031 00032 #include <iostream> 00033 #include <string> 00034 #include <list> 00035 #include <sstream> 00036 00037 #include <casa/aipstype.h> 00038 #include <casa/Arrays/Vector.h> 00039 00040 namespace casa { //# NAMESPACE CASA - BEGIN 00041 00042 class LineFinderUtils { 00043 public: 00044 /* 00045 * Bin data and mask arrays. 00046 * 00047 * @param[in] num_in : the number of elements in in_data and in_mask 00048 * @param[in] in_data : an input data array to bin 00049 * @param[in] in_mask : an input mask array to bin 00050 * @param[in] bin_size : the number of channels to bin 00051 * @param[in] num_out : the number of elements in out_data and out_mask 00052 * @param[out] out_data : an array to store binned data 00053 * @param[out] out_mask : an array to store binned mask 00054 * @param[in] offset : the channel index in in_data and in_mask 00055 * to start binning. offset should be 0 if keepsize=false. 00056 * @param[in] keepsize : if true, keep array size after binning. 00057 * otherwise, the output arrays will have (num_in-offset+1)/bin_size channel. 00058 * num_out should be equal to num_in when keepsize=true. 00059 */ 00060 template<typename DataType> 00061 static size_t binDataAndMask(size_t const num_in, 00062 DataType const in_data[/*num_in*/], bool const in_mask[/*num_in*/], 00063 size_t const bin_size, size_t const num_out, 00064 DataType out_data[/*num_out*/], bool out_mask[/*num_out*/], 00065 size_t const offset = 0, bool const keepsize = false); 00066 00067 /* 00068 * Calculate median absolute deviation (MAD) of a data array 00069 * Processes mad[i] = data[i] - (median of valid elements in data) 00070 * 00071 * @param[in] num_data : the number of elements in @a in_data , 00072 * @a in_mask , and @a mad. 00073 * @param[in] in_data : data array to calculate MAD 00074 * @param[in] in_mask : a boolean mask array. the elements 00075 * with mask=false will be ignored in calculating median. 00076 * @param[out] mad : an array of MAD values 00077 */ 00078 static void calculateMAD(size_t const num_data, 00079 float const in_data[/*num_data*/], bool const in_mask[/*num_data*/], 00080 float mad[/*num_data*/]); 00081 00082 /* 00083 * Count the number of True elements in a boolean array. 00084 * 00085 * @param[in] num_data : the number of elements in @a data 00086 * @param[in] data : a boolean array 00087 * @return the number of elements with true in @a data 00088 */ 00089 inline static size_t countTrue(size_t num_data, 00090 bool const data[/*num_data*/]) { 00091 size_t ntrue = 0; 00092 static_assert(static_cast<uint8_t>(true)==1, "cast of bool failed"); 00093 static_assert(static_cast<uint8_t>(false)==0, "cast of bool failed"); 00094 uint8_t const *data8 = reinterpret_cast<uint8_t const *>(data); 00095 static_assert(sizeof(data[0])==sizeof(data8[0]), 00096 "bool and uint8_t has different size"); 00097 for (size_t i = 0; i < num_data; ++i) { 00098 ntrue += data8[i]; 00099 } 00100 return ntrue; 00101 } 00102 ; 00103 00104 /* 00105 * create mask by a threshold value 00106 * out_mask[i] = in_mask[i] && (in_data[i] >= threshold) 00107 * 00108 * @param[in] num_data : the number of elements in @a in_data , 00109 * @a in_mask , and @a out_mask 00110 * @param[in] in_data : a data array 00111 * @param[in] in_mask : a boolean mask array 00112 * @param[in] threshold : a threshold value to compare with @a in_data 00113 * @param[out] out_mask : an output mask array 00114 */ 00115 static void createMaskByAThreshold(size_t const num_data, 00116 float const in_data[/*num_data*/], bool const in_mask[/*num_data*/], 00117 float const threshold, bool out_mask[/*num_data*/]); 00118 /* 00119 create mask by threshold value array 00120 out_mask[i] = in_mask[i] && (in_data[i] >= threshold_array[i]) 00121 */ 00122 /* template <typename DataType> */ 00123 /* static void createMaskByThresholds(size_t const num_data, */ 00124 /* DataType const in_data[/\*num_data*\/], */ 00125 /* bool const in_mask[/\*num_data*\/], */ 00126 /* DataType const threshold_array[/\*num_data*\/], */ 00127 /* bool out_mask[/\*num_data*\/]); */ 00128 00129 /* 00130 * create sign array by a threshold value 00131 * sign[i] = +1 (in_data[i] > threshold) 00132 * = 0 (in_data[i] == threshold) 00133 * = -1 (in_data[i] < threshold) 00134 * 00135 * @param[in] num_data : the number of elements of @a in_data and @a sign 00136 * @param[in] in_data : an input data array to calculate sign 00137 * @param[in] threshold : a threshold to calculate sign 00138 * @param[out] sign : an output array 00139 */ 00140 template<typename DataType> 00141 inline static void createSignByAThreshold(size_t const num_data, 00142 DataType const in_data[/*num_data*/], DataType const threshold, 00143 int8_t sign[/*num_data*/]) { 00144 for (size_t i = 0; i < num_data; ++i) { 00145 sign[i] = signCompare(in_data[i], threshold); 00146 } 00147 } 00148 ; 00149 00150 /* 00151 * create sign array by data and threshold arrays 00152 * sign[i] = +1 (in_data[i] > threshold_array[i]) 00153 * = 0 (in_data[i] == threshold_array[i]) 00154 * = -1 (in_data[i] < threshold_array[i]) 00155 * 00156 * @param[in] num_data : the number of elements of @a in_data , 00157 * @a threshod_array, and @a sign 00158 * @param[in] in_data : an input data array to calculate sign 00159 * @param[in] threshold_array : a threshold array to calculate sign 00160 * @param[out] sign : an output array 00161 */ 00162 template<typename DataType> 00163 inline static void createSignByThresholds(size_t const num_data, 00164 DataType const in_data[/*num_data*/], 00165 DataType const threshold_array[/*num_data*/], 00166 int8_t sign[/*num_data*/]) { 00167 for (size_t i = 0; i < num_data; ++i) { 00168 sign[i] = signCompare(in_data[i], threshold_array[i]); 00169 } 00170 } 00171 ; 00172 00173 /* 00174 * Convert channel idxs of line ranges of binned array to the ones before binning 00175 * 00176 * @param[in] bin_size : the number of channels binned 00177 * @param[in] offset : the offset channel idx 00178 * @param[in,out] range_list : the list of line channel ranges to convert 00179 */ 00180 static void deBinRanges(size_t const bin_size, size_t const offset, 00181 std::list<std::pair<size_t, size_t>>& range_list); 00182 00183 /* 00184 * Extend for line wing 00185 * Line ranges are extended while sign has the same value as the range 00186 * AND mask=true. 00187 * 00188 * @param[in] num_sign : the number of elements in sign and mask array 00189 * @param[in] sign : an array with the values either -1, 0, or 1. 00190 * It indicates how far a line could be extended. 00191 * @param[in] mask : a boolean mask. The line range will be truncated 00192 * at the channel, mask=false. 00193 * @param[in,out] range_list : a list of line channel ranges 00194 */ 00195 static void extendRangeBySign(size_t num_sign, 00196 int8_t const sign[/*num_sign*/], bool const mask[/*num_sign*/], 00197 std::list<std::pair<size_t, size_t>>& range_list); 00198 00199 /* 00200 * Get median of an sorted array. 00201 * @param[in] num_data: the number of elements from which median is calculated 00202 * @param[in] data: values should be sorted in ascending order. 00203 * @param[in] Must neighther have Inf nor Nan in elements 0-num_data-1. 00204 * 00205 * When the number of elements is odd the function returns data[num_data/2]. 00206 * Otherwise, average of middle two elements, i.e, (data[num_data/2]+data[num_data/2-1])/2 00207 */ 00208 template<typename DataType> 00209 inline static DataType getMedianOfSorted(size_t const num_data, 00210 DataType const data[/*num_data*/]) { 00211 return (data[num_data / 2] + data[num_data / 2 - ((num_data + 1) % 2)]) 00212 / static_cast<DataType>(2); 00213 } 00214 ; 00215 00216 /* 00217 * Get median of an array taking mask into account. 00218 * 00219 * @param[in] num_data : the number of elements in data and mask arrays 00220 * @param[in] data : data array to calculate median from 00221 * @param[in] mask : mask array. Only elements with mask[i]=true is 00222 * taken into account to calculate median 00223 * @param[in] fraction : a fraction of valid channels to calculate median 00224 * lower data are used. 00225 */ 00226 static float maskedMedian(size_t num_data, float const* data, 00227 bool const mask[/*num_data*/], float fraction = 1.0); 00228 00229 /* 00230 * Convert boolean channel mask to channel index ranges. 00231 * For example: 00232 * mask = {F, T, T, F, T} -> [[1,2], [4,4]] 00233 * @param[in] num_mask : the number of elements in mask 00234 * @param[in] mask : a boolean array of mask 00235 * @param[out] out_range : a list of line channel range 00236 */ 00237 static void maskToRangesList(size_t const num_mask, 00238 bool const mask[/*num_mask*/], 00239 std::list<std::pair<size_t, size_t>>& out_range); 00240 00241 /* 00242 * Merge line ranges if the ranges are separated only by flagged 00243 * channels, i.e., mask=false. 00244 * 00245 * @param[in] num_mask : the number of elements in mask 00246 * @param[in] mask : a boolean array to store mask 00247 * @param[in] maxgap : the maximum separation of line channels to merge 00248 * @param[in,out] range_list : a list of line channel ranges 00249 * 00250 */ 00251 static void mergeGapByFalse(size_t const num_mask, 00252 bool const mask[/*num_mask*/], size_t const maxgap, 00253 std::list<std::pair<size_t, size_t>>& range_list); 00254 00255 /* 00256 * Merge line ranges if the ranges are overlapped. 00257 * 00258 * @param[in] range_list : a list of line range to modify 00259 * The list should be sorted in ascending order of idx 00260 * for both for first and second elements of pairs. 00261 */ 00262 static void mergeOverlappingRanges( 00263 std::list<std::pair<size_t, size_t>>& range_list); 00264 00265 /* 00266 * Merge line ranges if the ranges two lists are overlapped. 00267 * 00268 * @param[in,out] to : a list of line range to merge 00269 * @param[in] from : a list of line range to merge 00270 * Both lists should be sorted in ascending order of idx 00271 * for both for first and second elements of pairs. 00272 */ 00273 static void mergeOverlapInTwoLists(std::list<std::pair<size_t, size_t>>& to, 00274 std::list<std::pair<size_t, size_t>>& from); 00275 00276 /* 00277 * Merge line ranges if the gap between lines are smaller than 00278 * a certain fraction of narrower line. 00279 * @param[in] fraction : the fraction to narrower line 00280 * @param[in] maxwidth : 00281 * @param[in] range_list : a list of line range to modify 00282 * The list should be sorted in ascending order of idx 00283 * for both for first and second elements of pairs. 00284 */ 00285 static void mergeSmallGapByFraction(double const fraction, 00286 size_t const maxwidth, 00287 std::list<std::pair<size_t, size_t>>& range_list); 00288 00289 /* 00290 * Remove line ranges larger than maxwidth from the list. 00291 * 00292 * @param[in] maxwidth : maximum line width allowed 00293 * @param[in] range_list : a list of line range to test 00294 */ 00295 static void rejectWideRange(size_t const maxwidth, 00296 std::list<std::pair<size_t, size_t>>& range_list); 00297 00298 /* 00299 * Remove line ranges smaller than minwidth from the list. 00300 * 00301 * @param[in] minwidth : minimum line width allowed 00302 * @param[in] range_list : a list of line range to test 00303 */ 00304 static void rejectNarrowRange(size_t const minwidth, 00305 std::list<std::pair<size_t, size_t>>& range_list); 00306 /* template <typename DataType> */ 00307 /* static void splitLines(DataType const& in_data, bool const& in_mask, */ 00308 /* std::list<std::pair<size_t,size_t>> const& in_range, */ 00309 /* std::list<std::pair<size_t,size_t>>& out_range); */ 00310 00311 inline static string FormatLineString(std::list<std::pair<size_t,size_t>> &line_list) { 00312 ostringstream oss; 00313 oss << "number of Lines = " << line_list.size() << endl; 00314 for (std::list<std::pair<size_t,size_t>>::iterator iter = line_list.begin(); 00315 iter!=line_list.end(); ++iter) { 00316 oss << "- [ " << (*iter).first << ", " << (*iter).second << " ] (width: " << (*iter).second-(*iter).first+1 << ")" << endl; 00317 } 00318 return oss.str(); 00319 }; 00320 00321 private: 00322 /* 00323 * Merge a line range to a list of line range taking into account the overlap. 00324 */ 00325 static size_t mergeARangeToList( 00326 std::list<std::pair<size_t, size_t>>& range_list, 00327 std::pair<size_t, size_t>& new_range, size_t const cursor = 0); 00328 00329 /* 00330 * Compare data and threshold and returns 00331 * 1 if data > threshold 00332 * 0 if data == threshold 00333 * -1 if data < threshold 00334 */ 00335 template<typename DataType> 00336 inline static int8_t signCompare(DataType const& data, 00337 DataType const& threshold) { 00338 if (data > threshold) 00339 return 1; 00340 else if (data < threshold) 00341 return -1; 00342 else 00343 return 0; 00344 } 00345 ; 00346 00347 }; 00348 00349 } //# NAMESPACE CASA - END 00350 00351 #endif /* _CASA_LINE_FINDING_UTILS_H_ */