00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #ifndef CASA_PRECTIMER_H
00029 #define CASA_PRECTIMER_H
00030
00031
00032 #include <casacore/casa/aips.h>
00033 #include <cstdlib>
00034 #include <iostream>
00035
00036 #if defined __ia64__ && defined __INTEL_COMPILER
00037 #include <ia64regs.h>
00038 #endif
00039
00040
00041 namespace casacore {
00042
00043
00044 class String;
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101 class PrecTimer {
00102 public:
00103
00104 PrecTimer();
00105
00106
00107 ~PrecTimer();
00108
00109
00110 void start();
00111
00112 void stop();
00113
00114
00115 void reset();
00116
00117
00118
00119 void show() const;
00120 void show (std::ostream& os) const;
00121
00122
00123
00124
00125
00126 void show (const String&) const;
00127 void show (std::ostream& os, const String& prefix) const;
00128
00129
00130
00131 double getReal() const;
00132
00133
00134 unsigned long long getCount() const;
00135
00136 private:
00137 void print_time (std::ostream&, double time) const;
00138
00139 struct TimeStruct {
00140 #if defined __PPC__
00141 int total_time_high, total_time_low;
00142 #else
00143 int total_time_low, total_time_high;
00144 #endif
00145 };
00146 union Union1 {
00147 long long total_time;
00148 TimeStruct s1;
00149 };
00150
00151 #if defined __i386__ && defined __INTEL_COMPILER && defined _OPENMP
00152 struct CountStruct {
00153 int count_low, count_high;
00154 };
00155 union Union2 {
00156 unsigned long long count;
00157 CountStruct s2;
00158 };
00159 #else
00160 struct Union2 {
00161 unsigned long long count;
00162 };
00163 #endif
00164
00165 Union1 u1;
00166 Union2 u2;
00167
00168 static double CPU_speed_in_MHz;
00169 static double get_CPU_speed_in_MHz();
00170 };
00171
00172
00173
00174 inline void PrecTimer::reset()
00175 {
00176 u1.total_time = 0;
00177 u2.count = 0;
00178 }
00179
00180 inline unsigned long long PrecTimer::getCount() const
00181 {
00182 return u2.count;
00183 }
00184
00185 inline PrecTimer::PrecTimer()
00186 {
00187 reset();
00188 }
00189
00190 inline PrecTimer::~PrecTimer()
00191 {}
00192
00193
00194 inline void PrecTimer::start()
00195 {
00196 #if defined __x86_64__ && defined __INTEL_COMPILER && defined _OPENMP
00197 asm volatile
00198 (
00199 "rdtsc\n\t"
00200 "shlq $32,%%rdx\n\t"
00201 "leaq (%%rax,%%rdx),%%rax\n\t"
00202 "lock;subq %%rax,%0"
00203 :
00204 "+m" (u1.total_time)
00205 :
00206 :
00207 "rax", "rdx"
00208 );
00209 #elif defined __i386__ && defined __INTEL_COMPILER && defined _OPENMP
00210 asm volatile
00211 (
00212 "rdtsc\n\t"
00213 "lock;subl %%eax,%0\n\t"
00214 "lock;sbbl %%edx,%1"
00215 :
00216 "+m" (u1.s1.total_time_low), "+m" (u1.s1total_time_high)
00217 :
00218 :
00219 "eax", "edx"
00220 );
00221 #elif (defined __i386__ || defined __x86_64__) && (defined __PATHSCALE__ || (defined __APPLE__ && defined __APPLE_CC__ && __APPLE_CC__ == 5531))
00222 unsigned eax, edx;
00223
00224 asm volatile ("rdtsc" : "=a" (eax), "=d" (edx));
00225
00226 u1.total_time -= ((unsigned long long) edx << 32) + eax;
00227 #elif (defined __i386__ || defined __x86_64__) && (defined __GNUC__ || defined __INTEL_COMPILER)
00228 asm volatile
00229 (
00230 "rdtsc\n\t"
00231 "subl %%eax, %0\n\t"
00232 "sbbl %%edx, %1"
00233 :
00234 "+m" (u1.s1.total_time_low), "+m" (u1.s1.total_time_high)
00235 :
00236 :
00237 "eax", "edx"
00238 );
00239 #elif defined __ia64__ && defined __INTEL_COMPILER
00240 u1.total_time -= __getReg(_IA64_REG_AR_ITC);
00241 #elif defined __ia64__ && defined __GNUC__
00242 long long time;
00243 asm volatile ("mov %0=ar.itc" : "=r" (time));
00244 u1.total_time -= time;
00245 #elif defined __PPC__ && (defined __GNUC__ || defined __xlC__)
00246 int high, low, retry;
00247
00248 asm
00249 (
00250 "0:\n\t"
00251 "mftbu %0\n\t"
00252 "mftb %1\n\t"
00253 "mftbu %2\n\t"
00254 "cmpw %2,%0\n\t"
00255 "bne 0b\n\t"
00256 "subfc %3,%1,%3\n\t"
00257 "subfe %4,%0,%4"
00258 :
00259 "=r" (high), "=r" (low), "=r" (retry),
00260 "=r" (u1.s1.total_time_low), "=r" (u1.s1.total_time_high)
00261 :
00262 "3" (u1.s1.total_time_low), "4" (u1.s1.total_time_high)
00263 );
00264 #endif
00265 }
00266
00267
00268 inline void PrecTimer::stop()
00269 {
00270 #if defined __x86_64__ && defined __INTEL_COMPILER && defined _OPENMP
00271 asm volatile
00272 (
00273 "rdtsc\n\t"
00274 "shlq $32,%%rdx\n\t"
00275 "leaq (%%rax,%%rdx),%%rax\n\t"
00276 "lock;addq %%rax,%0"
00277 :
00278 "+m" (u1.total_time)
00279 :
00280 :
00281 "rax", "rdx"
00282 );
00283 #elif defined __i386__ && defined __INTEL_COMPILER && defined _OPENMP
00284 asm volatile
00285 (
00286 "rdtsc\n\t"
00287 "lock;addl %%eax, %0\n\t"
00288 "lock;adcl %%edx, %1"
00289 :
00290 "+m" (u1.s1.total_time_low), "+m" (u1.s1.total_time_high)
00291 :
00292 :
00293 "eax", "edx"
00294 );
00295 #elif (defined __i386__ || defined __x86_64__) && (defined __PATHSCALE__ || (defined __APPLE__ && defined __APPLE_CC__ && __APPLE_CC__ == 5531))
00296 unsigned eax, edx;
00297
00298 asm volatile ("rdtsc\n\t" : "=a" (eax), "=d" (edx));
00299 u1.total_time += ((unsigned long long) edx << 32) + eax;
00300 #elif (defined __i386__ || defined __x86_64__) && (defined __GNUC__ || defined __INTEL_COMPILER)
00301 asm volatile
00302 (
00303 "rdtsc\n\t"
00304 "addl %%eax, %0\n\t"
00305 "adcl %%edx, %1"
00306 :
00307 "+m" (u1.s1.total_time_low), "+m" (u1.s1.total_time_high)
00308 :
00309 :
00310 "eax", "edx"
00311 );
00312 #elif defined __ia64__ && defined __INTEL_COMPILER
00313 u1.total_time += __getReg(_IA64_REG_AR_ITC);
00314 #elif defined __ia64__ && defined __GNUC__
00315 long long time;
00316 asm volatile ("mov %0=ar.itc" : "=r" (time));
00317 u1.total_time += time;
00318 #elif defined __PPC__ && (defined __GNUC__ || defined __xlC__)
00319 int high, low, retry;
00320
00321 asm
00322 (
00323 "0:\n\t"
00324 "mftbu %0\n\t"
00325 "mftb %1\n\t"
00326 "mftbu %2\n\t"
00327 "cmpw %2,%0\n\t"
00328 "bne 0b\n\t"
00329 "addc %3,%3,%1\n\t"
00330 "adde %4,%4,%0"
00331 :
00332 "=r" (high), "=r" (low), "=r" (retry),
00333 "=r" (u1.s1.total_time_low), "=r" (u1.s1.total_time_high)
00334 :
00335 "3" (u1.s1.total_time_low), "4" (u1.s1.total_time_high)
00336 );
00337 #endif
00338
00339 #if defined __x86_64__ && defined __INTEL_COMPILER && defined _OPENMP
00340 asm volatile ("lock;addq $1,%0" : "+m" (u2.count));
00341 #elif defined __i386__ && defined __INTEL_COMPILER && defined _OPENMP
00342 asm volatile
00343 (
00344 "lock;addl $1,%0\n\t"
00345 "lock;adcl $0,%1"
00346 :
00347 "+m" (u2.s2.count_low), "+m" (u2.s2.count_high)
00348 );
00349 #else
00350 ++u2.count;
00351 #endif
00352 }
00353
00354 }
00355
00356
00357 #endif