PrecTimer.h

Go to the documentation of this file.
00001 //# PrecTimer.h: Precision timer to measure elapsed times in a cumulative way
00002 //# Copyright (C) 2006
00003 //# Associated Universities, Inc. Washington DC, USA.
00004 //#
00005 //# This library is free software; you can redistribute it and/or modify it
00006 //# under the terms of the GNU Library General Public License as published by
00007 //# the Free Software Foundation; either version 2 of the License, or (at your
00008 //# option) any later version.
00009 //#
00010 //# This library is distributed in the hope that it will be useful, but WITHOUT
00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00013 //# License for more details.
00014 //#
00015 //# You should have received a copy of the GNU Library General Public License
00016 //# along with this library; if not, write to the Free Software Foundation,
00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00018 //#
00019 //# Correspondence concerning AIPS++ should be addressed as follows:
00020 //#        Internet email: aips2-request@nrao.edu.
00021 //#        Postal address: AIPS++ Project Office
00022 //#                        National Radio Astronomy Observatory
00023 //#                        520 Edgemont Road
00024 //#                        Charlottesville, VA 22903-2475 USA
00025 //#
00026 //# $Id$
00027 
00028 #ifndef CASA_PRECTIMER_H
00029 #define CASA_PRECTIMER_H
00030 
00031 
00032 #include <casacore/casa/aips.h>
00033 #include <cstdlib>
00034 #include <iostream>
00035 
00036 #if defined __ia64__ && defined __INTEL_COMPILER
00037 #include <ia64regs.h>
00038 #endif
00039 
00040 
00041 namespace casacore { //# NAMESPACE CASACORE - BEGIN
00042 
00043 // Forward Declaration.
00044 class String;
00045 
00046 
00047 // <summary>
00048 // Precision timer to measure elapsed times in a cumulative way
00049 // </summary>
00050 
00051 // <use visibility=export>
00052 
00053 // <reviewed reviewer="" date="" tests="tPrecTimer" demos="">
00054 // </reviewed>
00055 
00056 // <synopsis>
00057 // The PrecTimer supplements the <linkto class=Timer>Timer</linkto> class.
00058 // It offers a low-overhead and high-resolution interval timer for use
00059 // on i386, x86_64, ia64, and powerpc platforms, using the processor's
00060 // timestamp counter that is incremented each cycle.
00061 // Put timer.start() and timer.stop() calls around the piece of
00062 // code to be timed. Because the timer is cumulative, the total time of
00063 // a particular piece of code can be timed.
00064 // <note role=caution>
00065 // Make sure that start() and stop() calls alternate,
00066 // otherwise very strange times will be the result.
00067 // </note>
00068 //
00069 // A timer can be started and stopped multiple times; both the average and
00070 // total time, as well as the number of iterations are printed.
00071 // The measured time is real time (as opposed to user or system time).
00072 // The timer can be used to measure intervals from 10 nanoseconds to a century.
00073 //
00074 // Multiple timers can be used in a nested way as long as each of them
00075 // has independent (matching) start and stop calls.
00076 //
00077 // The class is more or less a copy of the original written by John Romein
00078 // at ASTRON, Dwingeloo, the Netherlands.
00079 // </synopsis>
00080 
00081 // <example>
00082 // Here's how to create timers, start and stop them (the 'start' and 'stop'
00083 // member functions), and display a breakdown.
00084 // <srcblock>
00085 //  PrecTimer ttimer;   // the timer is reset at construction time
00086 //  PrecTimer ctimer;
00087 //  ttimer.reset();     // if you want to reset the timer (not needed here)
00088 //  ttimer.start();     // start the total timer
00089 //  for (int i=0; i<n; ++i) {
00090 //    ... do something ...
00091 //    ctimer.start();   // start the calc timer
00092 //    ...do some calculation which will be timed...
00093 //    ctimer.stop();    // and stop it
00094 //  }
00095 //  ttimer.stop();
00096 //  ttimer.show (cout, "Total       ");
00097 //  ctimer.show (cout, "Calculations");
00098 // </srcblock>
00099 // </example>
00100 
00101   class PrecTimer {
00102   public:
00103     // Construct.
00104     PrecTimer();
00105 
00106     // Destruct.
00107     ~PrecTimer();
00108 
00109     // Restart the timer.
00110     void start();
00111     // Stop the timer
00112     void stop();
00113 
00114     // Reset the timer to zero.
00115     void reset();
00116 
00117     // Show real time on cout or a user supplied stream.
00118     // <group>
00119     void show() const;
00120     void show (std::ostream& os) const;
00121     // </group>
00122 
00123     // Show real time on cout or a user supplied
00124     // stream preceeded by the string parameter.
00125     // <group>
00126     void show (const String&) const;
00127     void show (std::ostream& os, const String& prefix) const;
00128     // </group>
00129 
00130     // Get the real time (in seconds).
00131     double getReal() const;
00132 
00133     // Get the total number of times start/stop is done.
00134     unsigned long long getCount() const;
00135 
00136   private:
00137     void print_time (std::ostream&, double time) const;
00138 
00139     struct TimeStruct {
00140 #if defined __PPC__
00141       int          total_time_high, total_time_low;
00142 #else
00143       int          total_time_low, total_time_high;
00144 #endif
00145     };
00146     union Union1 {
00147       long long    total_time;
00148       TimeStruct   s1;
00149     };
00150 
00151 #if defined __i386__ && defined __INTEL_COMPILER && defined _OPENMP
00152     struct CountStruct {
00153       int count_low, count_high;
00154     };
00155     union Union2 {
00156       unsigned long long count;
00157       CountStruct        s2;
00158     };
00159 #else
00160     struct Union2 {
00161       unsigned long long count;
00162     };
00163 #endif
00164 
00165     Union1 u1;
00166     Union2 u2;
00167 
00168     static double CPU_speed_in_MHz;
00169     static double get_CPU_speed_in_MHz();
00170   };
00171 
00172 
00173 
00174   inline void PrecTimer::reset()
00175   {
00176     u1.total_time = 0;
00177     u2.count      = 0;
00178   }
00179 
00180   inline unsigned long long PrecTimer::getCount() const
00181   {
00182     return u2.count;
00183   }
00184 
00185   inline PrecTimer::PrecTimer()
00186   {
00187     reset();
00188   }
00189 
00190   inline PrecTimer::~PrecTimer()
00191   {}
00192 
00193 
00194   inline void PrecTimer::start()
00195   {
00196 #if defined __x86_64__ && defined __INTEL_COMPILER && defined _OPENMP
00197     asm volatile
00198     (
00199         "rdtsc\n\t"
00200         "shlq $32,%%rdx\n\t"
00201         "leaq (%%rax,%%rdx),%%rax\n\t"
00202         "lock;subq %%rax,%0"
00203     :
00204         "+m" (u1.total_time)
00205     :
00206     :
00207         "rax", "rdx"
00208     );
00209 #elif defined __i386__ && defined __INTEL_COMPILER && defined _OPENMP
00210     asm volatile
00211     (
00212         "rdtsc\n\t"
00213         "lock;subl %%eax,%0\n\t"
00214         "lock;sbbl %%edx,%1"
00215     :
00216         "+m" (u1.s1.total_time_low), "+m" (u1.s1total_time_high)
00217     :
00218     :
00219         "eax", "edx"
00220     );
00221 #elif (defined __i386__ || defined __x86_64__) && (defined __PATHSCALE__ || (defined __APPLE__ && defined __APPLE_CC__ && __APPLE_CC__ == 5531))
00222     unsigned eax, edx;
00223 
00224     asm volatile ("rdtsc" : "=a" (eax), "=d" (edx));
00225 
00226     u1.total_time -= ((unsigned long long) edx << 32) + eax;
00227 #elif (defined __i386__ || defined __x86_64__) && (defined __GNUC__ || defined __INTEL_COMPILER)
00228     asm volatile
00229     (
00230         "rdtsc\n\t"
00231         "subl %%eax, %0\n\t"
00232         "sbbl %%edx, %1"
00233     :
00234         "+m" (u1.s1.total_time_low), "+m" (u1.s1.total_time_high)
00235     :
00236     :
00237         "eax", "edx"
00238     );
00239 #elif defined __ia64__ && defined __INTEL_COMPILER
00240     u1.total_time -= __getReg(_IA64_REG_AR_ITC);
00241 #elif defined __ia64__ && defined __GNUC__
00242     long long time;
00243     asm volatile ("mov %0=ar.itc" : "=r" (time));
00244     u1.total_time -= time;
00245 #elif defined __PPC__ && (defined __GNUC__ || defined __xlC__)
00246     int high, low, retry;
00247 
00248     asm
00249     (
00250         "0:\n\t"
00251         "mftbu %0\n\t"
00252         "mftb %1\n\t"
00253         "mftbu %2\n\t"
00254         "cmpw %2,%0\n\t"
00255         "bne 0b\n\t"
00256         "subfc %3,%1,%3\n\t"
00257         "subfe %4,%0,%4"
00258     :
00259         "=r" (high), "=r" (low), "=r" (retry),
00260         "=r" (u1.s1.total_time_low), "=r" (u1.s1.total_time_high)
00261     :
00262         "3" (u1.s1.total_time_low), "4" (u1.s1.total_time_high)
00263     );
00264 #endif
00265   }
00266 
00267 
00268   inline void PrecTimer::stop()
00269   {
00270 #if defined __x86_64__ && defined __INTEL_COMPILER && defined _OPENMP
00271     asm volatile
00272     (
00273         "rdtsc\n\t"
00274         "shlq $32,%%rdx\n\t"
00275         "leaq (%%rax,%%rdx),%%rax\n\t"
00276         "lock;addq %%rax,%0"
00277     :
00278         "+m" (u1.total_time)
00279     :
00280     :
00281         "rax", "rdx"
00282     );
00283 #elif defined __i386__ && defined __INTEL_COMPILER && defined _OPENMP
00284     asm volatile
00285     (
00286         "rdtsc\n\t"
00287         "lock;addl %%eax, %0\n\t"
00288         "lock;adcl %%edx, %1"
00289     :
00290         "+m" (u1.s1.total_time_low), "+m" (u1.s1.total_time_high)
00291     :
00292     :
00293         "eax", "edx"
00294     );
00295 #elif (defined __i386__ || defined __x86_64__) && (defined __PATHSCALE__ || (defined __APPLE__ && defined __APPLE_CC__ && __APPLE_CC__ == 5531))
00296     unsigned eax, edx;
00297 
00298     asm volatile ("rdtsc\n\t" : "=a" (eax), "=d" (edx));
00299     u1.total_time += ((unsigned long long) edx << 32) + eax;
00300 #elif (defined __i386__ || defined __x86_64__) && (defined __GNUC__ || defined __INTEL_COMPILER)
00301     asm volatile
00302     (
00303         "rdtsc\n\t"
00304         "addl %%eax, %0\n\t"
00305         "adcl %%edx, %1"
00306     :
00307         "+m" (u1.s1.total_time_low), "+m" (u1.s1.total_time_high)
00308     :
00309     :
00310         "eax", "edx"
00311     );
00312 #elif defined __ia64__ && defined __INTEL_COMPILER
00313     u1.total_time += __getReg(_IA64_REG_AR_ITC);
00314 #elif defined __ia64__ && defined __GNUC__
00315     long long time;
00316     asm volatile ("mov %0=ar.itc" : "=r" (time));
00317     u1.total_time += time;
00318 #elif defined __PPC__ && (defined __GNUC__ || defined __xlC__)
00319     int high, low, retry;
00320 
00321     asm
00322     (
00323         "0:\n\t"
00324         "mftbu %0\n\t"
00325         "mftb %1\n\t"
00326         "mftbu %2\n\t"
00327         "cmpw %2,%0\n\t"
00328         "bne 0b\n\t"
00329         "addc %3,%3,%1\n\t"
00330         "adde %4,%4,%0"
00331     :
00332         "=r" (high), "=r" (low), "=r" (retry),
00333         "=r" (u1.s1.total_time_low), "=r" (u1.s1.total_time_high)
00334     :
00335         "3" (u1.s1.total_time_low), "4" (u1.s1.total_time_high)
00336     );
00337 #endif
00338 
00339 #if defined __x86_64__ && defined __INTEL_COMPILER && defined _OPENMP
00340     asm volatile ("lock;addq $1,%0" : "+m" (u2.count));
00341 #elif defined __i386__ && defined __INTEL_COMPILER && defined _OPENMP
00342     asm volatile
00343     (
00344         "lock;addl $1,%0\n\t"
00345         "lock;adcl $0,%1"
00346     :
00347         "+m" (u2.s2.count_low), "+m" (u2.s2.count_high)
00348     );
00349 #else
00350     ++u2.count;
00351 #endif
00352   }
00353 
00354 } //# NAMESPACE CASACORE - END
00355 
00356 
00357 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated on 31 Aug 2016 for casa by  doxygen 1.6.1