casa  $Rev:20696$
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
PrecTimer.h
Go to the documentation of this file.
00001 //# PrecTimer.h: Precision timer to measure elapsed times in a cumulative way
00002 //# Copyright (C) 2006
00003 //# Associated Universities, Inc. Washington DC, USA.
00004 //#
00005 //# This library is free software; you can redistribute it and/or modify it
00006 //# under the terms of the GNU Library General Public License as published by
00007 //# the Free Software Foundation; either version 2 of the License, or (at your
00008 //# option) any later version.
00009 //#
00010 //# This library is distributed in the hope that it will be useful, but WITHOUT
00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00013 //# License for more details.
00014 //#
00015 //# You should have received a copy of the GNU Library General Public License
00016 //# along with this library; if not, write to the Free Software Foundation,
00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00018 //#
00019 //# Correspondence concerning AIPS++ should be addressed as follows:
00020 //#        Internet email: aips2-request@nrao.edu.
00021 //#        Postal address: AIPS++ Project Office
00022 //#                        National Radio Astronomy Observatory
00023 //#                        520 Edgemont Road
00024 //#                        Charlottesville, VA 22903-2475 USA
00025 //#
00026 //# $Id: PrecTimer.h 21024 2011-03-01 11:46:18Z gervandiepen $
00027 
00028 #ifndef CASA_PRECTIMER_H
00029 #define CASA_PRECTIMER_H
00030 
00031 
00032 #include <cstdlib>
00033 #include <iostream>
00034 
00035 #if defined __ia64__ && defined __INTEL_COMPILER
00036 #include <ia64regs.h>
00037 #endif
00038 
00039 
00040 namespace casa { //# NAMESPACE CASA - BEGIN
00041 
00042 // Forward Declaration.
00043 class String;
00044 
00045 
00046 // <summary>
00047 // Precision timer to measure elapsed times in a cumulative way
00048 // </summary>
00049 
00050 // <use visibility=export>
00051 
00052 // <reviewed reviewer="" date="" tests="tPrecTimer" demos="">
00053 // </reviewed>
00054 
00055 // <synopsis>
00056 // The PrecTimer supplements the <linkto class=Timer>Timer</linkto> class.
00057 // It offers a low-overhead and high-resolution interval timer for use
00058 // on i386, x86_64, ia64, and powerpc platforms, using the processor's
00059 // timestamp counter that is incremented each cycle.
00060 // Put timer.start() and timer.stop() calls around the piece of
00061 // code to be timed. Because the timer is cumulative, the total time of
00062 // a particular piece of code can be timed.
00063 // <note role=caution>
00064 // Make sure that start() and stop() calls alternate,
00065 // otherwise very strange times will be the result.
00066 // </note>
00067 //
00068 // A timer can be started and stopped multiple times; both the average and
00069 // total time, as well as the number of iterations are printed.
00070 // The measured time is real time (as opposed to user or system time).
00071 // The timer can be used to measure intervals from 10 nanoseconds to a century.
00072 //
00073 // Multiple timers can be used in a nested way as long as each of them
00074 // has independent (matching) start and stop calls.
00075 //
00076 // The class is more or less a copy of the original written by John Romein
00077 // at ASTRON, Dwingeloo, the Netherlands.
00078 // </synopsis>
00079 
00080 // <example>
00081 // Here's how to create a timer, start it (the 'start' member function),
00082 // stop it (the 'stop' member function) and display a breakdown.
00083 // <srcblock>
00084 //  PrecTimer ttimer;   // the timer is reset at construction time
00085 //  PrecTimer ctimer;
00086 //  ttimer.reset();     // if you want to reset the timer (not needed here)
00087 //  ttimer.start();     // start the total timer
00088 //  for (int i=0; i<n; ++i) {
00089 //    ... do something ...
00090 //    ctimer.start();   // start the calc timer
00091 //    ...do some calculation which will be timed...
00092 //    ctimer.stop();    // and stop it
00093 //  }
00094 //  ttimer.stop();
00095 //  ttimer.show (cout, "Total       ");
00096 //  ctimer.show (cout, "Calculations");
00097 // </srcblock>
00098 // </example>
00099 
00100   class PrecTimer {
00101   public:
00102     // Construct.
00103     PrecTimer();
00104 
00105     // Destruct.
00106     ~PrecTimer();
00107 
00108     // Restart the timer.
00109     void start();
00110     // Stop the timer
00111     void stop();
00112 
00113     // Reset the timer to zero.
00114     void reset();
00115 
00116     // Show real time on cout or a user supplied stream.
00117     // <group>
00118     void show() const;
00119     void show (std::ostream& os) const;
00120     // </group>
00121 
00122     // Show real time on cout or a user supplied
00123     // stream preceeded by the string parameter.
00124     // <group>
00125     void show (const String&) const;
00126     void show (std::ostream& os, const String& prefix) const;
00127     // </group>
00128 
00129     // Get the real time (in seconds).
00130     double getReal() const;
00131 
00132     // Get the total number of times start/stop is done.
00133     unsigned long long getCount() const;
00134 
00135   private:
00136     void print_time (std::ostream&, double time) const;
00137 
00138     struct TimeStruct {
00139 #if defined __PPC__
00140       int          total_time_high, total_time_low;
00141 #else
00142       int          total_time_low, total_time_high;
00143 #endif
00144     };
00145     union Union1 {
00146       long long    total_time;
00147       TimeStruct   s1;
00148     };
00149 
00150 #if defined __i386__ && defined __INTEL_COMPILER && defined _OPENMP
00151     struct CountStruct {
00152       int count_low, count_high;
00153     };
00154     union Union2 {
00155       unsigned long long count;
00156       CountStruct        s2;
00157     };
00158 #else
00159     struct Union2 {
00160       unsigned long long count;
00161     };
00162 #endif
00163 
00164     Union1 u1;
00165     Union2 u2;
00166 
00167     static double CPU_speed_in_MHz;
00168     static double get_CPU_speed_in_MHz();
00169   };
00170 
00171 
00172 
00173   inline void PrecTimer::reset()
00174   {
00175     u1.total_time = 0;
00176     u2.count      = 0;
00177   }
00178 
00179   inline unsigned long long PrecTimer::getCount() const
00180   {
00181     return u2.count;
00182   }
00183 
00184   inline PrecTimer::PrecTimer()
00185   {
00186     reset();
00187   }
00188 
00189   inline PrecTimer::~PrecTimer()
00190   {}
00191 
00192 
00193   inline void PrecTimer::start()
00194   {
00195 #if defined __x86_64__ && defined __INTEL_COMPILER && defined _OPENMP
00196     asm volatile
00197     (
00198         "rdtsc\n\t"
00199         "shlq $32,%%rdx\n\t"
00200         "leaq (%%rax,%%rdx),%%rax\n\t"
00201         "lock;subq %%rax,%0"
00202     :
00203         "+m" (u1.total_time)
00204     :
00205     :
00206         "rax", "rdx"
00207     );
00208 #elif defined __i386__ && defined __INTEL_COMPILER && defined _OPENMP
00209     asm volatile
00210     (
00211         "rdtsc\n\t"
00212         "lock;subl %%eax,%0\n\t"
00213         "lock;sbbl %%edx,%1"
00214     :
00215         "+m" (u1.s1.total_time_low), "+m" (u1.s1total_time_high)
00216     :
00217     :
00218         "eax", "edx"
00219     );
00220 #elif (defined __i386__ || defined __x86_64__) && (defined __PATHSCALE__ || (defined __APPLE__ && defined __APPLE_CC__ && __APPLE_CC__ == 5531))
00221     unsigned eax, edx;
00222 
00223     asm volatile ("rdtsc" : "=a" (eax), "=d" (edx));
00224 
00225     u1.total_time -= ((unsigned long long) edx << 32) + eax;
00226 #elif (defined __i386__ || defined __x86_64__) && (defined __GNUC__ || defined __INTEL_COMPILER)
00227     asm volatile
00228     (
00229         "rdtsc\n\t"
00230         "subl %%eax, %0\n\t"
00231         "sbbl %%edx, %1"
00232     :
00233         "+m" (u1.s1.total_time_low), "+m" (u1.s1.total_time_high)
00234     :
00235     :
00236         "eax", "edx"
00237     );
00238 #elif defined __ia64__ && defined __INTEL_COMPILER
00239     u1.total_time -= __getReg(_IA64_REG_AR_ITC);
00240 #elif defined __ia64__ && defined __GNUC__
00241     long long time;
00242     asm volatile ("mov %0=ar.itc" : "=r" (time));
00243     u1.total_time -= time;
00244 #elif defined __PPC__ && (defined __GNUC__ || defined __xlC__)
00245     int high, low, retry;
00246 
00247     asm
00248     (
00249         "0:\n\t"
00250         "mftbu %0\n\t"
00251         "mftb %1\n\t"
00252         "mftbu %2\n\t"
00253         "cmpw %2,%0\n\t"
00254         "bne 0b\n\t"
00255         "subfc %3,%1,%3\n\t"
00256         "subfe %4,%0,%4"
00257     :
00258         "=r" (high), "=r" (low), "=r" (retry),
00259         "=r" (u1.s1.total_time_low), "=r" (u1.s1.total_time_high)
00260     :
00261         "3" (u1.s1.total_time_low), "4" (u1.s1.total_time_high)
00262     );
00263 #endif
00264   }
00265 
00266 
00267   inline void PrecTimer::stop()
00268   {
00269 #if defined __x86_64__ && defined __INTEL_COMPILER && defined _OPENMP
00270     asm volatile
00271     (
00272         "rdtsc\n\t"
00273         "shlq $32,%%rdx\n\t"
00274         "leaq (%%rax,%%rdx),%%rax\n\t"
00275         "lock;addq %%rax,%0"
00276     :
00277         "+m" (u1.total_time)
00278     :
00279     :
00280         "rax", "rdx"
00281     );
00282 #elif defined __i386__ && defined __INTEL_COMPILER && defined _OPENMP
00283     asm volatile
00284     (
00285         "rdtsc\n\t"
00286         "lock;addl %%eax, %0\n\t"
00287         "lock;adcl %%edx, %1"
00288     :
00289         "+m" (u1.s1.total_time_low), "+m" (u1.s1.total_time_high)
00290     :
00291     :
00292         "eax", "edx"
00293     );
00294 #elif (defined __i386__ || defined __x86_64__) && (defined __PATHSCALE__ || (defined __APPLE__ && defined __APPLE_CC__ && __APPLE_CC__ == 5531))
00295     unsigned eax, edx;
00296 
00297     asm volatile ("rdtsc\n\t" : "=a" (eax), "=d" (edx));
00298     u1.total_time += ((unsigned long long) edx << 32) + eax;
00299 #elif (defined __i386__ || defined __x86_64__) && (defined __GNUC__ || defined __INTEL_COMPILER)
00300     asm volatile
00301     (
00302         "rdtsc\n\t"
00303         "addl %%eax, %0\n\t"
00304         "adcl %%edx, %1"
00305     :
00306         "+m" (u1.s1.total_time_low), "+m" (u1.s1.total_time_high)
00307     :
00308     :
00309         "eax", "edx"
00310     );
00311 #elif defined __ia64__ && defined __INTEL_COMPILER
00312     u1.total_time += __getReg(_IA64_REG_AR_ITC);
00313 #elif defined __ia64__ && defined __GNUC__
00314     long long time;
00315     asm volatile ("mov %0=ar.itc" : "=r" (time));
00316     u1.total_time += time;
00317 #elif defined __PPC__ && (defined __GNUC__ || defined __xlC__)
00318     int high, low, retry;
00319 
00320     asm
00321     (
00322         "0:\n\t"
00323         "mftbu %0\n\t"
00324         "mftb %1\n\t"
00325         "mftbu %2\n\t"
00326         "cmpw %2,%0\n\t"
00327         "bne 0b\n\t"
00328         "addc %3,%3,%1\n\t"
00329         "adde %4,%4,%0"
00330     :
00331         "=r" (high), "=r" (low), "=r" (retry),
00332         "=r" (u1.s1.total_time_low), "=r" (u1.s1.total_time_high)
00333     :
00334         "3" (u1.s1.total_time_low), "4" (u1.s1.total_time_high)
00335     );
00336 #endif
00337 
00338 #if defined __x86_64__ && defined __INTEL_COMPILER && defined _OPENMP
00339     asm volatile ("lock;addq $1,%0" : "+m" (u2.count));
00340 #elif defined __i386__ && defined __INTEL_COMPILER && defined _OPENMP
00341     asm volatile
00342     (
00343         "lock;addl $1,%0\n\t"
00344         "lock;adcl $0,%1"
00345     :
00346         "+m" (u2.s2.count_low), "+m" (u2.s2.count_high)
00347     );
00348 #else
00349     ++u2.count;
00350 #endif
00351   }
00352 
00353 } //# NAMESPACE CASA - END
00354 
00355 
00356 #endif