casa
$Rev:20696$
|
//# PrecTimer.h: Precision timer to measure elapsed times in a cumulative way
//# Copyright (C) 2006
//# Associated Universities, Inc. Washington DC, USA.
//#
//# This library is free software; you can redistribute it and/or modify it
//# under the terms of the GNU Library General Public License as published by
//# the Free Software Foundation; either version 2 of the License, or (at your
//# option) any later version.
//#
//# This library is distributed in the hope that it will be useful, but WITHOUT
//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
//# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
//# License for more details.
//#
//# You should have received a copy of the GNU Library General Public License
//# along with this library; if not, write to the Free Software Foundation,
//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
//#
//# Correspondence concerning AIPS++ should be addressed as follows:
//#        Internet email: aips2-request@nrao.edu.
//#        Postal address: AIPS++ Project Office
//#                        National Radio Astronomy Observatory
//#                        520 Edgemont Road
//#                        Charlottesville, VA 22903-2475 USA
//#
//# $Id: PrecTimer.h 21024 2011-03-01 11:46:18Z gervandiepen $

#ifndef CASA_PRECTIMER_H
#define CASA_PRECTIMER_H


#include <cstdlib>
#include <iostream>

#if defined __ia64__ && defined __INTEL_COMPILER
#include <ia64regs.h>
#endif


namespace casa { //# NAMESPACE CASA - BEGIN

// Forward Declaration.
class String;


// <summary>
// Precision timer to measure elapsed times in a cumulative way
// </summary>

// <use visibility=export>

// <reviewed reviewer="" date="" tests="tPrecTimer" demos="">
// </reviewed>

// <synopsis>
// The PrecTimer supplements the <linkto class=Timer>Timer</linkto> class.
// It offers a low-overhead and high-resolution interval timer for use
// on i386, x86_64, ia64, and powerpc platforms, using the processor's
// timestamp counter that is incremented each cycle.
// Put timer.start() and timer.stop() calls around the piece of
// code to be timed. Because the timer is cumulative, the total time of
// a particular piece of code can be timed.
// <note role=caution>
// Make sure that start() and stop() calls alternate,
// otherwise very strange times will be the result.
// </note>
//
// A timer can be started and stopped multiple times; both the average and
// total time, as well as the number of iterations are printed.
// The measured time is real time (as opposed to user or system time).
// The timer can be used to measure from 10 nanosecond to a century interval.
//
// Multiple timers can be used in a nested way as long as each of them
// has independent (matching) start and stop calls.
//
// The class is more or less a copy of the original written by John Romein
// at ASTRON, Dwingeloo, the Netherlands.
// </synopsis>

// <example>
// Here's how to create a timer, start it (the 'start' member function)
// and display a breakdown.
// <srcblock>
//  PrecTimer ttimer;   // the timer is reset at construction time
//  PrecTimer ctimer;
//  ttimer.reset();     // if you want to reset the timer (not needed here)
//  ttimer.start();     // start the total timer
//  for (int i=0; i<n; ++i) {
//    ... do something ...
//    ctimer.start();   // start the calc timer
//    ...do some calculation which will be timed...
//    ctimer.stop();    // and stop it
//  }
//  ttimer.stop();
//  ttimer.show (cout, "Total       ");
//  ctimer.show (cout, "Calculations");
// </srcblock>
// </example>

class PrecTimer {
public:
  // Construct.
  PrecTimer();

  // Destruct.
  ~PrecTimer();

  // Restart the timer.
  void start();
  // Stop the timer
  void stop();

  // Reset the timer to zero.
  void reset();

  // Show real time on cout or a user supplied stream.
  // <group>
  void show() const;
  void show (std::ostream& os) const;
  // </group>

  // Show real time on cout or a user supplied
  // stream preceded by the string parameter.
  // <group>
  void show (const String&) const;
  void show (std::ostream& os, const String& prefix) const;
  // </group>

  // Get the real time (in seconds).
  double getReal() const;

  // Get the total number of times start/stop is done.
  unsigned long long getCount() const;

private:
  void print_time (std::ostream&, double time) const;

  // The accumulated tick count is kept in a union so that it can be
  // addressed either as one 64-bit value or as two 32-bit halves; the
  // 32-bit inline assembly in start() and stop() updates the halves.
  // The order of the halves is swapped on PPC (presumably to match that
  // platform's byte order).
  struct TimeStruct {
#if defined __PPC__
    int total_time_high, total_time_low;
#else
    int total_time_low, total_time_high;
#endif
  };
  union Union1 {
    long long  total_time;
    TimeStruct s1;
  };

  // The start/stop counter. Only the i386 Intel OpenMP build needs access
  // to the separate 32-bit halves (see the end of stop()); all other
  // builds use a plain 64-bit counter.
#if defined __i386__ && defined __INTEL_COMPILER && defined _OPENMP
  struct CountStruct {
    int count_low, count_high;
  };
  union Union2 {
    unsigned long long count;
    CountStruct        s2;
  };
#else
  struct Union2 {
    unsigned long long count;
  };
#endif

  Union1 u1;
  Union2 u2;

  // CPU speed in MHz and the function to obtain it; both are defined
  // outside this header (presumably used to convert ticks to seconds).
  static double CPU_speed_in_MHz;
  static double get_CPU_speed_in_MHz();
};



// Set both the accumulated time and the start/stop count to zero.
inline void PrecTimer::reset()
{
  u1.total_time = 0;
  u2.count      = 0;
}

// Return the counter that is incremented by each stop() call.
inline unsigned long long PrecTimer::getCount() const
{
  return u2.count;
}

// A new timer starts out in the reset state.
inline PrecTimer::PrecTimer()
{
  reset();
}

inline PrecTimer::~PrecTimer()
{}


// Begin a timing interval.
// The current value of the processor's tick counter is subtracted from the
// accumulated total; the matching stop() call adds the counter value again,
// so that each start/stop pair contributes the number of elapsed ticks.
// The way the counter is read depends on the platform and compiler. If none
// of the branches below applies, this function does nothing.
inline void PrecTimer::start()
{
#if defined __x86_64__ && defined __INTEL_COMPILER && defined _OPENMP
  // Combine edx:eax into one 64-bit value and subtract it with a
  // lock-prefixed instruction.
  asm volatile
  (
    "rdtsc\n\t"
    "shlq $32,%%rdx\n\t"
    "leaq (%%rax,%%rdx),%%rax\n\t"
    "lock;subq %%rax,%0"
  :
    "+m" (u1.total_time)
  :
  :
    "rax", "rdx"
  );
#elif defined __i386__ && defined __INTEL_COMPILER && defined _OPENMP
  // 64-bit subtraction done as two lock-prefixed 32-bit operations
  // (subtract low half, then subtract high half with borrow).
  asm volatile
  (
    "rdtsc\n\t"
    "lock;subl %%eax,%0\n\t"
    "lock;sbbl %%edx,%1"
  :
    "+m" (u1.s1.total_time_low), "+m" (u1.s1.total_time_high)
  :
  :
    "eax", "edx"
  );
#elif (defined __i386__ || defined __x86_64__) && (defined __PATHSCALE__ || (defined __APPLE__ && defined __APPLE_CC__ && __APPLE_CC__ == 5531))
  // For these compilers only the counter read is done in assembly;
  // the subtraction itself is done in C++.
  unsigned eax, edx;

  asm volatile ("rdtsc" : "=a" (eax), "=d" (edx));

  u1.total_time -= ((unsigned long long) edx << 32) + eax;
#elif (defined __i386__ || defined __x86_64__) && (defined __GNUC__ || defined __INTEL_COMPILER)
  // Subtract low half, then high half with borrow.
  asm volatile
  (
    "rdtsc\n\t"
    "subl %%eax, %0\n\t"
    "sbbl %%edx, %1"
  :
    "+m" (u1.s1.total_time_low), "+m" (u1.s1.total_time_high)
  :
  :
    "eax", "edx"
  );
#elif defined __ia64__ && defined __INTEL_COMPILER
  u1.total_time -= __getReg(_IA64_REG_AR_ITC);
#elif defined __ia64__ && defined __GNUC__
  long long time;
  asm volatile ("mov %0=ar.itc" : "=r" (time));
  u1.total_time -= time;
#elif defined __PPC__ && (defined __GNUC__ || defined __xlC__)
  int high, low, retry;

  // The upper half is read before and after the lower half; the read is
  // repeated if the two upper values differ.
  asm
  (
    "0:\n\t"
    "mftbu %0\n\t"
    "mftb %1\n\t"
    "mftbu %2\n\t"
    "cmpw %2,%0\n\t"
    "bne 0b\n\t"
    "subfc %3,%1,%3\n\t"
    "subfe %4,%0,%4"
  :
    "=r" (high), "=r" (low), "=r" (retry),
    "=r" (u1.s1.total_time_low), "=r" (u1.s1.total_time_high)
  :
    "3" (u1.s1.total_time_low), "4" (u1.s1.total_time_high)
  );
#endif
}


// End a timing interval.
// The current value of the processor's tick counter is added to the
// accumulated total (see start()) and the start/stop count is incremented.
// If none of the platform branches applies, only the count is incremented.
inline void PrecTimer::stop()
{
#if defined __x86_64__ && defined __INTEL_COMPILER && defined _OPENMP
  // Combine edx:eax into one 64-bit value and add it with a
  // lock-prefixed instruction.
  asm volatile
  (
    "rdtsc\n\t"
    "shlq $32,%%rdx\n\t"
    "leaq (%%rax,%%rdx),%%rax\n\t"
    "lock;addq %%rax,%0"
  :
    "+m" (u1.total_time)
  :
  :
    "rax", "rdx"
  );
#elif defined __i386__ && defined __INTEL_COMPILER && defined _OPENMP
  // 64-bit addition done as two lock-prefixed 32-bit operations
  // (add low half, then add high half with carry).
  asm volatile
  (
    "rdtsc\n\t"
    "lock;addl %%eax, %0\n\t"
    "lock;adcl %%edx, %1"
  :
    "+m" (u1.s1.total_time_low), "+m" (u1.s1.total_time_high)
  :
  :
    "eax", "edx"
  );
#elif (defined __i386__ || defined __x86_64__) && (defined __PATHSCALE__ || (defined __APPLE__ && defined __APPLE_CC__ && __APPLE_CC__ == 5531))
  // For these compilers only the counter read is done in assembly;
  // the addition itself is done in C++.
  unsigned eax, edx;

  asm volatile ("rdtsc\n\t" : "=a" (eax), "=d" (edx));
  u1.total_time += ((unsigned long long) edx << 32) + eax;
#elif (defined __i386__ || defined __x86_64__) && (defined __GNUC__ || defined __INTEL_COMPILER)
  // Add low half, then high half with carry.
  asm volatile
  (
    "rdtsc\n\t"
    "addl %%eax, %0\n\t"
    "adcl %%edx, %1"
  :
    "+m" (u1.s1.total_time_low), "+m" (u1.s1.total_time_high)
  :
  :
    "eax", "edx"
  );
#elif defined __ia64__ && defined __INTEL_COMPILER
  u1.total_time += __getReg(_IA64_REG_AR_ITC);
#elif defined __ia64__ && defined __GNUC__
  long long time;
  asm volatile ("mov %0=ar.itc" : "=r" (time));
  u1.total_time += time;
#elif defined __PPC__ && (defined __GNUC__ || defined __xlC__)
  int high, low, retry;

  // The upper half is read before and after the lower half; the read is
  // repeated if the two upper values differ.
  asm
  (
    "0:\n\t"
    "mftbu %0\n\t"
    "mftb %1\n\t"
    "mftbu %2\n\t"
    "cmpw %2,%0\n\t"
    "bne 0b\n\t"
    "addc %3,%3,%1\n\t"
    "adde %4,%4,%0"
  :
    "=r" (high), "=r" (low), "=r" (retry),
    "=r" (u1.s1.total_time_low), "=r" (u1.s1.total_time_high)
  :
    "3" (u1.s1.total_time_low), "4" (u1.s1.total_time_high)
  );
#endif

  // Count this start/stop pair. The Intel OpenMP builds use lock-prefixed
  // instructions for the increment; all other builds use a plain increment.
#if defined __x86_64__ && defined __INTEL_COMPILER && defined _OPENMP
  asm volatile ("lock;addq $1,%0" : "+m" (u2.count));
#elif defined __i386__ && defined __INTEL_COMPILER && defined _OPENMP
  asm volatile
  (
    "lock;addl $1,%0\n\t"
    "lock;adcl $0,%1"
  :
    "+m" (u2.s2.count_low), "+m" (u2.s2.count_high)
  );
#else
  ++u2.count;
#endif
}

} //# NAMESPACE CASA - END


#endif