Pyrogenesis  trunk
timer.h
Go to the documentation of this file.
1 /* Copyright (c) 2010 Wildfire Games
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining
4  * a copy of this software and associated documentation files (the
5  * "Software"), to deal in the Software without restriction, including
6  * without limitation the rights to use, copy, modify, merge, publish,
7  * distribute, sublicense, and/or sell copies of the Software, and to
8  * permit persons to whom the Software is furnished to do so, subject to
9  * the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 /*
24  * platform-independent high resolution timer
25  */
26 
27 #ifndef INCLUDED_TIMER
28 #define INCLUDED_TIMER
29 
30 #include "lib/config2.h" // CONFIG2_TIMER_ALLOW_RDTSC
31 #include "lib/sysdep/cpu.h" // cpu_AtomicAdd
32 #if ARCH_X86_X64 && CONFIG2_TIMER_ALLOW_RDTSC
33 # include "lib/sysdep/os_cpu.h" // os_cpu_ClockFrequency
34 # include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64::rdtsc
35 #endif
36 
37 #include "lib/utf8.h"
38 
39 /**
40  * timer_Time will subsequently return values relative to the current time.
41  **/
42 LIB_API void timer_LatchStartTime();
43 
44 /**
45  * @return high resolution (> 1 us) timestamp [s].
46  **/
47 LIB_API double timer_Time();
48 
49 /**
50  * @return resolution [s] of the timer.
51  **/
52 LIB_API double timer_Resolution();
53 
54 
55 // (allow using XADD (faster than CMPXCHG) in 64-bit builds without casting)
56 #if ARCH_AMD64
57 typedef intptr_t Cycles;
58 #else
59 typedef i64 Cycles;
60 #endif
61 
62 /**
63  * internal helper functions for returning an easily readable
64  * string (i.e. re-scaled to appropriate units)
65  **/
66 LIB_API std::string StringForSeconds(double seconds);
67 LIB_API std::string StringForCycles(Cycles cycles);
68 
69 
70 //-----------------------------------------------------------------------------
71 // scope timing
72 
73 /// used by TIMER
75 {
77 public:
78  ScopeTimer(const wchar_t* description)
79  : m_t0(timer_Time()), m_description(description)
80  {
81  }
82 
84  {
85  const double t1 = timer_Time();
86  const std::string elapsedTimeString = StringForSeconds(t1-m_t0);
87  debug_printf("TIMER| %s: %s\n", utf8_from_wstring(m_description).c_str(), elapsedTimeString.c_str());
88  }
89 
90 private:
91  double m_t0;
92  const wchar_t* m_description;
93 };
94 
95 /**
96  * Measures the time taken to execute code up until end of the current scope;
97  * displays it via debug_printf. Can safely be nested.
98  * Useful for measuring time spent in a function or basic block.
99  * <description> must remain valid over the lifetime of this object;
100  * a string literal is safest.
101  *
102  * Example usage:
103  * void func()
104  * {
105  * TIMER(L"description");
106  * // code to be measured
107  * }
108  **/
109 #define TIMER(description) ScopeTimer UID__(description)
110 
111 /**
112  * Measures the time taken to execute code between BEGIN and END markers;
113  * displays it via debug_printf. Can safely be nested.
114  * Useful for measuring several pieces of code within the same function/block.
115  * <description> must remain valid over the lifetime of this object;
116  * a string literal is safest.
117  *
118  * Caveats:
119  * - this wraps the code to be measured in a basic block, so any
120  * variables defined there are invisible to surrounding code.
121  * - the description passed to END isn't inspected; you are responsible for
122  * ensuring correct nesting!
123  *
124  * Example usage:
125  * void func2()
126  * {
127  * // uninteresting code
128  * TIMER_BEGIN(L"description2");
129  * // code to be measured
130  * TIMER_END(L"description2");
131  * // uninteresting code
132  * }
133  **/
134 #define TIMER_BEGIN(description) { ScopeTimer UID__(description)
135 #define TIMER_END(description) }
136 
137 
138 //-----------------------------------------------------------------------------
139 // cumulative timer API
140 
141 // this supplements in-game profiling by providing low-overhead,
142 // high resolution time accounting of specific areas.
143 
144 // since TIMER_ACCRUE et al. are called so often, we try to keep
145 // overhead to an absolute minimum. storing raw tick counts (e.g. CPU cycles
146 // returned by x86_x64::rdtsc) instead of absolute time has two benefits:
147 // - no need to convert from raw->time on every call
148 // (instead, it's only done once when displaying the totals)
149 // - possibly less overhead to querying the time itself
150 // (timer_Time may be using slower time sources with ~3us overhead)
151 //
152 // however, the cycle count is not necessarily a measure of wall-clock time
153 // (see http://www.gamedev.net/reference/programming/features/timing).
154 // therefore, on systems with SpeedStep active, measurements of I/O or other
155 // non-CPU bound activity may be skewed. this is ok because the timer is
156 // only used for profiling; just be aware of the issue.
157 // if this is a problem, disable CONFIG2_TIMER_ALLOW_RDTSC.
158 //
159 // note that overflow isn't an issue either way (63 bit cycle counts
160 // at 10 GHz cover intervals of 29 years).
161 
162 #if ARCH_X86_X64 && CONFIG2_TIMER_ALLOW_RDTSC
163 
164 class TimerUnit
165 {
166 public:
167  void SetToZero()
168  {
169  m_cycles = 0;
170  }
171 
172  void SetFromTimer()
173  {
174  m_cycles = x86_x64::rdtsc();
175  }
176 
177  void AddDifference(TimerUnit t0, TimerUnit t1)
178  {
179  m_cycles += t1.m_cycles - t0.m_cycles;
180  }
181 
182  void AddDifferenceAtomic(TimerUnit t0, TimerUnit t1)
183  {
184  const Cycles delta = t1.m_cycles - t0.m_cycles;
185 #if ARCH_AMD64
186  cpu_AtomicAdd(&m_cycles, delta);
187 #elif ARCH_IA32
188 retry:
189  if(!cpu_CAS64(&m_cycles, m_cycles, m_cycles+delta))
190  goto retry;
191 #else
192 # error "port"
193 #endif
194  }
195 
196  void Subtract(TimerUnit t)
197  {
198  m_cycles -= t.m_cycles;
199  }
200 
201  std::string ToString() const
202  {
203  ENSURE(m_cycles >= 0);
204  return StringForCycles(m_cycles);
205  }
206 
207  double ToSeconds() const
208  {
209  return (double)m_cycles / os_cpu_ClockFrequency();
210  }
211 
212 private:
213  Cycles m_cycles;
214 };
215 
216 #else
217 
219 {
220 public:
221  void SetToZero()
222  {
223  m_seconds = 0.0;
224  }
225 
227  {
228  m_seconds = timer_Time();
229  }
230 
232  {
233  m_seconds += t1.m_seconds - t0.m_seconds;
234  }
235 
237  {
238 retry:
239  i64 oldRepresentation;
240  memcpy(&oldRepresentation, &m_seconds, sizeof(oldRepresentation));
241 
242  const double seconds = m_seconds + t1.m_seconds - t0.m_seconds;
243  i64 newRepresentation;
244  memcpy(&newRepresentation, &seconds, sizeof(newRepresentation));
245 
246  if(!cpu_CAS64((volatile i64*)&m_seconds, oldRepresentation, newRepresentation))
247  goto retry;
248  }
249 
251  {
252  m_seconds -= t.m_seconds;
253  }
254 
255  std::string ToString() const
256  {
257  ENSURE(m_seconds >= 0.0);
258  return StringForSeconds(m_seconds);
259  }
260 
261  double ToSeconds() const
262  {
263  return m_seconds;
264  }
265 
266 private:
267  double m_seconds;
268 };
269 
270 #endif
271 
272 // opaque - do not access its fields!
273 // note: must be defined here because clients instantiate them;
274 // fields cannot be made private due to POD requirement.
276 {
277  TimerUnit sum; // total bill
278 
279  // only store a pointer for efficiency.
280  const wchar_t* description;
281 
283 
284  // how often the timer was billed (helps measure relative
285  // performance of something that is done indeterminately often).
286  intptr_t num_calls;
287 };
288 
289 /**
290  * make the given TimerClient (usually instantiated as static data)
291  * ready for use. returns its address for TIMER_ADD_CLIENT's convenience.
292  * this client's total (which is increased by a BillingPolicy) will be
293  * displayed by timer_DisplayClientTotals.
294  * notes:
295  * - may be called at any time;
296  * - always succeeds (there's no fixed limit);
297  * - free() is not needed nor possible.
298  * - description must remain valid until exit; a string literal is safest.
299  **/
300 LIB_API TimerClient* timer_AddClient(TimerClient* tc, const wchar_t* description);
301 
302 /**
303  * "allocate" a new TimerClient that will keep track of the total time
304  * billed to it, along with a description string. These are displayed when
305  * timer_DisplayClientTotals is called.
306  * Invoke this at file or function scope; a (static) TimerClient pointer of
307  * name <id> will be defined, which should be passed to TIMER_ACCRUE.
308  **/
309 #define TIMER_ADD_CLIENT(id)\
310  static TimerClient UID__;\
311  static TimerClient* id = timer_AddClient(&UID__, WIDEN(#id))
312 
313 /**
314  * bill the difference between t0 and t1 to the client's total.
315  **/
317 {
318  void operator()(TimerClient* tc, TimerUnit t0, TimerUnit t1) const
319  {
320  tc->sum.AddDifference(t0, t1);
321  tc->num_calls++;
322  }
323 };
324 
325 /**
326  * thread-safe (not used by default due to its higher overhead)
327  * note: we can't just use thread-local variables to avoid
328  * synchronization overhead because we don't have control over all
329  * threads (for accumulating their separate timer copies).
330  **/
332 {
333  void operator()(TimerClient* tc, TimerUnit t0, TimerUnit t1) const
334  {
335  tc->sum.AddDifferenceAtomic(t0, t1);
336  cpu_AtomicAdd(&tc->num_calls, +1);
337  }
338 };
339 
340 /**
341  * display all clients' totals; does not reset them.
342  * typically called at exit.
343  **/
344 LIB_API void timer_DisplayClientTotals();
345 
346 
347 /// used by TIMER_ACCRUE
348 template<class BillingPolicy = BillingPolicy_Default>
350 {
352 public:
354  : m_tc(tc)
355  {
356  m_t0.SetFromTimer();
357  }
358 
360  {
361  TimerUnit t1;
362  t1.SetFromTimer();
363  BillingPolicy()(m_tc, m_t0, t1);
364  }
365 
366 private:
369 };
370 
371 /**
372  * Measure the time taken to execute code up until end of the current scope;
373  * bill it to the given TimerClient object. Can safely be nested.
374  * Useful for measuring total time spent in a function or basic block over the
375  * entire program.
376  * `client' is an identifier registered via TIMER_ADD_CLIENT.
377  *
378  * Example usage:
379  * TIMER_ADD_CLIENT(client);
380  *
381  * void func()
382  * {
383  * TIMER_ACCRUE(client);
384  * // code to be measured
385  * }
386  *
387  * [later or at exit]
388  * timer_DisplayClientTotals();
389  **/
390 #define TIMER_ACCRUE(client) ScopeTimerAccrue<> UID__(client)
391 #define TIMER_ACCRUE_ATOMIC(client) ScopeTimerAccrue<BillingPolicy_Atomic> UID__(client)
392 
393 #endif // #ifndef INCLUDED_TIMER
TimerClient * m_tc
Definition: timer.h:368
int64_t i64
Definition: types.h:35
double os_cpu_ClockFrequency()
Definition: os_cpu.cpp:43
TimerUnit m_t0
Definition: timer.h:367
LIB_API TimerClient * timer_AddClient(TimerClient *tc, const wchar_t *description)
make the given TimerClient (usually instantiated as static data) ready for use.
Definition: timer.cpp:166
Definition: timer.h:218
ScopeTimer(const wchar_t *description)
Definition: timer.h:78
std::string utf8_from_wstring(const std::wstring &src, Status *err)
opposite of wstring_from_utf8
Definition: utf8.cpp:208
void AddDifferenceAtomic(TimerUnit t0, TimerUnit t1)
Definition: timer.h:236
TimerClient * next
Definition: timer.h:282
void operator()(TimerClient *tc, TimerUnit t0, TimerUnit t1) const
Definition: timer.h:318
intptr_t cpu_AtomicAdd(volatile intptr_t *location, intptr_t increment)
add a signed value to a variable without the possibility of interference from other threads/CPUs...
Definition: aarch64.cpp:31
LIB_API std::string StringForCycles(Cycles cycles)
Definition: timer.cpp:220
Definition: timer.h:275
NONCOPYABLE(ScopeTimer)
bill the difference between t0 and t1 to the client&#39;s total.
Definition: timer.h:316
void AddDifference(TimerUnit t0, TimerUnit t1)
Definition: timer.h:231
#define ENSURE(expr)
ensure the expression <expr> evaluates to non-zero.
Definition: debug.h:287
thread-safe (not used by default due to its higher overhead) note: we can&#39;t just use thread-local var...
Definition: timer.h:331
LIB_API double timer_Resolution()
Definition: timer.cpp:145
double ToSeconds() const
Definition: timer.h:261
LIB_API std::string StringForSeconds(double seconds)
internal helper functions for returning an easily readable string (i.e.
Definition: timer.cpp:204
bool cpu_CAS64(volatile i64 *location, i64 expected, i64 newValue)
Definition: aarch64.cpp:41
TimerUnit sum
Definition: timer.h:277
void debug_printf(const char *fmt,...)
write a formatted string to the debug channel, subject to filtering (see below).
Definition: debug.cpp:142
used by TIMER
Definition: timer.h:74
void operator()(TimerClient *tc, TimerUnit t0, TimerUnit t1) const
Definition: timer.h:333
void SetFromTimer()
Definition: timer.h:226
const wchar_t * m_description
Definition: timer.h:92
double m_t0
Definition: timer.h:91
i64 Cycles
Definition: timer.h:59
const wchar_t * description
Definition: timer.h:280
LIB_API void timer_LatchStartTime()
timer_Time will subsequently return values relative to the current time.
Definition: timer.cpp:74
LIB_API void timer_DisplayClientTotals()
display all clients&#39; totals; does not reset them.
Definition: timer.cpp:181
void Subtract(TimerUnit t)
Definition: timer.h:250
used by TIMER_ACCRUE
Definition: timer.h:349
void SetToZero()
Definition: timer.h:221
std::string ToString() const
Definition: timer.h:255
double m_seconds
Definition: timer.h:267
~ScopeTimerAccrue()
Definition: timer.h:359
u64 rdtsc()
Definition: x86_x64.cpp:383
LIB_API double timer_Time()
Definition: timer.cpp:98
~ScopeTimer()
Definition: timer.h:83
intptr_t num_calls
Definition: timer.h:286
ScopeTimerAccrue(TimerClient *tc)
Definition: timer.h:353