casa
$Rev:20696$
|
00001 #!/usr/bin/env python 00002 # 00003 # Copyright 2008-2009 Jose Fonseca 00004 # 00005 # This program is free software: you can redistribute it and/or modify it 00006 # under the terms of the GNU Lesser General Public License as published 00007 # by the Free Software Foundation, either version 3 of the License, or 00008 # (at your option) any later version. 00009 # 00010 # This program is distributed in the hope that it will be useful, 00011 # but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 # GNU Lesser General Public License for more details. 00014 # 00015 # You should have received a copy of the GNU Lesser General Public License 00016 # along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 # 00018 00019 """Generate a dot graph from the output of several profilers.""" 00020 00021 00022 __author__ = "Jose Fonseca" 00023 00024 __version__ = "1.0" 00025 00026 00027 import sys 00028 import math 00029 import os.path 00030 import re 00031 import textwrap 00032 import optparse 00033 import xml.parsers.expat 00034 00035 00036 try: 00037 # Debugging helper module 00038 import debug 00039 except ImportError: 00040 pass 00041 00042 00043 def percentage(p): 00044 return "%.02f%%" % (p*100.0,) 00045 00046 def add(a, b): 00047 return a + b 00048 00049 def equal(a, b): 00050 if a == b: 00051 return a 00052 else: 00053 return None 00054 00055 def fail(a, b): 00056 assert False 00057 00058 00059 tol = 2 ** -23 00060 00061 def ratio(numerator, denominator): 00062 try: 00063 ratio = float(numerator)/float(denominator) 00064 except ZeroDivisionError: 00065 # 0/0 is undefined, but 1.0 yields more useful results 00066 return 1.0 00067 if ratio < 0.0: 00068 if ratio < -tol: 00069 sys.stderr.write('warning: negative ratio (%s/%s)\n' % (numerator, denominator)) 00070 return 0.0 00071 if ratio > 1.0: 00072 if ratio > 1.0 + tol: 00073 sys.stderr.write('warning: ratio greater than one (%s/%s)\n' % (numerator, denominator)) 00074 return 1.0 00075 return ratio 00076 00077 00078 class UndefinedEvent(Exception): 00079 """Raised when attempting to get an event which is undefined.""" 00080 00081 def __init__(self, event): 00082 Exception.__init__(self) 00083 self.event = event 00084 00085 def __str__(self): 00086 return 'unspecified event %s' % self.event.name 00087 00088 00089 class Event(object): 00090 """Describe a kind of event, and its basic operations.""" 00091 00092 def __init__(self, name, null, aggregator, formatter = str): 00093 self.name = name 00094 self._null = null 00095 self._aggregator = aggregator 00096 self._formatter = formatter 00097 00098 def __eq__(self, other): 00099 return self is other 00100 00101 def __hash__(self): 00102 return id(self) 00103 00104 def null(self): 00105 return self._null 00106 00107 def aggregate(self, val1, val2): 00108 """Aggregate two event values.""" 00109 assert val1 is not None 00110 assert val2 is not None 00111 return self._aggregator(val1, val2) 00112 00113 def format(self, val): 00114 """Format an event value.""" 00115 assert val is not None 00116 return self._formatter(val) 00117 00118 00119 MODULE = Event("Module", None, equal) 00120 PROCESS = Event("Process", None, equal) 00121 00122 CALLS = Event("Calls", 0, add) 00123 SAMPLES = Event("Samples", 0, add) 00124 SAMPLES2 = Event("Samples", 0, add) 00125 00126 TIME = Event("Time", 0.0, add, lambda x: '(' + str(x) + ')') 00127 TIME_RATIO = Event("Time ratio", 0.0, add, lambda x: '(' + percentage(x) + ')') 00128 TOTAL_TIME = Event("Total time", 0.0, fail) 00129 TOTAL_TIME_RATIO = Event("Total time ratio", 0.0, fail, percentage) 00130 00131 CALL_RATIO = Event("Call ratio", 0.0, add, percentage) 00132 00133 PRUNE_RATIO = Event("Prune ratio", 0.0, add, percentage) 00134 00135 00136 class Object(object): 00137 """Base class for all objects in profile which can store events.""" 00138 00139 def __init__(self, events=None): 00140 if events is None: 00141 self.events = {} 00142 else: 00143 self.events = events 00144 00145 def __hash__(self): 00146 return id(self) 00147 00148 def __eq__(self, other): 00149 return self is other 00150 00151 def __contains__(self, event): 00152 return event in self.events 00153 00154 def __getitem__(self, event): 00155 try: 00156 return self.events[event] 00157 except KeyError: 00158 raise UndefinedEvent(event) 00159 00160 def __setitem__(self, event, value): 00161 if value is None: 00162 if event in self.events: 00163 del self.events[event] 00164 else: 00165 self.events[event] = value 00166 00167 00168 class Call(Object): 00169 """A call between functions. 00170 00171 There should be at most one call object for every pair of functions. 00172 """ 00173 00174 def __init__(self, callee_id): 00175 Object.__init__(self) 00176 self.callee_id = callee_id 00177 00178 00179 class Function(Object): 00180 """A function.""" 00181 00182 def __init__(self, id, name): 00183 Object.__init__(self) 00184 self.id = id 00185 self.name = name 00186 self.calls = {} 00187 self.cycle = None 00188 00189 def add_call(self, call): 00190 if call.callee_id in self.calls: 00191 sys.stderr.write('warning: overwriting call from function %s to %s\n' % (str(self.id), str(call.callee_id))) 00192 self.calls[call.callee_id] = call 00193 00194 # TODO: write utility functions 00195 00196 def __repr__(self): 00197 return self.name 00198 00199 00200 class Cycle(Object): 00201 """A cycle made from recursive function calls.""" 00202 00203 def __init__(self): 00204 Object.__init__(self) 00205 # XXX: Do cycles need an id? 00206 self.functions = set() 00207 00208 def add_function(self, function): 00209 assert function not in self.functions 00210 self.functions.add(function) 00211 # XXX: Aggregate events? 00212 if function.cycle is not None: 00213 for other in function.cycle.functions: 00214 if function not in self.functions: 00215 self.add_function(other) 00216 function.cycle = self 00217 00218 00219 class Profile(Object): 00220 """The whole profile.""" 00221 00222 def __init__(self): 00223 Object.__init__(self) 00224 self.functions = {} 00225 self.cycles = [] 00226 00227 def add_function(self, function): 00228 if function.id in self.functions: 00229 sys.stderr.write('warning: overwriting function %s (id %s)\n' % (function.name, str(function.id))) 00230 self.functions[function.id] = function 00231 00232 def add_cycle(self, cycle): 00233 self.cycles.append(cycle) 00234 00235 def validate(self): 00236 """Validate the edges.""" 00237 00238 for function in self.functions.itervalues(): 00239 for callee_id in function.calls.keys(): 00240 assert function.calls[callee_id].callee_id == callee_id 00241 if callee_id not in self.functions: 00242 sys.stderr.write('warning: call to undefined function %s from function %s\n' % (str(callee_id), function.name)) 00243 del function.calls[callee_id] 00244 00245 def find_cycles(self): 00246 """Find cycles using Tarjan's strongly connected components algorithm.""" 00247 00248 # Apply the Tarjan's algorithm successively until all functions are visited 00249 visited = set() 00250 for function in self.functions.itervalues(): 00251 if function not in visited: 00252 self._tarjan(function, 0, [], {}, {}, visited) 00253 cycles = [] 00254 for function in self.functions.itervalues(): 00255 if function.cycle is not None and function.cycle not in cycles: 00256 cycles.append(function.cycle) 00257 self.cycles = cycles 00258 if 0: 00259 for cycle in cycles: 00260 sys.stderr.write("Cycle:\n") 00261 for member in cycle.functions: 00262 sys.stderr.write("\t%s\n" % member.name) 00263 00264 def _tarjan(self, function, order, stack, orders, lowlinks, visited): 00265 """Tarjan's strongly connected components algorithm. 00266 00267 See also: 00268 - http://en.wikipedia.org/wiki/Tarjan's_strongly_connected_components_algorithm 00269 """ 00270 00271 visited.add(function) 00272 orders[function] = order 00273 lowlinks[function] = order 00274 order += 1 00275 pos = len(stack) 00276 stack.append(function) 00277 for call in function.calls.itervalues(): 00278 callee = self.functions[call.callee_id] 00279 # TODO: use a set to optimize lookup 00280 if callee not in orders: 00281 order = self._tarjan(callee, order, stack, orders, lowlinks, visited) 00282 lowlinks[function] = min(lowlinks[function], lowlinks[callee]) 00283 elif callee in stack: 00284 lowlinks[function] = min(lowlinks[function], orders[callee]) 00285 if lowlinks[function] == orders[function]: 00286 # Strongly connected component found 00287 members = stack[pos:] 00288 del stack[pos:] 00289 if len(members) > 1: 00290 cycle = Cycle() 00291 for member in members: 00292 cycle.add_function(member) 00293 return order 00294 00295 def call_ratios(self, event): 00296 # Aggregate for incoming calls 00297 cycle_totals = {} 00298 for cycle in self.cycles: 00299 cycle_totals[cycle] = 0.0 00300 function_totals = {} 00301 for function in self.functions.itervalues(): 00302 function_totals[function] = 0.0 00303 for function in self.functions.itervalues(): 00304 for call in function.calls.itervalues(): 00305 if call.callee_id != function.id: 00306 callee = self.functions[call.callee_id] 00307 function_totals[callee] += call[event] 00308 if callee.cycle is not None and callee.cycle is not function.cycle: 00309 cycle_totals[callee.cycle] += call[event] 00310 00311 # Compute the ratios 00312 for function in self.functions.itervalues(): 00313 for call in function.calls.itervalues(): 00314 assert CALL_RATIO not in call 00315 if call.callee_id != function.id: 00316 callee = self.functions[call.callee_id] 00317 if callee.cycle is not None and callee.cycle is not function.cycle: 00318 total = cycle_totals[callee.cycle] 00319 else: 00320 total = function_totals[callee] 00321 call[CALL_RATIO] = ratio(call[event], total) 00322 00323 def integrate(self, outevent, inevent): 00324 """Propagate function time ratio allong the function calls. 00325 00326 Must be called after finding the cycles. 00327 00328 See also: 00329 - http://citeseer.ist.psu.edu/graham82gprof.html 00330 """ 00331 00332 # Sanity checking 00333 assert outevent not in self 00334 for function in self.functions.itervalues(): 00335 assert outevent not in function 00336 assert inevent in function 00337 for call in function.calls.itervalues(): 00338 assert outevent not in call 00339 if call.callee_id != function.id: 00340 assert CALL_RATIO in call 00341 00342 # Aggregate the input for each cycle 00343 for cycle in self.cycles: 00344 total = inevent.null() 00345 for function in self.functions.itervalues(): 00346 total = inevent.aggregate(total, function[inevent]) 00347 self[inevent] = total 00348 00349 # Integrate along the edges 00350 total = inevent.null() 00351 for function in self.functions.itervalues(): 00352 total = inevent.aggregate(total, function[inevent]) 00353 self._integrate_function(function, outevent, inevent) 00354 self[outevent] = total 00355 00356 def _integrate_function(self, function, outevent, inevent): 00357 if function.cycle is not None: 00358 return self._integrate_cycle(function.cycle, outevent, inevent) 00359 else: 00360 if outevent not in function: 00361 total = function[inevent] 00362 for call in function.calls.itervalues(): 00363 if call.callee_id != function.id: 00364 total += self._integrate_call(call, outevent, inevent) 00365 function[outevent] = total 00366 return function[outevent] 00367 00368 def _integrate_call(self, call, outevent, inevent): 00369 assert outevent not in call 00370 assert CALL_RATIO in call 00371 callee = self.functions[call.callee_id] 00372 subtotal = call[CALL_RATIO]*self._integrate_function(callee, outevent, inevent) 00373 call[outevent] = subtotal 00374 return subtotal 00375 00376 def _integrate_cycle(self, cycle, outevent, inevent): 00377 if outevent not in cycle: 00378 00379 total = inevent.null() 00380 for member in cycle.functions: 00381 subtotal = member[inevent] 00382 for call in member.calls.itervalues(): 00383 callee = self.functions[call.callee_id] 00384 if callee.cycle is not cycle: 00385 subtotal += self._integrate_call(call, outevent, inevent) 00386 total += subtotal 00387 cycle[outevent] = total 00388 00389 callees = {} 00390 for function in self.functions.itervalues(): 00391 if function.cycle is not cycle: 00392 for call in function.calls.itervalues(): 00393 callee = self.functions[call.callee_id] 00394 if callee.cycle is cycle: 00395 try: 00396 callees[callee] += call[CALL_RATIO] 00397 except KeyError: 00398 callees[callee] = call[CALL_RATIO] 00399 00400 for callee, call_ratio in callees.iteritems(): 00401 ranks = {} 00402 call_ratios = {} 00403 partials = {} 00404 self._rank_cycle_function(cycle, callee, 0, ranks) 00405 self._call_ratios_cycle(cycle, callee, ranks, call_ratios, set()) 00406 partial = self._integrate_cycle_function(cycle, callee, call_ratio, partials, ranks, call_ratios, outevent, inevent) 00407 assert partial == max(partials.values()) 00408 assert not total or abs(1.0 - partial/(call_ratio*total)) <= 0.001 00409 00410 return cycle[outevent] 00411 00412 def _rank_cycle_function(self, cycle, function, rank, ranks): 00413 if function not in ranks or ranks[function] > rank: 00414 ranks[function] = rank 00415 for call in function.calls.itervalues(): 00416 if call.callee_id != function.id: 00417 callee = self.functions[call.callee_id] 00418 if callee.cycle is cycle: 00419 self._rank_cycle_function(cycle, callee, rank + 1, ranks) 00420 00421 def _call_ratios_cycle(self, cycle, function, ranks, call_ratios, visited): 00422 if function not in visited: 00423 visited.add(function) 00424 for call in function.calls.itervalues(): 00425 if call.callee_id != function.id: 00426 callee = self.functions[call.callee_id] 00427 if callee.cycle is cycle: 00428 if ranks[callee] > ranks[function]: 00429 call_ratios[callee] = call_ratios.get(callee, 0.0) + call[CALL_RATIO] 00430 self._call_ratios_cycle(cycle, callee, ranks, call_ratios, visited) 00431 00432 def _integrate_cycle_function(self, cycle, function, partial_ratio, partials, ranks, call_ratios, outevent, inevent): 00433 if function not in partials: 00434 partial = partial_ratio*function[inevent] 00435 for call in function.calls.itervalues(): 00436 if call.callee_id != function.id: 00437 callee = self.functions[call.callee_id] 00438 if callee.cycle is not cycle: 00439 assert outevent in call 00440 partial += partial_ratio*call[outevent] 00441 else: 00442 if ranks[callee] > ranks[function]: 00443 callee_partial = self._integrate_cycle_function(cycle, callee, partial_ratio, partials, ranks, call_ratios, outevent, inevent) 00444 call_ratio = ratio(call[CALL_RATIO], call_ratios[callee]) 00445 call_partial = call_ratio*callee_partial 00446 try: 00447 call[outevent] += call_partial 00448 except UndefinedEvent: 00449 call[outevent] = call_partial 00450 partial += call_partial 00451 partials[function] = partial 00452 try: 00453 function[outevent] += partial 00454 except UndefinedEvent: 00455 function[outevent] = partial 00456 return partials[function] 00457 00458 def aggregate(self, event): 00459 """Aggregate an event for the whole profile.""" 00460 00461 total = event.null() 00462 for function in self.functions.itervalues(): 00463 try: 00464 total = event.aggregate(total, function[event]) 00465 except UndefinedEvent: 00466 return 00467 self[event] = total 00468 00469 def ratio(self, outevent, inevent): 00470 assert outevent not in self 00471 assert inevent in self 00472 for function in self.functions.itervalues(): 00473 assert outevent not in function 00474 assert inevent in function 00475 function[outevent] = ratio(function[inevent], self[inevent]) 00476 for call in function.calls.itervalues(): 00477 assert outevent not in call 00478 if inevent in call: 00479 call[outevent] = ratio(call[inevent], self[inevent]) 00480 self[outevent] = 1.0 00481 00482 def prune(self, node_thres, edge_thres): 00483 """Prune the profile""" 00484 00485 # compute the prune ratios 00486 for function in self.functions.itervalues(): 00487 try: 00488 function[PRUNE_RATIO] = function[TOTAL_TIME_RATIO] 00489 except UndefinedEvent: 00490 pass 00491 00492 for call in function.calls.itervalues(): 00493 callee = self.functions[call.callee_id] 00494 00495 if TOTAL_TIME_RATIO in call: 00496 # handle exact cases first 00497 call[PRUNE_RATIO] = call[TOTAL_TIME_RATIO] 00498 else: 00499 try: 00500 # make a safe estimate 00501 call[PRUNE_RATIO] = min(function[TOTAL_TIME_RATIO], callee[TOTAL_TIME_RATIO]) 00502 except UndefinedEvent: 00503 pass 00504 00505 # prune the nodes 00506 for function_id in self.functions.keys(): 00507 function = self.functions[function_id] 00508 try: 00509 if function[PRUNE_RATIO] < node_thres: 00510 del self.functions[function_id] 00511 except UndefinedEvent: 00512 pass 00513 00514 # prune the egdes 00515 for function in self.functions.itervalues(): 00516 for callee_id in function.calls.keys(): 00517 call = function.calls[callee_id] 00518 try: 00519 if callee_id not in self.functions or call[PRUNE_RATIO] < edge_thres: 00520 del function.calls[callee_id] 00521 except UndefinedEvent: 00522 pass 00523 00524 def dump(self): 00525 for function in self.functions.itervalues(): 00526 sys.stderr.write('Function %s:\n' % (function.name,)) 00527 self._dump_events(function.events) 00528 for call in function.calls.itervalues(): 00529 callee = self.functions[call.callee_id] 00530 sys.stderr.write(' Call %s:\n' % (callee.name,)) 00531 self._dump_events(call.events) 00532 00533 def _dump_events(self, events): 00534 for event, value in events.iteritems(): 00535 sys.stderr.write(' %s: %s\n' % (event.name, event.format(value))) 00536 00537 00538 class Struct: 00539 """Masquerade a dictionary with a structure-like behavior.""" 00540 00541 def __init__(self, attrs = None): 00542 if attrs is None: 00543 attrs = {} 00544 self.__dict__['_attrs'] = attrs 00545 00546 def __getattr__(self, name): 00547 try: 00548 return self._attrs[name] 00549 except KeyError: 00550 raise AttributeError(name) 00551 00552 def __setattr__(self, name, value): 00553 self._attrs[name] = value 00554 00555 def __str__(self): 00556 return str(self._attrs) 00557 00558 def __repr__(self): 00559 return repr(self._attrs) 00560 00561 00562 class ParseError(Exception): 00563 """Raised when parsing to signal mismatches.""" 00564 00565 def __init__(self, msg, line): 00566 self.msg = msg 00567 # TODO: store more source line information 00568 self.line = line 00569 00570 def __str__(self): 00571 return '%s: %r' % (self.msg, self.line) 00572 00573 00574 class Parser: 00575 """Parser interface.""" 00576 00577 def __init__(self): 00578 pass 00579 00580 def parse(self): 00581 raise NotImplementedError 00582 00583 00584 class LineParser(Parser): 00585 """Base class for parsers that read line-based formats.""" 00586 00587 def __init__(self, file): 00588 Parser.__init__(self) 00589 self._file = file 00590 self.__line = None 00591 self.__eof = False 00592 00593 def readline(self): 00594 line = self._file.readline() 00595 if not line: 00596 self.__line = '' 00597 self.__eof = True 00598 self.__line = line.rstrip('\r\n') 00599 00600 def lookahead(self): 00601 assert self.__line is not None 00602 return self.__line 00603 00604 def consume(self): 00605 assert self.__line is not None 00606 line = self.__line 00607 self.readline() 00608 return line 00609 00610 def eof(self): 00611 assert self.__line is not None 00612 return self.__eof 00613 00614 00615 XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF = range(4) 00616 00617 00618 class XmlToken: 00619 00620 def __init__(self, type, name_or_data, attrs = None, line = None, column = None): 00621 assert type in (XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF) 00622 self.type = type 00623 self.name_or_data = name_or_data 00624 self.attrs = attrs 00625 self.line = line 00626 self.column = column 00627 00628 def __str__(self): 00629 if self.type == XML_ELEMENT_START: 00630 return '<' + self.name_or_data + ' ...>' 00631 if self.type == XML_ELEMENT_END: 00632 return '</' + self.name_or_data + '>' 00633 if self.type == XML_CHARACTER_DATA: 00634 return self.name_or_data 00635 if self.type == XML_EOF: 00636 return 'end of file' 00637 assert 0 00638 00639 00640 class XmlTokenizer: 00641 """Expat based XML tokenizer.""" 00642 00643 def __init__(self, fp, skip_ws = True): 00644 self.fp = fp 00645 self.tokens = [] 00646 self.index = 0 00647 self.final = False 00648 self.skip_ws = skip_ws 00649 00650 self.character_pos = 0, 0 00651 self.character_data = '' 00652 00653 self.parser = xml.parsers.expat.ParserCreate() 00654 self.parser.StartElementHandler = self.handle_element_start 00655 self.parser.EndElementHandler = self.handle_element_end 00656 self.parser.CharacterDataHandler = self.handle_character_data 00657 00658 def handle_element_start(self, name, attributes): 00659 self.finish_character_data() 00660 line, column = self.pos() 00661 token = XmlToken(XML_ELEMENT_START, name, attributes, line, column) 00662 self.tokens.append(token) 00663 00664 def handle_element_end(self, name): 00665 self.finish_character_data() 00666 line, column = self.pos() 00667 token = XmlToken(XML_ELEMENT_END, name, None, line, column) 00668 self.tokens.append(token) 00669 00670 def handle_character_data(self, data): 00671 if not self.character_data: 00672 self.character_pos = self.pos() 00673 self.character_data += data 00674 00675 def finish_character_data(self): 00676 if self.character_data: 00677 if not self.skip_ws or not self.character_data.isspace(): 00678 line, column = self.character_pos 00679 token = XmlToken(XML_CHARACTER_DATA, self.character_data, None, line, column) 00680 self.tokens.append(token) 00681 self.character_data = '' 00682 00683 def next(self): 00684 size = 16*1024 00685 while self.index >= len(self.tokens) and not self.final: 00686 self.tokens = [] 00687 self.index = 0 00688 data = self.fp.read(size) 00689 self.final = len(data) < size 00690 try: 00691 self.parser.Parse(data, self.final) 00692 except xml.parsers.expat.ExpatError, e: 00693 #if e.code == xml.parsers.expat.errors.XML_ERROR_NO_ELEMENTS: 00694 if e.code == 3: 00695 pass 00696 else: 00697 raise e 00698 if self.index >= len(self.tokens): 00699 line, column = self.pos() 00700 token = XmlToken(XML_EOF, None, None, line, column) 00701 else: 00702 token = self.tokens[self.index] 00703 self.index += 1 00704 return token 00705 00706 def pos(self): 00707 return self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber 00708 00709 00710 class XmlTokenMismatch(Exception): 00711 00712 def __init__(self, expected, found): 00713 self.expected = expected 00714 self.found = found 00715 00716 def __str__(self): 00717 return '%u:%u: %s expected, %s found' % (self.found.line, self.found.column, str(self.expected), str(self.found)) 00718 00719 00720 class XmlParser(Parser): 00721 """Base XML document parser.""" 00722 00723 def __init__(self, fp): 00724 Parser.__init__(self) 00725 self.tokenizer = XmlTokenizer(fp) 00726 self.consume() 00727 00728 def consume(self): 00729 self.token = self.tokenizer.next() 00730 00731 def match_element_start(self, name): 00732 return self.token.type == XML_ELEMENT_START and self.token.name_or_data == name 00733 00734 def match_element_end(self, name): 00735 return self.token.type == XML_ELEMENT_END and self.token.name_or_data == name 00736 00737 def element_start(self, name): 00738 while self.token.type == XML_CHARACTER_DATA: 00739 self.consume() 00740 if self.token.type != XML_ELEMENT_START: 00741 raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token) 00742 if self.token.name_or_data != name: 00743 raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token) 00744 attrs = self.token.attrs 00745 self.consume() 00746 return attrs 00747 00748 def element_end(self, name): 00749 while self.token.type == XML_CHARACTER_DATA: 00750 self.consume() 00751 if self.token.type != XML_ELEMENT_END: 00752 raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token) 00753 if self.token.name_or_data != name: 00754 raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token) 00755 self.consume() 00756 00757 def character_data(self, strip = True): 00758 data = '' 00759 while self.token.type == XML_CHARACTER_DATA: 00760 data += self.token.name_or_data 00761 self.consume() 00762 if strip: 00763 data = data.strip() 00764 return data 00765 00766 00767 class GprofParser(Parser): 00768 """Parser for GNU gprof output. 00769 00770 See also: 00771 - Chapter "Interpreting gprof's Output" from the GNU gprof manual 00772 http://sourceware.org/binutils/docs-2.18/gprof/Call-Graph.html#Call-Graph 00773 - File "cg_print.c" from the GNU gprof source code 00774 http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/src/gprof/cg_print.c?rev=1.12&cvsroot=src 00775 """ 00776 00777 def __init__(self, fp): 00778 Parser.__init__(self) 00779 self.fp = fp 00780 self.functions = {} 00781 self.cycles = {} 00782 00783 def readline(self): 00784 line = self.fp.readline() 00785 if not line: 00786 sys.stderr.write('error: unexpected end of file\n') 00787 sys.exit(1) 00788 line = line.rstrip('\r\n') 00789 return line 00790 00791 _int_re = re.compile(r'^\d+$') 00792 _float_re = re.compile(r'^\d+\.\d+$') 00793 00794 def translate(self, mo): 00795 """Extract a structure from a match object, while translating the types in the process.""" 00796 attrs = {} 00797 groupdict = mo.groupdict() 00798 for name, value in groupdict.iteritems(): 00799 if value is None: 00800 value = None 00801 elif self._int_re.match(value): 00802 value = int(value) 00803 elif self._float_re.match(value): 00804 value = float(value) 00805 attrs[name] = (value) 00806 return Struct(attrs) 00807 00808 _cg_header_re = re.compile( 00809 # original gprof header 00810 r'^\s+called/total\s+parents\s*$|' + 00811 r'^index\s+%time\s+self\s+descendents\s+called\+self\s+name\s+index\s*$|' + 00812 r'^\s+called/total\s+children\s*$|' + 00813 # GNU gprof header 00814 r'^index\s+%\s+time\s+self\s+children\s+called\s+name\s*$' 00815 ) 00816 00817 _cg_ignore_re = re.compile( 00818 # spontaneous 00819 r'^\s+<spontaneous>\s*$|' 00820 # internal calls (such as "mcount") 00821 r'^.*\((\d+)\)$' 00822 ) 00823 00824 _cg_primary_re = re.compile( 00825 r'^\[(?P<index>\d+)\]?' + 00826 r'\s+(?P<percentage_time>\d+\.\d+)' + 00827 r'\s+(?P<self>\d+\.\d+)' + 00828 r'\s+(?P<descendants>\d+\.\d+)' + 00829 r'\s+(?:(?P<called>\d+)(?:\+(?P<called_self>\d+))?)?' + 00830 r'\s+(?P<name>\S.*?)' + 00831 r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' + 00832 r'\s\[(\d+)\]$' 00833 ) 00834 00835 _cg_parent_re = re.compile( 00836 r'^\s+(?P<self>\d+\.\d+)?' + 00837 r'\s+(?P<descendants>\d+\.\d+)?' + 00838 r'\s+(?P<called>\d+)(?:/(?P<called_total>\d+))?' + 00839 r'\s+(?P<name>\S.*?)' + 00840 r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' + 00841 r'\s\[(?P<index>\d+)\]$' 00842 ) 00843 00844 _cg_child_re = _cg_parent_re 00845 00846 _cg_cycle_header_re = re.compile( 00847 r'^\[(?P<index>\d+)\]?' + 00848 r'\s+(?P<percentage_time>\d+\.\d+)' + 00849 r'\s+(?P<self>\d+\.\d+)' + 00850 r'\s+(?P<descendants>\d+\.\d+)' + 00851 r'\s+(?:(?P<called>\d+)(?:\+(?P<called_self>\d+))?)?' + 00852 r'\s+<cycle\s(?P<cycle>\d+)\sas\sa\swhole>' + 00853 r'\s\[(\d+)\]$' 00854 ) 00855 00856 _cg_cycle_member_re = re.compile( 00857 r'^\s+(?P<self>\d+\.\d+)?' + 00858 r'\s+(?P<descendants>\d+\.\d+)?' + 00859 r'\s+(?P<called>\d+)(?:\+(?P<called_self>\d+))?' + 00860 r'\s+(?P<name>\S.*?)' + 00861 r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' + 00862 r'\s\[(?P<index>\d+)\]$' 00863 ) 00864 00865 _cg_sep_re = re.compile(r'^--+$') 00866 00867 def parse_function_entry(self, lines): 00868 parents = [] 00869 children = [] 00870 00871 while True: 00872 if not lines: 00873 sys.stderr.write('warning: unexpected end of entry\n') 00874 line = lines.pop(0) 00875 if line.startswith('['): 00876 break 00877 00878 # read function parent line 00879 mo = self._cg_parent_re.match(line) 00880 if not mo: 00881 if self._cg_ignore_re.match(line): 00882 continue 00883 sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) 00884 else: 00885 parent = self.translate(mo) 00886 parents.append(parent) 00887 00888 # read primary line 00889 mo = self._cg_primary_re.match(line) 00890 if not mo: 00891 sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) 00892 return 00893 else: 00894 function = self.translate(mo) 00895 00896 while lines: 00897 line = lines.pop(0) 00898 00899 # read function subroutine line 00900 mo = self._cg_child_re.match(line) 00901 if not mo: 00902 if self._cg_ignore_re.match(line): 00903 continue 00904 sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) 00905 else: 00906 child = self.translate(mo) 00907 children.append(child) 00908 00909 function.parents = parents 00910 function.children = children 00911 00912 self.functions[function.index] = function 00913 00914 def parse_cycle_entry(self, lines): 00915 00916 # read cycle header line 00917 line = lines[0] 00918 mo = self._cg_cycle_header_re.match(line) 00919 if not mo: 00920 sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) 00921 return 00922 cycle = self.translate(mo) 00923 00924 # read cycle member lines 00925 cycle.functions = [] 00926 for line in lines[1:]: 00927 mo = self._cg_cycle_member_re.match(line) 00928 if not mo: 00929 sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) 00930 continue 00931 call = self.translate(mo) 00932 cycle.functions.append(call) 00933 00934 self.cycles[cycle.cycle] = cycle 00935 00936 def parse_cg_entry(self, lines): 00937 if lines[0].startswith("["): 00938 self.parse_cycle_entry(lines) 00939 else: 00940 self.parse_function_entry(lines) 00941 00942 def parse_cg(self): 00943 """Parse the call graph.""" 00944 00945 # skip call graph header 00946 while not self._cg_header_re.match(self.readline()): 00947 pass 00948 line = self.readline() 00949 while self._cg_header_re.match(line): 00950 line = self.readline() 00951 00952 # process call graph entries 00953 entry_lines = [] 00954 while line != '\014': # form feed 00955 if line and not line.isspace(): 00956 if self._cg_sep_re.match(line): 00957 self.parse_cg_entry(entry_lines) 00958 entry_lines = [] 00959 else: 00960 entry_lines.append(line) 00961 line = self.readline() 00962 00963 def parse(self): 00964 self.parse_cg() 00965 self.fp.close() 00966 00967 profile = Profile() 00968 profile[TIME] = 0.0 00969 00970 cycles = {} 00971 for index in self.cycles.iterkeys(): 00972 cycles[index] = Cycle() 00973 00974 for entry in self.functions.itervalues(): 00975 # populate the function 00976 function = Function(entry.index, entry.name) 00977 function[TIME] = entry.self 00978 if entry.called is not None: 00979 function[CALLS] = entry.called 00980 if entry.called_self is not None: 00981 call = Call(entry.index) 00982 call[CALLS] = entry.called_self 00983 function[CALLS] += entry.called_self 00984 00985 # populate the function calls 00986 for child in entry.children: 00987 call = Call(child.index) 00988 00989 assert child.called is not None 00990 call[CALLS] = child.called 00991 00992 if child.index not in self.functions: 00993 # NOTE: functions that were never called but were discovered by gprof's 00994 # static call graph analysis dont have a call graph entry so we need 00995 # to add them here 00996 missing = Function(child.index, child.name) 00997 function[TIME] = 0.0 00998 function[CALLS] = 0 00999 profile.add_function(missing) 01000 01001 function.add_call(call) 01002 01003 profile.add_function(function) 01004 01005 if entry.cycle is not None: 01006 cycles[entry.cycle].add_function(function) 01007 01008 profile[TIME] = profile[TIME] + function[TIME] 01009 01010 for cycle in cycles.itervalues(): 01011 profile.add_cycle(cycle) 01012 01013 # Compute derived events 01014 profile.validate() 01015 profile.ratio(TIME_RATIO, TIME) 01016 profile.call_ratios(CALLS) 01017 profile.integrate(TOTAL_TIME, TIME) 01018 profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME) 01019 01020 return profile 01021 01022 01023 class OprofileParser(LineParser): 01024 """Parser for oprofile callgraph output. 01025 01026 See also: 01027 - http://oprofile.sourceforge.net/doc/opreport.html#opreport-callgraph 01028 """ 01029 01030 _fields_re = { 01031 'samples': r'(?P<samples>\d+)', 01032 '%': r'(?P<percentage>\S+)', 01033 'linenr info': r'(?P<source>\(no location information\)|\S+:\d+)', 01034 'image name': r'(?P<image>\S+(?:\s\(tgid:[^)]*\))?)', 01035 'app name': r'(?P<application>\S+)', 01036 'symbol name': r'(?P<symbol>\(no symbols\)|.+?)', 01037 } 01038 01039 def __init__(self, infile): 01040 LineParser.__init__(self, infile) 01041 self.entries = {} 01042 self.entry_re = None 01043 01044 def add_entry(self, callers, function, callees): 01045 try: 01046 entry = self.entries[function.id] 01047 except KeyError: 01048 self.entries[function.id] = (callers, function, callees) 01049 else: 01050 callers_total, function_total, callees_total = entry 01051 self.update_subentries_dict(callers_total, callers) 01052 function_total.samples += function.samples 01053 self.update_subentries_dict(callees_total, callees) 01054 01055 def update_subentries_dict(self, totals, partials): 01056 for partial in partials.itervalues(): 01057 try: 01058 total = totals[partial.id] 01059 except KeyError: 01060 totals[partial.id] = partial 01061 else: 01062 total.samples += partial.samples 01063 01064 def parse(self): 01065 # read lookahead 01066 self.readline() 01067 01068 self.parse_header() 01069 while self.lookahead(): 01070 self.parse_entry() 01071 01072 profile = Profile() 01073 01074 reverse_call_samples = {} 01075 01076 # populate the profile 01077 profile[SAMPLES] = 0 01078 for _callers, _function, _callees in self.entries.itervalues(): 01079 function = Function(_function.id, _function.name) 01080 function[SAMPLES] = _function.samples 01081 profile.add_function(function) 01082 profile[SAMPLES] += _function.samples 01083 01084 if _function.application: 01085 function[PROCESS] = os.path.basename(_function.application) 01086 if _function.image: 01087 function[MODULE] = os.path.basename(_function.image) 01088 01089 total_callee_samples = 0 01090 for _callee in _callees.itervalues(): 01091 total_callee_samples += _callee.samples 01092 01093 for _callee in _callees.itervalues(): 01094 if not _callee.self: 01095 call = Call(_callee.id) 01096 call[SAMPLES2] = _callee.samples 01097 function.add_call(call) 01098 01099 # compute derived data 01100 profile.validate() 01101 profile.find_cycles() 01102 profile.ratio(TIME_RATIO, SAMPLES) 01103 profile.call_ratios(SAMPLES2) 01104 profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) 01105 01106 return profile 01107 01108 def parse_header(self): 01109 while not self.match_header(): 01110 self.consume() 01111 line = self.lookahead() 01112 fields = re.split(r'\s\s+', line) 01113 entry_re = r'^\s*' + r'\s+'.join([self._fields_re[field] for field in fields]) + r'(?P<self>\s+\[self\])?$' 01114 self.entry_re = re.compile(entry_re) 01115 self.skip_separator() 01116 01117 def parse_entry(self): 01118 callers = self.parse_subentries() 01119 if self.match_primary(): 01120 function = self.parse_subentry() 01121 if function is not None: 01122 callees = self.parse_subentries() 01123 self.add_entry(callers, function, callees) 01124 self.skip_separator() 01125 01126 def parse_subentries(self): 01127 subentries = {} 01128 while self.match_secondary(): 01129 subentry = self.parse_subentry() 01130 subentries[subentry.id] = subentry 01131 return subentries 01132 01133 def parse_subentry(self): 01134 entry = Struct() 01135 line = self.consume() 01136 mo = self.entry_re.match(line) 01137 if not mo: 01138 raise ParseError('failed to parse', line) 01139 fields = mo.groupdict() 01140 entry.samples = int(fields.get('samples', 0)) 01141 entry.percentage = float(fields.get('percentage', 0.0)) 01142 if 'source' in fields and fields['source'] != '(no location information)': 01143 source = fields['source'] 01144 filename, lineno = source.split(':') 01145 entry.filename = filename 01146 entry.lineno = int(lineno) 01147 else: 01148 source = '' 01149 entry.filename = None 01150 entry.lineno = None 01151 entry.image = fields.get('image', '') 01152 entry.application = fields.get('application', '') 01153 if 'symbol' in fields and fields['symbol'] != '(no symbols)': 01154 entry.symbol = fields['symbol'] 01155 else: 01156 entry.symbol = '' 01157 if entry.symbol.startswith('"') and entry.symbol.endswith('"'): 01158 entry.symbol = entry.symbol[1:-1] 01159 entry.id = ':'.join((entry.application, entry.image, source, entry.symbol)) 01160 entry.self = fields.get('self', None) != None 01161 if entry.self: 01162 entry.id += ':self' 01163 if entry.symbol: 01164 entry.name = entry.symbol 01165 else: 01166 entry.name = entry.image 01167 return entry 01168 01169 def skip_separator(self): 01170 while not self.match_separator(): 01171 self.consume() 01172 self.consume() 01173 01174 def match_header(self): 01175 line = self.lookahead() 01176 return line.startswith('samples') 01177 01178 def match_separator(self): 01179 line = self.lookahead() 01180 return line == '-'*len(line) 01181 01182 def match_primary(self): 01183 line = self.lookahead() 01184 return not line[:1].isspace() 01185 01186 def match_secondary(self): 01187 line = self.lookahead() 01188 return line[:1].isspace() 01189 01190 01191 class SharkParser(LineParser): 01192 """Parser for MacOSX Shark output. 01193 01194 Author: tom@dbservice.com 01195 """ 01196 01197 def __init__(self, infile): 01198 LineParser.__init__(self, infile) 01199 self.stack = [] 01200 self.entries = {} 01201 01202 def add_entry(self, function): 01203 try: 01204 entry = self.entries[function.id] 01205 except KeyError: 01206 self.entries[function.id] = (function, { }) 01207 else: 01208 function_total, callees_total = entry 01209 function_total.samples += function.samples 01210 01211 def add_callee(self, function, callee): 01212 func, callees = self.entries[function.id] 01213 try: 01214 entry = callees[callee.id] 01215 except KeyError: 01216 callees[callee.id] = callee 01217 else: 01218 entry.samples += callee.samples 01219 01220 def parse(self): 01221 self.readline() 01222 self.readline() 01223 self.readline() 01224 self.readline() 01225 01226 match = re.compile(r'(?P<prefix>[|+ ]*)(?P<samples>\d+), (?P<symbol>[^,]+), (?P<image>.*)') 01227 01228 while self.lookahead(): 01229 line = self.consume() 01230 mo = match.match(line) 01231 if not mo: 01232 raise ParseError('failed to parse', line) 01233 01234 fields = mo.groupdict() 01235 prefix = len(fields.get('prefix', 0)) / 2 - 1 01236 01237 symbol = str(fields.get('symbol', 0)) 01238 image = str(fields.get('image', 0)) 01239 01240 entry = Struct() 01241 entry.id = ':'.join([symbol, image]) 01242 entry.samples = int(fields.get('samples', 0)) 01243 01244 entry.name = symbol 01245 entry.image = image 01246 01247 # adjust the callstack 01248 if prefix < len(self.stack): 01249 del self.stack[prefix:] 01250 01251 if prefix == len(self.stack): 01252 self.stack.append(entry) 01253 01254 # if the callstack has had an entry, it's this functions caller 01255 if prefix > 0: 01256 self.add_callee(self.stack[prefix - 1], entry) 01257 01258 self.add_entry(entry) 01259 01260 profile = Profile() 01261 profile[SAMPLES] = 0 01262 for _function, _callees in self.entries.itervalues(): 01263 function = Function(_function.id, _function.name) 01264 function[SAMPLES] = _function.samples 01265 profile.add_function(function) 01266 profile[SAMPLES] += _function.samples 01267 01268 if _function.image: 01269 function[MODULE] = os.path.basename(_function.image) 01270 01271 for _callee in _callees.itervalues(): 01272 call = Call(_callee.id) 01273 call[SAMPLES] = _callee.samples 01274 function.add_call(call) 01275 01276 # compute derived data 01277 profile.validate() 01278 profile.find_cycles() 01279 profile.ratio(TIME_RATIO, SAMPLES) 01280 profile.call_ratios(SAMPLES) 01281 profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) 01282 01283 return profile 01284 01285 01286 class AQtimeTable: 01287 01288 def __init__(self, name, fields): 01289 self.name = name 01290 01291 self.fields = fields 01292 self.field_column = {} 01293 for column in range(len(fields)): 01294 self.field_column[fields[column]] = column 01295 self.rows = [] 01296 01297 def __len__(self): 01298 return len(self.rows) 01299 01300 def __iter__(self): 01301 for values, children in self.rows: 01302 fields = {} 01303 for name, value in zip(self.fields, values): 01304 fields[name] = value 01305 children = dict([(child.name, child) for child in children]) 01306 yield fields, children 01307 raise StopIteration 01308 01309 def add_row(self, values, children=()): 01310 self.rows.append((values, children)) 01311 01312 01313 class AQtimeParser(XmlParser): 01314 01315 def __init__(self, stream): 01316 XmlParser.__init__(self, stream) 01317 self.tables = {} 01318 01319 def parse(self): 01320 self.element_start('AQtime_Results') 01321 self.parse_headers() 01322 results = self.parse_results() 01323 self.element_end('AQtime_Results') 01324 return self.build_profile(results) 01325 01326 def parse_headers(self): 01327 self.element_start('HEADERS') 01328 while self.token.type == XML_ELEMENT_START: 01329 self.parse_table_header() 01330 self.element_end('HEADERS') 01331 01332 def parse_table_header(self): 01333 attrs = self.element_start('TABLE_HEADER') 01334 name = attrs['NAME'] 01335 id = int(attrs['ID']) 01336 field_types = [] 01337 field_names = [] 01338 while self.token.type == XML_ELEMENT_START: 01339 field_type, field_name = self.parse_table_field() 01340 field_types.append(field_type) 01341 field_names.append(field_name) 01342 self.element_end('TABLE_HEADER') 01343 self.tables[id] = name, field_types, field_names 01344 01345 def parse_table_field(self): 01346 attrs = self.element_start('TABLE_FIELD') 01347 type = attrs['TYPE'] 01348 name = self.character_data() 01349 self.element_end('TABLE_FIELD') 01350 return type, name 01351 01352 def parse_results(self): 01353 self.element_start('RESULTS') 01354 table = self.parse_data() 01355 self.element_end('RESULTS') 01356 return table 01357 01358 def parse_data(self): 01359 rows = [] 01360 attrs = self.element_start('DATA') 01361 table_id = int(attrs['TABLE_ID']) 01362 table_name, field_types, field_names = self.tables[table_id] 01363 table = AQtimeTable(table_name, field_names) 01364 while self.token.type == XML_ELEMENT_START: 01365 row, children = self.parse_row(field_types) 01366 table.add_row(row, children) 01367 self.element_end('DATA') 01368 return table 01369 01370 def parse_row(self, field_types): 01371 row = [None]*len(field_types) 01372 children = [] 01373 self.element_start('ROW') 01374 while self.token.type == XML_ELEMENT_START: 01375 if self.token.name_or_data == 'FIELD': 01376 field_id, field_value = self.parse_field(field_types) 01377 row[field_id] = field_value 01378 elif self.token.name_or_data == 'CHILDREN': 01379 children = self.parse_children() 01380 else: 01381 raise XmlTokenMismatch("<FIELD ...> or <CHILDREN ...>", self.token) 01382 self.element_end('ROW') 01383 return row, children 01384 01385 def parse_field(self, field_types): 01386 attrs = self.element_start('FIELD') 01387 id = int(attrs['ID']) 01388 type = field_types[id] 01389 value = self.character_data() 01390 if type == 'Integer': 01391 value = int(value) 01392 elif type == 'Float': 01393 value = float(value) 01394 elif type == 'Address': 01395 value = int(value) 01396 elif type == 'String': 01397 pass 01398 else: 01399 assert False 01400 self.element_end('FIELD') 01401 return id, value 01402 01403 def parse_children(self): 01404 children = [] 01405 self.element_start('CHILDREN') 01406 while self.token.type == XML_ELEMENT_START: 01407 table = self.parse_data() 01408 assert table.name not in children 01409 children.append(table) 01410 self.element_end('CHILDREN') 01411 return children 01412 01413 def build_profile(self, results): 01414 assert results.name == 'Routines' 01415 profile = Profile() 01416 profile[TIME] = 0.0 01417 for fields, tables in results: 01418 function = self.build_function(fields) 01419 children = tables['Children'] 01420 for fields, _ in children: 01421 call = self.build_call(fields) 01422 function.add_call(call) 01423 profile.add_function(function) 01424 profile[TIME] = profile[TIME] + function[TIME] 01425 profile[TOTAL_TIME] = profile[TIME] 01426 profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME) 01427 return profile 01428 01429 def build_function(self, fields): 01430 function = Function(self.build_id(fields), self.build_name(fields)) 01431 function[TIME] = fields['Time'] 01432 function[TOTAL_TIME] = fields['Time with Children'] 01433 #function[TIME_RATIO] = fields['% Time']/100.0 01434 #function[TOTAL_TIME_RATIO] = fields['% with Children']/100.0 01435 return function 01436 01437 def build_call(self, fields): 01438 call = Call(self.build_id(fields)) 01439 call[TIME] = fields['Time'] 01440 call[TOTAL_TIME] = fields['Time with Children'] 01441 #call[TIME_RATIO] = fields['% Time']/100.0 01442 #call[TOTAL_TIME_RATIO] = fields['% with Children']/100.0 01443 return call 01444 01445 def build_id(self, fields): 01446 return ':'.join([fields['Module Name'], fields['Unit Name'], fields['Routine Name']]) 01447 01448 def build_name(self, fields): 01449 # TODO: use more fields 01450 return fields['Routine Name'] 01451 01452 01453 class PstatsParser: 01454 """Parser python profiling statistics saved with te pstats module.""" 01455 01456 def __init__(self, *filename): 01457 import pstats 01458 self.stats = pstats.Stats(*filename) 01459 self.profile = Profile() 01460 self.function_ids = {} 01461 01462 def get_function_name(self, (filename, line, name)): 01463 module = os.path.splitext(filename)[0] 01464 module = os.path.basename(module) 01465 return "%s:%d:%s" % (module, line, name) 01466 01467 def get_function(self, key): 01468 try: 01469 id = self.function_ids[key] 01470 except KeyError: 01471 id = len(self.function_ids) 01472 name = self.get_function_name(key) 01473 function = Function(id, name) 01474 self.profile.functions[id] = function 01475 self.function_ids[key] = id 01476 else: 01477 function = self.profile.functions[id] 01478 return function 01479 01480 def parse(self): 01481 self.profile[TIME] = 0.0 01482 self.profile[TOTAL_TIME] = self.stats.total_tt 01483 for fn, (cc, nc, tt, ct, callers) in self.stats.stats.iteritems(): 01484 callee = self.get_function(fn) 01485 callee[CALLS] = nc 01486 callee[TOTAL_TIME] = ct 01487 callee[TIME] = tt 01488 self.profile[TIME] += tt 01489 self.profile[TOTAL_TIME] = max(self.profile[TOTAL_TIME], ct) 01490 for fn, value in callers.iteritems(): 01491 caller = self.get_function(fn) 01492 call = Call(callee.id) 01493 if isinstance(value, tuple): 01494 for i in xrange(0, len(value), 4): 01495 nc, cc, tt, ct = value[i:i+4] 01496 if CALLS in call: 01497 call[CALLS] += cc 01498 else: 01499 call[CALLS] = cc 01500 01501 if TOTAL_TIME in call: 01502 call[TOTAL_TIME] += ct 01503 else: 01504 call[TOTAL_TIME] = ct 01505 01506 else: 01507 call[CALLS] = value 01508 call[TOTAL_TIME] = ratio(value, nc)*ct 01509 01510 caller.add_call(call) 01511 #self.stats.print_stats() 01512 #self.stats.print_callees() 01513 01514 # Compute derived events 01515 self.profile.validate() 01516 self.profile.ratio(TIME_RATIO, TIME) 01517 self.profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME) 01518 01519 return self.profile 01520 01521 01522 class Theme: 01523 01524 def __init__(self, 01525 bgcolor = (0.0, 0.0, 1.0), 01526 mincolor = (0.0, 0.0, 0.0), 01527 maxcolor = (0.0, 0.0, 1.0), 01528 fontname = "Arial", 01529 minfontsize = 10.0, 01530 maxfontsize = 10.0, 01531 minpenwidth = 0.5, 01532 maxpenwidth = 4.0, 01533 gamma = 2.2): 01534 self.bgcolor = bgcolor 01535 self.mincolor = mincolor 01536 self.maxcolor = maxcolor 01537 self.fontname = fontname 01538 self.minfontsize = minfontsize 01539 self.maxfontsize = maxfontsize 01540 self.minpenwidth = minpenwidth 01541 self.maxpenwidth = maxpenwidth 01542 self.gamma = gamma 01543 01544 def graph_bgcolor(self): 01545 return self.hsl_to_rgb(*self.bgcolor) 01546 01547 def graph_fontname(self): 01548 return self.fontname 01549 01550 def graph_fontsize(self): 01551 return self.minfontsize 01552 01553 def node_bgcolor(self, weight): 01554 return self.color(weight) 01555 01556 def node_fgcolor(self, weight): 01557 return self.graph_bgcolor() 01558 01559 def node_fontsize(self, weight): 01560 return self.fontsize(weight) 01561 01562 def edge_color(self, weight): 01563 return self.color(weight) 01564 01565 def edge_fontsize(self, weight): 01566 return self.fontsize(weight) 01567 01568 def edge_penwidth(self, weight): 01569 return max(weight*self.maxpenwidth, self.minpenwidth) 01570 01571 def edge_arrowsize(self, weight): 01572 return 0.5 * math.sqrt(self.edge_penwidth(weight)) 01573 01574 def fontsize(self, weight): 01575 return max(weight**2 * self.maxfontsize, self.minfontsize) 01576 01577 def color(self, weight): 01578 weight = min(max(weight, 0.0), 1.0) 01579 01580 hmin, smin, lmin = self.mincolor 01581 hmax, smax, lmax = self.maxcolor 01582 01583 h = hmin + math.sqrt(math.sqrt(weight))*(hmax - hmin) 01584 s = smin + math.sqrt(math.sqrt(weight))*(smax - smin) 01585 l = lmin + math.sqrt(math.sqrt(weight))*(lmax - lmin) 01586 01587 return self.hsl_to_rgb(h, s, l) 01588 01589 def hsl_to_rgb(self, h, s, l): 01590 """Convert a color from HSL color-model to RGB. 01591 01592 See also: 01593 - http://www.w3.org/TR/css3-color/#hsl-color 01594 """ 01595 01596 h = h % 1.0 01597 s = min(max(s, 0.0), 1.0) 01598 l = min(max(l, 0.0), 1.0) 01599 01600 if l <= 0.5: 01601 m2 = l*(s + 1.0) 01602 else: 01603 m2 = l + s - l*s 01604 m1 = l*2.0 - m2 01605 r = self._hue_to_rgb(m1, m2, h + 1.0/3.0) 01606 g = self._hue_to_rgb(m1, m2, h) 01607 b = self._hue_to_rgb(m1, m2, h - 1.0/3.0) 01608 01609 # Apply gamma correction 01610 r **= self.gamma 01611 g **= self.gamma 01612 b **= self.gamma 01613 01614 return (r, g, b) 01615 01616 def _hue_to_rgb(self, m1, m2, h): 01617 if h < 0.0: 01618 h += 1.0 01619 elif h > 1.0: 01620 h -= 1.0 01621 if h*6 < 1.0: 01622 return m1 + (m2 - m1)*h*6.0 01623 elif h*2 < 1.0: 01624 return m2 01625 elif h*3 < 2.0: 01626 return m1 + (m2 - m1)*(2.0/3.0 - h)*6.0 01627 else: 01628 return m1 01629 01630 01631 TEMPERATURE_COLORMAP = Theme( 01632 mincolor = (2.0/3.0, 0.80, 0.25), # dark blue 01633 maxcolor = (0.0, 1.0, 0.5), # satured red 01634 gamma = 1.0 01635 ) 01636 01637 PINK_COLORMAP = Theme( 01638 mincolor = (0.0, 1.0, 0.90), # pink 01639 maxcolor = (0.0, 1.0, 0.5), # satured red 01640 ) 01641 01642 GRAY_COLORMAP = Theme( 01643 mincolor = (0.0, 0.0, 0.85), # light gray 01644 maxcolor = (0.0, 0.0, 0.0), # black 01645 ) 01646 01647 BW_COLORMAP = Theme( 01648 minfontsize = 8.0, 01649 maxfontsize = 24.0, 01650 mincolor = (0.0, 0.0, 0.0), # black 01651 maxcolor = (0.0, 0.0, 0.0), # black 01652 minpenwidth = 0.1, 01653 maxpenwidth = 8.0, 01654 ) 01655 01656 01657 class DotWriter: 01658 """Writer for the DOT language. 01659 01660 See also: 01661 - "The DOT Language" specification 01662 http://www.graphviz.org/doc/info/lang.html 01663 """ 01664 01665 def __init__(self, fp): 01666 self.fp = fp 01667 01668 def graph(self, profile, theme): 01669 self.begin_graph() 01670 01671 fontname = theme.graph_fontname() 01672 01673 self.attr('graph', fontname=fontname, ranksep=0.25, nodesep=0.125) 01674 self.attr('node', fontname=fontname, shape="box", style="filled,rounded", fontcolor="white", width=0, height=0) 01675 self.attr('edge', fontname=fontname) 01676 01677 for function in profile.functions.itervalues(): 01678 labels = [] 01679 for event in PROCESS, MODULE: 01680 if event in function.events: 01681 label = event.format(function[event]) 01682 labels.append(label) 01683 labels.append(function.name) 01684 for event in TOTAL_TIME_RATIO, TIME_RATIO, CALLS: 01685 if event in function.events: 01686 label = event.format(function[event]) 01687 labels.append(label) 01688 01689 try: 01690 #weight = function[PRUNE_RATIO] 01691 weight = function[TIME_RATIO] 01692 except UndefinedEvent: 01693 weight = 0.0 01694 01695 label = '\n'.join(labels) 01696 self.node(function.id, 01697 label = label, 01698 color = self.color(theme.node_bgcolor(weight)), 01699 fontcolor = self.color(theme.node_fgcolor(weight)), 01700 fontsize = "%.2f" % theme.node_fontsize(weight), 01701 ) 01702 01703 for call in function.calls.itervalues(): 01704 callee = profile.functions[call.callee_id] 01705 01706 labels = [] 01707 for event in TOTAL_TIME_RATIO, CALLS: 01708 if event in call.events: 01709 label = event.format(call[event]) 01710 labels.append(label) 01711 01712 try: 01713 weight = call[PRUNE_RATIO] 01714 except UndefinedEvent: 01715 try: 01716 weight = callee[PRUNE_RATIO] 01717 except UndefinedEvent: 01718 weight = 0.0 01719 01720 label = '\n'.join(labels) 01721 01722 self.edge(function.id, call.callee_id, 01723 label = label, 01724 color = self.color(theme.edge_color(weight)), 01725 fontcolor = self.color(theme.edge_color(weight)), 01726 fontsize = "%.2f" % theme.edge_fontsize(weight), 01727 penwidth = "%.2f" % theme.edge_penwidth(weight), 01728 labeldistance = "%.2f" % theme.edge_penwidth(weight), 01729 arrowsize = "%.2f" % theme.edge_arrowsize(weight), 01730 ) 01731 01732 self.end_graph() 01733 01734 def begin_graph(self): 01735 self.write('digraph {\n') 01736 01737 def end_graph(self): 01738 self.write('}\n') 01739 01740 def attr(self, what, **attrs): 01741 self.write("\t") 01742 self.write(what) 01743 self.attr_list(attrs) 01744 self.write(";\n") 01745 01746 def node(self, node, **attrs): 01747 self.write("\t") 01748 self.id(node) 01749 self.attr_list(attrs) 01750 self.write(";\n") 01751 01752 def edge(self, src, dst, **attrs): 01753 self.write("\t") 01754 self.id(src) 01755 self.write(" -> ") 01756 self.id(dst) 01757 self.attr_list(attrs) 01758 self.write(";\n") 01759 01760 def attr_list(self, attrs): 01761 if not attrs: 01762 return 01763 self.write(' [') 01764 first = True 01765 for name, value in attrs.iteritems(): 01766 if first: 01767 first = False 01768 else: 01769 self.write(", ") 01770 self.id(name) 01771 self.write('=') 01772 self.id(value) 01773 self.write(']') 01774 01775 def id(self, id): 01776 if isinstance(id, (int, float)): 01777 s = str(id) 01778 elif isinstance(id, basestring): 01779 if id.isalnum(): 01780 s = id 01781 else: 01782 s = self.escape(id) 01783 else: 01784 raise TypeError 01785 self.write(s) 01786 01787 def color(self, (r, g, b)): 01788 01789 def float2int(f): 01790 if f <= 0.0: 01791 return 0 01792 if f >= 1.0: 01793 return 255 01794 return int(255.0*f + 0.5) 01795 01796 return "#" + "".join(["%02x" % float2int(c) for c in (r, g, b)]) 01797 01798 def escape(self, s): 01799 s = s.encode('utf-8') 01800 s = s.replace('\\', r'\\') 01801 s = s.replace('\n', r'\n') 01802 s = s.replace('\t', r'\t') 01803 s = s.replace('"', r'\"') 01804 return '"' + s + '"' 01805 01806 def write(self, s): 01807 self.fp.write(s) 01808 01809 01810 class Main: 01811 """Main program.""" 01812 01813 themes = { 01814 "color": TEMPERATURE_COLORMAP, 01815 "pink": PINK_COLORMAP, 01816 "gray": GRAY_COLORMAP, 01817 "bw": BW_COLORMAP, 01818 } 01819 01820 def main(self): 01821 """Main program.""" 01822 01823 parser = optparse.OptionParser( 01824 usage="\n\t%prog [options] [file] ...", 01825 version="%%prog %s" % __version__) 01826 parser.add_option( 01827 '-o', '--output', metavar='FILE', 01828 type="string", dest="output", 01829 help="output filename [stdout]") 01830 parser.add_option( 01831 '-n', '--node-thres', metavar='PERCENTAGE', 01832 type="float", dest="node_thres", default=0.5, 01833 help="eliminate nodes below this threshold [default: %default]") 01834 parser.add_option( 01835 '-e', '--edge-thres', metavar='PERCENTAGE', 01836 type="float", dest="edge_thres", default=0.1, 01837 help="eliminate edges below this threshold [default: %default]") 01838 parser.add_option( 01839 '-f', '--format', 01840 type="choice", choices=('prof', 'oprofile', 'pstats', 'shark', 'aqtime'), 01841 dest="format", default="prof", 01842 help="profile format: prof, oprofile, shark, aqtime, or pstats [default: %default]") 01843 parser.add_option( 01844 '-c', '--colormap', 01845 type="choice", choices=('color', 'pink', 'gray', 'bw'), 01846 dest="theme", default="color", 01847 help="color map: color, pink, gray, or bw [default: %default]") 01848 parser.add_option( 01849 '-s', '--strip', 01850 action="store_true", 01851 dest="strip", default=False, 01852 help="strip function parameters, template parameters, and const modifiers from demangled C++ function names") 01853 parser.add_option( 01854 '-w', '--wrap', 01855 action="store_true", 01856 dest="wrap", default=False, 01857 help="wrap function names") 01858 (self.options, self.args) = parser.parse_args(sys.argv[1:]) 01859 01860 if len(self.args) > 1 and self.options.format != 'pstats': 01861 parser.error('incorrect number of arguments') 01862 01863 try: 01864 self.theme = self.themes[self.options.theme] 01865 except KeyError: 01866 parser.error('invalid colormap \'%s\'' % self.options.theme) 01867 01868 if self.options.format == 'prof': 01869 if not self.args: 01870 fp = sys.stdin 01871 else: 01872 fp = open(self.args[0], 'rt') 01873 parser = GprofParser(fp) 01874 elif self.options.format == 'oprofile': 01875 if not self.args: 01876 fp = sys.stdin 01877 else: 01878 fp = open(self.args[0], 'rt') 01879 parser = OprofileParser(fp) 01880 elif self.options.format == 'pstats': 01881 if not self.args: 01882 parser.error('at least a file must be specified for pstats input') 01883 parser = PstatsParser(*self.args) 01884 elif self.options.format == 'shark': 01885 if not self.args: 01886 fp = sys.stdin 01887 else: 01888 fp = open(self.args[0], 'rt') 01889 parser = SharkParser(fp) 01890 elif self.options.format == 'aqtime': 01891 if not self.args: 01892 fp = sys.stdin 01893 else: 01894 fp = open(self.args[0], 'rt') 01895 parser = AQtimeParser(fp) 01896 else: 01897 parser.error('invalid format \'%s\'' % self.options.format) 01898 01899 self.profile = parser.parse() 01900 01901 if self.options.output is None: 01902 self.output = sys.stdout 01903 else: 01904 self.output = open(self.options.output, 'wt') 01905 01906 self.write_graph() 01907 01908 _parenthesis_re = re.compile(r'\([^()]*\)') 01909 _angles_re = re.compile(r'<[^<>]*>') 01910 _const_re = re.compile(r'\s+const$') 01911 01912 def strip_function_name(self, name): 01913 """Remove extraneous information from C++ demangled function names.""" 01914 01915 # Strip function parameters from name by recursively removing paired parenthesis 01916 while True: 01917 name, n = self._parenthesis_re.subn('', name) 01918 if not n: 01919 break 01920 01921 # Strip const qualifier 01922 name = self._const_re.sub('', name) 01923 01924 # Strip template parameters from name by recursively removing paired angles 01925 while True: 01926 name, n = self._angles_re.subn('', name) 01927 if not n: 01928 break 01929 01930 return name 01931 01932 def wrap_function_name(self, name): 01933 """Split the function name on multiple lines.""" 01934 01935 if len(name) > 32: 01936 ratio = 2.0/3.0 01937 height = max(int(len(name)/(1.0 - ratio) + 0.5), 1) 01938 width = max(len(name)/height, 32) 01939 # TODO: break lines in symbols 01940 name = textwrap.fill(name, width, break_long_words=False) 01941 01942 # Take away spaces 01943 name = name.replace(", ", ",") 01944 name = name.replace("> >", ">>") 01945 name = name.replace("> >", ">>") # catch consecutive 01946 01947 return name 01948 01949 def compress_function_name(self, name): 01950 """Compress function name according to the user preferences.""" 01951 01952 if self.options.strip: 01953 name = self.strip_function_name(name) 01954 01955 if self.options.wrap: 01956 name = self.wrap_function_name(name) 01957 01958 # TODO: merge functions with same resulting name 01959 01960 return name 01961 01962 def write_graph(self): 01963 dot = DotWriter(self.output) 01964 profile = self.profile 01965 profile.prune(self.options.node_thres/100.0, self.options.edge_thres/100.0) 01966 01967 for function in profile.functions.itervalues(): 01968 function.name = self.compress_function_name(function.name) 01969 01970 dot.graph(profile, self.theme) 01971 01972 01973 if __name__ == '__main__': 01974 Main().main()