casa  $Rev:20696$
 All Classes Namespaces Files Functions Variables
gprof2dot.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 #
00003 # Copyright 2008-2009 Jose Fonseca
00004 #
00005 # This program is free software: you can redistribute it and/or modify it
00006 # under the terms of the GNU Lesser General Public License as published
00007 # by the Free Software Foundation, either version 3 of the License, or
00008 # (at your option) any later version.
00009 #
00010 # This program is distributed in the hope that it will be useful,
00011 # but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013 # GNU Lesser General Public License for more details.
00014 #
00015 # You should have received a copy of the GNU Lesser General Public License
00016 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017 #
00018 
00019 """Generate a dot graph from the output of several profilers."""
00020 
00021 
00022 __author__ = "Jose Fonseca"
00023 
00024 __version__ = "1.0"
00025 
00026 
00027 import sys
00028 import math
00029 import os.path
00030 import re
00031 import textwrap
00032 import optparse
00033 import xml.parsers.expat
00034 
00035 
00036 try:
00037     # Debugging helper module
00038     import debug
00039 except ImportError:
00040     pass
00041 
00042 
00043 def percentage(p):
00044     return "%.02f%%" % (p*100.0,)
00045 
00046 def add(a, b):
00047     return a + b
00048 
00049 def equal(a, b):
00050     if a == b:
00051         return a
00052     else:
00053         return None
00054 
00055 def fail(a, b):
00056     assert False
00057 
00058 
00059 tol = 2 ** -23
00060 
00061 def ratio(numerator, denominator):
00062     try:
00063         ratio = float(numerator)/float(denominator)
00064     except ZeroDivisionError:
00065         # 0/0 is undefined, but 1.0 yields more useful results
00066         return 1.0
00067     if ratio < 0.0:
00068         if ratio < -tol:
00069             sys.stderr.write('warning: negative ratio (%s/%s)\n' % (numerator, denominator))
00070         return 0.0
00071     if ratio > 1.0:
00072         if ratio > 1.0 + tol:
00073             sys.stderr.write('warning: ratio greater than one (%s/%s)\n' % (numerator, denominator))
00074         return 1.0
00075     return ratio
00076 
00077 
00078 class UndefinedEvent(Exception):
00079     """Raised when attempting to get an event which is undefined."""
00080     
00081     def __init__(self, event):
00082         Exception.__init__(self)
00083         self.event = event
00084 
00085     def __str__(self):
00086         return 'unspecified event %s' % self.event.name
00087 
00088 
00089 class Event(object):
00090     """Describe a kind of event, and its basic operations."""
00091 
00092     def __init__(self, name, null, aggregator, formatter = str):
00093         self.name = name
00094         self._null = null
00095         self._aggregator = aggregator
00096         self._formatter = formatter
00097 
00098     def __eq__(self, other):
00099         return self is other
00100 
00101     def __hash__(self):
00102         return id(self)
00103 
00104     def null(self):
00105         return self._null
00106 
00107     def aggregate(self, val1, val2):
00108         """Aggregate two event values."""
00109         assert val1 is not None
00110         assert val2 is not None
00111         return self._aggregator(val1, val2)
00112     
00113     def format(self, val):
00114         """Format an event value."""
00115         assert val is not None
00116         return self._formatter(val)
00117 
00118 
00119 MODULE = Event("Module", None, equal)
00120 PROCESS = Event("Process", None, equal)
00121 
00122 CALLS = Event("Calls", 0, add)
00123 SAMPLES = Event("Samples", 0, add)
00124 SAMPLES2 = Event("Samples", 0, add)
00125 
00126 TIME = Event("Time", 0.0, add, lambda x: '(' + str(x) + ')')
00127 TIME_RATIO = Event("Time ratio", 0.0, add, lambda x: '(' + percentage(x) + ')')
00128 TOTAL_TIME = Event("Total time", 0.0, fail)
00129 TOTAL_TIME_RATIO = Event("Total time ratio", 0.0, fail, percentage)
00130 
00131 CALL_RATIO = Event("Call ratio", 0.0, add, percentage)
00132 
00133 PRUNE_RATIO = Event("Prune ratio", 0.0, add, percentage)
00134 
00135 
00136 class Object(object):
00137     """Base class for all objects in profile which can store events."""
00138 
00139     def __init__(self, events=None):
00140         if events is None:
00141             self.events = {}
00142         else:
00143             self.events = events
00144 
00145     def __hash__(self):
00146         return id(self)
00147 
00148     def __eq__(self, other):
00149         return self is other
00150 
00151     def __contains__(self, event):
00152         return event in self.events
00153     
00154     def __getitem__(self, event):
00155         try:
00156             return self.events[event]
00157         except KeyError:
00158             raise UndefinedEvent(event)
00159     
00160     def __setitem__(self, event, value):
00161         if value is None:
00162             if event in self.events:
00163                 del self.events[event]
00164         else:
00165             self.events[event] = value
00166 
00167 
00168 class Call(Object):
00169     """A call between functions.
00170     
00171     There should be at most one call object for every pair of functions.
00172     """
00173 
00174     def __init__(self, callee_id):
00175         Object.__init__(self)
00176         self.callee_id = callee_id    
00177 
00178 
00179 class Function(Object):
00180     """A function."""
00181 
00182     def __init__(self, id, name):
00183         Object.__init__(self)
00184         self.id = id
00185         self.name = name
00186         self.calls = {}
00187         self.cycle = None
00188     
00189     def add_call(self, call):
00190         if call.callee_id in self.calls:
00191             sys.stderr.write('warning: overwriting call from function %s to %s\n' % (str(self.id), str(call.callee_id)))
00192         self.calls[call.callee_id] = call
00193 
00194     # TODO: write utility functions
00195 
00196     def __repr__(self):
00197         return self.name
00198 
00199 
00200 class Cycle(Object):
00201     """A cycle made from recursive function calls."""
00202 
00203     def __init__(self):
00204         Object.__init__(self)
00205         # XXX: Do cycles need an id?
00206         self.functions = set()
00207 
00208     def add_function(self, function):
00209         assert function not in self.functions
00210         self.functions.add(function)
00211         # XXX: Aggregate events?
00212         if function.cycle is not None:
00213             for other in function.cycle.functions:
00214                 if function not in self.functions:
00215                     self.add_function(other)
00216         function.cycle = self
00217 
00218 
00219 class Profile(Object):
00220     """The whole profile."""
00221 
00222     def __init__(self):
00223         Object.__init__(self)
00224         self.functions = {}
00225         self.cycles = []
00226 
00227     def add_function(self, function):
00228         if function.id in self.functions:
00229             sys.stderr.write('warning: overwriting function %s (id %s)\n' % (function.name, str(function.id)))
00230         self.functions[function.id] = function
00231 
00232     def add_cycle(self, cycle):
00233         self.cycles.append(cycle)
00234 
00235     def validate(self):
00236         """Validate the edges."""
00237 
00238         for function in self.functions.itervalues():
00239             for callee_id in function.calls.keys():
00240                 assert function.calls[callee_id].callee_id == callee_id
00241                 if callee_id not in self.functions:
00242                     sys.stderr.write('warning: call to undefined function %s from function %s\n' % (str(callee_id), function.name))
00243                     del function.calls[callee_id]
00244 
00245     def find_cycles(self):
00246         """Find cycles using Tarjan's strongly connected components algorithm."""
00247 
00248         # Apply the Tarjan's algorithm successively until all functions are visited
00249         visited = set()
00250         for function in self.functions.itervalues():
00251             if function not in visited:
00252                 self._tarjan(function, 0, [], {}, {}, visited)
00253         cycles = []
00254         for function in self.functions.itervalues():
00255             if function.cycle is not None and function.cycle not in cycles:
00256                 cycles.append(function.cycle)
00257         self.cycles = cycles
00258         if 0:
00259             for cycle in cycles:
00260                 sys.stderr.write("Cycle:\n")
00261                 for member in cycle.functions:
00262                     sys.stderr.write("\t%s\n" % member.name)
00263     
00264     def _tarjan(self, function, order, stack, orders, lowlinks, visited):
00265         """Tarjan's strongly connected components algorithm.
00266 
00267         See also:
00268         - http://en.wikipedia.org/wiki/Tarjan's_strongly_connected_components_algorithm
00269         """
00270 
00271         visited.add(function)
00272         orders[function] = order
00273         lowlinks[function] = order
00274         order += 1
00275         pos = len(stack)
00276         stack.append(function)
00277         for call in function.calls.itervalues():
00278             callee = self.functions[call.callee_id]
00279             # TODO: use a set to optimize lookup
00280             if callee not in orders:
00281                 order = self._tarjan(callee, order, stack, orders, lowlinks, visited)
00282                 lowlinks[function] = min(lowlinks[function], lowlinks[callee])
00283             elif callee in stack:
00284                 lowlinks[function] = min(lowlinks[function], orders[callee])
00285         if lowlinks[function] == orders[function]:
00286             # Strongly connected component found
00287             members = stack[pos:]
00288             del stack[pos:]
00289             if len(members) > 1:
00290                 cycle = Cycle()
00291                 for member in members:
00292                     cycle.add_function(member)
00293         return order
00294 
00295     def call_ratios(self, event):
00296         # Aggregate for incoming calls
00297         cycle_totals = {}
00298         for cycle in self.cycles:
00299             cycle_totals[cycle] = 0.0
00300         function_totals = {}
00301         for function in self.functions.itervalues():
00302             function_totals[function] = 0.0
00303         for function in self.functions.itervalues():
00304             for call in function.calls.itervalues():
00305                 if call.callee_id != function.id:
00306                     callee = self.functions[call.callee_id]
00307                     function_totals[callee] += call[event]
00308                     if callee.cycle is not None and callee.cycle is not function.cycle:
00309                         cycle_totals[callee.cycle] += call[event]
00310 
00311         # Compute the ratios
00312         for function in self.functions.itervalues():
00313             for call in function.calls.itervalues():
00314                 assert CALL_RATIO not in call
00315                 if call.callee_id != function.id:
00316                     callee = self.functions[call.callee_id]
00317                     if callee.cycle is not None and callee.cycle is not function.cycle:
00318                         total = cycle_totals[callee.cycle]
00319                     else:
00320                         total = function_totals[callee]
00321                     call[CALL_RATIO] = ratio(call[event], total)
00322 
00323     def integrate(self, outevent, inevent):
00324         """Propagate function time ratio allong the function calls.
00325 
00326         Must be called after finding the cycles.
00327 
00328         See also:
00329         - http://citeseer.ist.psu.edu/graham82gprof.html
00330         """
00331 
00332         # Sanity checking
00333         assert outevent not in self
00334         for function in self.functions.itervalues():
00335             assert outevent not in function
00336             assert inevent in function
00337             for call in function.calls.itervalues():
00338                 assert outevent not in call
00339                 if call.callee_id != function.id:
00340                     assert CALL_RATIO in call
00341 
00342         # Aggregate the input for each cycle 
00343         for cycle in self.cycles:
00344             total = inevent.null()
00345             for function in self.functions.itervalues():
00346                 total = inevent.aggregate(total, function[inevent])
00347             self[inevent] = total
00348 
00349         # Integrate along the edges
00350         total = inevent.null()
00351         for function in self.functions.itervalues():
00352             total = inevent.aggregate(total, function[inevent])
00353             self._integrate_function(function, outevent, inevent)
00354         self[outevent] = total
00355 
00356     def _integrate_function(self, function, outevent, inevent):
00357         if function.cycle is not None:
00358             return self._integrate_cycle(function.cycle, outevent, inevent)
00359         else:
00360             if outevent not in function:
00361                 total = function[inevent]
00362                 for call in function.calls.itervalues():
00363                     if call.callee_id != function.id:
00364                         total += self._integrate_call(call, outevent, inevent)
00365                 function[outevent] = total
00366             return function[outevent]
00367     
00368     def _integrate_call(self, call, outevent, inevent):
00369         assert outevent not in call
00370         assert CALL_RATIO in call
00371         callee = self.functions[call.callee_id]
00372         subtotal = call[CALL_RATIO]*self._integrate_function(callee, outevent, inevent)
00373         call[outevent] = subtotal
00374         return subtotal
00375 
00376     def _integrate_cycle(self, cycle, outevent, inevent):
00377         if outevent not in cycle:
00378 
00379             total = inevent.null()
00380             for member in cycle.functions:
00381                 subtotal = member[inevent]
00382                 for call in member.calls.itervalues():
00383                     callee = self.functions[call.callee_id]
00384                     if callee.cycle is not cycle:
00385                         subtotal += self._integrate_call(call, outevent, inevent)
00386                 total += subtotal
00387             cycle[outevent] = total
00388             
00389             callees = {}
00390             for function in self.functions.itervalues():
00391                 if function.cycle is not cycle:
00392                     for call in function.calls.itervalues():
00393                         callee = self.functions[call.callee_id]
00394                         if callee.cycle is cycle:
00395                             try:
00396                                 callees[callee] += call[CALL_RATIO]
00397                             except KeyError:
00398                                 callees[callee] = call[CALL_RATIO]
00399 
00400             for callee, call_ratio in callees.iteritems():
00401                 ranks = {}
00402                 call_ratios = {}
00403                 partials = {}
00404                 self._rank_cycle_function(cycle, callee, 0, ranks)
00405                 self._call_ratios_cycle(cycle, callee, ranks, call_ratios, set())
00406                 partial = self._integrate_cycle_function(cycle, callee, call_ratio, partials, ranks, call_ratios, outevent, inevent)
00407                 assert partial == max(partials.values())
00408                 assert not total or abs(1.0 - partial/(call_ratio*total)) <= 0.001
00409             
00410         return cycle[outevent]
00411 
00412     def _rank_cycle_function(self, cycle, function, rank, ranks):
00413         if function not in ranks or ranks[function] > rank:
00414             ranks[function] = rank
00415             for call in function.calls.itervalues():
00416                 if call.callee_id != function.id:
00417                     callee = self.functions[call.callee_id]
00418                     if callee.cycle is cycle:
00419                         self._rank_cycle_function(cycle, callee, rank + 1, ranks)
00420 
00421     def _call_ratios_cycle(self, cycle, function, ranks, call_ratios, visited):
00422         if function not in visited:
00423             visited.add(function)
00424             for call in function.calls.itervalues():
00425                 if call.callee_id != function.id:
00426                     callee = self.functions[call.callee_id]
00427                     if callee.cycle is cycle:
00428                         if ranks[callee] > ranks[function]:
00429                             call_ratios[callee] = call_ratios.get(callee, 0.0) + call[CALL_RATIO]
00430                             self._call_ratios_cycle(cycle, callee, ranks, call_ratios, visited)
00431 
00432     def _integrate_cycle_function(self, cycle, function, partial_ratio, partials, ranks, call_ratios, outevent, inevent):
00433         if function not in partials:
00434             partial = partial_ratio*function[inevent]
00435             for call in function.calls.itervalues():
00436                 if call.callee_id != function.id:
00437                     callee = self.functions[call.callee_id]
00438                     if callee.cycle is not cycle:
00439                         assert outevent in call
00440                         partial += partial_ratio*call[outevent]
00441                     else:
00442                         if ranks[callee] > ranks[function]:
00443                             callee_partial = self._integrate_cycle_function(cycle, callee, partial_ratio, partials, ranks, call_ratios, outevent, inevent)
00444                             call_ratio = ratio(call[CALL_RATIO], call_ratios[callee])
00445                             call_partial = call_ratio*callee_partial
00446                             try:
00447                                 call[outevent] += call_partial
00448                             except UndefinedEvent:
00449                                 call[outevent] = call_partial
00450                             partial += call_partial
00451             partials[function] = partial
00452             try:
00453                 function[outevent] += partial
00454             except UndefinedEvent:
00455                 function[outevent] = partial
00456         return partials[function]
00457 
00458     def aggregate(self, event):
00459         """Aggregate an event for the whole profile."""
00460 
00461         total = event.null()
00462         for function in self.functions.itervalues():
00463             try:
00464                 total = event.aggregate(total, function[event])
00465             except UndefinedEvent:
00466                 return
00467         self[event] = total
00468 
00469     def ratio(self, outevent, inevent):
00470         assert outevent not in self
00471         assert inevent in self
00472         for function in self.functions.itervalues():
00473             assert outevent not in function
00474             assert inevent in function
00475             function[outevent] = ratio(function[inevent], self[inevent])
00476             for call in function.calls.itervalues():
00477                 assert outevent not in call
00478                 if inevent in call:
00479                     call[outevent] = ratio(call[inevent], self[inevent])
00480         self[outevent] = 1.0
00481 
00482     def prune(self, node_thres, edge_thres):
00483         """Prune the profile"""
00484 
00485         # compute the prune ratios
00486         for function in self.functions.itervalues():
00487             try:
00488                 function[PRUNE_RATIO] = function[TOTAL_TIME_RATIO]
00489             except UndefinedEvent:
00490                 pass
00491 
00492             for call in function.calls.itervalues():
00493                 callee = self.functions[call.callee_id]
00494 
00495                 if TOTAL_TIME_RATIO in call:
00496                     # handle exact cases first
00497                     call[PRUNE_RATIO] = call[TOTAL_TIME_RATIO] 
00498                 else:
00499                     try:
00500                         # make a safe estimate
00501                         call[PRUNE_RATIO] = min(function[TOTAL_TIME_RATIO], callee[TOTAL_TIME_RATIO]) 
00502                     except UndefinedEvent:
00503                         pass
00504 
00505         # prune the nodes
00506         for function_id in self.functions.keys():
00507             function = self.functions[function_id]
00508             try:
00509                 if function[PRUNE_RATIO] < node_thres:
00510                     del self.functions[function_id]
00511             except UndefinedEvent:
00512                 pass
00513 
00514         # prune the egdes
00515         for function in self.functions.itervalues():
00516             for callee_id in function.calls.keys():
00517                 call = function.calls[callee_id]
00518                 try:
00519                     if callee_id not in self.functions or call[PRUNE_RATIO] < edge_thres:
00520                         del function.calls[callee_id]
00521                 except UndefinedEvent:
00522                     pass
00523     
00524     def dump(self):
00525         for function in self.functions.itervalues():
00526             sys.stderr.write('Function %s:\n' % (function.name,))
00527             self._dump_events(function.events)
00528             for call in function.calls.itervalues():
00529                 callee = self.functions[call.callee_id]
00530                 sys.stderr.write('  Call %s:\n' % (callee.name,))
00531                 self._dump_events(call.events)
00532 
00533     def _dump_events(self, events):
00534         for event, value in events.iteritems():
00535             sys.stderr.write('    %s: %s\n' % (event.name, event.format(value)))
00536 
00537 
00538 class Struct:
00539     """Masquerade a dictionary with a structure-like behavior."""
00540 
00541     def __init__(self, attrs = None):
00542         if attrs is None:
00543             attrs = {}
00544         self.__dict__['_attrs'] = attrs
00545     
00546     def __getattr__(self, name):
00547         try:
00548             return self._attrs[name]
00549         except KeyError:
00550             raise AttributeError(name)
00551 
00552     def __setattr__(self, name, value):
00553         self._attrs[name] = value
00554 
00555     def __str__(self):
00556         return str(self._attrs)
00557 
00558     def __repr__(self):
00559         return repr(self._attrs)
00560     
00561 
00562 class ParseError(Exception):
00563     """Raised when parsing to signal mismatches."""
00564 
00565     def __init__(self, msg, line):
00566         self.msg = msg
00567         # TODO: store more source line information
00568         self.line = line
00569 
00570     def __str__(self):
00571         return '%s: %r' % (self.msg, self.line)
00572 
00573 
00574 class Parser:
00575     """Parser interface."""
00576 
00577     def __init__(self):
00578         pass
00579 
00580     def parse(self):
00581         raise NotImplementedError
00582 
00583     
00584 class LineParser(Parser):
00585     """Base class for parsers that read line-based formats."""
00586 
00587     def __init__(self, file):
00588         Parser.__init__(self)
00589         self._file = file
00590         self.__line = None
00591         self.__eof = False
00592 
00593     def readline(self):
00594         line = self._file.readline()
00595         if not line:
00596             self.__line = ''
00597             self.__eof = True
00598         self.__line = line.rstrip('\r\n')
00599 
00600     def lookahead(self):
00601         assert self.__line is not None
00602         return self.__line
00603 
00604     def consume(self):
00605         assert self.__line is not None
00606         line = self.__line
00607         self.readline()
00608         return line
00609 
00610     def eof(self):
00611         assert self.__line is not None
00612         return self.__eof
00613 
00614 
00615 XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF = range(4)
00616 
00617 
00618 class XmlToken:
00619 
00620     def __init__(self, type, name_or_data, attrs = None, line = None, column = None):
00621         assert type in (XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF)
00622         self.type = type
00623         self.name_or_data = name_or_data
00624         self.attrs = attrs
00625         self.line = line
00626         self.column = column
00627 
00628     def __str__(self):
00629         if self.type == XML_ELEMENT_START:
00630             return '<' + self.name_or_data + ' ...>'
00631         if self.type == XML_ELEMENT_END:
00632             return '</' + self.name_or_data + '>'
00633         if self.type == XML_CHARACTER_DATA:
00634             return self.name_or_data
00635         if self.type == XML_EOF:
00636             return 'end of file'
00637         assert 0
00638 
00639 
00640 class XmlTokenizer:
00641     """Expat based XML tokenizer."""
00642 
00643     def __init__(self, fp, skip_ws = True):
00644         self.fp = fp
00645         self.tokens = []
00646         self.index = 0
00647         self.final = False
00648         self.skip_ws = skip_ws
00649         
00650         self.character_pos = 0, 0
00651         self.character_data = ''
00652         
00653         self.parser = xml.parsers.expat.ParserCreate()
00654         self.parser.StartElementHandler  = self.handle_element_start
00655         self.parser.EndElementHandler    = self.handle_element_end
00656         self.parser.CharacterDataHandler = self.handle_character_data
00657     
00658     def handle_element_start(self, name, attributes):
00659         self.finish_character_data()
00660         line, column = self.pos()
00661         token = XmlToken(XML_ELEMENT_START, name, attributes, line, column)
00662         self.tokens.append(token)
00663     
00664     def handle_element_end(self, name):
00665         self.finish_character_data()
00666         line, column = self.pos()
00667         token = XmlToken(XML_ELEMENT_END, name, None, line, column)
00668         self.tokens.append(token)
00669 
00670     def handle_character_data(self, data):
00671         if not self.character_data:
00672             self.character_pos = self.pos()
00673         self.character_data += data
00674     
00675     def finish_character_data(self):
00676         if self.character_data:
00677             if not self.skip_ws or not self.character_data.isspace(): 
00678                 line, column = self.character_pos
00679                 token = XmlToken(XML_CHARACTER_DATA, self.character_data, None, line, column)
00680                 self.tokens.append(token)
00681             self.character_data = ''
00682     
00683     def next(self):
00684         size = 16*1024
00685         while self.index >= len(self.tokens) and not self.final:
00686             self.tokens = []
00687             self.index = 0
00688             data = self.fp.read(size)
00689             self.final = len(data) < size
00690             try:
00691                 self.parser.Parse(data, self.final)
00692             except xml.parsers.expat.ExpatError, e:
00693                 #if e.code == xml.parsers.expat.errors.XML_ERROR_NO_ELEMENTS:
00694                 if e.code == 3:
00695                     pass
00696                 else:
00697                     raise e
00698         if self.index >= len(self.tokens):
00699             line, column = self.pos()
00700             token = XmlToken(XML_EOF, None, None, line, column)
00701         else:
00702             token = self.tokens[self.index]
00703             self.index += 1
00704         return token
00705 
00706     def pos(self):
00707         return self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber
00708 
00709 
00710 class XmlTokenMismatch(Exception):
00711 
00712     def __init__(self, expected, found):
00713         self.expected = expected
00714         self.found = found
00715 
00716     def __str__(self):
00717         return '%u:%u: %s expected, %s found' % (self.found.line, self.found.column, str(self.expected), str(self.found))
00718 
00719 
00720 class XmlParser(Parser):
00721     """Base XML document parser."""
00722 
00723     def __init__(self, fp):
00724         Parser.__init__(self)
00725         self.tokenizer = XmlTokenizer(fp)
00726         self.consume()
00727     
00728     def consume(self):
00729         self.token = self.tokenizer.next()
00730 
00731     def match_element_start(self, name):
00732         return self.token.type == XML_ELEMENT_START and self.token.name_or_data == name
00733     
00734     def match_element_end(self, name):
00735         return self.token.type == XML_ELEMENT_END and self.token.name_or_data == name
00736 
00737     def element_start(self, name):
00738         while self.token.type == XML_CHARACTER_DATA:
00739             self.consume()
00740         if self.token.type != XML_ELEMENT_START:
00741             raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token)
00742         if self.token.name_or_data != name:
00743             raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token)
00744         attrs = self.token.attrs
00745         self.consume()
00746         return attrs
00747     
00748     def element_end(self, name):
00749         while self.token.type == XML_CHARACTER_DATA:
00750             self.consume()
00751         if self.token.type != XML_ELEMENT_END:
00752             raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token)
00753         if self.token.name_or_data != name:
00754             raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token)
00755         self.consume()
00756 
00757     def character_data(self, strip = True):
00758         data = ''
00759         while self.token.type == XML_CHARACTER_DATA:
00760             data += self.token.name_or_data
00761             self.consume()
00762         if strip:
00763             data = data.strip()
00764         return data
00765 
00766 
00767 class GprofParser(Parser):
00768     """Parser for GNU gprof output.
00769 
00770     See also:
00771     - Chapter "Interpreting gprof's Output" from the GNU gprof manual
00772       http://sourceware.org/binutils/docs-2.18/gprof/Call-Graph.html#Call-Graph
00773     - File "cg_print.c" from the GNU gprof source code
00774       http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/src/gprof/cg_print.c?rev=1.12&cvsroot=src
00775     """
00776 
00777     def __init__(self, fp):
00778         Parser.__init__(self)
00779         self.fp = fp
00780         self.functions = {}
00781         self.cycles = {}
00782 
00783     def readline(self):
00784         line = self.fp.readline()
00785         if not line:
00786             sys.stderr.write('error: unexpected end of file\n')
00787             sys.exit(1)
00788         line = line.rstrip('\r\n')
00789         return line
00790 
00791     _int_re = re.compile(r'^\d+$')
00792     _float_re = re.compile(r'^\d+\.\d+$')
00793 
00794     def translate(self, mo):
00795         """Extract a structure from a match object, while translating the types in the process."""
00796         attrs = {}
00797         groupdict = mo.groupdict()
00798         for name, value in groupdict.iteritems():
00799             if value is None:
00800                 value = None
00801             elif self._int_re.match(value):
00802                 value = int(value)
00803             elif self._float_re.match(value):
00804                 value = float(value)
00805             attrs[name] = (value)
00806         return Struct(attrs)
00807 
00808     _cg_header_re = re.compile(
00809         # original gprof header
00810         r'^\s+called/total\s+parents\s*$|' +
00811         r'^index\s+%time\s+self\s+descendents\s+called\+self\s+name\s+index\s*$|' +
00812         r'^\s+called/total\s+children\s*$|' +
00813         # GNU gprof header
00814         r'^index\s+%\s+time\s+self\s+children\s+called\s+name\s*$'
00815     )
00816 
00817     _cg_ignore_re = re.compile(
00818         # spontaneous
00819         r'^\s+<spontaneous>\s*$|'
00820         # internal calls (such as "mcount")
00821         r'^.*\((\d+)\)$'
00822     )
00823 
00824     _cg_primary_re = re.compile(
00825         r'^\[(?P<index>\d+)\]?' + 
00826         r'\s+(?P<percentage_time>\d+\.\d+)' + 
00827         r'\s+(?P<self>\d+\.\d+)' + 
00828         r'\s+(?P<descendants>\d+\.\d+)' + 
00829         r'\s+(?:(?P<called>\d+)(?:\+(?P<called_self>\d+))?)?' + 
00830         r'\s+(?P<name>\S.*?)' +
00831         r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' +
00832         r'\s\[(\d+)\]$'
00833     )
00834 
00835     _cg_parent_re = re.compile(
00836         r'^\s+(?P<self>\d+\.\d+)?' + 
00837         r'\s+(?P<descendants>\d+\.\d+)?' + 
00838         r'\s+(?P<called>\d+)(?:/(?P<called_total>\d+))?' + 
00839         r'\s+(?P<name>\S.*?)' +
00840         r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' +
00841         r'\s\[(?P<index>\d+)\]$'
00842     )
00843 
00844     _cg_child_re = _cg_parent_re
00845 
00846     _cg_cycle_header_re = re.compile(
00847         r'^\[(?P<index>\d+)\]?' + 
00848         r'\s+(?P<percentage_time>\d+\.\d+)' + 
00849         r'\s+(?P<self>\d+\.\d+)' + 
00850         r'\s+(?P<descendants>\d+\.\d+)' + 
00851         r'\s+(?:(?P<called>\d+)(?:\+(?P<called_self>\d+))?)?' + 
00852         r'\s+<cycle\s(?P<cycle>\d+)\sas\sa\swhole>' +
00853         r'\s\[(\d+)\]$'
00854     )
00855 
00856     _cg_cycle_member_re = re.compile(
00857         r'^\s+(?P<self>\d+\.\d+)?' + 
00858         r'\s+(?P<descendants>\d+\.\d+)?' + 
00859         r'\s+(?P<called>\d+)(?:\+(?P<called_self>\d+))?' + 
00860         r'\s+(?P<name>\S.*?)' +
00861         r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' +
00862         r'\s\[(?P<index>\d+)\]$'
00863     )
00864 
00865     _cg_sep_re = re.compile(r'^--+$')
00866 
00867     def parse_function_entry(self, lines):
00868         parents = []
00869         children = []
00870 
00871         while True:
00872             if not lines:
00873                 sys.stderr.write('warning: unexpected end of entry\n')
00874             line = lines.pop(0)
00875             if line.startswith('['):
00876                 break
00877         
00878             # read function parent line
00879             mo = self._cg_parent_re.match(line)
00880             if not mo:
00881                 if self._cg_ignore_re.match(line):
00882                     continue
00883                 sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
00884             else:
00885                 parent = self.translate(mo)
00886                 parents.append(parent)
00887 
00888         # read primary line
00889         mo = self._cg_primary_re.match(line)
00890         if not mo:
00891             sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
00892             return
00893         else:
00894             function = self.translate(mo)
00895 
00896         while lines:
00897             line = lines.pop(0)
00898             
00899             # read function subroutine line
00900             mo = self._cg_child_re.match(line)
00901             if not mo:
00902                 if self._cg_ignore_re.match(line):
00903                     continue
00904                 sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
00905             else:
00906                 child = self.translate(mo)
00907                 children.append(child)
00908         
00909         function.parents = parents
00910         function.children = children
00911 
00912         self.functions[function.index] = function
00913 
00914     def parse_cycle_entry(self, lines):
00915 
00916         # read cycle header line
00917         line = lines[0]
00918         mo = self._cg_cycle_header_re.match(line)
00919         if not mo:
00920             sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
00921             return
00922         cycle = self.translate(mo)
00923 
00924         # read cycle member lines
00925         cycle.functions = []
00926         for line in lines[1:]:
00927             mo = self._cg_cycle_member_re.match(line)
00928             if not mo:
00929                 sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
00930                 continue
00931             call = self.translate(mo)
00932             cycle.functions.append(call)
00933         
00934         self.cycles[cycle.cycle] = cycle
00935 
00936     def parse_cg_entry(self, lines):
00937         if lines[0].startswith("["):
00938             self.parse_cycle_entry(lines)
00939         else:
00940             self.parse_function_entry(lines)
00941 
00942     def parse_cg(self):
00943         """Parse the call graph."""
00944 
00945         # skip call graph header
00946         while not self._cg_header_re.match(self.readline()):
00947             pass
00948         line = self.readline()
00949         while self._cg_header_re.match(line):
00950             line = self.readline()
00951 
00952         # process call graph entries
00953         entry_lines = []
00954         while line != '\014': # form feed
00955             if line and not line.isspace():
00956                 if self._cg_sep_re.match(line):
00957                     self.parse_cg_entry(entry_lines)
00958                     entry_lines = []
00959                 else:
00960                     entry_lines.append(line)            
00961             line = self.readline()
00962     
00963     def parse(self):
00964         self.parse_cg()
00965         self.fp.close()
00966 
00967         profile = Profile()
00968         profile[TIME] = 0.0
00969         
00970         cycles = {}
00971         for index in self.cycles.iterkeys():
00972             cycles[index] = Cycle()
00973 
00974         for entry in self.functions.itervalues():
00975             # populate the function
00976             function = Function(entry.index, entry.name)
00977             function[TIME] = entry.self
00978             if entry.called is not None:
00979                 function[CALLS] = entry.called
00980             if entry.called_self is not None:
00981                 call = Call(entry.index)
00982                 call[CALLS] = entry.called_self
00983                 function[CALLS] += entry.called_self
00984             
00985             # populate the function calls
00986             for child in entry.children:
00987                 call = Call(child.index)
00988                 
00989                 assert child.called is not None
00990                 call[CALLS] = child.called
00991 
00992                 if child.index not in self.functions:
00993                     # NOTE: functions that were never called but were discovered by gprof's 
00994                     # static call graph analysis dont have a call graph entry so we need
00995                     # to add them here
00996                     missing = Function(child.index, child.name)
00997                     function[TIME] = 0.0
00998                     function[CALLS] = 0
00999                     profile.add_function(missing)
01000 
01001                 function.add_call(call)
01002 
01003             profile.add_function(function)
01004 
01005             if entry.cycle is not None:
01006                 cycles[entry.cycle].add_function(function)
01007 
01008             profile[TIME] = profile[TIME] + function[TIME]
01009 
01010         for cycle in cycles.itervalues():
01011             profile.add_cycle(cycle)
01012 
01013         # Compute derived events
01014         profile.validate()
01015         profile.ratio(TIME_RATIO, TIME)
01016         profile.call_ratios(CALLS)
01017         profile.integrate(TOTAL_TIME, TIME)
01018         profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME)
01019 
01020         return profile
01021 
01022 
01023 class OprofileParser(LineParser):
01024     """Parser for oprofile callgraph output.
01025     
01026     See also:
01027     - http://oprofile.sourceforge.net/doc/opreport.html#opreport-callgraph
01028     """
01029 
01030     _fields_re = {
01031         'samples': r'(?P<samples>\d+)',
01032         '%': r'(?P<percentage>\S+)',
01033         'linenr info': r'(?P<source>\(no location information\)|\S+:\d+)',
01034         'image name': r'(?P<image>\S+(?:\s\(tgid:[^)]*\))?)',
01035         'app name': r'(?P<application>\S+)',
01036         'symbol name': r'(?P<symbol>\(no symbols\)|.+?)',
01037     }
01038 
01039     def __init__(self, infile):
01040         LineParser.__init__(self, infile)
01041         self.entries = {}
01042         self.entry_re = None
01043 
01044     def add_entry(self, callers, function, callees):
01045         try:
01046             entry = self.entries[function.id]
01047         except KeyError:
01048             self.entries[function.id] = (callers, function, callees)
01049         else:
01050             callers_total, function_total, callees_total = entry
01051             self.update_subentries_dict(callers_total, callers)
01052             function_total.samples += function.samples
01053             self.update_subentries_dict(callees_total, callees)
01054     
01055     def update_subentries_dict(self, totals, partials):
01056         for partial in partials.itervalues():
01057             try:
01058                 total = totals[partial.id]
01059             except KeyError:
01060                 totals[partial.id] = partial
01061             else:
01062                 total.samples += partial.samples
01063         
01064     def parse(self):
01065         # read lookahead
01066         self.readline()
01067 
01068         self.parse_header()
01069         while self.lookahead():
01070             self.parse_entry()
01071 
01072         profile = Profile()
01073 
01074         reverse_call_samples = {}
01075         
01076         # populate the profile
01077         profile[SAMPLES] = 0
01078         for _callers, _function, _callees in self.entries.itervalues():
01079             function = Function(_function.id, _function.name)
01080             function[SAMPLES] = _function.samples
01081             profile.add_function(function)
01082             profile[SAMPLES] += _function.samples
01083 
01084             if _function.application:
01085                 function[PROCESS] = os.path.basename(_function.application)
01086             if _function.image:
01087                 function[MODULE] = os.path.basename(_function.image)
01088 
01089             total_callee_samples = 0
01090             for _callee in _callees.itervalues():
01091                 total_callee_samples += _callee.samples
01092 
01093             for _callee in _callees.itervalues():
01094                 if not _callee.self:
01095                     call = Call(_callee.id)
01096                     call[SAMPLES2] = _callee.samples
01097                     function.add_call(call)
01098                 
01099         # compute derived data
01100         profile.validate()
01101         profile.find_cycles()
01102         profile.ratio(TIME_RATIO, SAMPLES)
01103         profile.call_ratios(SAMPLES2)
01104         profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
01105 
01106         return profile
01107 
01108     def parse_header(self):
01109         while not self.match_header():
01110             self.consume()
01111         line = self.lookahead()
01112         fields = re.split(r'\s\s+', line)
01113         entry_re = r'^\s*' + r'\s+'.join([self._fields_re[field] for field in fields]) + r'(?P<self>\s+\[self\])?$'
01114         self.entry_re = re.compile(entry_re)
01115         self.skip_separator()
01116 
01117     def parse_entry(self):
01118         callers = self.parse_subentries()
01119         if self.match_primary():
01120             function = self.parse_subentry()
01121             if function is not None:
01122                 callees = self.parse_subentries()
01123                 self.add_entry(callers, function, callees)
01124         self.skip_separator()
01125 
01126     def parse_subentries(self):
01127         subentries = {}
01128         while self.match_secondary():
01129             subentry = self.parse_subentry()
01130             subentries[subentry.id] = subentry
01131         return subentries
01132 
01133     def parse_subentry(self):
01134         entry = Struct()
01135         line = self.consume()
01136         mo = self.entry_re.match(line)
01137         if not mo:
01138             raise ParseError('failed to parse', line)
01139         fields = mo.groupdict()
01140         entry.samples = int(fields.get('samples', 0))
01141         entry.percentage = float(fields.get('percentage', 0.0))
01142         if 'source' in fields and fields['source'] != '(no location information)':
01143             source = fields['source']
01144             filename, lineno = source.split(':')
01145             entry.filename = filename
01146             entry.lineno = int(lineno)
01147         else:
01148             source = ''
01149             entry.filename = None
01150             entry.lineno = None
01151         entry.image = fields.get('image', '')
01152         entry.application = fields.get('application', '')
01153         if 'symbol' in fields and fields['symbol'] != '(no symbols)':
01154             entry.symbol = fields['symbol']
01155         else:
01156             entry.symbol = ''
01157         if entry.symbol.startswith('"') and entry.symbol.endswith('"'):
01158             entry.symbol = entry.symbol[1:-1]
01159         entry.id = ':'.join((entry.application, entry.image, source, entry.symbol))
01160         entry.self = fields.get('self', None) != None
01161         if entry.self:
01162             entry.id += ':self'
01163         if entry.symbol:
01164             entry.name = entry.symbol
01165         else:
01166             entry.name = entry.image
01167         return entry
01168 
01169     def skip_separator(self):
01170         while not self.match_separator():
01171             self.consume()
01172         self.consume()
01173 
01174     def match_header(self):
01175         line = self.lookahead()
01176         return line.startswith('samples')
01177 
01178     def match_separator(self):
01179         line = self.lookahead()
01180         return line == '-'*len(line)
01181 
01182     def match_primary(self):
01183         line = self.lookahead()
01184         return not line[:1].isspace()
01185     
01186     def match_secondary(self):
01187         line = self.lookahead()
01188         return line[:1].isspace()
01189 
01190 
01191 class SharkParser(LineParser):
01192     """Parser for MacOSX Shark output.
01193 
01194     Author: tom@dbservice.com
01195     """
01196 
01197     def __init__(self, infile):
01198         LineParser.__init__(self, infile)
01199         self.stack = []
01200         self.entries = {}
01201 
01202     def add_entry(self, function):
01203         try:
01204             entry = self.entries[function.id]
01205         except KeyError:
01206             self.entries[function.id] = (function, { })
01207         else:
01208             function_total, callees_total = entry
01209             function_total.samples += function.samples
01210     
01211     def add_callee(self, function, callee):
01212         func, callees = self.entries[function.id]
01213         try:
01214             entry = callees[callee.id]
01215         except KeyError:
01216             callees[callee.id] = callee
01217         else:
01218             entry.samples += callee.samples
01219         
01220     def parse(self):
01221         self.readline()
01222         self.readline()
01223         self.readline()
01224         self.readline()
01225 
01226         match = re.compile(r'(?P<prefix>[|+ ]*)(?P<samples>\d+), (?P<symbol>[^,]+), (?P<image>.*)')
01227 
01228         while self.lookahead():
01229             line = self.consume()
01230             mo = match.match(line)
01231             if not mo:
01232                 raise ParseError('failed to parse', line)
01233 
01234             fields = mo.groupdict()
01235             prefix = len(fields.get('prefix', 0)) / 2 - 1
01236 
01237             symbol = str(fields.get('symbol', 0))
01238             image = str(fields.get('image', 0))
01239 
01240             entry = Struct()
01241             entry.id = ':'.join([symbol, image])
01242             entry.samples = int(fields.get('samples', 0))
01243 
01244             entry.name = symbol
01245             entry.image = image
01246 
01247             # adjust the callstack
01248             if prefix < len(self.stack):
01249                 del self.stack[prefix:]
01250 
01251             if prefix == len(self.stack):
01252                 self.stack.append(entry)
01253 
01254             # if the callstack has had an entry, it's this functions caller
01255             if prefix > 0:
01256                 self.add_callee(self.stack[prefix - 1], entry)
01257                 
01258             self.add_entry(entry)
01259                 
01260         profile = Profile()
01261         profile[SAMPLES] = 0
01262         for _function, _callees in self.entries.itervalues():
01263             function = Function(_function.id, _function.name)
01264             function[SAMPLES] = _function.samples
01265             profile.add_function(function)
01266             profile[SAMPLES] += _function.samples
01267 
01268             if _function.image:
01269                 function[MODULE] = os.path.basename(_function.image)
01270 
01271             for _callee in _callees.itervalues():
01272                 call = Call(_callee.id)
01273                 call[SAMPLES] = _callee.samples
01274                 function.add_call(call)
01275                 
01276         # compute derived data
01277         profile.validate()
01278         profile.find_cycles()
01279         profile.ratio(TIME_RATIO, SAMPLES)
01280         profile.call_ratios(SAMPLES)
01281         profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
01282 
01283         return profile
01284 
01285 
01286 class AQtimeTable:
01287 
01288     def __init__(self, name, fields):
01289         self.name = name
01290 
01291         self.fields = fields
01292         self.field_column = {}
01293         for column in range(len(fields)):
01294             self.field_column[fields[column]] = column
01295         self.rows = []
01296 
01297     def __len__(self):
01298         return len(self.rows)
01299 
01300     def __iter__(self):
01301         for values, children in self.rows:
01302             fields = {}
01303             for name, value in zip(self.fields, values):
01304                 fields[name] = value
01305             children = dict([(child.name, child) for child in children])
01306             yield fields, children
01307         raise StopIteration
01308 
01309     def add_row(self, values, children=()):
01310         self.rows.append((values, children))
01311 
01312 
01313 class AQtimeParser(XmlParser):
01314 
01315     def __init__(self, stream):
01316         XmlParser.__init__(self, stream)
01317         self.tables = {}
01318 
01319     def parse(self):
01320         self.element_start('AQtime_Results')
01321         self.parse_headers()
01322         results = self.parse_results()
01323         self.element_end('AQtime_Results')
01324         return self.build_profile(results) 
01325 
01326     def parse_headers(self):
01327         self.element_start('HEADERS')
01328         while self.token.type == XML_ELEMENT_START:
01329             self.parse_table_header()
01330         self.element_end('HEADERS')
01331 
01332     def parse_table_header(self):
01333         attrs = self.element_start('TABLE_HEADER')
01334         name = attrs['NAME']
01335         id = int(attrs['ID'])
01336         field_types = []
01337         field_names = []
01338         while self.token.type == XML_ELEMENT_START:
01339             field_type, field_name = self.parse_table_field()
01340             field_types.append(field_type)
01341             field_names.append(field_name)
01342         self.element_end('TABLE_HEADER')
01343         self.tables[id] = name, field_types, field_names
01344 
01345     def parse_table_field(self):
01346         attrs = self.element_start('TABLE_FIELD')
01347         type = attrs['TYPE']
01348         name = self.character_data()
01349         self.element_end('TABLE_FIELD')
01350         return type, name
01351 
01352     def parse_results(self):
01353         self.element_start('RESULTS')
01354         table = self.parse_data()
01355         self.element_end('RESULTS')
01356         return table
01357 
01358     def parse_data(self):
01359         rows = []
01360         attrs = self.element_start('DATA')
01361         table_id = int(attrs['TABLE_ID'])
01362         table_name, field_types, field_names = self.tables[table_id]
01363         table = AQtimeTable(table_name, field_names)
01364         while self.token.type == XML_ELEMENT_START:
01365             row, children = self.parse_row(field_types)
01366             table.add_row(row, children)
01367         self.element_end('DATA')
01368         return table
01369 
01370     def parse_row(self, field_types):
01371         row = [None]*len(field_types)
01372         children = []
01373         self.element_start('ROW')
01374         while self.token.type == XML_ELEMENT_START:
01375             if self.token.name_or_data == 'FIELD':
01376                 field_id, field_value = self.parse_field(field_types)
01377                 row[field_id] = field_value
01378             elif self.token.name_or_data == 'CHILDREN':
01379                 children = self.parse_children()
01380             else:
01381                 raise XmlTokenMismatch("<FIELD ...> or <CHILDREN ...>", self.token)
01382         self.element_end('ROW')
01383         return row, children
01384 
01385     def parse_field(self, field_types):
01386         attrs = self.element_start('FIELD')
01387         id = int(attrs['ID'])
01388         type = field_types[id]
01389         value = self.character_data()
01390         if type == 'Integer':
01391             value = int(value)
01392         elif type == 'Float':
01393             value = float(value)
01394         elif type == 'Address':
01395             value = int(value)
01396         elif type == 'String':
01397             pass
01398         else:
01399             assert False
01400         self.element_end('FIELD')
01401         return id, value
01402 
01403     def parse_children(self):
01404         children = []
01405         self.element_start('CHILDREN')
01406         while self.token.type == XML_ELEMENT_START:
01407             table = self.parse_data()
01408             assert table.name not in children
01409             children.append(table)
01410         self.element_end('CHILDREN')
01411         return children
01412 
01413     def build_profile(self, results):
01414         assert results.name == 'Routines'
01415         profile = Profile()
01416         profile[TIME] = 0.0
01417         for fields, tables in results:
01418             function = self.build_function(fields)
01419             children = tables['Children']
01420             for fields, _ in children:
01421                 call = self.build_call(fields)
01422                 function.add_call(call)
01423             profile.add_function(function)
01424             profile[TIME] = profile[TIME] + function[TIME]
01425         profile[TOTAL_TIME] = profile[TIME]
01426         profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME)
01427         return profile
01428     
01429     def build_function(self, fields):
01430         function = Function(self.build_id(fields), self.build_name(fields))
01431         function[TIME] = fields['Time']
01432         function[TOTAL_TIME] = fields['Time with Children']
01433         #function[TIME_RATIO] = fields['% Time']/100.0
01434         #function[TOTAL_TIME_RATIO] = fields['% with Children']/100.0
01435         return function
01436 
01437     def build_call(self, fields):
01438         call = Call(self.build_id(fields))
01439         call[TIME] = fields['Time']
01440         call[TOTAL_TIME] = fields['Time with Children']
01441         #call[TIME_RATIO] = fields['% Time']/100.0
01442         #call[TOTAL_TIME_RATIO] = fields['% with Children']/100.0
01443         return call
01444 
01445     def build_id(self, fields):
01446         return ':'.join([fields['Module Name'], fields['Unit Name'], fields['Routine Name']])
01447 
01448     def build_name(self, fields):
01449         # TODO: use more fields
01450         return fields['Routine Name']
01451 
01452 
01453 class PstatsParser:
01454     """Parser python profiling statistics saved with te pstats module."""
01455 
01456     def __init__(self, *filename):
01457         import pstats
01458         self.stats = pstats.Stats(*filename)
01459         self.profile = Profile()
01460         self.function_ids = {}
01461 
01462     def get_function_name(self, (filename, line, name)):
01463         module = os.path.splitext(filename)[0]
01464         module = os.path.basename(module)
01465         return "%s:%d:%s" % (module, line, name)
01466 
01467     def get_function(self, key):
01468         try:
01469             id = self.function_ids[key]
01470         except KeyError:
01471             id = len(self.function_ids)
01472             name = self.get_function_name(key)
01473             function = Function(id, name)
01474             self.profile.functions[id] = function
01475             self.function_ids[key] = id
01476         else:
01477             function = self.profile.functions[id]
01478         return function
01479 
01480     def parse(self):
01481         self.profile[TIME] = 0.0
01482         self.profile[TOTAL_TIME] = self.stats.total_tt
01483         for fn, (cc, nc, tt, ct, callers) in self.stats.stats.iteritems():
01484             callee = self.get_function(fn)
01485             callee[CALLS] = nc
01486             callee[TOTAL_TIME] = ct
01487             callee[TIME] = tt
01488             self.profile[TIME] += tt
01489             self.profile[TOTAL_TIME] = max(self.profile[TOTAL_TIME], ct)
01490             for fn, value in callers.iteritems():
01491                 caller = self.get_function(fn)
01492                 call = Call(callee.id)
01493                 if isinstance(value, tuple):
01494                     for i in xrange(0, len(value), 4):
01495                         nc, cc, tt, ct = value[i:i+4]
01496                         if CALLS in call:
01497                             call[CALLS] += cc
01498                         else:
01499                             call[CALLS] = cc
01500 
01501                         if TOTAL_TIME in call:
01502                             call[TOTAL_TIME] += ct
01503                         else:
01504                             call[TOTAL_TIME] = ct
01505 
01506                 else:
01507                     call[CALLS] = value
01508                     call[TOTAL_TIME] = ratio(value, nc)*ct
01509 
01510                 caller.add_call(call)
01511         #self.stats.print_stats()
01512         #self.stats.print_callees()
01513 
01514         # Compute derived events
01515         self.profile.validate()
01516         self.profile.ratio(TIME_RATIO, TIME)
01517         self.profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME)
01518 
01519         return self.profile
01520 
01521 
01522 class Theme:
01523 
01524     def __init__(self, 
01525             bgcolor = (0.0, 0.0, 1.0),
01526             mincolor = (0.0, 0.0, 0.0),
01527             maxcolor = (0.0, 0.0, 1.0),
01528             fontname = "Arial",
01529             minfontsize = 10.0,
01530             maxfontsize = 10.0,
01531             minpenwidth = 0.5,
01532             maxpenwidth = 4.0,
01533             gamma = 2.2):
01534         self.bgcolor = bgcolor
01535         self.mincolor = mincolor
01536         self.maxcolor = maxcolor
01537         self.fontname = fontname
01538         self.minfontsize = minfontsize
01539         self.maxfontsize = maxfontsize
01540         self.minpenwidth = minpenwidth
01541         self.maxpenwidth = maxpenwidth
01542         self.gamma = gamma
01543 
01544     def graph_bgcolor(self):
01545         return self.hsl_to_rgb(*self.bgcolor)
01546 
01547     def graph_fontname(self):
01548         return self.fontname
01549 
01550     def graph_fontsize(self):
01551         return self.minfontsize
01552 
01553     def node_bgcolor(self, weight):
01554         return self.color(weight)
01555 
01556     def node_fgcolor(self, weight):
01557         return self.graph_bgcolor()
01558 
01559     def node_fontsize(self, weight):
01560         return self.fontsize(weight)
01561 
01562     def edge_color(self, weight):
01563         return self.color(weight)
01564 
01565     def edge_fontsize(self, weight):
01566         return self.fontsize(weight)
01567 
01568     def edge_penwidth(self, weight):
01569         return max(weight*self.maxpenwidth, self.minpenwidth)
01570 
01571     def edge_arrowsize(self, weight):
01572         return 0.5 * math.sqrt(self.edge_penwidth(weight))
01573 
01574     def fontsize(self, weight):
01575         return max(weight**2 * self.maxfontsize, self.minfontsize)
01576 
01577     def color(self, weight):
01578         weight = min(max(weight, 0.0), 1.0)
01579     
01580         hmin, smin, lmin = self.mincolor
01581         hmax, smax, lmax = self.maxcolor
01582 
01583         h = hmin + math.sqrt(math.sqrt(weight))*(hmax - hmin)
01584         s = smin + math.sqrt(math.sqrt(weight))*(smax - smin)
01585         l = lmin + math.sqrt(math.sqrt(weight))*(lmax - lmin)
01586 
01587         return self.hsl_to_rgb(h, s, l)
01588 
01589     def hsl_to_rgb(self, h, s, l):
01590         """Convert a color from HSL color-model to RGB.
01591 
01592         See also:
01593         - http://www.w3.org/TR/css3-color/#hsl-color
01594         """
01595 
01596         h = h % 1.0
01597         s = min(max(s, 0.0), 1.0)
01598         l = min(max(l, 0.0), 1.0)
01599 
01600         if l <= 0.5:
01601             m2 = l*(s + 1.0)
01602         else:
01603             m2 = l + s - l*s
01604         m1 = l*2.0 - m2
01605         r = self._hue_to_rgb(m1, m2, h + 1.0/3.0)
01606         g = self._hue_to_rgb(m1, m2, h)
01607         b = self._hue_to_rgb(m1, m2, h - 1.0/3.0)
01608 
01609         # Apply gamma correction
01610         r **= self.gamma
01611         g **= self.gamma
01612         b **= self.gamma
01613 
01614         return (r, g, b)
01615 
01616     def _hue_to_rgb(self, m1, m2, h):
01617         if h < 0.0:
01618             h += 1.0
01619         elif h > 1.0:
01620             h -= 1.0
01621         if h*6 < 1.0:
01622             return m1 + (m2 - m1)*h*6.0
01623         elif h*2 < 1.0:
01624             return m2
01625         elif h*3 < 2.0:
01626             return m1 + (m2 - m1)*(2.0/3.0 - h)*6.0
01627         else:
01628             return m1
01629 
01630 
01631 TEMPERATURE_COLORMAP = Theme(
01632     mincolor = (2.0/3.0, 0.80, 0.25), # dark blue
01633     maxcolor = (0.0, 1.0, 0.5), # satured red
01634     gamma = 1.0
01635 )
01636 
01637 PINK_COLORMAP = Theme(
01638     mincolor = (0.0, 1.0, 0.90), # pink
01639     maxcolor = (0.0, 1.0, 0.5), # satured red
01640 )
01641 
01642 GRAY_COLORMAP = Theme(
01643     mincolor = (0.0, 0.0, 0.85), # light gray
01644     maxcolor = (0.0, 0.0, 0.0), # black
01645 )
01646 
01647 BW_COLORMAP = Theme(
01648     minfontsize = 8.0,
01649     maxfontsize = 24.0,
01650     mincolor = (0.0, 0.0, 0.0), # black
01651     maxcolor = (0.0, 0.0, 0.0), # black
01652     minpenwidth = 0.1,
01653     maxpenwidth = 8.0,
01654 )
01655 
01656 
01657 class DotWriter:
01658     """Writer for the DOT language.
01659 
01660     See also:
01661     - "The DOT Language" specification
01662       http://www.graphviz.org/doc/info/lang.html
01663     """
01664 
01665     def __init__(self, fp):
01666         self.fp = fp
01667 
01668     def graph(self, profile, theme):
01669         self.begin_graph()
01670 
01671         fontname = theme.graph_fontname()
01672 
01673         self.attr('graph', fontname=fontname, ranksep=0.25, nodesep=0.125)
01674         self.attr('node', fontname=fontname, shape="box", style="filled,rounded", fontcolor="white", width=0, height=0)
01675         self.attr('edge', fontname=fontname)
01676 
01677         for function in profile.functions.itervalues():
01678             labels = []
01679             for event in PROCESS, MODULE:
01680                 if event in function.events:
01681                     label = event.format(function[event])
01682                     labels.append(label)
01683             labels.append(function.name)
01684             for event in TOTAL_TIME_RATIO, TIME_RATIO, CALLS:
01685                 if event in function.events:
01686                     label = event.format(function[event])
01687                     labels.append(label)
01688 
01689             try:
01690                 #weight = function[PRUNE_RATIO]
01691                 weight = function[TIME_RATIO]
01692             except UndefinedEvent:
01693                 weight = 0.0
01694 
01695             label = '\n'.join(labels)
01696             self.node(function.id, 
01697                 label = label, 
01698                 color = self.color(theme.node_bgcolor(weight)), 
01699                 fontcolor = self.color(theme.node_fgcolor(weight)), 
01700                 fontsize = "%.2f" % theme.node_fontsize(weight),
01701             )
01702 
01703             for call in function.calls.itervalues():
01704                 callee = profile.functions[call.callee_id]
01705 
01706                 labels = []
01707                 for event in TOTAL_TIME_RATIO, CALLS:
01708                     if event in call.events:
01709                         label = event.format(call[event])
01710                         labels.append(label)
01711 
01712                 try:
01713                     weight = call[PRUNE_RATIO]
01714                 except UndefinedEvent:
01715                     try:
01716                         weight = callee[PRUNE_RATIO]
01717                     except UndefinedEvent:
01718                         weight = 0.0
01719 
01720                 label = '\n'.join(labels)
01721 
01722                 self.edge(function.id, call.callee_id, 
01723                     label = label, 
01724                     color = self.color(theme.edge_color(weight)), 
01725                     fontcolor = self.color(theme.edge_color(weight)),
01726                     fontsize = "%.2f" % theme.edge_fontsize(weight), 
01727                     penwidth = "%.2f" % theme.edge_penwidth(weight), 
01728                     labeldistance = "%.2f" % theme.edge_penwidth(weight), 
01729                     arrowsize = "%.2f" % theme.edge_arrowsize(weight),
01730                 )
01731 
01732         self.end_graph()
01733 
01734     def begin_graph(self):
01735         self.write('digraph {\n')
01736 
01737     def end_graph(self):
01738         self.write('}\n')
01739 
01740     def attr(self, what, **attrs):
01741         self.write("\t")
01742         self.write(what)
01743         self.attr_list(attrs)
01744         self.write(";\n")
01745 
01746     def node(self, node, **attrs):
01747         self.write("\t")
01748         self.id(node)
01749         self.attr_list(attrs)
01750         self.write(";\n")
01751 
01752     def edge(self, src, dst, **attrs):
01753         self.write("\t")
01754         self.id(src)
01755         self.write(" -> ")
01756         self.id(dst)
01757         self.attr_list(attrs)
01758         self.write(";\n")
01759 
01760     def attr_list(self, attrs):
01761         if not attrs:
01762             return
01763         self.write(' [')
01764         first = True
01765         for name, value in attrs.iteritems():
01766             if first:
01767                 first = False
01768             else:
01769                 self.write(", ")
01770             self.id(name)
01771             self.write('=')
01772             self.id(value)
01773         self.write(']')
01774 
01775     def id(self, id):
01776         if isinstance(id, (int, float)):
01777             s = str(id)
01778         elif isinstance(id, basestring):
01779             if id.isalnum():
01780                 s = id
01781             else:
01782                 s = self.escape(id)
01783         else:
01784             raise TypeError
01785         self.write(s)
01786 
01787     def color(self, (r, g, b)):
01788 
01789         def float2int(f):
01790             if f <= 0.0:
01791                 return 0
01792             if f >= 1.0:
01793                 return 255
01794             return int(255.0*f + 0.5)
01795 
01796         return "#" + "".join(["%02x" % float2int(c) for c in (r, g, b)])
01797 
01798     def escape(self, s):
01799         s = s.encode('utf-8')
01800         s = s.replace('\\', r'\\')
01801         s = s.replace('\n', r'\n')
01802         s = s.replace('\t', r'\t')
01803         s = s.replace('"', r'\"')
01804         return '"' + s + '"'
01805 
01806     def write(self, s):
01807         self.fp.write(s)
01808 
01809 
01810 class Main:
01811     """Main program."""
01812 
01813     themes = {
01814             "color": TEMPERATURE_COLORMAP,
01815             "pink": PINK_COLORMAP,
01816             "gray": GRAY_COLORMAP,
01817             "bw": BW_COLORMAP,
01818     }
01819 
01820     def main(self):
01821         """Main program."""
01822 
01823         parser = optparse.OptionParser(
01824             usage="\n\t%prog [options] [file] ...",
01825             version="%%prog %s" % __version__)
01826         parser.add_option(
01827             '-o', '--output', metavar='FILE',
01828             type="string", dest="output",
01829             help="output filename [stdout]")
01830         parser.add_option(
01831             '-n', '--node-thres', metavar='PERCENTAGE',
01832             type="float", dest="node_thres", default=0.5,
01833             help="eliminate nodes below this threshold [default: %default]")
01834         parser.add_option(
01835             '-e', '--edge-thres', metavar='PERCENTAGE',
01836             type="float", dest="edge_thres", default=0.1,
01837             help="eliminate edges below this threshold [default: %default]")
01838         parser.add_option(
01839             '-f', '--format',
01840             type="choice", choices=('prof', 'oprofile', 'pstats', 'shark', 'aqtime'),
01841             dest="format", default="prof",
01842             help="profile format: prof, oprofile, shark, aqtime, or pstats [default: %default]")
01843         parser.add_option(
01844             '-c', '--colormap',
01845             type="choice", choices=('color', 'pink', 'gray', 'bw'),
01846             dest="theme", default="color",
01847             help="color map: color, pink, gray, or bw [default: %default]")
01848         parser.add_option(
01849             '-s', '--strip',
01850             action="store_true",
01851             dest="strip", default=False,
01852             help="strip function parameters, template parameters, and const modifiers from demangled C++ function names")
01853         parser.add_option(
01854             '-w', '--wrap',
01855             action="store_true",
01856             dest="wrap", default=False,
01857             help="wrap function names")
01858         (self.options, self.args) = parser.parse_args(sys.argv[1:])
01859 
01860         if len(self.args) > 1 and self.options.format != 'pstats':
01861             parser.error('incorrect number of arguments')
01862 
01863         try:
01864             self.theme = self.themes[self.options.theme]
01865         except KeyError:
01866             parser.error('invalid colormap \'%s\'' % self.options.theme)
01867 
01868         if self.options.format == 'prof':
01869             if not self.args:
01870                 fp = sys.stdin
01871             else:
01872                 fp = open(self.args[0], 'rt')
01873             parser = GprofParser(fp)
01874         elif self.options.format == 'oprofile':
01875             if not self.args:
01876                 fp = sys.stdin
01877             else:
01878                 fp = open(self.args[0], 'rt')
01879             parser = OprofileParser(fp)
01880         elif self.options.format == 'pstats':
01881             if not self.args:
01882                 parser.error('at least a file must be specified for pstats input')
01883             parser = PstatsParser(*self.args)
01884         elif self.options.format == 'shark':
01885             if not self.args:
01886                 fp = sys.stdin
01887             else:
01888                 fp = open(self.args[0], 'rt')
01889             parser = SharkParser(fp)
01890         elif self.options.format == 'aqtime':
01891             if not self.args:
01892                 fp = sys.stdin
01893             else:
01894                 fp = open(self.args[0], 'rt')
01895             parser = AQtimeParser(fp)
01896         else:
01897             parser.error('invalid format \'%s\'' % self.options.format)
01898 
01899         self.profile = parser.parse()
01900         
01901         if self.options.output is None:
01902             self.output = sys.stdout
01903         else:
01904             self.output = open(self.options.output, 'wt')
01905 
01906         self.write_graph()
01907 
01908     _parenthesis_re = re.compile(r'\([^()]*\)')
01909     _angles_re = re.compile(r'<[^<>]*>')
01910     _const_re = re.compile(r'\s+const$')
01911 
01912     def strip_function_name(self, name):
01913         """Remove extraneous information from C++ demangled function names."""
01914 
01915         # Strip function parameters from name by recursively removing paired parenthesis
01916         while True:
01917             name, n = self._parenthesis_re.subn('', name)
01918             if not n:
01919                 break
01920 
01921         # Strip const qualifier
01922         name = self._const_re.sub('', name)
01923 
01924         # Strip template parameters from name by recursively removing paired angles
01925         while True:
01926             name, n = self._angles_re.subn('', name)
01927             if not n:
01928                 break
01929 
01930         return name
01931 
01932     def wrap_function_name(self, name):
01933         """Split the function name on multiple lines."""
01934 
01935         if len(name) > 32:
01936             ratio = 2.0/3.0
01937             height = max(int(len(name)/(1.0 - ratio) + 0.5), 1)
01938             width = max(len(name)/height, 32)
01939             # TODO: break lines in symbols
01940             name = textwrap.fill(name, width, break_long_words=False)
01941 
01942         # Take away spaces
01943         name = name.replace(", ", ",")
01944         name = name.replace("> >", ">>")
01945         name = name.replace("> >", ">>") # catch consecutive
01946 
01947         return name
01948 
01949     def compress_function_name(self, name):
01950         """Compress function name according to the user preferences."""
01951 
01952         if self.options.strip:
01953             name = self.strip_function_name(name)
01954 
01955         if self.options.wrap:
01956             name = self.wrap_function_name(name)
01957 
01958         # TODO: merge functions with same resulting name
01959 
01960         return name
01961 
01962     def write_graph(self):
01963         dot = DotWriter(self.output)
01964         profile = self.profile
01965         profile.prune(self.options.node_thres/100.0, self.options.edge_thres/100.0)
01966 
01967         for function in profile.functions.itervalues():
01968             function.name = self.compress_function_name(function.name)
01969 
01970         dot.graph(profile, self.theme)
01971 
01972 
01973 if __name__ == '__main__':
01974     Main().main()