#!/usr/bin/python import re import math import cairo class ReadFile: def __init__(self, filename): self.filename = filename def __enter__(self): self.f = open(self.filename, 'r') self.lineno = 0 self.getnext(); return self def __exit__(self, type, value, traceback): self.f.close() def __nonzero__(self): return self.line != "" def getnext(self): self.lineno += 1 self.line = self.f.readline() def read_times(rf): times = [] while True: match = re.match('[+-]?[0-9]+ ', rf.line) if not match: break times += [int(x) for x in re.split(' ', rf.line.rstrip())] rf.getnext() return times[::-1] class Thread: thread_number = 0 def __init__(self, thread_name): # no one cares about the thread address match = re.match('(.*) \(0x.*?\) (.*)', thread_name) if match: thread_name = match.group(1) + " " + match.group(2) self.thread_name = thread_name self.thread_number = Thread.thread_number self.events = [] Thread.thread_number += 1 all_events = [] class Event: def __init__(self, thread, gate_location, gate_name, start, stop): self.thread = thread self.gate_location = gate_location self.gate_name = gate_name self.work = False self.wait = False self.memory = False if gate_location == "memory": self.memory = True elif re.match('.*work.*', gate_name): self.work = True elif re.match('.*wait.*', gate_name): self.wait = True if self.memory: self.start = start self.stop = start self.size = stop else: self.start = start self.stop = stop thread.events.append(self) all_events.append(self) input_filename = 'vips-profile.txt' thread_id = 0 threads = [] n_events = 0 print 'reading from', input_filename with ReadFile(input_filename) as rf: while rf: if rf.line.rstrip() == "": rf.getnext() continue if rf.line[0] == "#": rf.getnext() continue match = re.match('thread: (.*)', rf.line) if not match: print 'parse error line %d, expected "thread"' % rf.lineno thread_name = match.group(1) + " " + str(thread_id) thread_id += 1 thread = Thread(thread_name) threads.append(thread) rf.getnext() while True: match = re.match('^gate: (.*?)(: (.*))?$', rf.line) if not match: break gate_location = match.group(1) gate_name = match.group(3) rf.getnext() match = re.match('start:', rf.line) if not match: continue rf.getnext() start = read_times(rf) match = re.match('stop:', rf.line) if not match: continue rf.getnext() stop = read_times(rf) if len(start) != len(stop): print 'start and stop length mismatch' for a, b in zip(start, stop): Event(thread, gate_location, gate_name, a, b) n_events += 1 for thread in threads: thread.events.sort(lambda x, y: cmp(x.start, y.start)) all_events.sort(lambda x, y: cmp(x.start, y.start)) print 'loaded %d events' % n_events # move time axis to secs of computation ticks_per_sec = 1000000.0 first_time = threads[0].events[0].start last_time = 0 for thread in threads: for event in thread.events: if event.start < first_time: first_time = event.start if event.stop > last_time: last_time = event.stop for thread in threads: for event in thread.events: event.start = (event.start - first_time) / ticks_per_sec event.stop = (event.stop - first_time) / ticks_per_sec last_time = (last_time - first_time) / ticks_per_sec first_time = 0 print 'last time =', last_time # calculate some simple stats for thread in threads: thread.start = last_time thread.stop = 0 thread.wait = 0 thread.work = 0 thread.memory = 0 thread.peak_memory = 0 for event in thread.events: if event.start < thread.start: thread.start = event.start if event.stop > thread.stop: thread.stop = event.stop if event.wait: thread.wait += event.stop - event.start if event.work: thread.work += event.stop - event.start if event.memory: thread.memory += event.size if thread.memory > thread.peak_memory: thread.peak_memory = thread.memory thread.alive = thread.stop - thread.start # hide very short-lived threads thread.hide = thread.alive < 0.01 print 'name\t\talive\twait%\twork%\tunkn%\tmemory\tpeakm' for thread in threads: if thread.hide: continue wait_percent = 100 * thread.wait / thread.alive work_percent = 100 * thread.work / thread.alive unkn_percent = 100 - 100 * (thread.work + thread.wait) / thread.alive print '%13s\t%6.2g\t' % (thread.thread_name, thread.alive), print '%.3g\t%.3g\t%.3g\t' % (wait_percent, work_percent, unkn_percent), print '%.3g\t' % (float(thread.memory) / (1024 * 1024)), print '%.3g\t' % (float(thread.peak_memory) / (1024 * 1024)) memory = 0 peak_memory = 0 for event in all_events: if event.memory: memory += event.size if memory > peak_memory: peak_memory = memory print 'peak memory = %.3g MB' % (float(peak_memory) / (1024 * 1024)) if memory != 0: print 'leak! final memory = %.3g MB' % (float(memory) / (1024 * 1024)) # do two gates overlap? def is_overlap(events, gate_name1, gate_name2): for event1 in events: if event1.gate_name != gate_name1: continue for event2 in events: if event2.gate_name != gate_name2: continue # events are sorted by start time, so if we've gone past event1's # stop time, we can give up if event2.start > event1.stop: break # ... or if we're before event1's start if event2.stop < event1.start: continue # if either endpoint of 1 is within 2 if event1.start > event2.start and event1.stop < event2.stop: return True if event1.stop > event2.start and event1.stop < event2.stop: return True return False # allocate a y position for each gate total_y = 0 for thread in threads: if thread.hide: continue thread.total_y = total_y n_thread_events = len(thread.events) if n_thread_events == 0: continue # first pass .. move work and wait events to y == 0 print 'positioning work/wait/mem ...' i = 0 gate_positions = {} for event in thread.events: i += 1 if i % (1 + n_thread_events / 100) == 0: print '%d%% complete \r' % (100 * i / n_thread_events), if not event.work and not event.wait and not event.memory: continue # works and waits must not overlap if event.work or event.wait: if not event.gate_name in gate_positions: for gate_name in gate_positions: if is_overlap(thread.events, event.gate_name, gate_name): print 'gate overlap on thread', thread.thread_name print '\t', event.gate_location print '\t', event.gate_name print '\t', gate_name break gate_positions[event.gate_name] = 0 event.y = 0 event.total_y = total_y # second pass: move all other events to non-overlapping ys print 'finding maximal sets of non-overlapping gates ...' y = 1 i = 0 for event in thread.events: i += 1 if i % (1 + n_thread_events / 100) == 0: print '%d%% complete \r' % (100 * i / n_thread_events), if event.work or event.wait or event.memory: continue if not event.gate_name in gate_positions: # look at all the ys we've allocated previously and see if we can # add this gate to one of them for gate_y in range(1, y): found_overlap = False for gate_name in gate_positions: if gate_positions[gate_name] != gate_y: continue if is_overlap(thread.events, event.gate_name, gate_name): found_overlap = True break if not found_overlap: gate_positions[event.gate_name] = gate_y break # failure? add a new y if not event.gate_name in gate_positions: gate_positions[event.gate_name] = y y += 1 event.y = gate_positions[event.gate_name] # third pass: flip the order of the ys to get the lowest-level ones at the # top, next to the wait/work line print 'ordering timelines by granularity ...' for event in thread.events: if event.work or event.wait or event.memory: continue event.y = y - event.y event.total_y = total_y + event.y total_y += y PIXELS_PER_SECOND = 1000 PIXELS_PER_GATE = 20 LEFT_BORDER = 130 BAR_HEIGHT = 5 MEM_HEIGHT = 100 WIDTH = int(LEFT_BORDER + last_time * PIXELS_PER_SECOND) + 20 HEIGHT = int(total_y * PIXELS_PER_GATE) + MEM_HEIGHT + 30 output_filename = "vips-profile.svg" print 'writing to', output_filename surface = cairo.SVGSurface(output_filename, WIDTH, HEIGHT) ctx = cairo.Context(surface) ctx.select_font_face('Sans') ctx.set_font_size(15) ctx.rectangle(0, 0, WIDTH, HEIGHT) ctx.set_source_rgba(0.0, 0.0, 0.3, 1.0) ctx.fill() def draw_event(ctx, event): left = event.start * PIXELS_PER_SECOND + LEFT_BORDER top = event.total_y * PIXELS_PER_GATE + BAR_HEIGHT / 2 width = (event.stop - event.start) * PIXELS_PER_SECOND height = BAR_HEIGHT if event.memory: width = 1 height /= 2 top += BAR_HEIGHT ctx.rectangle(left, top, width, height) if event.wait: ctx.set_source_rgb(0.9, 0.1, 0.1) elif event.work: ctx.set_source_rgb(0.1, 0.9, 0.1) elif event.memory: ctx.set_source_rgb(1.0, 1.0, 1.0) else: ctx.set_source_rgb(0.1, 0.1, 0.9) ctx.fill() if not event.wait and not event.work and not event.memory: xbearing, ybearing, twidth, theight, xadvance, yadvance = \ ctx.text_extents(event.gate_name) ctx.move_to(left + width / 2 - twidth / 2, top + 3 * BAR_HEIGHT) ctx.set_source_rgb(1.00, 0.83, 0.00) ctx.show_text(event.gate_name) for thread in threads: if thread.hide: continue ctx.rectangle(0, thread.total_y * PIXELS_PER_GATE, WIDTH, 1) ctx.set_source_rgb(1.00, 1.00, 1.00) ctx.fill() xbearing, ybearing, twidth, theight, xadvance, yadvance = \ ctx.text_extents(thread.thread_name) ctx.move_to(0, theight + thread.total_y * PIXELS_PER_GATE + BAR_HEIGHT / 2) ctx.set_source_rgb(1.00, 1.00, 1.00) ctx.show_text(thread.thread_name) for event in thread.events: draw_event(ctx, event) memory_y = total_y * PIXELS_PER_GATE label = "memory" xbearing, ybearing, twidth, theight, xadvance, yadvance = \ ctx.text_extents(label) ctx.move_to(0, memory_y + theight + 8) ctx.set_source_rgb(1.00, 1.00, 1.00) ctx.show_text(label) memory = 0 ctx.move_to(LEFT_BORDER, memory_y + MEM_HEIGHT) for event in all_events: if event.memory: memory += event.size left = LEFT_BORDER + event.start * PIXELS_PER_SECOND top = memory_y + MEM_HEIGHT - (MEM_HEIGHT * memory / peak_memory) ctx.line_to(left, top) ctx.set_line_width(1) ctx.set_source_rgb(1.00, 1.00, 1.00) ctx.stroke() axis_y = total_y * PIXELS_PER_GATE + MEM_HEIGHT ctx.rectangle(LEFT_BORDER, axis_y, last_time * PIXELS_PER_SECOND, 1) ctx.set_source_rgb(1.00, 1.00, 1.00) ctx.fill() label = "time" xbearing, ybearing, twidth, theight, xadvance, yadvance = \ ctx.text_extents(label) ctx.move_to(0, axis_y + theight + 8) ctx.set_source_rgb(1.00, 1.00, 1.00) ctx.show_text(label) for t in range(0, int(last_time * PIXELS_PER_SECOND), PIXELS_PER_SECOND / 10): left = t + LEFT_BORDER top = axis_y ctx.rectangle(left, top, 1, 5) ctx.set_source_rgb(1.00, 1.00, 1.00) ctx.fill() label = str(float(t) / PIXELS_PER_SECOND) xbearing, ybearing, twidth, theight, xadvance, yadvance = \ ctx.text_extents(label) ctx.move_to(left - twidth / 2, top + theight + 8) ctx.set_source_rgb(1.00, 1.00, 1.00) ctx.show_text(label) surface.finish()