#!/usr/bin/python

import re
import math
import cairo
|
class ReadFile:
    """Context manager that reads a text file one line at a time.

    On entry the first line is pre-fetched into ``self.line`` and
    ``self.lineno`` starts at 1; call :meth:`getnext` to advance.  The
    object tests false once EOF is reached (``self.line == ""``).
    """

    def __init__(self, filename):
        self.filename = filename

    def __enter__(self):
        self.f = open(self.filename, 'r')
        self.lineno = 0
        # prime self.line so callers can inspect it immediately
        self.getnext()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # always close the file, even if the with-body raised
        self.f.close()

    def __nonzero__(self):
        # Python 2 truth test: false at end of file
        return self.line != ""

    # Python 3 spells the truth-test hook __bool__; alias it so the class
    # behaves identically under either interpreter
    __bool__ = __nonzero__

    def getnext(self):
        """Advance to the next line; at EOF ``self.line`` becomes ""."""
        self.lineno += 1
        self.line = self.f.readline()
def read_times(rf):
    """Consume consecutive lines of space-separated integers from rf.

    Reading stops at the first line that does not begin with an optionally
    signed integer followed by a space.  All values collected are returned
    as one list, in reverse of the order they appeared.
    """
    values = []

    while re.match('[+-]?[0-9]+ ', rf.line):
        tokens = rf.line.rstrip().split(' ')
        values.extend(int(token) for token in tokens)
        rf.getnext()

    return values[::-1]
class Thread:
    """One profiled thread: a display name plus its list of events.

    A class-level counter hands out a unique, increasing thread_number to
    each instance.
    """

    # next thread_number to assign (class-wide counter)
    thread_number = 0

    def __init__(self, thread_name):
        # no one cares about the thread address: strip a "(0x...)" token
        # out of the middle of the name, keeping the parts on either side
        match = re.match(r'(.*) \(0x.*?\) (.*)', thread_name)
        if match:
            thread_name = match.group(1) + " " + match.group(2)

        self.thread_name = thread_name
        self.thread_number = Thread.thread_number
        # events belonging to this thread, appended by Event.__init__
        self.events = []
        Thread.thread_number += 1
# flat list of every Event ever created, across all threads
all_events = []


class Event:
    """A single timed interval (or memory record) on a thread's timeline.

    Classification: gates at location "memory" are memory records; other
    gates whose name contains "work" or "wait" are work/wait intervals;
    everything else is an ordinary gate.

    For memory records the incoming "stop" value is actually the allocation
    size (presumably negative for frees -- verify against the writer), and
    the event is treated as instantaneous at ``start``.

    The constructor registers the event on both ``thread.events`` and the
    module-level ``all_events`` list.
    """

    def __init__(self, thread, gate_location, gate_name, start, stop):
        self.thread = thread
        self.gate_location = gate_location
        self.gate_name = gate_name

        self.work = False
        self.wait = False
        self.memory = False
        if gate_location == "memory":
            self.memory = True
        elif "work" in gate_name:
            # substring test; equivalent to the old re.match('.*work.*', ...)
            self.work = True
        elif "wait" in gate_name:
            self.wait = True

        if self.memory:
            # instantaneous: "stop" carries the size, not a timestamp
            self.start = start
            self.stop = start
            self.size = stop
        else:
            self.start = start
            self.stop = stop

        thread.events.append(self)
        all_events.append(self)
# fixed input name: the profile dump this tool expects to find
input_filename = 'vips-profile.txt'

thread_id = 0
threads = []
n_events = 0
print 'reading from', input_filename
with ReadFile(input_filename) as rf:
    # rf tests false at EOF
    while rf:
        # skip blank lines and "#" comments
        if rf.line.rstrip() == "":
            rf.getnext()
            continue
        if rf.line[0] == "#":
            rf.getnext()
            continue

        # each section opens with "thread: <name>"
        match = re.match('thread: (.*)', rf.line)
        if not match:
            print 'parse error line %d, expected "thread"' % rf.lineno
            # NOTE(review): after reporting the error we still fall through
            # to match.group(1) below, which raises AttributeError on None --
            # confirm whether this should abort/skip instead

        # append a running id so duplicate thread names stay distinct
        thread_name = match.group(1) + " " + str(thread_id)
        thread_id += 1
        thread = Thread(thread_name)
        threads.append(thread)
        rf.getnext()

        # read this thread's "gate:" records until something else appears
        while True:
            # "gate: <location>[: <name>]" -- the name part is optional,
            # so group(3) may be None
            match = re.match('^gate: (.*?)(: (.*))?$', rf.line)
            if not match:
                break
            gate_location = match.group(1)
            gate_name = match.group(3)
            rf.getnext()

            # a gate without a "start:" block is skipped
            match = re.match('start:', rf.line)
            if not match:
                continue
            rf.getnext()

            start = read_times(rf)

            # likewise a gate without a "stop:" block
            match = re.match('stop:', rf.line)
            if not match:
                continue
            rf.getnext()

            stop = read_times(rf)

            if len(start) != len(stop):
                print 'start and stop length mismatch'

            # zip() silently pairs up to the shorter list on mismatch
            for a, b in zip(start, stop):
                Event(thread, gate_location, gate_name, a, b)
                n_events += 1
for thread in threads:
|
|
thread.events.sort(lambda x, y: cmp(x.start, y.start))
|
|
|
|
all_events.sort(lambda x, y: cmp(x.start, y.start))
|
|
|
|
print 'loaded %d events' % n_events
|
|
|
|
# move time axis to secs of computation
# raw timestamps are in ticks; rescale so t=0 is the earliest event seen
ticks_per_sec = 1000000.0
# seed the minimum with some real timestamp, then scan everything
first_time = threads[0].events[0].start
last_time = 0
for thread in threads:
    for event in thread.events:
        if event.start < first_time:
            first_time = event.start
        if event.stop > last_time:
            last_time = event.stop

# shift and scale every event in place to seconds since first_time
for thread in threads:
    for event in thread.events:
        event.start = (event.start - first_time) / ticks_per_sec
        event.stop = (event.stop - first_time) / ticks_per_sec

last_time = (last_time - first_time) / ticks_per_sec
first_time = 0

print 'last time =', last_time
# calculate some simple stats
for thread in threads:
    # start/stop seeded at the extremes so min/max scans below work
    thread.start = last_time
    thread.stop = 0
    thread.wait = 0          # total seconds spent in "wait" gates
    thread.work = 0          # total seconds spent in "work" gates
    thread.memory = 0        # running net bytes allocated
    thread.peak_memory = 0   # high-water mark of thread.memory
    for event in thread.events:
        if event.start < thread.start:
            thread.start = event.start
        if event.stop > thread.stop:
            thread.stop = event.stop
        if event.wait:
            thread.wait += event.stop - event.start
        if event.work:
            thread.work += event.stop - event.start
        if event.memory:
            thread.memory += event.size
            if thread.memory > thread.peak_memory:
                thread.peak_memory = thread.memory

    # wall-clock lifetime of the thread
    thread.alive = thread.stop - thread.start

    # hide very short-lived threads
    thread.hide = thread.alive < 0.01

# per-thread summary table; percentages are of the thread's lifetime,
# "unkn" is time spent neither working nor waiting
print 'name\t\talive\twait%\twork%\tunkn%\tmemory\tpeakm'
for thread in threads:
    if thread.hide:
        continue

    wait_percent = 100 * thread.wait / thread.alive
    work_percent = 100 * thread.work / thread.alive
    unkn_percent = 100 - 100 * (thread.work + thread.wait) / thread.alive

    # trailing commas keep all fields on one output line (Python 2 print)
    print '%13s\t%6.2g\t' % (thread.thread_name, thread.alive),
    print '%.3g\t%.3g\t%.3g\t' % (wait_percent, work_percent, unkn_percent),
    print '%.3g\t' % (float(thread.memory) / (1024 * 1024)),
    print '%.3g\t' % (float(thread.peak_memory) / (1024 * 1024))

# global memory trace over all threads, in event-start order
memory = 0
peak_memory = 0
for event in all_events:
    if event.memory:
        memory += event.size
        if memory > peak_memory:
            peak_memory = memory

print 'peak memory = %.3g MB' % (float(peak_memory) / (1024 * 1024))
# a non-zero final balance means allocations were never freed
if memory != 0:
    print 'leak! final memory = %.3g MB' % (float(memory) / (1024 * 1024))
# do two gates overlap?
def is_overlap(events, gate_name1, gate_name2):
    """Return True if any event of gate_name1 overlaps in time with any
    event of gate_name2 within the given event list.

    Overlap means either endpoint of an event of gate 1 falls strictly
    inside an event of gate 2.  O(n^2) over the events list.
    """
    for event1 in events:
        if event1.gate_name != gate_name1:
            continue

        for event2 in events:
            if event2.gate_name != gate_name2:
                continue

            # if either endpoint of 1 is within 2
            # (the original first test checked full containment of 1 in 2,
            # which is implied by the stop-endpoint test and missed the
            # start-endpoint-only case)
            if event2.start < event1.start < event2.stop:
                return True
            if event2.start < event1.stop < event2.stop:
                return True

    return False
# allocate a y position for each gate
# each visible thread gets a band of timelines; work/wait/memory events sit
# on the thread's first line (y == 0), other gates are packed onto as few
# extra lines as possible without overlapping in time
total_y = 0
for thread in threads:
    if thread.hide:
        continue

    # absolute row at which this thread's band begins
    thread.total_y = total_y

    n_thread_events = len(thread.events)
    if n_thread_events == 0:
        continue

    # first pass .. move work and wait events to y == 0
    print 'positioning work/wait/mem ...'
    i = 0
    # gate name -> allocated y within this thread's band
    gate_positions = {}
    for event in thread.events:
        i += 1
        # Python 2 integer division: prints roughly every 1% of events
        if i % (1 + n_thread_events / 100) == 0:
            print '%d%% complete \r' % (100 * i / n_thread_events),

        if not event.work and not event.wait and not event.memory:
            continue

        # works and waits must not overlap
        if event.work or event.wait:
            if not event.gate_name in gate_positions:
                # warn (but carry on) if this gate collides in time with a
                # gate already placed on line 0
                for gate_name in gate_positions:
                    if is_overlap(thread.events, event.gate_name, gate_name):
                        print 'gate overlap on thread', thread.thread_name
                        print '\t', event.gate_location
                        print '\t', event.gate_name
                        print '\t', gate_name
                        break

                gate_positions[event.gate_name] = 0

        # work/wait/memory all render on the band's first line
        event.y = 0
        event.total_y = total_y

    # second pass: move all other events to non-overlapping ys
    print 'finding maximal sets of non-overlapping gates ...'
    y = 1
    i = 0
    for event in thread.events:
        i += 1
        if i % (1 + n_thread_events / 100) == 0:
            print '%d%% complete \r' % (100 * i / n_thread_events),

        # line-0 events were placed in the first pass
        if event.work or event.wait or event.memory:
            continue

        if not event.gate_name in gate_positions:
            # look at all the ys we've allocated previously and see if we can
            # add this gate to one of them
            for gate_y in range(1, y):
                found_overlap = False
                for gate_name in gate_positions:
                    if gate_positions[gate_name] != gate_y:
                        continue

                    if is_overlap(thread.events, event.gate_name, gate_name):
                        found_overlap = True
                        break

                if not found_overlap:
                    gate_positions[event.gate_name] = gate_y
                    break

            # failure? add a new y
            if not event.gate_name in gate_positions:
                gate_positions[event.gate_name] = y
                y += 1

        event.y = gate_positions[event.gate_name]

    # third pass: flip the order of the ys to get the lowest-level ones at the
    # top, next to the wait/work line
    print 'ordering timelines by granularity ...'
    for event in thread.events:
        if event.work or event.wait or event.memory:
            continue

        event.y = y - event.y
        # absolute row = band origin + (flipped) y within the band
        event.total_y = total_y + event.y

    # advance the band origin past the lines this thread used
    total_y += y
# layout constants for the SVG output
PIXELS_PER_SECOND = 1000   # horizontal scale
PIXELS_PER_GATE = 20       # vertical spacing per timeline row
LEFT_BORDER = 130          # room for thread-name labels on the left
BAR_HEIGHT = 5             # thickness of an event bar
MEM_HEIGHT = 100           # height of the memory graph strip at the bottom
# extra margins: 20px on the right, 30px under the memory strip for the axis
WIDTH = int(LEFT_BORDER + last_time * PIXELS_PER_SECOND) + 20
HEIGHT = int(total_y * PIXELS_PER_GATE) + MEM_HEIGHT + 30

output_filename = "vips-profile.svg"
print 'writing to', output_filename

surface = cairo.SVGSurface(output_filename, WIDTH, HEIGHT)

ctx = cairo.Context(surface)
ctx.select_font_face('Sans')
ctx.set_font_size(15)

# fill the whole canvas with a dark blue background
ctx.rectangle(0, 0, WIDTH, HEIGHT)
ctx.set_source_rgba(0.0, 0.0, 0.3, 1.0)
ctx.fill()
def draw_event(ctx, event):
    # Draw one event as a coloured bar on the cairo context; geometry comes
    # from the module-level layout constants and the event's precomputed
    # total_y row.
    left = event.start * PIXELS_PER_SECOND + LEFT_BORDER
    top = event.total_y * PIXELS_PER_GATE + BAR_HEIGHT / 2
    width = (event.stop - event.start) * PIXELS_PER_SECOND
    height = BAR_HEIGHT

    if event.memory:
        # memory events are instantaneous: a 1px-wide half-height tick,
        # dropped below the gate line
        # NOTE(review): "/= 2" is integer division under Python 2 (5 -> 2)
        width = 1
        height /= 2
        top += BAR_HEIGHT

    ctx.rectangle(left, top, width, height)

    # colour code: red = wait, green = work, white = memory, blue = other
    if event.wait:
        ctx.set_source_rgb(0.9, 0.1, 0.1)
    elif event.work:
        ctx.set_source_rgb(0.1, 0.9, 0.1)
    elif event.memory:
        ctx.set_source_rgb(1.0, 1.0, 1.0)
    else:
        ctx.set_source_rgb(0.1, 0.1, 0.9)

    ctx.fill()

    # ordinary gates get a centred yellow name label under the bar
    if not event.wait and not event.work and not event.memory:
        xbearing, ybearing, twidth, theight, xadvance, yadvance = \
            ctx.text_extents(event.gate_name)
        ctx.move_to(left + width / 2 - twidth / 2, top + 3 * BAR_HEIGHT)
        ctx.set_source_rgb(1.00, 0.83, 0.00)
        ctx.show_text(event.gate_name)
# draw each visible thread: a 1px white separator line, the thread name in
# the left border, then all of its events
for thread in threads:
    if thread.hide:
        continue

    ctx.rectangle(0, thread.total_y * PIXELS_PER_GATE, WIDTH, 1)
    ctx.set_source_rgb(1.00, 1.00, 1.00)
    ctx.fill()

    xbearing, ybearing, twidth, theight, xadvance, yadvance = \
        ctx.text_extents(thread.thread_name)
    ctx.move_to(0, theight + thread.total_y * PIXELS_PER_GATE + BAR_HEIGHT / 2)
    ctx.set_source_rgb(1.00, 1.00, 1.00)
    ctx.show_text(thread.thread_name)

    for event in thread.events:
        draw_event(ctx, event)

# memory graph strip sits below all the thread bands
memory_y = total_y * PIXELS_PER_GATE

label = "memory"
xbearing, ybearing, twidth, theight, xadvance, yadvance = \
    ctx.text_extents(label)
ctx.move_to(0, memory_y + theight + 8)
ctx.set_source_rgb(1.00, 1.00, 1.00)
ctx.show_text(label)

# trace cumulative memory use over time as a polyline, scaled so the peak
# touches the top of the strip
memory = 0
ctx.move_to(LEFT_BORDER, memory_y + MEM_HEIGHT)

for event in all_events:
    if event.memory:
        memory += event.size

        left = LEFT_BORDER + event.start * PIXELS_PER_SECOND
        top = memory_y + MEM_HEIGHT - (MEM_HEIGHT * memory / peak_memory)

        ctx.line_to(left, top)

ctx.set_line_width(1)
ctx.set_source_rgb(1.00, 1.00, 1.00)
ctx.stroke()

# horizontal time axis under the memory strip
axis_y = total_y * PIXELS_PER_GATE + MEM_HEIGHT

ctx.rectangle(LEFT_BORDER, axis_y, last_time * PIXELS_PER_SECOND, 1)
ctx.set_source_rgb(1.00, 1.00, 1.00)
ctx.fill()

label = "time"
xbearing, ybearing, twidth, theight, xadvance, yadvance = \
    ctx.text_extents(label)
ctx.move_to(0, axis_y + theight + 8)
ctx.set_source_rgb(1.00, 1.00, 1.00)
ctx.show_text(label)

# tick marks every 0.1s (Python 2 integer division gives the pixel step),
# each labelled with its time in seconds, centred under the tick
for t in range(0, int(last_time * PIXELS_PER_SECOND), PIXELS_PER_SECOND / 10):
    left = t + LEFT_BORDER
    top = axis_y

    ctx.rectangle(left, top, 1, 5)
    ctx.set_source_rgb(1.00, 1.00, 1.00)
    ctx.fill()

    label = str(float(t) / PIXELS_PER_SECOND)
    xbearing, ybearing, twidth, theight, xadvance, yadvance = \
        ctx.text_extents(label)
    ctx.move_to(left - twidth / 2, top + theight + 8)
    ctx.set_source_rgb(1.00, 1.00, 1.00)
    ctx.show_text(label)

# flush and close the SVG file
surface.finish()