2c5ee332f0
So now recycle lists are short, scale with pipeline complexity, and buffers are always appropriately sized for the image instead of being slowly sized up to the maximum size for the pipeline. Before: `$ vips sharpen k2.jpg x.jpg --radius 20` gave "memory: high-water mark 38.99 MB". After: `$ vips sharpen k2.jpg x.jpg --radius 20` gives "memory: high-water mark 29.46 MB".
453 lines
13 KiB
Python
453 lines
13 KiB
Python
#!/usr/bin/python
|
|
|
|
import math
import re
import sys

import cairo
|
|
|
|
class ReadFile:
    """Line-at-a-time file reader with one line of lookahead.

    Use as a context manager: entering opens ``filename`` and loads the
    first line.  ``line`` always holds the current line as returned by
    ``readline()`` ("" once the end of the file is reached) and ``lineno``
    counts the calls to ``getnext()``.  The object is truthy until the end
    of the file has been reached.
    """

    def __init__(self, filename):
        self.filename = filename

    def __enter__(self):
        self.f = open(self.filename, 'r')
        self.lineno = 0
        self.getnext()
        return self

    def __exit__(self, type, value, traceback):
        self.f.close()

    def __nonzero__(self):
        # readline() returns "" only at end of file; a blank line is "\n"
        return self.line != ""

    # Python 3 looks up __bool__ rather than __nonzero__; without this alias
    # the object would always be truthy there and "while rf:" would never end
    __bool__ = __nonzero__

    def getnext(self):
        """Advance to the next line of the file."""
        self.lineno += 1
        self.line = self.f.readline()
|
|
|
|
def read_times(rf):
    """Read a run of integer timestamps starting at the current line.

    Consumes consecutive lines of ``rf`` that begin with an optionally
    signed integer followed by a space, and leaves ``rf`` positioned on the
    first line that does not.

    Args:
        rf: reader exposing the current line as ``rf.line`` and a
            ``getnext()`` method that advances it.

    Returns:
        A list of every integer read, in the reverse of file order.

    Raises:
        ValueError: if a consumed line contains a field that is not an
            integer.
    """
    times = []

    while re.match('[+-]?[0-9]+ ', rf.line):
        # split() with no argument collapses runs of whitespace, so doubled
        # spaces cannot produce empty fields for int() to choke on
        times.extend(int(field) for field in rf.line.split())
        rf.getnext()

    times.reverse()
    return times
|
|
|
|
class Thread:
    """One profiled thread together with the events recorded on it.

    Attributes are plain data: ``thread_name`` (with any address stripped),
    ``thread_number`` (creation index of this instance) and ``events`` (a
    list, initially empty).
    """

    # number of Thread objects created so far; the next instance takes this
    # value as its own thread_number
    thread_number = 0

    def __init__(self, thread_name):
        # no one cares about the thread address: drop a " (0x...)" component
        # sitting between two parts of the name.  Raw string so that "\("
        # reaches the regex engine without being an invalid str escape.
        match = re.match(r'(.*) \(0x.*?\) (.*)', thread_name)
        if match:
            thread_name = match.group(1) + " " + match.group(2)

        self.thread_name = thread_name
        self.thread_number = Thread.thread_number
        self.events = []
        Thread.thread_number += 1
|
|
|
|
# every Event ever constructed, across all threads, in creation order
all_events = []


class Event:
    """A single profile record attached to a thread.

    An event is classified as exactly one of memory, work, wait, or none of
    them: ``memory`` when ``gate_location`` is "memory", otherwise ``work``
    or ``wait`` when ``gate_name`` contains that word (work takes
    precedence).

    For memory events the ``stop`` argument carries a size rather than a
    time: it is stored in ``size`` and the event is given zero duration
    (``stop == start``).  Other events have no ``size`` attribute.

    Constructing an Event appends it to ``thread.events`` and to the
    module-level ``all_events`` list.
    """

    def __init__(self, thread, gate_location, gate_name, start, stop):
        self.thread = thread
        self.gate_location = gate_location
        self.gate_name = gate_name

        # gate_name may be None (a gate with no name); classify against ""
        # in that case rather than handing None to re.match(), which raises
        name = gate_name or ""

        self.work = False
        self.wait = False
        self.memory = False
        if gate_location == "memory":
            self.memory = True
        elif re.match('.*work.*', name):
            self.work = True
        elif re.match('.*wait.*', name):
            self.wait = True

        self.start = start
        if self.memory:
            self.stop = start
            self.size = stop
        else:
            self.stop = stop

        thread.events.append(self)
        all_events.append(self)
|
|
|
|
# Load the profile: a sequence of "thread:" sections, each followed by any
# number of "gate:" records carrying a "start:" and a "stop:" list of times.
input_filename = 'vips-profile.txt'

thread_id = 0
threads = []
n_events = 0
print('reading from %s' % input_filename)
with ReadFile(input_filename) as rf:
    while rf:
        # skip blank lines and "#" comment lines
        if rf.line.rstrip() == "" or rf.line[0] == "#":
            rf.getnext()
            continue

        match = re.match('thread: (.*)', rf.line)
        if not match:
            # stop here: carrying on would call .group() on a failed match
            print('parse error line %d, expected "thread"' % rf.lineno)
            raise SystemExit(1)
        # the running index keeps names unique
        thread_name = match.group(1) + " " + str(thread_id)
        thread_id += 1
        thread = Thread(thread_name)
        threads.append(thread)
        rf.getnext()

        while True:
            # "gate: <location>" optionally followed by ": <name>";
            # gate_name is None when the name part is absent
            match = re.match('^gate: (.*?)(: (.*))?$', rf.line)
            if not match:
                break
            gate_location = match.group(1)
            gate_name = match.group(3)
            rf.getnext()

            # a gate without its start:/stop: lists contributes no events
            if not re.match('start:', rf.line):
                continue
            rf.getnext()
            start = read_times(rf)

            if not re.match('stop:', rf.line):
                continue
            rf.getnext()
            stop = read_times(rf)

            if len(start) != len(stop):
                print('start and stop length mismatch')

            # zip() pairs times up to the shorter list; any excess is dropped
            for a, b in zip(start, stop):
                Event(thread, gate_location, gate_name, a, b)
                n_events += 1

# order events by start time; key= (unlike a cmp function) is accepted by
# both Python 2 and Python 3, and the sort is stable either way
for thread in threads:
    thread.events.sort(key=lambda event: event.start)

all_events.sort(key=lambda event: event.start)

print('loaded %d events' % n_events)
|
|
|
|
# move time axis to secs of computation
ticks_per_sec = 1000000.0

# find the earliest start and latest stop over every event.  Start from
# "unknown" rather than threads[0].events[0], which does not exist when the
# profile is empty or its first thread recorded nothing.
first_time = None
last_time = 0
for thread in threads:
    for event in thread.events:
        if first_time is None or event.start < first_time:
            first_time = event.start
        if event.stop > last_time:
            last_time = event.stop
if first_time is None:
    first_time = 0

# rebase every event so the earliest one starts at 0, and convert to seconds
for thread in threads:
    for event in thread.events:
        event.start = (event.start - first_time) / ticks_per_sec
        event.stop = (event.stop - first_time) / ticks_per_sec

last_time = (last_time - first_time) / ticks_per_sec
first_time = 0

print('last time = %s' % last_time)

# calculate some simple stats
for thread in threads:
    thread.start = last_time
    thread.stop = 0
    thread.wait = 0
    thread.work = 0
    thread.memory = 0
    thread.peak_memory = 0
    for event in thread.events:
        if event.start < thread.start:
            thread.start = event.start
        if event.stop > thread.stop:
            thread.stop = event.stop

        duration = event.stop - event.start
        if event.wait:
            thread.wait += duration
        if event.work:
            thread.work += duration
        if event.memory:
            # event.size is a signed delta; track the running total and
            # the highest value it reaches
            thread.memory += event.size
            if thread.memory > thread.peak_memory:
                thread.peak_memory = thread.memory

    thread.alive = thread.stop - thread.start

    # hide very short-lived threads (this also covers threads with no
    # events, whose alive time comes out non-positive)
    thread.hide = thread.alive < 0.01

print('name\t\talive\twait%\twork%\tunkn%\tmemory\tpeakm')
for thread in threads:
    if thread.hide:
        continue

    # visible threads have alive >= 0.01, so these divisions are safe
    wait_percent = 100 * thread.wait / thread.alive
    work_percent = 100 * thread.work / thread.alive
    unkn_percent = 100 - 100 * (thread.work + thread.wait) / thread.alive

    print('%13s\t%6.2g\t%.3g\t%.3g\t%.3g\t%.3g\t%.3g\t' % (
        thread.thread_name, thread.alive,
        wait_percent, work_percent, unkn_percent,
        float(thread.memory) / (1024 * 1024),
        float(thread.peak_memory) / (1024 * 1024)))

# the same running total over all threads together, in start-time order
memory = 0
peak_memory = 0
for event in all_events:
    if event.memory:
        memory += event.size
        if memory > peak_memory:
            peak_memory = memory

print('peak memory = %.3g MB' % (float(peak_memory) / (1024 * 1024)))
if memory != 0:
    print('leak! final memory = %.3g MB' % (float(memory) / (1024 * 1024)))
|
|
|
|
# do two gates overlap?
|
|
def is_overlap(events, gate_name1, gate_name2):
    """Report whether any gate_name1 event overlaps a gate_name2 event.

    The test is one-directional: it asks whether an endpoint of a
    gate_name1 event falls inside a gate_name2 event, not the reverse.

    Args:
        events: objects with ``gate_name``, ``start`` and ``stop``
            attributes, sorted by ascending ``start`` (the early exit
            below relies on that order).
        gate_name1: name of the gate whose endpoints are tested.
        gate_name2: name of the gate whose spans they are tested against.

    Returns:
        True if the start or the stop of some gate_name1 event lies
        strictly inside some gate_name2 event, otherwise False.
    """
    for event1 in events:
        if event1.gate_name != gate_name1:
            continue

        for event2 in events:
            if event2.gate_name != gate_name2:
                continue

            # events are sorted by start time, so if we've gone past event1's
            # stop time, we can give up
            if event2.start > event1.stop:
                break

            # ... or if we're before event1's start
            if event2.stop < event1.start:
                continue

            # if either endpoint of 1 is within 2.  The start test must look
            # at event1.start: checking event1.stop in both tests misses an
            # event1 that begins inside event2 but ends after it.
            if event2.start < event1.start < event2.stop:
                return True
            if event2.start < event1.stop < event2.stop:
                return True

    return False
|
|
|
|
# allocate a y position for each gate
#
# Each visible thread gets a band of rows.  Row 0 holds its work, wait and
# memory events; every other gate name is given a row of its own unless it
# can share one with gates it never overlaps.  total_y accumulates the rows
# used so far and so is the first row of the next thread.
total_y = 0
for thread in threads:
    if thread.hide:
        continue

    thread.total_y = total_y

    n_thread_events = len(thread.events)
    if n_thread_events == 0:
        continue

    # report progress about once per percent.  Floor division keeps the
    # step and the percentage integral on Python 3 as well as Python 2.
    progress_step = 1 + n_thread_events // 100

    # first pass .. move work and wait events to y == 0
    print('positioning work/wait/mem ...')
    gate_positions = {}
    for i, event in enumerate(thread.events, 1):
        if i % progress_step == 0:
            sys.stdout.write('%d%% complete \r' %
                (100 * i // n_thread_events))

        if not (event.work or event.wait or event.memory):
            continue

        # works and waits must not overlap: the first time a work/wait gate
        # name is seen, check it against those already on row 0 and warn
        # (once) if it collides with one of them
        if (event.work or event.wait) and \
                event.gate_name not in gate_positions:
            for gate_name in gate_positions:
                if is_overlap(thread.events, event.gate_name, gate_name):
                    print('gate overlap on thread %s' % thread.thread_name)
                    print('\t%s' % event.gate_location)
                    print('\t%s' % event.gate_name)
                    print('\t%s' % gate_name)
                    break

            gate_positions[event.gate_name] = 0

        event.y = 0
        event.total_y = total_y

    # second pass: move all other events to non-overlapping ys
    print('finding maximal sets of non-overlapping gates ...')
    y = 1
    for i, event in enumerate(thread.events, 1):
        if i % progress_step == 0:
            sys.stdout.write('%d%% complete \r' %
                (100 * i // n_thread_events))

        if event.work or event.wait or event.memory:
            continue

        if event.gate_name not in gate_positions:
            # look at all the ys we've allocated previously and see if we can
            # add this gate to one of them
            for gate_y in range(1, y):
                found_overlap = False
                for gate_name in gate_positions:
                    if gate_positions[gate_name] != gate_y:
                        continue

                    if is_overlap(thread.events, event.gate_name, gate_name):
                        found_overlap = True
                        break

                if not found_overlap:
                    gate_positions[event.gate_name] = gate_y
                    break
            else:
                # failure? add a new y (also the case when no row beyond
                # row 0 exists yet)
                gate_positions[event.gate_name] = y
                y += 1

        event.y = gate_positions[event.gate_name]

    # third pass: flip the order of the ys to get the lowest-level ones at the
    # top, next to the wait/work line
    print('ordering timelines by granularity ...')
    for event in thread.events:
        if event.work or event.wait or event.memory:
            continue

        event.y = y - event.y
        event.total_y = total_y + event.y

    total_y += y
|
|
|
|
# layout constants for the rendered chart
PIXELS_PER_SECOND = 1000    # horizontal scale of the time axis
PIXELS_PER_GATE = 20        # vertical distance between gate rows
LEFT_BORDER = 130           # x at which the time axis starts
BAR_HEIGHT = 5              # thickness of an event bar
MEM_HEIGHT = 100            # height of the memory plot
# overall canvas size: the timeline plus a right-hand margin, and every
# gate row plus the memory plot and room underneath it
WIDTH = int(LEFT_BORDER + last_time * PIXELS_PER_SECOND) + 20
HEIGHT = int(total_y * PIXELS_PER_GATE) + MEM_HEIGHT + 30

output_filename = "vips-profile.svg"
print('writing to %s' % output_filename)

surface = cairo.SVGSurface(output_filename, WIDTH, HEIGHT)

ctx = cairo.Context(surface)
ctx.select_font_face('Sans')
ctx.set_font_size(15)

# fill the whole canvas with an opaque dark blue background
ctx.set_source_rgba(0.0, 0.0, 0.3, 1.0)
ctx.rectangle(0, 0, WIDTH, HEIGHT)
ctx.fill()
|
|
|
|
def draw_event(ctx, event):
    """Draw one event as a coloured bar, with a label for plain gates.

    Wait events are drawn red, work events green, memory events white and
    every other gate blue.  Only gates that are none of wait/work/memory get
    their name written underneath, centred on the bar.

    Args:
        ctx: cairo context to draw with.
        event: event to draw; reads its ``start``, ``stop``, ``total_y``,
            ``gate_name`` and ``wait``/``work``/``memory`` attributes.
    """
    left = event.start * PIXELS_PER_SECOND + LEFT_BORDER
    # floor division keeps the whole-pixel offsets that "/" on two ints
    # gives under Python 2
    top = event.total_y * PIXELS_PER_GATE + BAR_HEIGHT // 2
    width = (event.stop - event.start) * PIXELS_PER_SECOND
    height = BAR_HEIGHT

    if event.memory:
        # memory events have no duration: draw a one-pixel-wide,
        # half-height tick one bar height further down
        width = 1
        height //= 2
        top += BAR_HEIGHT

    if event.wait:
        colour = (0.9, 0.1, 0.1)
    elif event.work:
        colour = (0.1, 0.9, 0.1)
    elif event.memory:
        colour = (1.0, 1.0, 1.0)
    else:
        colour = (0.1, 0.1, 0.9)

    ctx.rectangle(left, top, width, height)
    ctx.set_source_rgb(*colour)
    ctx.fill()

    is_plain_gate = not (event.wait or event.work or event.memory)
    # a gate may have no name (gate_name None); there is nothing to write
    # for it, and text_extents() does not accept None
    if is_plain_gate and event.gate_name:
        twidth = ctx.text_extents(event.gate_name)[2]
        ctx.move_to(left + width / 2 - twidth / 2, top + 3 * BAR_HEIGHT)
        ctx.set_source_rgb(1.00, 0.83, 0.00)
        ctx.show_text(event.gate_name)
|
|
|
|
# draw each visible thread: a separator rule, its name, then its events
for thread in threads:
    if thread.hide:
        continue

    ctx.rectangle(0, thread.total_y * PIXELS_PER_GATE, WIDTH, 1)
    ctx.set_source_rgb(1.00, 1.00, 1.00)
    ctx.fill()

    # text_extents() yields (x_bearing, y_bearing, width, height,
    # x_advance, y_advance)
    xbearing, ybearing, twidth, theight, xadvance, yadvance = \
        ctx.text_extents(thread.thread_name)
    # "//" keeps the whole-pixel offset that "/" gives under Python 2
    ctx.move_to(0,
        theight + thread.total_y * PIXELS_PER_GATE + BAR_HEIGHT // 2)
    ctx.set_source_rgb(1.00, 1.00, 1.00)
    ctx.show_text(thread.thread_name)

    for event in thread.events:
        draw_event(ctx, event)

# the memory plot sits directly below the last thread
memory_y = total_y * PIXELS_PER_GATE

label = "memory"
xbearing, ybearing, twidth, theight, xadvance, yadvance = \
    ctx.text_extents(label)
ctx.move_to(0, memory_y + theight + 8)
ctx.set_source_rgb(1.00, 1.00, 1.00)
ctx.show_text(label)

# plot the running total of memory against time, scaled so that
# peak_memory reaches the top of the MEM_HEIGHT band
memory = 0
ctx.move_to(LEFT_BORDER, memory_y + MEM_HEIGHT)

# with peak_memory == 0 there is nothing to scale against (and dividing by
# it would fail), so no curve is drawn
if peak_memory > 0:
    for event in all_events:
        if not event.memory:
            continue

        memory += event.size

        left = LEFT_BORDER + event.start * PIXELS_PER_SECOND
        top = memory_y + MEM_HEIGHT - (MEM_HEIGHT * memory // peak_memory)

        ctx.line_to(left, top)

ctx.set_line_width(1)
ctx.set_source_rgb(1.00, 1.00, 1.00)
ctx.stroke()

# time axis along the bottom of the memory plot
axis_y = total_y * PIXELS_PER_GATE + MEM_HEIGHT

ctx.rectangle(LEFT_BORDER, axis_y, last_time * PIXELS_PER_SECOND, 1)
ctx.set_source_rgb(1.00, 1.00, 1.00)
ctx.fill()

label = "time"
xbearing, ybearing, twidth, theight, xadvance, yadvance = \
    ctx.text_extents(label)
ctx.move_to(0, axis_y + theight + 8)
ctx.set_source_rgb(1.00, 1.00, 1.00)
ctx.show_text(label)

# a tick and a label (in seconds) every tenth of a second; range() needs an
# integer step, hence "//"
for t in range(0, int(last_time * PIXELS_PER_SECOND),
        PIXELS_PER_SECOND // 10):
    left = t + LEFT_BORDER
    top = axis_y

    ctx.rectangle(left, top, 1, 5)
    ctx.set_source_rgb(1.00, 1.00, 1.00)
    ctx.fill()

    label = str(float(t) / PIXELS_PER_SECOND)
    xbearing, ybearing, twidth, theight, xadvance, yadvance = \
        ctx.text_extents(label)
    ctx.move_to(left - twidth / 2, top + theight + 8)
    ctx.set_source_rgb(1.00, 1.00, 1.00)
    ctx.show_text(label)

surface.finish()
|