vipsprofile speed problems fixed
moved to O(n) layout
This commit is contained in:
parent
2c5ee332f0
commit
a9f85e1fd9
24
TODO
24
TODO
@ -3,30 +3,6 @@
|
|||||||
|
|
||||||
- vipsprofile reports a leak, strangely
|
- vipsprofile reports a leak, strangely
|
||||||
|
|
||||||
- vipsprofile performance is very poor for large data sets, eg.
|
|
||||||
|
|
||||||
time vips sharpen wtc.jpg x.jpg --radius 20 --vips-profile
|
|
||||||
recording profile in vips-profile.txt
|
|
||||||
real 0m14.728s
|
|
||||||
user 0m55.515s
|
|
||||||
sys 0m0.200s
|
|
||||||
john@bambam ~/pics $ vipsprofile
|
|
||||||
reading from vips-profile.txt
|
|
||||||
loaded 157716 events
|
|
||||||
last time = 14.584175
|
|
||||||
name alive wait% work% unkn% memory peakm
|
|
||||||
worker 20 14 2.22 95.8 2 22.7 22.7
|
|
||||||
worker 21 14 2.67 95.4 1.93 8.5 8.5
|
|
||||||
worker 22 14 2.95 95.2 1.84 17.8 17.8
|
|
||||||
worker 23 14 2.44 95.5 2.1 11.4 11.4
|
|
||||||
wbuffer 24 15 96 4.02 0.000654 0 0
|
|
||||||
wbuffer 25 15 95.4 4.62 0.000696 0 0
|
|
||||||
main 26 15 99.1 0 0.923 -37.4 6.87
|
|
||||||
peak memory = 67.3 MB
|
|
||||||
leak! final memory = 23 MB
|
|
||||||
positioning work/wait/mem ...
|
|
||||||
0% complete
|
|
||||||
|
|
||||||
- vipsprofile needs a man page for Debian, I guess
|
- vipsprofile needs a man page for Debian, I guess
|
||||||
|
|
||||||
- new_heart.ws fails with libvips master
|
- new_heart.ws fails with libvips master
|
||||||
|
@ -48,6 +48,9 @@ class Thread:
|
|||||||
self.thread_name = thread_name
|
self.thread_name = thread_name
|
||||||
self.thread_number = Thread.thread_number
|
self.thread_number = Thread.thread_number
|
||||||
self.events = []
|
self.events = []
|
||||||
|
self.workwait = []
|
||||||
|
self.memory = []
|
||||||
|
self.other = []
|
||||||
Thread.thread_number += 1
|
Thread.thread_number += 1
|
||||||
|
|
||||||
all_events = []
|
all_events = []
|
||||||
@ -78,6 +81,12 @@ class Event:
|
|||||||
|
|
||||||
thread.events.append(self)
|
thread.events.append(self)
|
||||||
all_events.append(self)
|
all_events.append(self)
|
||||||
|
if self.wait or self.work:
|
||||||
|
thread.workwait.append(self)
|
||||||
|
elif self.memory:
|
||||||
|
thread.memory.append(self)
|
||||||
|
else:
|
||||||
|
thread.other.append(self)
|
||||||
|
|
||||||
input_filename = 'vips-profile.txt'
|
input_filename = 'vips-profile.txt'
|
||||||
|
|
||||||
@ -134,6 +143,9 @@ with ReadFile(input_filename) as rf:
|
|||||||
|
|
||||||
for thread in threads:
|
for thread in threads:
|
||||||
thread.events.sort(lambda x, y: cmp(x.start, y.start))
|
thread.events.sort(lambda x, y: cmp(x.start, y.start))
|
||||||
|
thread.workwait.sort(lambda x, y: cmp(x.start, y.start))
|
||||||
|
thread.memory.sort(lambda x, y: cmp(x.start, y.start))
|
||||||
|
thread.other.sort(lambda x, y: cmp(x.start, y.start))
|
||||||
|
|
||||||
all_events.sort(lambda x, y: cmp(x.start, y.start))
|
all_events.sort(lambda x, y: cmp(x.start, y.start))
|
||||||
|
|
||||||
@ -141,19 +153,17 @@ print 'loaded %d events' % n_events
|
|||||||
|
|
||||||
# move time axis to secs of computation
|
# move time axis to secs of computation
|
||||||
ticks_per_sec = 1000000.0
|
ticks_per_sec = 1000000.0
|
||||||
first_time = threads[0].events[0].start
|
first_time = all_events[0].start
|
||||||
last_time = 0
|
last_time = 0
|
||||||
for thread in threads:
|
for event in all_events:
|
||||||
for event in thread.events:
|
if event.start < first_time:
|
||||||
if event.start < first_time:
|
first_time = event.start
|
||||||
first_time = event.start
|
if event.stop > last_time:
|
||||||
if event.stop > last_time:
|
last_time = event.stop
|
||||||
last_time = event.stop
|
|
||||||
|
|
||||||
for thread in threads:
|
for event in all_events:
|
||||||
for event in thread.events:
|
event.start = (event.start - first_time) / ticks_per_sec
|
||||||
event.start = (event.start - first_time) / ticks_per_sec
|
event.stop = (event.stop - first_time) / ticks_per_sec
|
||||||
event.stop = (event.stop - first_time) / ticks_per_sec
|
|
||||||
|
|
||||||
last_time = (last_time - first_time) / ticks_per_sec
|
last_time = (last_time - first_time) / ticks_per_sec
|
||||||
first_time = 0
|
first_time = 0
|
||||||
@ -166,8 +176,8 @@ for thread in threads:
|
|||||||
thread.stop = 0
|
thread.stop = 0
|
||||||
thread.wait = 0
|
thread.wait = 0
|
||||||
thread.work = 0
|
thread.work = 0
|
||||||
thread.memory = 0
|
thread.mem = 0
|
||||||
thread.peak_memory = 0
|
thread.peak_mem = 0
|
||||||
for event in thread.events:
|
for event in thread.events:
|
||||||
if event.start < thread.start:
|
if event.start < thread.start:
|
||||||
thread.start = event.start
|
thread.start = event.start
|
||||||
@ -178,9 +188,9 @@ for thread in threads:
|
|||||||
if event.work:
|
if event.work:
|
||||||
thread.work += event.stop - event.start
|
thread.work += event.stop - event.start
|
||||||
if event.memory:
|
if event.memory:
|
||||||
thread.memory += event.size
|
thread.mem += event.size
|
||||||
if thread.memory > thread.peak_memory:
|
if thread.mem > thread.peak_mem:
|
||||||
thread.peak_memory = thread.memory
|
thread.peak_mem = thread.mem
|
||||||
|
|
||||||
thread.alive = thread.stop - thread.start
|
thread.alive = thread.stop - thread.start
|
||||||
|
|
||||||
@ -198,48 +208,47 @@ for thread in threads:
|
|||||||
|
|
||||||
print '%13s\t%6.2g\t' % (thread.thread_name, thread.alive),
|
print '%13s\t%6.2g\t' % (thread.thread_name, thread.alive),
|
||||||
print '%.3g\t%.3g\t%.3g\t' % (wait_percent, work_percent, unkn_percent),
|
print '%.3g\t%.3g\t%.3g\t' % (wait_percent, work_percent, unkn_percent),
|
||||||
print '%.3g\t' % (float(thread.memory) / (1024 * 1024)),
|
print '%.3g\t' % (float(thread.mem) / (1024 * 1024)),
|
||||||
print '%.3g\t' % (float(thread.peak_memory) / (1024 * 1024))
|
print '%.3g\t' % (float(thread.peak_mem) / (1024 * 1024))
|
||||||
|
|
||||||
memory = 0
|
mem = 0
|
||||||
peak_memory = 0
|
peak_mem = 0
|
||||||
for event in all_events:
|
for event in all_events:
|
||||||
if event.memory:
|
if event.memory:
|
||||||
memory += event.size
|
mem += event.size
|
||||||
if memory > peak_memory:
|
if mem > peak_mem:
|
||||||
peak_memory = memory
|
peak_mem = mem
|
||||||
|
|
||||||
print 'peak memory = %.3g MB' % (float(peak_memory) / (1024 * 1024))
|
print 'peak memory = %.3g MB' % (float(peak_mem) / (1024 * 1024))
|
||||||
if memory != 0:
|
if mem != 0:
|
||||||
print 'leak! final memory = %.3g MB' % (float(memory) / (1024 * 1024))
|
print 'leak! final memory = %.3g MB' % (float(mem) / (1024 * 1024))
|
||||||
|
|
||||||
# do two gates overlap?
|
# does a list of events contain an overlap?
|
||||||
def is_overlap(events, gate_name1, gate_name2):
|
# assume the list of events has been sorted by start time
|
||||||
for event1 in events:
|
def events_overlap(events):
|
||||||
if event1.gate_name != gate_name1:
|
for i in range(0, len(events) - 1):
|
||||||
|
# a length 0 event can't overlap with anything
|
||||||
|
if events[i].stop - events[i].start == 0:
|
||||||
continue
|
continue
|
||||||
|
if events[i + 1].stop - events[i + 1].start == 0:
|
||||||
for event2 in events:
|
continue
|
||||||
if event2.gate_name != gate_name2:
|
if events[i].stop > events[i + 1].start:
|
||||||
continue
|
return True
|
||||||
|
|
||||||
# events are sorted by start time, so if we've gone past event1's
|
|
||||||
# stop time, we can give up
|
|
||||||
if event2.start > event1.stop:
|
|
||||||
break
|
|
||||||
|
|
||||||
# ... or if we're before event1's start
|
|
||||||
if event2.stop < event1.start:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# if either endpoint of 1 is within 2
|
|
||||||
if event1.start > event2.start and event1.stop < event2.stop:
|
|
||||||
return True
|
|
||||||
if event1.stop > event2.start and event1.stop < event2.stop:
|
|
||||||
return True
|
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# do the events on two gates overlap?
|
||||||
|
def gates_overlap(events, gate_name1, gate_name2):
|
||||||
|
merged = []
|
||||||
|
|
||||||
|
for event in events:
|
||||||
|
if event.gate_name == gate_name1 or event.gate_name == gate_name2:
|
||||||
|
merged.append(event)
|
||||||
|
|
||||||
|
merged.sort(lambda x, y: cmp(x.start, y.start))
|
||||||
|
|
||||||
|
return events_overlap(merged)
|
||||||
|
|
||||||
# allocate a y position for each gate
|
# allocate a y position for each gate
|
||||||
total_y = 0
|
total_y = 0
|
||||||
for thread in threads:
|
for thread in threads:
|
||||||
@ -248,49 +257,34 @@ for thread in threads:
|
|||||||
|
|
||||||
thread.total_y = total_y
|
thread.total_y = total_y
|
||||||
|
|
||||||
n_thread_events = len(thread.events)
|
gate_positions = {}
|
||||||
if n_thread_events == 0:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# first pass .. move work and wait events to y == 0
|
# first pass .. move work and wait events to y == 0
|
||||||
print 'positioning work/wait/mem ...'
|
if events_overlap(thread.workwait):
|
||||||
i = 0
|
print 'gate overlap on thread', thread.thread_name
|
||||||
gate_positions = {}
|
for i in range(0, len(thread.workwait) - 1):
|
||||||
for event in thread.events:
|
event1 = thread.workwait[i]
|
||||||
i += 1
|
event2 = thread.workwait[i + 1]
|
||||||
if i % (1 + n_thread_events / 100) == 0:
|
if event1.stop > event2.start:
|
||||||
print '%d%% complete \r' % (100 * i / n_thread_events),
|
print 'overlap:'
|
||||||
|
print 'event', event1.gate_location, event1.gate_name,
|
||||||
|
print 'starts at', event1.start, 'stops at', event1.stop
|
||||||
|
print 'event', event2.gate_location, event2.gate_name,
|
||||||
|
print 'starts at', event2.start, 'stops at', event2.stop
|
||||||
|
|
||||||
if not event.work and not event.wait and not event.memory:
|
for event in thread.workwait:
|
||||||
continue
|
gate_positions[event.gate_name] = 0
|
||||||
|
event.y = 0
|
||||||
# works and waits must not overlap
|
event.total_y = total_y
|
||||||
if event.work or event.wait:
|
|
||||||
if not event.gate_name in gate_positions:
|
|
||||||
for gate_name in gate_positions:
|
|
||||||
if is_overlap(thread.events, event.gate_name, gate_name):
|
|
||||||
print 'gate overlap on thread', thread.thread_name
|
|
||||||
print '\t', event.gate_location
|
|
||||||
print '\t', event.gate_name
|
|
||||||
print '\t', gate_name
|
|
||||||
break
|
|
||||||
|
|
||||||
|
for event in thread.memory:
|
||||||
gate_positions[event.gate_name] = 0
|
gate_positions[event.gate_name] = 0
|
||||||
event.y = 0
|
event.y = 0
|
||||||
event.total_y = total_y
|
event.total_y = total_y
|
||||||
|
|
||||||
# second pass: move all other events to non-overlapping ys
|
# second pass: move all other events to non-overlapping ys
|
||||||
print 'finding maximal sets of non-overlapping gates ...'
|
|
||||||
y = 1
|
y = 1
|
||||||
i = 0
|
for event in thread.other:
|
||||||
for event in thread.events:
|
|
||||||
i += 1
|
|
||||||
if i % (1 + n_thread_events / 100) == 0:
|
|
||||||
print '%d%% complete \r' % (100 * i / n_thread_events),
|
|
||||||
|
|
||||||
if event.work or event.wait or event.memory:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not event.gate_name in gate_positions:
|
if not event.gate_name in gate_positions:
|
||||||
# look at all the ys we've allocated previously and see if we can
|
# look at all the ys we've allocated previously and see if we can
|
||||||
# add this gate to one of them
|
# add this gate to one of them
|
||||||
@ -300,7 +294,7 @@ for thread in threads:
|
|||||||
if gate_positions[gate_name] != gate_y:
|
if gate_positions[gate_name] != gate_y:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if is_overlap(thread.events, event.gate_name, gate_name):
|
if gates_overlap(thread.other, event.gate_name, gate_name):
|
||||||
found_overlap = True
|
found_overlap = True
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -317,11 +311,7 @@ for thread in threads:
|
|||||||
|
|
||||||
# third pass: flip the order of the ys to get the lowest-level ones at the
|
# third pass: flip the order of the ys to get the lowest-level ones at the
|
||||||
# top, next to the wait/work line
|
# top, next to the wait/work line
|
||||||
print 'ordering timelines by granularity ...'
|
for event in thread.other:
|
||||||
for event in thread.events:
|
|
||||||
if event.work or event.wait or event.memory:
|
|
||||||
continue
|
|
||||||
|
|
||||||
event.y = y - event.y
|
event.y = y - event.y
|
||||||
event.total_y = total_y + event.y
|
event.total_y = total_y + event.y
|
||||||
|
|
||||||
@ -405,15 +395,15 @@ ctx.move_to(0, memory_y + theight + 8)
|
|||||||
ctx.set_source_rgb(1.00, 1.00, 1.00)
|
ctx.set_source_rgb(1.00, 1.00, 1.00)
|
||||||
ctx.show_text(label)
|
ctx.show_text(label)
|
||||||
|
|
||||||
memory = 0
|
mem = 0
|
||||||
ctx.move_to(LEFT_BORDER, memory_y + MEM_HEIGHT)
|
ctx.move_to(LEFT_BORDER, memory_y + MEM_HEIGHT)
|
||||||
|
|
||||||
for event in all_events:
|
for event in all_events:
|
||||||
if event.memory:
|
if event.memory:
|
||||||
memory += event.size
|
mem += event.size
|
||||||
|
|
||||||
left = LEFT_BORDER + event.start * PIXELS_PER_SECOND
|
left = LEFT_BORDER + event.start * PIXELS_PER_SECOND
|
||||||
top = memory_y + MEM_HEIGHT - (MEM_HEIGHT * memory / peak_memory)
|
top = memory_y + MEM_HEIGHT - (MEM_HEIGHT * mem / peak_mem)
|
||||||
|
|
||||||
ctx.line_to(left, top)
|
ctx.line_to(left, top)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user