vipsprofile graphs memory use
marks malloc/free as well
This commit is contained in:
parent
466f25cf28
commit
b3d31ced79
@ -23,7 +23,7 @@
|
|||||||
- support XYZ as a PCS for vips_icc_import() and vips_icc_export()
|
- support XYZ as a PCS for vips_icc_import() and vips_icc_export()
|
||||||
- add --strip option to jpegsave
|
- add --strip option to jpegsave
|
||||||
- added vips_gaussblur() convenience function
|
- added vips_gaussblur() convenience function
|
||||||
- added --vips-profile, records and dumps thread timing info
|
- added --vips-profile, records and dumps thread timing and memory use info
|
||||||
- added vipsprofile, visualises --vips-profile output
|
- added vipsprofile, visualises --vips-profile output
|
||||||
- auto-vectorization-friendly inner loops
|
- auto-vectorization-friendly inner loops
|
||||||
|
|
||||||
|
@ -95,11 +95,14 @@ vips_thread_gate_block_save( VipsThreadGateBlock *block, FILE *fp )
|
|||||||
static void
|
static void
|
||||||
vips_thread_profile_save_gate( VipsThreadGate *gate, FILE *fp )
|
vips_thread_profile_save_gate( VipsThreadGate *gate, FILE *fp )
|
||||||
{
|
{
|
||||||
fprintf( fp, "gate: %s\n", gate->name );
|
if( gate->start->i ||
|
||||||
fprintf( fp, "start:\n" );
|
gate->start->prev ) {
|
||||||
vips_thread_gate_block_save( gate->start, fp );
|
fprintf( fp, "gate: %s\n", gate->name );
|
||||||
fprintf( fp, "stop:\n" );
|
fprintf( fp, "start:\n" );
|
||||||
vips_thread_gate_block_save( gate->stop, fp );
|
vips_thread_gate_block_save( gate->start, fp );
|
||||||
|
fprintf( fp, "stop:\n" );
|
||||||
|
vips_thread_gate_block_save( gate->stop, fp );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -28,7 +28,7 @@ def read_times(rf):
|
|||||||
times = []
|
times = []
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
match = re.match('[0-9]+ ', rf.line)
|
match = re.match('[+-]?[0-9]+ ', rf.line)
|
||||||
if not match:
|
if not match:
|
||||||
break
|
break
|
||||||
times += [int(x) for x in re.split(' ', rf.line.rstrip())]
|
times += [int(x) for x in re.split(' ', rf.line.rstrip())]
|
||||||
@ -50,22 +50,34 @@ class Thread:
|
|||||||
self.events = []
|
self.events = []
|
||||||
Thread.thread_number += 1
|
Thread.thread_number += 1
|
||||||
|
|
||||||
|
all_events = []
|
||||||
|
|
||||||
class Event:
|
class Event:
|
||||||
def __init__(self, thread, gate_location, gate_name, start, stop):
|
def __init__(self, thread, gate_location, gate_name, start, stop):
|
||||||
self.thread = thread
|
self.thread = thread
|
||||||
self.gate_location = gate_location
|
self.gate_location = gate_location
|
||||||
self.gate_name = gate_name
|
self.gate_name = gate_name
|
||||||
self.start = start
|
|
||||||
self.stop = stop
|
|
||||||
|
|
||||||
self.work = False
|
self.work = False
|
||||||
self.wait = False
|
self.wait = False
|
||||||
if re.match('.*work.*', gate_name):
|
self.memory = False
|
||||||
|
if gate_location == "memory":
|
||||||
|
self.memory = True
|
||||||
|
elif re.match('.*work.*', gate_name):
|
||||||
self.work = True
|
self.work = True
|
||||||
if re.match('.*wait.*', gate_name):
|
elif re.match('.*wait.*', gate_name):
|
||||||
self.wait = True
|
self.wait = True
|
||||||
|
|
||||||
|
if self.memory:
|
||||||
|
self.start = start
|
||||||
|
self.stop = start
|
||||||
|
self.size = stop
|
||||||
|
else:
|
||||||
|
self.start = start
|
||||||
|
self.stop = stop
|
||||||
|
|
||||||
thread.events.append(self)
|
thread.events.append(self)
|
||||||
|
all_events.append(self)
|
||||||
|
|
||||||
input_filename = 'vips-profile.txt'
|
input_filename = 'vips-profile.txt'
|
||||||
|
|
||||||
@ -92,11 +104,11 @@ with ReadFile(input_filename) as rf:
|
|||||||
rf.getnext()
|
rf.getnext()
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
match = re.match('gate: (.*?): (.*)', rf.line)
|
match = re.match('^gate: (.*?)(: (.*))?$', rf.line)
|
||||||
if not match:
|
if not match:
|
||||||
break
|
break
|
||||||
gate_location = match.group(1)
|
gate_location = match.group(1)
|
||||||
gate_name = match.group(2)
|
gate_name = match.group(3)
|
||||||
rf.getnext()
|
rf.getnext()
|
||||||
|
|
||||||
match = re.match('start:', rf.line)
|
match = re.match('start:', rf.line)
|
||||||
@ -123,6 +135,8 @@ with ReadFile(input_filename) as rf:
|
|||||||
for thread in threads:
|
for thread in threads:
|
||||||
thread.events.sort(lambda x, y: cmp(x.start, y.start))
|
thread.events.sort(lambda x, y: cmp(x.start, y.start))
|
||||||
|
|
||||||
|
all_events.sort(lambda x, y: cmp(x.start, y.start))
|
||||||
|
|
||||||
print 'loaded %d events' % n_events
|
print 'loaded %d events' % n_events
|
||||||
|
|
||||||
# move time axis to secs of computation
|
# move time axis to secs of computation
|
||||||
@ -152,6 +166,8 @@ for thread in threads:
|
|||||||
thread.stop = 0
|
thread.stop = 0
|
||||||
thread.wait = 0
|
thread.wait = 0
|
||||||
thread.work = 0
|
thread.work = 0
|
||||||
|
thread.memory = 0
|
||||||
|
thread.peak_memory = 0
|
||||||
for event in thread.events:
|
for event in thread.events:
|
||||||
if event.start < thread.start:
|
if event.start < thread.start:
|
||||||
thread.start = event.start
|
thread.start = event.start
|
||||||
@ -161,13 +177,17 @@ for thread in threads:
|
|||||||
thread.wait += event.stop - event.start
|
thread.wait += event.stop - event.start
|
||||||
if event.work:
|
if event.work:
|
||||||
thread.work += event.stop - event.start
|
thread.work += event.stop - event.start
|
||||||
|
if event.memory:
|
||||||
|
thread.memory += event.size
|
||||||
|
if thread.memory > thread.peak_memory:
|
||||||
|
thread.peak_memory = thread.memory
|
||||||
|
|
||||||
thread.alive = thread.stop - thread.start
|
thread.alive = thread.stop - thread.start
|
||||||
|
|
||||||
# hide very short-lived threads
|
# hide very short-lived threads
|
||||||
thread.hide = thread.alive < 0.01
|
thread.hide = thread.alive < 0.01
|
||||||
|
|
||||||
print 'name\t\talive\twait%\twork%\tunknown%'
|
print 'name\t\talive\twait%\twork%\tunkn%\tmemory\tpeakm'
|
||||||
for thread in threads:
|
for thread in threads:
|
||||||
if thread.hide:
|
if thread.hide:
|
||||||
continue
|
continue
|
||||||
@ -177,7 +197,21 @@ for thread in threads:
|
|||||||
unkn_percent = 100 - 100 * (thread.work + thread.wait) / thread.alive
|
unkn_percent = 100 - 100 * (thread.work + thread.wait) / thread.alive
|
||||||
|
|
||||||
print '%13s\t%6.2g\t' % (thread.thread_name, thread.alive),
|
print '%13s\t%6.2g\t' % (thread.thread_name, thread.alive),
|
||||||
print '%.3g\t%.3g\t%.3g' % (wait_percent, work_percent, unkn_percent)
|
print '%.3g\t%.3g\t%.3g\t' % (wait_percent, work_percent, unkn_percent),
|
||||||
|
print '%.3g\t' % (float(thread.memory) / (1024 * 1024)),
|
||||||
|
print '%.3g\t' % (float(thread.peak_memory) / (1024 * 1024))
|
||||||
|
|
||||||
|
memory = 0
|
||||||
|
peak_memory = 0
|
||||||
|
for event in all_events:
|
||||||
|
if event.memory:
|
||||||
|
memory += event.size
|
||||||
|
if memory > peak_memory:
|
||||||
|
peak_memory = memory
|
||||||
|
|
||||||
|
print 'peak memory = %.3g MB' % (float(peak_memory) / (1024 * 1024))
|
||||||
|
if memory != 0:
|
||||||
|
print 'leak! final memory = %.3g MB' % (float(memory) / (1024 * 1024))
|
||||||
|
|
||||||
# do two gates overlap?
|
# do two gates overlap?
|
||||||
def is_overlap(events, gate_name1, gate_name2):
|
def is_overlap(events, gate_name1, gate_name2):
|
||||||
@ -208,28 +242,28 @@ for thread in threads:
|
|||||||
# first pass .. move work and wait events to y == 0
|
# first pass .. move work and wait events to y == 0
|
||||||
gate_positions = {}
|
gate_positions = {}
|
||||||
for event in thread.events:
|
for event in thread.events:
|
||||||
if not event.work and not event.wait:
|
if not event.work and not event.wait and not event.memory:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# no works and waits must overlap
|
# no works and waits must overlap
|
||||||
if not event.gate_name in gate_positions:
|
if event.work or event.wait:
|
||||||
for gate_name in gate_positions:
|
if not event.gate_name in gate_positions:
|
||||||
if is_overlap(thread.events, event.gate_name, gate_name):
|
for gate_name in gate_positions:
|
||||||
print 'gate overlap on thread', thread.thread_name
|
if is_overlap(thread.events, event.gate_name, gate_name):
|
||||||
print '\t', event.gate_location
|
print 'gate overlap on thread', thread.thread_name
|
||||||
print '\t', event.gate_name
|
print '\t', event.gate_location
|
||||||
print '\t', gate_name
|
print '\t', event.gate_name
|
||||||
break
|
print '\t', gate_name
|
||||||
|
break
|
||||||
|
|
||||||
gate_positions[event.gate_name] = 0
|
gate_positions[event.gate_name] = 0
|
||||||
|
event.y = 0
|
||||||
event.y = gate_positions[event.gate_name]
|
event.total_y = total_y
|
||||||
event.total_y = total_y + event.y
|
|
||||||
|
|
||||||
# second pass: move all other events to non-overlapping ys
|
# second pass: move all other events to non-overlapping ys
|
||||||
y = 1
|
y = 1
|
||||||
for event in thread.events:
|
for event in thread.events:
|
||||||
if event.work or event.wait:
|
if event.work or event.wait or event.memory:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if not event.gate_name in gate_positions:
|
if not event.gate_name in gate_positions:
|
||||||
@ -259,7 +293,7 @@ for thread in threads:
|
|||||||
# third pass: flip the order of the ys to get the lowest-level ones at the
|
# third pass: flip the order of the ys to get the lowest-level ones at the
|
||||||
# top, next to the wait/work line
|
# top, next to the wait/work line
|
||||||
for event in thread.events:
|
for event in thread.events:
|
||||||
if event.work or event.wait:
|
if event.work or event.wait or event.memory:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
event.y = y - event.y
|
event.y = y - event.y
|
||||||
@ -271,8 +305,9 @@ PIXELS_PER_SECOND = 1000
|
|||||||
PIXELS_PER_GATE = 20
|
PIXELS_PER_GATE = 20
|
||||||
LEFT_BORDER = 130
|
LEFT_BORDER = 130
|
||||||
BAR_HEIGHT = 5
|
BAR_HEIGHT = 5
|
||||||
|
MEM_HEIGHT = 100
|
||||||
WIDTH = int(LEFT_BORDER + last_time * PIXELS_PER_SECOND) + 20
|
WIDTH = int(LEFT_BORDER + last_time * PIXELS_PER_SECOND) + 20
|
||||||
HEIGHT = int((total_y + 1) * PIXELS_PER_GATE)
|
HEIGHT = int(total_y * PIXELS_PER_GATE) + MEM_HEIGHT + 30
|
||||||
|
|
||||||
output_filename = "vips-profile.svg"
|
output_filename = "vips-profile.svg"
|
||||||
print 'writing to', output_filename
|
print 'writing to', output_filename
|
||||||
@ -293,18 +328,25 @@ def draw_event(ctx, event):
|
|||||||
width = (event.stop - event.start) * PIXELS_PER_SECOND
|
width = (event.stop - event.start) * PIXELS_PER_SECOND
|
||||||
height = BAR_HEIGHT
|
height = BAR_HEIGHT
|
||||||
|
|
||||||
|
if event.memory:
|
||||||
|
width = 1
|
||||||
|
height /= 2
|
||||||
|
top += BAR_HEIGHT
|
||||||
|
|
||||||
ctx.rectangle(left, top, width, height)
|
ctx.rectangle(left, top, width, height)
|
||||||
|
|
||||||
if event.wait:
|
if event.wait:
|
||||||
ctx.set_source_rgb(0.9, 0.1, 0.1)
|
ctx.set_source_rgb(0.9, 0.1, 0.1)
|
||||||
elif event.work:
|
elif event.work:
|
||||||
ctx.set_source_rgb(0.1, 0.9, 0.1)
|
ctx.set_source_rgb(0.1, 0.9, 0.1)
|
||||||
|
elif event.memory:
|
||||||
|
ctx.set_source_rgb(1.0, 1.0, 1.0)
|
||||||
else:
|
else:
|
||||||
ctx.set_source_rgb(0.1, 0.1, 0.9)
|
ctx.set_source_rgb(0.1, 0.1, 0.9)
|
||||||
|
|
||||||
ctx.fill()
|
ctx.fill()
|
||||||
|
|
||||||
if not event.wait and not event.work:
|
if not event.wait and not event.work and not event.memory:
|
||||||
xbearing, ybearing, twidth, theight, xadvance, yadvance = \
|
xbearing, ybearing, twidth, theight, xadvance, yadvance = \
|
||||||
ctx.text_extents(event.gate_name)
|
ctx.text_extents(event.gate_name)
|
||||||
ctx.move_to(left + width / 2 - twidth / 2, top + 3 * BAR_HEIGHT)
|
ctx.move_to(left + width / 2 - twidth / 2, top + 3 * BAR_HEIGHT)
|
||||||
@ -328,21 +370,37 @@ for thread in threads:
|
|||||||
for event in thread.events:
|
for event in thread.events:
|
||||||
draw_event(ctx, event)
|
draw_event(ctx, event)
|
||||||
|
|
||||||
ctx.rectangle(LEFT_BORDER, total_y * PIXELS_PER_GATE,
|
memory = 0
|
||||||
last_time * PIXELS_PER_SECOND, 1)
|
for event in all_events:
|
||||||
|
if event.memory:
|
||||||
|
memory += event.size
|
||||||
|
|
||||||
|
left = LEFT_BORDER + event.start * PIXELS_PER_SECOND
|
||||||
|
top = total_y * PIXELS_PER_GATE + MEM_HEIGHT
|
||||||
|
top -= MEM_HEIGHT * memory / peak_memory
|
||||||
|
width = 1
|
||||||
|
height = 1
|
||||||
|
|
||||||
|
ctx.rectangle(left, top, width, height)
|
||||||
|
ctx.set_source_rgb(1.00, 1.00, 1.00)
|
||||||
|
ctx.fill()
|
||||||
|
|
||||||
|
axis_y = total_y * PIXELS_PER_GATE + MEM_HEIGHT
|
||||||
|
|
||||||
|
ctx.rectangle(LEFT_BORDER, axis_y, last_time * PIXELS_PER_SECOND, 1)
|
||||||
ctx.set_source_rgb(1.00, 1.00, 1.00)
|
ctx.set_source_rgb(1.00, 1.00, 1.00)
|
||||||
ctx.fill()
|
ctx.fill()
|
||||||
|
|
||||||
label = "time"
|
label = "time"
|
||||||
xbearing, ybearing, twidth, theight, xadvance, yadvance = \
|
xbearing, ybearing, twidth, theight, xadvance, yadvance = \
|
||||||
ctx.text_extents(label)
|
ctx.text_extents(label)
|
||||||
ctx.move_to(0, total_y * PIXELS_PER_GATE + theight + 8)
|
ctx.move_to(0, axis_y + theight + 8)
|
||||||
ctx.set_source_rgb(1.00, 1.00, 1.00)
|
ctx.set_source_rgb(1.00, 1.00, 1.00)
|
||||||
ctx.show_text(label)
|
ctx.show_text(label)
|
||||||
|
|
||||||
for t in range(0, int(last_time * PIXELS_PER_SECOND), PIXELS_PER_SECOND / 10):
|
for t in range(0, int(last_time * PIXELS_PER_SECOND), PIXELS_PER_SECOND / 10):
|
||||||
left = t + LEFT_BORDER
|
left = t + LEFT_BORDER
|
||||||
top = total_y * PIXELS_PER_GATE
|
top = axis_y
|
||||||
|
|
||||||
ctx.rectangle(left, top, 1, 5)
|
ctx.rectangle(left, top, 1, 5)
|
||||||
ctx.set_source_rgb(1.00, 1.00, 1.00)
|
ctx.set_source_rgb(1.00, 1.00, 1.00)
|
||||||
|
Loading…
Reference in New Issue
Block a user