diff --git a/TODO b/TODO index 76edded6..f8ea7a4a 100644 --- a/TODO +++ b/TODO @@ -1,8 +1,31 @@ +- vipsprofile reports a leak, strangely + +- vipsprofile performance is very poor for large data sets, eg. + + time vips sharpen wtc.jpg x.jpg --radius 20 --vips-profile + recording profile in vips-profile.txt + real 0m14.728s + user 0m55.515s + sys 0m0.200s + john@bambam ~/pics $ vipsprofile + reading from vips-profile.txt + loaded 157716 events + last time = 14.584175 + name alive wait% work% unkn% memory peakm + worker 20 14 2.22 95.8 2 22.7 22.7 + worker 21 14 2.67 95.4 1.93 8.5 8.5 + worker 22 14 2.95 95.2 1.84 17.8 17.8 + worker 23 14 2.44 95.5 2.1 11.4 11.4 + wbuffer 24 15 96 4.02 0.000654 0 0 + wbuffer 25 15 95.4 4.62 0.000696 0 0 + main 26 15 99.1 0 0.923 -37.4 6.87 + peak memory = 67.3 MB + leak! final memory = 23 MB + positioning work/wait/mem ... + 0% complete - vipsprofile needs a man page for Debian, I guess -- vipsprofile reports a leak, strangely - - new_heart.ws fails with libvips master has the sharing fix resolved this? diff --git a/tools/vipsprofile b/tools/vipsprofile index f58e730e..08ad62a3 100644 --- a/tools/vipsprofile +++ b/tools/vipsprofile @@ -239,13 +239,24 @@ for thread in threads: thread.total_y = total_y + n_thread_events = len(thread.events) + if n_thread_events == 0: + continue + # first pass .. move work and wait events to y == 0 + print 'positioning work/wait/mem ...' + i = 0 gate_positions = {} for event in thread.events: + i += 1 + if i % (1 + n_thread_events / 100) == 0: + print '%d%% complete \r' % (100 * i / n_thread_events), + print '%d%% complete \r' % (100 * i / n_thread_events), + if not event.work and not event.wait and not event.memory: continue - # no works and waits must overlap + # works and waits must not overlap if event.work or event.wait: if not event.gate_name in gate_positions: for gate_name in gate_positions: @@ -261,8 +272,14 @@ for thread in threads: event.total_y = total_y # second pass: move all other events to non-overlapping ys + print 'finding maximal sets of non-overlapping gates ...' y = 1 + i = 0 for event in thread.events: + i += 1 + if i % (1 + n_thread_events / 100) == 0: + print '%d%% complete \r' % (100 * i / n_thread_events), + if event.work or event.wait or event.memory: continue @@ -292,6 +309,7 @@ for thread in threads: # third pass: flip the order of the ys to get the lowest-level ones at the # top, next to the wait/work line + print 'ordering timelines by granularity ...' for event in thread.events: if event.work or event.wait or event.memory: continue