diff --git a/TODO b/TODO index 6b0b20e3..34f8d88d 100644 --- a/TODO +++ b/TODO @@ -1,11 +1,39 @@ -- overlap doesn't seem to work - -- check operations called in sharpen, we seem to have some gaps, looking at the - trace - try disabling turbo-boost? -- calculate unk% from total time - (work time + wait time) + see http://blog.tube42.se/?p=1225 + + for i in {0..11}; do sudo cpufreq-set -f 1600000 -c $i; done + +-r doesn't seem to work + + verify with + + cpufreq-info | more + + with cpu locked at 1.6 GHz + +$ time vips sharpen k2.v x.v --radius 20 --vips-concurrency=1 +real 0m2.232s +user 0m2.212s +sys 0m0.036s +$ time vips sharpen k2.v x.v --radius 20 --vips-concurrency=2 +real 0m1.295s +user 0m2.448s +sys 0m0.060s + + +$ time vips sharpen k2.jpg x.jpg --radius 20 --vips-concurrency=1 +real 0m2.760s +user 0m2.768s +sys 0m0.024s +$ time vips sharpen k2.jpg x.jpg --radius 20 --vips-concurrency=2 +real 0m1.558s +user 0m2.988s +sys 0m0.060s + + + diff --git a/libvips/foreign/tiff2vips.c b/libvips/foreign/tiff2vips.c index 04dfa92e..4e5c3e0e 100644 --- a/libvips/foreign/tiff2vips.c +++ b/libvips/foreign/tiff2vips.c @@ -1083,12 +1083,18 @@ tiff_fill_region_aligned( VipsRegion *out, void *seq, void *a, void *b ) r->left, r->top ); #endif /*DEBUG*/ + VIPS_GATE_START( "tiff_fill_region_aligned: work" ); + /* Read that tile directly into the vips tile. */ if( TIFFReadTile( rtiff->tiff, VIPS_REGION_ADDR( out, r->left, r->top ), - r->left, r->top, 0, 0 ) < 0 ) + r->left, r->top, 0, 0 ) < 0 ) { + VIPS_GATE_STOP( "tiff_fill_region_aligned: work" ); return( -1 ); + } + + VIPS_GATE_STOP( "tiff_fill_region_aligned: work" ); return( 0 ); } @@ -1132,6 +1138,8 @@ tiff_fill_region( VipsRegion *out, void *seq, void *a, void *b, gboolean *stop ) VIPS_REGION_LSKIP( out ) == VIPS_REGION_SIZEOF_LINE( out ) ) return( tiff_fill_region_aligned( out, seq, a, b ) ); + VIPS_GATE_START( "tiff_fill_region: work" ); + for( y = ys; y < VIPS_RECT_BOTTOM( r ); y += rtiff->theight ) for( x = xs; x < VIPS_RECT_RIGHT( r ); x += rtiff->twidth ) { VipsRect tile; @@ -1139,8 +1147,10 @@ tiff_fill_region( VipsRegion *out, void *seq, void *a, void *b, gboolean *stop ) /* Read that tile. */ - if( TIFFReadTile( rtiff->tiff, buf, x, y, 0, 0 ) < 0 ) + if( TIFFReadTile( rtiff->tiff, buf, x, y, 0, 0 ) < 0 ) { + VIPS_GATE_STOP( "tiff_fill_region: work" ); return( -1 ); + } /* The tile we read. */ @@ -1168,6 +1178,8 @@ tiff_fill_region( VipsRegion *out, void *seq, void *a, void *b, gboolean *stop ) } } + VIPS_GATE_STOP( "tiff_fill_region: work" ); + return( 0 ); } @@ -1353,6 +1365,8 @@ tiff2vips_stripwise_generate( VipsRegion *or, g_assert( r->height == VIPS_MIN( rtiff->rows_per_strip, or->im->Ysize - r->top ) ); + VIPS_GATE_START( "tiff2vips_stripwise_generate: work" ); + for( y = 0; y < r->height; y += rtiff->rows_per_strip ) { tdata_t dst; @@ -1364,8 +1378,11 @@ tiff2vips_stripwise_generate( VipsRegion *or, else dst = rtiff->contig_buf; - if( tiff2vips_strip_read_interleaved( rtiff, r->top + y, dst ) ) + if( tiff2vips_strip_read_interleaved( rtiff, + r->top + y, dst ) ) { + VIPS_GATE_STOP( "tiff2vips_stripwise_generate: work" ); return( -1 ); + } /* If necessary, unpack to destination. */ @@ -1389,6 +1406,8 @@ tiff2vips_stripwise_generate( VipsRegion *or, } } + VIPS_GATE_STOP( "tiff2vips_stripwise_generate: work" ); + return( 0 ); } diff --git a/libvips/resample/affine.c b/libvips/resample/affine.c index 6d07d1d2..61494661 100644 --- a/libvips/resample/affine.c +++ b/libvips/resample/affine.c @@ -300,6 +300,8 @@ vips_affine_gen( VipsRegion *or, void *seq, void *a, void *b, gboolean *stop ) clipped.height ); #endif /*DEBUG*/ + VIPS_GATE_START( "vips_affine_gen: work" ); + /* Resample! x/y loop over pixels in the output image (5). */ for( y = to; y < bo; y++ ) { @@ -366,6 +368,8 @@ vips_affine_gen( VipsRegion *or, void *seq, void *a, void *b, gboolean *stop ) } } + VIPS_GATE_STOP( "vips_affine_gen: work" ); + return( 0 ); } diff --git a/libvips/resample/shrink.c b/libvips/resample/shrink.c index 14ee61a1..5bc996c5 100644 --- a/libvips/resample/shrink.c +++ b/libvips/resample/shrink.c @@ -294,9 +294,13 @@ vips_shrink_gen( VipsRegion *or, void *vseq, void *a, void *b, gboolean *stop ) if( vips_region_prepare( ir, &s ) ) return( -1 ); + VIPS_GATE_START( "vips_shrink_gen: work" ); + vips_shrink_gen2( shrink, seq, or, ir, r->left, r->top + y, r->width, height ); + + VIPS_GATE_STOP( "vips_shrink_gen: work" ); } return( 0 ); diff --git a/tools/vipsprofile b/tools/vipsprofile index 3e1f55e5..9d95c7ce 100755 --- a/tools/vipsprofile +++ b/tools/vipsprofile @@ -40,6 +40,11 @@ class Thread: thread_number = 0 def __init__(self, thread_name): + # no one cares about the thread address + match = re.match('(.*) \(0x.*?\) (.*)', thread_name) + if match: + thread_name = match.group(1) + " " + match.group(2) + self.thread_name = thread_name self.thread_number = Thread.thread_number self.events = [] @@ -133,27 +138,37 @@ for thread in threads: print 'last time =', last_time # calculate some simple stats -print 'name\t\t\t\talive\twait%\twork%\tunknown%' for thread in threads: - start = last_time - stop = 0 - wait = 0 - work = 0 + thread.start = last_time + thread.stop = 0 + thread.wait = 0 + thread.work = 0 for event in thread.events: - if event.start < start: - start = event.start - if event.stop > stop: - stop = event.stop + if event.start < thread.start: + thread.start = event.start + if event.stop > thread.stop: + thread.stop = event.stop if event.wait: - wait += event.stop - event.start + thread.wait += event.stop - event.start if event.work: - work += event.stop - event.start + thread.work += event.stop - event.start - alive = stop - start - wait_percent = 100 * wait / alive - work_percent = 100 * work / alive - unkn_percent = 100 - 100 * (work + wait) / alive - print '%30s\t%6.2g\t%.3g\t%.3g\t%.3g' % (thread.thread_name, alive, wait_percent, work_percent, unkn_percent) + thread.alive = thread.stop - thread.start + + # hide very short-lived threads + thread.hide = thread.alive < 0.001 + +print 'name\t\talive\twait%\twork%\tunknown%' +for thread in threads: + if thread.hide: + continue + + wait_percent = 100 * thread.wait / thread.alive + work_percent = 100 * thread.work / thread.alive + unkn_percent = 100 - 100 * (thread.work + thread.wait) / thread.alive + + print '%13s\t%6.2g\t' % (thread.thread_name, thread.alive), + print '%.3g\t%.3g\t%.3g' % (wait_percent, work_percent, unkn_percent) # do two gates overlap? def is_overlap(events, gate_name1, gate_name2): @@ -176,6 +191,9 @@ def is_overlap(events, gate_name1, gate_name2): # allocate a y position for each gate total_y = 0 for thread in threads: + if thread.hide: + continue + thread.total_y = total_y # first pass .. move work and wait events to y == 0 @@ -239,9 +257,9 @@ for thread in threads: PIXELS_PER_SECOND = 1000 PIXELS_PER_GATE = 20 -LEFT_BORDER = 320 +LEFT_BORDER = 130 BAR_HEIGHT = 5 -WIDTH = int(LEFT_BORDER + last_time * PIXELS_PER_SECOND) + 50 +WIDTH = int(LEFT_BORDER + last_time * PIXELS_PER_SECOND) + 20 HEIGHT = int((total_y + 1) * PIXELS_PER_GATE) output_filename = "vips-profile.svg" @@ -260,7 +278,7 @@ ctx.fill() def draw_event(ctx, event): left = event.start * PIXELS_PER_SECOND + LEFT_BORDER top = event.total_y * PIXELS_PER_GATE + BAR_HEIGHT / 2 - width = (event.stop - event.start) * PIXELS_PER_SECOND - 1 + width = (event.stop - event.start) * PIXELS_PER_SECOND height = BAR_HEIGHT ctx.rectangle(left, top, width, height) @@ -283,13 +301,16 @@ def draw_event(ctx, event): ctx.show_text(event.gate_name) for thread in threads: + if thread.hide: + continue + ctx.rectangle(0, thread.total_y * PIXELS_PER_GATE, WIDTH, 1) ctx.set_source_rgb(1.00, 1.00, 1.00) ctx.fill() xbearing, ybearing, twidth, theight, xadvance, yadvance = \ ctx.text_extents(thread.thread_name) - ctx.move_to(0, theight + thread.total_y * PIXELS_PER_GATE) + ctx.move_to(0, theight + thread.total_y * PIXELS_PER_GATE + BAR_HEIGHT / 2) ctx.set_source_rgb(1.00, 1.00, 1.00) ctx.show_text(thread.thread_name)