more polishing

and instrument stuff for vipsthumbnail
This commit is contained in:
John Cupitt 2013-11-28 14:56:19 +00:00
parent a83b44b7a4
commit e0b9b3cd5c
5 changed files with 104 additions and 28 deletions

38
TODO
View File

@ -1,11 +1,39 @@
- overlap doesn't seem to work
- check the operations called in sharpen: looking at the trace, we seem to
have some gaps
- try disabling turbo-boost? - try disabling turbo-boost?
- calculate unk% from total time - (work time + wait time) see http://blog.tube42.se/?p=1225
for i in {0..11}; do sudo cpufreq-set -f 1600000 -c $i; done
cpufreq-set -r doesn't seem to work
verify with
cpufreq-info | more
with cpu locked at 1.6 GHz
$ time vips sharpen k2.v x.v --radius 20 --vips-concurrency=1
real 0m2.232s
user 0m2.212s
sys 0m0.036s
$ time vips sharpen k2.v x.v --radius 20 --vips-concurrency=2
real 0m1.295s
user 0m2.448s
sys 0m0.060s
$ time vips sharpen k2.jpg x.jpg --radius 20 --vips-concurrency=1
real 0m2.760s
user 0m2.768s
sys 0m0.024s
$ time vips sharpen k2.jpg x.jpg --radius 20 --vips-concurrency=2
real 0m1.558s
user 0m2.988s
sys 0m0.060s

View File

@ -1083,12 +1083,18 @@ tiff_fill_region_aligned( VipsRegion *out, void *seq, void *a, void *b )
r->left, r->top ); r->left, r->top );
#endif /*DEBUG*/ #endif /*DEBUG*/
VIPS_GATE_START( "tiff_fill_region_aligned: work" );
/* Read that tile directly into the vips tile. /* Read that tile directly into the vips tile.
*/ */
if( TIFFReadTile( rtiff->tiff, if( TIFFReadTile( rtiff->tiff,
VIPS_REGION_ADDR( out, r->left, r->top ), VIPS_REGION_ADDR( out, r->left, r->top ),
r->left, r->top, 0, 0 ) < 0 ) r->left, r->top, 0, 0 ) < 0 ) {
VIPS_GATE_STOP( "tiff_fill_region_aligned: work" );
return( -1 ); return( -1 );
}
VIPS_GATE_STOP( "tiff_fill_region_aligned: work" );
return( 0 ); return( 0 );
} }
@ -1132,6 +1138,8 @@ tiff_fill_region( VipsRegion *out, void *seq, void *a, void *b, gboolean *stop )
VIPS_REGION_LSKIP( out ) == VIPS_REGION_SIZEOF_LINE( out ) ) VIPS_REGION_LSKIP( out ) == VIPS_REGION_SIZEOF_LINE( out ) )
return( tiff_fill_region_aligned( out, seq, a, b ) ); return( tiff_fill_region_aligned( out, seq, a, b ) );
VIPS_GATE_START( "tiff_fill_region: work" );
for( y = ys; y < VIPS_RECT_BOTTOM( r ); y += rtiff->theight ) for( y = ys; y < VIPS_RECT_BOTTOM( r ); y += rtiff->theight )
for( x = xs; x < VIPS_RECT_RIGHT( r ); x += rtiff->twidth ) { for( x = xs; x < VIPS_RECT_RIGHT( r ); x += rtiff->twidth ) {
VipsRect tile; VipsRect tile;
@ -1139,8 +1147,10 @@ tiff_fill_region( VipsRegion *out, void *seq, void *a, void *b, gboolean *stop )
/* Read that tile. /* Read that tile.
*/ */
if( TIFFReadTile( rtiff->tiff, buf, x, y, 0, 0 ) < 0 ) if( TIFFReadTile( rtiff->tiff, buf, x, y, 0, 0 ) < 0 ) {
VIPS_GATE_STOP( "tiff_fill_region: work" );
return( -1 ); return( -1 );
}
/* The tile we read. /* The tile we read.
*/ */
@ -1168,6 +1178,8 @@ tiff_fill_region( VipsRegion *out, void *seq, void *a, void *b, gboolean *stop )
} }
} }
VIPS_GATE_STOP( "tiff_fill_region: work" );
return( 0 ); return( 0 );
} }
@ -1353,6 +1365,8 @@ tiff2vips_stripwise_generate( VipsRegion *or,
g_assert( r->height == g_assert( r->height ==
VIPS_MIN( rtiff->rows_per_strip, or->im->Ysize - r->top ) ); VIPS_MIN( rtiff->rows_per_strip, or->im->Ysize - r->top ) );
VIPS_GATE_START( "tiff2vips_stripwise_generate: work" );
for( y = 0; y < r->height; y += rtiff->rows_per_strip ) { for( y = 0; y < r->height; y += rtiff->rows_per_strip ) {
tdata_t dst; tdata_t dst;
@ -1364,8 +1378,11 @@ tiff2vips_stripwise_generate( VipsRegion *or,
else else
dst = rtiff->contig_buf; dst = rtiff->contig_buf;
if( tiff2vips_strip_read_interleaved( rtiff, r->top + y, dst ) ) if( tiff2vips_strip_read_interleaved( rtiff,
r->top + y, dst ) ) {
VIPS_GATE_STOP( "tiff2vips_stripwise_generate: work" );
return( -1 ); return( -1 );
}
/* If necessary, unpack to destination. /* If necessary, unpack to destination.
*/ */
@ -1389,6 +1406,8 @@ tiff2vips_stripwise_generate( VipsRegion *or,
} }
} }
VIPS_GATE_STOP( "tiff2vips_stripwise_generate: work" );
return( 0 ); return( 0 );
} }

View File

@ -300,6 +300,8 @@ vips_affine_gen( VipsRegion *or, void *seq, void *a, void *b, gboolean *stop )
clipped.height ); clipped.height );
#endif /*DEBUG*/ #endif /*DEBUG*/
VIPS_GATE_START( "vips_affine_gen: work" );
/* Resample! x/y loop over pixels in the output image (5). /* Resample! x/y loop over pixels in the output image (5).
*/ */
for( y = to; y < bo; y++ ) { for( y = to; y < bo; y++ ) {
@ -366,6 +368,8 @@ vips_affine_gen( VipsRegion *or, void *seq, void *a, void *b, gboolean *stop )
} }
} }
VIPS_GATE_STOP( "vips_affine_gen: work" );
return( 0 ); return( 0 );
} }

View File

@ -294,9 +294,13 @@ vips_shrink_gen( VipsRegion *or, void *vseq, void *a, void *b, gboolean *stop )
if( vips_region_prepare( ir, &s ) ) if( vips_region_prepare( ir, &s ) )
return( -1 ); return( -1 );
VIPS_GATE_START( "vips_shrink_gen: work" );
vips_shrink_gen2( shrink, seq, vips_shrink_gen2( shrink, seq,
or, ir, or, ir,
r->left, r->top + y, r->width, height ); r->left, r->top + y, r->width, height );
VIPS_GATE_STOP( "vips_shrink_gen: work" );
} }
return( 0 ); return( 0 );

View File

@ -40,6 +40,11 @@ class Thread:
thread_number = 0 thread_number = 0
def __init__(self, thread_name): def __init__(self, thread_name):
# no one cares about the thread address
match = re.match('(.*) \(0x.*?\) (.*)', thread_name)
if match:
thread_name = match.group(1) + " " + match.group(2)
self.thread_name = thread_name self.thread_name = thread_name
self.thread_number = Thread.thread_number self.thread_number = Thread.thread_number
self.events = [] self.events = []
@ -133,27 +138,37 @@ for thread in threads:
print 'last time =', last_time print 'last time =', last_time
# calculate some simple stats # calculate some simple stats
print 'name\t\t\t\talive\twait%\twork%\tunknown%'
for thread in threads: for thread in threads:
start = last_time thread.start = last_time
stop = 0 thread.stop = 0
wait = 0 thread.wait = 0
work = 0 thread.work = 0
for event in thread.events: for event in thread.events:
if event.start < start: if event.start < thread.start:
start = event.start thread.start = event.start
if event.stop > stop: if event.stop > thread.stop:
stop = event.stop thread.stop = event.stop
if event.wait: if event.wait:
wait += event.stop - event.start thread.wait += event.stop - event.start
if event.work: if event.work:
work += event.stop - event.start thread.work += event.stop - event.start
alive = stop - start thread.alive = thread.stop - thread.start
wait_percent = 100 * wait / alive
work_percent = 100 * work / alive # hide very short-lived threads
unkn_percent = 100 - 100 * (work + wait) / alive thread.hide = thread.alive < 0.001
print '%30s\t%6.2g\t%.3g\t%.3g\t%.3g' % (thread.thread_name, alive, wait_percent, work_percent, unkn_percent)
print 'name\t\talive\twait%\twork%\tunknown%'
for thread in threads:
if thread.hide:
continue
wait_percent = 100 * thread.wait / thread.alive
work_percent = 100 * thread.work / thread.alive
unkn_percent = 100 - 100 * (thread.work + thread.wait) / thread.alive
print '%13s\t%6.2g\t' % (thread.thread_name, thread.alive),
print '%.3g\t%.3g\t%.3g' % (wait_percent, work_percent, unkn_percent)
# do two gates overlap? # do two gates overlap?
def is_overlap(events, gate_name1, gate_name2): def is_overlap(events, gate_name1, gate_name2):
@ -176,6 +191,9 @@ def is_overlap(events, gate_name1, gate_name2):
# allocate a y position for each gate # allocate a y position for each gate
total_y = 0 total_y = 0
for thread in threads: for thread in threads:
if thread.hide:
continue
thread.total_y = total_y thread.total_y = total_y
# first pass .. move work and wait events to y == 0 # first pass .. move work and wait events to y == 0
@ -239,9 +257,9 @@ for thread in threads:
PIXELS_PER_SECOND = 1000 PIXELS_PER_SECOND = 1000
PIXELS_PER_GATE = 20 PIXELS_PER_GATE = 20
LEFT_BORDER = 320 LEFT_BORDER = 130
BAR_HEIGHT = 5 BAR_HEIGHT = 5
WIDTH = int(LEFT_BORDER + last_time * PIXELS_PER_SECOND) + 50 WIDTH = int(LEFT_BORDER + last_time * PIXELS_PER_SECOND) + 20
HEIGHT = int((total_y + 1) * PIXELS_PER_GATE) HEIGHT = int((total_y + 1) * PIXELS_PER_GATE)
output_filename = "vips-profile.svg" output_filename = "vips-profile.svg"
@ -260,7 +278,7 @@ ctx.fill()
def draw_event(ctx, event): def draw_event(ctx, event):
left = event.start * PIXELS_PER_SECOND + LEFT_BORDER left = event.start * PIXELS_PER_SECOND + LEFT_BORDER
top = event.total_y * PIXELS_PER_GATE + BAR_HEIGHT / 2 top = event.total_y * PIXELS_PER_GATE + BAR_HEIGHT / 2
width = (event.stop - event.start) * PIXELS_PER_SECOND - 1 width = (event.stop - event.start) * PIXELS_PER_SECOND
height = BAR_HEIGHT height = BAR_HEIGHT
ctx.rectangle(left, top, width, height) ctx.rectangle(left, top, width, height)
@ -283,13 +301,16 @@ def draw_event(ctx, event):
ctx.show_text(event.gate_name) ctx.show_text(event.gate_name)
for thread in threads: for thread in threads:
if thread.hide:
continue
ctx.rectangle(0, thread.total_y * PIXELS_PER_GATE, WIDTH, 1) ctx.rectangle(0, thread.total_y * PIXELS_PER_GATE, WIDTH, 1)
ctx.set_source_rgb(1.00, 1.00, 1.00) ctx.set_source_rgb(1.00, 1.00, 1.00)
ctx.fill() ctx.fill()
xbearing, ybearing, twidth, theight, xadvance, yadvance = \ xbearing, ybearing, twidth, theight, xadvance, yadvance = \
ctx.text_extents(thread.thread_name) ctx.text_extents(thread.thread_name)
ctx.move_to(0, theight + thread.total_y * PIXELS_PER_GATE) ctx.move_to(0, theight + thread.total_y * PIXELS_PER_GATE + BAR_HEIGHT / 2)
ctx.set_source_rgb(1.00, 1.00, 1.00) ctx.set_source_rgb(1.00, 1.00, 1.00)
ctx.show_text(thread.thread_name) ctx.show_text(thread.thread_name)