more polishing

and instrument stuff for vipsthumbnail
This commit is contained in:
John Cupitt 2013-11-28 14:56:19 +00:00
parent a83b44b7a4
commit e0b9b3cd5c
5 changed files with 104 additions and 28 deletions

38
TODO
View File

@ -1,11 +1,39 @@
- overlap doesn't seem to work
- check the operations called in sharpen: looking at the trace, we seem to
have some gaps
- try disabling turbo-boost?
calculate unknown% (unk%) as total time - (work time + wait time)
see http://blog.tube42.se/?p=1225
for i in {0..11}; do sudo cpufreq-set -f 1600000 -c $i; done
(the -r option of cpufreq-set doesn't seem to work, hence the per-core loop)
verify with
cpufreq-info | more
timings with the CPU locked at 1.6 GHz:
$ time vips sharpen k2.v x.v --radius 20 --vips-concurrency=1
real 0m2.232s
user 0m2.212s
sys 0m0.036s
$ time vips sharpen k2.v x.v --radius 20 --vips-concurrency=2
real 0m1.295s
user 0m2.448s
sys 0m0.060s
$ time vips sharpen k2.jpg x.jpg --radius 20 --vips-concurrency=1
real 0m2.760s
user 0m2.768s
sys 0m0.024s
$ time vips sharpen k2.jpg x.jpg --radius 20 --vips-concurrency=2
real 0m1.558s
user 0m2.988s
sys 0m0.060s

View File

@ -1083,12 +1083,18 @@ tiff_fill_region_aligned( VipsRegion *out, void *seq, void *a, void *b )
r->left, r->top );
#endif /*DEBUG*/
VIPS_GATE_START( "tiff_fill_region_aligned: work" );
/* Read that tile directly into the vips tile.
*/
if( TIFFReadTile( rtiff->tiff,
VIPS_REGION_ADDR( out, r->left, r->top ),
r->left, r->top, 0, 0 ) < 0 )
r->left, r->top, 0, 0 ) < 0 ) {
VIPS_GATE_STOP( "tiff_fill_region_aligned: work" );
return( -1 );
}
VIPS_GATE_STOP( "tiff_fill_region_aligned: work" );
return( 0 );
}
@ -1132,6 +1138,8 @@ tiff_fill_region( VipsRegion *out, void *seq, void *a, void *b, gboolean *stop )
VIPS_REGION_LSKIP( out ) == VIPS_REGION_SIZEOF_LINE( out ) )
return( tiff_fill_region_aligned( out, seq, a, b ) );
VIPS_GATE_START( "tiff_fill_region: work" );
for( y = ys; y < VIPS_RECT_BOTTOM( r ); y += rtiff->theight )
for( x = xs; x < VIPS_RECT_RIGHT( r ); x += rtiff->twidth ) {
VipsRect tile;
@ -1139,8 +1147,10 @@ tiff_fill_region( VipsRegion *out, void *seq, void *a, void *b, gboolean *stop )
/* Read that tile.
*/
if( TIFFReadTile( rtiff->tiff, buf, x, y, 0, 0 ) < 0 )
if( TIFFReadTile( rtiff->tiff, buf, x, y, 0, 0 ) < 0 ) {
VIPS_GATE_STOP( "tiff_fill_region: work" );
return( -1 );
}
/* The tile we read.
*/
@ -1168,6 +1178,8 @@ tiff_fill_region( VipsRegion *out, void *seq, void *a, void *b, gboolean *stop )
}
}
VIPS_GATE_STOP( "tiff_fill_region: work" );
return( 0 );
}
@ -1353,6 +1365,8 @@ tiff2vips_stripwise_generate( VipsRegion *or,
g_assert( r->height ==
VIPS_MIN( rtiff->rows_per_strip, or->im->Ysize - r->top ) );
VIPS_GATE_START( "tiff2vips_stripwise_generate: work" );
for( y = 0; y < r->height; y += rtiff->rows_per_strip ) {
tdata_t dst;
@ -1364,8 +1378,11 @@ tiff2vips_stripwise_generate( VipsRegion *or,
else
dst = rtiff->contig_buf;
if( tiff2vips_strip_read_interleaved( rtiff, r->top + y, dst ) )
if( tiff2vips_strip_read_interleaved( rtiff,
r->top + y, dst ) ) {
VIPS_GATE_STOP( "tiff2vips_stripwise_generate: work" );
return( -1 );
}
/* If necessary, unpack to destination.
*/
@ -1389,6 +1406,8 @@ tiff2vips_stripwise_generate( VipsRegion *or,
}
}
VIPS_GATE_STOP( "tiff2vips_stripwise_generate: work" );
return( 0 );
}

View File

@ -300,6 +300,8 @@ vips_affine_gen( VipsRegion *or, void *seq, void *a, void *b, gboolean *stop )
clipped.height );
#endif /*DEBUG*/
VIPS_GATE_START( "vips_affine_gen: work" );
/* Resample! x/y loop over pixels in the output image (5).
*/
for( y = to; y < bo; y++ ) {
@ -366,6 +368,8 @@ vips_affine_gen( VipsRegion *or, void *seq, void *a, void *b, gboolean *stop )
}
}
VIPS_GATE_STOP( "vips_affine_gen: work" );
return( 0 );
}

View File

@ -294,9 +294,13 @@ vips_shrink_gen( VipsRegion *or, void *vseq, void *a, void *b, gboolean *stop )
if( vips_region_prepare( ir, &s ) )
return( -1 );
VIPS_GATE_START( "vips_shrink_gen: work" );
vips_shrink_gen2( shrink, seq,
or, ir,
r->left, r->top + y, r->width, height );
VIPS_GATE_STOP( "vips_shrink_gen: work" );
}
return( 0 );

View File

@ -40,6 +40,11 @@ class Thread:
thread_number = 0
def __init__(self, thread_name):
# no one cares about the thread address
match = re.match('(.*) \(0x.*?\) (.*)', thread_name)
if match:
thread_name = match.group(1) + " " + match.group(2)
self.thread_name = thread_name
self.thread_number = Thread.thread_number
self.events = []
@ -133,27 +138,37 @@ for thread in threads:
print 'last time =', last_time
# calculate some simple stats
print 'name\t\t\t\talive\twait%\twork%\tunknown%'
for thread in threads:
start = last_time
stop = 0
wait = 0
work = 0
thread.start = last_time
thread.stop = 0
thread.wait = 0
thread.work = 0
for event in thread.events:
if event.start < start:
start = event.start
if event.stop > stop:
stop = event.stop
if event.start < thread.start:
thread.start = event.start
if event.stop > thread.stop:
thread.stop = event.stop
if event.wait:
wait += event.stop - event.start
thread.wait += event.stop - event.start
if event.work:
work += event.stop - event.start
thread.work += event.stop - event.start
alive = stop - start
wait_percent = 100 * wait / alive
work_percent = 100 * work / alive
unkn_percent = 100 - 100 * (work + wait) / alive
print '%30s\t%6.2g\t%.3g\t%.3g\t%.3g' % (thread.thread_name, alive, wait_percent, work_percent, unkn_percent)
thread.alive = thread.stop - thread.start
# hide very short-lived threads
thread.hide = thread.alive < 0.001
print 'name\t\talive\twait%\twork%\tunknown%'
for thread in threads:
if thread.hide:
continue
wait_percent = 100 * thread.wait / thread.alive
work_percent = 100 * thread.work / thread.alive
unkn_percent = 100 - 100 * (thread.work + thread.wait) / thread.alive
print '%13s\t%6.2g\t' % (thread.thread_name, thread.alive),
print '%.3g\t%.3g\t%.3g' % (wait_percent, work_percent, unkn_percent)
# do two gates overlap?
def is_overlap(events, gate_name1, gate_name2):
@ -176,6 +191,9 @@ def is_overlap(events, gate_name1, gate_name2):
# allocate a y position for each gate
total_y = 0
for thread in threads:
if thread.hide:
continue
thread.total_y = total_y
# first pass .. move work and wait events to y == 0
@ -239,9 +257,9 @@ for thread in threads:
PIXELS_PER_SECOND = 1000
PIXELS_PER_GATE = 20
LEFT_BORDER = 320
LEFT_BORDER = 130
BAR_HEIGHT = 5
WIDTH = int(LEFT_BORDER + last_time * PIXELS_PER_SECOND) + 50
WIDTH = int(LEFT_BORDER + last_time * PIXELS_PER_SECOND) + 20
HEIGHT = int((total_y + 1) * PIXELS_PER_GATE)
output_filename = "vips-profile.svg"
@ -260,7 +278,7 @@ ctx.fill()
def draw_event(ctx, event):
left = event.start * PIXELS_PER_SECOND + LEFT_BORDER
top = event.total_y * PIXELS_PER_GATE + BAR_HEIGHT / 2
width = (event.stop - event.start) * PIXELS_PER_SECOND - 1
width = (event.stop - event.start) * PIXELS_PER_SECOND
height = BAR_HEIGHT
ctx.rectangle(left, top, width, height)
@ -283,13 +301,16 @@ def draw_event(ctx, event):
ctx.show_text(event.gate_name)
for thread in threads:
if thread.hide:
continue
ctx.rectangle(0, thread.total_y * PIXELS_PER_GATE, WIDTH, 1)
ctx.set_source_rgb(1.00, 1.00, 1.00)
ctx.fill()
xbearing, ybearing, twidth, theight, xadvance, yadvance = \
ctx.text_extents(thread.thread_name)
ctx.move_to(0, theight + thread.total_y * PIXELS_PER_GATE)
ctx.move_to(0, theight + thread.total_y * PIXELS_PER_GATE + BAR_HEIGHT / 2)
ctx.set_source_rgb(1.00, 1.00, 1.00)
ctx.show_text(thread.thread_name)