run over memuse for sharpen

This commit is contained in:
John Cupitt 2013-12-16 09:22:05 +00:00
parent bd3b3e04f9
commit a9fd318712
4 changed files with 150 additions and 32 deletions

80
TODO
View File

@ -3,11 +3,91 @@
$ vips sharpen wtc.jpg x.jpg --vips-concurrency=1
memory: high-water mark 97.82 MB
$ vips copy wtc.jpg x.v
memory: high-water mark 15.98 MB
$ vips sharpen x.v x2.v --vips-concurrency=1
memory: high-water mark 74.01 MB
$ vips sharpen wtc.jpg x2.jpg --vips-concurrency=1
memory: high-water mark 146.94 MB
and with 12 threads you get huge memuse
can we get this down at all?
baseline:
$ sharpen wtc.jpg x2.jpg
memory: high-water mark 330.14 MB
one thread:
$ vips sharpen wtc.jpg x2.jpg --vips-concurrency=1
vips_line_cache_build: max size = 21.2363 MB
memory: high-water mark 146.94 MB
so about 120mb without the input cache
turn off the buffer recycling system (ie. never put buffers on reserve, always
free them):
$ vips sharpen wtc.jpg x2.jpg --vips-concurrency=1
memory: high-water mark 97.82 MB
ie. buffer recycling costs about 50MB
base + 1 * per-thread == 97mb
base + 2 * per-thread == 161mb
base + 4 * per-thread == 286mb
63mb per thread?
base == 34
input cache == 21, so base is 12 without that
out buffer is 256 lines, two of them, each 9372 across, about 14mb, explains
rest of base cost
63mb per-thread compute cost still mysterious ... does not include input cache
or output buffer
$ vips sharpen x.v x2.v --vips-concurrency=1
vips__buffer_init: buffer reserve disabled
memory: high-water mark 74.01 MB
so 63mb per-thread computation cost, no input cache, output buffer, souds
about right
tiles are 9372 x 16, so 440kb, we must somehow have about 120 tiles in the
pipeline, is this really right?
load
sRGB2scRGB
scRGB2XYZ
XYZ2Lab
Lab2LabS
extract_band 0 extract_band 1, 2
embed
conv
embed
conv
bandjoin
save
13 operations, plus some extra copies and writes joining them up
have 48 regions alive at peak, that's with two large ones for the two output
buffers
now we recycle buffers, can we revert to more aggressive freeing of buffers?

View File

@ -59,8 +59,9 @@
*/
/*
#define VIPS_DEBUG
#define VIPS_DEBUG_RED
*/
#define VIPS_DEBUG
#ifdef HAVE_CONFIG_H
#include <config.h>
@ -295,8 +296,8 @@ vips_tile_find( VipsBlockCache *cache, int x, int y )
/* In cache already?
*/
if( (tile = vips_tile_search( cache, x, y )) ) {
VIPS_DEBUG_MSG( "vips_tile_find: tile %d x %d in cache\n",
x, y );
VIPS_DEBUG_MSG_RED( "vips_tile_find: "
"tile %d x %d in cache\n", x, y );
return( tile );
}
@ -304,8 +305,8 @@ vips_tile_find( VipsBlockCache *cache, int x, int y )
*/
if( cache->max_tiles == -1 ||
cache->ntiles < cache->max_tiles ) {
VIPS_DEBUG_MSG( "vips_tile_find: making new tile at %d x %d\n",
x, y );
VIPS_DEBUG_MSG_RED( "vips_tile_find: "
"making new tile at %d x %d\n", x, y );
if( !(tile = vips_tile_new( cache, x, y )) )
return( NULL );
@ -330,7 +331,7 @@ vips_tile_find( VipsBlockCache *cache, int x, int y )
return( tile );
}
VIPS_DEBUG_MSG( "tilecache: reusing tile %d x %d\n",
VIPS_DEBUG_MSG_RED( "vips_tile_find: reusing tile %d x %d\n",
tile->pos.left, tile->pos.top );
if( vips_tile_move( tile, x, y ) )
@ -366,12 +367,16 @@ vips_block_cache_build( VipsObject *object )
VipsConversion *conversion = VIPS_CONVERSION( object );
VipsBlockCache *cache = (VipsBlockCache *) object;
VIPS_DEBUG_MSG( "vips_block_cache_build\n" );
VIPS_DEBUG_MSG( "vips_block_cache_build:\n" );
if( VIPS_OBJECT_CLASS( vips_block_cache_parent_class )->
build( object ) )
return( -1 );
VIPS_DEBUG_MSG( "vips_block_cache_build: max size = %g MB\n",
(cache->max_tiles * cache->tile_width * cache->tile_height *
VIPS_IMAGE_SIZEOF_PEL( cache->in )) / (1024 * 1024.0) );
if( !cache->persistent )
g_signal_connect( conversion->out, "minimise",
G_CALLBACK( vips_block_cache_minimise ), cache );
@ -459,7 +464,7 @@ vips_tile_destroy( VipsTile *tile )
{
VipsBlockCache *cache = tile->cache;
VIPS_DEBUG_MSG( "vips_tile_destroy: tile %d, %d (%p)\n",
VIPS_DEBUG_MSG_RED( "vips_tile_destroy: tile %d, %d (%p)\n",
tile->pos.left, tile->pos.top, tile );
cache->ntiles -= 1;
@ -564,7 +569,7 @@ vips_tile_cache_ref( VipsBlockCache *cache, VipsRect *r )
*/
work = g_slist_append( work, tile );
VIPS_DEBUG_MSG( "vips_tile_cache_ref: "
VIPS_DEBUG_MSG_RED( "vips_tile_cache_ref: "
"tile %d, %d (%p)\n", x, y, tile );
}
@ -604,7 +609,7 @@ vips_tile_cache_gen( VipsRegion *or,
VIPS_GATE_STOP( "vips_tile_cache_gen: wait1" );
VIPS_DEBUG_MSG( "vips_tile_cache_gen: "
VIPS_DEBUG_MSG_RED( "vips_tile_cache_gen: "
"left = %d, top = %d, width = %d, height = %d\n",
r->left, r->top, r->width, r->height );
@ -626,8 +631,8 @@ vips_tile_cache_gen( VipsRegion *or,
if( !p )
break;
VIPS_DEBUG_MSG( "vips_tile_cache_gen: pasting %p\n",
tile );
VIPS_DEBUG_MSG_RED( "vips_tile_cache_gen: "
"pasting %p\n", tile );
vips_tile_paste( tile, or );
@ -650,7 +655,7 @@ vips_tile_cache_gen( VipsRegion *or,
tile->state = VIPS_TILE_STATE_CALC;
VIPS_DEBUG_MSG( "vips_tile_cache_gen: "
VIPS_DEBUG_MSG_RED( "vips_tile_cache_gen: "
"calc of %p\n", tile );
/* In threaded mode, we let other threads run
@ -667,12 +672,12 @@ vips_tile_cache_gen( VipsRegion *or,
if( cache->threaded ) {
VIPS_GATE_START( "vips_tile_cache_gen: "
"wait2" );
"wait2" );
g_mutex_lock( cache->lock );
VIPS_GATE_STOP( "vips_tile_cache_gen: "
"wait2" );
"wait2" );
}
/* If there was an error calculating this
@ -683,7 +688,8 @@ vips_tile_cache_gen( VipsRegion *or,
* read to fail because of one broken tile.
*/
if( result ) {
VIPS_DEBUG_MSG( "vips_tile_cache_gen: "
VIPS_DEBUG_MSG_RED(
"vips_tile_cache_gen: "
"error on tile %p\n", tile );
vips_warn( class->nickname,
@ -722,7 +728,7 @@ vips_tile_cache_gen( VipsRegion *or,
g_assert( tile->state == VIPS_TILE_STATE_CALC );
}
VIPS_DEBUG_MSG( "vips_tile_cache_gen: waiting\n" );
VIPS_DEBUG_MSG_RED( "vips_tile_cache_gen: waiting\n" );
VIPS_GATE_START( "vips_tile_cache_gen: wait3" );
@ -946,6 +952,12 @@ vips_line_cache_build( VipsObject *object )
"max_tiles = %d, tile_height = %d\n",
block_cache->max_tiles, block_cache->tile_height );
VIPS_DEBUG_MSG( "vips_line_cache_build: max size = %g MB\n",
(block_cache->max_tiles *
block_cache->tile_width *
block_cache->tile_height *
VIPS_IMAGE_SIZEOF_PEL( block_cache->in )) / (1024 * 1024.0) );
if( vips_image_pio_input( block_cache->in ) )
return( -1 );

View File

@ -73,6 +73,11 @@ static GSList *vips__buffers_all = NULL;
static int buffer_cache_n = 0;
#endif /*DEBUG_CREATE*/
/* The maximum numbers of buffers we hold in reserve per thread. About 5 seems
* enough to stop malloc cycling on vips_sharpen().
*/
static const int buffer_cache_max_reserve = 0;
static GPrivate *thread_buffer_cache_key = NULL;
static void
@ -293,7 +298,7 @@ vips_buffer_unref( VipsBuffer *buffer )
/* Place on this thread's reserve list for reuse.
*/
if( cache->n_reserve < 5 ) {
if( cache->n_reserve < buffer_cache_max_reserve ) {
cache->reserve =
g_slist_prepend( cache->reserve, buffer );
cache->n_reserve += 1;
@ -528,6 +533,9 @@ vips__buffer_init( void )
thread_buffer_cache_key = g_private_new(
(GDestroyNotify) buffer_cache_free );
#endif
if( buffer_cache_max_reserve < 1 )
printf( "vips__buffer_init: buffer reserve disabled\n" );
}
void

View File

@ -76,6 +76,7 @@
#define DEBUG_CREATE
#define DEBUG
*/
#define VIPS_DEBUG
#ifdef HAVE_CONFIG_H
#include <config.h>
@ -184,12 +185,23 @@ enum {
G_DEFINE_TYPE( VipsRegion, vips_region, VIPS_TYPE_OBJECT );
#ifdef VIPS_DEBUG
static int vips_n_regions = 0;
#endif /*DEBUG*/
static void
vips_region_finalize( GObject *gobject )
{
#ifdef VIPS_DEBUG
VIPS_DEBUG_MSG( "vips_region_finalize: " );
vips_object_print_name( VIPS_OBJECT( gobject ) );
VIPS_DEBUG_MSG( "\n" );
#endif /*VIPS_DEBUG*/
#ifdef VIPS_DEBUG
g_mutex_lock( vips__global_lock );
vips_n_regions -= 1;
g_mutex_unlock( vips__global_lock );
#endif /*VIPS_DEBUG*/
G_OBJECT_CLASS( vips_region_parent_class )->finalize( gobject );
@ -267,6 +279,7 @@ vips_region_dispose( GObject *gobject )
#ifdef VIPS_DEBUG
VIPS_DEBUG_MSG( "vips_region_dispose: " );
vips_object_print_name( VIPS_OBJECT( gobject ) );
VIPS_DEBUG_MSG( "\n" );
#endif /*VIPS_DEBUG*/
vips_object_preclose( VIPS_OBJECT( gobject ) );
@ -475,6 +488,13 @@ static void
vips_region_init( VipsRegion *region )
{
region->type = VIPS_REGION_NONE;
#ifdef VIPS_DEBUG
g_mutex_lock( vips__global_lock );
vips_n_regions += 1;
printf( "vips_region_init: %d regions in vips\n", vips_n_regions );
g_mutex_unlock( vips__global_lock );
#endif /*VIPS_DEBUG*/
}
/**
@ -558,24 +578,24 @@ vips_region_buffer( VipsRegion *reg, VipsRect *r )
return( -1 );
}
VIPS_FREEF( vips_window_unref, reg->window );
/* Have we been asked to drop caches? We want to throw everything
* away.
*
* If not, try to reuse the current buffer.
*/
if( reg->invalid ) {
if( reg->buffer )
vips_buffer_undone( reg->buffer );
VIPS_FREEF( vips_buffer_unref, reg->buffer );
reg->invalid = FALSE;
if( !(reg->buffer = vips_buffer_new( im, &clipped )) )
return( -1 );
}
else {
/* Don't call vips_region_reset() ... we combine buffer unref
* and new buffer ref in one call to reduce malloc/free
* cycling.
/* We combine buffer unref and new buffer ref in one call
* to reduce malloc/free cycling.
*/
VIPS_FREEF( vips_window_unref, reg->window );
if( !(reg->buffer =
vips_buffer_unref_ref( reg->buffer, im, &clipped )) )
return( -1 );
@ -630,12 +650,13 @@ vips_region_image( VipsRegion *reg, VipsRect *r )
return( -1 );
}
VIPS_FREEF( vips_buffer_unref, reg->buffer );
VIPS_FREEF( vips_window_unref, reg->window );
reg->invalid = FALSE;
if( image->data ) {
/* We have the whole image available ... easy!
*/
if( reg->buffer )
vips_buffer_undone( reg->buffer );
reg->invalid = FALSE;
/* We can't just set valid = clipped, since this may be an
* incompletely calculated memory buffer. Just set valid to r.
@ -653,9 +674,6 @@ vips_region_image( VipsRegion *reg, VipsRect *r )
reg->window->top > clipped.top ||
reg->window->top + reg->window->height <
clipped.top + clipped.height ) {
reg->invalid = FALSE;
VIPS_FREEF( vips_window_unref, reg->window );
if( !(reg->window = vips_window_ref( image,
clipped.top, clipped.height )) )
return( -1 );
@ -782,8 +800,8 @@ vips_region_region( VipsRegion *reg,
/* Init new stuff.
*/
if( reg->buffer )
vips_buffer_undone( reg->buffer );
VIPS_FREEF( vips_buffer_unref, reg->buffer );
VIPS_FREEF( vips_window_unref, reg->window );
reg->invalid = FALSE;
reg->valid = final;
reg->bpl = dest->bpl;