run over memuse for sharpen

This commit is contained in:
John Cupitt 2013-12-16 09:22:05 +00:00
parent bd3b3e04f9
commit a9fd318712
4 changed files with 150 additions and 32 deletions

80
TODO
View File

@ -3,11 +3,91 @@
$ vips sharpen wtc.jpg x.jpg --vips-concurrency=1
memory: high-water mark 97.82 MB
$ vips copy wtc.jpg x.v
memory: high-water mark 15.98 MB
$ vips sharpen x.v x2.v --vips-concurrency=1
memory: high-water mark 74.01 MB
$ vips sharpen wtc.jpg x2.jpg --vips-concurrency=1
memory: high-water mark 146.94 MB
and with 12 threads you get huge memuse
can we get this down at all?
baseline:
$ sharpen wtc.jpg x2.jpg
memory: high-water mark 330.14 MB
one thread:
$ vips sharpen wtc.jpg x2.jpg --vips-concurrency=1
vips_line_cache_build: max size = 21.2363 MB
memory: high-water mark 146.94 MB
so about 120mb without the input cache
turn off the buffer recycling system (ie. never put buffers on reserve, always
free them):
$ vips sharpen wtc.jpg x2.jpg --vips-concurrency=1
memory: high-water mark 97.82 MB
ie. buffer recycling costs about 50MB
base + 1 * per-thread == 97mb
base + 2 * per-thread == 161mb
base + 4 * per-thread == 286mb
63mb per thread?
base == 34
input cache == 21, so base is 12 without that
out buffer is 256 lines, two of them, each 9372 across, about 14mb, explains
rest of base cost
63mb per-thread compute cost still mysterious ... does not include input cache
or output buffer
$ vips sharpen x.v x2.v --vips-concurrency=1
vips__buffer_init: buffer reserve disabled
memory: high-water mark 74.01 MB
so 63mb per-thread computation cost, no input cache, output buffer, sounds
about right
tiles are 9372 x 16, so 440kb, we must somehow have about 120 tiles in the
pipeline, is this really right?
load
sRGB2scRGB
scRGB2XYZ
XYZ2Lab
Lab2LabS
extract_band 0 extract_band 1, 2
embed
conv
embed
conv
bandjoin
save
13 operations, plus some extra copies and writes joining them up
have 48 regions alive at peak, that's with two large ones for the two output
buffers
now we recycle buffers, can we revert to more aggressive freeing of buffers?

View File

@ -59,8 +59,9 @@
*/ */
/* /*
#define VIPS_DEBUG #define VIPS_DEBUG_RED
*/ */
#define VIPS_DEBUG
#ifdef HAVE_CONFIG_H #ifdef HAVE_CONFIG_H
#include <config.h> #include <config.h>
@ -295,8 +296,8 @@ vips_tile_find( VipsBlockCache *cache, int x, int y )
/* In cache already? /* In cache already?
*/ */
if( (tile = vips_tile_search( cache, x, y )) ) { if( (tile = vips_tile_search( cache, x, y )) ) {
VIPS_DEBUG_MSG( "vips_tile_find: tile %d x %d in cache\n", VIPS_DEBUG_MSG_RED( "vips_tile_find: "
x, y ); "tile %d x %d in cache\n", x, y );
return( tile ); return( tile );
} }
@ -304,8 +305,8 @@ vips_tile_find( VipsBlockCache *cache, int x, int y )
*/ */
if( cache->max_tiles == -1 || if( cache->max_tiles == -1 ||
cache->ntiles < cache->max_tiles ) { cache->ntiles < cache->max_tiles ) {
VIPS_DEBUG_MSG( "vips_tile_find: making new tile at %d x %d\n", VIPS_DEBUG_MSG_RED( "vips_tile_find: "
x, y ); "making new tile at %d x %d\n", x, y );
if( !(tile = vips_tile_new( cache, x, y )) ) if( !(tile = vips_tile_new( cache, x, y )) )
return( NULL ); return( NULL );
@ -330,7 +331,7 @@ vips_tile_find( VipsBlockCache *cache, int x, int y )
return( tile ); return( tile );
} }
VIPS_DEBUG_MSG( "tilecache: reusing tile %d x %d\n", VIPS_DEBUG_MSG_RED( "vips_tile_find: reusing tile %d x %d\n",
tile->pos.left, tile->pos.top ); tile->pos.left, tile->pos.top );
if( vips_tile_move( tile, x, y ) ) if( vips_tile_move( tile, x, y ) )
@ -366,12 +367,16 @@ vips_block_cache_build( VipsObject *object )
VipsConversion *conversion = VIPS_CONVERSION( object ); VipsConversion *conversion = VIPS_CONVERSION( object );
VipsBlockCache *cache = (VipsBlockCache *) object; VipsBlockCache *cache = (VipsBlockCache *) object;
VIPS_DEBUG_MSG( "vips_block_cache_build\n" ); VIPS_DEBUG_MSG( "vips_block_cache_build:\n" );
if( VIPS_OBJECT_CLASS( vips_block_cache_parent_class )-> if( VIPS_OBJECT_CLASS( vips_block_cache_parent_class )->
build( object ) ) build( object ) )
return( -1 ); return( -1 );
VIPS_DEBUG_MSG( "vips_block_cache_build: max size = %g MB\n",
(cache->max_tiles * cache->tile_width * cache->tile_height *
VIPS_IMAGE_SIZEOF_PEL( cache->in )) / (1024 * 1024.0) );
if( !cache->persistent ) if( !cache->persistent )
g_signal_connect( conversion->out, "minimise", g_signal_connect( conversion->out, "minimise",
G_CALLBACK( vips_block_cache_minimise ), cache ); G_CALLBACK( vips_block_cache_minimise ), cache );
@ -459,7 +464,7 @@ vips_tile_destroy( VipsTile *tile )
{ {
VipsBlockCache *cache = tile->cache; VipsBlockCache *cache = tile->cache;
VIPS_DEBUG_MSG( "vips_tile_destroy: tile %d, %d (%p)\n", VIPS_DEBUG_MSG_RED( "vips_tile_destroy: tile %d, %d (%p)\n",
tile->pos.left, tile->pos.top, tile ); tile->pos.left, tile->pos.top, tile );
cache->ntiles -= 1; cache->ntiles -= 1;
@ -564,7 +569,7 @@ vips_tile_cache_ref( VipsBlockCache *cache, VipsRect *r )
*/ */
work = g_slist_append( work, tile ); work = g_slist_append( work, tile );
VIPS_DEBUG_MSG( "vips_tile_cache_ref: " VIPS_DEBUG_MSG_RED( "vips_tile_cache_ref: "
"tile %d, %d (%p)\n", x, y, tile ); "tile %d, %d (%p)\n", x, y, tile );
} }
@ -604,7 +609,7 @@ vips_tile_cache_gen( VipsRegion *or,
VIPS_GATE_STOP( "vips_tile_cache_gen: wait1" ); VIPS_GATE_STOP( "vips_tile_cache_gen: wait1" );
VIPS_DEBUG_MSG( "vips_tile_cache_gen: " VIPS_DEBUG_MSG_RED( "vips_tile_cache_gen: "
"left = %d, top = %d, width = %d, height = %d\n", "left = %d, top = %d, width = %d, height = %d\n",
r->left, r->top, r->width, r->height ); r->left, r->top, r->width, r->height );
@ -626,8 +631,8 @@ vips_tile_cache_gen( VipsRegion *or,
if( !p ) if( !p )
break; break;
VIPS_DEBUG_MSG( "vips_tile_cache_gen: pasting %p\n", VIPS_DEBUG_MSG_RED( "vips_tile_cache_gen: "
tile ); "pasting %p\n", tile );
vips_tile_paste( tile, or ); vips_tile_paste( tile, or );
@ -650,7 +655,7 @@ vips_tile_cache_gen( VipsRegion *or,
tile->state = VIPS_TILE_STATE_CALC; tile->state = VIPS_TILE_STATE_CALC;
VIPS_DEBUG_MSG( "vips_tile_cache_gen: " VIPS_DEBUG_MSG_RED( "vips_tile_cache_gen: "
"calc of %p\n", tile ); "calc of %p\n", tile );
/* In threaded mode, we let other threads run /* In threaded mode, we let other threads run
@ -667,12 +672,12 @@ vips_tile_cache_gen( VipsRegion *or,
if( cache->threaded ) { if( cache->threaded ) {
VIPS_GATE_START( "vips_tile_cache_gen: " VIPS_GATE_START( "vips_tile_cache_gen: "
"wait2" ); "wait2" );
g_mutex_lock( cache->lock ); g_mutex_lock( cache->lock );
VIPS_GATE_STOP( "vips_tile_cache_gen: " VIPS_GATE_STOP( "vips_tile_cache_gen: "
"wait2" ); "wait2" );
} }
/* If there was an error calculating this /* If there was an error calculating this
@ -683,7 +688,8 @@ vips_tile_cache_gen( VipsRegion *or,
* read to fail because of one broken tile. * read to fail because of one broken tile.
*/ */
if( result ) { if( result ) {
VIPS_DEBUG_MSG( "vips_tile_cache_gen: " VIPS_DEBUG_MSG_RED(
"vips_tile_cache_gen: "
"error on tile %p\n", tile ); "error on tile %p\n", tile );
vips_warn( class->nickname, vips_warn( class->nickname,
@ -722,7 +728,7 @@ vips_tile_cache_gen( VipsRegion *or,
g_assert( tile->state == VIPS_TILE_STATE_CALC ); g_assert( tile->state == VIPS_TILE_STATE_CALC );
} }
VIPS_DEBUG_MSG( "vips_tile_cache_gen: waiting\n" ); VIPS_DEBUG_MSG_RED( "vips_tile_cache_gen: waiting\n" );
VIPS_GATE_START( "vips_tile_cache_gen: wait3" ); VIPS_GATE_START( "vips_tile_cache_gen: wait3" );
@ -946,6 +952,12 @@ vips_line_cache_build( VipsObject *object )
"max_tiles = %d, tile_height = %d\n", "max_tiles = %d, tile_height = %d\n",
block_cache->max_tiles, block_cache->tile_height ); block_cache->max_tiles, block_cache->tile_height );
VIPS_DEBUG_MSG( "vips_line_cache_build: max size = %g MB\n",
(block_cache->max_tiles *
block_cache->tile_width *
block_cache->tile_height *
VIPS_IMAGE_SIZEOF_PEL( block_cache->in )) / (1024 * 1024.0) );
if( vips_image_pio_input( block_cache->in ) ) if( vips_image_pio_input( block_cache->in ) )
return( -1 ); return( -1 );

View File

@ -73,6 +73,11 @@ static GSList *vips__buffers_all = NULL;
static int buffer_cache_n = 0; static int buffer_cache_n = 0;
#endif /*DEBUG_CREATE*/ #endif /*DEBUG_CREATE*/
/* The maximum number of buffers we hold in reserve per thread. About 5 seems
 * enough to stop malloc cycling on vips_sharpen().
 *
 * Currently set to 0: vips_buffer_unref() only places a buffer on the
 * reserve list while n_reserve is below this limit, so with 0 nothing is
 * kept in reserve, and vips__buffer_init() prints a "buffer reserve
 * disabled" notice.
 */
static const int buffer_cache_max_reserve = 0;
static GPrivate *thread_buffer_cache_key = NULL; static GPrivate *thread_buffer_cache_key = NULL;
static void static void
@ -293,7 +298,7 @@ vips_buffer_unref( VipsBuffer *buffer )
/* Place on this thread's reserve list for reuse. /* Place on this thread's reserve list for reuse.
*/ */
if( cache->n_reserve < 5 ) { if( cache->n_reserve < buffer_cache_max_reserve ) {
cache->reserve = cache->reserve =
g_slist_prepend( cache->reserve, buffer ); g_slist_prepend( cache->reserve, buffer );
cache->n_reserve += 1; cache->n_reserve += 1;
@ -528,6 +533,9 @@ vips__buffer_init( void )
thread_buffer_cache_key = g_private_new( thread_buffer_cache_key = g_private_new(
(GDestroyNotify) buffer_cache_free ); (GDestroyNotify) buffer_cache_free );
#endif #endif
if( buffer_cache_max_reserve < 1 )
printf( "vips__buffer_init: buffer reserve disabled\n" );
} }
void void

View File

@ -76,6 +76,7 @@
#define DEBUG_CREATE #define DEBUG_CREATE
#define DEBUG #define DEBUG
*/ */
#define VIPS_DEBUG
#ifdef HAVE_CONFIG_H #ifdef HAVE_CONFIG_H
#include <config.h> #include <config.h>
@ -184,12 +185,23 @@ enum {
G_DEFINE_TYPE( VipsRegion, vips_region, VIPS_TYPE_OBJECT ); G_DEFINE_TYPE( VipsRegion, vips_region, VIPS_TYPE_OBJECT );
#ifdef VIPS_DEBUG
/* Debug-only count of live regions. Incremented in vips_region_init() and
 * decremented in vips_region_finalize(), both while holding
 * vips__global_lock.
 */
static int vips_n_regions = 0;
#endif /*VIPS_DEBUG*/
static void static void
vips_region_finalize( GObject *gobject ) vips_region_finalize( GObject *gobject )
{ {
#ifdef VIPS_DEBUG #ifdef VIPS_DEBUG
VIPS_DEBUG_MSG( "vips_region_finalize: " ); VIPS_DEBUG_MSG( "vips_region_finalize: " );
vips_object_print_name( VIPS_OBJECT( gobject ) ); vips_object_print_name( VIPS_OBJECT( gobject ) );
VIPS_DEBUG_MSG( "\n" );
#endif /*VIPS_DEBUG*/
#ifdef VIPS_DEBUG
g_mutex_lock( vips__global_lock );
vips_n_regions -= 1;
g_mutex_unlock( vips__global_lock );
#endif /*VIPS_DEBUG*/ #endif /*VIPS_DEBUG*/
G_OBJECT_CLASS( vips_region_parent_class )->finalize( gobject ); G_OBJECT_CLASS( vips_region_parent_class )->finalize( gobject );
@ -267,6 +279,7 @@ vips_region_dispose( GObject *gobject )
#ifdef VIPS_DEBUG #ifdef VIPS_DEBUG
VIPS_DEBUG_MSG( "vips_region_dispose: " ); VIPS_DEBUG_MSG( "vips_region_dispose: " );
vips_object_print_name( VIPS_OBJECT( gobject ) ); vips_object_print_name( VIPS_OBJECT( gobject ) );
VIPS_DEBUG_MSG( "\n" );
#endif /*VIPS_DEBUG*/ #endif /*VIPS_DEBUG*/
vips_object_preclose( VIPS_OBJECT( gobject ) ); vips_object_preclose( VIPS_OBJECT( gobject ) );
@ -475,6 +488,13 @@ static void
vips_region_init( VipsRegion *region ) vips_region_init( VipsRegion *region )
{ {
region->type = VIPS_REGION_NONE; region->type = VIPS_REGION_NONE;
#ifdef VIPS_DEBUG
g_mutex_lock( vips__global_lock );
vips_n_regions += 1;
printf( "vips_region_init: %d regions in vips\n", vips_n_regions );
g_mutex_unlock( vips__global_lock );
#endif /*VIPS_DEBUG*/
} }
/** /**
@ -558,24 +578,24 @@ vips_region_buffer( VipsRegion *reg, VipsRect *r )
return( -1 ); return( -1 );
} }
VIPS_FREEF( vips_window_unref, reg->window );
/* Have we been asked to drop caches? We want to throw everything /* Have we been asked to drop caches? We want to throw everything
* away. * away.
* *
* If not, try to reuse the current buffer. * If not, try to reuse the current buffer.
*/ */
if( reg->invalid ) { if( reg->invalid ) {
if( reg->buffer ) VIPS_FREEF( vips_buffer_unref, reg->buffer );
vips_buffer_undone( reg->buffer );
reg->invalid = FALSE; reg->invalid = FALSE;
if( !(reg->buffer = vips_buffer_new( im, &clipped )) ) if( !(reg->buffer = vips_buffer_new( im, &clipped )) )
return( -1 ); return( -1 );
} }
else { else {
/* Don't call vips_region_reset() ... we combine buffer unref /* We combine buffer unref and new buffer ref in one call
* and new buffer ref in one call to reduce malloc/free * to reduce malloc/free cycling.
* cycling.
*/ */
VIPS_FREEF( vips_window_unref, reg->window );
if( !(reg->buffer = if( !(reg->buffer =
vips_buffer_unref_ref( reg->buffer, im, &clipped )) ) vips_buffer_unref_ref( reg->buffer, im, &clipped )) )
return( -1 ); return( -1 );
@ -630,12 +650,13 @@ vips_region_image( VipsRegion *reg, VipsRect *r )
return( -1 ); return( -1 );
} }
VIPS_FREEF( vips_buffer_unref, reg->buffer );
VIPS_FREEF( vips_window_unref, reg->window );
reg->invalid = FALSE;
if( image->data ) { if( image->data ) {
/* We have the whole image available ... easy! /* We have the whole image available ... easy!
*/ */
if( reg->buffer )
vips_buffer_undone( reg->buffer );
reg->invalid = FALSE;
/* We can't just set valid = clipped, since this may be an /* We can't just set valid = clipped, since this may be an
* incompletely calculated memory buffer. Just set valid to r. * incompletely calculated memory buffer. Just set valid to r.
@ -653,9 +674,6 @@ vips_region_image( VipsRegion *reg, VipsRect *r )
reg->window->top > clipped.top || reg->window->top > clipped.top ||
reg->window->top + reg->window->height < reg->window->top + reg->window->height <
clipped.top + clipped.height ) { clipped.top + clipped.height ) {
reg->invalid = FALSE;
VIPS_FREEF( vips_window_unref, reg->window );
if( !(reg->window = vips_window_ref( image, if( !(reg->window = vips_window_ref( image,
clipped.top, clipped.height )) ) clipped.top, clipped.height )) )
return( -1 ); return( -1 );
@ -782,8 +800,8 @@ vips_region_region( VipsRegion *reg,
/* Init new stuff. /* Init new stuff.
*/ */
if( reg->buffer ) VIPS_FREEF( vips_buffer_unref, reg->buffer );
vips_buffer_undone( reg->buffer ); VIPS_FREEF( vips_window_unref, reg->window );
reg->invalid = FALSE; reg->invalid = FALSE;
reg->valid = final; reg->valid = final;
reg->bpl = dest->bpl; reg->bpl = dest->bpl;