From a9fd31871258b5faa164add2255412713d390208 Mon Sep 17 00:00:00 2001 From: John Cupitt Date: Mon, 16 Dec 2013 09:22:05 +0000 Subject: [PATCH] run over memuse for sharpen --- TODO | 80 ++++++++++++++++++++++++++++++++++ libvips/conversion/tilecache.c | 46 +++++++++++-------- libvips/iofuncs/buffer.c | 10 ++++- libvips/iofuncs/region.c | 46 +++++++++++++------ 4 files changed, 150 insertions(+), 32 deletions(-) diff --git a/TODO b/TODO index a8fe786a..96b25422 100644 --- a/TODO +++ b/TODO @@ -3,11 +3,91 @@ $ vips sharpen wtc.jpg x.jpg --vips-concurrency=1 memory: high-water mark 97.82 MB + +$ vips copy wtc.jpg x.v +memory: high-water mark 15.98 MB +$ vips sharpen x.v x2.v --vips-concurrency=1 +memory: high-water mark 74.01 MB +$ vips sharpen wtc.jpg x2.jpg --vips-concurrency=1 +memory: high-water mark 146.94 MB + and with 12 threads you get huge memuse can we get this down at all? +baseline: + +$ sharpen wtc.jpg x2.jpg +memory: high-water mark 330.14 MB + +one thread: + +$ vips sharpen wtc.jpg x2.jpg --vips-concurrency=1 +vips_line_cache_build: max size = 21.2363 MB +memory: high-water mark 146.94 MB + +so about 120mb without the input cache + +turn off the buffer recycling system (ie. never put buffers on reserve, always +free them): + +$ vips sharpen wtc.jpg x2.jpg --vips-concurrency=1 +memory: high-water mark 97.82 MB + +ie. buffer recycling costs about 50MB + +base + 1 * per-thread == 97mb +base + 2 * per-thread == 161mb +base + 4 * per-thread == 286mb + +63mb per thread? + +base == 34 + +input cache == 21, so base is 12 without that + +out buffer is 256 lines, two of them, each 9372 across, about 14mb, explains +rest of base cost + +63mb per-thread compute cost still mysterious ... does not include input cache +or output buffer + +$ vips sharpen x.v x2.v --vips-concurrency=1 +vips__buffer_init: buffer reserve disabled +memory: high-water mark 74.01 MB + +so 63mb per-thread computation cost, no input cache, output buffer, souds +about right + +tiles are 9372 x 16, so 440kb, we must somehow have about 120 tiles in the +pipeline, is this really right? + + load + sRGB2scRGB + scRGB2XYZ + XYZ2Lab + Lab2LabS + extract_band 0 extract_band 1, 2 + embed + conv + embed + conv + bandjoin + save + +13 operations, plus some extra copies and writes joining them up + +have 48 regions alive at peak, that's with two large ones for the two output +buffers + +now we recycle buffers, can we revert to more aggressive freeing of buffers? + + + + + + diff --git a/libvips/conversion/tilecache.c b/libvips/conversion/tilecache.c index b3bb28ac..2ddc0096 100644 --- a/libvips/conversion/tilecache.c +++ b/libvips/conversion/tilecache.c @@ -59,8 +59,9 @@ */ /* -#define VIPS_DEBUG +#define VIPS_DEBUG_RED */ +#define VIPS_DEBUG #ifdef HAVE_CONFIG_H #include @@ -295,8 +296,8 @@ vips_tile_find( VipsBlockCache *cache, int x, int y ) /* In cache already? */ if( (tile = vips_tile_search( cache, x, y )) ) { - VIPS_DEBUG_MSG( "vips_tile_find: tile %d x %d in cache\n", - x, y ); + VIPS_DEBUG_MSG_RED( "vips_tile_find: " + "tile %d x %d in cache\n", x, y ); return( tile ); } @@ -304,8 +305,8 @@ vips_tile_find( VipsBlockCache *cache, int x, int y ) */ if( cache->max_tiles == -1 || cache->ntiles < cache->max_tiles ) { - VIPS_DEBUG_MSG( "vips_tile_find: making new tile at %d x %d\n", - x, y ); + VIPS_DEBUG_MSG_RED( "vips_tile_find: " + "making new tile at %d x %d\n", x, y ); if( !(tile = vips_tile_new( cache, x, y )) ) return( NULL ); @@ -330,7 +331,7 @@ vips_tile_find( VipsBlockCache *cache, int x, int y ) return( tile ); } - VIPS_DEBUG_MSG( "tilecache: reusing tile %d x %d\n", + VIPS_DEBUG_MSG_RED( "vips_tile_find: reusing tile %d x %d\n", tile->pos.left, tile->pos.top ); if( vips_tile_move( tile, x, y ) ) @@ -366,12 +367,16 @@ vips_block_cache_build( VipsObject *object ) VipsConversion *conversion = VIPS_CONVERSION( object ); VipsBlockCache *cache = (VipsBlockCache *) object; - VIPS_DEBUG_MSG( "vips_block_cache_build\n" ); + VIPS_DEBUG_MSG( "vips_block_cache_build:\n" ); if( VIPS_OBJECT_CLASS( vips_block_cache_parent_class )-> build( object ) ) return( -1 ); + VIPS_DEBUG_MSG( "vips_block_cache_build: max size = %g MB\n", + (cache->max_tiles * cache->tile_width * cache->tile_height * + VIPS_IMAGE_SIZEOF_PEL( cache->in )) / (1024 * 1024.0) ); + if( !cache->persistent ) g_signal_connect( conversion->out, "minimise", G_CALLBACK( vips_block_cache_minimise ), cache ); @@ -459,7 +464,7 @@ vips_tile_destroy( VipsTile *tile ) { VipsBlockCache *cache = tile->cache; - VIPS_DEBUG_MSG( "vips_tile_destroy: tile %d, %d (%p)\n", + VIPS_DEBUG_MSG_RED( "vips_tile_destroy: tile %d, %d (%p)\n", tile->pos.left, tile->pos.top, tile ); cache->ntiles -= 1; @@ -564,7 +569,7 @@ vips_tile_cache_ref( VipsBlockCache *cache, VipsRect *r ) */ work = g_slist_append( work, tile ); - VIPS_DEBUG_MSG( "vips_tile_cache_ref: " + VIPS_DEBUG_MSG_RED( "vips_tile_cache_ref: " "tile %d, %d (%p)\n", x, y, tile ); } @@ -604,7 +609,7 @@ vips_tile_cache_gen( VipsRegion *or, VIPS_GATE_STOP( "vips_tile_cache_gen: wait1" ); - VIPS_DEBUG_MSG( "vips_tile_cache_gen: " + VIPS_DEBUG_MSG_RED( "vips_tile_cache_gen: " "left = %d, top = %d, width = %d, height = %d\n", r->left, r->top, r->width, r->height ); @@ -626,8 +631,8 @@ vips_tile_cache_gen( VipsRegion *or, if( !p ) break; - VIPS_DEBUG_MSG( "vips_tile_cache_gen: pasting %p\n", - tile ); + VIPS_DEBUG_MSG_RED( "vips_tile_cache_gen: " + "pasting %p\n", tile ); vips_tile_paste( tile, or ); @@ -650,7 +655,7 @@ vips_tile_cache_gen( VipsRegion *or, tile->state = VIPS_TILE_STATE_CALC; - VIPS_DEBUG_MSG( "vips_tile_cache_gen: " + VIPS_DEBUG_MSG_RED( "vips_tile_cache_gen: " "calc of %p\n", tile ); /* In threaded mode, we let other threads run @@ -667,12 +672,12 @@ vips_tile_cache_gen( VipsRegion *or, if( cache->threaded ) { VIPS_GATE_START( "vips_tile_cache_gen: " - "wait2" ); + "wait2" ); g_mutex_lock( cache->lock ); VIPS_GATE_STOP( "vips_tile_cache_gen: " - "wait2" ); + "wait2" ); } /* If there was an error calculating this @@ -683,7 +688,8 @@ vips_tile_cache_gen( VipsRegion *or, * read to fail because of one broken tile. */ if( result ) { - VIPS_DEBUG_MSG( "vips_tile_cache_gen: " + VIPS_DEBUG_MSG_RED( + "vips_tile_cache_gen: " "error on tile %p\n", tile ); vips_warn( class->nickname, @@ -722,7 +728,7 @@ vips_tile_cache_gen( VipsRegion *or, g_assert( tile->state == VIPS_TILE_STATE_CALC ); } - VIPS_DEBUG_MSG( "vips_tile_cache_gen: waiting\n" ); + VIPS_DEBUG_MSG_RED( "vips_tile_cache_gen: waiting\n" ); VIPS_GATE_START( "vips_tile_cache_gen: wait3" ); @@ -946,6 +952,12 @@ vips_line_cache_build( VipsObject *object ) "max_tiles = %d, tile_height = %d\n", block_cache->max_tiles, block_cache->tile_height ); + VIPS_DEBUG_MSG( "vips_line_cache_build: max size = %g MB\n", + (block_cache->max_tiles * + block_cache->tile_width * + block_cache->tile_height * + VIPS_IMAGE_SIZEOF_PEL( block_cache->in )) / (1024 * 1024.0) ); + if( vips_image_pio_input( block_cache->in ) ) return( -1 ); diff --git a/libvips/iofuncs/buffer.c b/libvips/iofuncs/buffer.c index 93b7fc90..2f55c986 100644 --- a/libvips/iofuncs/buffer.c +++ b/libvips/iofuncs/buffer.c @@ -73,6 +73,11 @@ static GSList *vips__buffers_all = NULL; static int buffer_cache_n = 0; #endif /*DEBUG_CREATE*/ +/* The maximum numbers of buffers we hold in reserve per thread. About 5 seems + * enough to stop malloc cycling on vips_sharpen(). + */ +static const int buffer_cache_max_reserve = 0; + static GPrivate *thread_buffer_cache_key = NULL; static void @@ -293,7 +298,7 @@ vips_buffer_unref( VipsBuffer *buffer ) /* Place on this thread's reserve list for reuse. */ - if( cache->n_reserve < 5 ) { + if( cache->n_reserve < buffer_cache_max_reserve ) { cache->reserve = g_slist_prepend( cache->reserve, buffer ); cache->n_reserve += 1; @@ -528,6 +533,9 @@ vips__buffer_init( void ) thread_buffer_cache_key = g_private_new( (GDestroyNotify) buffer_cache_free ); #endif + + if( buffer_cache_max_reserve < 1 ) + printf( "vips__buffer_init: buffer reserve disabled\n" ); } void diff --git a/libvips/iofuncs/region.c b/libvips/iofuncs/region.c index 77598091..d53eff3c 100644 --- a/libvips/iofuncs/region.c +++ b/libvips/iofuncs/region.c @@ -76,6 +76,7 @@ #define DEBUG_CREATE #define DEBUG */ +#define VIPS_DEBUG #ifdef HAVE_CONFIG_H #include @@ -184,12 +185,23 @@ enum { G_DEFINE_TYPE( VipsRegion, vips_region, VIPS_TYPE_OBJECT ); +#ifdef VIPS_DEBUG +static int vips_n_regions = 0; +#endif /*DEBUG*/ + static void vips_region_finalize( GObject *gobject ) { #ifdef VIPS_DEBUG VIPS_DEBUG_MSG( "vips_region_finalize: " ); vips_object_print_name( VIPS_OBJECT( gobject ) ); + VIPS_DEBUG_MSG( "\n" ); +#endif /*VIPS_DEBUG*/ + +#ifdef VIPS_DEBUG + g_mutex_lock( vips__global_lock ); + vips_n_regions -= 1; + g_mutex_unlock( vips__global_lock ); #endif /*VIPS_DEBUG*/ G_OBJECT_CLASS( vips_region_parent_class )->finalize( gobject ); @@ -267,6 +279,7 @@ vips_region_dispose( GObject *gobject ) #ifdef VIPS_DEBUG VIPS_DEBUG_MSG( "vips_region_dispose: " ); vips_object_print_name( VIPS_OBJECT( gobject ) ); + VIPS_DEBUG_MSG( "\n" ); #endif /*VIPS_DEBUG*/ vips_object_preclose( VIPS_OBJECT( gobject ) ); @@ -475,6 +488,13 @@ static void vips_region_init( VipsRegion *region ) { region->type = VIPS_REGION_NONE; + +#ifdef VIPS_DEBUG + g_mutex_lock( vips__global_lock ); + vips_n_regions += 1; + printf( "vips_region_init: %d regions in vips\n", vips_n_regions ); + g_mutex_unlock( vips__global_lock ); +#endif /*VIPS_DEBUG*/ } /** @@ -558,24 +578,24 @@ vips_region_buffer( VipsRegion *reg, VipsRect *r ) return( -1 ); } + VIPS_FREEF( vips_window_unref, reg->window ); + /* Have we been asked to drop caches? We want to throw everything * away. * * If not, try to reuse the current buffer. */ if( reg->invalid ) { - if( reg->buffer ) - vips_buffer_undone( reg->buffer ); + VIPS_FREEF( vips_buffer_unref, reg->buffer ); reg->invalid = FALSE; + if( !(reg->buffer = vips_buffer_new( im, &clipped )) ) return( -1 ); } else { - /* Don't call vips_region_reset() ... we combine buffer unref - * and new buffer ref in one call to reduce malloc/free - * cycling. + /* We combine buffer unref and new buffer ref in one call + * to reduce malloc/free cycling. */ - VIPS_FREEF( vips_window_unref, reg->window ); if( !(reg->buffer = vips_buffer_unref_ref( reg->buffer, im, &clipped )) ) return( -1 ); @@ -630,12 +650,13 @@ vips_region_image( VipsRegion *reg, VipsRect *r ) return( -1 ); } + VIPS_FREEF( vips_buffer_unref, reg->buffer ); + VIPS_FREEF( vips_window_unref, reg->window ); + reg->invalid = FALSE; + if( image->data ) { /* We have the whole image available ... easy! */ - if( reg->buffer ) - vips_buffer_undone( reg->buffer ); - reg->invalid = FALSE; /* We can't just set valid = clipped, since this may be an * incompletely calculated memory buffer. Just set valid to r. @@ -653,9 +674,6 @@ vips_region_image( VipsRegion *reg, VipsRect *r ) reg->window->top > clipped.top || reg->window->top + reg->window->height < clipped.top + clipped.height ) { - reg->invalid = FALSE; - - VIPS_FREEF( vips_window_unref, reg->window ); if( !(reg->window = vips_window_ref( image, clipped.top, clipped.height )) ) return( -1 ); @@ -782,8 +800,8 @@ vips_region_region( VipsRegion *reg, /* Init new stuff. */ - if( reg->buffer ) - vips_buffer_undone( reg->buffer ); + VIPS_FREEF( vips_buffer_unref, reg->buffer ); + VIPS_FREEF( vips_window_unref, reg->window ); reg->invalid = FALSE; reg->valid = final; reg->bpl = dest->bpl;