From 0d792218534e237fcab47f13bc89e2f046cd90f8 Mon Sep 17 00:00:00 2001 From: John Cupitt Date: Tue, 17 Dec 2013 15:21:21 +0000 Subject: [PATCH] memuse improvements - conv is now SMALLTILE - more instrumentation - better buffer recycling - quicker buf freeing --- ChangeLog | 1 + TODO | 103 +------------------------------- libvips/conversion/tilecache.c | 2 +- libvips/convolution/im_conv.c | 5 +- libvips/convolution/im_conv_f.c | 5 +- libvips/include/vips/private.h | 1 + libvips/include/vips/region.h | 2 + libvips/iofuncs/buffer.c | 71 ++++++++++++++-------- libvips/iofuncs/image.c | 4 ++ libvips/iofuncs/init.c | 3 +- libvips/iofuncs/region.c | 47 ++++++++++++--- 11 files changed, 101 insertions(+), 143 deletions(-) diff --git a/ChangeLog b/ChangeLog index f8c2cd53..a521333a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -28,6 +28,7 @@ - auto-vectorization-friendly inner loops - added vips::init() and vips::shutdown() to C++ API - reuse pixel buffers on sharing to reduce mem cycling +- conv is SMALLTILE, huge mem use saving on wide images 20/11/13 started 7.36.5 - better cache sizing in unbuffered sequential mode diff --git a/TODO b/TODO index 4305225b..76edded6 100644 --- a/TODO +++ b/TODO @@ -1,107 +1,8 @@ -- memuse still seems high - - $ vips sharpen wtc.jpg x.jpg --vips-concurrency=1 - memory: high-water mark 97.82 MB - - -$ vips copy wtc.jpg x.v -memory: high-water mark 15.98 MB -$ vips sharpen x.v x2.v --vips-concurrency=1 -memory: high-water mark 74.01 MB -$ vips sharpen wtc.jpg x2.jpg --vips-concurrency=1 -memory: high-water mark 146.94 MB - - and with 12 threads you get huge memuse - - can we get this down at all? - - -baseline: - -$ sharpen wtc.jpg x2.jpg -memory: high-water mark 330.14 MB - -one thread: - -$ vips sharpen wtc.jpg x2.jpg --vips-concurrency=1 -vips_line_cache_build: max size = 21.2363 MB -memory: high-water mark 146.94 MB - -so about 120mb without the input cache - -turn off the buffer recycling system (ie. never put buffers on reserve, always -free them): - -$ vips sharpen wtc.jpg x2.jpg --vips-concurrency=1 -memory: high-water mark 97.82 MB - -ie. buffer recycling costs about 50MB - -base + 1 * per-thread == 97mb -base + 2 * per-thread == 161mb -base + 4 * per-thread == 286mb - -63mb per thread? - -base == 34 - -input cache == 21, so base is 12 without that - -out buffer is 256 lines, two of them, each 9372 across, about 14mb, explains -rest of base cost - -63mb per-thread compute cost still mysterious ... does not include input cache -or output buffer - -$ vips sharpen x.v x2.v --vips-concurrency=1 -vips__buffer_init: buffer reserve disabled -memory: high-water mark 74.01 MB - -so 63mb per-thread computation cost, no input cache, output buffer, souds -about right - -tiles are 9372 x 16, so 440kb, we must somehow have about 120 tiles in the -pipeline, is this really right? - - load - sRGB2scRGB - scRGB2XYZ - XYZ2Lab - Lab2LabS - extract_band 0 extract_band 1, 2 - embed - conv - embed - conv - bandjoin - save - -13 operations, plus some extra copies and writes joining them up - -have 48 regions alive at peak, that's with two large ones for the two output -buffers - -now we recycle buffers, can we revert to more aggressive freeing of buffers? - -put the buffer unrefs back: - - $ vips sharpen x.v x2.v --vips-concurrency=1 - vips__buffer_init: buffer reserve disabled - memory: high-water mark 62mb - -2 regions are 7mb each (the output buffers), so that's 48mb in 46 regions, -still strangely high? - - - - - - - - - vipsprofile needs a man page for Debian, I guess +- vipsprofile reports a leak, strangely + - new_heart.ws fails with libvips master has the sharing fix resolved this? diff --git a/libvips/conversion/tilecache.c b/libvips/conversion/tilecache.c index 2ddc0096..69c88487 100644 --- a/libvips/conversion/tilecache.c +++ b/libvips/conversion/tilecache.c @@ -60,8 +60,8 @@ /* #define VIPS_DEBUG_RED - */ #define VIPS_DEBUG + */ #ifdef HAVE_CONFIG_H #include diff --git a/libvips/convolution/im_conv.c b/libvips/convolution/im_conv.c index 0474a2c8..a148eef7 100644 --- a/libvips/convolution/im_conv.c +++ b/libvips/convolution/im_conv.c @@ -1079,10 +1079,7 @@ im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask ) #endif /*DEBUG*/ } - /* Set demand hints. FATSTRIP is good for us, as THINSTRIP will cause - * too many recalculations on overlaps. - */ - if( im_demand_hint( out, IM_FATSTRIP, in, NULL ) || + if( im_demand_hint( out, IM_SMALLTILE, in, NULL ) || im_generate( out, conv_start, generate, conv_stop, in, conv ) ) return( -1 ); diff --git a/libvips/convolution/im_conv_f.c b/libvips/convolution/im_conv_f.c index d4fefad2..ecee8d03 100644 --- a/libvips/convolution/im_conv_f.c +++ b/libvips/convolution/im_conv_f.c @@ -342,10 +342,7 @@ im_conv_f_raw( IMAGE *in, IMAGE *out, DOUBLEMASK *mask ) return( -1 ); } - /* Set demand hints. FATSTRIP is good for us, as THINSTRIP will cause - * too many recalculations on overlaps. - */ - if( im_demand_hint( out, IM_FATSTRIP, in, NULL ) ) + if( im_demand_hint( out, IM_SMALLTILE, in, NULL ) ) return( -1 ); if( im_generate( out, conv_start, conv_gen, conv_stop, in, conv ) ) diff --git a/libvips/include/vips/private.h b/libvips/include/vips/private.h index 42b6de9d..b8ee9522 100644 --- a/libvips/include/vips/private.h +++ b/libvips/include/vips/private.h @@ -116,6 +116,7 @@ typedef struct _VipsBuffer { size_t bsize; /* Size of private malloc() */ } VipsBuffer; +void vips_buffer_dump_all( void ); void vips_buffer_done( VipsBuffer *buffer ); void vips_buffer_undone( VipsBuffer *buffer ); void vips_buffer_unref( VipsBuffer *buffer ); diff --git a/libvips/include/vips/region.h b/libvips/include/vips/region.h index 1888cf2e..a38bda33 100644 --- a/libvips/include/vips/region.h +++ b/libvips/include/vips/region.h @@ -120,6 +120,8 @@ int vips_region_prepare_to( VipsRegion *reg, VipsRegion *dest, VipsRect *r, int x, int y ); int vips_region_prepare_many( VipsRegion **reg, VipsRect *r ); +void vips_region_dump_all( void ); + /* Macros on VipsRegion. * VIPS_REGION_LSKIP() add to move down line * VIPS_REGION_N_ELEMENTS() number of elements across region diff --git a/libvips/iofuncs/buffer.c b/libvips/iofuncs/buffer.c index 2f55c986..8521802a 100644 --- a/libvips/iofuncs/buffer.c +++ b/libvips/iofuncs/buffer.c @@ -48,6 +48,7 @@ /* #define DEBUG_CREATE +#define DEBUG_VERBOSE #define DEBUG */ @@ -76,10 +77,43 @@ static int buffer_cache_n = 0; /* The maximum numbers of buffers we hold in reserve per thread. About 5 seems * enough to stop malloc cycling on vips_sharpen(). */ -static const int buffer_cache_max_reserve = 0; +static const int buffer_cache_max_reserve = 40; static GPrivate *thread_buffer_cache_key = NULL; +#ifdef DEBUG +static void * +vips_buffer_dump( VipsBuffer *buffer, size_t *reserve, size_t *alive ) +{ + if( buffer->im && + buffer->buf ) { + printf( "buffer %p, %gMB\n", + buffer, buffer->bsize / (1024 * 1024.0) ); + *alive += buffer->bsize; + } + else if( !buffer->im ) + *reserve += buffer->bsize; + else + printf( "buffer craziness!\n" ); + + return( NULL ); +} + +void +vips_buffer_dump_all( void ) +{ + size_t reserve; + size_t alive; + + reserve = 0; + alive = 0; + vips_slist_map2( vips__buffers_all, + (VipsSListMap2Fn) vips_buffer_dump, &reserve, &alive ); + printf( "%gMB alive\n", alive / (1024 * 1024.0) ); + printf( "%gMB in reserve\n", reserve / (1024 * 1024.0) ); +} +#endif /*DEBUG*/ + static void vips_buffer_free( VipsBuffer *buffer ) { @@ -92,7 +126,6 @@ vips_buffer_free( VipsBuffer *buffer ) g_assert( g_slist_find( vips__buffers_all, buffer ) ); vips__buffers_all = g_slist_remove( vips__buffers_all, buffer ); - printf( "%d buffers in vips\n", g_slist_length( vips__buffers_all ) ); g_mutex_unlock( vips__global_lock ); #endif /*DEBUG*/ @@ -208,11 +241,11 @@ vips_buffer_done( VipsBuffer *buffer ) VipsBufferCache *cache = buffer_cache_get(); VipsBufferCacheList *cache_list; -#ifdef DEBUG +#ifdef DEBUG_VERBOSE printf( "vips_buffer_done: thread %p adding to cache %p\n", g_thread_self(), cache ); vips_buffer_print( buffer ); -#endif /*DEBUG*/ +#endif /*DEBUG_VERBOSE*/ /* Look up and update the buffer list. */ @@ -241,11 +274,11 @@ vips_buffer_undone( VipsBuffer *buffer ) VipsBufferCache *cache = buffer->cache; VipsBufferCacheList *cache_list; -#ifdef DEBUG +#ifdef DEBUG_VERBOSE printf( "vips_buffer_undone: thread %p removing " "buffer %p from cache %p\n", g_thread_self(), buffer, cache ); -#endif /*DEBUG*/ +#endif /*DEBUG_VERBOSE*/ g_assert( cache->thread == g_thread_self() ); @@ -261,10 +294,10 @@ vips_buffer_undone( VipsBuffer *buffer ) buffer->cache = NULL; -#ifdef DEBUG +#ifdef DEBUG_VERBOSE printf( "vips_buffer_undone: %d buffers left\n", g_slist_length( cache_list->buffers ) ); -#endif /*DEBUG*/ +#endif /*DEBUG_VERBOSE*/ } buffer->area.width = 0; @@ -274,13 +307,13 @@ vips_buffer_undone( VipsBuffer *buffer ) void vips_buffer_unref( VipsBuffer *buffer ) { -#ifdef DEBUG +#ifdef DEBUG_VERBOSE printf( "** vips_buffer_unref: left = %d, top = %d, " "width = %d, height = %d (%p)\n", buffer->area.left, buffer->area.top, buffer->area.width, buffer->area.height, buffer ); -#endif /*DEBUG*/ +#endif /*DEBUG_VERBOSE*/ g_assert( buffer->ref_count > 0 ); @@ -289,10 +322,10 @@ vips_buffer_unref( VipsBuffer *buffer ) if( buffer->ref_count == 0 ) { VipsBufferCache *cache = buffer_cache_get(); -#ifdef DEBUG +#ifdef DEBUG_VERBOSE if( !buffer->done ) printf( "vips_buffer_unref: buffer was not done\n" ); -#endif /*DEBUG*/ +#endif /*DEBUG_VERBOSE*/ vips_buffer_undone( buffer ); @@ -368,20 +401,10 @@ vips_buffer_new( VipsImage *im, VipsRect *area ) buffer->buf = NULL; buffer->bsize = 0; -#ifdef DEBUG - printf( "** vips_buffer_new: left = %d, top = %d, " - "width = %d, height = %d (%p)\n", - buffer->area.left, buffer->area.top, - buffer->area.width, buffer->area.height, - buffer ); -#endif /*DEBUG*/ - #ifdef DEBUG g_mutex_lock( vips__global_lock ); vips__buffers_all = g_slist_prepend( vips__buffers_all, buffer ); - printf( "%d buffers in vips\n", - g_slist_length( vips__buffers_all ) ); g_mutex_unlock( vips__global_lock ); #endif /*DEBUG*/ } @@ -424,14 +447,14 @@ buffer_find( VipsImage *im, VipsRect *r ) area->top + area->height >= r->top + r->height ) { buffer->ref_count += 1; -#ifdef DEBUG +#ifdef DEBUG_VERBOSE printf( "vips_buffer_find: left = %d, top = %d, " "width = %d, height = %d, count = %d (%p)\n", buffer->area.left, buffer->area.top, buffer->area.width, buffer->area.height, buffer->ref_count, buffer ); -#endif /*DEBUG*/ +#endif /*DEBUG_VERBOSE*/ break; } diff --git a/libvips/iofuncs/image.c b/libvips/iofuncs/image.c index d6c78822..499f6f45 100644 --- a/libvips/iofuncs/image.c +++ b/libvips/iofuncs/image.c @@ -710,6 +710,10 @@ vips_image_eval_cb( VipsImage *image, VipsProgress *progress, int *last ) fflush( stdout ); *last = progress->percent; + + /* Needs DEBUG in region.c + vips_region_dump_all(); + */ } return( 0 ); diff --git a/libvips/iofuncs/init.c b/libvips/iofuncs/init.c index b27f391d..7de07213 100644 --- a/libvips/iofuncs/init.c +++ b/libvips/iofuncs/init.c @@ -396,9 +396,8 @@ vips_shutdown( void ) } vips__thread_profile_detach(); - vips__thread_profile_stop(); - vips__buffer_shutdown(); + vips__thread_profile_stop(); /* In dev releases, always show leaks. But not more than once, it's * annoying. diff --git a/libvips/iofuncs/region.c b/libvips/iofuncs/region.c index d53eff3c..1cf20a24 100644 --- a/libvips/iofuncs/region.c +++ b/libvips/iofuncs/region.c @@ -75,8 +75,8 @@ #define DEBUG_ENVIRONMENT 1 #define DEBUG_CREATE #define DEBUG - */ #define VIPS_DEBUG + */ #ifdef HAVE_CONFIG_H #include @@ -186,8 +186,8 @@ enum { G_DEFINE_TYPE( VipsRegion, vips_region, VIPS_TYPE_OBJECT ); #ifdef VIPS_DEBUG -static int vips_n_regions = 0; -#endif /*DEBUG*/ +static GSList *vips__regions_all = NULL; +#endif /*VIPS_DEBUG*/ static void vips_region_finalize( GObject *gobject ) @@ -200,7 +200,7 @@ vips_region_finalize( GObject *gobject ) #ifdef VIPS_DEBUG g_mutex_lock( vips__global_lock ); - vips_n_regions -= 1; + vips__regions_all = g_slist_remove( vips__regions_all, gobject ); g_mutex_unlock( vips__global_lock ); #endif /*VIPS_DEBUG*/ @@ -348,7 +348,8 @@ vips_region_summary( VipsObject *object, VipsBuf *buf ) vips_buf_appendf( buf, "height = %d", region->valid.height ); if( region->buffer && region->buffer->buf ) - vips_buf_appendf( buf, ", bytes = %zd", region->buffer->bsize ); + vips_buf_appendf( buf, ", %.3gMB", + region->buffer->bsize / (1024 * 1024.0) ); VIPS_OBJECT_CLASS( vips_region_parent_class )->summary( object, buf ); } @@ -491,8 +492,9 @@ vips_region_init( VipsRegion *region ) #ifdef VIPS_DEBUG g_mutex_lock( vips__global_lock ); - vips_n_regions += 1; - printf( "vips_region_init: %d regions in vips\n", vips_n_regions ); + vips__regions_all = g_slist_prepend( vips__regions_all, region ); + printf( "vips_region_init: %d regions in vips\n", + g_slist_length( vips__regions_all ) ); g_mutex_unlock( vips__global_lock ); #endif /*VIPS_DEBUG*/ } @@ -1372,3 +1374,34 @@ vips_region_prepare_many( VipsRegion **reg, VipsRect *r ) return( 0 ); } + +#ifdef VIPS_DEBUG +static void * +vips_region_dump_all_cb( VipsRegion *region, size_t *alive ) +{ + char str[2048]; + VipsBuf buf = VIPS_BUF_STATIC( str ); + + vips_object_summary( VIPS_OBJECT( region ), &buf ); + printf( "%s\n", vips_buf_all( &buf ) ); + + if( region->buffer && region->buffer->buf ) + *alive += region->buffer->bsize; + + return( NULL ); +} + +void +vips_region_dump_all( void ) +{ + size_t alive; + + g_mutex_lock( vips__global_lock ); + alive = 0; + printf( "%d regions in vips\n", g_slist_length( vips__regions_all ) ); + vips_slist_map2( vips__regions_all, + (VipsSListMap2Fn) vips_region_dump_all_cb, &alive, NULL ); + printf( "%gMB alive\n", alive / (1024 * 1024.0) ); + g_mutex_unlock( vips__global_lock ); +} +#endif /*VIPS_DEBUG*/