memuse improvements

- conv is now SMALLTILE
- more instrumentation
- better buffer recycling
- quicker buf freeing
This commit is contained in:
John Cupitt 2013-12-17 15:21:21 +00:00
parent d09041a500
commit 0d79221853
11 changed files with 101 additions and 143 deletions

View File

@ -28,6 +28,7 @@
- auto-vectorization-friendly inner loops
- added vips::init() and vips::shutdown() to C++ API
- reuse pixel buffers on sharing to reduce mem cycling
- conv is SMALLTILE, huge mem use saving on wide images
20/11/13 started 7.36.5
- better cache sizing in unbuffered sequential mode

103
TODO
View File

@ -1,107 +1,8 @@
- memuse still seems high
$ vips sharpen wtc.jpg x.jpg --vips-concurrency=1
memory: high-water mark 97.82 MB
$ vips copy wtc.jpg x.v
memory: high-water mark 15.98 MB
$ vips sharpen x.v x2.v --vips-concurrency=1
memory: high-water mark 74.01 MB
$ vips sharpen wtc.jpg x2.jpg --vips-concurrency=1
memory: high-water mark 146.94 MB
and with 12 threads you get huge memuse
can we get this down at all?
baseline:
$ sharpen wtc.jpg x2.jpg
memory: high-water mark 330.14 MB
one thread:
$ vips sharpen wtc.jpg x2.jpg --vips-concurrency=1
vips_line_cache_build: max size = 21.2363 MB
memory: high-water mark 146.94 MB
so about 120mb without the input cache
turn off the buffer recycling system (ie. never put buffers on reserve, always
free them):
$ vips sharpen wtc.jpg x2.jpg --vips-concurrency=1
memory: high-water mark 97.82 MB
ie. buffer recycling costs about 50MB
base + 1 * per-thread == 97mb
base + 2 * per-thread == 161mb
base + 4 * per-thread == 286mb
63mb per thread?
base == 34
input cache == 21, so base is 12 without that
out buffer is 256 lines, two of them, each 9372 across, about 14mb, explains
rest of base cost
63mb per-thread compute cost still mysterious ... does not include input cache
or output buffer
$ vips sharpen x.v x2.v --vips-concurrency=1
vips__buffer_init: buffer reserve disabled
memory: high-water mark 74.01 MB
so 63mb per-thread computation cost, no input cache, output buffer, souds
about right
tiles are 9372 x 16, so 440kb, we must somehow have about 120 tiles in the
pipeline, is this really right?
load
sRGB2scRGB
scRGB2XYZ
XYZ2Lab
Lab2LabS
extract_band 0 extract_band 1, 2
embed
conv
embed
conv
bandjoin
save
13 operations, plus some extra copies and writes joining them up
have 48 regions alive at peak, that's with two large ones for the two output
buffers
now we recycle buffers, can we revert to more aggressive freeing of buffers?
put the buffer unrefs back:
$ vips sharpen x.v x2.v --vips-concurrency=1
vips__buffer_init: buffer reserve disabled
memory: high-water mark 62mb
2 regions are 7mb each (the output buffers), so that's 48mb in 46 regions,
still strangely high?
- vipsprofile needs a man page for Debian, I guess
- vipsprofile reports a leak, strangely
- new_heart.ws fails with libvips master
has the sharing fix resolved this?

View File

@ -60,8 +60,8 @@
/*
#define VIPS_DEBUG_RED
*/
#define VIPS_DEBUG
*/
#ifdef HAVE_CONFIG_H
#include <config.h>

View File

@ -1079,10 +1079,7 @@ im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask )
#endif /*DEBUG*/
}
/* Set demand hints. FATSTRIP is good for us, as THINSTRIP will cause
* too many recalculations on overlaps.
*/
if( im_demand_hint( out, IM_FATSTRIP, in, NULL ) ||
if( im_demand_hint( out, IM_SMALLTILE, in, NULL ) ||
im_generate( out, conv_start, generate, conv_stop, in, conv ) )
return( -1 );

View File

@ -342,10 +342,7 @@ im_conv_f_raw( IMAGE *in, IMAGE *out, DOUBLEMASK *mask )
return( -1 );
}
/* Set demand hints. FATSTRIP is good for us, as THINSTRIP will cause
* too many recalculations on overlaps.
*/
if( im_demand_hint( out, IM_FATSTRIP, in, NULL ) )
if( im_demand_hint( out, IM_SMALLTILE, in, NULL ) )
return( -1 );
if( im_generate( out, conv_start, conv_gen, conv_stop, in, conv ) )

View File

@ -116,6 +116,7 @@ typedef struct _VipsBuffer {
size_t bsize; /* Size of private malloc() */
} VipsBuffer;
void vips_buffer_dump_all( void );
void vips_buffer_done( VipsBuffer *buffer );
void vips_buffer_undone( VipsBuffer *buffer );
void vips_buffer_unref( VipsBuffer *buffer );

View File

@ -120,6 +120,8 @@ int vips_region_prepare_to( VipsRegion *reg,
VipsRegion *dest, VipsRect *r, int x, int y );
int vips_region_prepare_many( VipsRegion **reg, VipsRect *r );
void vips_region_dump_all( void );
/* Macros on VipsRegion.
* VIPS_REGION_LSKIP() add to move down line
* VIPS_REGION_N_ELEMENTS() number of elements across region

View File

@ -48,6 +48,7 @@
/*
#define DEBUG_CREATE
#define DEBUG_VERBOSE
#define DEBUG
*/
@ -76,10 +77,43 @@ static int buffer_cache_n = 0;
/* The maximum numbers of buffers we hold in reserve per thread. About 5 seems
* enough to stop malloc cycling on vips_sharpen().
*/
static const int buffer_cache_max_reserve = 0;
static const int buffer_cache_max_reserve = 40;
static GPrivate *thread_buffer_cache_key = NULL;
#ifdef DEBUG
static void *
vips_buffer_dump( VipsBuffer *buffer, size_t *reserve, size_t *alive )
{
if( buffer->im &&
buffer->buf ) {
printf( "buffer %p, %gMB\n",
buffer, buffer->bsize / (1024 * 1024.0) );
*alive += buffer->bsize;
}
else if( !buffer->im )
*reserve += buffer->bsize;
else
printf( "buffer craziness!\n" );
return( NULL );
}
void
vips_buffer_dump_all( void )
{
size_t reserve;
size_t alive;
reserve = 0;
alive = 0;
vips_slist_map2( vips__buffers_all,
(VipsSListMap2Fn) vips_buffer_dump, &reserve, &alive );
printf( "%gMB alive\n", alive / (1024 * 1024.0) );
printf( "%gMB in reserve\n", reserve / (1024 * 1024.0) );
}
#endif /*DEBUG*/
static void
vips_buffer_free( VipsBuffer *buffer )
{
@ -92,7 +126,6 @@ vips_buffer_free( VipsBuffer *buffer )
g_assert( g_slist_find( vips__buffers_all, buffer ) );
vips__buffers_all = g_slist_remove( vips__buffers_all, buffer );
printf( "%d buffers in vips\n", g_slist_length( vips__buffers_all ) );
g_mutex_unlock( vips__global_lock );
#endif /*DEBUG*/
@ -208,11 +241,11 @@ vips_buffer_done( VipsBuffer *buffer )
VipsBufferCache *cache = buffer_cache_get();
VipsBufferCacheList *cache_list;
#ifdef DEBUG
#ifdef DEBUG_VERBOSE
printf( "vips_buffer_done: thread %p adding to cache %p\n",
g_thread_self(), cache );
vips_buffer_print( buffer );
#endif /*DEBUG*/
#endif /*DEBUG_VERBOSE*/
/* Look up and update the buffer list.
*/
@ -241,11 +274,11 @@ vips_buffer_undone( VipsBuffer *buffer )
VipsBufferCache *cache = buffer->cache;
VipsBufferCacheList *cache_list;
#ifdef DEBUG
#ifdef DEBUG_VERBOSE
printf( "vips_buffer_undone: thread %p removing "
"buffer %p from cache %p\n",
g_thread_self(), buffer, cache );
#endif /*DEBUG*/
#endif /*DEBUG_VERBOSE*/
g_assert( cache->thread == g_thread_self() );
@ -261,10 +294,10 @@ vips_buffer_undone( VipsBuffer *buffer )
buffer->cache = NULL;
#ifdef DEBUG
#ifdef DEBUG_VERBOSE
printf( "vips_buffer_undone: %d buffers left\n",
g_slist_length( cache_list->buffers ) );
#endif /*DEBUG*/
#endif /*DEBUG_VERBOSE*/
}
buffer->area.width = 0;
@ -274,13 +307,13 @@ vips_buffer_undone( VipsBuffer *buffer )
void
vips_buffer_unref( VipsBuffer *buffer )
{
#ifdef DEBUG
#ifdef DEBUG_VERBOSE
printf( "** vips_buffer_unref: left = %d, top = %d, "
"width = %d, height = %d (%p)\n",
buffer->area.left, buffer->area.top,
buffer->area.width, buffer->area.height,
buffer );
#endif /*DEBUG*/
#endif /*DEBUG_VERBOSE*/
g_assert( buffer->ref_count > 0 );
@ -289,10 +322,10 @@ vips_buffer_unref( VipsBuffer *buffer )
if( buffer->ref_count == 0 ) {
VipsBufferCache *cache = buffer_cache_get();
#ifdef DEBUG
#ifdef DEBUG_VERBOSE
if( !buffer->done )
printf( "vips_buffer_unref: buffer was not done\n" );
#endif /*DEBUG*/
#endif /*DEBUG_VERBOSE*/
vips_buffer_undone( buffer );
@ -368,20 +401,10 @@ vips_buffer_new( VipsImage *im, VipsRect *area )
buffer->buf = NULL;
buffer->bsize = 0;
#ifdef DEBUG
printf( "** vips_buffer_new: left = %d, top = %d, "
"width = %d, height = %d (%p)\n",
buffer->area.left, buffer->area.top,
buffer->area.width, buffer->area.height,
buffer );
#endif /*DEBUG*/
#ifdef DEBUG
g_mutex_lock( vips__global_lock );
vips__buffers_all =
g_slist_prepend( vips__buffers_all, buffer );
printf( "%d buffers in vips\n",
g_slist_length( vips__buffers_all ) );
g_mutex_unlock( vips__global_lock );
#endif /*DEBUG*/
}
@ -424,14 +447,14 @@ buffer_find( VipsImage *im, VipsRect *r )
area->top + area->height >= r->top + r->height ) {
buffer->ref_count += 1;
#ifdef DEBUG
#ifdef DEBUG_VERBOSE
printf( "vips_buffer_find: left = %d, top = %d, "
"width = %d, height = %d, count = %d (%p)\n",
buffer->area.left, buffer->area.top,
buffer->area.width, buffer->area.height,
buffer->ref_count,
buffer );
#endif /*DEBUG*/
#endif /*DEBUG_VERBOSE*/
break;
}

View File

@ -710,6 +710,10 @@ vips_image_eval_cb( VipsImage *image, VipsProgress *progress, int *last )
fflush( stdout );
*last = progress->percent;
/* Needs DEBUG in region.c
vips_region_dump_all();
*/
}
return( 0 );

View File

@ -396,9 +396,8 @@ vips_shutdown( void )
}
vips__thread_profile_detach();
vips__thread_profile_stop();
vips__buffer_shutdown();
vips__thread_profile_stop();
/* In dev releases, always show leaks. But not more than once, it's
* annoying.

View File

@ -75,8 +75,8 @@
#define DEBUG_ENVIRONMENT 1
#define DEBUG_CREATE
#define DEBUG
*/
#define VIPS_DEBUG
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
@ -186,8 +186,8 @@ enum {
G_DEFINE_TYPE( VipsRegion, vips_region, VIPS_TYPE_OBJECT );
#ifdef VIPS_DEBUG
static int vips_n_regions = 0;
#endif /*DEBUG*/
static GSList *vips__regions_all = NULL;
#endif /*VIPS_DEBUG*/
static void
vips_region_finalize( GObject *gobject )
@ -200,7 +200,7 @@ vips_region_finalize( GObject *gobject )
#ifdef VIPS_DEBUG
g_mutex_lock( vips__global_lock );
vips_n_regions -= 1;
vips__regions_all = g_slist_remove( vips__regions_all, gobject );
g_mutex_unlock( vips__global_lock );
#endif /*VIPS_DEBUG*/
@ -348,7 +348,8 @@ vips_region_summary( VipsObject *object, VipsBuf *buf )
vips_buf_appendf( buf, "height = %d", region->valid.height );
if( region->buffer && region->buffer->buf )
vips_buf_appendf( buf, ", bytes = %zd", region->buffer->bsize );
vips_buf_appendf( buf, ", %.3gMB",
region->buffer->bsize / (1024 * 1024.0) );
VIPS_OBJECT_CLASS( vips_region_parent_class )->summary( object, buf );
}
@ -491,8 +492,9 @@ vips_region_init( VipsRegion *region )
#ifdef VIPS_DEBUG
g_mutex_lock( vips__global_lock );
vips_n_regions += 1;
printf( "vips_region_init: %d regions in vips\n", vips_n_regions );
vips__regions_all = g_slist_prepend( vips__regions_all, region );
printf( "vips_region_init: %d regions in vips\n",
g_slist_length( vips__regions_all ) );
g_mutex_unlock( vips__global_lock );
#endif /*VIPS_DEBUG*/
}
@ -1372,3 +1374,34 @@ vips_region_prepare_many( VipsRegion **reg, VipsRect *r )
return( 0 );
}
#ifdef VIPS_DEBUG
static void *
vips_region_dump_all_cb( VipsRegion *region, size_t *alive )
{
char str[2048];
VipsBuf buf = VIPS_BUF_STATIC( str );
vips_object_summary( VIPS_OBJECT( region ), &buf );
printf( "%s\n", vips_buf_all( &buf ) );
if( region->buffer && region->buffer->buf )
*alive += region->buffer->bsize;
return( NULL );
}
void
vips_region_dump_all( void )
{
size_t alive;
g_mutex_lock( vips__global_lock );
alive = 0;
printf( "%d regions in vips\n", g_slist_length( vips__regions_all ) );
vips_slist_map2( vips__regions_all,
(VipsSListMap2Fn) vips_region_dump_all_cb, &alive, NULL );
printf( "%gMB alive\n", alive / (1024 * 1024.0) );
g_mutex_unlock( vips__global_lock );
}
#endif /*VIPS_DEBUG*/