From f8c2a36746fc8217a82fc05ebcf2cdb8f5fc9518 Mon Sep 17 00:00:00 2001 From: John Cupitt Date: Mon, 6 Sep 2021 23:14:36 +0100 Subject: [PATCH] arrayjoin signalling minimise also frees memory By making the sequential line cache non-persistent, and only minimising when the read point is well past the image. On large arrayjoin operations, this saves many GB of memory. See https://github.com/kleisauke/net-vips/issues/135 --- ChangeLog | 3 ++- libvips/conversion/arrayjoin.c | 32 ++++++++++++++++++-------------- libvips/conversion/sequential.c | 15 ++++++++++----- 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/ChangeLog b/ChangeLog index baac5a7c..9b808694 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,7 +4,8 @@ - fix thumbnail with small image plus crop plus no upsize [Andrewsville] - rename speed / reduction-effort / etc. params as "effort" - add gifsave [lovell] -- arrayjoin minimises inputs during processing +- arrayjoin minimises inputs during sequential processing, saving a lot of + memory and file descriptors 16/8/21 started 8.11.4 - fix off-by-one error in new rank fast path diff --git a/libvips/conversion/arrayjoin.c b/libvips/conversion/arrayjoin.c index 813decbb..a263394f 100644 --- a/libvips/conversion/arrayjoin.c +++ b/libvips/conversion/arrayjoin.c @@ -68,6 +68,7 @@ typedef struct _VipsArrayjoin { int down; VipsRect *rects; + gboolean *minimised; } VipsArrayjoin; @@ -119,23 +120,20 @@ vips_arrayjoin_gen( VipsRegion *or, void *seq, } if( vips_image_is_sequential( conversion->out ) ) { - /* In sequential mode, we can minimise an input once we've - * fetched the final line of pixels from it. + /* In sequential mode, we can minimise an input once our + * generate point is well past the end of it. This can save a + * lot of memory and file descriptors on large image arrays. * - * Find all inputs whose final line is inside this rect and - * shut them down. + * minimise_all is quite expensive, so only trigger once for + * each input. */ - for( i = 0; i < n; i++ ) { - VipsRect final_line = { - join->rects[i].left, - VIPS_RECT_BOTTOM( &join->rects[i] ) - 1, - join->rects[i].width, - 1 - }; - - if( vips_rect_includesrect( &final_line, r ) ) + for( i = 0; i < n; i++ ) + if( !join->minimised[i] && + r->top > VIPS_RECT_BOTTOM( &join->rects[i] ) + + 256 ) { vips_image_minimise_all( in[i] ); - } + join->minimised[i] = TRUE; + } } return( 0 ); @@ -247,6 +245,12 @@ vips_arrayjoin_build( VipsObject *object ) output_width - join->rects[i].left; } + /* A thing to track which inputs we've signalled minimise on. + */ + join->minimised = VIPS_ARRAY( join, n, gboolean ); + for( i = 0; i < n; i++ ) + join->minimised[i] = FALSE; + /* Each image must be cropped and aligned within an @hspacing by * @vspacing box. */ diff --git a/libvips/conversion/sequential.c b/libvips/conversion/sequential.c index 1823ba48..7328772b 100644 --- a/libvips/conversion/sequential.c +++ b/libvips/conversion/sequential.c @@ -23,6 +23,8 @@ * - re-enable skipahead now we have the single-thread-first-tile idea * 6/3/17 * - deprecate @trace, @access now seq is much simpler + * 6/9/21 + * - don't set "persistent", it can cause huge memory use */ /* @@ -192,14 +194,17 @@ vips_sequential_build( VipsObject *object ) if( VIPS_OBJECT_CLASS( vips_sequential_parent_class )->build( object ) ) return( -1 ); + /* We've gone forwards and backwards on sequential caches being + * persistent. Persistent caches can be useful if you want to eg. + * make several crop() operations on a seq image source, but they use + * a lot of memory with eg. arrayjoin. + * + * On balance, if you want to make many crops from one source, use a + * RANDOM image. + */ if( vips_linecache( sequential->in, &t, "tile_height", sequential->tile_height, "access", VIPS_ACCESS_SEQUENTIAL, - /* We need seq caches to persist across minimise in case - * someone is trying to read an image with a series of crop - * operations. - */ - "persistent", TRUE, NULL ) ) return( -1 );