arrayjoin signalling minimise also frees memory

This is done by making the sequential line cache non-persistent, and by only
signalling minimise once the read point is well past the end of the image.

On large arrayjoin operations, this saves many GB of memory.

See https://github.com/kleisauke/net-vips/issues/135
This commit is contained in:
John Cupitt 2021-09-06 23:14:36 +01:00
parent 579bdff012
commit f8c2a36746
3 changed files with 30 additions and 20 deletions

View File

@ -4,7 +4,8 @@
- fix thumbnail with small image plus crop plus no upsize [Andrewsville] - fix thumbnail with small image plus crop plus no upsize [Andrewsville]
- rename speed / reduction-effort / etc. params as "effort" - rename speed / reduction-effort / etc. params as "effort"
- add gifsave [lovell] - add gifsave [lovell]
- arrayjoin minimises inputs during processing - arrayjoin minimises inputs during sequential processing, saving a lot of
memory and file descriptors
16/8/21 started 8.11.4 16/8/21 started 8.11.4
- fix off-by-one error in new rank fast path - fix off-by-one error in new rank fast path

View File

@ -68,6 +68,7 @@ typedef struct _VipsArrayjoin {
int down; int down;
VipsRect *rects; VipsRect *rects;
gboolean *minimised;
} VipsArrayjoin; } VipsArrayjoin;
@ -119,22 +120,19 @@ vips_arrayjoin_gen( VipsRegion *or, void *seq,
} }
if( vips_image_is_sequential( conversion->out ) ) { if( vips_image_is_sequential( conversion->out ) ) {
/* In sequential mode, we can minimise an input once we've /* In sequential mode, we can minimise an input once our
* fetched the final line of pixels from it. * generate point is well past the end of it. This can save a
* lot of memory and file descriptors on large image arrays.
* *
* Find all inputs whose final line is inside this rect and * minimise_all is quite expensive, so only trigger once for
* shut them down. * each input.
*/ */
for( i = 0; i < n; i++ ) { for( i = 0; i < n; i++ )
VipsRect final_line = { if( !join->minimised[i] &&
join->rects[i].left, r->top > VIPS_RECT_BOTTOM( &join->rects[i] ) +
VIPS_RECT_BOTTOM( &join->rects[i] ) - 1, 256 ) {
join->rects[i].width,
1
};
if( vips_rect_includesrect( &final_line, r ) )
vips_image_minimise_all( in[i] ); vips_image_minimise_all( in[i] );
join->minimised[i] = TRUE;
} }
} }
@ -247,6 +245,12 @@ vips_arrayjoin_build( VipsObject *object )
output_width - join->rects[i].left; output_width - join->rects[i].left;
} }
/* Per-input flags recording which inputs we've already signalled minimise on.
*/
join->minimised = VIPS_ARRAY( join, n, gboolean );
for( i = 0; i < n; i++ )
join->minimised[i] = FALSE;
/* Each image must be cropped and aligned within an @hspacing by /* Each image must be cropped and aligned within an @hspacing by
* @vspacing box. * @vspacing box.
*/ */

View File

@ -23,6 +23,8 @@
* - re-enable skipahead now we have the single-thread-first-tile idea * - re-enable skipahead now we have the single-thread-first-tile idea
* 6/3/17 * 6/3/17
* - deprecate @trace, @access now seq is much simpler * - deprecate @trace, @access now seq is much simpler
* 6/9/21
* - don't set "persistent", it can cause huge memory use
*/ */
/* /*
@ -192,14 +194,17 @@ vips_sequential_build( VipsObject *object )
if( VIPS_OBJECT_CLASS( vips_sequential_parent_class )->build( object ) ) if( VIPS_OBJECT_CLASS( vips_sequential_parent_class )->build( object ) )
return( -1 ); return( -1 );
/* We've gone back and forth on whether sequential caches should be
* persistent. Persistent caches can be useful if you want to eg.
* make several crop() operations on a seq image source, but they use
* a lot of memory with eg. arrayjoin.
*
* On balance, if you want to make many crops from one source, use a
* RANDOM image.
*/
if( vips_linecache( sequential->in, &t, if( vips_linecache( sequential->in, &t,
"tile_height", sequential->tile_height, "tile_height", sequential->tile_height,
"access", VIPS_ACCESS_SEQUENTIAL, "access", VIPS_ACCESS_SEQUENTIAL,
/* We need seq caches to persist across minimise in case
* someone is trying to read an image with a series of crop
* operations.
*/
"persistent", TRUE,
NULL ) ) NULL ) )
return( -1 ); return( -1 );