diff --git a/TODO b/TODO index f8813b1e..8f92ee04 100644 --- a/TODO +++ b/TODO @@ -2,14 +2,6 @@ - need tests for reducel3, test every kernel plues every numeric type -- removed the cache from resize, can we get out-of-order reads? - - yup, try - - for i in 200 300 400 500 600; do time vipsthumbnail Chicago.png -s $i; done - - bench on ws as well - - try orc version of reducev? - still see rounding problems on resize, try ghibli3 diff --git a/libvips/resample/resize.c b/libvips/resample/resize.c index 620b52fb..f7ae4fca 100644 --- a/libvips/resample/resize.c +++ b/libvips/resample/resize.c @@ -143,6 +143,56 @@ vips_resize_build( VipsObject *object ) else vresidual = hresidual; + /* We want to make sure we read the image sequentially. + * However, the convolution we may be doing later will force us + * into SMALLTILE or maybe FATSTRIP mode and that will break + * sequentiality. + * + * So ... read into a cache where tiles are scanlines, and make sure + * we keep enough scanlines. + * + * We use a threaded tilecache to avoid a deadlock: suppose thread1, + * evaluating the top block of the output, is delayed, and thread2, + * evaluating the second block, gets here first (this can happen on + * a heavily-loaded system). + * + * With an unthreaded tilecache, thread2 will get + * the cache lock and start evaluating the second block of the shrink. + * When it reaches the png reader it will stall until the first block + * has been used ... but it never will, since thread1 will block on + * this cache lock. + * + * Cache sizing: we double-buffer writes, so threads can be up to one + * line of tiles behind. For example, one thread could be allocated + * tile (0,0) and then stall; the whole write system won't stall until + * it tries to allocate tile (0, 2). + * + * We reduce down after this, which can be a scale of up to @residual, + * perhaps 0.5 or down as low as 0.3 depending on the interpolator. 
So + * the number of scanlines we need to keep for the worst case is + * 2 * @tile_height / @residual, plus a little extra. + */ + if( int_vshrink > 1 ) { + int tile_width; + int tile_height; + int n_lines; + + int need_lines; + + vips_get_tile_size( in, + &tile_width, &tile_height, &n_lines ); + need_lines = 1.2 * n_lines / vresidual; + if( vips_tilecache( in, &t[6], + "tile_width", in->Xsize, + "tile_height", 10, + "max_tiles", 1 + need_lines / 10, + "access", VIPS_ACCESS_SEQUENTIAL, + "threaded", TRUE, + NULL ) ) + return( -1 ); + in = t[6]; + } + /* If the final affine will be doing a large downsample, we can get * nasty aliasing on hard edges. Blur before affine to smooth this out. *