smarter cache sizing for vips_resize

See the comment in the code: the cache size should now account for the maximum possible delay.
2015-11-11 10:23:50 +00:00 · 2015-11-11 10:23:50 +00:00 · d94a2807c9
commit d94a2807c9
parent 66fe338d36
4 changed files with 25 additions and 5 deletions
--- a/1
+++ b/1
@@ -8,6 +8,7 @@
 - add memory.h to Python API .. makes tracked highwater visible
 - added bandjoin_const to add constant bands to an image
 - better alpha handling for tiff write, thanks sadaqatullahn
+- better cache sizing for vips_resize()

 7/5/15 started 8.1.1
 - oop, vips-8.0 wrapper script should be vips-8.1, thanks Danilo
--- a/2
+++ b/2
@@ -11,6 +11,8 @@

  trying new shrinker

+  also try resize change
+
 - colour needs to split _build() into preprocess / process / postprocess
  phases

--- a/libvips/foreign/vips2tiff.c
+++ b/libvips/foreign/vips2tiff.c
@@ -621,7 +621,8 @@ write_tiff_header( Write *write, Layer *layer )
 			}
 		}

-		alpha_bands = write->im->Bands - colour_bands;
+		alpha_bands = VIPS_CLIP( 0, 
+			write->im->Bands - colour_bands, MAX_ALPHA );
 		if( alpha_bands > 0 ) { 
 			uint16 v[MAX_ALPHA];
 			int i;
--- a/libvips/resample/resize.c
+++ b/libvips/resample/resize.c
@@ -4,6 +4,8 @@
 * 	- from affine.c
 * 18/11/14
 * 	- add the fancier algorithm from vipsthumbnail
+ * 11/11/15
+ * 	- smarter cache sizing
 */

 /*
@@ -137,30 +139,44 @@ vips_resize_build( VipsObject *object )
 	 * sequentiality.
 	 *
 	 * So ... read into a cache where tiles are scanlines, and make sure
-	 * we keep enough scanlines to be able to serve a line of tiles.
+	 * we keep enough scanlines.
 	 *
 	 * We use a threaded tilecache to avoid a deadlock: suppose thread1,
 	 * evaluating the top block of the output, is delayed, and thread2, 
 	 * evaluating the second block, gets here first (this can happen on 
 	 * a heavily-loaded system). 
 	 *
-	 * With an unthreaded tilecache (as we had before), thread2 will get
+	 * With an unthreaded tilecache, thread2 will get
 	 * the cache lock and start evaling the second block of the shrink. 
 	 * When it reaches the png reader it will stall until the first block 
 	 * has been used ... but it never will, since thread1 will block on 
 	 * this cache lock. 
+	 *
+	 * Cache sizing: we double-buffer writes, so threads can be up to one 
+	 * line of tiles behind. For example, one thread could be allocated
+	 * tile (0, 0) and then stall; the whole write system won't stall
+	 * until it tries to allocate tile (0, 2).
+	 *
+	 * We affine down after this, scaling by @residual, which is perhaps
+	 * 0.5, or as low as 0.3, depending on the interpolator. So the number
+	 * of scanlines we need to keep for the worst case is
+	 * 2 * @tile_height / @residual, plus a little extra (hence 2.5 below).
+	 *
 	 */
 	if( int_shrink > 1 ) { 
 		int tile_width;
 		int tile_height;
 		int nlines;

+		int need_lines;
+
 		vips_get_tile_size( in, 
 			&tile_width, &tile_height, &nlines );
+		need_lines = 2.5 * tile_height / residual;
 		if( vips_tilecache( in, &t[6], 
 			"tile_width", in->Xsize,
 			"tile_height", 10,
-			"max_tiles", 1 + (nlines * 2) / 10,
+			"max_tiles", 1 + need_lines / 10,
 			"access", VIPS_ACCESS_SEQUENTIAL,
 			"threaded", TRUE, 
 			NULL ) )