From 6e5b44ce1365c764212f957825a218789c7bb6c0 Mon Sep 17 00:00:00 2001
From: John Cupitt <jcupitt@gmail.com>
Date: Fri, 3 Mar 2017 14:34:22 +0000
Subject: [PATCH] try setting a seq meta

and only caching in shrinkv if we see the tag
---
 TODO                            | 18 ++++++++++++++-
 libvips/conversion/sequential.c |  1 +
 libvips/include/vips/header.h   |  9 ++++++++
 libvips/resample/resize.c       | 39 ---------------------------------
 libvips/resample/shrinkh.c      |  4 ----
 libvips/resample/shrinkv.c      | 36 +++++++++++++++++++++++++++++-
 libvips/resample/thumbnail.c    |  7 +++++-
 7 files changed, 68 insertions(+), 46 deletions(-)

diff --git a/TODO b/TODO
index 035d083c..9b223089 100644
--- a/TODO
+++ b/TODO
@@ -11,7 +11,8 @@
   another alternative
 
   	- add this caching to thumbnail.c
-	- remove the cache from resize
+	- maybe remove the cache from resize? does sharp need it? nope, safe to
+	  remove
 	- don't use vips_resize(), expand code out
 	- put the cache just after shrinkv
 	- now there's no need to thread, since thumbnail is (mostly) just 
@@ -19,6 +20,21 @@
 	- and we leave shink and resize nice and simple, though they will no
 	  longer work on seq files
 
+	- should linecache ask for inbetween lines in unthreaded seq mode?
+	- could allow many readers, but only one writer active at once? perhaps
+	  that's what we have?
+
+	- sharp makes quite a few assumptions about cache size, it'd be easy to
+	  break
+
+	- loaders could add a "seq-mode" meta tag, shrinkv could single-thread
+	  if it sees the tag
+	- the seq tag could just be added by vips_sequential
+	- good idea! keeps compat with everything, keeps non-seq vips siomple
+	  and quick
+
+  do we need a seq cache for reducev as well? we could have up to a 3x
+  reduction there
 
 
 
diff --git a/libvips/conversion/sequential.c b/libvips/conversion/sequential.c
index d5a9a83c..a63d0658 100644
--- a/libvips/conversion/sequential.c
+++ b/libvips/conversion/sequential.c
@@ -203,6 +203,7 @@ vips_sequential_build( VipsObject *object )
 	if( vips_image_pipelinev( conversion->out, 
 		VIPS_DEMAND_STYLE_THINSTRIP, t, NULL ) )
 		return( -1 );
+	vips_image_set_int( conversion->out, VIPS_META_SEQUENTIAL, 1 ); 
 	if( vips_image_generate( conversion->out,
 		vips_start_one, vips_sequential_generate, vips_stop_one, 
 		t, sequential ) )
diff --git a/libvips/include/vips/header.h b/libvips/include/vips/header.h
index 1ae81011..e9cf840b 100644
--- a/libvips/include/vips/header.h
+++ b/libvips/include/vips/header.h
@@ -108,6 +108,15 @@ extern "C" {
  */
 #define VIPS_META_LOADER "vips-loader"
 
+/**
+ * VIPS_META_SEQUENTIAL:
+ *
+ * Images loaded via vips_sequential() have this int field defined. Some
+ * operations (eg. vips_shrinkv()) add extra caches if they see it on their
+ * input. 
+ */
+#define VIPS_META_SEQUENTIAL "vips-sequential"
+
 /**
  * VIPS_META_ORIENTATION:
  *
diff --git a/libvips/resample/resize.c b/libvips/resample/resize.c
index 3a70cf5f..c3667dae 100644
--- a/libvips/resample/resize.c
+++ b/libvips/resample/resize.c
@@ -202,45 +202,6 @@ vips_resize_build( VipsObject *object )
 		hscale *= int_hshrink;
 	}
 
-	/* We will get overcomputation on vips_shrink() from the vips_reduce() 
-	 * coming later, so read into a cache where tiles are scanlines, and 
-	 * make sure we keep enough scanlines.
-	 *
-	 * Cache sizing: we double-buffer writes, so threads can be up to one 
-	 * line of tiles behind. For example, one thread could be allocated
-	 * tile (0,0) and then stall, the whole write system won't stall until
-	 * it tries to allocate tile (0, 2).
-	 *
-	 * We reduce down after this, which can be a scale of up to @residual, 
-	 * perhaps 0.5 or down as low as 0.3. So the number of scanlines we 
-	 * need to keep for the worst case is 2 * @tile_height / @residual, 
-	 * plus a little extra.
-	 *
-	 * Use an unthreaded tilecache to limit the range of Y values that an
-	 * image source has to span. Suppose we are shrinkv-ing by 100x and
-	 * need to span two tile rows on the output. Now the input source might
-	 * need to refer back 128 * 100 lines, argh. 
-	 */
-	if( int_vshrink > 1 ) { 
-		int tile_width;
-		int tile_height;
-		int n_lines;
-		int need_lines;
-
-		vips_get_tile_size( in, 
-			&tile_width, &tile_height, &n_lines );
-		need_lines = 1.2 * n_lines / vscale;
-		if( vips_tilecache( in, &t[6], 
-			"tile_width", in->Xsize,
-			"tile_height", 10,
-			"max_tiles", 1 + need_lines / 10,
-			"access", VIPS_ACCESS_SEQUENTIAL,
-			"threaded", FALSE, 
-			NULL ) )
-			return( -1 );
-		in = t[6];
-	}
-
 	/* Any residual downsizing.
 	 */
 	if( vscale < 1.0 ) { 
diff --git a/libvips/resample/shrinkh.c b/libvips/resample/shrinkh.c
index ed05f98e..e44f8352 100644
--- a/libvips/resample/shrinkh.c
+++ b/libvips/resample/shrinkh.c
@@ -267,10 +267,6 @@ vips_shrinkh_build( VipsObject *object )
 		return( -1 );
 	in = t[1];
 
-	/* THINSTRIP will work, anything else will break seq mode. If you 
-	 * combine shrink with conv you'll need to use a line cache to maintain
-	 * sequentiality.
-	 */
 	if( vips_image_pipelinev( resample->out, 
 		VIPS_DEMAND_STYLE_THINSTRIP, in, NULL ) )
 		return( -1 );
diff --git a/libvips/resample/shrinkv.c b/libvips/resample/shrinkv.c
index 1e73ba41..c5544928 100644
--- a/libvips/resample/shrinkv.c
+++ b/libvips/resample/shrinkv.c
@@ -330,7 +330,7 @@ vips_shrinkv_build( VipsObject *object )
 	VipsResample *resample = VIPS_RESAMPLE( object );
 	VipsShrinkv *shrink = (VipsShrinkv *) object;
 	VipsImage **t = (VipsImage **) 
-		vips_object_local_array( object, 2 );
+		vips_object_local_array( object, 3 );
 
 	VipsImage *in;
 
@@ -365,6 +365,40 @@ vips_shrinkv_build( VipsObject *object )
 		return( -1 );
 	in = t[1];
 
+	/* Large vshrinks will throw off sequential mode. Suppose thread1 is
+	 * generating tile (0, 0), but stalls. thread2 generates tile
+	 * (0, 1), 128 lines further down the output. After it has done,
+	 * thread1 tries to generate (0, 0), but by then the pixels it needs
+	 * have gone from the input image line cache if the vshrink is large.
+	 *
+	 * To fix this, cache the output of vshrink, and disable threading. Now
+	 * thread1 will make the whole of tile (0, 0) and thread2 will block
+	 * until it's done. 
+	 *
+	 * We could still get out of order if thread2 arrives here before
+	 * thread1. Most images will be wide enough that many tiles will fit
+	 * across the image for row0 and they would all have to be delayed
+	 * behind a row1 request. This seems very unlikely, but perhaps could
+	 * happen for a very tall, thin image with a very large shrink factor. 
+	 */
+	if( vips_image_get_typeof( in, VIPS_META_SEQUENTIAL ) ) { 
+		int tile_width;
+		int tile_height;
+		int n_lines;
+
+		vips_get_tile_size( in, 
+			&tile_width, &tile_height, &n_lines );
+		if( vips_tilecache( in, &t[2], 
+			"tile_width", in->Xsize,
+			"tile_height", 10,
+			"max_tiles", 1 + n_lines / 10,
+			"access", VIPS_ACCESS_SEQUENTIAL,
+			"threaded", FALSE, 
+			NULL ) )
+			return( -1 );
+		in = t[2];
+	}
+
 	/* We have to keep a line buffer as we sum columns.
 	 */
 	shrink->sizeof_line_buffer = 
diff --git a/libvips/resample/thumbnail.c b/libvips/resample/thumbnail.c
index 897aef31..ff691ad1 100644
--- a/libvips/resample/thumbnail.c
+++ b/libvips/resample/thumbnail.c
@@ -202,7 +202,8 @@ vips_thumbnail_find_jpegshrink( VipsThumbnail *thumbnail, int width, int height
 
 	/* Shrink-on-load is a simple block shrink and will add quite a bit of
 	 * extra sharpness to the image. We want to block shrink to a
-	 * bit above our target, then vips_resize() to the final size. 
+	 * bit above our target, then vips_shrink() / vips_reduce() to the 
+	 * final size. 
 	 *
 	 * Leave at least a factor of two for the final resize step.
 	 */
@@ -375,6 +376,10 @@ vips_thumbnail_build( VipsObject *object )
 	shrink = vips_thumbnail_calculate_shrink( thumbnail, 
 		in->Xsize, in->Ysize );
 
+
+
+
+
 	/* Use centre convention to better match imagemagick.
 	 */
 	if( vips_resize( in, &t[4], 1.0 / shrink,