clean up after fix

threaded tile cache seems a good solution --- clean up and get ready to
merge back into master
This commit is contained in:
John Cupitt 2013-01-14 17:35:05 +00:00
parent 8ab2f3eeb6
commit a431f5e021
8 changed files with 68 additions and 517 deletions

View File

@ -1,11 +1,8 @@
31/12/12 started 7.30.7
- better option parsing for "vips", thanks Haida
- small fixes to help OS X
- removed sequential skip-ahead, it was not reliable on machines under
heavy load
- backported threaded tile cache from next version, im_tile_cache() now uses
it to prevent a deadlock
- backported threaded tile cache from next version, im_tile_cache() now
uses it to prevent a deadlock, see comment there
14/11/12 started 7.30.6
- capture tiff warnings earlier

63
TODO
View File

@ -1,66 +1,3 @@
- carrierwave-vips is doing
im_png2vips
vips_sequential
vips_linecache
vips_blockcache(), but upsizes
vips_sequential_generate() to delay ahead threads
im_shrink
im_tile_cache (old single-threaded vips7 tile cache)
im_affine
im_conv
im_vips2png
the im_tile_cache() single-threads, so once threads get out of order there,
no amount of delay in vips_sequential_generate() is going to fix it ...
we've deadlocked
vipsthumbnail does exactly the same thing
the comment there says that im_tile_cache() is necessary, since the conv
will force SMALLTILE, and that will break sequentiality
turning the im_tile_cache() into an im_copy() fixes the deadlock, but might
break for large shrink factors
yes, try eg. $ vipsthumbnail Chicago.png --verbose --size 20
fails with a shrink of 1462
there seems to be a good speed improvement from deleting the tile cache
ruby-vips, jpeg image: 55ms
ruby-vips, png image: 1853ms
compared to
ruby-vips, jpeg image: 50ms
ruby-vips, png image: 2401ms
from the README ... benchmark again after tests
how about using the threaded tile cache from master? would that fix it?
seems to!
remove the old im_tile_cache() code
benchmark again
check the way we disabled extract/insert skipahead
check vips_sequential skipahead code
add a comment to the im_tile_cache() wrapper explaining why it turns
on threading
when we port forward again, we'll need to re-add the vips_ prefix to
g_mutex_new() in tilecache.c
blocking bugs
=============

View File

@ -181,6 +181,7 @@ vips_extract_area_class_init( VipsExtractAreaClass *class )
{
GObjectClass *gobject_class = G_OBJECT_CLASS( class );
VipsObjectClass *vobject_class = VIPS_OBJECT_CLASS( class );
VipsOperationClass *operation_class = VIPS_OPERATION_CLASS( class );
VIPS_DEBUG_MSG( "vips_extract_area_class_init\n" );
@ -191,6 +192,8 @@ vips_extract_area_class_init( VipsExtractAreaClass *class )
vobject_class->description = _( "extract an area from an image" );
vobject_class->build = vips_extract_area_build;
operation_class->flags = VIPS_OPERATION_SEQUENTIAL;
VIPS_ARG_IMAGE( class, "input", 0,
_( "Input" ),
_( "Input image" ),

View File

@ -342,6 +342,7 @@ vips_insert_class_init( VipsInsertClass *class )
{
GObjectClass *gobject_class = G_OBJECT_CLASS( class );
VipsObjectClass *vobject_class = VIPS_OBJECT_CLASS( class );
VipsOperationClass *operation_class = VIPS_OPERATION_CLASS( class );
VIPS_DEBUG_MSG( "vips_insert_class_init\n" );
@ -352,6 +353,8 @@ vips_insert_class_init( VipsInsertClass *class )
vobject_class->description = _( "insert an image" );
vobject_class->build = vips_insert_build;
operation_class->flags = VIPS_OPERATION_SEQUENTIAL;
VIPS_ARG_IMAGE( class, "main", -1,
_( "Main" ),
_( "Main input image" ),

View File

@ -66,9 +66,9 @@
/* Stall threads that run ahead for this long, in seconds.
*
* This has to be a long time: if we're trying to use all cores on a busy
* system it could be ages until all the other threads get a chance to run.
* system, it could be ages until all the other threads get a chance to run.
*/
#define STALL_TIME (10000.0)
#define STALL_TIME (1.0)
typedef struct _VipsSequential {
VipsConversion parent_instance;
@ -136,12 +136,20 @@ vips_sequential_generate( VipsRegion *or,
return( -1 );
}
if( r->top > sequential->y_pos ) {
if( r->top > sequential->y_pos &&
sequential->y_pos > 0 ) {
/* We have started reading (y_pos > 0) and this request is for
* stuff beyond that, stall for a while to give other
* threads time to catch up.
*
* The stall can be cancelled by a signal on @ready.
*
* We don't stall forever, since an error would be better than
* deadlock, and we don't fail on timeout, since the timeout
* may be harmless.
*/
GTimeVal time;
/* This read is ahead of the current read point.
* Stall for a while to give other threads time to catch up.
*/
VIPS_DEBUG_MSG( "thread %p stalling for up to %gs ...\n",
STALL_TIME, g_thread_self() );
g_get_current_time( &time );
@ -159,15 +167,14 @@ vips_sequential_generate( VipsRegion *or,
g_thread_self() );
}
/* This is a request for something some way down the image, and we've
* either not read anything yet or fallen through from the stall
* above.
*
* Probably the operation is something like
* extract_area and we should skip the initial part of the image. In
* fact we read to cache.
*/
if( r->top > sequential->y_pos ) {
/* This is a request for something some way down the image,
* and we've fallen through from the stall above.
*
* Probably the operation is something like extract_area and
* we should skip the initial part of the image. In fact,
* we read to cache, since it may be useful.
*/
VipsRect area;
VIPS_DEBUG_MSG( "thread %p skipping to line %d ...\n",

View File

@ -56,7 +56,6 @@ libdeprecated_la_SOURCES = \
im_png2vips.c \
im_ppm2vips.c \
im_tiff2vips.c \
im_tile_cache.c \
im_vips2csv.c \
im_vips2jpeg.c \
im_vips2png.c \

View File

@ -1,434 +0,0 @@
/* Tile cache from tiff2vips ... broken out so it can be shared with
* openexr read.
*
* This isn't the same as im_cache(): we don't sub-divide, and we
* single-thread our callee.
*
* 23/8/06
* - take ownership of reused tiles in case the cache is being shared
* 13/2/07
 * - release ownership after filling with pixels in case we read across
* threads
* 4/2/10
* - gtkdoc
* 12/12/10
* - use im_prepare_to() and avoid making a sequence for every cache tile
*/
/*
This file is part of VIPS.
VIPS is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk
*/
/* Turn on debugging output.
#define DEBUG
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /*HAVE_CONFIG_H*/
#include <vips/intl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <vips/vips.h>
#include <vips/thread.h>
/* A tile in our cache.
*/
typedef struct {
struct _Read *read;
REGION *region; /* REGION with private mem for data */
int time; /* Time of last use for flush */
int x; /* xy pos in VIPS image cods */
int y;
} Tile;
/* Stuff we track during a read.
 */
typedef struct _Read {
	/* Parameters.
	 */
	IMAGE *in;		/* Image we cache pixels from */
	IMAGE *out;		/* Image we write to */
	int tile_width;		/* Tile geometry, in pixels */
	int tile_height;
	int max_tiles;		/* Cache at most this many tiles, -1 for
				 * no limit */

	/* Cache.
	 */
	int time;		/* Update ticks for LRU here */
	int ntiles;		/* Current cache size */
	GMutex *lock;		/* Lock everything here */
	GSList *cache;		/* List of Tile */
} Read;
/* Unlink a tile from its owning cache and release everything it holds.
 */
static void
tile_destroy( Tile *tile )
{
	Read *owner = tile->read;

	/* Detach from the owner's tile list before freeing anything.
	 */
	owner->cache = g_slist_remove( owner->cache, tile );
	owner->ntiles -= 1;
	g_assert( owner->ntiles >= 0 );
	tile->read = NULL;

	IM_FREEF( im_region_free, tile->region );
	im_free( tile );
}
/* Free a Read and everything it owns: the lock, every cached tile, then
 * the struct itself.
 */
static void
read_destroy( Read *read )
{
	IM_FREEF( g_mutex_free, read->lock );

	/* tile_destroy() unlinks the tile from read->cache, so keep taking
	 * the head until the list drains.
	 */
	while( read->cache != NULL )
		tile_destroy( (Tile *) read->cache->data );

	im_free( read );
}
/* Allocate and initialise a Read, tying its lifetime to @out via a close
 * callback. Returns NULL on error.
 */
static Read *
read_new( IMAGE *in, IMAGE *out,
	int tile_width, int tile_height, int max_tiles )
{
	Read *read = IM_NEW( NULL, Read );

	if( !read )
		return( NULL );

	read->in = in;
	read->out = out;
	read->tile_width = tile_width;
	read->tile_height = tile_height;
	read->max_tiles = max_tiles;
	read->time = 0;
	read->ntiles = 0;
	read->cache = NULL;
	read->lock = g_mutex_new();

	/* Free everything automatically when @out closes.
	 */
	if( im_add_close_callback( out,
		(im_callback_fn) read_destroy, read, NULL ) ) {
		read_destroy( read );
		return( NULL );
	}

	return( read );
}
/* Make a new tile, link it into @read's cache list and give it a private
 * region. Returns NULL on error.
 */
static Tile *
tile_new( Read *read )
{
	Tile *t = IM_NEW( NULL, Tile );

	if( !t )
		return( NULL );

	t->read = read;
	t->region = NULL;
	t->time = read->time;
	t->x = -1;
	t->y = -1;

	/* Link into the cache before building the region so tile_destroy()
	 * can clean up a half-constructed tile.
	 */
	read->cache = g_slist_prepend( read->cache, t );
	g_assert( read->ntiles >= 0 );
	read->ntiles += 1;

	t->region = im_region_create( read->in );
	if( !t->region ) {
		tile_destroy( t );
		return( NULL );
	}
	im__region_no_ownership( t->region );

	return( t );
}
/* Reposition a tile at (x, y) and back it with local memory.
 * Returns 0 on success, -1 on error.
 */
static int
tile_move( Tile *tile, int x, int y )
{
	Read *read = tile->read;
	Rect area;

	tile->x = x;
	tile->y = y;

	area.left = x;
	area.top = y;
	area.width = read->tile_width;
	area.height = read->tile_height;

	return( im_region_buffer( tile->region, &area ) ? -1 : 0 );
}
/* Look up the tile at (x, y), or return NULL if it's not cached.
 */
static Tile *
tile_search( Read *read, int x, int y )
{
	GSList *node;

	for( node = read->cache; node; node = node->next ) {
		Tile *t = (Tile *) node->data;

		if( t->x == x &&
			t->y == y )
			return( t );
	}

	return( NULL );
}
/* Mark a tile as just-used for the LRU replacement policy.
 */
static void
tile_touch( Tile *tile )
{
	Read *read = tile->read;

	g_assert( read->ntiles >= 0 );
	tile->time = read->time++;
}
/* Generate pixels for a tile by pulling from @in, then mark the tile as
 * recently used. Returns 0 on success, -1 on error.
 */
static int
tile_fill( Tile *tile, REGION *in )
{
	Read *read = tile->read;
	Rect area;

#ifdef DEBUG
	printf( "im_tile_cache: filling tile %d x %d\n", tile->x, tile->y );
#endif /*DEBUG*/

	area.left = tile->x;
	area.top = tile->y;
	area.width = read->tile_width;
	area.height = read->tile_height;
	if( im_prepare_to( in, tile->region, &area, area.left, area.top ) )
		return( -1 );

	tile_touch( tile );

	return( 0 );
}
/* Find existing tile, make a new tile, or if we have a full set of tiles,
 * reuse LRU.
 *
 * Called with read->lock held -- see tile_cache_fill().
 *
 * Returns the tile, or NULL on error.
 */
static Tile *
tile_find( Read *read, REGION *in, int x, int y )
{
	Tile *tile;
	int oldest;
	GSList *p;

	/* In cache already?
	 */
	if( (tile = tile_search( read, x, y )) ) {
		tile_touch( tile );

		return( tile );
	}

	/* Cache not full? max_tiles == -1 means no limit.
	 */
	if( read->max_tiles == -1 ||
		read->ntiles < read->max_tiles ) {
		if( !(tile = tile_new( read )) ||
			tile_move( tile, x, y ) ||
			tile_fill( tile, in ) )
			return( NULL );

		return( tile );
	}

	/* Cache is full: scan for the tile with the smallest time, ie. the
	 * least-recently used, and reuse that.
	 */
	oldest = read->time;
	tile = NULL;
	for( p = read->cache; p; p = p->next ) {
		Tile *t = (Tile *) p->data;

		if( t->time < oldest ) {
			oldest = t->time;
			tile = t;
		}
	}

	/* tile_touch() post-increments read->time, so a filled tile's time
	 * is normally below read->time and the scan finds a victim.
	 */
	g_assert( tile );

#ifdef DEBUG
	printf( "im_tile_cache: reusing tile %d x %d\n", tile->x, tile->y );
#endif /*DEBUG*/

	if( tile_move( tile, x, y ) ||
		tile_fill( tile, in ) )
		return( NULL );

	return( tile );
}
/* Copy the pixels of @area from @from to @to, one scanline at a time.
 * @area must lie inside the valid rect of both regions.
 */
static void
copy_region( REGION *from, REGION *to, Rect *area )
{
	const size_t line_bytes =
		IM_IMAGE_SIZEOF_PEL( from->im ) * area->width;

	int y;

	g_assert( im_rect_includesrect( &from->valid, area ) );
	g_assert( im_rect_includesrect( &to->valid, area ) );

	for( y = area->top; y < IM_RECT_BOTTOM( area ); y++ )
		memcpy( IM_REGION_ADDR( to, area->left, y ),
			IM_REGION_ADDR( from, area->left, y ),
			line_bytes );
}
/* Loop over the output region, filling with data from cache.
 *
 * Generate function: @seq is the input REGION made by the start function,
 * @b is our Read (see the im_generate() call in the commented-out
 * im_tile_cache() below).
 *
 * Note that read->lock is held for the whole fill, so access to the cache
 * (and to the underlying input) is single-threaded.
 */
static int
tile_cache_fill( REGION *out, void *seq, void *a, void *b )
{
	REGION *in = (REGION *) seq;
	Read *read = (Read *) b;
	const int tw = read->tile_width;
	const int th = read->tile_height;
	Rect *r = &out->valid;

	/* Find top left of tiles we need.
	 */
	int xs = (r->left / tw) * tw;
	int ys = (r->top / th) * th;

	int x, y;

	g_mutex_lock( read->lock );

	/* Walk the grid of tiles that intersect the request.
	 */
	for( y = ys; y < IM_RECT_BOTTOM( r ); y += th )
		for( x = xs; x < IM_RECT_RIGHT( r ); x += tw ) {
			Tile *tile;
			Rect tarea;
			Rect hit;

			if( !(tile = tile_find( read, in, x, y )) ) {
				g_mutex_unlock( read->lock );
				return( -1 );
			}

			/* The area of the tile.
			 */
			tarea.left = x;
			tarea.top = y;
			tarea.width = tw;
			tarea.height = th;

			/* The part of the tile that we need.
			 */
			im_rect_intersectrect( &tarea, r, &hit );

			copy_region( tile->region, out, &hit );
		}

	g_mutex_unlock( read->lock );

	return( 0 );
}
/**
 * im_tile_cache:
 * @in: input image
 * @out: output image
 * @tile_width: tile width
 * @tile_height: tile height
 * @max_tiles: maximum number of tiles to cache
 *
 * This operation behaves rather like im_copy() between images
 * @in and @out, except that it keeps a cache of computed pixels.
 * This cache is made of up to @max_tiles tiles (a value of -1
 * means any number of tiles), and each tile is of size @tile_width
 * by @tile_height pixels.
 *
 * This is a lower-level operation than im_cache() since it does no
 * subdivision. It is suitable for caching the output of operations like
 * im_exr2vips() on tiled images.
 *
 * See also: im_cache().
 *
 * Returns: 0 on success, -1 on error.
 */
int
im_tile_cache( IMAGE *in, IMAGE *out,
	int tile_width, int tile_height, int max_tiles )
{
	VipsImage *x;

	/* Use the threaded vips8 tilecache rather than the old
	 * single-threaded cache above, which could deadlock under a
	 * sequential source (see TODO).
	 */
	if( vips_tilecache( in, &x,
		"tile_width", tile_width,
		"tile_height", tile_height,
		"max_tiles", max_tiles,
		"strategy", VIPS_CACHE_SEQUENTIAL,
		"threaded", TRUE,
		NULL ) )
		return( -1 );

	if( im_copy( x, out ) ) {
		g_object_unref( x );
		return( -1 );
	}
	g_object_unref( x );

	return( 0 );
}

View File

@ -2143,3 +2143,42 @@ im_rightshift_size( IMAGE *in, IMAGE *out,
return( 0 );
}
/* This is used by vipsthumbnail and the carrierwave shrinker to cache the
* output of shrink before doing the final affine.
*
* We use the vips8 threaded tilecache to avoid a deadlock: suppose thread1,
* evaluating the top block of the output is delayed, and thread2, evaluating
* the second block gets here first (this can happen on a heavily-loaded
* system).
*
* With an unthreaded tilecache (as we had before), thread2 will get
* the cache lock and start evaling the second block of the shrink. When it
 * reaches the png reader it will stall until the first block has been used ...
* but it never will, since thread1 will block on this cache lock.
*
* This function is only used in those two places (I think), so it's OK to
* hard-wire this to be a sequential threaded cache.
*/
int
im_tile_cache( IMAGE *in, IMAGE *out,
	int tile_width, int tile_height, int max_tiles )
{
	VipsImage *cache;
	int result;

	/* Sequential strategy plus threading are hard-wired, see above.
	 */
	if( vips_tilecache( in, &cache,
		"tile_width", tile_width,
		"tile_height", tile_height,
		"max_tiles", max_tiles,
		"strategy", VIPS_CACHE_SEQUENTIAL,
		"threaded", TRUE,
		NULL ) )
		return( -1 );

	/* Unref the cache image whether or not the copy works.
	 */
	result = im_copy( cache, out );
	g_object_unref( cache );

	return( result ? -1 : 0 );
}