From 75fdc5ae4e1a15f8cc5ef50ef2d73ddc17ae188a Mon Sep 17 00:00:00 2001
From: John Cupitt <jcupitt@gmail.com>
Date: Wed, 23 Jun 2010 15:45:43 +0000
Subject: [PATCH] moreconst fixes, sinkdisc fixes, constant vector clip added

---
 ChangeLog                         |  3 ++
 libvips/arithmetic/im_remainder.c | 60 +++++++++++++++++++++++++------
 libvips/iofuncs/sinkdisc.c        | 33 +++++++++--------
 libvips/relational/relational.c   |  6 ++--
 4 files changed, 75 insertions(+), 27 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 240864fd..3e007872 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
 12/5/10 started 7.22.1
 - fix a problem with tiff pyramid write and >1cpu, thanks Ruven
+- constant ops clip to target range
+- oops, moreconst and moreeqconst were the same
+- better buffer handling in sinkdisc for single-line images
 
 12/5/10 started 7.22.0
 - bump and rename
diff --git a/libvips/arithmetic/im_remainder.c b/libvips/arithmetic/im_remainder.c
index 37b2d89c..5dfd716c 100644
--- a/libvips/arithmetic/im_remainder.c
+++ b/libvips/arithmetic/im_remainder.c
@@ -14,6 +14,8 @@
  * 27/8/08
  * 	- revise upcasting system
  * 	- add gtkdoc comments
+ * 23/6/10
+ * 	- constant ops clip to target range
  */
 
 /*
@@ -170,6 +172,15 @@ im_remainder( IMAGE *in1, IMAGE *in2, IMAGE *out )
 		(im_wrapmany_fn) remainder_buffer, NULL ) );
 }
 
+/* Cast a vector of double to a vector of TYPE, clipping to a range.
+ */
+#define CAST_CLIP( TYPE, N, X ) { \
+	TYPE *tq = (TYPE *) q; \
+	\
+	for( i = 0; i < n; i++ ) \
+		tq[i] = (TYPE) IM_CLIP( N, p[i], X ); \
+}
+
 /* Cast a vector of double to a vector of TYPE.
  */
 #define CAST( TYPE ) { \
@@ -203,16 +214,45 @@ make_pixel( IMAGE *out, VipsBandFmt fmt, int n, double *p )
 		return( NULL );
 
         switch( fmt ) {
-        case IM_BANDFMT_CHAR:		CAST( signed char ); break;
-        case IM_BANDFMT_UCHAR:  	CAST( unsigned char ); break;
-        case IM_BANDFMT_SHORT:  	CAST( signed short ); break;
-        case IM_BANDFMT_USHORT: 	CAST( unsigned short ); break;
-        case IM_BANDFMT_INT:    	CAST( signed int ); break;
-        case IM_BANDFMT_UINT:   	CAST( unsigned int ); break;
-        case IM_BANDFMT_FLOAT: 		CAST( float ); break; 
-        case IM_BANDFMT_DOUBLE:		CAST( double ); break;
-        case IM_BANDFMT_COMPLEX: 	CASTC( float ); break; 
-        case IM_BANDFMT_DPCOMPLEX:	CASTC( double ); break;
+        case IM_BANDFMT_CHAR:		
+		CAST_CLIP( signed char, SCHAR_MIN, SCHAR_MAX ); 
+		break;
+
+        case IM_BANDFMT_UCHAR:  	
+		CAST_CLIP( unsigned char, 0, UCHAR_MAX ); 
+		break;
+
+        case IM_BANDFMT_SHORT:  	
+		CAST_CLIP( signed short, SHRT_MIN, SHRT_MAX ); 
+		break;
+
+        case IM_BANDFMT_USHORT: 	
+		CAST_CLIP( unsigned short, 0, USHRT_MAX ); 
+		break;
+
+        case IM_BANDFMT_INT:    	
+		CAST_CLIP( signed int, INT_MIN, INT_MAX ); 
+		break;
+
+        case IM_BANDFMT_UINT:   	
+		CAST_CLIP( unsigned int, 0, UINT_MAX ); 
+		break;
+
+        case IM_BANDFMT_FLOAT: 		
+		CAST( float ); 
+		break; 
+
+        case IM_BANDFMT_DOUBLE:		
+		CAST( double ); 
+		break;
+
+        case IM_BANDFMT_COMPLEX: 	
+		CASTC( float ); 
+		break; 
+
+        case IM_BANDFMT_DPCOMPLEX:	
+		CASTC( double ); 
+		break;
 
         default:
                 g_assert( 0 );
diff --git a/libvips/iofuncs/sinkdisc.c b/libvips/iofuncs/sinkdisc.c
index 73a44f96..304be2cc 100644
--- a/libvips/iofuncs/sinkdisc.c
+++ b/libvips/iofuncs/sinkdisc.c
@@ -3,6 +3,8 @@
  * 19/3/10
  * 	- from im_wbuffer.c
  * 	- move on top of VipsThreadpool, instead of im_threadgroup_t
+ * 23/6/10
+ * 	- better buffer handling for single-line images
  */
 
 /*
@@ -253,13 +255,11 @@ wbuffer_new( Write *write )
 	return( wbuffer );
 }
 
-/* Write and swap buffers.
+/* Block until the previous write completes, then write the front buffer.
  */
 static int
 wbuffer_flush( Write *write )
 {
-	WriteBuffer *t;
-
 	VIPS_DEBUG_MSG( "wbuffer_flush:\n" );
 
 	/* Block until the other buffer has been written. We have to do this 
@@ -287,12 +287,6 @@ wbuffer_flush( Write *write )
 	wbuffer_write( write->buf );
 #endif /*HAVE_THREADS*/
 
-	/* Swap buffers.
-	 */
-	t = write->buf; 
-	write->buf = write->buf_back; 
-	write->buf_back = t;
-
 	return( 0 );
 }
 
@@ -355,7 +349,8 @@ wbuffer_allocate_fn( VipsThreadState *state, void *a, gboolean *stop )
 		write->y += write->tile_height;
 
 		if( write->y >= IM_RECT_BOTTOM( &write->buf->area ) ) {
-			/* Write and swap buffers.
+			/* Block until the last write is done, then set write
+			 * of the front buffer going.
 			 */
 			if( wbuffer_flush( write ) )
 				return( -1 );
@@ -367,6 +362,16 @@ wbuffer_allocate_fn( VipsThreadState *state, void *a, gboolean *stop )
 				return( 0 );
 			}
 
+			/* Swap buffers.
+			 */
+			{
+				WriteBuffer *t;
+
+				t = write->buf; 
+				write->buf = write->buf_back; 
+				write->buf_back = t;
+			}
+
 			/* Position buf at the new y.
 			 */
 			if( wbuffer_position( write->buf, 
@@ -518,16 +523,14 @@ vips_sink_disc( VipsImage *im, VipsRegionWrite write_fn, void *a )
 			&write ) )  
 		result = -1;
 
-	/* We've set all the buffers writing, but not waited for the BG
-	 * writer to finish. This can take a while: it has to wait for the
-	 * last worker to make the last tile.
+	/* Just before allocate signalled stop, it set write.buf writing. We
+	 * need to wait for this write to finish. 
 	 *
 	 * We can't just free the buffers (which will wait for the bg threads 
 	 * to finish), since the bg thread might see the kill before it gets a 
 	 * chance to write.
 	 */
-	if( write.buf->area.top > 0 )
-		im_semaphore_down( &write.buf_back->done );
+	im_semaphore_down( &write.buf->done );
 
 	im__end_eval( im );
 
diff --git a/libvips/relational/relational.c b/libvips/relational/relational.c
index b2344e1c..dd934fbe 100644
--- a/libvips/relational/relational.c
+++ b/libvips/relational/relational.c
@@ -22,6 +22,8 @@
  * 	- gtkdoc
  * 	- use new im__arith_binary*() functions
  * 	- more meta-programming
+ * 23/6/10
+ * 	- oops, moreconst and moreeqconst were the same
  */
 
 /*
@@ -521,7 +523,7 @@ im_lesseq_vec( IMAGE *in, IMAGE *out, int n, double *c )
 }
 
 #define MORE_REAL( Q, A, B ) { \
-	if( (A) >= (B) ) \
+	if( (A) > (B) ) \
 		Q = 255; \
 	else \
 		Q = 0; \
@@ -531,7 +533,7 @@ im_lesseq_vec( IMAGE *in, IMAGE *out, int n, double *c )
 	double m1 = (A)[0] * (A)[0] + (A)[1] * (A)[1]; \
 	double m2 = (B)[0] * (B)[0] + (B)[1] * (B)[1]; \
 	\
-	if( m1 >= m2 ) \
+	if( m1 > m2 ) \
 		Q = 255; \
 	else \
 		Q = 0; \