From 288161e420056d1f501ef50de1fc94ed700c081b Mon Sep 17 00:00:00 2001
From: John Cupitt <jcupitt@gmail.com>
Date: Wed, 23 Apr 2008 12:17:26 +0000
Subject: [PATCH] Speed up morphology ops; fix im_render() for mask == NULL

---
 ChangeLog                        |  2 +
 TODO                             |  2 +-
 libsrc/conversion/im_subsample.c | 18 +++---
 libsrc/iofuncs/im_render.c       |  6 +-
 libsrc/morphology/im_dilate.c    | 97 ++++++++++++++++++--------------
 libsrc/morphology/im_erode.c     | 97 ++++++++++++++++++--------------
 6 files changed, 128 insertions(+), 94 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index a442135d..6fce3058 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -10,6 +10,8 @@
 - use meta to preserve resunit between tiff load and save
 - small doc improvements
 - read and write CMYKA tiff (thanks Doron)
+- performance improvements for morphology ops, esp. when zooming out
+- oop, im_render() was broken for mask == NULL
 
 25/1/08 started 7.14.0
 - bump all version numbers for new stable
diff --git a/TODO b/TODO
index 0b95dc15..be9448fb 100644
--- a/TODO
+++ b/TODO
@@ -34,7 +34,7 @@ https://sourceforge.net/tracker/index.php?func=detail&aid=1836080&group_id=10005
 
   how big should the table be for 16 bits? 256 times larger? too big!
 
-  we really just need a LUUT for pow() with the right exponent, eg. 2.4 for
+  we really just need a LUT for pow() with the right exponent, eg. 2.4 for
   sRGBs, and one for 1/2.4 ... see what calcul_tables does:
 
   	table->t_r2Yr[i] = yo + a * pow( i * table->ristep / f + c, ga );
diff --git a/libsrc/conversion/im_subsample.c b/libsrc/conversion/im_subsample.c
index bad3d182..2e9e41d7 100644
--- a/libsrc/conversion/im_subsample.c
+++ b/libsrc/conversion/im_subsample.c
@@ -13,6 +13,9 @@
  *	- adapted from im_shrink()
  * 3/8/02 JC
  *	- fall back to im_copy() for x/y factors == 1
+ * 21/4/08
+ * 	- don't fall back to pixel-wise shrinks for smalltile, it kills
+ * 	  performance, just bring IM_MAX_WIDTH down instead
  */
 
 /*
@@ -57,7 +60,7 @@
 
 /* Maximum width of input we ask for.
  */
-#define IM_MAX_WIDTH (1000)
+#define IM_MAX_WIDTH (100)
 
 /* Our main parameter struct.
  */
@@ -133,8 +136,7 @@ line_shrink_gen( REGION *or, void *seq, void *a, void *b )
 	return( 0 );
 }
 
-/* Fetch one pixel at a time ... good for very large shrinks, or for SMALLTILE
- * pipes.
+/* Fetch one pixel at a time ... good for very large shrinks.
  */
 static int
 point_shrink_gen( REGION *or, void *seq, void *a, void *b )
@@ -193,7 +195,7 @@ im_subsample( IMAGE *in, IMAGE *out, int xshrink, int yshrink )
 	/* Check parameters.
 	 */
 	if( xshrink < 1 || yshrink < 1 ) {
-		im_errormsg( "im_subsample: factors should both be >= 1" );
+		im_error( "im_subsample", _( "factors should both be >= 1" ) );
 		return( -1 );
 	}
 	if( xshrink == 1 && yshrink == 1 ) 
@@ -210,7 +212,7 @@ im_subsample( IMAGE *in, IMAGE *out, int xshrink, int yshrink )
 	out->Xres = in->Xres / xshrink;
 	out->Yres = in->Yres / yshrink;
 	if( out->Xsize <= 0 || out->Ysize <= 0 ) {
-		im_errormsg( "im_subsample: image has shrunk to nothing" );
+		im_error( "im_subsample", _( "image has shrunk to nothing" ) );
 		return( -1 );
 	}
 
@@ -228,11 +230,9 @@ im_subsample( IMAGE *in, IMAGE *out, int xshrink, int yshrink )
 		return( -1 );
 
 	/* Generate! If this is a very large shrink, then it's
-	 * probably faster to do it a pixel at a time. If this is SMALLTILE,
-	 * then it will hate long lines and we should always do 1 pixel at a 
-	 * time.
+	 * probably faster to do it a pixel at a time. 
 	 */
-	if( in->dhint == IM_SMALLTILE || xshrink > 10 ) {
+	if( xshrink > 10 ) {
 		if( im_generate( out, 
 			im_start_one, point_shrink_gen, im_stop_one, in, st ) )
 			return( -1 );
diff --git a/libsrc/iofuncs/im_render.c b/libsrc/iofuncs/im_render.c
index 3bb2d024..ad0cac90 100644
--- a/libsrc/iofuncs/im_render.c
+++ b/libsrc/iofuncs/im_render.c
@@ -31,6 +31,8 @@
  * 14/3/08
  * 	- oop, still making fade threads even when not fading
  * 	- more instrumenting
+ * 23/4/08
+ * 	- oop, broken for mask == NULL
  */
 
 /*
@@ -1198,7 +1200,9 @@ im_render_fade( IMAGE *in, IMAGE *out, IMAGE *mask,
 	}
 	if( im_cp_desc( out, in ) )
 		return( -1 );
-	if( im_demand_hint( out, IM_SMALLTILE, in, NULL ) ||
+	if( im_demand_hint( out, IM_SMALLTILE, in, NULL ) )
+		return( -1 );
+	if( mask && 
 		im_demand_hint( mask, IM_SMALLTILE, in, NULL ) )
 		return( -1 );
 
diff --git a/libsrc/morphology/im_dilate.c b/libsrc/morphology/im_dilate.c
index 94c7f4e0..cf410057 100644
--- a/libsrc/morphology/im_dilate.c
+++ b/libsrc/morphology/im_dilate.c
@@ -20,6 +20,9 @@
  *	- now uses im_embed() with edge stretching on the input, not
  *	  the output
  *	- sets Xoffset / Yoffset
+ * 21/4/08
+ * 	- only rebuild the buffer offsets if bpl changes
+ * 	- small cleanups
  */
 
 /*
@@ -48,6 +51,10 @@
 
  */
 
+/*
+#define DEBUG
+ */
+
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif /*HAVE_CONFIG_H*/
@@ -71,6 +78,7 @@ typedef struct {
 	int ss;			/* ... and number we check for set */
 	int *coff;		/* Offsets we check for clear */
 	int cs;			/* ... and number we check for clear */
+	int last_bpl;		/* Avoid recalcing offsets, if we can */
 } SeqInfo;
 
 /* Stop function.
@@ -105,6 +113,7 @@ dilate_start( IMAGE *out, void *a, void *b )
 	seq->ss = 0;
 	seq->coff = NULL;
 	seq->cs = 0;
+	seq->last_bpl = -1;
 
 	/* Attach region and arrays.
 	 */
@@ -141,7 +150,7 @@ dilate_gen( REGION *or, void *vseq, void *a, void *b )
 	int *t;
 
 	int x, y;
-	int found, i;
+	int result, i;
 
 	/* Prepare the section of the input image we need. A little larger
 	 * than the section of the output image we are producing.
@@ -152,33 +161,46 @@ dilate_gen( REGION *or, void *vseq, void *a, void *b )
 	if( im_prepare( ir, &s ) )
 		return( -1 );
 
-	/* Scan mask, building offsets we check when processing.
+#ifdef DEBUG
+	printf( "dilate_gen: preparing %dx%d pixels\n", s.width, s.height );
+#endif /*DEBUG*/
+
+	/* Scan mask, building offsets we check when processing. Only do this
+	 * if the bpl has changed since the previous im_prepare().
 	 */
-	seq->ss = 0;
-	seq->cs = 0;
-	for( t = msk->coeff, y = 0; y < msk->ysize; y++ )
-		for( x = 0; x < msk->xsize; x++, t++ )
-			switch( *t ) {
-			case 255:
-				soff[seq->ss++] = 
-					IM_REGION_ADDR( ir, x + le, y + to ) - 
-					IM_REGION_ADDR( ir, le, to );
-				break;
+	if( seq->last_bpl != IM_REGION_LSKIP( ir ) ) {
+		seq->last_bpl = IM_REGION_LSKIP( ir );
 
-			case 128:
-				break;
+		seq->ss = 0;
+		seq->cs = 0;
+		for( t = msk->coeff, y = 0; y < msk->ysize; y++ )
+			for( x = 0; x < msk->xsize; x++, t++ )
+				switch( *t ) {
+				case 255:
+					soff[seq->ss++] = 
+						IM_REGION_ADDR( ir, 
+							x + le, y + to ) - 
+						IM_REGION_ADDR( ir, le, to );
+					break;
 
-			case 0:
-				coff[seq->cs++] = 
-					IM_REGION_ADDR( ir, x + le, y + to ) - 
-					IM_REGION_ADDR( ir, le, to );
-				break;
+				case 128:
+					break;
 
-			default:
-				im_errormsg( "im_dilate: bad mask element "
-					"(%d should be 0, 128 or 255)", *t );
-				return( -1 ); 
-			}
+				case 0:
+					coff[seq->cs++] = 
+						IM_REGION_ADDR( ir, 
+							x + le, y + to ) - 
+						IM_REGION_ADDR( ir, le, to );
+					break;
+
+				default:
+					im_error( "im_dilate", 
+						_( "bad mask element (%d "
+						"should be 0, 128 or 255)" ), 
+						*t );
+					return( -1 ); 
+				}
+	}
 
 	/* Dilate!
 	 */
@@ -191,35 +213,28 @@ dilate_gen( REGION *or, void *vseq, void *a, void *b )
 		for( x = 0; x < sz; x++, q++, p++ ) {
 			/* Search for a hit on the set list.
 			 */
-			found = 0;
+			result = 0;
 			for( i = 0; i < seq->ss; i++ )
 				if( p[soff[i]] ) {
-					/* Found a match! Set this output
-					 * pixel and continue.
+					/* Found a match! 
 					 */
-					*q = 255;
-					found = 1;
+					result = 255;
 					break;
 				}
 
 			/* No set pixels ... search for a hit in the clear
 			 * pixels.
 			 */
-			if( !found )
+			if( !result )
 				for( i = 0; i < seq->cs; i++ )
 					if( !p[coff[i]] ) {
-						/* Found a match! Set this 
-						 * output pixel and continue.
+						/* Found a match! 
 						 */
-						*q = 255;
-						found = 1;
+						result = 255;
 						break;
 					}
 
-			if( !found )
-				/* All matches failed. Clear this output pixel.
-				 */
-				*q = 0;
+			*q = result;
 
 		}
 	}
@@ -238,7 +253,7 @@ im_dilate_raw( IMAGE *in, IMAGE *out, INTMASK *m )
 	 */
 	if( m->xsize < 1 || !(m->xsize & 0x1) ||
 		m->ysize < 1 || !(m->ysize & 0x1) ) {
-		im_errormsg( "im_dilate: mask size not odd" ); 
+		im_error( "im_dilate", _( "mask size not odd" ) ); 
 		return( -1 ); 
 	}
 
@@ -248,7 +263,7 @@ im_dilate_raw( IMAGE *in, IMAGE *out, INTMASK *m )
 		return( -1 ); 
 	if( in->Coding != IM_CODING_NONE || in->Bbits != 8 || 
 		in->BandFmt != IM_BANDFMT_UCHAR ) {
-		im_errormsg( "im_dilate: uchar uncoded only" );
+		im_error( "im_dilate", _( "uchar uncoded only" ) );
 		return( -1 );
 	}
 	if( im_cp_desc( out, in ) ) 
@@ -262,7 +277,7 @@ im_dilate_raw( IMAGE *in, IMAGE *out, INTMASK *m )
 	out->Xsize -= m->xsize - 1;
 	out->Ysize -= m->ysize - 1;
 	if( out->Xsize <= 0 || out->Ysize <= 0 ) {
-		im_errormsg( "im_dilate: image too small for mask" );
+		im_error( "im_dilate", _( "image too small for mask" ) );
 		return( -1 );
 	}
 
diff --git a/libsrc/morphology/im_erode.c b/libsrc/morphology/im_erode.c
index 94f876d2..bdf1db38 100644
--- a/libsrc/morphology/im_erode.c
+++ b/libsrc/morphology/im_erode.c
@@ -20,6 +20,9 @@
  *	- now uses im_embed() with edge stretching on the input, not
  *	  the output
  *	- sets Xoffset / Yoffset
+ * 21/4/08
+ * 	- only rebuild the buffer offsets if bpl changes
+ * 	- small cleanups
  */
 
 /*
@@ -48,6 +51,10 @@
 
  */
 
+/*
+#define DEBUG
+ */
+
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif /*HAVE_CONFIG_H*/
@@ -71,6 +78,7 @@ typedef struct {
 	int ss;			/* ... and number we check for set */
 	int *coff;		/* Offsets we check for clear */
 	int cs;			/* ... and number we check for clear */
+	int last_bpl;		/* Avoid recalcing offsets, if we can */
 } SeqInfo;
 
 /* Stop function.
@@ -105,6 +113,7 @@ erode_start( IMAGE *out, void *a, void *b )
 	seq->ss = 0;
 	seq->coff = NULL;
 	seq->cs = 0;
+	seq->last_bpl = -1;
 
 	/* Attach region and arrays.
 	 */
@@ -141,7 +150,7 @@ erode_gen( REGION *or, void *vseq, void *a, void *b )
 	int *t;
 
 	int x, y;
-	int found, i;
+	int result, i;
 
 	/* Prepare the section of the input image we need. A little larger
 	 * than the section of the output image we are producing.
@@ -152,33 +161,46 @@ erode_gen( REGION *or, void *vseq, void *a, void *b )
 	if( im_prepare( ir, &s ) )
 		return( -1 );
 
-	/* Scan mask, building offsets we check when processing.
+#ifdef DEBUG
+	printf( "erode_gen: preparing %dx%d pixels\n", s.width, s.height );
+#endif /*DEBUG*/
+
+	/* Scan mask, building offsets we check when processing. Only do this
+	 * if the bpl has changed since the previous im_prepare().
 	 */
-	seq->ss = 0;
-	seq->cs = 0;
-	for( t = msk->coeff, y = 0; y < msk->ysize; y++ )
-		for( x = 0; x < msk->xsize; x++, t++ )
-			switch( *t ) {
-			case 255:
-				soff[seq->ss++] = 
-					IM_REGION_ADDR( ir, x + le, y + to ) - 
-					IM_REGION_ADDR( ir, le, to );
-				break;
+	if( seq->last_bpl != IM_REGION_LSKIP( ir ) ) {
+		seq->last_bpl = IM_REGION_LSKIP( ir );
 
-			case 128:
-				break;
+		seq->ss = 0;
+		seq->cs = 0;
+		for( t = msk->coeff, y = 0; y < msk->ysize; y++ )
+			for( x = 0; x < msk->xsize; x++, t++ )
+				switch( *t ) {
+				case 255:
+					soff[seq->ss++] = 
+						IM_REGION_ADDR( ir, 
+							x + le, y + to ) - 
+						IM_REGION_ADDR( ir, le, to );
+					break;
 
-			case 0:
-				coff[seq->cs++] = 
-					IM_REGION_ADDR( ir, x + le, y + to ) - 
-					IM_REGION_ADDR( ir, le, to );
-				break;
+				case 128:
+					break;
 
-			default:
-				im_errormsg( "im_erode: bad mask element "
-					"(%d should be 0, 128 or 255)", *t );
-				return( -1 ); 
-			}
+				case 0:
+					coff[seq->cs++] = 
+						IM_REGION_ADDR( ir, 
+							x + le, y + to ) - 
+						IM_REGION_ADDR( ir, le, to );
+					break;
+
+				default:
+					im_error( "im_erode", 
+						_( "bad mask element (%d "
+						"should be 0, 128 or 255)" ), 
+						*t );
+					return( -1 ); 
+				}
+	}
 
 	/* Erode!
 	 */
@@ -191,34 +213,25 @@ erode_gen( REGION *or, void *vseq, void *a, void *b )
 		for( x = 0; x < sz; x++, q++, p++ ) {
 			/* Check all set pixels are set.
 			 */
-			found = 0;
+			result = 255;
 			for( i = 0; i < seq->ss; i++ )
 				if( !p[soff[i]] ) {
-					/* Found a mismatch! Clear this output
-					 * pixel and continue.
+					/* Found a mismatch! 
 					 */
-					*q = 0;
-					found = 1;
+					result = 0;
 					break;
 				}
 
 			/* Check all clear pixels are clear.
 			 */
-			if( !found )
+			if( result )
 				for( i = 0; i < seq->cs; i++ )
 					if( p[coff[i]] ) {
-						/* Found a mismatch! Clear this 
-						 * output pixel and continue.
-						 */
-						*q = 0;
-						found = 1;
+						result = 0;
 						break;
 					}
 
-			if( !found )
-				/* No mismatches found - set output pixel.
-				 */
-				*q = 255;
+			*q = result;
 		}
 	}
 	
@@ -236,7 +249,7 @@ im_erode_raw( IMAGE *in, IMAGE *out, INTMASK *m )
 	 */
 	if( m->xsize < 1 || !(m->xsize & 0x1) ||
 		m->ysize < 1 || !(m->ysize & 0x1) ) {
-		im_errormsg( "im_erode: mask size not odd" ); 
+		im_error( "im_erode", _( "mask size not odd" ) ); 
 		return( -1 ); 
 	}
 
@@ -246,7 +259,7 @@ im_erode_raw( IMAGE *in, IMAGE *out, INTMASK *m )
 		return( -1 ); 
 	if( in->Coding != IM_CODING_NONE || in->Bbits != 8 || 
 		in->BandFmt != IM_BANDFMT_UCHAR ) {
-		im_errormsg( "im_erode: 1-band uchar uncoded only" );
+		im_error( "im_erode", _( "1-band uchar uncoded only" ) );
 		return( -1 );
 	}
 	if( im_cp_desc( out, in ) ) 
@@ -260,7 +273,7 @@ im_erode_raw( IMAGE *in, IMAGE *out, INTMASK *m )
 	out->Xsize -= m->xsize - 1;
 	out->Ysize -= m->ysize - 1;
 	if( out->Xsize <= 0 || out->Ysize <= 0 ) {
-		im_errormsg( "im_erode: image too small for mask" );
+		im_error( "im_erode", _( "image too small for mask" ) );
 		return( -1 );
 	}