Merge branch 'new-bilinear'

2015-12-17 13:43:51 +00:00 · 2015-12-17 13:43:51 +00:00 · efed79f298
commit efed79f298
parent c74f7457a6 fb084ef6a6
2 changed files with 65 additions and 17 deletions
--- a/1
+++ b/1
@ -22,6 +22,7 @@
 - Python x.bandjoin(y) is now x.ibandjoin(y), sorry
 - oop, removed a DEBUG from buffer.c, vips is 30% faster
 - faster and lower-mem TIFF read
 - faster bilinear interpolator
 7/5/15 started 8.1.1
 - oop, vips-8.0 wrapper script should be vips-8.1, thanks Danilo
--- a/libvips/resample/interpolate.c
+++ b/libvips/resample/interpolate.c
@ -14,6 +14,8 @@
 * 	- faster, more accurate uchar bilinear (thanks Nicolas)
 * 2/2/11
 * 	- gtk-doc
 * 16/12/15
 * 	- faster bilinear
 */
 /*
@ -426,27 +428,45 @@ G_DEFINE_TYPE( VipsInterpolateBilinear, vips_interpolate_bilinear,
 * p3  p4
 */
 /* One band element of the fixed-point bilinear sum: weight the four
  * source elements tp1[z]..tp4[z] by sc1..sc4, shift the total right by
  * VIPS_INTERPOLATE_SHIFT, store the result in tq[z], then step z on to
  * the next element.
  *
  * Expanded through VIPS_UNROLL() inside BILINEAR_INT(), so tq, tp1..tp4,
  * sc1..sc4 and z must already be in scope at the expansion site.
  */
 #define BILINEAR_INT_INNER { \
 	tq[z] = (sc1 * tp1[z] + sc2 * tp2[z] + \
 		 sc3 * tp3[z] + sc4 * tp4[z]) >> VIPS_INTERPOLATE_SHIFT; \
 	z += 1; \
 }
 /* Fixed-point arithmetic, no tables.
 */
 #define BILINEAR_INT( TYPE ) { \
 	TYPE * restrict tq = (TYPE *) out; \
 	\
-	const int X = (x - ix) * VIPS_INTERPOLATE_SCALE; \
+	float Y = y - iy; \
-	const int Y = (iy - y) * VIPS_INTERPOLATE_SCALE; \
+	float X = x - ix; \
        \
 	float Yd = 1.0f - Y; \
        \
 	float c4 = Y * X; \
 	float c2 = Yd * X; \
 	float c3 = Y - c4; \
 	float c1 = Yd - c2; \
 	\
 	int sc1 = VIPS_INTERPOLATE_SCALE * c1;\
 	int sc2 = VIPS_INTERPOLATE_SCALE * c2;\
 	int sc3 = VIPS_INTERPOLATE_SCALE * c3;\
 	int sc4 = VIPS_INTERPOLATE_SCALE * c4;\
 	\
 	const TYPE * restrict tp1 = (TYPE *) p1; \
 	const TYPE * restrict tp2 = (TYPE *) p2; \
 	const TYPE * restrict tp3 = (TYPE *) p3; \
 	const TYPE * restrict tp4 = (TYPE *) p4; \
 	\
-	for( z = 0; z < b; z++ ) { \
+	z = 0; \
-		const int top = tp1[z] + \
+	VIPS_UNROLL( b, BILINEAR_INT_INNER ); \
-			((X * (tp2[z] - tp1[z])) >> VIPS_INTERPOLATE_SHIFT); \
+}
-		const int bot = tp3[z] + \
+
-			((X * (tp4[z] - tp3[z])) >> VIPS_INTERPOLATE_SHIFT); \
+#define BILINEAR_FLOAT_INNER { \
-		\
+	tq[z] = c1 * tp1[z] + c2 * tp2[z] + \
-		tq[z] = top - ((Y * (bot - top)) >> VIPS_INTERPOLATE_SHIFT); \
+		c3 * tp3[z] + c4 * tp4[z]; \
-	} \
+	z += 1; \
 }
 /* Interpolate a pel ... int32 and float types, no tables, float 
@ -470,9 +490,8 @@ G_DEFINE_TYPE( VipsInterpolateBilinear, vips_interpolate_bilinear,
 	const TYPE * restrict tp3 = (TYPE *) p3; \
 	const TYPE * restrict tp4 = (TYPE *) p4; \
 	\
-	for( z = 0; z < b; z++ ) \
+	z = 0; \
-		tq[z] = c1 * tp1[z] + c2 * tp2[z] + \
+	VIPS_UNROLL( b, BILINEAR_FLOAT_INNER ); \
 			c3 * tp3[z] + c4 * tp4[z]; \
 }
 /* Expand for band types. with a fixed-point interpolator and a float
@ -518,8 +537,36 @@ vips_interpolate_bilinear_interpolate( VipsInterpolate *interpolate,
 	g_assert( (int) x + 1 < VIPS_RECT_RIGHT( &in->valid ) );
 	g_assert( (int) y + 1 < VIPS_RECT_BOTTOM( &in->valid ) );
 	unsigned char * restrict tq = (unsigned char *) out; 
 	float Y = y - iy; 
 	float X = x - ix; 
 	float Yd = 1.0f - Y; 
 	float c4 = Y * X; 
 	float c2 = Yd * X; 
 	float c3 = Y - c4; 
 	float c1 = Yd - c2; 
 	int sc1 = VIPS_INTERPOLATE_SCALE * c1;
 	int sc2 = VIPS_INTERPOLATE_SCALE * c2;
 	int sc3 = VIPS_INTERPOLATE_SCALE * c3;
 	int sc4 = VIPS_INTERPOLATE_SCALE * c4;
 	const unsigned char * restrict tp1 = (unsigned char *) p1; 
 	const unsigned char * restrict tp2 = (unsigned char *) p2; 
 	const unsigned char * restrict tp3 = (unsigned char *) p3; 
 	const unsigned char * restrict tp4 = (unsigned char *) p4; 
 	for( z = 0; z < b; z++ ) 
 		tq[z] = (sc1 * tp1[z] + sc2 * tp2[z] + 
 			 sc3 * tp3[z] + sc4 * tp4[z]) >> VIPS_INTERPOLATE_SHIFT; 
 /*
 	SWITCH_INTERPOLATE( in->im->BandFmt,
 		BILINEAR_INT, BILINEAR_FLOAT );
 		 */
 }
 static void