From 7fd672f5952f591153cea5d5ee4c1014b01af961 Mon Sep 17 00:00:00 2001
From: John Cupitt <jcupitt@gmail.com>
Date: Sun, 30 May 2010 17:26:23 +0000
Subject: [PATCH] faster, more accurate bilinear/bicubic

---
 ChangeLog                          |  2 +
 TODO                               | 98 ------------------------------
 libvips/include/vips/interpolate.h |  2 +-
 libvips/iofuncs/dispatch_types.c   | 12 +++-
 libvips/resample/bicubic.cpp       | 27 ++++----
 libvips/resample/interpolate.c     | 53 ++++++++--------
 6 files changed, 55 insertions(+), 139 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index f86e1cd8..7630d676 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -9,6 +9,8 @@
 - all "colour" in messages changed to "color", have a proper en_GB 
   translation file
 - vipsthumbnail delete profile failed if there was a profile
+- interpolate cli unref was broken
+- more accurate, slightly faster bilinear and bicubic (thanks Nicolas)
 
 21/3/10 started 7.21.3
 - added progress feedback to threadpool
diff --git a/TODO b/TODO
index 13799004..74f34992 100644
--- a/TODO
+++ b/TODO
@@ -1,103 +1,5 @@
-
-mask selection seems reasonable:
-
-vips_interpolate_bicubic_interpolate: 2 2
-	left=1, top=1, width=4, height=4
-	maskx=0, masky=0
-vips_interpolate_bicubic_interpolate: 2.1 2
-	left=1, top=1, width=4, height=4
-	maskx=3, masky=0
-vips_interpolate_bicubic_interpolate: 2.2 2
-	left=1, top=1, width=4, height=4
-	maskx=6, masky=0
-vips_interpolate_bicubic_interpolate: 2.3 2
-	left=1, top=1, width=4, height=4
-	maskx=9, masky=0
-vips_interpolate_bicubic_interpolate: 2.4 2
-	left=1, top=1, width=4, height=4
-	maskx=12, masky=0
-vips_interpolate_bicubic_interpolate: 2.5 2
-	left=1, top=1, width=4, height=4
-	maskx=16, masky=0
-vips_interpolate_bicubic_interpolate: 2.6 2
-	left=1, top=1, width=4, height=4
-	maskx=19, masky=0
-vips_interpolate_bicubic_interpolate: 2.7 2
-	left=1, top=1, width=4, height=4
-	maskx=22, masky=0
-vips_interpolate_bicubic_interpolate: 2.8 2
-	left=1, top=1, width=4, height=4
-	maskx=25, masky=0
-vips_interpolate_bicubic_interpolate: 2.9 2
-	left=1, top=1, width=4, height=4
-	maskx=28, masky=0
-vips_interpolate_bicubic_interpolate: 3 2
-	left=2, top=1, width=4, height=4
-	maskx=0, masky=0
-
-so for (eg.) 2.4 we pick mask 12, which is (12 / 32), or 0.375
-
-however, mask13 would be closer: 13 / 32) = 0.4076
-
-calculation is 
-
-	2.4 * 32 == 76.8
-	FLOOR(76.8) == 76
-	76 & 31 == 12
-
-
-
-vips_interpolate_bicubic_class_init:
-mask = 0, calculate_coefficients_catmull: 0
-mask = 1, calculate_coefficients_catmull: 0.03125
-mask = 2, calculate_coefficients_catmull: 0.0625
-mask = 3, calculate_coefficients_catmull: 0.09375
-mask = 4, calculate_coefficients_catmull: 0.125
-mask = 5, calculate_coefficients_catmull: 0.15625
-mask = 6, calculate_coefficients_catmull: 0.1875
-mask = 7, calculate_coefficients_catmull: 0.21875
-mask = 8, calculate_coefficients_catmull: 0.25
-mask = 9, calculate_coefficients_catmull: 0.28125
-mask = 10, calculate_coefficients_catmull: 0.3125
-mask = 11, calculate_coefficients_catmull: 0.34375
-mask = 12, calculate_coefficients_catmull: 0.375
-mask = 13, calculate_coefficients_catmull: 0.40625
-mask = 14, calculate_coefficients_catmull: 0.4375
-mask = 15, calculate_coefficients_catmull: 0.46875
-mask = 16, calculate_coefficients_catmull: 0.5
-mask = 17, calculate_coefficients_catmull: 0.53125
-mask = 18, calculate_coefficients_catmull: 0.5625
-mask = 19, calculate_coefficients_catmull: 0.59375
-mask = 20, calculate_coefficients_catmull: 0.625
-mask = 21, calculate_coefficients_catmull: 0.65625
-mask = 22, calculate_coefficients_catmull: 0.6875
-mask = 23, calculate_coefficients_catmull: 0.71875
-mask = 24, calculate_coefficients_catmull: 0.75
-mask = 25, calculate_coefficients_catmull: 0.78125
-mask = 26, calculate_coefficients_catmull: 0.8125
-mask = 27, calculate_coefficients_catmull: 0.84375
-mask = 28, calculate_coefficients_catmull: 0.875
-mask = 29, calculate_coefficients_catmull: 0.90625
-mask = 30, calculate_coefficients_catmull: 0.9375
-mask = 31, calculate_coefficients_catmull: 0.96875
-mask = 32, calculate_coefficients_catmull: 1
-
-
-
-
-
-
 - tools subdirs are now pretty stupid :-( just have a single dir
 
-- int bicubic needs more precision? try a 10x upscale and compare to the
-  double path
-
-  yes, looks like a rounding problem? 
-  
-  upscale 10x, you get a small offset if you compare the double and int paths
-
-  and slight banding
-
 - test
 
 		python setup.py build_ext
diff --git a/libvips/include/vips/interpolate.h b/libvips/include/vips/interpolate.h
index 4132ecc7..e695f39c 100644
--- a/libvips/include/vips/interpolate.h
+++ b/libvips/include/vips/interpolate.h
@@ -96,7 +96,7 @@ int vips_interpolate_get_window_offset( VipsInterpolate *interpolate );
 /* How many bits of precision we keep for transformations, ie. how many
  * pre-computed matricies we have.
  */
-#define VIPS_TRANSFORM_SHIFT (2)
+#define VIPS_TRANSFORM_SHIFT (6)
 #define VIPS_TRANSFORM_SCALE (1 << VIPS_TRANSFORM_SHIFT)
 
 /* How many bits of precision we keep for interpolation, ie. where the decimal
diff --git a/libvips/iofuncs/dispatch_types.c b/libvips/iofuncs/dispatch_types.c
index 75c02bdf..64673a62 100644
--- a/libvips/iofuncs/dispatch_types.c
+++ b/libvips/iofuncs/dispatch_types.c
@@ -894,11 +894,19 @@ input_interpolate_init( im_object *obj, char *str )
 	return( 0 );
 }
 
+static int
+input_interpolate_dest( im_object obj )
+{
+	g_object_unref( (GObject *) obj );
+
+	return( 0 );
+}
+
 im_type_desc im__input_interpolate = {
 	IM_TYPE_INTERPOLATE,	
 	0,      		/* No storage required */
 	IM_TYPE_ARG,		/* It requires a command-line arg */
-	(im_init_obj_fn) input_interpolate_init,/* Init function */
-	(im_dest_obj_fn) g_object_unref	/* Destroy function */
+	input_interpolate_init,	/* Init function */
+	input_interpolate_dest	/* Destroy function */
 };
 
diff --git a/libvips/resample/bicubic.cpp b/libvips/resample/bicubic.cpp
index 69314744..636f0749 100644
--- a/libvips/resample/bicubic.cpp
+++ b/libvips/resample/bicubic.cpp
@@ -302,23 +302,24 @@ static void
 vips_interpolate_bicubic_interpolate( VipsInterpolate *interpolate,
 	PEL *out, REGION *in, double x, double y )
 {
-	/* Scaled int.
+	/* Find the mask index. We round-to-nearest, so we need to generate 
+	 * indexes in 0 to VIPS_TRANSFORM_SCALE, 2^n + 1 values. We multiply 
+	 * by 2 more than we need to, add one, mask, then shift down again to 
+	 * get the extra range.
 	 */
-	const double sx = x * VIPS_TRANSFORM_SCALE;
-	const double sy = y * VIPS_TRANSFORM_SCALE;
+	const int sx = x * VIPS_TRANSFORM_SCALE * 2;
+	const int sy = y * VIPS_TRANSFORM_SCALE * 2;
 
-	/* We know sx/sy are always positive, so we can just (int) them. 
-	 */
-	const int sxi = (int) sx;
-	const int syi = (int) sy;
+	const int six = sx & (VIPS_TRANSFORM_SCALE * 2 - 1);
+	const int siy = sy & (VIPS_TRANSFORM_SCALE * 2 - 1);
 
-	/* Get index into interpolation table and unscaled integer
-	 * position.
+	const int tx = (six + 1) >> 1;
+	const int ty = (siy + 1) >> 1;
+
+	/* We know x/y are always positive, so we can just (int) them. 
 	 */
-	const int tx = sxi & (VIPS_TRANSFORM_SCALE - 1);
-	const int ty = syi & (VIPS_TRANSFORM_SCALE - 1);
-	const int ix = sxi >> VIPS_TRANSFORM_SHIFT;
-	const int iy = syi >> VIPS_TRANSFORM_SHIFT;
+	const int ix = (int) x;
+	const int iy = (int) y;
 
 	/* Look up the tables we need.
 	 */
diff --git a/libvips/resample/interpolate.c b/libvips/resample/interpolate.c
index 9dc03e9d..9fe25d7e 100644
--- a/libvips/resample/interpolate.c
+++ b/libvips/resample/interpolate.c
@@ -325,15 +325,16 @@ static float vips_bilinear_matrixd
  * p3  p4
  */
 
-/* Interpolate a section ... int8/16 types.
+/* Interpolate a section ... int8/16 types, lookup tables for interpolation 
+ * factors, fixed-point arithmetic.
  */
 #define BILINEAR_INT( TYPE ) { \
 	TYPE *tq = (TYPE *) out; \
  	\
-	const int c1 = vips_bilinear_matrixi[xi][yi][0]; \
-	const int c2 = vips_bilinear_matrixi[xi][yi][1]; \
-	const int c3 = vips_bilinear_matrixi[xi][yi][2]; \
-	const int c4 = vips_bilinear_matrixi[xi][yi][3]; \
+	const int c1 = vips_bilinear_matrixi[tx][ty][0]; \
+	const int c2 = vips_bilinear_matrixi[tx][ty][1]; \
+	const int c3 = vips_bilinear_matrixi[tx][ty][2]; \
+	const int c4 = vips_bilinear_matrixi[tx][ty][3]; \
  	\
 	const TYPE *tp1 = (TYPE *) p1; \
 	const TYPE *tp2 = (TYPE *) p2; \
@@ -345,15 +346,16 @@ static float vips_bilinear_matrixd
 			 c3 * tp3[z] + c4 * tp4[z]) >> VIPS_INTERPOLATE_SHIFT; \
 }
 
-/* Interpolate a pel ... int32 and float types.
+/* Interpolate a pel ... int32 and float types, lookup tables, float 
+ * arithmetic.
  */
 #define BILINEAR_FLOAT( TYPE ) { \
 	TYPE *tq = (TYPE *) out; \
  	\
-	const double c1 = vips_bilinear_matrixd[xi][yi][0]; \
-	const double c2 = vips_bilinear_matrixd[xi][yi][1]; \
-	const double c3 = vips_bilinear_matrixd[xi][yi][2]; \
-	const double c4 = vips_bilinear_matrixd[xi][yi][3]; \
+	const double c1 = vips_bilinear_matrixd[tx][ty][0]; \
+	const double c2 = vips_bilinear_matrixd[tx][ty][1]; \
+	const double c3 = vips_bilinear_matrixd[tx][ty][2]; \
+	const double c4 = vips_bilinear_matrixd[tx][ty][3]; \
 	\
 	const TYPE *tp1 = (TYPE *) p1; \
 	const TYPE *tp2 = (TYPE *) p2; \
@@ -393,25 +395,26 @@ vips_interpolate_bilinear_interpolate( VipsInterpolate *interpolate,
 	const int ls = IM_REGION_LSKIP( in );
 	const int b = in->im->Bands;
 
-	/* Now go to scaled int.
+	/* Find the mask index. We round-to-nearest, so we need to generate 
+	 * indexes in 0 to VIPS_TRANSFORM_SCALE, 2^n + 1 values. We multiply 
+	 * by 2 more than we need to, add one, mask, then shift down again to 
+	 * get the extra range.
 	 */
-	const double sx = x * VIPS_TRANSFORM_SCALE;
-	const double sy = y * VIPS_TRANSFORM_SCALE;
+	const int sx = x * VIPS_TRANSFORM_SCALE * 2;
+	const int sy = y * VIPS_TRANSFORM_SCALE * 2;
 
-	/* We know sx/sy are always positive so we can just (int) them. 
+	const int six = sx & (VIPS_TRANSFORM_SCALE * 2 - 1);
+	const int siy = sy & (VIPS_TRANSFORM_SCALE * 2 - 1);
+
+	const int tx = (six + 1) >> 1;
+	const int ty = (siy + 1) >> 1;
+
+	/* We know x/y are always positive, so we can just (int) them. 
 	 */
-	const int sxi = (int) sx;
-	const int syi = (int) sy;
+	const int ix = (int) x;
+	const int iy = (int) y;
 
-	/* Get index into interpolation table and unscaled integer
-	 * position.
-	 */
-	const int xi = sxi & (VIPS_TRANSFORM_SCALE - 1);
-	const int yi = syi & (VIPS_TRANSFORM_SCALE - 1);
-	const int x_int = sxi >> VIPS_TRANSFORM_SHIFT;
-	const int y_int = syi >> VIPS_TRANSFORM_SHIFT;
-
-	const PEL *p1 = (PEL *) IM_REGION_ADDR( in, x_int, y_int );
+	const PEL *p1 = (PEL *) IM_REGION_ADDR( in, ix, iy );
 	const PEL *p2 = p1 + ps;
 	const PEL *p3 = p1 + ls;
 	const PEL *p4 = p3 + ps;