From 7068fbb20b7f6c64e8ea53109556963724e1ded5 Mon Sep 17 00:00:00 2001 From: John Cupitt Date: Wed, 16 Dec 2015 14:48:00 +0000 Subject: [PATCH 1/2] new bilinear interpolator --- libvips/resample/interpolate.c | 77 +++++++++++++++++++++++++--------- 1 file changed, 58 insertions(+), 19 deletions(-) diff --git a/libvips/resample/interpolate.c b/libvips/resample/interpolate.c index d1de9d6f..1bc7fde7 100644 --- a/libvips/resample/interpolate.c +++ b/libvips/resample/interpolate.c @@ -14,6 +14,8 @@ * - faster, more accuarate uchar bilinear (thanks Nicolas) * 2/2/11 * - gtk-doc + * 16/12/15 + * - faster bilinear */ /* @@ -426,27 +428,65 @@ G_DEFINE_TYPE( VipsInterpolateBilinear, vips_interpolate_bilinear, * p3 p4 */ +#define BILINEAR_INT_INNER { \ + tq[z] = (c1 * tp1[z] + c2 * tp2[z] + \ + c3 * tp3[z] + c4 * tp4[z]) >> VIPS_INTERPOLATE_SHIFT; \ + z += 1; \ +} + /* Fixed-point arithmetic, no tables. - */ #define BILINEAR_INT( TYPE ) { \ TYPE * restrict tq = (TYPE *) out; \ \ - const int X = (x - ix) * VIPS_INTERPOLATE_SCALE; \ - const int Y = (iy - y) * VIPS_INTERPOLATE_SCALE; \ - \ + float Y = y - iy; \ + float X = x - ix; \ + \ + float Yd = 1.0f - Y; \ + \ + int c4 = VIPS_INTERPOLATE_SCALE * (Y * X); \ + int c2 = VIPS_INTERPOLATE_SCALE * (Yd * X); \ + int c3 = VIPS_INTERPOLATE_SCALE * (Y - c4); \ + int c1 = VIPS_INTERPOLATE_SCALE * (Yd - c2); \ + \ const TYPE * restrict tp1 = (TYPE *) p1; \ const TYPE * restrict tp2 = (TYPE *) p2; \ const TYPE * restrict tp3 = (TYPE *) p3; \ const TYPE * restrict tp4 = (TYPE *) p4; \ \ - for( z = 0; z < b; z++ ) { \ - const int top = tp1[z] + \ - ((X * (tp2[z] - tp1[z])) >> VIPS_INTERPOLATE_SHIFT); \ - const int bot = tp3[z] + \ - ((X * (tp4[z] - tp3[z])) >> VIPS_INTERPOLATE_SHIFT); \ - \ - tq[z] = top - ((Y * (bot - top)) >> VIPS_INTERPOLATE_SHIFT); \ - } \ + z = 0; \ + VIPS_UNROLL( b, BILINEAR_INT_INNER ); \ +} + */ + +/* Fixed-point arithmetic, no tables. + */ +#define BILINEAR_INT( TYPE ) { \ + TYPE * restrict tq = (TYPE *) out; \ + \ + float Y = y - iy; \ + float X = x - ix; \ + \ + float Yd = 1.0f - Y; \ + \ + int c4 = VIPS_INTERPOLATE_SCALE * (Y * X); \ + int c2 = VIPS_INTERPOLATE_SCALE * (Yd * X); \ + int c3 = VIPS_INTERPOLATE_SCALE * (Y - c4); \ + int c1 = VIPS_INTERPOLATE_SCALE * (Yd - c2); \ + \ + const TYPE * restrict tp1 = (TYPE *) p1; \ + const TYPE * restrict tp2 = (TYPE *) p2; \ + const TYPE * restrict tp3 = (TYPE *) p3; \ + const TYPE * restrict tp4 = (TYPE *) p4; \ + \ + for( z = 0; z < b; z++ ) \ + tq[z] = (c1 * tp1[z] + c2 * tp2[z] + \ + c3 * tp3[z] + c4 * tp4[z]) >> VIPS_INTERPOLATE_SHIFT; \ +} + +#define BILINEAR_FLOAT_INNER { \ + tq[z] = c1 * tp1[z] + c2 * tp2[z] + \ + c3 * tp3[z] + c4 * tp4[z]; \ + z += 1; \ } /* Interpolate a pel ... int32 and float types, no tables, float @@ -455,14 +495,14 @@ G_DEFINE_TYPE( VipsInterpolateBilinear, vips_interpolate_bilinear, #define BILINEAR_FLOAT( TYPE ) { \ TYPE * restrict tq = (TYPE *) out; \ \ - float Y = y - iy; \ - float X = x - ix; \ + float Y = y - iy; \ + float X = x - ix; \ \ float Yd = 1.0f - Y; \ \ - float c4 = Y * X; \ + float c4 = Y * X; \ float c2 = Yd * X; \ - float c3 = Y - c4; \ + float c3 = Y - c4; \ float c1 = Yd - c2; \ \ const TYPE * restrict tp1 = (TYPE *) p1; \ @@ -470,9 +510,8 @@ G_DEFINE_TYPE( VipsInterpolateBilinear, vips_interpolate_bilinear, const TYPE * restrict tp3 = (TYPE *) p3; \ const TYPE * restrict tp4 = (TYPE *) p4; \ \ - for( z = 0; z < b; z++ ) \ - tq[z] = c1 * tp1[z] + c2 * tp2[z] + \ - c3 * tp3[z] + c4 * tp4[z]; \ + z = 0; \ + VIPS_UNROLL( b, BILINEAR_FLOAT_INNER ); \ } /* Expand for band types. with a fixed-point interpolator and a float From fb084ef6a6d4e30cad40eb898be284a0a8b0c6f0 Mon Sep 17 00:00:00 2001 From: John Cupitt Date: Thu, 17 Dec 2015 13:43:34 +0000 Subject: [PATCH 2/2] working --- ChangeLog | 1 + libvips/resample/interpolate.c | 72 +++++++++++++++++++--------------- 2 files changed, 41 insertions(+), 32 deletions(-) diff --git a/ChangeLog b/ChangeLog index 0e17fb59..4b73a8f6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -20,6 +20,7 @@ - Python x.bandjoin(y) is now x.ibandjoin(y), sorry - oop, removed a DEBUG from buffer.c, vips is 30% faster - faster and lower-mem TIFF read +- faster bilinear interpolator 7/5/15 started 8.1.1 - oop, vips-8.0 wrapper script should be vips-8.1, thanks Danilo diff --git a/libvips/resample/interpolate.c b/libvips/resample/interpolate.c index 1bc7fde7..c8b3ac19 100644 --- a/libvips/resample/interpolate.c +++ b/libvips/resample/interpolate.c @@ -429,12 +429,13 @@ G_DEFINE_TYPE( VipsInterpolateBilinear, vips_interpolate_bilinear, */ #define BILINEAR_INT_INNER { \ - tq[z] = (c1 * tp1[z] + c2 * tp2[z] + \ - c3 * tp3[z] + c4 * tp4[z]) >> VIPS_INTERPOLATE_SHIFT; \ + tq[z] = (sc1 * tp1[z] + sc2 * tp2[z] + \ + sc3 * tp3[z] + sc4 * tp4[z]) >> VIPS_INTERPOLATE_SHIFT; \ z += 1; \ } /* Fixed-point arithmetic, no tables. + */ #define BILINEAR_INT( TYPE ) { \ TYPE * restrict tq = (TYPE *) out; \ \ @@ -443,10 +444,15 @@ G_DEFINE_TYPE( VipsInterpolateBilinear, vips_interpolate_bilinear, \ float Yd = 1.0f - Y; \ \ - int c4 = VIPS_INTERPOLATE_SCALE * (Y * X); \ - int c2 = VIPS_INTERPOLATE_SCALE * (Yd * X); \ - int c3 = VIPS_INTERPOLATE_SCALE * (Y - c4); \ - int c1 = VIPS_INTERPOLATE_SCALE * (Yd - c2); \ + float c4 = Y * X; \ + float c2 = Yd * X; \ + float c3 = Y - c4; \ + float c1 = Yd - c2; \ + \ + int sc1 = VIPS_INTERPOLATE_SCALE * c1;\ + int sc2 = VIPS_INTERPOLATE_SCALE * c2;\ + int sc3 = VIPS_INTERPOLATE_SCALE * c3;\ + int sc4 = VIPS_INTERPOLATE_SCALE * c4;\ \ const TYPE * restrict tp1 = (TYPE *) p1; \ const TYPE * restrict tp2 = (TYPE *) p2; \ @@ -456,32 +462,6 @@ G_DEFINE_TYPE( VipsInterpolateBilinear, vips_interpolate_bilinear, z = 0; \ VIPS_UNROLL( b, BILINEAR_INT_INNER ); \ } - */ - -/* Fixed-point arithmetic, no tables. - */ -#define BILINEAR_INT( TYPE ) { \ - TYPE * restrict tq = (TYPE *) out; \ - \ - float Y = y - iy; \ - float X = x - ix; \ - \ - float Yd = 1.0f - Y; \ - \ - int c4 = VIPS_INTERPOLATE_SCALE * (Y * X); \ - int c2 = VIPS_INTERPOLATE_SCALE * (Yd * X); \ - int c3 = VIPS_INTERPOLATE_SCALE * (Y - c4); \ - int c1 = VIPS_INTERPOLATE_SCALE * (Yd - c2); \ - \ - const TYPE * restrict tp1 = (TYPE *) p1; \ - const TYPE * restrict tp2 = (TYPE *) p2; \ - const TYPE * restrict tp3 = (TYPE *) p3; \ - const TYPE * restrict tp4 = (TYPE *) p4; \ - \ - for( z = 0; z < b; z++ ) \ - tq[z] = (c1 * tp1[z] + c2 * tp2[z] + \ - c3 * tp3[z] + c4 * tp4[z]) >> VIPS_INTERPOLATE_SHIFT; \ -} #define BILINEAR_FLOAT_INNER { \ tq[z] = c1 * tp1[z] + c2 * tp2[z] + \ @@ -557,8 +537,36 @@ vips_interpolate_bilinear_interpolate( VipsInterpolate *interpolate, g_assert( (int) x + 1 < VIPS_RECT_RIGHT( &in->valid ) ); g_assert( (int) y + 1 < VIPS_RECT_BOTTOM( &in->valid ) ); + unsigned char * restrict tq = (unsigned char *) out; + + float Y = y - iy; + float X = x - ix; + + float Yd = 1.0f - Y; + + float c4 = Y * X; + float c2 = Yd * X; + float c3 = Y - c4; + float c1 = Yd - c2; + + int sc1 = VIPS_INTERPOLATE_SCALE * c1; + int sc2 = VIPS_INTERPOLATE_SCALE * c2; + int sc3 = VIPS_INTERPOLATE_SCALE * c3; + int sc4 = VIPS_INTERPOLATE_SCALE * c4; + + const unsigned char * restrict tp1 = (unsigned char *) p1; + const unsigned char * restrict tp2 = (unsigned char *) p2; + const unsigned char * restrict tp3 = (unsigned char *) p3; + const unsigned char * restrict tp4 = (unsigned char *) p4; + + for( z = 0; z < b; z++ ) + tq[z] = (sc1 * tp1[z] + sc2 * tp2[z] + + sc3 * tp3[z] + sc4 * tp4[z]) >> VIPS_INTERPOLATE_SHIFT; + +/* SWITCH_INTERPOLATE( in->im->BandFmt, BILINEAR_INT, BILINEAR_FLOAT ); + */ } static void