From 7fd672f5952f591153cea5d5ee4c1014b01af961 Mon Sep 17 00:00:00 2001 From: John Cupitt <jcupitt@gmail.com> Date: Sun, 30 May 2010 17:26:23 +0000 Subject: [PATCH] faster, more accurate bilinear/bicubic --- ChangeLog | 2 + TODO | 98 ------------------------------ libvips/include/vips/interpolate.h | 2 +- libvips/iofuncs/dispatch_types.c | 12 +++- libvips/resample/bicubic.cpp | 27 ++++---- libvips/resample/interpolate.c | 53 ++++++++-------- 6 files changed, 55 insertions(+), 139 deletions(-) diff --git a/ChangeLog b/ChangeLog index f86e1cd8..7630d676 100644 --- a/ChangeLog +++ b/ChangeLog @@ -9,6 +9,8 @@ - all "colour" in messages changed to "color", have a proper en_GB translation file - vipsthumbnail delete profile failed if there was a profile +- interpolate cli unref was broken +- more accurate, slightly faster bilinear and bicubic (thanks Nicolas) 21/3/10 started 7.21.3 - added progress feedback to threadpool diff --git a/TODO b/TODO index 13799004..74f34992 100644 --- a/TODO +++ b/TODO @@ -1,103 +1,5 @@ - -mask selection seems reasonable: - -vips_interpolate_bicubic_interpolate: 2 2 - left=1, top=1, width=4, height=4 - maskx=0, masky=0 -vips_interpolate_bicubic_interpolate: 2.1 2 - left=1, top=1, width=4, height=4 - maskx=3, masky=0 -vips_interpolate_bicubic_interpolate: 2.2 2 - left=1, top=1, width=4, height=4 - maskx=6, masky=0 -vips_interpolate_bicubic_interpolate: 2.3 2 - left=1, top=1, width=4, height=4 - maskx=9, masky=0 -vips_interpolate_bicubic_interpolate: 2.4 2 - left=1, top=1, width=4, height=4 - maskx=12, masky=0 -vips_interpolate_bicubic_interpolate: 2.5 2 - left=1, top=1, width=4, height=4 - maskx=16, masky=0 -vips_interpolate_bicubic_interpolate: 2.6 2 - left=1, top=1, width=4, height=4 - maskx=19, masky=0 -vips_interpolate_bicubic_interpolate: 2.7 2 - left=1, top=1, width=4, height=4 - maskx=22, masky=0 -vips_interpolate_bicubic_interpolate: 2.8 2 - left=1, top=1, width=4, height=4 - maskx=25, masky=0 -vips_interpolate_bicubic_interpolate: 2.9 2 - left=1, top=1, width=4, height=4 - maskx=28, masky=0 -vips_interpolate_bicubic_interpolate: 3 2 - left=2, top=1, width=4, height=4 - maskx=0, masky=0 - -so for (eg.) 2.4 we pick mask 12, which is (12 / 32), or 0.375 - -however, mask13 would be closer: 13 / 32) = 0.4076 - -calculation is - - 2.4 * 32 == 76.8 - FLOOR(76.8) == 76 - 76 & 31 == 12 - - - -vips_interpolate_bicubic_class_init: -mask = 0, calculate_coefficients_catmull: 0 -mask = 1, calculate_coefficients_catmull: 0.03125 -mask = 2, calculate_coefficients_catmull: 0.0625 -mask = 3, calculate_coefficients_catmull: 0.09375 -mask = 4, calculate_coefficients_catmull: 0.125 -mask = 5, calculate_coefficients_catmull: 0.15625 -mask = 6, calculate_coefficients_catmull: 0.1875 -mask = 7, calculate_coefficients_catmull: 0.21875 -mask = 8, calculate_coefficients_catmull: 0.25 -mask = 9, calculate_coefficients_catmull: 0.28125 -mask = 10, calculate_coefficients_catmull: 0.3125 -mask = 11, calculate_coefficients_catmull: 0.34375 -mask = 12, calculate_coefficients_catmull: 0.375 -mask = 13, calculate_coefficients_catmull: 0.40625 -mask = 14, calculate_coefficients_catmull: 0.4375 -mask = 15, calculate_coefficients_catmull: 0.46875 -mask = 16, calculate_coefficients_catmull: 0.5 -mask = 17, calculate_coefficients_catmull: 0.53125 -mask = 18, calculate_coefficients_catmull: 0.5625 -mask = 19, calculate_coefficients_catmull: 0.59375 -mask = 20, calculate_coefficients_catmull: 0.625 -mask = 21, calculate_coefficients_catmull: 0.65625 -mask = 22, calculate_coefficients_catmull: 0.6875 -mask = 23, calculate_coefficients_catmull: 0.71875 -mask = 24, calculate_coefficients_catmull: 0.75 -mask = 25, calculate_coefficients_catmull: 0.78125 -mask = 26, calculate_coefficients_catmull: 0.8125 -mask = 27, calculate_coefficients_catmull: 0.84375 -mask = 28, calculate_coefficients_catmull: 0.875 -mask = 29, calculate_coefficients_catmull: 0.90625 -mask = 30, calculate_coefficients_catmull: 0.9375 -mask = 31, calculate_coefficients_catmull: 0.96875 -mask = 32, calculate_coefficients_catmull: 1 - - - - - - - tools subdirs are now pretty stupid :-( just have a single dir -- int bicubic needs more precision? try a 10x upscale and compare to the - double path - - yes, looks like a rounding problem? - - upscale 10x, you get a small offset if you compare the double and int paths - - and slight banding - - test python setup.py build_ext diff --git a/libvips/include/vips/interpolate.h b/libvips/include/vips/interpolate.h index 4132ecc7..e695f39c 100644 --- a/libvips/include/vips/interpolate.h +++ b/libvips/include/vips/interpolate.h @@ -96,7 +96,7 @@ int vips_interpolate_get_window_offset( VipsInterpolate *interpolate ); /* How many bits of precision we keep for transformations, ie. how many * pre-computed matricies we have. */ -#define VIPS_TRANSFORM_SHIFT (2) +#define VIPS_TRANSFORM_SHIFT (6) #define VIPS_TRANSFORM_SCALE (1 << VIPS_TRANSFORM_SHIFT) /* How many bits of precision we keep for interpolation, ie. where the decimal diff --git a/libvips/iofuncs/dispatch_types.c b/libvips/iofuncs/dispatch_types.c index 75c02bdf..64673a62 100644 --- a/libvips/iofuncs/dispatch_types.c +++ b/libvips/iofuncs/dispatch_types.c @@ -894,11 +894,19 @@ input_interpolate_init( im_object *obj, char *str ) return( 0 ); } +static int +input_interpolate_dest( im_object obj ) +{ + g_object_unref( (GObject *) obj ); + + return( 0 ); +} + im_type_desc im__input_interpolate = { IM_TYPE_INTERPOLATE, 0, /* No storage required */ IM_TYPE_ARG, /* It requires a command-line arg */ - (im_init_obj_fn) input_interpolate_init,/* Init function */ - (im_dest_obj_fn) g_object_unref /* Destroy function */ + input_interpolate_init, /* Init function */ + input_interpolate_dest /* Destroy function */ }; diff --git a/libvips/resample/bicubic.cpp b/libvips/resample/bicubic.cpp index 69314744..636f0749 100644 --- a/libvips/resample/bicubic.cpp +++ b/libvips/resample/bicubic.cpp @@ -302,23 +302,24 @@ static void vips_interpolate_bicubic_interpolate( VipsInterpolate *interpolate, PEL *out, REGION *in, double x, double y ) { - /* Scaled int. + /* Find the mask index. We round-to-nearest, so we need to generate + * indexes in 0 to VIPS_TRANSFORM_SCALE, 2^n + 1 values. We multiply + * by 2 more than we need to, add one, mask, then shift down again to + * get the extra range. */ - const double sx = x * VIPS_TRANSFORM_SCALE; - const double sy = y * VIPS_TRANSFORM_SCALE; + const int sx = x * VIPS_TRANSFORM_SCALE * 2; + const int sy = y * VIPS_TRANSFORM_SCALE * 2; - /* We know sx/sy are always positive, so we can just (int) them. - */ - const int sxi = (int) sx; - const int syi = (int) sy; + const int six = sx & (VIPS_TRANSFORM_SCALE * 2 - 1); + const int siy = sy & (VIPS_TRANSFORM_SCALE * 2 - 1); - /* Get index into interpolation table and unscaled integer - * position. + const int tx = (six + 1) >> 1; + const int ty = (siy + 1) >> 1; + + /* We know x/y are always positive, so we can just (int) them. */ - const int tx = sxi & (VIPS_TRANSFORM_SCALE - 1); - const int ty = syi & (VIPS_TRANSFORM_SCALE - 1); - const int ix = sxi >> VIPS_TRANSFORM_SHIFT; - const int iy = syi >> VIPS_TRANSFORM_SHIFT; + const int ix = (int) x; + const int iy = (int) y; /* Look up the tables we need. */ diff --git a/libvips/resample/interpolate.c b/libvips/resample/interpolate.c index 9dc03e9d..9fe25d7e 100644 --- a/libvips/resample/interpolate.c +++ b/libvips/resample/interpolate.c @@ -325,15 +325,16 @@ static float vips_bilinear_matrixd * p3 p4 */ -/* Interpolate a section ... int8/16 types. +/* Interpolate a section ... int8/16 types, lookup tables for interpolation + * factors, fixed-point arithmetic. */ #define BILINEAR_INT( TYPE ) { \ TYPE *tq = (TYPE *) out; \ \ - const int c1 = vips_bilinear_matrixi[xi][yi][0]; \ - const int c2 = vips_bilinear_matrixi[xi][yi][1]; \ - const int c3 = vips_bilinear_matrixi[xi][yi][2]; \ - const int c4 = vips_bilinear_matrixi[xi][yi][3]; \ + const int c1 = vips_bilinear_matrixi[tx][ty][0]; \ + const int c2 = vips_bilinear_matrixi[tx][ty][1]; \ + const int c3 = vips_bilinear_matrixi[tx][ty][2]; \ + const int c4 = vips_bilinear_matrixi[tx][ty][3]; \ \ const TYPE *tp1 = (TYPE *) p1; \ const TYPE *tp2 = (TYPE *) p2; \ @@ -345,15 +346,16 @@ static float vips_bilinear_matrixd c3 * tp3[z] + c4 * tp4[z]) >> VIPS_INTERPOLATE_SHIFT; \ } -/* Interpolate a pel ... int32 and float types. +/* Interpolate a pel ... int32 and float types, lookup tables, float + * arithmetic. */ #define BILINEAR_FLOAT( TYPE ) { \ TYPE *tq = (TYPE *) out; \ \ - const double c1 = vips_bilinear_matrixd[xi][yi][0]; \ - const double c2 = vips_bilinear_matrixd[xi][yi][1]; \ - const double c3 = vips_bilinear_matrixd[xi][yi][2]; \ - const double c4 = vips_bilinear_matrixd[xi][yi][3]; \ + const double c1 = vips_bilinear_matrixd[tx][ty][0]; \ + const double c2 = vips_bilinear_matrixd[tx][ty][1]; \ + const double c3 = vips_bilinear_matrixd[tx][ty][2]; \ + const double c4 = vips_bilinear_matrixd[tx][ty][3]; \ \ const TYPE *tp1 = (TYPE *) p1; \ const TYPE *tp2 = (TYPE *) p2; \ @@ -393,25 +395,26 @@ vips_interpolate_bilinear_interpolate( VipsInterpolate *interpolate, const int ls = IM_REGION_LSKIP( in ); const int b = in->im->Bands; - /* Now go to scaled int. + /* Find the mask index. We round-to-nearest, so we need to generate + * indexes in 0 to VIPS_TRANSFORM_SCALE, 2^n + 1 values. We multiply + * by 2 more than we need to, add one, mask, then shift down again to + * get the extra range. */ - const double sx = x * VIPS_TRANSFORM_SCALE; - const double sy = y * VIPS_TRANSFORM_SCALE; + const int sx = x * VIPS_TRANSFORM_SCALE * 2; + const int sy = y * VIPS_TRANSFORM_SCALE * 2; - /* We know sx/sy are always positive so we can just (int) them. + const int six = sx & (VIPS_TRANSFORM_SCALE * 2 - 1); + const int siy = sy & (VIPS_TRANSFORM_SCALE * 2 - 1); + + const int tx = (six + 1) >> 1; + const int ty = (siy + 1) >> 1; + + /* We know x/y are always positive, so we can just (int) them. */ - const int sxi = (int) sx; - const int syi = (int) sy; + const int ix = (int) x; + const int iy = (int) y; - /* Get index into interpolation table and unscaled integer - * position. - */ - const int xi = sxi & (VIPS_TRANSFORM_SCALE - 1); - const int yi = syi & (VIPS_TRANSFORM_SCALE - 1); - const int x_int = sxi >> VIPS_TRANSFORM_SHIFT; - const int y_int = syi >> VIPS_TRANSFORM_SHIFT; - - const PEL *p1 = (PEL *) IM_REGION_ADDR( in, x_int, y_int ); + const PEL *p1 = (PEL *) IM_REGION_ADDR( in, ix, iy ); const PEL *p2 = p1 + ps; const PEL *p3 = p1 + ls; const PEL *p4 = p3 + ps;