faster, more accurate bilinear/bicubic

This commit is contained in:
John Cupitt 2010-05-30 17:26:23 +00:00
parent 32f481d524
commit 7fd672f595
6 changed files with 55 additions and 139 deletions

View File

@ -9,6 +9,8 @@
- all "colour" in messages changed to "color", have a proper en_GB - all "colour" in messages changed to "color", have a proper en_GB
translation file translation file
- vipsthumbnail delete profile failed if there was a profile - vipsthumbnail delete profile failed if there was a profile
- interpolate cli unref was broken
- more accurate, slightly faster bilinear and bicubic (thanks Nicolas)
21/3/10 started 7.21.3 21/3/10 started 7.21.3
- added progress feedback to threadpool - added progress feedback to threadpool

98
TODO
View File

@ -1,103 +1,5 @@
mask selection seems reasonable:
vips_interpolate_bicubic_interpolate: 2 2
left=1, top=1, width=4, height=4
maskx=0, masky=0
vips_interpolate_bicubic_interpolate: 2.1 2
left=1, top=1, width=4, height=4
maskx=3, masky=0
vips_interpolate_bicubic_interpolate: 2.2 2
left=1, top=1, width=4, height=4
maskx=6, masky=0
vips_interpolate_bicubic_interpolate: 2.3 2
left=1, top=1, width=4, height=4
maskx=9, masky=0
vips_interpolate_bicubic_interpolate: 2.4 2
left=1, top=1, width=4, height=4
maskx=12, masky=0
vips_interpolate_bicubic_interpolate: 2.5 2
left=1, top=1, width=4, height=4
maskx=16, masky=0
vips_interpolate_bicubic_interpolate: 2.6 2
left=1, top=1, width=4, height=4
maskx=19, masky=0
vips_interpolate_bicubic_interpolate: 2.7 2
left=1, top=1, width=4, height=4
maskx=22, masky=0
vips_interpolate_bicubic_interpolate: 2.8 2
left=1, top=1, width=4, height=4
maskx=25, masky=0
vips_interpolate_bicubic_interpolate: 2.9 2
left=1, top=1, width=4, height=4
maskx=28, masky=0
vips_interpolate_bicubic_interpolate: 3 2
left=2, top=1, width=4, height=4
maskx=0, masky=0
so for (eg.) 2.4 we pick mask 12, which is (12 / 32), or 0.375
however, mask 13 would be closer: (13 / 32) = 0.40625
calculation is
2.4 * 32 == 76.8
FLOOR(76.8) == 76
76 & 31 == 12
vips_interpolate_bicubic_class_init:
mask = 0, calculate_coefficients_catmull: 0
mask = 1, calculate_coefficients_catmull: 0.03125
mask = 2, calculate_coefficients_catmull: 0.0625
mask = 3, calculate_coefficients_catmull: 0.09375
mask = 4, calculate_coefficients_catmull: 0.125
mask = 5, calculate_coefficients_catmull: 0.15625
mask = 6, calculate_coefficients_catmull: 0.1875
mask = 7, calculate_coefficients_catmull: 0.21875
mask = 8, calculate_coefficients_catmull: 0.25
mask = 9, calculate_coefficients_catmull: 0.28125
mask = 10, calculate_coefficients_catmull: 0.3125
mask = 11, calculate_coefficients_catmull: 0.34375
mask = 12, calculate_coefficients_catmull: 0.375
mask = 13, calculate_coefficients_catmull: 0.40625
mask = 14, calculate_coefficients_catmull: 0.4375
mask = 15, calculate_coefficients_catmull: 0.46875
mask = 16, calculate_coefficients_catmull: 0.5
mask = 17, calculate_coefficients_catmull: 0.53125
mask = 18, calculate_coefficients_catmull: 0.5625
mask = 19, calculate_coefficients_catmull: 0.59375
mask = 20, calculate_coefficients_catmull: 0.625
mask = 21, calculate_coefficients_catmull: 0.65625
mask = 22, calculate_coefficients_catmull: 0.6875
mask = 23, calculate_coefficients_catmull: 0.71875
mask = 24, calculate_coefficients_catmull: 0.75
mask = 25, calculate_coefficients_catmull: 0.78125
mask = 26, calculate_coefficients_catmull: 0.8125
mask = 27, calculate_coefficients_catmull: 0.84375
mask = 28, calculate_coefficients_catmull: 0.875
mask = 29, calculate_coefficients_catmull: 0.90625
mask = 30, calculate_coefficients_catmull: 0.9375
mask = 31, calculate_coefficients_catmull: 0.96875
mask = 32, calculate_coefficients_catmull: 1
- tools subdirs are now pretty stupid :-( just have a single dir - tools subdirs are now pretty stupid :-( just have a single dir
- int bicubic needs more precision? try a 10x upscale and compare to the
double path
yes, looks like a rounding problem?
upscale 10x, you get a small offset if you compare the double and int paths
and slight banding
- test - test
python setup.py build_ext python setup.py build_ext

View File

@ -96,7 +96,7 @@ int vips_interpolate_get_window_offset( VipsInterpolate *interpolate );
/* How many bits of precision we keep for transformations, ie. how many /* How many bits of precision we keep for transformations, ie. how many
* pre-computed matrices we have. * pre-computed matrices we have.
*/ */
#define VIPS_TRANSFORM_SHIFT (2) #define VIPS_TRANSFORM_SHIFT (6)
#define VIPS_TRANSFORM_SCALE (1 << VIPS_TRANSFORM_SHIFT) #define VIPS_TRANSFORM_SCALE (1 << VIPS_TRANSFORM_SHIFT)
/* How many bits of precision we keep for interpolation, ie. where the decimal /* How many bits of precision we keep for interpolation, ie. where the decimal

View File

@ -894,11 +894,19 @@ input_interpolate_init( im_object *obj, char *str )
return( 0 ); return( 0 );
} }
/* Destroy function for an interpolate argument: treat the im_object as a
 * GObject and drop one reference on it. Always reports success (0).
 */
static int
input_interpolate_dest( im_object obj )
{
GObject *gobject = (GObject *) obj;

g_object_unref( gobject );

return( 0 );
}
im_type_desc im__input_interpolate = { im_type_desc im__input_interpolate = {
IM_TYPE_INTERPOLATE, IM_TYPE_INTERPOLATE,
0, /* No storage required */ 0, /* No storage required */
IM_TYPE_ARG, /* It requires a command-line arg */ IM_TYPE_ARG, /* It requires a command-line arg */
(im_init_obj_fn) input_interpolate_init,/* Init function */ input_interpolate_init, /* Init function */
(im_dest_obj_fn) g_object_unref /* Destroy function */ input_interpolate_dest /* Destroy function */
}; };

View File

@ -302,23 +302,24 @@ static void
vips_interpolate_bicubic_interpolate( VipsInterpolate *interpolate, vips_interpolate_bicubic_interpolate( VipsInterpolate *interpolate,
PEL *out, REGION *in, double x, double y ) PEL *out, REGION *in, double x, double y )
{ {
/* Scaled int. /* Find the mask index. We round-to-nearest, so we need to generate
* indexes in 0 to VIPS_TRANSFORM_SCALE, 2^n + 1 values. We multiply
* by 2 more than we need to, mask, add one, then shift down again to
* get the extra range.
*/ */
const double sx = x * VIPS_TRANSFORM_SCALE; const int sx = x * VIPS_TRANSFORM_SCALE * 2;
const double sy = y * VIPS_TRANSFORM_SCALE; const int sy = y * VIPS_TRANSFORM_SCALE * 2;
/* We know sx/sy are always positive, so we can just (int) them. const int six = sx & (VIPS_TRANSFORM_SCALE * 2 - 1);
*/ const int siy = sy & (VIPS_TRANSFORM_SCALE * 2 - 1);
const int sxi = (int) sx;
const int syi = (int) sy;
/* Get index into interpolation table and unscaled integer const int tx = (six + 1) >> 1;
* position. const int ty = (siy + 1) >> 1;
/* We know x/y are always positive, so we can just (int) them.
*/ */
const int tx = sxi & (VIPS_TRANSFORM_SCALE - 1); const int ix = (int) x;
const int ty = syi & (VIPS_TRANSFORM_SCALE - 1); const int iy = (int) y;
const int ix = sxi >> VIPS_TRANSFORM_SHIFT;
const int iy = syi >> VIPS_TRANSFORM_SHIFT;
/* Look up the tables we need. /* Look up the tables we need.
*/ */

View File

@ -325,15 +325,16 @@ static float vips_bilinear_matrixd
* p3 p4 * p3 p4
*/ */
/* Interpolate a section ... int8/16 types. /* Interpolate a section ... int8/16 types, lookup tables for interpolation
* factors, fixed-point arithmetic.
*/ */
#define BILINEAR_INT( TYPE ) { \ #define BILINEAR_INT( TYPE ) { \
TYPE *tq = (TYPE *) out; \ TYPE *tq = (TYPE *) out; \
\ \
const int c1 = vips_bilinear_matrixi[xi][yi][0]; \ const int c1 = vips_bilinear_matrixi[tx][ty][0]; \
const int c2 = vips_bilinear_matrixi[xi][yi][1]; \ const int c2 = vips_bilinear_matrixi[tx][ty][1]; \
const int c3 = vips_bilinear_matrixi[xi][yi][2]; \ const int c3 = vips_bilinear_matrixi[tx][ty][2]; \
const int c4 = vips_bilinear_matrixi[xi][yi][3]; \ const int c4 = vips_bilinear_matrixi[tx][ty][3]; \
\ \
const TYPE *tp1 = (TYPE *) p1; \ const TYPE *tp1 = (TYPE *) p1; \
const TYPE *tp2 = (TYPE *) p2; \ const TYPE *tp2 = (TYPE *) p2; \
@ -345,15 +346,16 @@ static float vips_bilinear_matrixd
c3 * tp3[z] + c4 * tp4[z]) >> VIPS_INTERPOLATE_SHIFT; \ c3 * tp3[z] + c4 * tp4[z]) >> VIPS_INTERPOLATE_SHIFT; \
} }
/* Interpolate a pel ... int32 and float types. /* Interpolate a pel ... int32 and float types, lookup tables, float
* arithmetic.
*/ */
#define BILINEAR_FLOAT( TYPE ) { \ #define BILINEAR_FLOAT( TYPE ) { \
TYPE *tq = (TYPE *) out; \ TYPE *tq = (TYPE *) out; \
\ \
const double c1 = vips_bilinear_matrixd[xi][yi][0]; \ const double c1 = vips_bilinear_matrixd[tx][ty][0]; \
const double c2 = vips_bilinear_matrixd[xi][yi][1]; \ const double c2 = vips_bilinear_matrixd[tx][ty][1]; \
const double c3 = vips_bilinear_matrixd[xi][yi][2]; \ const double c3 = vips_bilinear_matrixd[tx][ty][2]; \
const double c4 = vips_bilinear_matrixd[xi][yi][3]; \ const double c4 = vips_bilinear_matrixd[tx][ty][3]; \
\ \
const TYPE *tp1 = (TYPE *) p1; \ const TYPE *tp1 = (TYPE *) p1; \
const TYPE *tp2 = (TYPE *) p2; \ const TYPE *tp2 = (TYPE *) p2; \
@ -393,25 +395,26 @@ vips_interpolate_bilinear_interpolate( VipsInterpolate *interpolate,
const int ls = IM_REGION_LSKIP( in ); const int ls = IM_REGION_LSKIP( in );
const int b = in->im->Bands; const int b = in->im->Bands;
/* Now go to scaled int. /* Find the mask index. We round-to-nearest, so we need to generate
* indexes in 0 to VIPS_TRANSFORM_SCALE, 2^n + 1 values. We multiply
* by 2 more than we need to, mask, add one, then shift down again to
* get the extra range.
*/ */
const double sx = x * VIPS_TRANSFORM_SCALE; const int sx = x * VIPS_TRANSFORM_SCALE * 2;
const double sy = y * VIPS_TRANSFORM_SCALE; const int sy = y * VIPS_TRANSFORM_SCALE * 2;
/* We know sx/sy are always positive so we can just (int) them. const int six = sx & (VIPS_TRANSFORM_SCALE * 2 - 1);
const int siy = sy & (VIPS_TRANSFORM_SCALE * 2 - 1);
const int tx = (six + 1) >> 1;
const int ty = (siy + 1) >> 1;
/* We know x/y are always positive, so we can just (int) them.
*/ */
const int sxi = (int) sx; const int ix = (int) x;
const int syi = (int) sy; const int iy = (int) y;
/* Get index into interpolation table and unscaled integer const PEL *p1 = (PEL *) IM_REGION_ADDR( in, ix, iy );
* position.
*/
const int xi = sxi & (VIPS_TRANSFORM_SCALE - 1);
const int yi = syi & (VIPS_TRANSFORM_SCALE - 1);
const int x_int = sxi >> VIPS_TRANSFORM_SHIFT;
const int y_int = syi >> VIPS_TRANSFORM_SHIFT;
const PEL *p1 = (PEL *) IM_REGION_ADDR( in, x_int, y_int );
const PEL *p2 = p1 + ps; const PEL *p2 = p1 + ps;
const PEL *p3 = p1 + ls; const PEL *p3 = p1 + ls;
const PEL *p4 = p3 + ps; const PEL *p4 = p3 + ps;