fix window_offset stuff, fix a performance problem

This commit is contained in:
John Cupitt 2010-08-12 15:56:07 +00:00
parent 6d97500587
commit f27ab2fa3f
13 changed files with 147 additions and 118 deletions

View File

@ -10,6 +10,9 @@
- various small cleanups (thanks Tim) - various small cleanups (thanks Tim)
- add lcms2 support - add lcms2 support
- VImage(filename) defaults to "rd" mode - VImage(filename) defaults to "rd" mode
- revise window_offset / window_size, again
- fix a mixup with ANY hints that caused performance problems on the main
benchmark
12/5/10 started 7.22.2 12/5/10 started 7.22.2
- the conditional image of ifthenelse can be any format, a (!=0) is added if - the conditional image of ifthenelse can be any format, a (!=0) is added if

26
TODO
View File

@ -1,29 +1,3 @@
- large and puzzling performance regression cf. 7.18 on the benchmark
7.18
$ time ./vips.py wtc_tiled_small.tif wtc2.tif
vips warning: im_conv: 6534 overflows and 13439 underflows detected
real 0m1.891s
user 0m3.110s
sys 0m0.320s
7.20 (and 7.22)
$ time ./vips.py wtc_tiled_small.tif wtc2.tif
vips warning: im_conv: 6502 overflows and 13429 underflows detected
real 0m2.962s
user 0m4.950s
sys 0m0.250s
and why are the conv numbers different?
seems to be a difference in the bilinear interpolator? we did increase
accuracy in 7.18 -> 7.20
I suppose conv must have changed too?
- use D65 in cmsCreateLab4Profile() ? not sure - use D65 in cmsCreateLab4Profile() ? not sure
- VImage("poop.v") should use "rd" as well? Python too? - VImage("poop.v") should use "rd" as well? Python too?

View File

@ -79,8 +79,8 @@ typedef struct _VipsInterpolateClass {
*/ */
int window_size; int window_size;
/* Stencils are offset by this much. Default to window_size / 2 /* Stencils are offset by this much. Default to window_size / 2 - 1
* (centering) if undefined. * (centering) if get_window_offset is NULL and window_offset is -1.
*/ */
int (*get_window_offset)( VipsInterpolate * ); int (*get_window_offset)( VipsInterpolate * );
int window_offset; int window_offset;

View File

@ -186,6 +186,7 @@ int
im_demand_hint_array( IMAGE *im, VipsDemandStyle hint, IMAGE **in ) im_demand_hint_array( IMAGE *im, VipsDemandStyle hint, IMAGE **in )
{ {
int i, len, nany; int i, len, nany;
VipsDemandStyle set_hint;
/* How many input images are there? And how many are IM_ANY? /* How many input images are there? And how many are IM_ANY?
*/ */
@ -193,6 +194,7 @@ im_demand_hint_array( IMAGE *im, VipsDemandStyle hint, IMAGE **in )
if( in[i]->dhint == IM_ANY ) if( in[i]->dhint == IM_ANY )
nany++; nany++;
set_hint = hint;
if( len == 0 ) if( len == 0 )
/* No input images? Just set the requested hint. We don't /* No input images? Just set the requested hint. We don't
* force ANY, since the operation might be something like * force ANY, since the operation might be something like
@ -204,18 +206,24 @@ im_demand_hint_array( IMAGE *im, VipsDemandStyle hint, IMAGE **in )
/* Special case: if all the inputs are IM_ANY, then output can /* Special case: if all the inputs are IM_ANY, then output can
* be IM_ANY regardless of what this function wants. * be IM_ANY regardless of what this function wants.
*/ */
hint = IM_ANY; set_hint = IM_ANY;
else else
/* Find the most restrictive of all the hints available to us. /* Find the most restrictive of all the hints available to us.
*/ */
for( i = 0; i < len; i++ ) for( i = 0; i < len; i++ )
hint = find_least( hint, in[i]->dhint ); set_hint = find_least( set_hint, in[i]->dhint );
im->dhint = hint; im->dhint = set_hint;
#ifdef DEBUG #ifdef DEBUG
printf( "im_demand_hint_array: set dhint for \"%s\" to %s\n", printf( "im_demand_hint_array: set dhint for \"%s\" to %s\n",
im->filename, im_dhint2char( im->dhint ) ); im->filename, im_dhint2char( im->dhint ) );
printf( "\toperation requested %s\n", im_dhint2char( hint ) );
printf( "\tinputs were:\n" );
printf( "\t" );
for( i = 0; in[i]; i++ )
printf( "%s ", im_dhint2char( in[i]->dhint ) );
printf( "\n" );
#endif /*DEBUG*/ #endif /*DEBUG*/
/* im depends on all these ims. /* im depends on all these ims.
@ -255,8 +263,7 @@ im_demand_hint( IMAGE *im, VipsDemandStyle hint, ... )
; ;
va_end( ap ); va_end( ap );
if( i == MAX_IMAGES ) { if( i == MAX_IMAGES ) {
im_error( "im_demand_hint", im_error( "im_demand_hint", "%s", _( "too many images" ) );
"%s", _( "too many images" ) );
return( -1 ); return( -1 );
} }

View File

@ -421,10 +421,12 @@ open_lazy( VipsFormatClass *format, gboolean disc, IMAGE *out )
if( !(lazy = lazy_new( out, format, disc )) ) if( !(lazy = lazy_new( out, format, disc )) )
return( -1 ); return( -1 );
/* Read header fields to init the return image. /* Read header fields to init the return image. THINSTRIP since this is
* probably a disc file. We can't tell yet whether we will be opening
* to memory, sadly, so we can't suggest ANY.
*/ */
if( format->header( out->filename, out ) || if( format->header( out->filename, out ) ||
im_demand_hint( out, IM_ANY, NULL ) ) im_demand_hint( out, IM_THINSTRIP, NULL ) )
return( -1 ); return( -1 );
/* Then 'start' creates the real image and 'gen' paints 'out' with /* Then 'start' creates the real image and 'gen' paints 'out' with

View File

@ -914,13 +914,13 @@ vips_get_tile_size( VipsImage *im,
(1 + nthr / IM_MAX( 1, im->Xsize / *tile_width )) * 2; (1 + nthr / IM_MAX( 1, im->Xsize / *tile_width )) * 2;
break; break;
case IM_ANY:
case IM_FATSTRIP: case IM_FATSTRIP:
*tile_width = im->Xsize; *tile_width = im->Xsize;
*tile_height = im__fatstrip_height; *tile_height = im__fatstrip_height;
*nlines = *tile_height * nthr * 2; *nlines = *tile_height * nthr * 2;
break; break;
case IM_ANY:
case IM_THINSTRIP: case IM_THINSTRIP:
*tile_width = im->Xsize; *tile_width = im->Xsize;
*tile_height = im__thinstrip_height; *tile_height = im__thinstrip_height;

View File

@ -1,4 +1,7 @@
/* bicubic (catmull-rom) interpolator /* bicubic (catmull-rom) interpolator
*
* 12/8/10
* - revise window_size / window_offset stuff again
*/ */
/* /*
@ -321,6 +324,10 @@ vips_interpolate_bicubic_interpolate( VipsInterpolate *interpolate,
const int ix = (int) x; const int ix = (int) x;
const int iy = (int) y; const int iy = (int) y;
/* Back and up one to get the top-left of the 4x4.
*/
const PEL *p = (PEL *) IM_REGION_ADDR( in, ix - 1, iy - 1 );
/* Look up the tables we need. /* Look up the tables we need.
*/ */
const int *cxi = vips_bicubic_matrixi[tx]; const int *cxi = vips_bicubic_matrixi[tx];
@ -328,10 +335,6 @@ vips_interpolate_bicubic_interpolate( VipsInterpolate *interpolate,
const double *cxf = vips_bicubic_matrixf[tx]; const double *cxf = vips_bicubic_matrixf[tx];
const double *cyf = vips_bicubic_matrixf[ty]; const double *cyf = vips_bicubic_matrixf[ty];
/* Back and up one to get the top-left of the 4x4.
*/
const PEL *p = (PEL *) IM_REGION_ADDR( in, ix - 1, iy - 1 );
/* Pel size and line size. /* Pel size and line size.
*/ */
const int bands = in->im->Bands; const int bands = in->im->Bands;
@ -428,11 +431,6 @@ vips_interpolate_bicubic_class_init( VipsInterpolateBicubicClass *iclass )
interpolate_class->interpolate = vips_interpolate_bicubic_interpolate; interpolate_class->interpolate = vips_interpolate_bicubic_interpolate;
interpolate_class->window_size = 4; interpolate_class->window_size = 4;
interpolate_class->window_offset = 2;
/*
* Note from nicolas: If things were programmed sanely, I
* think window_offset should be 1, not 2.
*/
/* Build the tables of pre-computed coefficients. /* Build the tables of pre-computed coefficients.
*/ */

View File

@ -85,6 +85,9 @@
* - revise clipping / transform stuff, again * - revise clipping / transform stuff, again
* - now do corner rather than centre: this way the identity transform * - now do corner rather than centre: this way the identity transform
* returns the input exactly * returns the input exactly
* 12/8/10
* - revise window_size / window_offset stuff again, see also
* interpolate.c
*/ */
/* /*
@ -137,9 +140,28 @@
#include <dmalloc.h> #include <dmalloc.h>
#endif /*WITH_DMALLOC*/ #endif /*WITH_DMALLOC*/
/* "fast" floor() ... on my laptop, anyway. /*
* FAST_PSEUDO_FLOOR is a floor and floorf replacement which has been
* found to be faster on several linux boxes than the library
* version. It returns the floor of its argument unless the argument
* is a negative integer, in which case it returns one less than the
* floor. For example:
*
* FAST_PSEUDO_FLOOR(0.5) = 0
*
* FAST_PSEUDO_FLOOR(0.) = 0
*
* FAST_PSEUDO_FLOOR(-.5) = -1
*
* as expected, but
*
* FAST_PSEUDO_FLOOR(-1.) = -2
*
* The locations of the discontinuities of FAST_PSEUDO_FLOOR are the
* same as floor and floorf; it is just that at negative integers the
* function is discontinuous on the right instead of the left.
*/ */
#define FLOOR( V ) ((V) >= 0 ? (int)(V) : (int)((V) - 1)) #define FAST_PSEUDO_FLOOR(x) ( (int)(x) - ( (x) < 0. ) )
/* Per-call state. /* Per-call state.
*/ */
@ -158,7 +180,7 @@ affine_free( Affine *affine )
return( 0 ); return( 0 );
} }
/* We have five (!!) coordinate systems. Working forward through them, there /* We have five (!!) coordinate systems. Working forward through them, these
* are: * are:
* *
* 1. The original input image * 1. The original input image
@ -167,6 +189,14 @@ affine_free( Affine *affine )
* interpolator. iarea->left/top give the offset. These are the coordinates we * interpolator. iarea->left/top give the offset. These are the coordinates we
* pass to IM_REGION_ADDR()/im_prepare() for the input image. * pass to IM_REGION_ADDR()/im_prepare() for the input image.
* *
* The borders are sized by the interpolator's window_size property and offset
* by the interpolator's window_offset property. For example,
* for bilinear (window_size 2, window_offset 0) we add a single line
* of extra pixels along the bottom and right (window_size - 1). For
* bicubic (window_size 4, window_offset 1) we add a single line top and left
* (window_offset), and two lines bottom and right (window_size - 1 -
* window_offset).
*
* 3. We need point (0, 0) in (1) to be at (0, 0) for the transformation. So * 3. We need point (0, 0) in (1) to be at (0, 0) for the transformation. So
* shift everything up and left to make the displaced input image. This is the * shift everything up and left to make the displaced input image. This is the
* space that the transformation maps from, and can have negative pixels * space that the transformation maps from, and can have negative pixels
@ -186,6 +216,8 @@ affinei_gen( REGION *or, void *seq, void *a, void *b )
REGION *ir = (REGION *) seq; REGION *ir = (REGION *) seq;
const IMAGE *in = (IMAGE *) a; const IMAGE *in = (IMAGE *) a;
const Affine *affine = (Affine *) b; const Affine *affine = (Affine *) b;
const int window_size =
vips_interpolate_get_window_size( affine->interpolate );
const int window_offset = const int window_offset =
vips_interpolate_get_window_offset( affine->interpolate ); vips_interpolate_get_window_offset( affine->interpolate );
const VipsInterpolateMethod interpolate = const VipsInterpolateMethod interpolate =
@ -225,14 +257,27 @@ affinei_gen( REGION *or, void *seq, void *a, void *b )
*/ */
im__transform_invert_rect( &affine->trn, &want, &need ); im__transform_invert_rect( &affine->trn, &want, &need );
/* That does round-to-nearest, because it has to stop rounding errors
* growing images unexpectedly. We need round-down, so we must
* add half a pixel along the left and top. But we are int :( so add 1
* pixel.
*/
need.top -= 1;
need.left -= 1;
need.width += 1;
need.height += 1;
/* Now go to space (2) above. /* Now go to space (2) above.
*/ */
need.left += iarea->left; need.left += iarea->left;
need.top += iarea->top; need.top += iarea->top;
/* Add a border for interpolation. Plus one for rounding errors. /* Add a border for interpolation.
*/ */
im_rect_marginadjust( &need, window_offset + 1 ); need.width += window_size - 1;
need.height += window_size - 1;
need.left -= window_offset;
need.top -= window_offset;
/* Clip against the size of (2). /* Clip against the size of (2).
*/ */
@ -304,8 +349,8 @@ affinei_gen( REGION *or, void *seq, void *a, void *b )
for( x = le; x < ri; x++ ) { for( x = le; x < ri; x++ ) {
int fx, fy; int fx, fy;
fx = FLOOR( ix ); fx = FAST_PSEUDO_FLOOR( ix );
fy = FLOOR( iy ); fy = FAST_PSEUDO_FLOOR( iy );
/* Clipping! /* Clipping!
*/ */
@ -336,9 +381,8 @@ affinei( IMAGE *in, IMAGE *out,
/* Make output image. /* Make output image.
*/ */
if( im_piocheck( in, out ) ) if( im_piocheck( in, out ) ||
return( -1 ); im_cp_desc( out, in ) )
if( im_cp_desc( out, in ) )
return( -1 ); return( -1 );
/* Need a copy of the params for the lifetime of out. /* Need a copy of the params for the lifetime of out.

View File

@ -1,6 +1,12 @@
/* vipsinterpolate ... abstract base class for various interpolators /* vipsinterpolate ... abstract base class for various interpolators
* *
* J. Cupitt, 15/10/08 * J. Cupitt, 15/10/08
*
* 12/8/10
* - revise window_size / window_offset stuff again: window_offset now
* defaults to (window_size / 2 - 1), so for a 4x4 stencil (eg.
* bicubic) we have an offset of 1
* - tiny speedups
*/ */
/* /*
@ -109,13 +115,19 @@ vips_interpolate_real_get_window_offset( VipsInterpolate *interpolate )
{ {
VipsInterpolateClass *class = VIPS_INTERPOLATE_GET_CLASS( interpolate ); VipsInterpolateClass *class = VIPS_INTERPOLATE_GET_CLASS( interpolate );
g_assert( class->window_offset != -1 ); /* Default to half window size - 1. For example, bicubic is a 4x4
* stencil and needs an offset of 1.
*/
if( class->window_offset != -1 )
return( class->window_offset ); return( class->window_offset );
else {
int window_size =
vips_interpolate_get_window_size( interpolate );
/* /\* Default to half window size. */ /* Don't go -ve, of course, for window_size 1.
/* *\/ */ */
/* return( vips_interpolate_get_window_size( interpolate ) / 2 ); */ return( IM_MAX( 0, window_size / 2 - 1 ) );
}
} }
static void static void
@ -228,15 +240,17 @@ vips_interpolate_nearest_interpolate( VipsInterpolate *interpolate,
/* Pel size and line size. /* Pel size and line size.
*/ */
const int ps = IM_IMAGE_SIZEOF_PEL( in->im ); const int ps = IM_IMAGE_SIZEOF_PEL( in->im );
int z;
/* Top left corner we interpolate from. /* Top left corner we interpolate from. We know x/y are always
* positive, so we can just (int) them.
*/ */
const int xi = FAST_PSEUDO_FLOOR( x ); const int xi = (int) x;
const int yi = FAST_PSEUDO_FLOOR( y ); const int yi = (int) y;
const PEL *p = (PEL *) IM_REGION_ADDR( in, xi, yi ); const PEL *p = (PEL *) IM_REGION_ADDR( in, xi, yi );
int z;
for( z = 0; z < ps; z++ ) for( z = 0; z < ps; z++ )
out[z] = p[z]; out[z] = p[z];
} }
@ -253,7 +267,6 @@ vips_interpolate_nearest_class_init( VipsInterpolateNearestClass *class )
interpolate_class->interpolate = vips_interpolate_nearest_interpolate; interpolate_class->interpolate = vips_interpolate_nearest_interpolate;
interpolate_class->window_size = 1; interpolate_class->window_size = 1;
interpolate_class->window_offset = 0;
} }
static void static void
@ -409,8 +422,8 @@ vips_interpolate_bilinear_interpolate( VipsInterpolate *interpolate,
const int tx = (six + 1) >> 1; const int tx = (six + 1) >> 1;
const int ty = (siy + 1) >> 1; const int ty = (siy + 1) >> 1;
/* We want ((int)x) ... save this double -> int conversion by just /* We want ((int)x) ... void repeating this double -> int conversion
* shifting sx down. * by just shifting sx down.
*/ */
const int ix = sx >> (VIPS_TRANSFORM_SHIFT + 1); const int ix = sx >> (VIPS_TRANSFORM_SHIFT + 1);
const int iy = sy >> (VIPS_TRANSFORM_SHIFT + 1); const int iy = sy >> (VIPS_TRANSFORM_SHIFT + 1);
@ -439,7 +452,6 @@ vips_interpolate_bilinear_class_init( VipsInterpolateBilinearClass *class )
interpolate_class->interpolate = vips_interpolate_bilinear_interpolate; interpolate_class->interpolate = vips_interpolate_bilinear_interpolate;
interpolate_class->window_size = 2; interpolate_class->window_size = 2;
interpolate_class->window_offset = 1;
/* Calculate the interpolation matricies. /* Calculate the interpolation matricies.
*/ */

View File

@ -851,11 +851,6 @@ vips_interpolate_lbb_class_init( VipsInterpolateLbbClass *klass )
interpolate_class->interpolate = vips_interpolate_lbb_interpolate; interpolate_class->interpolate = vips_interpolate_lbb_interpolate;
interpolate_class->window_size = 4; interpolate_class->window_size = 4;
interpolate_class->window_offset = 2;
/*
* Note from nicolas: If things were sane, window_offset should be
* 1, not 2.
*/
} }
static void static void

View File

@ -1560,13 +1560,9 @@ vips_interpolate_nohalo_interpolate( VipsInterpolate* restrict interpolate,
static void static void
vips_interpolate_nohalo_class_init( VipsInterpolateNohaloClass *klass ) vips_interpolate_nohalo_class_init( VipsInterpolateNohaloClass *klass )
{ {
GObjectClass *gobject_class = G_OBJECT_CLASS( klass );
VipsObjectClass *object_class = VIPS_OBJECT_CLASS( klass ); VipsObjectClass *object_class = VIPS_OBJECT_CLASS( klass );
VipsInterpolateClass *interpolate_class = VIPS_INTERPOLATE_CLASS( klass ); VipsInterpolateClass *interpolate_class = VIPS_INTERPOLATE_CLASS( klass );
gobject_class->set_property = vips_object_set_property;
gobject_class->get_property = vips_object_get_property;
object_class->nickname = "nohalo"; object_class->nickname = "nohalo";
object_class->description = object_class->description =
_( "Edge sharpening resampler with halo reduction" ); _( "Edge sharpening resampler with halo reduction" );

View File

@ -58,6 +58,7 @@ template <typename T> static T inline
to_fptypes( const double val ) to_fptypes( const double val )
{ {
const T newval = val; const T newval = val;
return( newval ); return( newval );
} }
@ -67,6 +68,7 @@ to_withsign( const double val )
const int sign_of_val = 2 * ( val >= 0. ) - 1; const int sign_of_val = 2 * ( val >= 0. ) - 1;
const int rounded_abs_val = .5 + sign_of_val * val; const int rounded_abs_val = .5 + sign_of_val * val;
const T newval = sign_of_val * rounded_abs_val; const T newval = sign_of_val * rounded_abs_val;
return( newval ); return( newval );
} }
@ -74,6 +76,7 @@ template <typename T> static T inline
to_nosign( const double val ) to_nosign( const double val )
{ {
const T newval = .5 + val; const T newval = .5 + val;
return( newval ); return( newval );
} }
@ -82,6 +85,7 @@ to_nosign( const double val )
* is used: There is an assumption that the data is such that * is used: There is an assumption that the data is such that
* over/underflow is not an issue: * over/underflow is not an issue:
*/ */
/* /*
* Bilinear interpolation for float and double types. The first four * Bilinear interpolation for float and double types. The first four
* inputs are weights, the last four are the corresponding pixel * inputs are weights, the last four are the corresponding pixel
@ -243,9 +247,7 @@ bicubic_float(
static void inline static void inline
calculate_coefficients_catmull( const double x, double c[4] ) calculate_coefficients_catmull( const double x, double c[4] )
{ {
/* Nicolas believes that the following is an hitherto unknown
/*
* Nicolas believes that the following is an hitherto unknown
* hyper-efficient method of computing Catmull-Rom coefficients. It * hyper-efficient method of computing Catmull-Rom coefficients. It
* only uses 4* & 1+ & 5- for a total of only 10 flops to compute * only uses 4* & 1+ & 5- for a total of only 10 flops to compute
* four coefficients. * four coefficients.

View File

@ -393,13 +393,9 @@ vips_interpolate_vsqbs_interpolate( VipsInterpolate* restrict interpolate,
static void static void
vips_interpolate_vsqbs_class_init( VipsInterpolateVsqbsClass *klass ) vips_interpolate_vsqbs_class_init( VipsInterpolateVsqbsClass *klass )
{ {
GObjectClass *gobject_class = G_OBJECT_CLASS( klass );
VipsObjectClass *object_class = VIPS_OBJECT_CLASS( klass ); VipsObjectClass *object_class = VIPS_OBJECT_CLASS( klass );
VipsInterpolateClass *interpolate_class = VIPS_INTERPOLATE_CLASS( klass ); VipsInterpolateClass *interpolate_class = VIPS_INTERPOLATE_CLASS( klass );
gobject_class->set_property = vips_object_set_property;
gobject_class->get_property = vips_object_get_property;
object_class->nickname = "vsqbs"; object_class->nickname = "vsqbs";
object_class->description = _( "B-Splines with antialiasing smoothing" ); object_class->description = _( "B-Splines with antialiasing smoothing" );