fix window_offset stuff, fix a performance problem

This commit is contained in:
John Cupitt 2010-08-12 15:56:07 +00:00
parent 6d97500587
commit f27ab2fa3f
13 changed files with 147 additions and 118 deletions

View File

@ -10,6 +10,9 @@
- various small cleanups (thanks Tim)
- add lcms2 support
- VImage(filename) defaults to "rd" mode
- revise window_offset / window_size, again
- fix a mixup with ANY hints that caused performance problems on the main
benchmark
12/5/10 started 7.22.2
- the conditional image of ifthenelse can be any format, a (!=0) is added if

26
TODO
View File

@ -1,29 +1,3 @@
- large and puzzling performance regression cf. 7.18 on the benchmark
7.18
$ time ./vips.py wtc_tiled_small.tif wtc2.tif
vips warning: im_conv: 6534 overflows and 13439 underflows detected
real 0m1.891s
user 0m3.110s
sys 0m0.320s
7.20 (and 7.22)
$ time ./vips.py wtc_tiled_small.tif wtc2.tif
vips warning: im_conv: 6502 overflows and 13429 underflows detected
real 0m2.962s
user 0m4.950s
sys 0m0.250s
and why are the conv numbers different?
seems to be a difference in the bilinear interpolator? we did increase
accuracy in 7.18 -> 7.20
I suppose conv must have changed too?
- use D65 in cmsCreateLab4Profile() ? not sure
- VImage("poop.v") should use "rd" as well? Python too?

View File

@ -79,8 +79,8 @@ typedef struct _VipsInterpolateClass {
*/
int window_size;
/* Stencils are offset by this much. Default to window_size / 2
* (centering) if undefined.
/* Stencils are offset by this much. Default to window_size / 2 - 1
* (centering) if get_window_offset is NULL and window_offset is -1.
*/
int (*get_window_offset)( VipsInterpolate * );
int window_offset;

View File

@ -186,6 +186,7 @@ int
im_demand_hint_array( IMAGE *im, VipsDemandStyle hint, IMAGE **in )
{
int i, len, nany;
VipsDemandStyle set_hint;
/* How many input images are there? And how many are IM_ANY?
*/
@ -193,6 +194,7 @@ im_demand_hint_array( IMAGE *im, VipsDemandStyle hint, IMAGE **in )
if( in[i]->dhint == IM_ANY )
nany++;
set_hint = hint;
if( len == 0 )
/* No input images? Just set the requested hint. We don't
* force ANY, since the operation might be something like
@ -204,18 +206,24 @@ im_demand_hint_array( IMAGE *im, VipsDemandStyle hint, IMAGE **in )
/* Special case: if all the inputs are IM_ANY, then output can
* be IM_ANY regardless of what this function wants.
*/
hint = IM_ANY;
set_hint = IM_ANY;
else
/* Find the most restrictive of all the hints available to us.
*/
for( i = 0; i < len; i++ )
hint = find_least( hint, in[i]->dhint );
set_hint = find_least( set_hint, in[i]->dhint );
im->dhint = hint;
im->dhint = set_hint;
#ifdef DEBUG
printf( "im_demand_hint_array: set dhint for \"%s\" to %s\n",
im->filename, im_dhint2char( im->dhint ) );
printf( "\toperation requested %s\n", im_dhint2char( hint ) );
printf( "\tinputs were:\n" );
printf( "\t" );
for( i = 0; in[i]; i++ )
printf( "%s ", im_dhint2char( in[i]->dhint ) );
printf( "\n" );
#endif /*DEBUG*/
/* im depends on all these ims.
@ -255,8 +263,7 @@ im_demand_hint( IMAGE *im, VipsDemandStyle hint, ... )
;
va_end( ap );
if( i == MAX_IMAGES ) {
im_error( "im_demand_hint",
"%s", _( "too many images" ) );
im_error( "im_demand_hint", "%s", _( "too many images" ) );
return( -1 );
}

View File

@ -421,10 +421,12 @@ open_lazy( VipsFormatClass *format, gboolean disc, IMAGE *out )
if( !(lazy = lazy_new( out, format, disc )) )
return( -1 );
/* Read header fields to init the return image.
/* Read header fields to init the return image. THINSTRIP since this is
* probably a disc file. We can't tell yet whether we will be opening
* to memory, sadly, so we can't suggest ANY.
*/
if( format->header( out->filename, out ) ||
im_demand_hint( out, IM_ANY, NULL ) )
im_demand_hint( out, IM_THINSTRIP, NULL ) )
return( -1 );
/* Then 'start' creates the real image and 'gen' paints 'out' with

View File

@ -914,13 +914,13 @@ vips_get_tile_size( VipsImage *im,
(1 + nthr / IM_MAX( 1, im->Xsize / *tile_width )) * 2;
break;
case IM_ANY:
case IM_FATSTRIP:
*tile_width = im->Xsize;
*tile_height = im__fatstrip_height;
*nlines = *tile_height * nthr * 2;
break;
case IM_ANY:
case IM_THINSTRIP:
*tile_width = im->Xsize;
*tile_height = im__thinstrip_height;

View File

@ -1,4 +1,7 @@
/* bicubic (catmull-rom) interpolator
*
* 12/8/10
* - revise window_size / window_offset stuff again
*/
/*
@ -321,6 +324,10 @@ vips_interpolate_bicubic_interpolate( VipsInterpolate *interpolate,
const int ix = (int) x;
const int iy = (int) y;
/* Back and up one to get the top-left of the 4x4.
*/
const PEL *p = (PEL *) IM_REGION_ADDR( in, ix - 1, iy - 1 );
/* Look up the tables we need.
*/
const int *cxi = vips_bicubic_matrixi[tx];
@ -328,10 +335,6 @@ vips_interpolate_bicubic_interpolate( VipsInterpolate *interpolate,
const double *cxf = vips_bicubic_matrixf[tx];
const double *cyf = vips_bicubic_matrixf[ty];
/* Back and up one to get the top-left of the 4x4.
*/
const PEL *p = (PEL *) IM_REGION_ADDR( in, ix - 1, iy - 1 );
/* Pel size and line size.
*/
const int bands = in->im->Bands;
@ -423,16 +426,11 @@ vips_interpolate_bicubic_class_init( VipsInterpolateBicubicClass *iclass )
VipsInterpolateClass *interpolate_class =
VIPS_INTERPOLATE_CLASS( iclass );
object_class->nickname = "bicubic";
object_class->nickname = "bicubic";
object_class->description = _( "Bicubic interpolation (Catmull-Rom)" );
interpolate_class->interpolate = vips_interpolate_bicubic_interpolate;
interpolate_class->window_size = 4;
interpolate_class->window_offset = 2;
/*
* Note from nicolas: If things were programmed sanely, I
* think window_offset should be 1, not 2.
*/
interpolate_class->interpolate = vips_interpolate_bicubic_interpolate;
interpolate_class->window_size = 4;
/* Build the tables of pre-computed coefficients.
*/

View File

@ -85,6 +85,9 @@
* - revise clipping / transform stuff, again
* - now do corner rather than centre: this way the identity transform
* returns the input exactly
* 12/8/10
* - revise window_size / window_offset stuff again, see also
* interpolate.c
*/
/*
@ -137,9 +140,28 @@
#include <dmalloc.h>
#endif /*WITH_DMALLOC*/
/* "fast" floor() ... on my laptop, anyway.
/*
* FAST_PSEUDO_FLOOR is a floor and floorf replacement which has been
* found to be faster on several linux boxes than the library
* version. It returns the floor of its argument unless the argument
* is a negative integer, in which case it returns one less than the
* floor. For example:
*
* FAST_PSEUDO_FLOOR(0.5) = 0
*
* FAST_PSEUDO_FLOOR(0.) = 0
*
* FAST_PSEUDO_FLOOR(-.5) = -1
*
* as expected, but
*
* FAST_PSEUDO_FLOOR(-1.) = -2
*
* The locations of the discontinuities of FAST_PSEUDO_FLOOR are the
* same as floor and floorf; it is just that at negative integers the
* function is discontinuous on the right instead of the left.
*/
#define FLOOR( V ) ((V) >= 0 ? (int)(V) : (int)((V) - 1))
#define FAST_PSEUDO_FLOOR(x) ( (int)(x) - ( (x) < 0. ) )
/* Per-call state.
*/
@ -158,14 +180,22 @@ affine_free( Affine *affine )
return( 0 );
}
/* We have five (!!) coordinate systems. Working forward through them, there
/* We have five (!!) coordinate systems. Working forward through them, these
* are:
*
* 1. The original input image
*
* 2. This is embedded in a larger image to provide borders for the
* interpolator. iarea->left/top give the offset. These are the coordinates we
* pass to IM_REGION_ADDR()/im_prepare() for the input image.
* pass to IM_REGION_ADDR()/im_prepare() for the input image.
*
* The borders are sized by the interpolator's window_size property and offset
* by the interpolator's window_offset property. For example,
* for bilinear (window_size 2, window_offset 0) we add a single line
* of extra pixels along the bottom and right (window_size - 1). For
* bicubic (window_size 4, window_offset 1) we add a single line top and left
* (window_offset), and two lines bottom and right (window_size - 1 -
* window_offset).
*
* 3. We need point (0, 0) in (1) to be at (0, 0) for the transformation. So
* shift everything up and left to make the displaced input image. This is the
@ -186,6 +216,8 @@ affinei_gen( REGION *or, void *seq, void *a, void *b )
REGION *ir = (REGION *) seq;
const IMAGE *in = (IMAGE *) a;
const Affine *affine = (Affine *) b;
const int window_size =
vips_interpolate_get_window_size( affine->interpolate );
const int window_offset =
vips_interpolate_get_window_offset( affine->interpolate );
const VipsInterpolateMethod interpolate =
@ -225,14 +257,27 @@ affinei_gen( REGION *or, void *seq, void *a, void *b )
*/
im__transform_invert_rect( &affine->trn, &want, &need );
/* That does round-to-nearest, because it has to stop rounding errors
* growing images unexpectedly. We need round-down, so we must
* add half a pixel along the left and top. But we are int :( so add 1
* pixel.
*/
need.top -= 1;
need.left -= 1;
need.width += 1;
need.height += 1;
/* Now go to space (2) above.
*/
need.left += iarea->left;
need.top += iarea->top;
/* Add a border for interpolation. Plus one for rounding errors.
/* Add a border for interpolation.
*/
im_rect_marginadjust( &need, window_offset + 1 );
need.width += window_size - 1;
need.height += window_size - 1;
need.left -= window_offset;
need.top -= window_offset;
/* Clip against the size of (2).
*/
@ -304,8 +349,8 @@ affinei_gen( REGION *or, void *seq, void *a, void *b )
for( x = le; x < ri; x++ ) {
int fx, fy;
fx = FLOOR( ix );
fy = FLOOR( iy );
fx = FAST_PSEUDO_FLOOR( ix );
fy = FAST_PSEUDO_FLOOR( iy );
/* Clipping!
*/
@ -336,9 +381,8 @@ affinei( IMAGE *in, IMAGE *out,
/* Make output image.
*/
if( im_piocheck( in, out ) )
return( -1 );
if( im_cp_desc( out, in ) )
if( im_piocheck( in, out ) ||
im_cp_desc( out, in ) )
return( -1 );
/* Need a copy of the params for the lifetime of out.

View File

@ -1,6 +1,12 @@
/* vipsinterpolate ... abstract base class for various interpolators
*
* J. Cupitt, 15/10/08
*
* 12/8/10
* - revise window_size / window_offset stuff again: window_offset now
* defaults to (window_size / 2 - 1), so for a 4x4 stencil (eg.
* bicubic) we have an offset of 1
* - tiny speedups
*/
/*
@ -109,13 +115,19 @@ vips_interpolate_real_get_window_offset( VipsInterpolate *interpolate )
{
VipsInterpolateClass *class = VIPS_INTERPOLATE_GET_CLASS( interpolate );
g_assert( class->window_offset != -1 );
/* Default to half window size - 1. For example, bicubic is a 4x4
* stencil and needs an offset of 1.
*/
if( class->window_offset != -1 )
return( class->window_offset );
else {
int window_size =
vips_interpolate_get_window_size( interpolate );
return( class->window_offset );
/* /\* Default to half window size. */
/* *\/ */
/* return( vips_interpolate_get_window_size( interpolate ) / 2 ); */
/* Don't go -ve, of course, for window_size 1.
*/
return( IM_MAX( 0, window_size / 2 - 1 ) );
}
}
static void
@ -228,15 +240,17 @@ vips_interpolate_nearest_interpolate( VipsInterpolate *interpolate,
/* Pel size and line size.
*/
const int ps = IM_IMAGE_SIZEOF_PEL( in->im );
int z;
/* Top left corner we interpolate from.
/* Top left corner we interpolate from. We know x/y are always
* positive, so we can just (int) them.
*/
const int xi = FAST_PSEUDO_FLOOR( x );
const int yi = FAST_PSEUDO_FLOOR( y );
const int xi = (int) x;
const int yi = (int) y;
const PEL *p = (PEL *) IM_REGION_ADDR( in, xi, yi );
int z;
for( z = 0; z < ps; z++ )
out[z] = p[z];
}
@ -251,9 +265,8 @@ vips_interpolate_nearest_class_init( VipsInterpolateNearestClass *class )
object_class->nickname = "nearest";
object_class->description = _( "Nearest-neighbour interpolation" );
interpolate_class->interpolate = vips_interpolate_nearest_interpolate;
interpolate_class->window_size = 1;
interpolate_class->window_offset = 0;
interpolate_class->interpolate = vips_interpolate_nearest_interpolate;
interpolate_class->window_size = 1;
}
static void
@ -409,8 +422,8 @@ vips_interpolate_bilinear_interpolate( VipsInterpolate *interpolate,
const int tx = (six + 1) >> 1;
const int ty = (siy + 1) >> 1;
/* We want ((int)x) ... save this double -> int conversion by just
* shifting sx down.
/* We want ((int)x) ... void repeating this double -> int conversion
* by just shifting sx down.
*/
const int ix = sx >> (VIPS_TRANSFORM_SHIFT + 1);
const int iy = sy >> (VIPS_TRANSFORM_SHIFT + 1);
@ -437,9 +450,8 @@ vips_interpolate_bilinear_class_init( VipsInterpolateBilinearClass *class )
object_class->nickname = "bilinear";
object_class->description = _( "Bilinear interpolation" );
interpolate_class->interpolate = vips_interpolate_bilinear_interpolate;
interpolate_class->window_size = 2;
interpolate_class->window_offset = 1;
interpolate_class->interpolate = vips_interpolate_bilinear_interpolate;
interpolate_class->window_size = 2;
/* Calculate the interpolation matricies.
*/

View File

@ -851,11 +851,6 @@ vips_interpolate_lbb_class_init( VipsInterpolateLbbClass *klass )
interpolate_class->interpolate = vips_interpolate_lbb_interpolate;
interpolate_class->window_size = 4;
interpolate_class->window_offset = 2;
/*
* Note from nicolas: If things were sane, window_offset should be
* 1, not 2.
*/
}
static void

View File

@ -1560,13 +1560,9 @@ vips_interpolate_nohalo_interpolate( VipsInterpolate* restrict interpolate,
static void
vips_interpolate_nohalo_class_init( VipsInterpolateNohaloClass *klass )
{
GObjectClass *gobject_class = G_OBJECT_CLASS( klass );
VipsObjectClass *object_class = VIPS_OBJECT_CLASS( klass );
VipsInterpolateClass *interpolate_class = VIPS_INTERPOLATE_CLASS( klass );
gobject_class->set_property = vips_object_set_property;
gobject_class->get_property = vips_object_get_property;
object_class->nickname = "nohalo";
object_class->description =
_( "Edge sharpening resampler with halo reduction" );

View File

@ -57,24 +57,27 @@
template <typename T> static T inline
to_fptypes( const double val )
{
const T newval = val;
return( newval );
const T newval = val;
return( newval );
}
template <typename T> static T inline
to_withsign( const double val )
{
const int sign_of_val = 2 * ( val >= 0. ) - 1;
const int rounded_abs_val = .5 + sign_of_val * val;
const T newval = sign_of_val * rounded_abs_val;
return( newval );
const int sign_of_val = 2 * ( val >= 0. ) - 1;
const int rounded_abs_val = .5 + sign_of_val * val;
const T newval = sign_of_val * rounded_abs_val;
return( newval );
}
template <typename T> static T inline
to_nosign( const double val )
{
const T newval = .5 + val;
return( newval );
const T newval = .5 + val;
return( newval );
}
/*
@ -82,6 +85,7 @@ to_nosign( const double val )
* is used: There is an assumption that the data is such that
* over/underflow is not an issue:
*/
/*
* Bilinear interpolation for float and double types. The first four
* inputs are weights, the last four are the corresponding pixel
@ -243,26 +247,24 @@ bicubic_float(
static void inline
calculate_coefficients_catmull( const double x, double c[4] )
{
/* Nicolas believes that the following is an hitherto unknown
* hyper-efficient method of computing Catmull-Rom coefficients. It
* only uses 4* & 1+ & 5- for a total of only 10 flops to compute
* four coefficients.
*/
const double cr1 = 1. - x;
const double cr2 = -.5 * x;
const double cr3 = cr1 * cr2;
const double cone = cr1 * cr3;
const double cfou = x * cr3;
const double cr4 = cfou - cone;
const double ctwo = cr1 - cone + cr4;
const double cthr = x - cfou - cr4;
/*
* Nicolas believes that the following is an hitherto unknown
* hyper-efficient method of computing Catmull-Rom coefficients. It
* only uses 4* & 1+ & 5- for a total of only 10 flops to compute
* four coefficients.
*/
const double cr1 = 1. - x;
const double cr2 = -.5 * x;
const double cr3 = cr1 * cr2;
const double cone = cr1 * cr3;
const double cfou = x * cr3;
const double cr4 = cfou - cone;
const double ctwo = cr1 - cone + cr4;
const double cthr = x - cfou - cr4;
g_assert( x >= 0. && x <= 1. );
g_assert( x >= 0. && x <= 1. );
c[0] = cone;
c[3] = cfou;
c[1] = ctwo;
c[2] = cthr;
c[0] = cone;
c[3] = cfou;
c[1] = ctwo;
c[2] = cthr;
}

View File

@ -393,13 +393,9 @@ vips_interpolate_vsqbs_interpolate( VipsInterpolate* restrict interpolate,
static void
vips_interpolate_vsqbs_class_init( VipsInterpolateVsqbsClass *klass )
{
GObjectClass *gobject_class = G_OBJECT_CLASS( klass );
VipsObjectClass *object_class = VIPS_OBJECT_CLASS( klass );
VipsInterpolateClass *interpolate_class = VIPS_INTERPOLATE_CLASS( klass );
gobject_class->set_property = vips_object_set_property;
gobject_class->get_property = vips_object_get_property;
object_class->nickname = "vsqbs";
object_class->description = _( "B-Splines with antialiasing smoothing" );