speed up bilinear on float

This commit is contained in:
John Cupitt 2011-01-07 15:03:01 +00:00
parent 8bd2322b1f
commit dfd97464b0
4 changed files with 154 additions and 33 deletions

View File

@ -5,6 +5,8 @@
- add -lstdc++ to vips-7.xx.pc, if we used it
- im_vips2png() / im_png2vips() set / get png resolution (thanks Zhiyu Wu)
- updated README
- don't use tables for bilinear on float data for a small speedup (thanks
Nicolas)
30/11/10 started 7.24.0
- bump for new stable

18
TODO
View File

@ -1,3 +1,19 @@
- bilinear benchmarks
8-bit image, tables, fixed-point, 3.7s (7.24 one)
8-bit image, no tables, float, 5.2s
8-bit image, no tables, fixed-point, 4.2s
float image, tables, 5.2s (7.24 one)
float image, no tables, sp float coeff, 4.2s
float image, no tables, dp float coeff, 4.8s
- im_conv()/im_morph() could have more than 10 programs? try 20 and see if we
@ -8,6 +24,8 @@
- fits save
- lazy fits load

View File

@ -60,6 +60,12 @@
#include <dmalloc.h>
#endif /*WITH_DMALLOC*/
/* vips only supports 3 dimensions, but we allow up to MAX_DIMENSIONS as long
* as the higher dimensions are all empty. If you change this value, change
* fits2vips_get_header() as well.
*/
#define MAX_DIMENSIONS (10)
/* What we track during a cfitsio-file read.
*/
typedef struct {
@ -69,7 +75,7 @@ typedef struct {
fitsfile *fptr;
int datatype;
int naxis;
long long int naxes[10];
long long int naxes[MAX_DIMENSIONS];
} Read;
static void
@ -154,15 +160,18 @@ fits2vips_get_header( Read *read )
return( -1 );
}
#ifdef DEBUG
printf( "naxis = %d\n", read->naxis );
for( i = 0; i < read->naxis; i++ )
printf( "%d) %lld\n", i, read->naxes[i] );
#endif /*DEBUG*/
width = 1;
height = 1;
bands = 1;
switch( read->naxis ) {
/* If you add more dimensions here, adjust data read below.
/* If you add more dimensions here, adjust data read below. See also
* the definition of MAX_DIMENSIONS above.
*/
case 10:
case 9:
@ -284,8 +293,10 @@ fits2vips_header( const char *filename, IMAGE *out )
return( 0 );
}
/* Read the whole image in scanlines.
*/
static int
fits2vips_get_data( Read *read )
fits2vips_get_data_scanlinewise( Read *read )
{
IMAGE *im = read->out;
const int es = IM_IMAGE_SIZEOF_ELEMENT( im );
@ -296,6 +307,8 @@ fits2vips_get_data( Read *read )
int x, y, b, z;
int status;
long fpixel[MAX_DIMENSIONS];
status = 0;
if( !(line_buffer = IM_ARRAY( im, IM_IMAGE_SIZEOF_LINE( im ), PEL )) ||
@ -305,14 +318,9 @@ fits2vips_get_data( Read *read )
return( -1 );
for( y = 0; y < im->Ysize; y++ ) {
/* Keep max no of dimensions in line with the header check
* above.
*/
long int fpixel[10];
/* Start of scanline. We have to read top-to-bottom.
*/
for( b = 0; b < 10; b++ )
for( b = 0; b < MAX_DIMENSIONS; b++ )
fpixel[b] = 1;
fpixel[1] = im->Ysize - y;
@ -347,6 +355,94 @@ fits2vips_get_data( Read *read )
return( 0 );
}
static int
fits2vips_generate( REGION *out, void *seq, void *a, void *b )
{
Read *read = (Read *) a;
Rect *r = &out->valid;
IMAGE *im = read->out;
const int es = IM_IMAGE_SIZEOF_ELEMENT( im );
PEL *line_buffer;
PEL *band_buffer;
PEL *p, *q;
int x, y, z, k;
int status;
status = 0;
long fpixel[MAX_DIMENSIONS];
long lpixel[MAX_DIMENSIONS];
long inc[MAX_DIMENSIONS];
if( !(line_buffer = IM_ARRAY( im, IM_IMAGE_SIZEOF_LINE( im ), PEL )) ||
!(band_buffer = IM_ARRAY( im, es * im->Xsize, PEL )) ||
im_outcheck( im ) ||
im_setupout( im ) )
return( -1 );
/* Read out the entire
for( b = 0; b < MAX_DIMENSIONS; b++ )
fpixel[b] = 1;
fpixel[1] = im->Ysize - y;
if( fits_read_subset( read->fptr, read->datatype,
long *fpixel,
long *lpixel, long *inc, void *nulval, void *array,
int *anynul, int *status)
*/
for( y = 0; y < im->Ysize; y++ ) {
long int fpixel[MAX_DIMENSIONS];
/* Start of scanline. We have to read top-to-bottom.
*/
for( z = 0; z < MAX_DIMENSIONS; z++ )
fpixel[z] = 1;
fpixel[1] = im->Ysize - y;
for( z = 0; z < im->Bands; z++ ) {
fpixel[2] = z + 1;
/* Read one band of one scanline, then scatter-write
* into the line buffer.
*/
if( fits_read_pix( read->fptr,
read->datatype, fpixel, im->Xsize,
NULL, band_buffer, NULL, &status ) ) {
read_error( status );
return( -1 );
}
p = band_buffer;
q = line_buffer + z * es;
for( x = 0; x < im->Xsize; x++ ) {
for( k = 0; k < es; k++ )
q[k] = p[k];
p += es;
q += im->Bands * es;
}
}
if( im_writeline( y, im, line_buffer ) )
return( -1 );
}
return( 0 );
}
/* Read the image in chunks on demand.
*/
static int
fits2vips_get_data_lazy( Read *read )
{
return( im_generate( read->out,
NULL, fits2vips_generate, NULL, read, NULL ) );
}
/**
* im_fits2vips:
* @filename: file to load
@ -371,7 +467,7 @@ im_fits2vips( const char *filename, IMAGE *out )
if( !(read = read_new( filename, out )) )
return( -1 );
if( fits2vips_get_header( read ) ||
fits2vips_get_data( read ) ) {
fits2vips_get_data_scanlinewise( read ) ) {
read_destroy( read );
return( -1 );
}

View File

@ -7,6 +7,9 @@
* defaults to (window_size / 2 - 1), so for a 4x4 stencil (eg.
* bicubic) we have an offset of 1
* - tiny speedups
* 7/1/11
* - don't use tables for bilinear on float data for a small speedup
* (thanks Nicolas)
*/
/*
@ -323,15 +326,12 @@ typedef VipsInterpolateClass VipsInterpolateBilinearClass;
G_DEFINE_TYPE( VipsInterpolateBilinear, vips_interpolate_bilinear,
VIPS_TYPE_INTERPOLATE );
/* Precalculated interpolation matricies. int (used for pel sizes up
* to short), and float (for all others). We go to scale + 1 so
* we can round-to-nearest safely. Don't bother with double, since
* this is an approximation anyway.
/* Precalculated interpolation matricies, only for int types.
* We go to scale + 1 so
* we can round-to-nearest safely.
*/
static int vips_bilinear_matrixi
[VIPS_TRANSFORM_SCALE + 1][VIPS_TRANSFORM_SCALE + 1][4];
static float vips_bilinear_matrixd
[VIPS_TRANSFORM_SCALE + 1][VIPS_TRANSFORM_SCALE + 1][4];
/* in this class, name vars in the 2x2 grid as eg.
* p1 p2
@ -344,6 +344,12 @@ static float vips_bilinear_matrixd
#define BILINEAR_INT( TYPE ) { \
TYPE *tq = (TYPE *) out; \
\
const int six = sx & (VIPS_TRANSFORM_SCALE * 2 - 1); \
const int siy = sy & (VIPS_TRANSFORM_SCALE * 2 - 1); \
\
const int tx = (six + 1) >> 1; \
const int ty = (siy + 1) >> 1; \
\
const int c1 = vips_bilinear_matrixi[tx][ty][0]; \
const int c2 = vips_bilinear_matrixi[tx][ty][1]; \
const int c3 = vips_bilinear_matrixi[tx][ty][2]; \
@ -364,12 +370,18 @@ static float vips_bilinear_matrixd
*/
#define BILINEAR_FLOAT( TYPE ) { \
TYPE *tq = (TYPE *) out; \
\
const double c1 = vips_bilinear_matrixd[tx][ty][0]; \
const double c2 = vips_bilinear_matrixd[tx][ty][1]; \
const double c3 = vips_bilinear_matrixd[tx][ty][2]; \
const double c4 = vips_bilinear_matrixd[tx][ty][3]; \
\
float X = x - ix; \
float Y = y - iy; \
\
float Xd = 1.0 - X; \
float Yd = 1.0 - Y; \
\
float c1 = Xd * Yd; \
float c2 = X * Yd; \
float c3 = Xd * Y; \
float c4 = X * Y; \
\
const TYPE *tp1 = (TYPE *) p1; \
const TYPE *tp2 = (TYPE *) p2; \
const TYPE *tp3 = (TYPE *) p3; \
@ -408,6 +420,10 @@ vips_interpolate_bilinear_interpolate( VipsInterpolate *interpolate,
const int ls = IM_REGION_LSKIP( in );
const int b = in->im->Bands;
/* We want ((int)x), but the tables versions needs to find a mask
* index quickly from the residual. Calculate both.
*/
/* Find the mask index. We round-to-nearest, so we need to generate
* indexes in 0 to VIPS_TRANSFORM_SCALE, 2^n + 1 values. We multiply
* by 2 more than we need to, add one, mask, then shift down again to
@ -416,13 +432,7 @@ vips_interpolate_bilinear_interpolate( VipsInterpolate *interpolate,
const int sx = x * VIPS_TRANSFORM_SCALE * 2;
const int sy = y * VIPS_TRANSFORM_SCALE * 2;
const int six = sx & (VIPS_TRANSFORM_SCALE * 2 - 1);
const int siy = sy & (VIPS_TRANSFORM_SCALE * 2 - 1);
const int tx = (six + 1) >> 1;
const int ty = (siy + 1) >> 1;
/* We want ((int)x) ... void repeating this double -> int conversion
/* We want ((int)x) ... avoid repeating this double -> int conversion
* by just shifting sx down.
*/
const int ix = sx >> (VIPS_TRANSFORM_SHIFT + 1);
@ -474,11 +484,6 @@ vips_interpolate_bilinear_class_init( VipsInterpolateBilinearClass *class )
c3 = Xd * Y;
c4 = X * Y;
vips_bilinear_matrixd[x][y][0] = c1;
vips_bilinear_matrixd[x][y][1] = c2;
vips_bilinear_matrixd[x][y][2] = c3;
vips_bilinear_matrixd[x][y][3] = c4;
vips_bilinear_matrixi[x][y][0] =
c1 * VIPS_INTERPOLATE_SCALE;
vips_bilinear_matrixi[x][y][1] =