speed up bilinear on float

This commit is contained in:
John Cupitt 2011-01-07 15:03:01 +00:00
parent 8bd2322b1f
commit dfd97464b0
4 changed files with 154 additions and 33 deletions

View File

@ -5,6 +5,8 @@
- add -lstdc++ to vips-7.xx.pc, if we used it - add -lstdc++ to vips-7.xx.pc, if we used it
- im_vips2png() / im_png2vips() set / get png resolution (thanks Zhiyu Wu) - im_vips2png() / im_png2vips() set / get png resolution (thanks Zhiyu Wu)
- updated README - updated README
- don't use tables for bilinear on float data for a small speedup (thanks
Nicolas)
30/11/10 started 7.24.0 30/11/10 started 7.24.0
- bump for new stable - bump for new stable

18
TODO
View File

@ -1,3 +1,19 @@
- bilinear benchmarks
8-bit image, tables, fixed-point, 3.7s (7.24 one)
8-bit image, no tables, float, 5.2s
8-bit image, no tables, fixed-point, 4.2s
float image, tables, 5.2s (7.24 one)
float image, no tables, sp float coeff, 4.2s
float image, no tables, dp float coeff, 4.8s
- im_conv()/im_morph() could have more than 10 programs? try 20 and see if we - im_conv()/im_morph() could have more than 10 programs? try 20 and see if we
@ -8,6 +24,8 @@
- fits save - fits save
- lazy fits load

View File

@ -60,6 +60,12 @@
#include <dmalloc.h> #include <dmalloc.h>
#endif /*WITH_DMALLOC*/ #endif /*WITH_DMALLOC*/
/* vips only supports 3 dimensions, but we allow up to MAX_DIMENSIONS as long
* as the higher dimensions are all empty. If you change this value, change
* fits2vips_get_header() as well.
*/
#define MAX_DIMENSIONS (10)
/* What we track during a cfitsio-file read. /* What we track during a cfitsio-file read.
*/ */
typedef struct { typedef struct {
@ -69,7 +75,7 @@ typedef struct {
fitsfile *fptr; fitsfile *fptr;
int datatype; int datatype;
int naxis; int naxis;
long long int naxes[10]; long long int naxes[MAX_DIMENSIONS];
} Read; } Read;
static void static void
@ -154,15 +160,18 @@ fits2vips_get_header( Read *read )
return( -1 ); return( -1 );
} }
#ifdef DEBUG
printf( "naxis = %d\n", read->naxis ); printf( "naxis = %d\n", read->naxis );
for( i = 0; i < read->naxis; i++ ) for( i = 0; i < read->naxis; i++ )
printf( "%d) %lld\n", i, read->naxes[i] ); printf( "%d) %lld\n", i, read->naxes[i] );
#endif /*DEBUG*/
width = 1; width = 1;
height = 1; height = 1;
bands = 1; bands = 1;
switch( read->naxis ) { switch( read->naxis ) {
/* If you add more dimensions here, adjust data read below. /* If you add more dimensions here, adjust data read below. See also
* the definition of MAX_DIMENSIONS above.
*/ */
case 10: case 10:
case 9: case 9:
@ -284,8 +293,10 @@ fits2vips_header( const char *filename, IMAGE *out )
return( 0 ); return( 0 );
} }
/* Read the whole image in scanlines.
*/
static int static int
fits2vips_get_data( Read *read ) fits2vips_get_data_scanlinewise( Read *read )
{ {
IMAGE *im = read->out; IMAGE *im = read->out;
const int es = IM_IMAGE_SIZEOF_ELEMENT( im ); const int es = IM_IMAGE_SIZEOF_ELEMENT( im );
@ -296,6 +307,8 @@ fits2vips_get_data( Read *read )
int x, y, b, z; int x, y, b, z;
int status; int status;
long fpixel[MAX_DIMENSIONS];
status = 0; status = 0;
if( !(line_buffer = IM_ARRAY( im, IM_IMAGE_SIZEOF_LINE( im ), PEL )) || if( !(line_buffer = IM_ARRAY( im, IM_IMAGE_SIZEOF_LINE( im ), PEL )) ||
@ -305,14 +318,9 @@ fits2vips_get_data( Read *read )
return( -1 ); return( -1 );
for( y = 0; y < im->Ysize; y++ ) { for( y = 0; y < im->Ysize; y++ ) {
/* Keep max no of dimensions in line with the header check
* above.
*/
long int fpixel[10];
/* Start of scanline. We have to read top-to-bottom. /* Start of scanline. We have to read top-to-bottom.
*/ */
for( b = 0; b < 10; b++ ) for( b = 0; b < MAX_DIMENSIONS; b++ )
fpixel[b] = 1; fpixel[b] = 1;
fpixel[1] = im->Ysize - y; fpixel[1] = im->Ysize - y;
@ -347,6 +355,94 @@ fits2vips_get_data( Read *read )
return( 0 ); return( 0 );
} }
static int
fits2vips_generate( REGION *out, void *seq, void *a, void *b )
{
Read *read = (Read *) a;
Rect *r = &out->valid;
IMAGE *im = read->out;
const int es = IM_IMAGE_SIZEOF_ELEMENT( im );
PEL *line_buffer;
PEL *band_buffer;
PEL *p, *q;
int x, y, z, k;
int status;
status = 0;
long fpixel[MAX_DIMENSIONS];
long lpixel[MAX_DIMENSIONS];
long inc[MAX_DIMENSIONS];
if( !(line_buffer = IM_ARRAY( im, IM_IMAGE_SIZEOF_LINE( im ), PEL )) ||
!(band_buffer = IM_ARRAY( im, es * im->Xsize, PEL )) ||
im_outcheck( im ) ||
im_setupout( im ) )
return( -1 );
/* Read out the entire
for( b = 0; b < MAX_DIMENSIONS; b++ )
fpixel[b] = 1;
fpixel[1] = im->Ysize - y;
if( fits_read_subset( read->fptr, read->datatype,
long *fpixel,
long *lpixel, long *inc, void *nulval, void *array,
int *anynul, int *status)
*/
for( y = 0; y < im->Ysize; y++ ) {
long int fpixel[MAX_DIMENSIONS];
/* Start of scanline. We have to read top-to-bottom.
*/
for( z = 0; z < MAX_DIMENSIONS; z++ )
fpixel[z] = 1;
fpixel[1] = im->Ysize - y;
for( z = 0; z < im->Bands; z++ ) {
fpixel[2] = z + 1;
/* Read one band of one scanline, then scatter-write
* into the line buffer.
*/
if( fits_read_pix( read->fptr,
read->datatype, fpixel, im->Xsize,
NULL, band_buffer, NULL, &status ) ) {
read_error( status );
return( -1 );
}
p = band_buffer;
q = line_buffer + z * es;
for( x = 0; x < im->Xsize; x++ ) {
for( k = 0; k < es; k++ )
q[k] = p[k];
p += es;
q += im->Bands * es;
}
}
if( im_writeline( y, im, line_buffer ) )
return( -1 );
}
return( 0 );
}
/* Read the image in chunks on demand.
*/
static int
fits2vips_get_data_lazy( Read *read )
{
return( im_generate( read->out,
NULL, fits2vips_generate, NULL, read, NULL ) );
}
/** /**
* im_fits2vips: * im_fits2vips:
* @filename: file to load * @filename: file to load
@ -371,7 +467,7 @@ im_fits2vips( const char *filename, IMAGE *out )
if( !(read = read_new( filename, out )) ) if( !(read = read_new( filename, out )) )
return( -1 ); return( -1 );
if( fits2vips_get_header( read ) || if( fits2vips_get_header( read ) ||
fits2vips_get_data( read ) ) { fits2vips_get_data_scanlinewise( read ) ) {
read_destroy( read ); read_destroy( read );
return( -1 ); return( -1 );
} }

View File

@ -7,6 +7,9 @@
* defaults to (window_size / 2 - 1), so for a 4x4 stencil (eg. * defaults to (window_size / 2 - 1), so for a 4x4 stencil (eg.
* bicubic) we have an offset of 1 * bicubic) we have an offset of 1
* - tiny speedups * - tiny speedups
* 7/1/11
* - don't use tables for bilinear on float data for a small speedup
* (thanks Nicolas)
*/ */
/* /*
@ -323,15 +326,12 @@ typedef VipsInterpolateClass VipsInterpolateBilinearClass;
G_DEFINE_TYPE( VipsInterpolateBilinear, vips_interpolate_bilinear, G_DEFINE_TYPE( VipsInterpolateBilinear, vips_interpolate_bilinear,
VIPS_TYPE_INTERPOLATE ); VIPS_TYPE_INTERPOLATE );
/* Precalculated interpolation matricies. int (used for pel sizes up /* Precalculated interpolation matricies, only for int types.
* to short), and float (for all others). We go to scale + 1 so * We go to scale + 1 so
* we can round-to-nearest safely. Don't bother with double, since * we can round-to-nearest safely.
* this is an approximation anyway.
*/ */
static int vips_bilinear_matrixi static int vips_bilinear_matrixi
[VIPS_TRANSFORM_SCALE + 1][VIPS_TRANSFORM_SCALE + 1][4]; [VIPS_TRANSFORM_SCALE + 1][VIPS_TRANSFORM_SCALE + 1][4];
static float vips_bilinear_matrixd
[VIPS_TRANSFORM_SCALE + 1][VIPS_TRANSFORM_SCALE + 1][4];
/* in this class, name vars in the 2x2 grid as eg. /* in this class, name vars in the 2x2 grid as eg.
* p1 p2 * p1 p2
@ -344,6 +344,12 @@ static float vips_bilinear_matrixd
#define BILINEAR_INT( TYPE ) { \ #define BILINEAR_INT( TYPE ) { \
TYPE *tq = (TYPE *) out; \ TYPE *tq = (TYPE *) out; \
\ \
const int six = sx & (VIPS_TRANSFORM_SCALE * 2 - 1); \
const int siy = sy & (VIPS_TRANSFORM_SCALE * 2 - 1); \
\
const int tx = (six + 1) >> 1; \
const int ty = (siy + 1) >> 1; \
\
const int c1 = vips_bilinear_matrixi[tx][ty][0]; \ const int c1 = vips_bilinear_matrixi[tx][ty][0]; \
const int c2 = vips_bilinear_matrixi[tx][ty][1]; \ const int c2 = vips_bilinear_matrixi[tx][ty][1]; \
const int c3 = vips_bilinear_matrixi[tx][ty][2]; \ const int c3 = vips_bilinear_matrixi[tx][ty][2]; \
@ -365,10 +371,16 @@ static float vips_bilinear_matrixd
#define BILINEAR_FLOAT( TYPE ) { \ #define BILINEAR_FLOAT( TYPE ) { \
TYPE *tq = (TYPE *) out; \ TYPE *tq = (TYPE *) out; \
\ \
const double c1 = vips_bilinear_matrixd[tx][ty][0]; \ float X = x - ix; \
const double c2 = vips_bilinear_matrixd[tx][ty][1]; \ float Y = y - iy; \
const double c3 = vips_bilinear_matrixd[tx][ty][2]; \ \
const double c4 = vips_bilinear_matrixd[tx][ty][3]; \ float Xd = 1.0 - X; \
float Yd = 1.0 - Y; \
\
float c1 = Xd * Yd; \
float c2 = X * Yd; \
float c3 = Xd * Y; \
float c4 = X * Y; \
\ \
const TYPE *tp1 = (TYPE *) p1; \ const TYPE *tp1 = (TYPE *) p1; \
const TYPE *tp2 = (TYPE *) p2; \ const TYPE *tp2 = (TYPE *) p2; \
@ -408,6 +420,10 @@ vips_interpolate_bilinear_interpolate( VipsInterpolate *interpolate,
const int ls = IM_REGION_LSKIP( in ); const int ls = IM_REGION_LSKIP( in );
const int b = in->im->Bands; const int b = in->im->Bands;
/* We want ((int)x), but the tables versions needs to find a mask
* index quickly from the residual. Calculate both.
*/
/* Find the mask index. We round-to-nearest, so we need to generate /* Find the mask index. We round-to-nearest, so we need to generate
* indexes in 0 to VIPS_TRANSFORM_SCALE, 2^n + 1 values. We multiply * indexes in 0 to VIPS_TRANSFORM_SCALE, 2^n + 1 values. We multiply
* by 2 more than we need to, add one, mask, then shift down again to * by 2 more than we need to, add one, mask, then shift down again to
@ -416,13 +432,7 @@ vips_interpolate_bilinear_interpolate( VipsInterpolate *interpolate,
const int sx = x * VIPS_TRANSFORM_SCALE * 2; const int sx = x * VIPS_TRANSFORM_SCALE * 2;
const int sy = y * VIPS_TRANSFORM_SCALE * 2; const int sy = y * VIPS_TRANSFORM_SCALE * 2;
const int six = sx & (VIPS_TRANSFORM_SCALE * 2 - 1); /* We want ((int)x) ... avoid repeating this double -> int conversion
const int siy = sy & (VIPS_TRANSFORM_SCALE * 2 - 1);
const int tx = (six + 1) >> 1;
const int ty = (siy + 1) >> 1;
/* We want ((int)x) ... void repeating this double -> int conversion
* by just shifting sx down. * by just shifting sx down.
*/ */
const int ix = sx >> (VIPS_TRANSFORM_SHIFT + 1); const int ix = sx >> (VIPS_TRANSFORM_SHIFT + 1);
@ -474,11 +484,6 @@ vips_interpolate_bilinear_class_init( VipsInterpolateBilinearClass *class )
c3 = Xd * Y; c3 = Xd * Y;
c4 = X * Y; c4 = X * Y;
vips_bilinear_matrixd[x][y][0] = c1;
vips_bilinear_matrixd[x][y][1] = c2;
vips_bilinear_matrixd[x][y][2] = c3;
vips_bilinear_matrixd[x][y][3] = c4;
vips_bilinear_matrixi[x][y][0] = vips_bilinear_matrixi[x][y][0] =
c1 * VIPS_INTERPOLATE_SCALE; c1 * VIPS_INTERPOLATE_SCALE;
vips_bilinear_matrixi[x][y][1] = vips_bilinear_matrixi[x][y][1] =