From dfd97464b002af26f60a496be701f77f16484674 Mon Sep 17 00:00:00 2001 From: John Cupitt Date: Fri, 7 Jan 2011 15:03:01 +0000 Subject: [PATCH] speed up bilinear on float --- ChangeLog | 2 + TODO | 18 +++++ libvips/format/fits.c | 116 ++++++++++++++++++++++++++++++--- libvips/resample/interpolate.c | 51 ++++++++------- 4 files changed, 154 insertions(+), 33 deletions(-) diff --git a/ChangeLog b/ChangeLog index de702aa0..84a4bace 100644 --- a/ChangeLog +++ b/ChangeLog @@ -5,6 +5,8 @@ - add -lstdc++ to vips-7.xx.pc, if we used it - im_vips2png() / im_png2vips() set / get png resolution (thanks Zhiyu Wu) - updated README +- don't use tables for bilinear on float data for a small speedup (thanks + Nicolas) 30/11/10 started 7.24.0 - bump for new stable diff --git a/TODO b/TODO index 2bbbd25c..66b356b6 100644 --- a/TODO +++ b/TODO @@ -1,3 +1,19 @@ +- bilinear benchmarks + + 8-bit image, tables, fixed-point, 3.7s (7.24 one) + 8-bit image, no tables, float, 5.2s + 8-bit image, no tables, fixed-point, 4.2s + + float image, tables, 5.2s (7.24 one) + float image, no tables, sp float coeff, 4.2s + float image, no tables, dp float coeff, 4.8s + + + + + + + - im_conv()/im_morph() could have more than 10 programs? try 20 and see if we @@ -8,6 +24,8 @@ - fits save +- lazy fits load + diff --git a/libvips/format/fits.c b/libvips/format/fits.c index ef1886cb..f281879b 100644 --- a/libvips/format/fits.c +++ b/libvips/format/fits.c @@ -60,6 +60,12 @@ #include #endif /*WITH_DMALLOC*/ +/* vips only supports 3 dimensions, but we allow up to MAX_DIMENSIONS as long + * as the higher dimensions are all empty. If you change this value, change + * fits2vips_get_header() as well. + */ +#define MAX_DIMENSIONS (10) + /* What we track during a cfitsio-file read. */ typedef struct { @@ -69,7 +75,7 @@ typedef struct { fitsfile *fptr; int datatype; int naxis; - long long int naxes[10]; + long long int naxes[MAX_DIMENSIONS]; } Read; static void @@ -154,15 +160,18 @@ fits2vips_get_header( Read *read ) return( -1 ); } +#ifdef DEBUG printf( "naxis = %d\n", read->naxis ); for( i = 0; i < read->naxis; i++ ) printf( "%d) %lld\n", i, read->naxes[i] ); +#endif /*DEBUG*/ width = 1; height = 1; bands = 1; switch( read->naxis ) { - /* If you add more dimensions here, adjust data read below. + /* If you add more dimensions here, adjust data read below. See also + * the definition of MAX_DIMENSIONS above. */ case 10: case 9: @@ -284,8 +293,10 @@ fits2vips_header( const char *filename, IMAGE *out ) return( 0 ); } +/* Read the whole image in scanlines. + */ static int -fits2vips_get_data( Read *read ) +fits2vips_get_data_scanlinewise( Read *read ) { IMAGE *im = read->out; const int es = IM_IMAGE_SIZEOF_ELEMENT( im ); @@ -296,6 +307,8 @@ fits2vips_get_data( Read *read ) int x, y, b, z; int status; + long fpixel[MAX_DIMENSIONS]; + status = 0; if( !(line_buffer = IM_ARRAY( im, IM_IMAGE_SIZEOF_LINE( im ), PEL )) || @@ -305,14 +318,9 @@ fits2vips_get_data( Read *read ) return( -1 ); for( y = 0; y < im->Ysize; y++ ) { - /* Keep max no of dimensions in line with the header check - * above. - */ - long int fpixel[10]; - /* Start of scanline. We have to read top-to-bottom. */ - for( b = 0; b < 10; b++ ) + for( b = 0; b < MAX_DIMENSIONS; b++ ) fpixel[b] = 1; fpixel[1] = im->Ysize - y; @@ -347,6 +355,94 @@ fits2vips_get_data( Read *read ) return( 0 ); } +static int +fits2vips_generate( REGION *out, void *seq, void *a, void *b ) +{ + Read *read = (Read *) a; + Rect *r = &out->valid; + + IMAGE *im = read->out; + const int es = IM_IMAGE_SIZEOF_ELEMENT( im ); + + PEL *line_buffer; + PEL *band_buffer; + PEL *p, *q; + int x, y, z, k; + int status; + + status = 0; + + long fpixel[MAX_DIMENSIONS]; + long lpixel[MAX_DIMENSIONS]; + long inc[MAX_DIMENSIONS]; + + if( !(line_buffer = IM_ARRAY( im, IM_IMAGE_SIZEOF_LINE( im ), PEL )) || + !(band_buffer = IM_ARRAY( im, es * im->Xsize, PEL )) || + im_outcheck( im ) || + im_setupout( im ) ) + return( -1 ); + + /* Read out the entire + for( b = 0; b < MAX_DIMENSIONS; b++ ) + fpixel[b] = 1; + fpixel[1] = im->Ysize - y; + + if( fits_read_subset( read->fptr, read->datatype, + long *fpixel, + long *lpixel, long *inc, void *nulval, void *array, + int *anynul, int *status) + */ + + + for( y = 0; y < im->Ysize; y++ ) { + long int fpixel[MAX_DIMENSIONS]; + + /* Start of scanline. We have to read top-to-bottom. + */ + for( z = 0; z < MAX_DIMENSIONS; z++ ) + fpixel[z] = 1; + fpixel[1] = im->Ysize - y; + + for( z = 0; z < im->Bands; z++ ) { + fpixel[2] = z + 1; + + /* Read one band of one scanline, then scatter-write + * into the line buffer. + */ + if( fits_read_pix( read->fptr, + read->datatype, fpixel, im->Xsize, + NULL, band_buffer, NULL, &status ) ) { + read_error( status ); + return( -1 ); + } + + p = band_buffer; + q = line_buffer + z * es; + for( x = 0; x < im->Xsize; x++ ) { + for( k = 0; k < es; k++ ) + q[k] = p[k]; + + p += es; + q += im->Bands * es; + } + } + + if( im_writeline( y, im, line_buffer ) ) + return( -1 ); + } + + return( 0 ); +} + +/* Read the image in chunks on demand. + */ +static int +fits2vips_get_data_lazy( Read *read ) +{ + return( im_generate( read->out, + NULL, fits2vips_generate, NULL, read, NULL ) ); +} + /** * im_fits2vips: * @filename: file to load @@ -371,7 +467,7 @@ im_fits2vips( const char *filename, IMAGE *out ) if( !(read = read_new( filename, out )) ) return( -1 ); if( fits2vips_get_header( read ) || - fits2vips_get_data( read ) ) { + fits2vips_get_data_scanlinewise( read ) ) { read_destroy( read ); return( -1 ); } diff --git a/libvips/resample/interpolate.c b/libvips/resample/interpolate.c index 95bcf90d..849cbc10 100644 --- a/libvips/resample/interpolate.c +++ b/libvips/resample/interpolate.c @@ -7,6 +7,9 @@ * defaults to (window_size / 2 - 1), so for a 4x4 stencil (eg. * bicubic) we have an offset of 1 * - tiny speedups + * 7/1/11 + * - don't use tables for bilinear on float data for a small speedup + * (thanks Nicolas) */ /* @@ -323,15 +326,12 @@ typedef VipsInterpolateClass VipsInterpolateBilinearClass; G_DEFINE_TYPE( VipsInterpolateBilinear, vips_interpolate_bilinear, VIPS_TYPE_INTERPOLATE ); -/* Precalculated interpolation matricies. int (used for pel sizes up - * to short), and float (for all others). We go to scale + 1 so - * we can round-to-nearest safely. Don't bother with double, since - * this is an approximation anyway. +/* Precalculated interpolation matricies, only for int types. + * We go to scale + 1 so + * we can round-to-nearest safely. */ static int vips_bilinear_matrixi [VIPS_TRANSFORM_SCALE + 1][VIPS_TRANSFORM_SCALE + 1][4]; -static float vips_bilinear_matrixd - [VIPS_TRANSFORM_SCALE + 1][VIPS_TRANSFORM_SCALE + 1][4]; /* in this class, name vars in the 2x2 grid as eg. * p1 p2 @@ -344,6 +344,12 @@ static float vips_bilinear_matrixd #define BILINEAR_INT( TYPE ) { \ TYPE *tq = (TYPE *) out; \ \ + const int six = sx & (VIPS_TRANSFORM_SCALE * 2 - 1); \ + const int siy = sy & (VIPS_TRANSFORM_SCALE * 2 - 1); \ + \ + const int tx = (six + 1) >> 1; \ + const int ty = (siy + 1) >> 1; \ + \ const int c1 = vips_bilinear_matrixi[tx][ty][0]; \ const int c2 = vips_bilinear_matrixi[tx][ty][1]; \ const int c3 = vips_bilinear_matrixi[tx][ty][2]; \ @@ -364,12 +370,18 @@ static float vips_bilinear_matrixd */ #define BILINEAR_FLOAT( TYPE ) { \ TYPE *tq = (TYPE *) out; \ - \ - const double c1 = vips_bilinear_matrixd[tx][ty][0]; \ - const double c2 = vips_bilinear_matrixd[tx][ty][1]; \ - const double c3 = vips_bilinear_matrixd[tx][ty][2]; \ - const double c4 = vips_bilinear_matrixd[tx][ty][3]; \ \ + float X = x - ix; \ + float Y = y - iy; \ + \ + float Xd = 1.0 - X; \ + float Yd = 1.0 - Y; \ + \ + float c1 = Xd * Yd; \ + float c2 = X * Yd; \ + float c3 = Xd * Y; \ + float c4 = X * Y; \ + \ const TYPE *tp1 = (TYPE *) p1; \ const TYPE *tp2 = (TYPE *) p2; \ const TYPE *tp3 = (TYPE *) p3; \ @@ -408,6 +420,10 @@ vips_interpolate_bilinear_interpolate( VipsInterpolate *interpolate, const int ls = IM_REGION_LSKIP( in ); const int b = in->im->Bands; + /* We want ((int)x), but the tables versions needs to find a mask + * index quickly from the residual. Calculate both. + */ + /* Find the mask index. We round-to-nearest, so we need to generate * indexes in 0 to VIPS_TRANSFORM_SCALE, 2^n + 1 values. We multiply * by 2 more than we need to, add one, mask, then shift down again to @@ -416,13 +432,7 @@ vips_interpolate_bilinear_interpolate( VipsInterpolate *interpolate, const int sx = x * VIPS_TRANSFORM_SCALE * 2; const int sy = y * VIPS_TRANSFORM_SCALE * 2; - const int six = sx & (VIPS_TRANSFORM_SCALE * 2 - 1); - const int siy = sy & (VIPS_TRANSFORM_SCALE * 2 - 1); - - const int tx = (six + 1) >> 1; - const int ty = (siy + 1) >> 1; - - /* We want ((int)x) ... void repeating this double -> int conversion + /* We want ((int)x) ... avoid repeating this double -> int conversion * by just shifting sx down. */ const int ix = sx >> (VIPS_TRANSFORM_SHIFT + 1); @@ -474,11 +484,6 @@ vips_interpolate_bilinear_class_init( VipsInterpolateBilinearClass *class ) c3 = Xd * Y; c4 = X * Y; - vips_bilinear_matrixd[x][y][0] = c1; - vips_bilinear_matrixd[x][y][1] = c2; - vips_bilinear_matrixd[x][y][2] = c3; - vips_bilinear_matrixd[x][y][3] = c4; - vips_bilinear_matrixi[x][y][0] = c1 * VIPS_INTERPOLATE_SCALE; vips_bilinear_matrixi[x][y][1] =