speed up bilinear on float
This commit is contained in:
parent
8bd2322b1f
commit
dfd97464b0
@ -5,6 +5,8 @@
|
||||
- add -lstdc++ to vips-7.xx.pc, if we used it
|
||||
- im_vips2png() / im_png2vips() set / get png resolution (thanks Zhiyu Wu)
|
||||
- updated README
|
||||
- don't use tables for bilinear on float data for a small speedup (thanks
|
||||
Nicolas)
|
||||
|
||||
30/11/10 started 7.24.0
|
||||
- bump for new stable
|
||||
|
18
TODO
18
TODO
@ -1,3 +1,19 @@
|
||||
- bilinear benchmarks
|
||||
|
||||
8-bit image, tables, fixed-point, 3.7s (7.24 one)
|
||||
8-bit image, no tables, float, 5.2s
|
||||
8-bit image, no tables, fixed-point, 4.2s
|
||||
|
||||
float image, tables, 5.2s (7.24 one)
|
||||
float image, no tables, sp float coeff, 4.2s
|
||||
float image, no tables, dp float coeff, 4.8s
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
- im_conv()/im_morph() could have more than 10 programs? try 20 and see if we
|
||||
@ -8,6 +24,8 @@
|
||||
|
||||
- fits save
|
||||
|
||||
- lazy fits load
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -60,6 +60,12 @@
|
||||
#include <dmalloc.h>
|
||||
#endif /*WITH_DMALLOC*/
|
||||
|
||||
/* vips only supports 3 dimensions, but we allow up to MAX_DIMENSIONS as long
|
||||
* as the higher dimensions are all empty. If you change this value, change
|
||||
* fits2vips_get_header() as well.
|
||||
*/
|
||||
#define MAX_DIMENSIONS (10)
|
||||
|
||||
/* What we track during a cfitsio-file read.
|
||||
*/
|
||||
typedef struct {
|
||||
@ -69,7 +75,7 @@ typedef struct {
|
||||
fitsfile *fptr;
|
||||
int datatype;
|
||||
int naxis;
|
||||
long long int naxes[10];
|
||||
long long int naxes[MAX_DIMENSIONS];
|
||||
} Read;
|
||||
|
||||
static void
|
||||
@ -154,15 +160,18 @@ fits2vips_get_header( Read *read )
|
||||
return( -1 );
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
printf( "naxis = %d\n", read->naxis );
|
||||
for( i = 0; i < read->naxis; i++ )
|
||||
printf( "%d) %lld\n", i, read->naxes[i] );
|
||||
#endif /*DEBUG*/
|
||||
|
||||
width = 1;
|
||||
height = 1;
|
||||
bands = 1;
|
||||
switch( read->naxis ) {
|
||||
/* If you add more dimensions here, adjust data read below.
|
||||
/* If you add more dimensions here, adjust data read below. See also
|
||||
* the definition of MAX_DIMENSIONS above.
|
||||
*/
|
||||
case 10:
|
||||
case 9:
|
||||
@ -284,8 +293,10 @@ fits2vips_header( const char *filename, IMAGE *out )
|
||||
return( 0 );
|
||||
}
|
||||
|
||||
/* Read the whole image in scanlines.
|
||||
*/
|
||||
static int
|
||||
fits2vips_get_data( Read *read )
|
||||
fits2vips_get_data_scanlinewise( Read *read )
|
||||
{
|
||||
IMAGE *im = read->out;
|
||||
const int es = IM_IMAGE_SIZEOF_ELEMENT( im );
|
||||
@ -296,6 +307,8 @@ fits2vips_get_data( Read *read )
|
||||
int x, y, b, z;
|
||||
int status;
|
||||
|
||||
long fpixel[MAX_DIMENSIONS];
|
||||
|
||||
status = 0;
|
||||
|
||||
if( !(line_buffer = IM_ARRAY( im, IM_IMAGE_SIZEOF_LINE( im ), PEL )) ||
|
||||
@ -305,14 +318,9 @@ fits2vips_get_data( Read *read )
|
||||
return( -1 );
|
||||
|
||||
for( y = 0; y < im->Ysize; y++ ) {
|
||||
/* Keep max no of dimensions in line with the header check
|
||||
* above.
|
||||
*/
|
||||
long int fpixel[10];
|
||||
|
||||
/* Start of scanline. We have to read top-to-bottom.
|
||||
*/
|
||||
for( b = 0; b < 10; b++ )
|
||||
for( b = 0; b < MAX_DIMENSIONS; b++ )
|
||||
fpixel[b] = 1;
|
||||
fpixel[1] = im->Ysize - y;
|
||||
|
||||
@ -347,6 +355,94 @@ fits2vips_get_data( Read *read )
|
||||
return( 0 );
|
||||
}
|
||||
|
||||
static int
|
||||
fits2vips_generate( REGION *out, void *seq, void *a, void *b )
|
||||
{
|
||||
Read *read = (Read *) a;
|
||||
Rect *r = &out->valid;
|
||||
|
||||
IMAGE *im = read->out;
|
||||
const int es = IM_IMAGE_SIZEOF_ELEMENT( im );
|
||||
|
||||
PEL *line_buffer;
|
||||
PEL *band_buffer;
|
||||
PEL *p, *q;
|
||||
int x, y, z, k;
|
||||
int status;
|
||||
|
||||
status = 0;
|
||||
|
||||
long fpixel[MAX_DIMENSIONS];
|
||||
long lpixel[MAX_DIMENSIONS];
|
||||
long inc[MAX_DIMENSIONS];
|
||||
|
||||
if( !(line_buffer = IM_ARRAY( im, IM_IMAGE_SIZEOF_LINE( im ), PEL )) ||
|
||||
!(band_buffer = IM_ARRAY( im, es * im->Xsize, PEL )) ||
|
||||
im_outcheck( im ) ||
|
||||
im_setupout( im ) )
|
||||
return( -1 );
|
||||
|
||||
/* Read out the entire
|
||||
for( b = 0; b < MAX_DIMENSIONS; b++ )
|
||||
fpixel[b] = 1;
|
||||
fpixel[1] = im->Ysize - y;
|
||||
|
||||
if( fits_read_subset( read->fptr, read->datatype,
|
||||
long *fpixel,
|
||||
long *lpixel, long *inc, void *nulval, void *array,
|
||||
int *anynul, int *status)
|
||||
*/
|
||||
|
||||
|
||||
for( y = 0; y < im->Ysize; y++ ) {
|
||||
long int fpixel[MAX_DIMENSIONS];
|
||||
|
||||
/* Start of scanline. We have to read top-to-bottom.
|
||||
*/
|
||||
for( z = 0; z < MAX_DIMENSIONS; z++ )
|
||||
fpixel[z] = 1;
|
||||
fpixel[1] = im->Ysize - y;
|
||||
|
||||
for( z = 0; z < im->Bands; z++ ) {
|
||||
fpixel[2] = z + 1;
|
||||
|
||||
/* Read one band of one scanline, then scatter-write
|
||||
* into the line buffer.
|
||||
*/
|
||||
if( fits_read_pix( read->fptr,
|
||||
read->datatype, fpixel, im->Xsize,
|
||||
NULL, band_buffer, NULL, &status ) ) {
|
||||
read_error( status );
|
||||
return( -1 );
|
||||
}
|
||||
|
||||
p = band_buffer;
|
||||
q = line_buffer + z * es;
|
||||
for( x = 0; x < im->Xsize; x++ ) {
|
||||
for( k = 0; k < es; k++ )
|
||||
q[k] = p[k];
|
||||
|
||||
p += es;
|
||||
q += im->Bands * es;
|
||||
}
|
||||
}
|
||||
|
||||
if( im_writeline( y, im, line_buffer ) )
|
||||
return( -1 );
|
||||
}
|
||||
|
||||
return( 0 );
|
||||
}
|
||||
|
||||
/* Read the image in chunks on demand.
|
||||
*/
|
||||
static int
|
||||
fits2vips_get_data_lazy( Read *read )
|
||||
{
|
||||
return( im_generate( read->out,
|
||||
NULL, fits2vips_generate, NULL, read, NULL ) );
|
||||
}
|
||||
|
||||
/**
|
||||
* im_fits2vips:
|
||||
* @filename: file to load
|
||||
@ -371,7 +467,7 @@ im_fits2vips( const char *filename, IMAGE *out )
|
||||
if( !(read = read_new( filename, out )) )
|
||||
return( -1 );
|
||||
if( fits2vips_get_header( read ) ||
|
||||
fits2vips_get_data( read ) ) {
|
||||
fits2vips_get_data_scanlinewise( read ) ) {
|
||||
read_destroy( read );
|
||||
return( -1 );
|
||||
}
|
||||
|
@ -7,6 +7,9 @@
|
||||
* defaults to (window_size / 2 - 1), so for a 4x4 stencil (eg.
|
||||
* bicubic) we have an offset of 1
|
||||
* - tiny speedups
|
||||
* 7/1/11
|
||||
* - don't use tables for bilinear on float data for a small speedup
|
||||
* (thanks Nicolas)
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -323,15 +326,12 @@ typedef VipsInterpolateClass VipsInterpolateBilinearClass;
|
||||
G_DEFINE_TYPE( VipsInterpolateBilinear, vips_interpolate_bilinear,
|
||||
VIPS_TYPE_INTERPOLATE );
|
||||
|
||||
/* Precalculated interpolation matricies. int (used for pel sizes up
|
||||
* to short), and float (for all others). We go to scale + 1 so
|
||||
* we can round-to-nearest safely. Don't bother with double, since
|
||||
* this is an approximation anyway.
|
||||
/* Precalculated interpolation matrices, only for int types.
|
||||
* We go to scale + 1 so
|
||||
* we can round-to-nearest safely.
|
||||
*/
|
||||
static int vips_bilinear_matrixi
|
||||
[VIPS_TRANSFORM_SCALE + 1][VIPS_TRANSFORM_SCALE + 1][4];
|
||||
static float vips_bilinear_matrixd
|
||||
[VIPS_TRANSFORM_SCALE + 1][VIPS_TRANSFORM_SCALE + 1][4];
|
||||
|
||||
/* in this class, name vars in the 2x2 grid as eg.
|
||||
* p1 p2
|
||||
@ -344,6 +344,12 @@ static float vips_bilinear_matrixd
|
||||
#define BILINEAR_INT( TYPE ) { \
|
||||
TYPE *tq = (TYPE *) out; \
|
||||
\
|
||||
const int six = sx & (VIPS_TRANSFORM_SCALE * 2 - 1); \
|
||||
const int siy = sy & (VIPS_TRANSFORM_SCALE * 2 - 1); \
|
||||
\
|
||||
const int tx = (six + 1) >> 1; \
|
||||
const int ty = (siy + 1) >> 1; \
|
||||
\
|
||||
const int c1 = vips_bilinear_matrixi[tx][ty][0]; \
|
||||
const int c2 = vips_bilinear_matrixi[tx][ty][1]; \
|
||||
const int c3 = vips_bilinear_matrixi[tx][ty][2]; \
|
||||
@ -364,12 +370,18 @@ static float vips_bilinear_matrixd
|
||||
*/
|
||||
#define BILINEAR_FLOAT( TYPE ) { \
|
||||
TYPE *tq = (TYPE *) out; \
|
||||
\
|
||||
const double c1 = vips_bilinear_matrixd[tx][ty][0]; \
|
||||
const double c2 = vips_bilinear_matrixd[tx][ty][1]; \
|
||||
const double c3 = vips_bilinear_matrixd[tx][ty][2]; \
|
||||
const double c4 = vips_bilinear_matrixd[tx][ty][3]; \
|
||||
\
|
||||
float X = x - ix; \
|
||||
float Y = y - iy; \
|
||||
\
|
||||
float Xd = 1.0 - X; \
|
||||
float Yd = 1.0 - Y; \
|
||||
\
|
||||
float c1 = Xd * Yd; \
|
||||
float c2 = X * Yd; \
|
||||
float c3 = Xd * Y; \
|
||||
float c4 = X * Y; \
|
||||
\
|
||||
const TYPE *tp1 = (TYPE *) p1; \
|
||||
const TYPE *tp2 = (TYPE *) p2; \
|
||||
const TYPE *tp3 = (TYPE *) p3; \
|
||||
@ -408,6 +420,10 @@ vips_interpolate_bilinear_interpolate( VipsInterpolate *interpolate,
|
||||
const int ls = IM_REGION_LSKIP( in );
|
||||
const int b = in->im->Bands;
|
||||
|
||||
/* We want ((int)x), but the table-based version needs to find a mask
|
||||
* index quickly from the residual. Calculate both.
|
||||
*/
|
||||
|
||||
/* Find the mask index. We round-to-nearest, so we need to generate
|
||||
* indexes in 0 to VIPS_TRANSFORM_SCALE, 2^n + 1 values. We multiply
|
||||
* by 2 more than we need to, add one, mask, then shift down again to
|
||||
@ -416,13 +432,7 @@ vips_interpolate_bilinear_interpolate( VipsInterpolate *interpolate,
|
||||
const int sx = x * VIPS_TRANSFORM_SCALE * 2;
|
||||
const int sy = y * VIPS_TRANSFORM_SCALE * 2;
|
||||
|
||||
const int six = sx & (VIPS_TRANSFORM_SCALE * 2 - 1);
|
||||
const int siy = sy & (VIPS_TRANSFORM_SCALE * 2 - 1);
|
||||
|
||||
const int tx = (six + 1) >> 1;
|
||||
const int ty = (siy + 1) >> 1;
|
||||
|
||||
/* We want ((int)x) ... void repeating this double -> int conversion
|
||||
/* We want ((int)x) ... avoid repeating this double -> int conversion
|
||||
* by just shifting sx down.
|
||||
*/
|
||||
const int ix = sx >> (VIPS_TRANSFORM_SHIFT + 1);
|
||||
@ -474,11 +484,6 @@ vips_interpolate_bilinear_class_init( VipsInterpolateBilinearClass *class )
|
||||
c3 = Xd * Y;
|
||||
c4 = X * Y;
|
||||
|
||||
vips_bilinear_matrixd[x][y][0] = c1;
|
||||
vips_bilinear_matrixd[x][y][1] = c2;
|
||||
vips_bilinear_matrixd[x][y][2] = c3;
|
||||
vips_bilinear_matrixd[x][y][3] = c4;
|
||||
|
||||
vips_bilinear_matrixi[x][y][0] =
|
||||
c1 * VIPS_INTERPOLATE_SCALE;
|
||||
vips_bilinear_matrixi[x][y][1] =
|
||||
|
Loading…
Reference in New Issue
Block a user