speed up bilinear on float
This commit is contained in:
parent
8bd2322b1f
commit
dfd97464b0
@ -5,6 +5,8 @@
|
|||||||
- add -lstdc++ to vips-7.xx.pc, if we used it
|
- add -lstdc++ to vips-7.xx.pc, if we used it
|
||||||
- im_vips2png() / im_png2vips() set / get png resolution (thanks Zhiyu Wu)
|
- im_vips2png() / im_png2vips() set / get png resolution (thanks Zhiyu Wu)
|
||||||
- updated README
|
- updated README
|
||||||
|
- don't use tables for bilinear on float data for a small speedup (thanks
|
||||||
|
Nicolas)
|
||||||
|
|
||||||
30/11/10 started 7.24.0
|
30/11/10 started 7.24.0
|
||||||
- bump for new stable
|
- bump for new stable
|
||||||
|
18
TODO
18
TODO
@ -1,3 +1,19 @@
|
|||||||
|
- bilinear benchmarks
|
||||||
|
|
||||||
|
8-bit image, tables, fixed-point, 3.7s (7.24 one)
|
||||||
|
8-bit image, no tables, float, 5.2s
|
||||||
|
8-bit image, no tables, fixed-point, 4.2s
|
||||||
|
|
||||||
|
float image, tables, 5.2s (7.24 one)
|
||||||
|
float image, no tables, sp float coeff, 4.2s
|
||||||
|
float image, no tables, dp float coeff, 4.8s
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
- im_conv()/im_morph() could have more than 10 programs? try 20 and see if we
|
- im_conv()/im_morph() could have more than 10 programs? try 20 and see if we
|
||||||
@ -8,6 +24,8 @@
|
|||||||
|
|
||||||
- fits save
|
- fits save
|
||||||
|
|
||||||
|
- lazy fits load
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -60,6 +60,12 @@
|
|||||||
#include <dmalloc.h>
|
#include <dmalloc.h>
|
||||||
#endif /*WITH_DMALLOC*/
|
#endif /*WITH_DMALLOC*/
|
||||||
|
|
||||||
|
/* vips only supports 3 dimensions, but we allow up to MAX_DIMENSIONS as long
|
||||||
|
* as the higher dimensions are all empty. If you change this value, change
|
||||||
|
* fits2vips_get_header() as well.
|
||||||
|
*/
|
||||||
|
#define MAX_DIMENSIONS (10)
|
||||||
|
|
||||||
/* What we track during a cfitsio-file read.
|
/* What we track during a cfitsio-file read.
|
||||||
*/
|
*/
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@ -69,7 +75,7 @@ typedef struct {
|
|||||||
fitsfile *fptr;
|
fitsfile *fptr;
|
||||||
int datatype;
|
int datatype;
|
||||||
int naxis;
|
int naxis;
|
||||||
long long int naxes[10];
|
long long int naxes[MAX_DIMENSIONS];
|
||||||
} Read;
|
} Read;
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -154,15 +160,18 @@ fits2vips_get_header( Read *read )
|
|||||||
return( -1 );
|
return( -1 );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
printf( "naxis = %d\n", read->naxis );
|
printf( "naxis = %d\n", read->naxis );
|
||||||
for( i = 0; i < read->naxis; i++ )
|
for( i = 0; i < read->naxis; i++ )
|
||||||
printf( "%d) %lld\n", i, read->naxes[i] );
|
printf( "%d) %lld\n", i, read->naxes[i] );
|
||||||
|
#endif /*DEBUG*/
|
||||||
|
|
||||||
width = 1;
|
width = 1;
|
||||||
height = 1;
|
height = 1;
|
||||||
bands = 1;
|
bands = 1;
|
||||||
switch( read->naxis ) {
|
switch( read->naxis ) {
|
||||||
/* If you add more dimensions here, adjust data read below.
|
/* If you add more dimensions here, adjust data read below. See also
|
||||||
|
* the definition of MAX_DIMENSIONS above.
|
||||||
*/
|
*/
|
||||||
case 10:
|
case 10:
|
||||||
case 9:
|
case 9:
|
||||||
@ -284,8 +293,10 @@ fits2vips_header( const char *filename, IMAGE *out )
|
|||||||
return( 0 );
|
return( 0 );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Read the whole image in scanlines.
|
||||||
|
*/
|
||||||
static int
|
static int
|
||||||
fits2vips_get_data( Read *read )
|
fits2vips_get_data_scanlinewise( Read *read )
|
||||||
{
|
{
|
||||||
IMAGE *im = read->out;
|
IMAGE *im = read->out;
|
||||||
const int es = IM_IMAGE_SIZEOF_ELEMENT( im );
|
const int es = IM_IMAGE_SIZEOF_ELEMENT( im );
|
||||||
@ -296,6 +307,8 @@ fits2vips_get_data( Read *read )
|
|||||||
int x, y, b, z;
|
int x, y, b, z;
|
||||||
int status;
|
int status;
|
||||||
|
|
||||||
|
long fpixel[MAX_DIMENSIONS];
|
||||||
|
|
||||||
status = 0;
|
status = 0;
|
||||||
|
|
||||||
if( !(line_buffer = IM_ARRAY( im, IM_IMAGE_SIZEOF_LINE( im ), PEL )) ||
|
if( !(line_buffer = IM_ARRAY( im, IM_IMAGE_SIZEOF_LINE( im ), PEL )) ||
|
||||||
@ -305,14 +318,9 @@ fits2vips_get_data( Read *read )
|
|||||||
return( -1 );
|
return( -1 );
|
||||||
|
|
||||||
for( y = 0; y < im->Ysize; y++ ) {
|
for( y = 0; y < im->Ysize; y++ ) {
|
||||||
/* Keep max no of dimensions in line with the header check
|
|
||||||
* above.
|
|
||||||
*/
|
|
||||||
long int fpixel[10];
|
|
||||||
|
|
||||||
/* Start of scanline. We have to read top-to-bottom.
|
/* Start of scanline. We have to read top-to-bottom.
|
||||||
*/
|
*/
|
||||||
for( b = 0; b < 10; b++ )
|
for( b = 0; b < MAX_DIMENSIONS; b++ )
|
||||||
fpixel[b] = 1;
|
fpixel[b] = 1;
|
||||||
fpixel[1] = im->Ysize - y;
|
fpixel[1] = im->Ysize - y;
|
||||||
|
|
||||||
@ -347,6 +355,94 @@ fits2vips_get_data( Read *read )
|
|||||||
return( 0 );
|
return( 0 );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
fits2vips_generate( REGION *out, void *seq, void *a, void *b )
|
||||||
|
{
|
||||||
|
Read *read = (Read *) a;
|
||||||
|
Rect *r = &out->valid;
|
||||||
|
|
||||||
|
IMAGE *im = read->out;
|
||||||
|
const int es = IM_IMAGE_SIZEOF_ELEMENT( im );
|
||||||
|
|
||||||
|
PEL *line_buffer;
|
||||||
|
PEL *band_buffer;
|
||||||
|
PEL *p, *q;
|
||||||
|
int x, y, z, k;
|
||||||
|
int status;
|
||||||
|
|
||||||
|
status = 0;
|
||||||
|
|
||||||
|
long fpixel[MAX_DIMENSIONS];
|
||||||
|
long lpixel[MAX_DIMENSIONS];
|
||||||
|
long inc[MAX_DIMENSIONS];
|
||||||
|
|
||||||
|
if( !(line_buffer = IM_ARRAY( im, IM_IMAGE_SIZEOF_LINE( im ), PEL )) ||
|
||||||
|
!(band_buffer = IM_ARRAY( im, es * im->Xsize, PEL )) ||
|
||||||
|
im_outcheck( im ) ||
|
||||||
|
im_setupout( im ) )
|
||||||
|
return( -1 );
|
||||||
|
|
||||||
|
/* Read out the entire
|
||||||
|
for( b = 0; b < MAX_DIMENSIONS; b++ )
|
||||||
|
fpixel[b] = 1;
|
||||||
|
fpixel[1] = im->Ysize - y;
|
||||||
|
|
||||||
|
if( fits_read_subset( read->fptr, read->datatype,
|
||||||
|
long *fpixel,
|
||||||
|
long *lpixel, long *inc, void *nulval, void *array,
|
||||||
|
int *anynul, int *status)
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
for( y = 0; y < im->Ysize; y++ ) {
|
||||||
|
long int fpixel[MAX_DIMENSIONS];
|
||||||
|
|
||||||
|
/* Start of scanline. We have to read top-to-bottom.
|
||||||
|
*/
|
||||||
|
for( z = 0; z < MAX_DIMENSIONS; z++ )
|
||||||
|
fpixel[z] = 1;
|
||||||
|
fpixel[1] = im->Ysize - y;
|
||||||
|
|
||||||
|
for( z = 0; z < im->Bands; z++ ) {
|
||||||
|
fpixel[2] = z + 1;
|
||||||
|
|
||||||
|
/* Read one band of one scanline, then scatter-write
|
||||||
|
* into the line buffer.
|
||||||
|
*/
|
||||||
|
if( fits_read_pix( read->fptr,
|
||||||
|
read->datatype, fpixel, im->Xsize,
|
||||||
|
NULL, band_buffer, NULL, &status ) ) {
|
||||||
|
read_error( status );
|
||||||
|
return( -1 );
|
||||||
|
}
|
||||||
|
|
||||||
|
p = band_buffer;
|
||||||
|
q = line_buffer + z * es;
|
||||||
|
for( x = 0; x < im->Xsize; x++ ) {
|
||||||
|
for( k = 0; k < es; k++ )
|
||||||
|
q[k] = p[k];
|
||||||
|
|
||||||
|
p += es;
|
||||||
|
q += im->Bands * es;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if( im_writeline( y, im, line_buffer ) )
|
||||||
|
return( -1 );
|
||||||
|
}
|
||||||
|
|
||||||
|
return( 0 );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Read the image in chunks on demand.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
fits2vips_get_data_lazy( Read *read )
|
||||||
|
{
|
||||||
|
return( im_generate( read->out,
|
||||||
|
NULL, fits2vips_generate, NULL, read, NULL ) );
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* im_fits2vips:
|
* im_fits2vips:
|
||||||
* @filename: file to load
|
* @filename: file to load
|
||||||
@ -371,7 +467,7 @@ im_fits2vips( const char *filename, IMAGE *out )
|
|||||||
if( !(read = read_new( filename, out )) )
|
if( !(read = read_new( filename, out )) )
|
||||||
return( -1 );
|
return( -1 );
|
||||||
if( fits2vips_get_header( read ) ||
|
if( fits2vips_get_header( read ) ||
|
||||||
fits2vips_get_data( read ) ) {
|
fits2vips_get_data_scanlinewise( read ) ) {
|
||||||
read_destroy( read );
|
read_destroy( read );
|
||||||
return( -1 );
|
return( -1 );
|
||||||
}
|
}
|
||||||
|
@ -7,6 +7,9 @@
|
|||||||
* defaults to (window_size / 2 - 1), so for a 4x4 stencil (eg.
|
* defaults to (window_size / 2 - 1), so for a 4x4 stencil (eg.
|
||||||
* bicubic) we have an offset of 1
|
* bicubic) we have an offset of 1
|
||||||
* - tiny speedups
|
* - tiny speedups
|
||||||
|
* 7/1/11
|
||||||
|
* - don't use tables for bilinear on float data for a small speedup
|
||||||
|
* (thanks Nicolas)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -323,15 +326,12 @@ typedef VipsInterpolateClass VipsInterpolateBilinearClass;
|
|||||||
G_DEFINE_TYPE( VipsInterpolateBilinear, vips_interpolate_bilinear,
|
G_DEFINE_TYPE( VipsInterpolateBilinear, vips_interpolate_bilinear,
|
||||||
VIPS_TYPE_INTERPOLATE );
|
VIPS_TYPE_INTERPOLATE );
|
||||||
|
|
||||||
/* Precalculated interpolation matricies. int (used for pel sizes up
|
/* Precalculated interpolation matricies, only for int types.
|
||||||
* to short), and float (for all others). We go to scale + 1 so
|
* We go to scale + 1 so
|
||||||
* we can round-to-nearest safely. Don't bother with double, since
|
* we can round-to-nearest safely.
|
||||||
* this is an approximation anyway.
|
|
||||||
*/
|
*/
|
||||||
static int vips_bilinear_matrixi
|
static int vips_bilinear_matrixi
|
||||||
[VIPS_TRANSFORM_SCALE + 1][VIPS_TRANSFORM_SCALE + 1][4];
|
[VIPS_TRANSFORM_SCALE + 1][VIPS_TRANSFORM_SCALE + 1][4];
|
||||||
static float vips_bilinear_matrixd
|
|
||||||
[VIPS_TRANSFORM_SCALE + 1][VIPS_TRANSFORM_SCALE + 1][4];
|
|
||||||
|
|
||||||
/* in this class, name vars in the 2x2 grid as eg.
|
/* in this class, name vars in the 2x2 grid as eg.
|
||||||
* p1 p2
|
* p1 p2
|
||||||
@ -344,6 +344,12 @@ static float vips_bilinear_matrixd
|
|||||||
#define BILINEAR_INT( TYPE ) { \
|
#define BILINEAR_INT( TYPE ) { \
|
||||||
TYPE *tq = (TYPE *) out; \
|
TYPE *tq = (TYPE *) out; \
|
||||||
\
|
\
|
||||||
|
const int six = sx & (VIPS_TRANSFORM_SCALE * 2 - 1); \
|
||||||
|
const int siy = sy & (VIPS_TRANSFORM_SCALE * 2 - 1); \
|
||||||
|
\
|
||||||
|
const int tx = (six + 1) >> 1; \
|
||||||
|
const int ty = (siy + 1) >> 1; \
|
||||||
|
\
|
||||||
const int c1 = vips_bilinear_matrixi[tx][ty][0]; \
|
const int c1 = vips_bilinear_matrixi[tx][ty][0]; \
|
||||||
const int c2 = vips_bilinear_matrixi[tx][ty][1]; \
|
const int c2 = vips_bilinear_matrixi[tx][ty][1]; \
|
||||||
const int c3 = vips_bilinear_matrixi[tx][ty][2]; \
|
const int c3 = vips_bilinear_matrixi[tx][ty][2]; \
|
||||||
@ -364,12 +370,18 @@ static float vips_bilinear_matrixd
|
|||||||
*/
|
*/
|
||||||
#define BILINEAR_FLOAT( TYPE ) { \
|
#define BILINEAR_FLOAT( TYPE ) { \
|
||||||
TYPE *tq = (TYPE *) out; \
|
TYPE *tq = (TYPE *) out; \
|
||||||
\
|
|
||||||
const double c1 = vips_bilinear_matrixd[tx][ty][0]; \
|
|
||||||
const double c2 = vips_bilinear_matrixd[tx][ty][1]; \
|
|
||||||
const double c3 = vips_bilinear_matrixd[tx][ty][2]; \
|
|
||||||
const double c4 = vips_bilinear_matrixd[tx][ty][3]; \
|
|
||||||
\
|
\
|
||||||
|
float X = x - ix; \
|
||||||
|
float Y = y - iy; \
|
||||||
|
\
|
||||||
|
float Xd = 1.0 - X; \
|
||||||
|
float Yd = 1.0 - Y; \
|
||||||
|
\
|
||||||
|
float c1 = Xd * Yd; \
|
||||||
|
float c2 = X * Yd; \
|
||||||
|
float c3 = Xd * Y; \
|
||||||
|
float c4 = X * Y; \
|
||||||
|
\
|
||||||
const TYPE *tp1 = (TYPE *) p1; \
|
const TYPE *tp1 = (TYPE *) p1; \
|
||||||
const TYPE *tp2 = (TYPE *) p2; \
|
const TYPE *tp2 = (TYPE *) p2; \
|
||||||
const TYPE *tp3 = (TYPE *) p3; \
|
const TYPE *tp3 = (TYPE *) p3; \
|
||||||
@ -408,6 +420,10 @@ vips_interpolate_bilinear_interpolate( VipsInterpolate *interpolate,
|
|||||||
const int ls = IM_REGION_LSKIP( in );
|
const int ls = IM_REGION_LSKIP( in );
|
||||||
const int b = in->im->Bands;
|
const int b = in->im->Bands;
|
||||||
|
|
||||||
|
/* We want ((int)x), but the tables versions needs to find a mask
|
||||||
|
* index quickly from the residual. Calculate both.
|
||||||
|
*/
|
||||||
|
|
||||||
/* Find the mask index. We round-to-nearest, so we need to generate
|
/* Find the mask index. We round-to-nearest, so we need to generate
|
||||||
* indexes in 0 to VIPS_TRANSFORM_SCALE, 2^n + 1 values. We multiply
|
* indexes in 0 to VIPS_TRANSFORM_SCALE, 2^n + 1 values. We multiply
|
||||||
* by 2 more than we need to, add one, mask, then shift down again to
|
* by 2 more than we need to, add one, mask, then shift down again to
|
||||||
@ -416,13 +432,7 @@ vips_interpolate_bilinear_interpolate( VipsInterpolate *interpolate,
|
|||||||
const int sx = x * VIPS_TRANSFORM_SCALE * 2;
|
const int sx = x * VIPS_TRANSFORM_SCALE * 2;
|
||||||
const int sy = y * VIPS_TRANSFORM_SCALE * 2;
|
const int sy = y * VIPS_TRANSFORM_SCALE * 2;
|
||||||
|
|
||||||
const int six = sx & (VIPS_TRANSFORM_SCALE * 2 - 1);
|
/* We want ((int)x) ... avoid repeating this double -> int conversion
|
||||||
const int siy = sy & (VIPS_TRANSFORM_SCALE * 2 - 1);
|
|
||||||
|
|
||||||
const int tx = (six + 1) >> 1;
|
|
||||||
const int ty = (siy + 1) >> 1;
|
|
||||||
|
|
||||||
/* We want ((int)x) ... void repeating this double -> int conversion
|
|
||||||
* by just shifting sx down.
|
* by just shifting sx down.
|
||||||
*/
|
*/
|
||||||
const int ix = sx >> (VIPS_TRANSFORM_SHIFT + 1);
|
const int ix = sx >> (VIPS_TRANSFORM_SHIFT + 1);
|
||||||
@ -474,11 +484,6 @@ vips_interpolate_bilinear_class_init( VipsInterpolateBilinearClass *class )
|
|||||||
c3 = Xd * Y;
|
c3 = Xd * Y;
|
||||||
c4 = X * Y;
|
c4 = X * Y;
|
||||||
|
|
||||||
vips_bilinear_matrixd[x][y][0] = c1;
|
|
||||||
vips_bilinear_matrixd[x][y][1] = c2;
|
|
||||||
vips_bilinear_matrixd[x][y][2] = c3;
|
|
||||||
vips_bilinear_matrixd[x][y][3] = c4;
|
|
||||||
|
|
||||||
vips_bilinear_matrixi[x][y][0] =
|
vips_bilinear_matrixi[x][y][0] =
|
||||||
c1 * VIPS_INTERPOLATE_SCALE;
|
c1 * VIPS_INTERPOLATE_SCALE;
|
||||||
vips_bilinear_matrixi[x][y][1] =
|
vips_bilinear_matrixi[x][y][1] =
|
||||||
|
Loading…
Reference in New Issue
Block a user