small speedup to conv

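Remove the per-sequence `pts` pointer table from the C convolution loops (convf and convi) and index the input line directly through `offsets`, for a few % speedup.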
This commit is contained in:
John Cupitt 2017-07-02 19:25:52 +01:00
parent 91e52f3d36
commit 7d98b39843
2 changed files with 27 additions and 37 deletions

View File

@ -47,6 +47,8 @@
* - remove Duff for a 25% speedup * - remove Duff for a 25% speedup
* 23/6/16 * 23/6/16
* - redone as a class * - redone as a class
* 2/7/17
* - remove pts for a small speedup
*/ */
/* /*
@ -110,7 +112,6 @@ typedef struct {
VipsRegion *ir; /* Input region */ VipsRegion *ir; /* Input region */
int *offsets; /* Offsets for each non-zero matrix element */ int *offsets; /* Offsets for each non-zero matrix element */
VipsPel **pts; /* Per-non-zero mask element image pointers */
int last_bpl; /* Avoid recalcing offsets, if we can */ int last_bpl; /* Avoid recalcing offsets, if we can */
} VipsConvfSequence; } VipsConvfSequence;
@ -141,12 +142,10 @@ vips_convf_start( VipsImage *out, void *a, void *b )
seq->convf = convf; seq->convf = convf;
seq->ir = NULL; seq->ir = NULL;
seq->pts = NULL;
seq->last_bpl = -1; seq->last_bpl = -1;
seq->ir = vips_region_new( in ); seq->ir = vips_region_new( in );
if( !(seq->offsets = VIPS_ARRAY( out, convf->nnz, int )) || if( !(seq->offsets = VIPS_ARRAY( out, convf->nnz, int )) ) {
!(seq->pts = VIPS_ARRAY( out, convf->nnz, VipsPel * )) ) {
vips_convf_stop( seq, in, convf ); vips_convf_stop( seq, in, convf );
return( NULL ); return( NULL );
} }
@ -155,8 +154,9 @@ vips_convf_start( VipsImage *out, void *a, void *b )
} }
#define CONV_FLOAT( ITYPE, OTYPE ) { \ #define CONV_FLOAT( ITYPE, OTYPE ) { \
ITYPE ** restrict p = (ITYPE **) seq->pts; \ ITYPE * restrict p = (ITYPE *) VIPS_REGION_ADDR( ir, le, y ); \
OTYPE * restrict q = (OTYPE *) VIPS_REGION_ADDR( or, le, y ); \ OTYPE * restrict q = (OTYPE *) VIPS_REGION_ADDR( or, le, y ); \
int * restrict offsets = seq->offsets; \
\ \
for( x = 0; x < sz; x++ ) { \ for( x = 0; x < sz; x++ ) { \
double sum; \ double sum; \
@ -164,11 +164,12 @@ vips_convf_start( VipsImage *out, void *a, void *b )
\ \
sum = 0; \ sum = 0; \
for ( i = 0; i < nnz; i++ ) \ for ( i = 0; i < nnz; i++ ) \
sum += t[i] * p[i][x]; \ sum += t[i] * p[offsets[i]]; \
\ \
sum = (sum / scale) + offset; \ sum = (sum / scale) + offset; \
\ \
q[x] = sum; \ q[x] = sum; \
p += 1; \
} \ } \
} }
@ -218,20 +219,15 @@ vips_convf_gen( REGION *or, void *vseq, void *a, void *b, gboolean *stop )
y = z / M->Xsize; y = z / M->Xsize;
seq->offsets[i] = seq->offsets[i] =
VIPS_REGION_ADDR( ir, x + le, y + to ) - (VIPS_REGION_ADDR( ir, x + le, y + to ) -
VIPS_REGION_ADDR( ir, le, to ); VIPS_REGION_ADDR( ir, le, to )) /
VIPS_IMAGE_SIZEOF_ELEMENT( ir->im );
} }
} }
VIPS_GATE_START( "vips_convf_gen: work" ); VIPS_GATE_START( "vips_convf_gen: work" );
for( y = to; y < bo; y++ ) { for( y = to; y < bo; y++ ) {
/* Init pts for this line of PELs.
*/
for( z = 0; z < nnz; z++ )
seq->pts[z] = seq->offsets[z] +
VIPS_REGION_ADDR( ir, le, y );
switch( in->BandFmt ) { switch( in->BandFmt ) {
case VIPS_FORMAT_UCHAR: case VIPS_FORMAT_UCHAR:
CONV_FLOAT( unsigned char, float ); CONV_FLOAT( unsigned char, float );

View File

@ -72,6 +72,8 @@
* 23/6/16 * 23/6/16
* - rewritten as a class * - rewritten as a class
* - new fixed-point vector path, up to 2x faster * - new fixed-point vector path, up to 2x faster
* 2/7/17
* - remove pts for a small speedup
*/ */
/* /*
@ -184,7 +186,6 @@ typedef struct {
VipsRegion *ir; /* Input region */ VipsRegion *ir; /* Input region */
int *offsets; /* Offsets for each non-zero matrix element */ int *offsets; /* Offsets for each non-zero matrix element */
VipsPel **pts; /* Per-non-zero mask element image pointers */
int last_bpl; /* Avoid recalcing offsets, if we can */ int last_bpl; /* Avoid recalcing offsets, if we can */
@ -204,7 +205,6 @@ vips_convi_stop( void *vseq, void *a, void *b )
VIPS_UNREF( seq->ir ); VIPS_UNREF( seq->ir );
VIPS_FREE( seq->offsets ); VIPS_FREE( seq->offsets );
VIPS_FREE( seq->pts );
VIPS_FREE( seq->t1 ); VIPS_FREE( seq->t1 );
VIPS_FREE( seq->t2 ); VIPS_FREE( seq->t2 );
@ -226,7 +226,6 @@ vips_convi_start( VipsImage *out, void *a, void *b )
seq->convi = convi; seq->convi = convi;
seq->ir = NULL; seq->ir = NULL;
seq->offsets = NULL; seq->offsets = NULL;
seq->pts = NULL;
seq->last_bpl = -1; seq->last_bpl = -1;
seq->t1 = NULL; seq->t1 = NULL;
seq->t2 = NULL; seq->t2 = NULL;
@ -236,11 +235,7 @@ vips_convi_start( VipsImage *out, void *a, void *b )
/* C mode. /* C mode.
*/ */
if( convi->nnz ) { if( convi->nnz ) {
seq->offsets = VIPS_ARRAY( NULL, convi->nnz, int ); if( !(seq->offsets = VIPS_ARRAY( NULL, convi->nnz, int )) ) {
seq->pts = VIPS_ARRAY( NULL, convi->nnz, VipsPel * );
if( !seq->offsets ||
!seq->pts ) {
vips_convi_stop( seq, in, convi ); vips_convi_stop( seq, in, convi );
return( NULL ); return( NULL );
} }
@ -568,8 +563,9 @@ vips_convi_gen_vector( VipsRegion *or,
/* INT inner loops. /* INT inner loops.
*/ */
#define CONV_INT( TYPE, CLIP ) { \ #define CONV_INT( TYPE, CLIP ) { \
TYPE ** restrict p = (TYPE **) seq->pts; \ TYPE * restrict p = (TYPE *) VIPS_REGION_ADDR( ir, le, y ); \
TYPE * restrict q = (TYPE *) VIPS_REGION_ADDR( or, le, y ); \ TYPE * restrict q = (TYPE *) VIPS_REGION_ADDR( or, le, y ); \
int * restrict offsets = seq->offsets; \
\ \
for( x = 0; x < sz; x++ ) { \ for( x = 0; x < sz; x++ ) { \
int sum; \ int sum; \
@ -577,21 +573,23 @@ vips_convi_gen_vector( VipsRegion *or,
\ \
sum = 0; \ sum = 0; \
for ( i = 0; i < nnz; i++ ) \ for ( i = 0; i < nnz; i++ ) \
sum += t[i] * p[i][x]; \ sum += t[i] * p[offsets[i]]; \
\ \
sum = ((sum + rounding) / scale) + offset; \ sum = ((sum + rounding) / scale) + offset; \
\ \
CLIP; \ CLIP; \
\ \
q[x] = sum; \ q[x] = sum; \
p += 1; \
} \ } \
} }
/* FLOAT inner loops. /* FLOAT inner loops.
*/ */
#define CONV_FLOAT( TYPE ) { \ #define CONV_FLOAT( TYPE ) { \
TYPE ** restrict p = (TYPE **) seq->pts; \ TYPE * restrict p = (TYPE *) VIPS_REGION_ADDR( ir, le, y ); \
TYPE * restrict q = (TYPE *) VIPS_REGION_ADDR( or, le, y ); \ TYPE * restrict q = (TYPE *) VIPS_REGION_ADDR( or, le, y ); \
int * restrict offsets = seq->offsets; \
\ \
for( x = 0; x < sz; x++ ) { \ for( x = 0; x < sz; x++ ) { \
double sum; \ double sum; \
@ -599,11 +597,12 @@ vips_convi_gen_vector( VipsRegion *or,
\ \
sum = 0; \ sum = 0; \
for ( i = 0; i < nnz; i++ ) \ for ( i = 0; i < nnz; i++ ) \
sum += t[i] * p[i][x]; \ sum += t[i] * p[offsets[i]]; \
\ \
sum = (sum / scale) + offset; \ sum = (sum / scale) + offset; \
\ \
q[x] = sum; \ q[x] = sum; \
p += 1; \
} \ } \
} }
@ -691,20 +690,15 @@ vips_convi_gen( VipsRegion *or,
y = z / M->Xsize; y = z / M->Xsize;
seq->offsets[i] = seq->offsets[i] =
VIPS_REGION_ADDR( ir, x + le, y + to ) - (VIPS_REGION_ADDR( ir, x + le, y + to ) -
VIPS_REGION_ADDR( ir, le, to ); VIPS_REGION_ADDR( ir, le, to )) /
VIPS_IMAGE_SIZEOF_ELEMENT( ir->im );
} }
} }
VIPS_GATE_START( "vips_convi_gen: work" ); VIPS_GATE_START( "vips_convi_gen: work" );
for( y = to; y < bo; y++ ) { for( y = to; y < bo; y++ ) {
/* Init pts for this line of PELs.
*/
for( z = 0; z < nnz; z++ )
seq->pts[z] = seq->offsets[z] +
VIPS_REGION_ADDR( ir, le, y );
switch( in->BandFmt ) { switch( in->BandFmt ) {
case VIPS_FORMAT_UCHAR: case VIPS_FORMAT_UCHAR:
CONV_INT( unsigned char, CLIP_UCHAR( sum ) ); CONV_INT( unsigned char, CLIP_UCHAR( sum ) );