small speedup to conv
remove a table for a few % speedup
This commit is contained in:
parent
91e52f3d36
commit
7d98b39843
@ -47,6 +47,8 @@
|
||||
* - remove Duff for a 25% speedup
|
||||
* 23/6/16
|
||||
* - redone as a class
|
||||
* 2/7/17
|
||||
* - remove pts for a small speedup
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -110,7 +112,6 @@ typedef struct {
|
||||
VipsRegion *ir; /* Input region */
|
||||
|
||||
int *offsets; /* Offsets for each non-zero matrix element */
|
||||
VipsPel **pts; /* Per-non-zero mask element image pointers */
|
||||
|
||||
int last_bpl; /* Avoid recalcing offsets, if we can */
|
||||
} VipsConvfSequence;
|
||||
@ -141,12 +142,10 @@ vips_convf_start( VipsImage *out, void *a, void *b )
|
||||
|
||||
seq->convf = convf;
|
||||
seq->ir = NULL;
|
||||
seq->pts = NULL;
|
||||
seq->last_bpl = -1;
|
||||
|
||||
seq->ir = vips_region_new( in );
|
||||
if( !(seq->offsets = VIPS_ARRAY( out, convf->nnz, int )) ||
|
||||
!(seq->pts = VIPS_ARRAY( out, convf->nnz, VipsPel * )) ) {
|
||||
if( !(seq->offsets = VIPS_ARRAY( out, convf->nnz, int )) ) {
|
||||
vips_convf_stop( seq, in, convf );
|
||||
return( NULL );
|
||||
}
|
||||
@ -155,8 +154,9 @@ vips_convf_start( VipsImage *out, void *a, void *b )
|
||||
}
|
||||
|
||||
#define CONV_FLOAT( ITYPE, OTYPE ) { \
|
||||
ITYPE ** restrict p = (ITYPE **) seq->pts; \
|
||||
ITYPE * restrict p = (ITYPE *) VIPS_REGION_ADDR( ir, le, y ); \
|
||||
OTYPE * restrict q = (OTYPE *) VIPS_REGION_ADDR( or, le, y ); \
|
||||
int * restrict offsets = seq->offsets; \
|
||||
\
|
||||
for( x = 0; x < sz; x++ ) { \
|
||||
double sum; \
|
||||
@ -164,13 +164,14 @@ vips_convf_start( VipsImage *out, void *a, void *b )
|
||||
\
|
||||
sum = 0; \
|
||||
for ( i = 0; i < nnz; i++ ) \
|
||||
sum += t[i] * p[i][x]; \
|
||||
sum += t[i] * p[offsets[i]]; \
|
||||
\
|
||||
sum = (sum / scale) + offset; \
|
||||
\
|
||||
q[x] = sum; \
|
||||
} \
|
||||
}
|
||||
q[x] = sum; \
|
||||
p += 1; \
|
||||
} \
|
||||
}
|
||||
|
||||
/* Convolve!
|
||||
*/
|
||||
@ -218,20 +219,15 @@ vips_convf_gen( REGION *or, void *vseq, void *a, void *b, gboolean *stop )
|
||||
y = z / M->Xsize;
|
||||
|
||||
seq->offsets[i] =
|
||||
VIPS_REGION_ADDR( ir, x + le, y + to ) -
|
||||
VIPS_REGION_ADDR( ir, le, to );
|
||||
(VIPS_REGION_ADDR( ir, x + le, y + to ) -
|
||||
VIPS_REGION_ADDR( ir, le, to )) /
|
||||
VIPS_IMAGE_SIZEOF_ELEMENT( ir->im );
|
||||
}
|
||||
}
|
||||
|
||||
VIPS_GATE_START( "vips_convf_gen: work" );
|
||||
|
||||
for( y = to; y < bo; y++ ) {
|
||||
/* Init pts for this line of PELs.
|
||||
*/
|
||||
for( z = 0; z < nnz; z++ )
|
||||
seq->pts[z] = seq->offsets[z] +
|
||||
VIPS_REGION_ADDR( ir, le, y );
|
||||
|
||||
switch( in->BandFmt ) {
|
||||
case VIPS_FORMAT_UCHAR:
|
||||
CONV_FLOAT( unsigned char, float );
|
||||
|
@ -72,6 +72,8 @@
|
||||
* 23/6/16
|
||||
* - rewritten as a class
|
||||
* - new fixed-point vector path, up to 2x faster
|
||||
* 2/7/17
|
||||
* - remove pts for a small speedup
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -184,7 +186,6 @@ typedef struct {
|
||||
VipsRegion *ir; /* Input region */
|
||||
|
||||
int *offsets; /* Offsets for each non-zero matrix element */
|
||||
VipsPel **pts; /* Per-non-zero mask element image pointers */
|
||||
|
||||
int last_bpl; /* Avoid recalcing offsets, if we can */
|
||||
|
||||
@ -204,7 +205,6 @@ vips_convi_stop( void *vseq, void *a, void *b )
|
||||
|
||||
VIPS_UNREF( seq->ir );
|
||||
VIPS_FREE( seq->offsets );
|
||||
VIPS_FREE( seq->pts );
|
||||
VIPS_FREE( seq->t1 );
|
||||
VIPS_FREE( seq->t2 );
|
||||
|
||||
@ -226,7 +226,6 @@ vips_convi_start( VipsImage *out, void *a, void *b )
|
||||
seq->convi = convi;
|
||||
seq->ir = NULL;
|
||||
seq->offsets = NULL;
|
||||
seq->pts = NULL;
|
||||
seq->last_bpl = -1;
|
||||
seq->t1 = NULL;
|
||||
seq->t2 = NULL;
|
||||
@ -236,11 +235,7 @@ vips_convi_start( VipsImage *out, void *a, void *b )
|
||||
/* C mode.
|
||||
*/
|
||||
if( convi->nnz ) {
|
||||
seq->offsets = VIPS_ARRAY( NULL, convi->nnz, int );
|
||||
seq->pts = VIPS_ARRAY( NULL, convi->nnz, VipsPel * );
|
||||
|
||||
if( !seq->offsets ||
|
||||
!seq->pts ) {
|
||||
if( !(seq->offsets = VIPS_ARRAY( NULL, convi->nnz, int )) ) {
|
||||
vips_convi_stop( seq, in, convi );
|
||||
return( NULL );
|
||||
}
|
||||
@ -568,8 +563,9 @@ vips_convi_gen_vector( VipsRegion *or,
|
||||
/* INT inner loops.
|
||||
*/
|
||||
#define CONV_INT( TYPE, CLIP ) { \
|
||||
TYPE ** restrict p = (TYPE **) seq->pts; \
|
||||
TYPE * restrict p = (TYPE *) VIPS_REGION_ADDR( ir, le, y ); \
|
||||
TYPE * restrict q = (TYPE *) VIPS_REGION_ADDR( or, le, y ); \
|
||||
int * restrict offsets = seq->offsets; \
|
||||
\
|
||||
for( x = 0; x < sz; x++ ) { \
|
||||
int sum; \
|
||||
@ -577,21 +573,23 @@ vips_convi_gen_vector( VipsRegion *or,
|
||||
\
|
||||
sum = 0; \
|
||||
for ( i = 0; i < nnz; i++ ) \
|
||||
sum += t[i] * p[i][x]; \
|
||||
sum += t[i] * p[offsets[i]]; \
|
||||
\
|
||||
sum = ((sum + rounding) / scale) + offset; \
|
||||
\
|
||||
CLIP; \
|
||||
\
|
||||
q[x] = sum; \
|
||||
p += 1; \
|
||||
} \
|
||||
}
|
||||
|
||||
/* FLOAT inner loops.
|
||||
*/
|
||||
#define CONV_FLOAT( TYPE ) { \
|
||||
TYPE ** restrict p = (TYPE **) seq->pts; \
|
||||
TYPE * restrict p = (TYPE *) VIPS_REGION_ADDR( ir, le, y ); \
|
||||
TYPE * restrict q = (TYPE *) VIPS_REGION_ADDR( or, le, y ); \
|
||||
int * restrict offsets = seq->offsets; \
|
||||
\
|
||||
for( x = 0; x < sz; x++ ) { \
|
||||
double sum; \
|
||||
@ -599,11 +597,12 @@ vips_convi_gen_vector( VipsRegion *or,
|
||||
\
|
||||
sum = 0; \
|
||||
for ( i = 0; i < nnz; i++ ) \
|
||||
sum += t[i] * p[i][x]; \
|
||||
sum += t[i] * p[offsets[i]]; \
|
||||
\
|
||||
sum = (sum / scale) + offset; \
|
||||
\
|
||||
q[x] = sum; \
|
||||
p += 1; \
|
||||
} \
|
||||
}
|
||||
|
||||
@ -691,20 +690,15 @@ vips_convi_gen( VipsRegion *or,
|
||||
y = z / M->Xsize;
|
||||
|
||||
seq->offsets[i] =
|
||||
VIPS_REGION_ADDR( ir, x + le, y + to ) -
|
||||
VIPS_REGION_ADDR( ir, le, to );
|
||||
(VIPS_REGION_ADDR( ir, x + le, y + to ) -
|
||||
VIPS_REGION_ADDR( ir, le, to )) /
|
||||
VIPS_IMAGE_SIZEOF_ELEMENT( ir->im );
|
||||
}
|
||||
}
|
||||
|
||||
VIPS_GATE_START( "vips_convi_gen: work" );
|
||||
|
||||
for( y = to; y < bo; y++ ) {
|
||||
/* Init pts for this line of PELs.
|
||||
*/
|
||||
for( z = 0; z < nnz; z++ )
|
||||
seq->pts[z] = seq->offsets[z] +
|
||||
VIPS_REGION_ADDR( ir, le, y );
|
||||
|
||||
switch( in->BandFmt ) {
|
||||
case VIPS_FORMAT_UCHAR:
|
||||
CONV_INT( unsigned char, CLIP_UCHAR( sum ) );
|
||||
|
Loading…
Reference in New Issue
Block a user