Small speedup to conv
Remove the per-element pointer table (pts) and index the input via offsets instead, for a speedup of a few percent
This commit is contained in:
parent
91e52f3d36
commit
7d98b39843
@ -47,6 +47,8 @@
|
|||||||
* - remove Duff for a 25% speedup
|
* - remove Duff for a 25% speedup
|
||||||
* 23/6/16
|
* 23/6/16
|
||||||
* - redone as a class
|
* - redone as a class
|
||||||
|
* 2/7/17
|
||||||
|
* - remove pts for a small speedup
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -110,7 +112,6 @@ typedef struct {
|
|||||||
VipsRegion *ir; /* Input region */
|
VipsRegion *ir; /* Input region */
|
||||||
|
|
||||||
int *offsets; /* Offsets for each non-zero matrix element */
|
int *offsets; /* Offsets for each non-zero matrix element */
|
||||||
VipsPel **pts; /* Per-non-zero mask element image pointers */
|
|
||||||
|
|
||||||
int last_bpl; /* Avoid recalcing offsets, if we can */
|
int last_bpl; /* Avoid recalcing offsets, if we can */
|
||||||
} VipsConvfSequence;
|
} VipsConvfSequence;
|
||||||
@ -141,12 +142,10 @@ vips_convf_start( VipsImage *out, void *a, void *b )
|
|||||||
|
|
||||||
seq->convf = convf;
|
seq->convf = convf;
|
||||||
seq->ir = NULL;
|
seq->ir = NULL;
|
||||||
seq->pts = NULL;
|
|
||||||
seq->last_bpl = -1;
|
seq->last_bpl = -1;
|
||||||
|
|
||||||
seq->ir = vips_region_new( in );
|
seq->ir = vips_region_new( in );
|
||||||
if( !(seq->offsets = VIPS_ARRAY( out, convf->nnz, int )) ||
|
if( !(seq->offsets = VIPS_ARRAY( out, convf->nnz, int )) ) {
|
||||||
!(seq->pts = VIPS_ARRAY( out, convf->nnz, VipsPel * )) ) {
|
|
||||||
vips_convf_stop( seq, in, convf );
|
vips_convf_stop( seq, in, convf );
|
||||||
return( NULL );
|
return( NULL );
|
||||||
}
|
}
|
||||||
@ -155,8 +154,9 @@ vips_convf_start( VipsImage *out, void *a, void *b )
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define CONV_FLOAT( ITYPE, OTYPE ) { \
|
#define CONV_FLOAT( ITYPE, OTYPE ) { \
|
||||||
ITYPE ** restrict p = (ITYPE **) seq->pts; \
|
ITYPE * restrict p = (ITYPE *) VIPS_REGION_ADDR( ir, le, y ); \
|
||||||
OTYPE * restrict q = (OTYPE *) VIPS_REGION_ADDR( or, le, y ); \
|
OTYPE * restrict q = (OTYPE *) VIPS_REGION_ADDR( or, le, y ); \
|
||||||
|
int * restrict offsets = seq->offsets; \
|
||||||
\
|
\
|
||||||
for( x = 0; x < sz; x++ ) { \
|
for( x = 0; x < sz; x++ ) { \
|
||||||
double sum; \
|
double sum; \
|
||||||
@ -164,11 +164,12 @@ vips_convf_start( VipsImage *out, void *a, void *b )
|
|||||||
\
|
\
|
||||||
sum = 0; \
|
sum = 0; \
|
||||||
for ( i = 0; i < nnz; i++ ) \
|
for ( i = 0; i < nnz; i++ ) \
|
||||||
sum += t[i] * p[i][x]; \
|
sum += t[i] * p[offsets[i]]; \
|
||||||
\
|
\
|
||||||
sum = (sum / scale) + offset; \
|
sum = (sum / scale) + offset; \
|
||||||
\
|
\
|
||||||
q[x] = sum; \
|
q[x] = sum; \
|
||||||
|
p += 1; \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -218,20 +219,15 @@ vips_convf_gen( REGION *or, void *vseq, void *a, void *b, gboolean *stop )
|
|||||||
y = z / M->Xsize;
|
y = z / M->Xsize;
|
||||||
|
|
||||||
seq->offsets[i] =
|
seq->offsets[i] =
|
||||||
VIPS_REGION_ADDR( ir, x + le, y + to ) -
|
(VIPS_REGION_ADDR( ir, x + le, y + to ) -
|
||||||
VIPS_REGION_ADDR( ir, le, to );
|
VIPS_REGION_ADDR( ir, le, to )) /
|
||||||
|
VIPS_IMAGE_SIZEOF_ELEMENT( ir->im );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
VIPS_GATE_START( "vips_convf_gen: work" );
|
VIPS_GATE_START( "vips_convf_gen: work" );
|
||||||
|
|
||||||
for( y = to; y < bo; y++ ) {
|
for( y = to; y < bo; y++ ) {
|
||||||
/* Init pts for this line of PELs.
|
|
||||||
*/
|
|
||||||
for( z = 0; z < nnz; z++ )
|
|
||||||
seq->pts[z] = seq->offsets[z] +
|
|
||||||
VIPS_REGION_ADDR( ir, le, y );
|
|
||||||
|
|
||||||
switch( in->BandFmt ) {
|
switch( in->BandFmt ) {
|
||||||
case VIPS_FORMAT_UCHAR:
|
case VIPS_FORMAT_UCHAR:
|
||||||
CONV_FLOAT( unsigned char, float );
|
CONV_FLOAT( unsigned char, float );
|
||||||
|
@ -72,6 +72,8 @@
|
|||||||
* 23/6/16
|
* 23/6/16
|
||||||
* - rewritten as a class
|
* - rewritten as a class
|
||||||
* - new fixed-point vector path, up to 2x faster
|
* - new fixed-point vector path, up to 2x faster
|
||||||
|
* 2/7/17
|
||||||
|
* - remove pts for a small speedup
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -184,7 +186,6 @@ typedef struct {
|
|||||||
VipsRegion *ir; /* Input region */
|
VipsRegion *ir; /* Input region */
|
||||||
|
|
||||||
int *offsets; /* Offsets for each non-zero matrix element */
|
int *offsets; /* Offsets for each non-zero matrix element */
|
||||||
VipsPel **pts; /* Per-non-zero mask element image pointers */
|
|
||||||
|
|
||||||
int last_bpl; /* Avoid recalcing offsets, if we can */
|
int last_bpl; /* Avoid recalcing offsets, if we can */
|
||||||
|
|
||||||
@ -204,7 +205,6 @@ vips_convi_stop( void *vseq, void *a, void *b )
|
|||||||
|
|
||||||
VIPS_UNREF( seq->ir );
|
VIPS_UNREF( seq->ir );
|
||||||
VIPS_FREE( seq->offsets );
|
VIPS_FREE( seq->offsets );
|
||||||
VIPS_FREE( seq->pts );
|
|
||||||
VIPS_FREE( seq->t1 );
|
VIPS_FREE( seq->t1 );
|
||||||
VIPS_FREE( seq->t2 );
|
VIPS_FREE( seq->t2 );
|
||||||
|
|
||||||
@ -226,7 +226,6 @@ vips_convi_start( VipsImage *out, void *a, void *b )
|
|||||||
seq->convi = convi;
|
seq->convi = convi;
|
||||||
seq->ir = NULL;
|
seq->ir = NULL;
|
||||||
seq->offsets = NULL;
|
seq->offsets = NULL;
|
||||||
seq->pts = NULL;
|
|
||||||
seq->last_bpl = -1;
|
seq->last_bpl = -1;
|
||||||
seq->t1 = NULL;
|
seq->t1 = NULL;
|
||||||
seq->t2 = NULL;
|
seq->t2 = NULL;
|
||||||
@ -236,11 +235,7 @@ vips_convi_start( VipsImage *out, void *a, void *b )
|
|||||||
/* C mode.
|
/* C mode.
|
||||||
*/
|
*/
|
||||||
if( convi->nnz ) {
|
if( convi->nnz ) {
|
||||||
seq->offsets = VIPS_ARRAY( NULL, convi->nnz, int );
|
if( !(seq->offsets = VIPS_ARRAY( NULL, convi->nnz, int )) ) {
|
||||||
seq->pts = VIPS_ARRAY( NULL, convi->nnz, VipsPel * );
|
|
||||||
|
|
||||||
if( !seq->offsets ||
|
|
||||||
!seq->pts ) {
|
|
||||||
vips_convi_stop( seq, in, convi );
|
vips_convi_stop( seq, in, convi );
|
||||||
return( NULL );
|
return( NULL );
|
||||||
}
|
}
|
||||||
@ -568,8 +563,9 @@ vips_convi_gen_vector( VipsRegion *or,
|
|||||||
/* INT inner loops.
|
/* INT inner loops.
|
||||||
*/
|
*/
|
||||||
#define CONV_INT( TYPE, CLIP ) { \
|
#define CONV_INT( TYPE, CLIP ) { \
|
||||||
TYPE ** restrict p = (TYPE **) seq->pts; \
|
TYPE * restrict p = (TYPE *) VIPS_REGION_ADDR( ir, le, y ); \
|
||||||
TYPE * restrict q = (TYPE *) VIPS_REGION_ADDR( or, le, y ); \
|
TYPE * restrict q = (TYPE *) VIPS_REGION_ADDR( or, le, y ); \
|
||||||
|
int * restrict offsets = seq->offsets; \
|
||||||
\
|
\
|
||||||
for( x = 0; x < sz; x++ ) { \
|
for( x = 0; x < sz; x++ ) { \
|
||||||
int sum; \
|
int sum; \
|
||||||
@ -577,21 +573,23 @@ vips_convi_gen_vector( VipsRegion *or,
|
|||||||
\
|
\
|
||||||
sum = 0; \
|
sum = 0; \
|
||||||
for ( i = 0; i < nnz; i++ ) \
|
for ( i = 0; i < nnz; i++ ) \
|
||||||
sum += t[i] * p[i][x]; \
|
sum += t[i] * p[offsets[i]]; \
|
||||||
\
|
\
|
||||||
sum = ((sum + rounding) / scale) + offset; \
|
sum = ((sum + rounding) / scale) + offset; \
|
||||||
\
|
\
|
||||||
CLIP; \
|
CLIP; \
|
||||||
\
|
\
|
||||||
q[x] = sum; \
|
q[x] = sum; \
|
||||||
|
p += 1; \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
/* FLOAT inner loops.
|
/* FLOAT inner loops.
|
||||||
*/
|
*/
|
||||||
#define CONV_FLOAT( TYPE ) { \
|
#define CONV_FLOAT( TYPE ) { \
|
||||||
TYPE ** restrict p = (TYPE **) seq->pts; \
|
TYPE * restrict p = (TYPE *) VIPS_REGION_ADDR( ir, le, y ); \
|
||||||
TYPE * restrict q = (TYPE *) VIPS_REGION_ADDR( or, le, y ); \
|
TYPE * restrict q = (TYPE *) VIPS_REGION_ADDR( or, le, y ); \
|
||||||
|
int * restrict offsets = seq->offsets; \
|
||||||
\
|
\
|
||||||
for( x = 0; x < sz; x++ ) { \
|
for( x = 0; x < sz; x++ ) { \
|
||||||
double sum; \
|
double sum; \
|
||||||
@ -599,11 +597,12 @@ vips_convi_gen_vector( VipsRegion *or,
|
|||||||
\
|
\
|
||||||
sum = 0; \
|
sum = 0; \
|
||||||
for ( i = 0; i < nnz; i++ ) \
|
for ( i = 0; i < nnz; i++ ) \
|
||||||
sum += t[i] * p[i][x]; \
|
sum += t[i] * p[offsets[i]]; \
|
||||||
\
|
\
|
||||||
sum = (sum / scale) + offset; \
|
sum = (sum / scale) + offset; \
|
||||||
\
|
\
|
||||||
q[x] = sum; \
|
q[x] = sum; \
|
||||||
|
p += 1; \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -691,20 +690,15 @@ vips_convi_gen( VipsRegion *or,
|
|||||||
y = z / M->Xsize;
|
y = z / M->Xsize;
|
||||||
|
|
||||||
seq->offsets[i] =
|
seq->offsets[i] =
|
||||||
VIPS_REGION_ADDR( ir, x + le, y + to ) -
|
(VIPS_REGION_ADDR( ir, x + le, y + to ) -
|
||||||
VIPS_REGION_ADDR( ir, le, to );
|
VIPS_REGION_ADDR( ir, le, to )) /
|
||||||
|
VIPS_IMAGE_SIZEOF_ELEMENT( ir->im );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
VIPS_GATE_START( "vips_convi_gen: work" );
|
VIPS_GATE_START( "vips_convi_gen: work" );
|
||||||
|
|
||||||
for( y = to; y < bo; y++ ) {
|
for( y = to; y < bo; y++ ) {
|
||||||
/* Init pts for this line of PELs.
|
|
||||||
*/
|
|
||||||
for( z = 0; z < nnz; z++ )
|
|
||||||
seq->pts[z] = seq->offsets[z] +
|
|
||||||
VIPS_REGION_ADDR( ir, le, y );
|
|
||||||
|
|
||||||
switch( in->BandFmt ) {
|
switch( in->BandFmt ) {
|
||||||
case VIPS_FORMAT_UCHAR:
|
case VIPS_FORMAT_UCHAR:
|
||||||
CONV_INT( unsigned char, CLIP_UCHAR( sum ) );
|
CONV_INT( unsigned char, CLIP_UCHAR( sum ) );
|
||||||
|
Loading…
Reference in New Issue
Block a user