This commit is contained in:
John Cupitt 2009-04-05 10:14:57 +00:00
parent bfac02d427
commit 90ecad14fc
3 changed files with 44 additions and 15 deletions

View File

@ -1,5 +1,6 @@
3/4/09 started 7.19.0
- version bump
- tiny conv speedup
25/3/09 started 7.18.0
- revised version numbers

15
TODO
View File

@ -1,8 +1,23 @@
- restrict in im_conv
before
$ time vips im_conv wtc.v t.v mask.con
vips warning: im_conv: 59722 overflows and 675925 underflows detected
real 0m13.303s
user 0m11.013s
sys 0m1.120s
WONTFIX for 7.18
================
- try adding "restrict" to im_conv? other interpolators?
- can we use conv_sep to speed up the memuse benchmarks?
- move im_shrink & friends to resample?
match_linear, match_linear_search?

View File

@ -64,6 +64,9 @@
* 12/5/08
* - int rounding was +1 too much, argh
* - only rebuild the buffer offsets if bpl changes
* 5/4/09
* - tiny speedup ... change ++ to +=1 in inner loop
* - add restrict, though it doesn't seem to help gcc
*/ */
/* /*
@ -117,7 +120,7 @@ typedef struct {
INTMASK *mask; /* Copy of mask arg */ INTMASK *mask; /* Copy of mask arg */
int nnz; /* Number of non-zero mask elements */ int nnz; /* Number of non-zero mask elements */
int *coeff; /* Array of non-zero mask coefficients */ int * restrict coeff; /* Array of non-zero mask coefficients */
int underflow; /* Global underflow/overflow counts */ int underflow; /* Global underflow/overflow counts */
int overflow; int overflow;
@ -199,7 +202,7 @@ typedef struct {
REGION *ir; /* Input region */ REGION *ir; /* Input region */
int *offsets; /* Offsets for each non-zero matrix element */ int *offsets; /* Offsets for each non-zero matrix element */
PEL **pts; /* Per-non-zero mask element image pointers */ PEL * restrict * restrict pts; /* Per-non-zero mask element pointers */
int underflow; /* Underflow/overflow counts */ int underflow; /* Underflow/overflow counts */
int overflow; int overflow;
@ -259,7 +262,11 @@ conv_start( IMAGE *out, void *a, void *b )
return( seq ); return( seq );
} }
#define INNER sum += *t++ * (*p++)[x] #define INNER { \
sum += *t * (*p)[x]; \
t += 1; \
p += 1; \
}
/* INT and FLOAT inner loops. /* INT and FLOAT inner loops.
*/ */
@ -267,11 +274,14 @@ conv_start( IMAGE *out, void *a, void *b )
TYPE *q = (TYPE *) IM_REGION_ADDR( or, le, y ); \ TYPE *q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
\ \
for( x = 0; x < sz; x++ ) { \ for( x = 0; x < sz; x++ ) { \
int sum = 0; \ int * restrict t; \
int *t = conv->coeff; \ TYPE ** restrict p; \
TYPE **p = (TYPE **) seq->pts; \ int sum; \
\ \
z = 0; \ z = 0; \
sum = 0; \
t = conv->coeff; \
p = (TYPE **) seq->pts; \
IM_UNROLL( conv->nnz, INNER ); \ IM_UNROLL( conv->nnz, INNER ); \
\ \
sum = ((sum + rounding) / mask->scale) + mask->offset; \ sum = ((sum + rounding) / mask->scale) + mask->offset; \
@ -286,11 +296,14 @@ conv_start( IMAGE *out, void *a, void *b )
TYPE *q = (TYPE *) IM_REGION_ADDR( or, le, y ); \ TYPE *q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
\ \
for( x = 0; x < sz; x++ ) { \ for( x = 0; x < sz; x++ ) { \
double sum = 0; \ int * restrict t; \
int *t = conv->coeff; \ TYPE ** restrict p; \
TYPE **p = (TYPE **) seq->pts; \ double sum; \
\ \
z = 0; \ z = 0; \
sum = 0; \
t = conv->coeff; \
p = (TYPE **) seq->pts; \
IM_UNROLL( conv->nnz, INNER ); \ IM_UNROLL( conv->nnz, INNER ); \
\ \
sum = (sum / mask->scale) + mask->offset; \ sum = (sum / mask->scale) + mask->offset; \
@ -405,13 +418,13 @@ im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask )
/* Check parameters. /* Check parameters.
*/ */
if( !in || in->Coding != IM_CODING_NONE || im_iscomplex( in ) ) { if( !in || in->Coding != IM_CODING_NONE || im_iscomplex( in ) ) {
im_errormsg( "im_conv: input non-complex uncoded please!"); im_error( "im_conv", "%s", _( "non-complex uncoded only" ) );
return( -1 ); return( -1 );
} }
if( !mask || mask->xsize > 1000 || mask->ysize > 1000 || if( !mask || mask->xsize > 1000 || mask->ysize > 1000 ||
mask->xsize <= 0 || mask->ysize <= 0 || !mask->coeff || mask->xsize <= 0 || mask->ysize <= 0 || !mask->coeff ||
mask->scale == 0 ) { mask->scale == 0 ) {
im_errormsg( "im_conv: nonsense mask parameters" ); im_error( "im_conv", "%s", _( "nonsense mask parameters" ) );
return( -1 ); return( -1 );
} }
if( im_piocheck( in, out ) ) if( im_piocheck( in, out ) )
@ -427,7 +440,7 @@ im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask )
out->Xsize -= mask->xsize - 1; out->Xsize -= mask->xsize - 1;
out->Ysize -= mask->ysize - 1; out->Ysize -= mask->ysize - 1;
if( out->Xsize <= 0 || out->Ysize <= 0 ) { if( out->Xsize <= 0 || out->Ysize <= 0 ) {
im_errormsg( "im_conv: image too small for mask" ); im_error( "im_conv", "%s", _( "image too small for mask" ) );
return( -1 ); return( -1 );
} }