This commit is contained in:
John Cupitt 2009-04-05 10:14:57 +00:00
parent bfac02d427
commit 90ecad14fc
3 changed files with 44 additions and 15 deletions

View File

@ -1,5 +1,6 @@
3/4/09 started 7.19.0
- version bump
- tiny conv speedup
25/3/09 started 7.18.0
- revised version numbers

15
TODO
View File

@ -1,8 +1,23 @@
- restrict in im_conv
before
$ time vips im_conv wtc.v t.v mask.con
vips warning: im_conv: 59722 overflows and 675925 underflows detected
real 0m13.303s
user 0m11.013s
sys 0m1.120s
WONTFIX for 7.18
================
- try adding "restrict" to im_conv? other interpolators?
- can we use conv_sep to speed up the memuse benchmarks?
- move im_shrink & friends to resample?
match_linear, match_linear_search?

View File

@ -64,6 +64,9 @@
* 12/5/08
* - int rounding was +1 too much, argh
* - only rebuild the buffer offsets if bpl changes
* 5/4/09
* - tiny speedup ... change ++ to +=1 in inner loop
* - add restrict, though it doesn't seem to help gcc
*/
/*
@ -114,12 +117,12 @@
typedef struct {
IMAGE *in;
IMAGE *out;
INTMASK *mask; /* Copy of mask arg */
INTMASK *mask; /* Copy of mask arg */
int nnz; /* Number of non-zero mask elements */
int *coeff; /* Array of non-zero mask coefficients */
int nnz; /* Number of non-zero mask elements */
int * restrict coeff; /* Array of non-zero mask coefficients */
int underflow; /* Global underflow/overflow counts */
int underflow; /* Global underflow/overflow counts */
int overflow;
} Conv;
@ -199,7 +202,7 @@ typedef struct {
REGION *ir; /* Input region */
int *offsets; /* Offsets for each non-zero matrix element */
PEL **pts; /* Per-non-zero mask element image pointers */
PEL * restrict * restrict pts; /* Per-non-zero mask element pointers */
int underflow; /* Underflow/overflow counts */
int overflow;
@ -259,7 +262,11 @@ conv_start( IMAGE *out, void *a, void *b )
return( seq );
}
#define INNER sum += *t++ * (*p++)[x]
#define INNER { \
sum += *t * (*p)[x]; \
t += 1; \
p += 1; \
}
/* INT and FLOAT inner loops.
*/
@ -267,11 +274,14 @@ conv_start( IMAGE *out, void *a, void *b )
TYPE *q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
\
for( x = 0; x < sz; x++ ) { \
int sum = 0; \
int *t = conv->coeff; \
TYPE **p = (TYPE **) seq->pts; \
int * restrict t; \
TYPE ** restrict p; \
int sum; \
\
z = 0; \
sum = 0; \
t = conv->coeff; \
p = (TYPE **) seq->pts; \
IM_UNROLL( conv->nnz, INNER ); \
\
sum = ((sum + rounding) / mask->scale) + mask->offset; \
@ -286,11 +296,14 @@ conv_start( IMAGE *out, void *a, void *b )
TYPE *q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
\
for( x = 0; x < sz; x++ ) { \
double sum = 0; \
int *t = conv->coeff; \
TYPE **p = (TYPE **) seq->pts; \
int * restrict t; \
TYPE ** restrict p; \
double sum; \
\
z = 0; \
sum = 0; \
t = conv->coeff; \
p = (TYPE **) seq->pts; \
IM_UNROLL( conv->nnz, INNER ); \
\
sum = (sum / mask->scale) + mask->offset; \
@ -405,13 +418,13 @@ im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask )
/* Check parameters.
*/
if( !in || in->Coding != IM_CODING_NONE || im_iscomplex( in ) ) {
im_errormsg( "im_conv: input non-complex uncoded please!");
im_error( "im_conv", "%s", _( "non-complex uncoded only" ) );
return( -1 );
}
if( !mask || mask->xsize > 1000 || mask->ysize > 1000 ||
mask->xsize <= 0 || mask->ysize <= 0 || !mask->coeff ||
mask->scale == 0 ) {
im_errormsg( "im_conv: nonsense mask parameters" );
im_error( "im_conv", "%s", _( "nonsense mask parameters" ) );
return( -1 );
}
if( im_piocheck( in, out ) )
@ -427,7 +440,7 @@ im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask )
out->Xsize -= mask->xsize - 1;
out->Ysize -= mask->ysize - 1;
if( out->Xsize <= 0 || out->Ysize <= 0 ) {
im_errormsg( "im_conv: image too small for mask" );
im_error( "im_conv", "%s", _( "image too small for mask" ) );
return( -1 );
}