From 90ecad14fc27ae8f97175c5a78a5d7bd99ca5651 Mon Sep 17 00:00:00 2001 From: John Cupitt Date: Sun, 5 Apr 2009 10:14:57 +0000 Subject: [PATCH] stuff --- ChangeLog | 1 + TODO | 15 +++++++++++++ libsrc/convolution/im_conv.c | 43 +++++++++++++++++++++++------------- 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4c0f9008..42fca896 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,6 @@ 3/4/09 started 7.19.0 - version bump +- tiny conv speedup 25/3/09 started 7.18.0 - revised version numbers diff --git a/TODO b/TODO index 33ecf8c1..49bb07c5 100644 --- a/TODO +++ b/TODO @@ -1,8 +1,23 @@ +- restrict in im_conv + + before + + $ time vips im_conv wtc.v t.v mask.con + vips warning: im_conv: 59722 overflows and 675925 underflows detected + + real 0m13.303s + user 0m11.013s + sys 0m1.120s + + + WONTFIX for 7.18 ================ - try adding "restrict" to im_conv? other interpolators? +- can we use conv_sep to speed up the memuse benchmarks? + - move im_shrink & friends to resample? match_linear, match_linear_search? diff --git a/libsrc/convolution/im_conv.c b/libsrc/convolution/im_conv.c index 1417fbf1..ab935588 100644 --- a/libsrc/convolution/im_conv.c +++ b/libsrc/convolution/im_conv.c @@ -64,6 +64,9 @@ * 12/5/08 * - int rounding was +1 too much, argh * - only rebuild the buffer offsets if bpl changes + * 5/4/09 + * - tiny speedup ... change ++ to +=1 in inner loop + * - add restrict, though it doesn't seem to help gcc */ /* @@ -114,12 +117,12 @@ typedef struct { IMAGE *in; IMAGE *out; - INTMASK *mask; /* Copy of mask arg */ + INTMASK *mask; /* Copy of mask arg */ - int nnz; /* Number of non-zero mask elements */ - int *coeff; /* Array of non-zero mask coefficients */ + int nnz; /* Number of non-zero mask elements */ + int * restrict coeff; /* Array of non-zero mask coefficients */ - int underflow; /* Global underflow/overflow counts */ + int underflow; /* Global underflow/overflow counts */ int overflow; } Conv; @@ -199,7 +202,7 @@ typedef struct { REGION *ir; /* Input region */ int *offsets; /* Offsets for each non-zero matrix element */ - PEL **pts; /* Per-non-zero mask element image pointers */ + PEL * restrict * restrict pts; /* Per-non-zero mask element pointers */ int underflow; /* Underflow/overflow counts */ int overflow; @@ -259,7 +262,11 @@ conv_start( IMAGE *out, void *a, void *b ) return( seq ); } -#define INNER sum += *t++ * (*p++)[x] +#define INNER { \ + sum += *t * (*p)[x]; \ + t += 1; \ + p += 1; \ +} /* INT and FLOAT inner loops. */ @@ -267,11 +274,14 @@ conv_start( IMAGE *out, void *a, void *b ) TYPE *q = (TYPE *) IM_REGION_ADDR( or, le, y ); \ \ for( x = 0; x < sz; x++ ) { \ - int sum = 0; \ - int *t = conv->coeff; \ - TYPE **p = (TYPE **) seq->pts; \ + int * restrict t; \ + TYPE ** restrict p; \ + int sum; \ \ z = 0; \ + sum = 0; \ + t = conv->coeff; \ + p = (TYPE **) seq->pts; \ IM_UNROLL( conv->nnz, INNER ); \ \ sum = ((sum + rounding) / mask->scale) + mask->offset; \ @@ -286,11 +296,14 @@ conv_start( IMAGE *out, void *a, void *b ) TYPE *q = (TYPE *) IM_REGION_ADDR( or, le, y ); \ \ for( x = 0; x < sz; x++ ) { \ - double sum = 0; \ - int *t = conv->coeff; \ - TYPE **p = (TYPE **) seq->pts; \ + int * restrict t; \ + TYPE ** restrict p; \ + double sum; \ \ z = 0; \ + sum = 0; \ + t = conv->coeff; \ + p = (TYPE **) seq->pts; \ IM_UNROLL( conv->nnz, INNER ); \ \ sum = (sum / mask->scale) + mask->offset; \ @@ -405,13 +418,13 @@ im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask ) /* Check parameters. */ if( !in || in->Coding != IM_CODING_NONE || im_iscomplex( in ) ) { - im_errormsg( "im_conv: input non-complex uncoded please!"); + im_error( "im_conv", "%s", _( "non-complex uncoded only" ) ); return( -1 ); } if( !mask || mask->xsize > 1000 || mask->ysize > 1000 || mask->xsize <= 0 || mask->ysize <= 0 || !mask->coeff || mask->scale == 0 ) { - im_errormsg( "im_conv: nonsense mask parameters" ); + im_error( "im_conv", "%s", _( "nonsense mask parameters" ) ); return( -1 ); } if( im_piocheck( in, out ) ) @@ -427,7 +440,7 @@ im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask ) out->Xsize -= mask->xsize - 1; out->Ysize -= mask->ysize - 1; if( out->Xsize <= 0 || out->Ysize <= 0 ) { - im_errormsg( "im_conv: image too small for mask" ); + im_error( "im_conv", "%s", _( "image too small for mask" ) ); return( -1 ); }