This commit is contained in:
John Cupitt 2009-04-06 11:14:23 +00:00
parent 90ecad14fc
commit 2219ad0703
1 changed file with 14 additions and 18 deletions

View File

@@ -65,7 +65,7 @@
* - int rounding was +1 too much, argh
* - only rebuild the buffer offsets if bpl changes
* 5/4/09
-* - tiny speedup ... change ++ to +=1 in inner loop
+* - tiny speedups and cleanups
* - add restrict, though it doesn't seem to help gcc
*/
@@ -120,7 +120,7 @@ typedef struct {
INTMASK *mask; /* Copy of mask arg */
int nnz; /* Number of non-zero mask elements */
-int * restrict coeff; /* Array of non-zero mask coefficients */
+int *coeff; /* Array of non-zero mask coefficients */
int underflow; /* Global underflow/overflow counts */
int overflow;
@@ -202,7 +202,7 @@ typedef struct {
REGION *ir; /* Input region */
int *offsets; /* Offsets for each non-zero matrix element */
-PEL * restrict * restrict pts; /* Per-non-zero mask element pointers */
+PEL **pts; /* Per-non-zero mask element pointers */
int underflow; /* Underflow/overflow counts */
int overflow;
@@ -263,25 +263,22 @@ conv_start( IMAGE *out, void *a, void *b )
}
#define INNER { \
-sum += *t * (*p)[x]; \
-t += 1; \
-p += 1; \
+sum += t[i] * p[i][x]; \
+i += 1; \
}
/* INT and FLOAT inner loops.
*/
#define CONV_INT( TYPE, IM_CLIP ) { \
-TYPE *q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
+TYPE ** restrict p = (TYPE **) seq->pts; \
+TYPE * restrict q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
\
for( x = 0; x < sz; x++ ) { \
-int * restrict t; \
-TYPE ** restrict p; \
int sum; \
+int i; \
\
-z = 0; \
sum = 0; \
-t = conv->coeff; \
-p = (TYPE **) seq->pts; \
+i = 0; \
IM_UNROLL( conv->nnz, INNER ); \
\
sum = ((sum + rounding) / mask->scale) + mask->offset; \
@@ -293,17 +290,15 @@ conv_start( IMAGE *out, void *a, void *b )
}
#define CONV_FLOAT( TYPE ) { \
-TYPE *q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
+TYPE ** restrict p = (TYPE **) seq->pts; \
+TYPE * restrict q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
\
for( x = 0; x < sz; x++ ) { \
-int * restrict t; \
-TYPE ** restrict p; \
double sum; \
+int i; \
\
-z = 0; \
sum = 0; \
-t = conv->coeff; \
-p = (TYPE **) seq->pts; \
+i = 0; \
IM_UNROLL( conv->nnz, INNER ); \
\
sum = (sum / mask->scale) + mask->offset; \
@@ -322,6 +317,7 @@ conv_gen( REGION *or, void *vseq, void *a, void *b )
Conv *conv = (Conv *) b;
REGION *ir = seq->ir;
INTMASK *mask = conv->mask;
+int * restrict t = conv->coeff;
/* You might think this should be (scale+1)/2, but then we'd be adding
* one for scale == 1.