3x3 mode for im_conv()
This commit is contained in:
parent
f5faed779c
commit
38dde13826
@ -14,6 +14,7 @@
|
|||||||
- fix a mixup with ANY hints that caused performance problems on the main
|
- fix a mixup with ANY hints that caused performance problems on the main
|
||||||
benchmark
|
benchmark
|
||||||
- rewritten im_circle as im_draw_circle, im_circle moved to almostdeprecated
|
- rewritten im_circle as im_draw_circle, im_circle moved to almostdeprecated
|
||||||
|
- special-case 3x3 masks in im_conv() for a 20% speedup
|
||||||
|
|
||||||
12/5/10 started 7.22.2
|
12/5/10 started 7.22.2
|
||||||
- the conditional image of ifthenelse can be any format, a (!=0) is added if
|
- the conditional image of ifthenelse can be any format, a (!=0) is added if
|
||||||
|
@ -51,6 +51,8 @@
|
|||||||
* 3/2/10
|
* 3/2/10
|
||||||
* - gtkdoc
|
* - gtkdoc
|
||||||
* - more cleanups
|
* - more cleanups
|
||||||
|
* 23/08/10
|
||||||
|
* - add a special case for 3x3 masks, about 20% faster
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -307,7 +309,7 @@ conv_start( IMAGE *out, void *a, void *b )
|
|||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Convolve!
|
/* Convolve! See below for the special-case 3x3 path.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
conv_gen( REGION *or, void *vseq, void *a, void *b )
|
conv_gen( REGION *or, void *vseq, void *a, void *b )
|
||||||
@ -407,10 +409,162 @@ conv_gen( REGION *or, void *vseq, void *a, void *b )
|
|||||||
return( 0 );
|
return( 0 );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Integer inner loop for one output line of the 3x3 convolver.
 *
 * Expands inside conv3x3_gen() and picks up that function's locals: ir, or,
 * le, y, x, sz, bands, m, mask and rounding.
 *
 * TYPE is the C type of one band element. IM_CLIP is a statement operating
 * on the local "sum"; it runs after scale/offset and before the store.
 */
#define CONV3x3_INT( TYPE, IM_CLIP ) { \
	TYPE * restrict row0 = (TYPE *) IM_REGION_ADDR( ir, le, y ); \
	TYPE * restrict row1 = (TYPE *) IM_REGION_ADDR( ir, le, y + 1 ); \
	TYPE * restrict row2 = (TYPE *) IM_REGION_ADDR( ir, le, y + 2 ); \
	TYPE * restrict q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
	\
	/* Offsets, in elements, of the middle and right mask columns. \
	 */ \
	const int mid = bands; \
	const int right = bands * 2; \
	\
	for( x = 0; x < sz; x++ ) { \
		int sum = 0; \
		\
		/* Top, middle and bottom mask rows, in that order. \
		 */ \
		sum += m[0] * row0[x]; \
		sum += m[1] * row0[x + mid]; \
		sum += m[2] * row0[x + right]; \
		sum += m[3] * row1[x]; \
		sum += m[4] * row1[x + mid]; \
		sum += m[5] * row1[x + right]; \
		sum += m[6] * row2[x]; \
		sum += m[7] * row2[x + mid]; \
		sum += m[8] * row2[x + right]; \
		\
		/* Add half the scale before dividing, then apply the offset. \
		 */ \
		sum = ((sum + rounding) / mask->scale) + mask->offset; \
		\
		IM_CLIP; \
		\
		q[x] = sum; \
	} \
}
|
||||||
|
|
||||||
|
/* Floating-point inner loop for one output line of the 3x3 convolver.
 *
 * Expands inside conv3x3_gen() and picks up that function's locals: ir, or,
 * le, y, x, sz, bands, m and mask.
 *
 * TYPE is the C type of one band element. The sum is kept in a double and
 * there is no rounding term and no clipping on this path.
 */
#define CONV3x3_FLOAT( TYPE ) { \
	TYPE * restrict row0 = (TYPE *) IM_REGION_ADDR( ir, le, y ); \
	TYPE * restrict row1 = (TYPE *) IM_REGION_ADDR( ir, le, y + 1 ); \
	TYPE * restrict row2 = (TYPE *) IM_REGION_ADDR( ir, le, y + 2 ); \
	TYPE * restrict q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
	\
	/* Offsets, in elements, of the middle and right mask columns. \
	 */ \
	const int mid = bands; \
	const int right = bands * 2; \
	\
	for( x = 0; x < sz; x++ ) { \
		double sum = 0; \
		\
		/* One product per statement: each is widened to double \
		 * before it is added to the running total. \
		 */ \
		sum += m[0] * row0[x]; \
		sum += m[1] * row0[x + mid]; \
		sum += m[2] * row0[x + right]; \
		sum += m[3] * row1[x]; \
		sum += m[4] * row1[x + mid]; \
		sum += m[5] * row1[x + right]; \
		sum += m[6] * row2[x]; \
		sum += m[7] * row2[x + mid]; \
		sum += m[8] * row2[x + right]; \
		\
		sum = (sum / mask->scale) + mask->offset; \
		\
		q[x] = sum; \
	} \
}
|
||||||
|
|
||||||
|
/* 3x3 masks are very common, so we have a special path for them. This is
|
||||||
|
* about 20% faster than the general convolver above.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
conv3x3_gen( REGION *or, void *vseq, void *a, void *b )
|
||||||
|
{
|
||||||
|
ConvSequence *seq = (ConvSequence *) vseq;
|
||||||
|
IMAGE *in = (IMAGE *) a;
|
||||||
|
Conv *conv = (Conv *) b;
|
||||||
|
REGION *ir = seq->ir;
|
||||||
|
INTMASK *mask = conv->mask;
|
||||||
|
int * restrict m = mask->coeff;
|
||||||
|
|
||||||
|
/* You might think this should be (scale + 1) / 2, but then we'd be
|
||||||
|
* adding one for scale == 1.
|
||||||
|
*/
|
||||||
|
int rounding = mask->scale / 2;
|
||||||
|
|
||||||
|
Rect *r = &or->valid;
|
||||||
|
int le = r->left;
|
||||||
|
int to = r->top;
|
||||||
|
int bo = IM_RECT_BOTTOM( r );
|
||||||
|
int sz = IM_REGION_N_ELEMENTS( or );
|
||||||
|
int bands = in->Bands;
|
||||||
|
|
||||||
|
Rect s;
|
||||||
|
int x, y, z;
|
||||||
|
|
||||||
|
/* Prepare the section of the input image we need. A little larger
|
||||||
|
* than the section of the output image we are producing.
|
||||||
|
*/
|
||||||
|
s = *r;
|
||||||
|
s.width += 2;
|
||||||
|
s.height += 2;
|
||||||
|
if( im_prepare( ir, &s ) )
|
||||||
|
return( -1 );
|
||||||
|
|
||||||
|
for( y = to; y < bo; y++ ) {
|
||||||
|
switch( in->BandFmt ) {
|
||||||
|
case IM_BANDFMT_UCHAR:
|
||||||
|
CONV3x3_INT( unsigned char,
|
||||||
|
IM_CLIP_UCHAR( sum, seq ) );
|
||||||
|
break;
|
||||||
|
|
||||||
|
case IM_BANDFMT_CHAR:
|
||||||
|
CONV3x3_INT( signed char,
|
||||||
|
IM_CLIP_CHAR( sum, seq ) );
|
||||||
|
break;
|
||||||
|
|
||||||
|
case IM_BANDFMT_USHORT:
|
||||||
|
CONV3x3_INT( unsigned short,
|
||||||
|
IM_CLIP_USHORT( sum, seq ) );
|
||||||
|
break;
|
||||||
|
|
||||||
|
case IM_BANDFMT_SHORT:
|
||||||
|
CONV3x3_INT( signed short,
|
||||||
|
IM_CLIP_SHORT( sum, seq ) );
|
||||||
|
break;
|
||||||
|
|
||||||
|
case IM_BANDFMT_UINT:
|
||||||
|
CONV3x3_INT( unsigned int,
|
||||||
|
IM_CLIP_NONE( sum, seq ) );
|
||||||
|
break;
|
||||||
|
|
||||||
|
case IM_BANDFMT_INT:
|
||||||
|
CONV3x3_INT( signed int,
|
||||||
|
IM_CLIP_NONE( sum, seq ) );
|
||||||
|
break;
|
||||||
|
|
||||||
|
case IM_BANDFMT_FLOAT:
|
||||||
|
CONV3x3_FLOAT( float );
|
||||||
|
break;
|
||||||
|
|
||||||
|
case IM_BANDFMT_DOUBLE:
|
||||||
|
CONV3x3_FLOAT( double );
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
g_assert( 0 );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return( 0 );
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask )
|
im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask )
|
||||||
{
|
{
|
||||||
Conv *conv;
|
Conv *conv;
|
||||||
|
im_generate_fn generate;
|
||||||
|
|
||||||
/* Check parameters.
|
/* Check parameters.
|
||||||
*/
|
*/
|
||||||
@ -438,11 +592,16 @@ im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask )
|
|||||||
return( -1 );
|
return( -1 );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if( mask->xsize == 3 && mask->ysize == 3 )
|
||||||
|
generate = conv3x3_gen;
|
||||||
|
else
|
||||||
|
generate = conv_gen;
|
||||||
|
|
||||||
/* Set demand hints. FATSTRIP is good for us, as THINSTRIP will cause
|
/* Set demand hints. FATSTRIP is good for us, as THINSTRIP will cause
|
||||||
* too many recalculations on overlaps.
|
* too many recalculations on overlaps.
|
||||||
*/
|
*/
|
||||||
if( im_demand_hint( out, IM_FATSTRIP, in, NULL ) ||
|
if( im_demand_hint( out, IM_FATSTRIP, in, NULL ) ||
|
||||||
im_generate( out, conv_start, conv_gen, conv_stop, in, conv ) )
|
im_generate( out, conv_start, generate, conv_stop, in, conv ) )
|
||||||
return( -1 );
|
return( -1 );
|
||||||
|
|
||||||
out->Xoffset = -mask->xsize / 2;
|
out->Xoffset = -mask->xsize / 2;
|
||||||
|
Loading…
Reference in New Issue
Block a user