speed up vips_shrinkh()

remove the per-sequence sum buffer: a local accumulator variable is enough, and it is about 5% faster

see https://github.com/jcupitt/libvips/pull/369
This commit is contained in:
John Cupitt 2016-01-22 09:15:38 +00:00
parent d29710db66
commit 2e5880b69c
2 changed files with 23 additions and 62 deletions

View File

@ -1,16 +1,18 @@
12/1/16 started 8.2.2 12/1/16 started 8.2.2
- changes to ease compiling C++ binding with MSVC - changes to ease compiling C++ binding with MSVC [Lovell Fuller]
- reorder file tests to put slow loaders last - reorder file tests to put slow loaders last
- ifthenelse needs less C stack during eval - ifthenelse needs less C stack during eval
- better rounding in bilinear interpolator - better rounding in bilinear interpolator
- fix to "make check" in non-C locales - fix to "make check" in non-C locales [felixbuenemann]
- use builtin isnan isinf when possible [Lovell Fuller]
- tune vips_shrinkh(), 30% faster [Lovell Fuller]
1/1/16 started 8.2.1 1/1/16 started 8.2.1
- add a compat stub, thanks Benjamin - add a compat stub [Benjamin Gilbert]
- python bandjoin is now just an instance function - python bandjoin is now just an instance function
- small doc improvements - small doc improvements
- small vips7 C++ improvement - small vips7 C++ improvement
- remove exception specifications from vips8 C++ interface - remove exception specifications from vips8 C++ interface [Lovell Fuller]
- VImage::get_typeof() now returns GType - VImage::get_typeof() now returns GType
7/10/15 started 8.2.0 7/10/15 started 8.2.0

View File

@ -2,6 +2,8 @@
* *
* 30/10/15 * 30/10/15
* - from shrink.c * - from shrink.c
* 22/1/16
* - reorganise loops, 30% faster
*/ */
/* /*
@ -61,61 +63,20 @@ typedef VipsResampleClass VipsShrinkhClass;
G_DEFINE_TYPE( VipsShrinkh, vips_shrinkh, VIPS_TYPE_RESAMPLE ); G_DEFINE_TYPE( VipsShrinkh, vips_shrinkh, VIPS_TYPE_RESAMPLE );
/* Our per-sequence parameter struct. Somewhere to sum band elements.
*/
typedef struct {
VipsRegion *ir;
VipsPel *sum;
} VipsShrinkhSequence;
/* Free a sequence value.
*/
static int
vips_shrinkh_stop( void *vseq, void *a, void *b )
{
VipsShrinkhSequence *seq = (VipsShrinkhSequence *) vseq;
VIPS_FREEF( g_object_unref, seq->ir );
return( 0 );
}
/* Make a sequence value.
*/
static void *
vips_shrinkh_start( VipsImage *out, void *a, void *b )
{
VipsImage *in = (VipsImage *) a;
VipsShrinkhSequence *seq;
if( !(seq = VIPS_NEW( out, VipsShrinkhSequence )) )
return( NULL );
seq->ir = vips_region_new( in );
/* Big enough for the largest intermediate.
*/
seq->sum = VIPS_ARRAY( out,
in->Bands * vips_format_sizeof( VIPS_FORMAT_DPCOMPLEX ),
VipsPel );
return( (void *) seq );
}
/* Integer shrink. /* Integer shrink.
*/ */
#define ISHRINK( TYPE ) { \ #define ISHRINK( TYPE ) { \
int * restrict sum = (int *) seq->sum; \
TYPE * restrict p = (TYPE *) in; \ TYPE * restrict p = (TYPE *) in; \
TYPE * restrict q = (TYPE *) out; \ TYPE * restrict q = (TYPE *) out; \
\ \
for( x = 0; x < width; x++ ) { \ for( x = 0; x < width; x++ ) { \
for( b = 0; b < bands; b++ ) { \ for( b = 0; b < bands; b++ ) { \
sum[b] = 0; \ int sum; \
\
sum = 0; \
for( x1 = b; x1 < ne; x1 += bands ) \ for( x1 = b; x1 < ne; x1 += bands ) \
sum[b] += p[x1]; \ sum += p[x1]; \
q[b] = (sum[b] + shrink->xshrink / 2) / \ q[b] = (sum + shrink->xshrink / 2) / \
shrink->xshrink; \ shrink->xshrink; \
} \ } \
p += ne; \ p += ne; \
@ -126,16 +87,17 @@ vips_shrinkh_start( VipsImage *out, void *a, void *b )
/* Float shrink. /* Float shrink.
*/ */
#define FSHRINK( TYPE ) { \ #define FSHRINK( TYPE ) { \
double * restrict sum = (double *) seq->sum; \
TYPE * restrict p = (TYPE *) in; \ TYPE * restrict p = (TYPE *) in; \
TYPE * restrict q = (TYPE *) out; \ TYPE * restrict q = (TYPE *) out; \
\ \
for( x = 0; x < width; x++ ) { \ for( x = 0; x < width; x++ ) { \
for( b = 0; b < bands; b++ ) { \ for( b = 0; b < bands; b++ ) { \
sum[b] = 0.0; \ double sum; \
\
sum = 0.0; \
for( x1 = b; x1 < ne; x1 += bands ) \ for( x1 = b; x1 < ne; x1 += bands ) \
sum[b] += p[x1]; \ sum += p[x1]; \
q[b] = sum[b] / shrink->xshrink; \ q[b] = sum / shrink->xshrink; \
} \ } \
p += ne; \ p += ne; \
q += bands; \ q += bands; \
@ -145,8 +107,7 @@ vips_shrinkh_start( VipsImage *out, void *a, void *b )
/* Generate an area of @or. @ir is large enough. /* Generate an area of @or. @ir is large enough.
*/ */
static void static void
vips_shrinkh_gen2( VipsShrinkh *shrink, VipsShrinkhSequence *seq, vips_shrinkh_gen2( VipsShrinkh *shrink, VipsRegion *or, VipsRegion *ir,
VipsRegion *or, VipsRegion *ir,
int left, int top, int width ) int left, int top, int width )
{ {
VipsResample *resample = VIPS_RESAMPLE( shrink ); VipsResample *resample = VIPS_RESAMPLE( shrink );
@ -188,12 +149,11 @@ vips_shrinkh_gen2( VipsShrinkh *shrink, VipsShrinkhSequence *seq,
} }
static int static int
vips_shrinkh_gen( VipsRegion *or, void *vseq, vips_shrinkh_gen( VipsRegion *or, void *seq,
void *a, void *b, gboolean *stop ) void *a, void *b, gboolean *stop )
{ {
VipsShrinkhSequence *seq = (VipsShrinkhSequence *) vseq;
VipsShrinkh *shrink = (VipsShrinkh *) b; VipsShrinkh *shrink = (VipsShrinkh *) b;
VipsRegion *ir = seq->ir; VipsRegion *ir = (VipsRegion *) seq;
VipsRect *r = &or->valid; VipsRect *r = &or->valid;
int y; int y;
@ -231,8 +191,7 @@ vips_shrinkh_gen( VipsRegion *or, void *vseq,
VIPS_GATE_START( "vips_shrinkh_gen: work" ); VIPS_GATE_START( "vips_shrinkh_gen: work" );
vips_shrinkh_gen2( shrink, seq, vips_shrinkh_gen2( shrink, or, ir,
or, ir,
r->left, r->top + y, r->width ); r->left, r->top + y, r->width );
VIPS_GATE_STOP( "vips_shrinkh_gen: work" ); VIPS_GATE_STOP( "vips_shrinkh_gen: work" );
@ -300,7 +259,7 @@ vips_shrinkh_build( VipsObject *object )
#endif /*DEBUG*/ #endif /*DEBUG*/
if( vips_image_generate( resample->out, if( vips_image_generate( resample->out,
vips_shrinkh_start, vips_shrinkh_gen, vips_shrinkh_stop, vips_start_one, vips_shrinkh_gen, vips_stop_one,
in, shrink ) ) in, shrink ) )
return( -1 ); return( -1 );