From 24e86ae458298a387bf3f7cb6c73105f34ae4e9b Mon Sep 17 00:00:00 2001 From: John Cupitt Date: Fri, 10 Feb 2012 10:00:39 +0000 Subject: [PATCH] better im_shrink() faster, lower memuse, simpler --- ChangeLog | 1 + libvips/resample/im_shrink.c | 210 +++++++++++++++++++++-------------- 2 files changed, 126 insertions(+), 85 deletions(-) diff --git a/ChangeLog b/ChangeLog index c2d488c6..72db8471 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ - delayed write to foreign via a "w" image was not working - support operations with many returns in Python - sequential read mode for pngload +- better im_shrink() 20/8/11 started 7.27.0 - version bump for new dev cycle diff --git a/libvips/resample/im_shrink.c b/libvips/resample/im_shrink.c index 73ce9089..eabbc369 100644 --- a/libvips/resample/im_shrink.c +++ b/libvips/resample/im_shrink.c @@ -27,6 +27,9 @@ * - fall back to im_copy() for 1/1 shrink * 2/2/11 * - gtk-doc + * 10/2/12 + * - faster, simpler + * - shrink in chunks to reduce peak memuse for large shrinks */ /* @@ -69,19 +72,22 @@ /* Our main parameter struct. */ typedef struct { + IMAGE *in; + IMAGE *out; double xshrink; /* Shrink factors */ double yshrink; + int mw; /* Size of area we average */ int mh; int np; /* Number of pels we average */ } ShrinkInfo; -/* Our per-sequence parameter struct. We hold an offset for each pel we - * average. +/* Our per-sequence parameter struct. Somewhere to sum band elements. */ typedef struct { REGION *ir; - int *off; + + VipsPel *sum; } SeqInfo; /* Free a sequence value. @@ -111,10 +117,10 @@ shrink_start( IMAGE *out, void *a, void *b ) /* Init! */ seq->ir = NULL; - seq->off = NULL; + seq->sum = NULL; seq->ir = im_region_create( in ); - seq->off = IM_ARRAY( out, st->np, int ); - if( !seq->off || !seq->ir ) { + seq->sum = (void *) IM_ARRAY( out, in->Bands, double ); + if( !seq->sum || !seq->ir ) { shrink_stop( seq, in, st ); return( NULL ); } @@ -124,51 +130,92 @@ shrink_start( IMAGE *out, void *a, void *b ) /* Integer shrink. */ -#define ishrink(TYPE) \ - for( y = to; y < bo; y++ ) { \ - TYPE *q = (TYPE *) IM_REGION_ADDR( or, le, y ); \ - \ - for( x = le; x < ri; x++ ) { \ - int ix = x * st->xshrink; \ - int iy = y * st->yshrink; \ - TYPE *p = (TYPE *) IM_REGION_ADDR( ir, ix, iy ); \ - \ - for( k = 0; k < ir->im->Bands; k++ ) { \ - int sum = 0; \ - int *t = seq->off; \ - \ - for( z = 0; z < st->np; z++ ) \ - sum += p[*t++]; \ - \ - *q++ = sum / st->np; \ - p++; \ - } \ - } \ - } +#define ISHRINK( TYPE ) { \ + int *sum = (int *) seq->sum; \ + TYPE *p = (TYPE *) in; \ + TYPE *q = (TYPE *) out; \ + \ + for( y1 = 0; y1 < st->mh; y1++ ) { \ + for( i = 0, x1 = 0; x1 < st->mw; x1++ ) \ + for( b = 0; b < bands; b++, i++ ) \ + sum[b] += p[i]; \ + \ + p += ls; \ + } \ + \ + for( b = 0; b < bands; b++ ) \ + q[b] = (sum[b] + st->np / 2) / st->np; \ +} -/* FP shrink. +/* Float shrink. */ -#define fshrink(TYPE) \ - for( y = to; y < bo; y++ ) { \ - TYPE *q = (TYPE *) IM_REGION_ADDR( or, le, y ); \ - \ - for( x = le; x < ri; x++ ) { \ - int ix = x * st->xshrink; \ - int iy = y * st->yshrink; \ - TYPE *p = (TYPE *) IM_REGION_ADDR( ir, ix, iy ); \ - \ - for( k = 0; k < ir->im->Bands; k++ ) { \ - double sum = 0; \ - int *t = seq->off; \ - \ - for( z = 0; z < st->np; z++ ) \ - sum += p[*t++]; \ - \ - *q++ = sum / st->np; \ - p++; \ - } \ - } \ +#define FSHRINK( TYPE ) { \ + double *sum = (double *) seq->sum; \ + TYPE *p = (TYPE *) in; \ + TYPE *q = (TYPE *) out; \ + \ + for( y1 = 0; y1 < st->mh; y1++ ) { \ + for( i = 0, x1 = 0; x1 < st->mw; x1++ ) \ + for( b = 0; b < bands; b++, i++ ) \ + sum[b] += p[i]; \ + \ + p += ls; \ + } \ + \ + for( b = 0; b < bands; b++ ) \ + q[b] = sum[b] / st->np; \ +} + +/* Generate an area of @or. @ir is large enough. + */ +static void +shrink_gen2( ShrinkInfo *st, SeqInfo *seq, + REGION *or, REGION *ir, + int left, int top, int width, int height ) +{ + const int bands = st->in->Bands; + const int sizeof_pixel = VIPS_IMAGE_SIZEOF_PEL( st->in ); + const int ls = VIPS_REGION_LSKIP( ir ) / + VIPS_IMAGE_SIZEOF_ELEMENT( st->in ); + + int x, y, i; + int x1, y1, b; + + for( y = 0; y < height; y++ ) { + VipsPel *out = IM_REGION_ADDR( or, left, top + y ); + + for( x = 0; x < width; x++ ) { + int ix = (left + x) * st->xshrink; + int iy = (top + y) * st->yshrink; + VipsPel *in = IM_REGION_ADDR( ir, ix, iy ); + + for( i = 0; i < sizeof_pixel; i++ ) + seq->sum[i] = 0; + + switch( st->in->BandFmt ) { + case IM_BANDFMT_UCHAR: + ISHRINK( unsigned char ); break; + case IM_BANDFMT_CHAR: + ISHRINK( char ); break; + case IM_BANDFMT_USHORT: + ISHRINK( unsigned short ); break; + case IM_BANDFMT_SHORT: + ISHRINK( short ); break; + case IM_BANDFMT_UINT: + ISHRINK( unsigned int ); break; + case IM_BANDFMT_INT: + ISHRINK( int ); break; + case IM_BANDFMT_FLOAT: + FSHRINK( float ); break; + case IM_BANDFMT_DOUBLE: + FSHRINK( double ); break; + + default: + g_assert( 0 ); + } + } } +} /* Shrink a REGION. */ @@ -176,51 +223,44 @@ static int shrink_gen( REGION *or, void *vseq, void *a, void *b ) { SeqInfo *seq = (SeqInfo *) vseq; + IMAGE *in = (IMAGE *) a; ShrinkInfo *st = (ShrinkInfo *) b; REGION *ir = seq->ir; Rect *r = &or->valid; - Rect s; - int le = r->left; - int ri = IM_RECT_RIGHT( r ); - int to = r->top; - int bo = IM_RECT_BOTTOM(r); - int x, y, z, k; - - /* What part of the input image do we need? Very careful: round left - * down, round right up. + /* How do we chunk up the image? We don't want to prepare the whole of + * the input region corresponding to *r since it could be huge. + * + * Each pixel of *r will depend on roughly mw x mh + * pixels, so we walk *r in chunks which map to the tile size. + * */ - s.left = r->left * st->xshrink; - s.top = r->top * st->yshrink; - s.width = ceil( IM_RECT_RIGHT( r ) * st->xshrink ) - s.left; - s.height = ceil( IM_RECT_BOTTOM( r ) * st->yshrink ) - s.top; - if( im_prepare( ir, &s ) ) - return( -1 ); + int xstep = 1 + VIPS__TILE_WIDTH / st->mw; + int ystep = 1 + VIPS__TILE_HEIGHT / st->mh; - /* Init offsets for pel addressing. Note that offsets must be for the - * type we will address the memory array with. - */ - for( z = 0, y = 0; y < st->mh; y++ ) - for( x = 0; x < st->mw; x++ ) - seq->off[z++] = (IM_REGION_ADDR( ir, x, y ) - - IM_REGION_ADDR( ir, 0, 0 )) / - IM_IMAGE_SIZEOF_ELEMENT( ir->im ); + int x, y; - switch( ir->im->BandFmt ) { - case IM_BANDFMT_UCHAR: ishrink(unsigned char); break; - case IM_BANDFMT_CHAR: ishrink(char); break; - case IM_BANDFMT_USHORT: ishrink(unsigned short); break; - case IM_BANDFMT_SHORT: ishrink(short); break; - case IM_BANDFMT_UINT: ishrink(unsigned int); break; - case IM_BANDFMT_INT: ishrink(int); break; - case IM_BANDFMT_FLOAT: fshrink(float); break; - case IM_BANDFMT_DOUBLE: fshrink(double); break; + for( y = 0; y < r->height; y += ystep ) + for( x = 0; x < r->width; x += xstep ) { + /* Clip the this rect against the image size. + */ + int width = VIPS_MIN( xstep, in->Xsize - r->left - x ); + int height = VIPS_MIN( ystep, in->Ysize - r->top - y ); + + Rect s; + + s.left = (r->left + x) * st->xshrink; + s.top = (r->top + y) * st->yshrink; + s.width = 1 + ceil( width * st->xshrink ); + s.height = 1 + ceil( height * st->yshrink ); + if( im_prepare( ir, &s ) ) + return( -1 ); + + shrink_gen2( st, seq, + or, ir, + r->left + x, r->top + y, width, height ); + } - default: - im_error( "im_shrink", "%s", _( "unsupported input format" ) ); - return( -1 ); - } - return( 0 ); }