/* @(#) Convolve an image with a seperable (1xN, or Nx1) INTMASK. Image can * @(#) have any number of bands, any non-complex type. Size and type of * @(#) output image matches type of input image. * @(#) * @(#) int * @(#) im_convsep( in, out, mask ) * @(#) IMAGE *in, *out; * @(#) INTMASK *mask; * @(#) * @(#) Also: im_convsep_raw(). As above, but does not add a black border. * @(#) * @(#) Returns either 0 (success) or -1 (fail) * * Copyright: 1990, N. Dessipris. * * Author: Nicos Dessipris * Written on: 29/04/1991 * Modified on: 29/4/93 K.Martinez for Sys5 * 9/3/01 JC * - rewritten using im_conv() * 27/7/01 JC * - rejects masks with scale == 0 * 7/4/04 * - now uses im_embed() with edge stretching on the input, not * the output * - sets Xoffset / Yoffset * 21/4/04 * - scale down int convolves at 1/2 way mark, much less likely to integer * overflow on intermediates * 12/5/08 * - int rounding was +1 too much, argh */ /* This file is part of VIPS. VIPS is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk */ #ifdef HAVE_CONFIG_H #include #endif /*HAVE_CONFIG_H*/ #include #include #include #include #include #include #ifdef WITH_DMALLOC #include #endif /*WITH_DMALLOC*/ /* Our parameters ... we take a copy of the mask argument. */ typedef struct { IMAGE *in; IMAGE *out; INTMASK *mask; /* Copy of mask arg */ int size; /* N for our 1xN or Nx1 mask */ int scale; /* Our scale ... we have to square mask->scale */ int underflow; /* Global underflow/overflow counts */ int overflow; } Conv; /* End of evaluation --- print overflows and underflows. */ static int conv_destroy( Conv *conv ) { /* Print underflow/overflow count. */ if( conv->overflow || conv->underflow ) im_warn( "im_convsep", _( "%d overflows and %d underflows " "detected" ), conv->overflow, conv->underflow ); if( conv->mask ) { (void) im_free_imask( conv->mask ); conv->mask = NULL; } return( 0 ); } static Conv * conv_new( IMAGE *in, IMAGE *out, INTMASK *mask ) { Conv *conv = IM_NEW( out, Conv ); if( !conv ) return( NULL ); conv->in = in; conv->out = out; conv->mask = NULL; conv->size = mask->xsize * mask->ysize; conv->scale = mask->scale * mask->scale; conv->underflow = 0; conv->overflow = 0; if( im_add_close_callback( out, (im_callback_fn) conv_destroy, conv, NULL ) || !(conv->mask = im_dup_imask( mask, "conv_mask" )) ) return( NULL ); return( conv ); } /* Our sequence value. */ typedef struct { Conv *conv; REGION *ir; /* Input region */ PEL *sum; /* Line buffer */ int underflow; /* Underflow/overflow counts */ int overflow; } ConvSequence; /* Free a sequence value. */ static int conv_stop( void *vseq, void *a, void *b ) { ConvSequence *seq = (ConvSequence *) vseq; Conv *conv = (Conv *) b; /* Add local under/over counts to global counts. */ conv->overflow += seq->overflow; conv->underflow += seq->underflow; IM_FREEF( im_region_free, seq->ir ); return( 0 ); } /* Convolution start function. */ static void * conv_start( IMAGE *out, void *a, void *b ) { IMAGE *in = (IMAGE *) a; Conv *conv = (Conv *) b; ConvSequence *seq; if( !(seq = IM_NEW( out, ConvSequence )) ) return( NULL ); /* Init! */ seq->conv = conv; seq->ir = NULL; seq->sum = NULL; seq->underflow = 0; seq->overflow = 0; /* Attach region and arrays. */ seq->ir = im_region_create( in ); if( im_isint( conv->out ) ) seq->sum = (PEL *) IM_ARRAY( out, IM_IMAGE_N_ELEMENTS( in ), int ); else seq->sum = (PEL *) IM_ARRAY( out, IM_IMAGE_N_ELEMENTS( in ), double ); if( !seq->ir || !seq->sum ) { conv_stop( seq, in, conv ); return( NULL ); } return( (void *) seq ); } /* What we do for every point in the mask, for each pixel. */ #define VERTICAL_CONV { z -= 1; li -= lskip; sum += coeff[z] * vfrom[li]; } #define HORIZONTAL_CONV { z -= 1; li -= bands; sum += coeff[z] * hfrom[li]; } /* INT and FLOAT inner loops. */ #define CONV_INT( TYPE, IM_CLIP ) { \ TYPE *vfrom; \ int *vto; \ int *hfrom; \ TYPE *hto; \ \ /* Convolve to sum array. We convolve the full width of \ * this input line. \ */ \ vfrom = (TYPE *) IM_REGION_ADDR( ir, le, y ); \ vto = (int *) seq->sum; \ for( x = 0; x < isz; x++ ) { \ int sum; \ \ z = conv->size; \ li = lskip * z; \ sum = 0; \ \ IM_UNROLL( z, VERTICAL_CONV ); \ \ sum = ((sum + rounding) / mask->scale) + mask->offset; \ \ vto[x] = sum; \ vfrom += 1; \ } \ \ /* Convolve sums to output. \ */ \ hfrom = (int *) seq->sum; \ hto = (TYPE *) IM_REGION_ADDR( or, le, y ); \ for( x = 0; x < osz; x++ ) { \ int sum; \ \ z = conv->size; \ li = bands * z; \ sum = 0; \ \ IM_UNROLL( z, HORIZONTAL_CONV ); \ \ sum = ((sum + rounding) / mask->scale) + mask->offset; \ \ IM_CLIP; \ \ hto[x] = sum; \ hfrom += 1; \ } \ } #define CONV_FLOAT( TYPE ) { \ TYPE *vfrom; \ double *vto; \ double *hfrom; \ TYPE *hto; \ \ /* Convolve to sum array. We convolve the full width of \ * this input line. \ */ \ vfrom = (TYPE *) IM_REGION_ADDR( ir, le, y ); \ vto = (double *) seq->sum; \ for( x = 0; x < isz; x++ ) { \ double sum; \ \ z = conv->size; \ li = lskip * z; \ sum = 0; \ \ IM_UNROLL( z, VERTICAL_CONV ); \ \ vto[x] = sum; \ vfrom += 1; \ } \ \ /* Convolve sums to output. \ */ \ hfrom = (double *) seq->sum; \ hto = (TYPE *) IM_REGION_ADDR( or, le, y ); \ for( x = 0; x < osz; x++ ) { \ double sum; \ \ z = conv->size; \ li = bands * z; \ sum = 0; \ \ IM_UNROLL( z, HORIZONTAL_CONV ); \ \ sum = (sum / conv->scale) + mask->offset; \ \ hto[x] = sum; \ hfrom += 1; \ } \ } /* Convolve! */ static int conv_gen( REGION *or, void *vseq, void *a, void *b ) { ConvSequence *seq = (ConvSequence *) vseq; IMAGE *in = (IMAGE *) a; Conv *conv = (Conv *) b; REGION *ir = seq->ir; INTMASK *mask = conv->mask; /* You might think this should be (scale+1)/2, but then we'd be adding * one for scale == 1. */ int rounding = mask->scale / 2; int bands = in->Bands; int *coeff = conv->mask->coeff; Rect *r = &or->valid; int le = r->left; int to = r->top; int bo = IM_RECT_BOTTOM(r); int osz = IM_REGION_N_ELEMENTS( or ); Rect s; int lskip; int isz; int x, y, z, li; /* Prepare the section of the input image we need. A little larger * than the section of the output image we are producing. */ s = *r; s.width += conv->size - 1; s.height += conv->size - 1; if( im_prepare( ir, &s ) ) return( -1 ); lskip = IM_REGION_LSKIP( ir ) / IM_IMAGE_SIZEOF_ELEMENT( in ); isz = IM_REGION_N_ELEMENTS( ir ); for( y = to; y < bo; y++ ) { switch( in->BandFmt ) { case IM_BANDFMT_UCHAR: CONV_INT( unsigned char, IM_CLIP_UCHAR( sum, seq ) ); break; case IM_BANDFMT_CHAR: CONV_INT( signed char, IM_CLIP_CHAR( sum, seq ) ); break; case IM_BANDFMT_USHORT: CONV_INT( unsigned short, IM_CLIP_USHORT( sum, seq ) ); break; case IM_BANDFMT_SHORT: CONV_INT( signed short, IM_CLIP_SHORT( sum, seq ) ); break; case IM_BANDFMT_UINT: CONV_INT( unsigned int, IM_CLIP_NONE( sum, seq ) ); break; case IM_BANDFMT_INT: CONV_INT( signed int, IM_CLIP_NONE( sum, seq ) ); break; case IM_BANDFMT_FLOAT: CONV_FLOAT( float ); break; case IM_BANDFMT_DOUBLE: CONV_FLOAT( double ); break; default: assert( 0 ); } } return( 0 ); } int im_convsep_raw( IMAGE *in, IMAGE *out, INTMASK *mask ) { Conv *conv; /* Check parameters. */ if( !in || in->Coding != IM_CODING_NONE || im_iscomplex( in ) ) { im_error( "im_convsep", _( "non-complex uncoded only" ) ); return( -1 ); } if( !mask || mask->xsize > 1000 || mask->ysize > 1000 || mask->xsize <= 0 || mask->ysize <= 0 || !mask->coeff || mask->scale == 0 ) { im_error( "im_convsep", _( "nonsense mask parameters" ) ); return( -1 ); } if( mask->xsize != 1 && mask->ysize != 1 ) { im_error( "im_convsep", _( "expect 1xN or Nx1 input mask" ) ); return( -1 ); } if( im_piocheck( in, out ) ) return( -1 ); if( !(conv = conv_new( in, out, mask )) ) return( -1 ); /* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output * would be 1x1. */ if( im_cp_desc( out, in ) ) return( -1 ); out->Xsize -= conv->size - 1; out->Ysize -= conv->size - 1; if( out->Xsize <= 0 || out->Ysize <= 0 ) { im_error( "im_convsep", _( "image too small for mask" ) ); return( -1 ); } /* SMALLTILE seems the fastest in benchmarks. */ if( im_demand_hint( out, IM_SMALLTILE, in, NULL ) || im_generate( out, conv_start, conv_gen, conv_stop, in, conv ) ) return( -1 ); out->Xoffset = -mask->xsize / 2; out->Yoffset = -mask->ysize / 2; return( 0 ); } /* The above, with a border to make out the same size as in. */ int im_convsep( IMAGE *in, IMAGE *out, INTMASK *mask ) { IMAGE *t1 = im_open_local( out, "im_convsep intermediate", "p" ); int size = mask->xsize * mask->ysize; if( !t1 || im_embed( in, t1, 1, size / 2, size / 2, in->Xsize + size - 1, in->Ysize + size - 1 ) || im_convsep_raw( t1, out, mask ) ) return( -1 ); out->Xoffset = 0; out->Yoffset = 0; return( 0 ); }