libvips/libsrc/convolution/im_convsep.c
2008-11-02 22:11:01 +00:00

446 lines
10 KiB
C

/* @(#) Convolve an image with a separable (1xN, or Nx1) INTMASK. Image can
* @(#) have any number of bands, any non-complex type. Size and type of
* @(#) output image matches type of input image.
* @(#)
* @(#) int
* @(#) im_convsep( in, out, mask )
* @(#) IMAGE *in, *out;
* @(#) INTMASK *mask;
* @(#)
* @(#) Also: im_convsep_raw(). As above, but does not add a black border.
* @(#)
* @(#) Returns either 0 (success) or -1 (fail)
*
* Copyright: 1990, N. Dessipris.
*
* Author: Nicos Dessipris
* Written on: 29/04/1991
* Modified on: 29/4/93 K.Martinez for Sys5
* 9/3/01 JC
* - rewritten using im_conv()
* 27/7/01 JC
* - rejects masks with scale == 0
* 7/4/04
* - now uses im_embed() with edge stretching on the input, not
* the output
* - sets Xoffset / Yoffset
* 21/4/04
* - scale down int convolves at 1/2 way mark, much less likely to integer
* overflow on intermediates
* 12/5/08
* - int rounding was +1 too much, argh
*/
/*
This file is part of VIPS.
VIPS is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /*HAVE_CONFIG_H*/
#include <vips/intl.h>
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <assert.h>
#include <vips/vips.h>
#ifdef WITH_DMALLOC
#include <dmalloc.h>
#endif /*WITH_DMALLOC*/
/* Our parameters ... we take a copy of the mask argument.
*
* One of these is built by conv_new() for each im_convsep_raw() call and is
* handed to the start / generate / stop callbacks as their last argument.
*/
typedef struct {
IMAGE *in; /* Image we read pixels from */
IMAGE *out; /* Image we write to; conv_new() allocates this struct local to it */
INTMASK *mask; /* Copy of mask arg */
int size; /* N for our 1xN or Nx1 mask */
int scale; /* Our scale ... we have to square mask->scale */
int underflow; /* Global underflow/overflow counts */
int overflow; /* Summed from each sequence in conv_stop() */
} Conv;
/* Close callback registered on the output image by conv_new().
 *
 * Warns if any overflows or underflows were counted during evaluation, then
 * frees our private copy of the mask. Always returns 0.
 */
static int
conv_destroy( Conv *conv )
{
	const int clipped = conv->overflow || conv->underflow;

	/* Report the global clip counts, but only when there is something
	 * to report.
	 */
	if( clipped )
		im_warn( "im_convsep", _( "%d overflows and %d underflows "
			"detected" ), conv->overflow, conv->underflow );

	/* The mask pointer is NULL if conv_new() failed before it could
	 * duplicate the mask, so guard the free.
	 */
	if( conv->mask != NULL ) {
		(void) im_free_imask( conv->mask );
		conv->mask = NULL;
	}

	return( 0 );
}
/* Build the parameter block for one convolution.
 *
 * The struct is allocated local to @out and conv_destroy() is attached as a
 * close callback on @out, so the caller never frees it directly. The mask is
 * duplicated, so the caller keeps ownership of @mask.
 *
 * Returns the new Conv, or NULL on allocation / callback / copy failure.
 */
static Conv *
conv_new( IMAGE *in, IMAGE *out, INTMASK *mask )
{
	Conv *conv;

	if( !(conv = IM_NEW( out, Conv )) )
		return( NULL );

	/* Fill in every field before the close callback is attached:
	 * conv_destroy() reads the counters and the mask pointer.
	 */
	conv->underflow = 0;
	conv->overflow = 0;
	conv->mask = NULL;
	conv->in = in;
	conv->out = out;

	/* One of xsize / ysize is 1 for a separable mask, so the product is
	 * the mask length. The mask is applied twice, hence scale squared.
	 */
	conv->size = mask->xsize * mask->ysize;
	conv->scale = mask->scale * mask->scale;

	if( im_add_close_callback( out,
		(im_callback_fn) conv_destroy, conv, NULL ) )
		return( NULL );

	conv->mask = im_dup_imask( mask, "conv_mask" );
	if( !conv->mask )
		return( NULL );

	return( conv );
}
/* Our sequence value.
*
* Created by conv_start() and released by conv_stop(). Clip counts are kept
* here during generation and added to the shared Conv totals in conv_stop().
*/
typedef struct {
Conv *conv; /* Parameters shared by every sequence */
REGION *ir; /* Input region */
PEL *sum; /* Line buffer */
int underflow; /* Underflow/overflow counts */
int overflow;
} ConvSequence;
/* Free a sequence value.
*/
static int
conv_stop( void *vseq, void *a, void *b )
{
ConvSequence *seq = (ConvSequence *) vseq;
Conv *conv = (Conv *) b;
/* Add local under/over counts to global counts.
*/
conv->overflow += seq->overflow;
conv->underflow += seq->underflow;
IM_FREEF( im_region_free, seq->ir );
return( 0 );
}
/* Start function: build one sequence value.
 *
 * @a is the input image and @b the shared Conv. The sequence gets its own
 * input region plus a line buffer with one slot per element of an input
 * line: ints for integer output, doubles otherwise.
 *
 * Returns the new ConvSequence, or NULL on failure.
 */
static void *
conv_start( IMAGE *out, void *a, void *b )
{
	IMAGE *in = (IMAGE *) a;
	Conv *conv = (Conv *) b;
	ConvSequence *seq = IM_NEW( out, ConvSequence );

	if( !seq )
		return( NULL );

	seq->conv = conv;
	seq->overflow = 0;
	seq->underflow = 0;

	/* Attach the region first, then the line buffer. Both results are
	 * checked together below.
	 */
	seq->ir = im_region_create( in );
	if( !im_isint( conv->out ) )
		seq->sum = (PEL *)
			IM_ARRAY( out, IM_IMAGE_N_ELEMENTS( in ), double );
	else
		seq->sum = (PEL *)
			IM_ARRAY( out, IM_IMAGE_N_ELEMENTS( in ), int );

	if( seq->ir && seq->sum )
		return( (void *) seq );

	/* Something failed: let conv_stop() release whatever we did get.
	 */
	conv_stop( seq, in, conv );

	return( NULL );
}
/* What we do for every point in the mask, for each pixel.
*
* Each macro steps one mask point backwards and accumulates into sum:
* z is the index into coeff[] and li is the element offset from the
* current vfrom / hfrom pointer. VERTICAL_CONV moves li by lskip (the
* region line skip expressed in elements), HORIZONTAL_CONV moves it by
* bands (one pixel expressed in elements).
*
* They rely on z, li, sum, coeff, lskip or bands, and vfrom or hfrom being
* in scope, with z and li preset one step past the last mask point.
*/
#define VERTICAL_CONV { z -= 1; li -= lskip; sum += coeff[z] * vfrom[li]; }
#define HORIZONTAL_CONV { z -= 1; li -= bands; sum += coeff[z] * hfrom[li]; }
/* INT and FLOAT inner loops.
*
* CONV_INT produces one output line (row y) for an integer band format.
* TYPE is the C type of one band element. IM_CLIP is a statement expanded
* after the final scaling and just before the store to hto[x].
*
* Pass 1 walks the isz elements of the input line that starts at (le, y),
* applies the mask down the column with VERTICAL_CONV and stores the scaled
* result as an int in the seq->sum line buffer. Pass 2 walks the osz output
* elements, applies the mask along that buffer with HORIZONTAL_CONV, scales
* again, clips and stores to the output region.
*
* IM_UNROLL( z, ... ) is presumably expanded once per mask point, i.e.
* conv->size times -- confirm against its definition.
*
* Each pass adds rounding, divides by mask->scale and then adds
* mask->offset, so the offset is applied in both passes.
* NOTE(review): CONV_FLOAT adds mask->offset only once, after the second
* pass -- confirm which behaviour is intended.
*
* Names taken from the enclosing function: ir, or, seq, conv, mask, coeff,
* rounding, lskip, bands, isz, osz, le, y, x, z, li.
*/
#define CONV_INT( TYPE, IM_CLIP ) { \
TYPE *vfrom; \
int *vto; \
int *hfrom; \
TYPE *hto; \
\
/* Convolve to sum array. We convolve the full width of \
* this input line. \
*/ \
vfrom = (TYPE *) IM_REGION_ADDR( ir, le, y ); \
vto = (int *) seq->sum; \
for( x = 0; x < isz; x++ ) { \
int sum; \
\
z = conv->size; \
li = lskip * z; \
sum = 0; \
\
IM_UNROLL( z, VERTICAL_CONV ); \
\
sum = ((sum + rounding) / mask->scale) + mask->offset; \
\
vto[x] = sum; \
vfrom += 1; \
} \
\
/* Convolve sums to output. \
*/ \
hfrom = (int *) seq->sum; \
hto = (TYPE *) IM_REGION_ADDR( or, le, y ); \
for( x = 0; x < osz; x++ ) { \
int sum; \
\
z = conv->size; \
li = bands * z; \
sum = 0; \
\
IM_UNROLL( z, HORIZONTAL_CONV ); \
\
sum = ((sum + rounding) / mask->scale) + mask->offset; \
\
IM_CLIP; \
\
hto[x] = sum; \
hfrom += 1; \
} \
}
/* Floating-point inner loop: produce one output line (row y).
*
* TYPE is the C type of one band element. Same two-pass shape as CONV_INT,
* but the seq->sum line buffer holds doubles and nothing is rounded or
* clipped.
*
* Pass 1 stores the raw vertical sums. All scaling is deferred to pass 2,
* which divides by conv->scale (set to mask->scale squared in conv_new())
* and adds mask->offset once.
*
* Names taken from the enclosing function: ir, or, seq, conv, mask, coeff,
* lskip, bands, isz, osz, le, y, x, z, li.
*/
#define CONV_FLOAT( TYPE ) { \
TYPE *vfrom; \
double *vto; \
double *hfrom; \
TYPE *hto; \
\
/* Convolve to sum array. We convolve the full width of \
* this input line. \
*/ \
vfrom = (TYPE *) IM_REGION_ADDR( ir, le, y ); \
vto = (double *) seq->sum; \
for( x = 0; x < isz; x++ ) { \
double sum; \
\
z = conv->size; \
li = lskip * z; \
sum = 0; \
\
IM_UNROLL( z, VERTICAL_CONV ); \
\
vto[x] = sum; \
vfrom += 1; \
} \
\
/* Convolve sums to output. \
*/ \
hfrom = (double *) seq->sum; \
hto = (TYPE *) IM_REGION_ADDR( or, le, y ); \
for( x = 0; x < osz; x++ ) { \
double sum; \
\
z = conv->size; \
li = bands * z; \
sum = 0; \
\
IM_UNROLL( z, HORIZONTAL_CONV ); \
\
sum = (sum / conv->scale) + mask->offset; \
\
hto[x] = sum; \
hfrom += 1; \
} \
}
/* Generate function: fill the valid area of an output region, one line at
 * a time, picking the inner loop from the band format of the input.
 *
 * @vseq is our ConvSequence, @a the input image and @b the shared Conv.
 * Returns 0 on success, or -1 if the input pixels could not be prepared.
 */
static int
conv_gen( REGION *or, void *vseq, void *a, void *b )
{
	/* Many of these locals are referenced by name from inside CONV_INT
	 * and CONV_FLOAT, so their spellings must not change: seq, conv, ir,
	 * mask, coeff, rounding, bands, le, osz, lskip, isz, x, y, z, li.
	 */
	ConvSequence *seq = (ConvSequence *) vseq;
	IMAGE *in = (IMAGE *) a;
	Conv *conv = (Conv *) b;

	REGION *ir = seq->ir;
	INTMASK *mask = conv->mask;
	int *coeff = mask->coeff;
	int bands = in->Bands;

	/* You might think this should be (scale+1)/2, but then we'd be adding
	 * one for scale == 1.
	 */
	int rounding = mask->scale / 2;

	Rect *area = &or->valid;
	int le = area->left;
	int first_line = area->top;
	int end_line = IM_RECT_BOTTOM( area );
	int osz = IM_REGION_N_ELEMENTS( or );

	Rect need;
	int lskip;
	int isz;
	int x, y, z, li;

	/* We need the output area grown by (mask length - 1) to the right
	 * and downwards.
	 */
	need = *area;
	need.width += conv->size - 1;
	need.height += conv->size - 1;
	if( im_prepare( ir, &need ) )
		return( -1 );

	/* Line skip of the input region, in elements rather than bytes, and
	 * the number of elements across one line of it.
	 */
	lskip = IM_REGION_LSKIP( ir ) / IM_IMAGE_SIZEOF_ELEMENT( in );
	isz = IM_REGION_N_ELEMENTS( ir );

	for( y = first_line; y < end_line; y++ ) {
		switch( in->BandFmt ) {
		case IM_BANDFMT_UCHAR:
			CONV_INT( unsigned char, IM_CLIP_UCHAR( sum, seq ) );
			break;

		case IM_BANDFMT_CHAR:
			CONV_INT( signed char, IM_CLIP_CHAR( sum, seq ) );
			break;

		case IM_BANDFMT_USHORT:
			CONV_INT( unsigned short, IM_CLIP_USHORT( sum, seq ) );
			break;

		case IM_BANDFMT_SHORT:
			CONV_INT( signed short, IM_CLIP_SHORT( sum, seq ) );
			break;

		case IM_BANDFMT_UINT:
			CONV_INT( unsigned int, IM_CLIP_NONE( sum, seq ) );
			break;

		case IM_BANDFMT_INT:
			CONV_INT( signed int, IM_CLIP_NONE( sum, seq ) );
			break;

		case IM_BANDFMT_FLOAT:
			CONV_FLOAT( float );
			break;

		case IM_BANDFMT_DOUBLE:
			CONV_FLOAT( double );
			break;

		default:
			/* im_convsep_raw() rejects complex input before we
			 * are ever called.
			 */
			assert( 0 );
		}
	}

	return( 0 );
}
/* Separable convolution with no border handling.
 *
 * @mask must be 1xN or Nx1 with a non-zero scale; it is applied along both
 * axes. @in must be uncoded and non-complex. The output has the same
 * description as the input except that it is (N - 1) pixels smaller on each
 * axis: with a 7-point mask and a 7x7 image the output is 1x1.
 *
 * Xoffset / Yoffset on @out are set to record how far the output origin has
 * moved relative to the input.
 *
 * Returns 0 on success, -1 on error.
 */
int
im_convsep_raw( IMAGE *in, IMAGE *out, INTMASK *mask )
{
	Conv *conv;

	/* Check parameters.
	 */
	if( !in || in->Coding != IM_CODING_NONE || im_iscomplex( in ) ) {
		im_error( "im_convsep", "%s", _( "non-complex uncoded only" ) );
		return( -1 );
	}
	if( !mask || mask->xsize > 1000 || mask->ysize > 1000 ||
		mask->xsize <= 0 || mask->ysize <= 0 || !mask->coeff ||
		mask->scale == 0 ) {
		im_error( "im_convsep", "%s", _( "nonsense mask parameters" ) );
		return( -1 );
	}
	if( mask->xsize != 1 && mask->ysize != 1 ) {
		im_error( "im_convsep",
			"%s", _( "expect 1xN or Nx1 input mask" ) );
		return( -1 );
	}
	if( im_piocheck( in, out ) )
		return( -1 );
	if( !(conv = conv_new( in, out, mask )) )
		return( -1 );

	/* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output
	 * would be 1x1.
	 */
	if( im_cp_desc( out, in ) )
		return( -1 );
	out->Xsize -= conv->size - 1;
	out->Ysize -= conv->size - 1;
	if( out->Xsize <= 0 || out->Ysize <= 0 ) {
		im_error( "im_convsep", "%s", _( "image too small for mask" ) );
		return( -1 );
	}

	/* SMALLTILE seems the fastest in benchmarks.
	 */
	if( im_demand_hint( out, IM_SMALLTILE, in, NULL ) ||
		im_generate( out, conv_start, conv_gen, conv_stop, in, conv ) )
		return( -1 );

	/* The mask runs along both axes, so the image is cropped by
	 * (size - 1) in x and in y and the origin moves by size / 2 on both.
	 * Using mask->xsize and mask->ysize separately would leave a zero
	 * offset on whichever axis has the mask dimension of 1.
	 */
	out->Xoffset = -conv->size / 2;
	out->Yoffset = -conv->size / 2;

	return( 0 );
}
/* The above, with a border to make out the same size as in.
 *
 * @in is first embedded in an intermediate image that is (N - 1) pixels
 * larger on each axis, using im_embed() with edge stretching, and the
 * intermediate is then passed to im_convsep_raw(). N is the length of the
 * 1xN or Nx1 @mask.
 *
 * Returns 0 on success, -1 on error.
 */
int
im_convsep( IMAGE *in, IMAGE *out, INTMASK *mask )
{
	IMAGE *t1;
	int size;

	/* We read in->Xsize and mask->xsize below, before im_convsep_raw()
	 * gets a chance to validate anything, so reject NULL arguments and
	 * out-of-range mask dimensions here. Messages match the ones
	 * im_convsep_raw() reports for the same conditions.
	 */
	if( !in ) {
		im_error( "im_convsep", "%s", _( "non-complex uncoded only" ) );
		return( -1 );
	}
	if( !mask || mask->xsize > 1000 || mask->ysize > 1000 ||
		mask->xsize <= 0 || mask->ysize <= 0 ) {
		im_error( "im_convsep", "%s", _( "nonsense mask parameters" ) );
		return( -1 );
	}

	/* One dimension is 1 for a valid separable mask, so the product is
	 * the mask length. The range check above keeps it from overflowing.
	 */
	size = mask->xsize * mask->ysize;

	if( !(t1 = im_open_local( out, "im_convsep intermediate", "p" )) ||
		im_embed( in, t1, 1, size / 2, size / 2,
			in->Xsize + size - 1,
			in->Ysize + size - 1 ) ||
		im_convsep_raw( t1, out, mask ) )
		return( -1 );

	/* Output is the same size as the input, so there is no displacement
	 * to record.
	 */
	out->Xoffset = 0;
	out->Yoffset = 0;

	return( 0 );
}