From 0aa9f81c3075e7d6041f65f31bd07dbaf40113f8 Mon Sep 17 00:00:00 2001 From: John Cupitt Date: Mon, 6 Jun 2011 17:35:32 +0100 Subject: [PATCH] started im_aconv() started hacking non-separable version of im_aconvsep() --- TODO | 43 +- libvips/convolution/im_aconv.c | 917 +++++++++++++++++++++++++++++++++ 2 files changed, 955 insertions(+), 5 deletions(-) create mode 100644 libvips/convolution/im_aconv.c diff --git a/TODO b/TODO index db2fd347..636a0364 100644 --- a/TODO +++ b/TODO @@ -1,4 +1,3 @@ - - revisit orc conv use an 8.8 accumulator ... build the scale into the 8.8 coeffs ... no div at @@ -8,22 +7,56 @@ + + + - also VipsFormat ... could this replace vips_image_new_from_string()? or could we call this from vips_image_new_from_string()? at the moment, VipsFormat subclasses never get made, we just use the classes ... we'd need to start making real vipsformat objects for this to work - how much effort would this be to change? + how would this work? + + at the moment we have + + image = vips_image_new_from_file( filename ); + + build a VipsImage with filename "r" + + we also have the new CLI thing + + obj = vips_object_new_from_string( class, str ); + + calls class->new_from_string( first-component(str) ), then sets + optional args from rest-of-str(str), then does _build() + + image-class->new_from_string() just make a vipsimage "r" str + + the _build() uses VipsFormat() to load via im_tiff2vips() or whatever + + so ... we could change vips_image_new_from_file() to be + + VIPS_IMAGE( vips_object_new_from_string( VipsImageClass, str ) ) + + we could also make VipsImage::new_from_string() make a real VipsFormat + object, then load options could be set from the str + + how does save work? we call image-class->output_to_arg(obj, str), which in + turn calls vips_image_write(), which in turn uses VipsFormat + + + +- make a vips8 binding for Python as well -- make something for Python as well + use ctypes and not swig so we get an easy Win version + + wrap new API for C++ -use ctypes and not swig so we get an easy Win version -wrap new API for C++ diff --git a/libvips/convolution/im_aconv.c b/libvips/convolution/im_aconv.c new file mode 100644 index 00000000..57aecaa1 --- /dev/null +++ b/libvips/convolution/im_aconv.c @@ -0,0 +1,917 @@ +/* im_aconv ... approximate convolution + * + * This operation does an approximate convolution. + * + * Author: John Cupitt & Nicolas Robidoux + * Written on: 31/5/11 + * Modified on: + * 31/5/11 + * - from im_aconvsep() + */ + +/* + + This file is part of VIPS. + + VIPS is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + */ + +/* + + These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk + + */ + +/* + + See: + + http://incubator.quasimondo.com/processing/stackblur.pde + + This thing is a little like stackblur, but generalised to any separable + mask. + + */ + +/* + + TODO + + - block average masks by 2, 3, 4 .... before calling boxes_new(), + then boxes_new() just makes 1 line high lines, not boxes + + - use the downsample factor to set box height for the vertical + component + + - are we handling mask offset correctly? + + */ + +/* Show sample pixels as they are transformed. +#define DEBUG_PIXELS + */ + +/* +#define DEBUG +#define VIPS_DEBUG + */ + +#ifdef HAVE_CONFIG_H +#include +#endif /*HAVE_CONFIG_H*/ +#include + +#include +#include +#include +#include + +#include +#include +#include + +#ifdef WITH_DMALLOC +#include +#endif /*WITH_DMALLOC*/ + +/* Maximum number of boxes we can break the mask into. + */ +#define MAX_LINES (1000) + +/* Euclid's algorithm. Use this to common up mults. + */ +static int +gcd( int a, int b ) +{ + if( b == 0 ) + return( abs( a ) ); + else + return( gcd( b, a % b ) ); +} + +/* A set of lines. + */ +typedef struct _Lines { + struct _Boxes *boxes; + + int n_lines; + + /* Start is the left-most pixel in the line, end is one beyond the + * right-most pixel. + */ + int start[MAX_LINES]; + int end[MAX_LINES]; + + /* Integer scale factor for each line. + */ + int factor[MAX_LINES]; + + /* Band to read/write the sum from. + */ + int band[MAX_LINES]; +} Lines; + +/* A set of boxes. Each box is formed from a pair of lines. + */ +typedef struct _Boxes { + /* Copy of our arguments. + */ + IMAGE *in; + IMAGE *out; + DOUBLEMASK *mask; + int n_layers; + + int area; + int rounding; + + Lines hlines; + Lines vlines; +} Boxes; + +static void +line_start( Lines *lines, int x, int factor, int band ) +{ + lines->start[lines->n_lines] = x; + lines->factor[lines->n_lines] = factor; + lines->band[lines->n_lines] = band; +} + +static int +line_end( Lines *lines, int x ) +{ + lines->end[lines->n_lines] = x; + + if( lines->n_lines >= MAX_LINES - 1 ) { + vips_error( "im_aconv", "%s", _( "mask too complex" ) ); + return( -1 ); + } + lines->n_lines += 1; + + return( 0 ); +} + +/* Break a mask into boxes. + */ +static Boxes * +boxes_new( IMAGE *in, IMAGE *out, DOUBLEMASK *mask, int n_layers ) +{ + const int size = mask->xsize * mask->ysize; + + Boxes *boxes; + double max; + double min; + double depth; + double sum; + int layers_above; + int layers_below; + int band_offset; + int z, n, x; + + /* Check parameters. + */ + if( im_piocheck( in, out ) || + im_check_uncoded( "im_aconv", in ) || + vips_check_dmask_1d( "im_aconv", mask ) ) + return( NULL ); + + if( !(boxes = VIPS_NEW( out, Boxes )) ) + return( NULL ); + boxes->in = in; + boxes->out = out; + if( !(boxes->mask = (DOUBLEMASK *) im_local( out, + (im_construct_fn) im_dup_dmask, + (im_callback_fn) im_free_dmask, mask, mask->filename, NULL )) ) + return( NULL ); + boxes->n_layers = n_layers; + + boxes->hlines.boxes = boxes; + boxes->hlines.n_lines = 0; + boxes->vlines.boxes = boxes; + boxes->vlines.n_lines = 0; + + VIPS_DEBUG_MSG( "boxes_new: breaking into %d layers ...\n", n_layers ); + + /* Find mask range. We must always include the zero axis in the mask. + */ + max = 0; + min = 0; + for( n = 0; n < size; n++ ) { + if( mask->coeff[n] > max ) + max = mask->coeff[n]; + if( mask->coeff[n] < min ) + min = mask->coeff[n]; + } + + /* The zero axis must fall on a layer boundary. Estimate the + * depth, find n-lines-above-zero, get exact depth, then calculate a + * fixed n-lines which includes any negative parts. + */ + depth = (max - min) / n_layers; + layers_above = ceil( max / depth ); + depth = max / layers_above; + layers_below = floor( min / depth ); + n_layers = layers_above - layers_below; + + VIPS_DEBUG_MSG( "depth = %g, n_layers = %d\n", depth, n_layers ); + + /* For each layer, generate a set of lines which are inside the + * perimeter. Work down from the top. + */ + band_offset = 0; + for( z = 0; z < n_layers; z++ ) { + /* How deep we are into the mask, as a double we can test + * against. Add half the layer depth so we can easily find >50% + * mask elements. + */ + double z_ph = max - (1 + z) * depth + depth / 2; + + /* Odd, but we must avoid rounding errors that make us miss 0 + * in the line above. + */ + int z_positive = z < layers_above; + + for( y = 0; y < mask->ysize; y++ ) { + int inside; + + /* Start outside the perimeter. + */ + inside = 0; + + for( x = 0; x < mask->xsize; x++ ) { + double coeff = mask->coeff[x + y * mask->xsize]; + + /* The vertical line from mask[x, y] to 0 is + * inside. Is our current square (x, y) part + * of that line? + */ + if( (y_positive && coeff >= y_ph) || + (!y_positive && coeff <= y_ph) ) { + if( !inside ) { + line_start( lines, x, + y_positive ? 1 : -1 ); + inside = 1; + } + } + else { + if( inside ) { + line_end( lines, x ); + inside = 0; + } + } + } + + if( inside && + line_end( lines, width ) ) + return( NULL ); + } + } + + /* Can we common up any lines? Search for lines with identical + * start/end. + */ + for( z = 0; z < lines->n_lines; z++ ) { + for( n = z + 1; n < lines->n_lines; n++ ) { + if( lines->start[z] == lines->start[n] && + lines->end[z] == lines->end[n] ) { + lines->factor[z] += lines->factor[n]; + + /* n can be deleted. Do this in a separate + * pass below. + */ + lines->factor[n] = 0; + } + } + } + + /* Now we can remove all factor 0 lines. + */ + for( z = 0; z < lines->n_lines; z++ ) { + if( lines->factor[z] == 0 ) { + for( x = z; x < lines->n_lines; x++ ) { + lines->start[x] = lines->start[x + 1]; + lines->end[x] = lines->end[x + 1]; + lines->factor[x] = lines->factor[x + 1]; + } + lines->n_lines -= 1; + } + } + + /* Find the area of the lines. + */ + lines->area = 0; + for( z = 0; z < lines->n_lines; z++ ) + lines->area += lines->factor[z] * + (lines->end[z] - lines->start[z]); + + /* Strength reduction: if all lines are divisible by n, we can move + * that n out into the ->area factor. The aim is to produce as many + * factor 1 lines as we can and to reduce the chance of overflow. + */ + x = lines->factor[0]; + for( z = 1; z < lines->n_lines; z++ ) + x = gcd( x, lines->factor[z] ); + for( z = 0; z < lines->n_lines; z++ ) + lines->factor[z] /= x; + lines->area *= x; + + /* Find the area of the original mask. + */ + sum = 0; + for( z = 0; z < width; z++ ) + sum += mask->coeff[z]; + + lines->area = rint( sum * lines->area / mask->scale ); + lines->rounding = (lines->area + 1) / 2 + mask->offset * lines->area; + + /* ASCII-art layer drawing. + printf( "lines:\n" ); + for( z = 0; z < lines->n_lines; z++ ) { + printf( "%3d - %2d x ", z, lines->factor[z] ); + for( x = 0; x < 55; x++ ) { + int rx = x * (width + 1) / 55; + + if( rx >= lines->start[z] && rx < lines->end[z] ) + printf( "#" ); + else + printf( " " ); + } + printf( " %3d .. %3d\n", lines->start[z], lines->end[z] ); + } + printf( "area = %d\n", lines->area ); + printf( "rounding = %d\n", lines->rounding ); + */ + + return( lines ); +} + +/* Our sequence value. + */ +typedef struct { + Lines *lines; + REGION *ir; /* Input region */ + + int *start; /* Offsets for start and stop */ + int *end; + + /* The sums for each line. int for integer types, double for floating + * point types. + */ + void *sum; + + int last_stride; /* Avoid recalcing offsets, if we can */ +} LinesSequence; + +/* Free a sequence value. + */ +static int +lines_stop( void *vseq, void *a, void *b ) +{ + LinesSequence *seq = (LinesSequence *) vseq; + + IM_FREEF( im_region_free, seq->ir ); + + return( 0 ); +} + +/* Convolution start function. + */ +static void * +lines_start( IMAGE *out, void *a, void *b ) +{ + IMAGE *in = (IMAGE *) a; + Lines *lines = (Lines *) b; + + LinesSequence *seq; + + if( !(seq = IM_NEW( out, LinesSequence )) ) + return( NULL ); + + /* Init! + */ + seq->lines = lines; + seq->ir = im_region_create( in ); + seq->start = IM_ARRAY( out, lines->n_lines, int ); + seq->end = IM_ARRAY( out, lines->n_lines, int ); + if( vips_band_format_isint( out->BandFmt ) ) + seq->sum = IM_ARRAY( out, lines->n_lines, int ); + else + seq->sum = IM_ARRAY( out, lines->n_lines, double ); + seq->last_stride = -1; + + if( !seq->ir || !seq->start || !seq->end || !seq->sum ) { + lines_stop( seq, in, lines ); + return( NULL ); + } + + return( seq ); +} + +#define CLIP_UCHAR( V ) \ +G_STMT_START { \ + if( (V) < 0 ) \ + (V) = 0; \ + else if( (V) > UCHAR_MAX ) \ + (V) = UCHAR_MAX; \ +} G_STMT_END + +#define CLIP_CHAR( V ) \ +G_STMT_START { \ + if( (V) < SCHAR_MIN ) \ + (V) = SCHAR_MIN; \ + else if( (V) > SCHAR_MAX ) \ + (V) = SCHAR_MAX; \ +} G_STMT_END + +#define CLIP_USHORT( V ) \ +G_STMT_START { \ + if( (V) < 0 ) \ + (V) = 0; \ + else if( (V) > USHRT_MAX ) \ + (V) = USHRT_MAX; \ +} G_STMT_END + +#define CLIP_SHORT( V ) \ +G_STMT_START { \ + if( (V) < SHRT_MIN ) \ + (V) = SHRT_MIN; \ + else if( (V) > SHRT_MAX ) \ + (V) = SHRT_MAX; \ +} G_STMT_END + +#define CLIP_NONE( V ) {} + +/* The h and v loops are very similar, but also annoyingly different. Keep + * them separate for easy debugging. + */ + +#define HCONV_INT( TYPE, CLIP ) { \ + for( i = 0; i < bands; i++ ) { \ + int *seq_sum = (int *) seq->sum; \ + \ + TYPE *q; \ + TYPE *p; \ + int sum; \ + \ + p = i + (TYPE *) IM_REGION_ADDR( ir, r->left, r->top + y ); \ + q = i + (TYPE *) IM_REGION_ADDR( or, r->left, r->top + y ); \ + \ + sum = 0; \ + for( z = 0; z < lines->n_lines; z++ ) { \ + seq_sum[z] = 0; \ + for( x = lines->start[z]; x < lines->end[z]; x++ ) \ + seq_sum[z] += p[x * istride]; \ + sum += lines->factor[z] * seq_sum[z]; \ + } \ + sum = (sum + lines->rounding) / lines->area; \ + CLIP( sum ); \ + *q = sum; \ + q += ostride; \ + \ + for( x = 1; x < r->width; x++ ) { \ + sum = 0; \ + for( z = 0; z < lines->n_lines; z++ ) { \ + seq_sum[z] += p[seq->end[z]]; \ + seq_sum[z] -= p[seq->start[z]]; \ + sum += lines->factor[z] * seq_sum[z]; \ + } \ + p += istride; \ + sum = (sum + lines->rounding) / lines->area; \ + CLIP( sum ); \ + *q = sum; \ + q += ostride; \ + } \ + } \ +} + +#define HCONV_FLOAT( TYPE ) { \ + for( i = 0; i < bands; i++ ) { \ + double *seq_sum = (double *) seq->sum; \ + \ + TYPE *q; \ + TYPE *p; \ + double sum; \ + \ + p = i + (TYPE *) IM_REGION_ADDR( ir, r->left, r->top + y ); \ + q = i + (TYPE *) IM_REGION_ADDR( or, r->left, r->top + y ); \ + \ + sum = 0; \ + for( z = 0; z < lines->n_lines; z++ ) { \ + seq_sum[z] = 0; \ + for( x = lines->start[z]; x < lines->end[z]; x++ ) \ + seq_sum[z] += p[x * istride]; \ + sum += lines->factor[z] * seq_sum[z]; \ + } \ + sum = sum / lines->area + mask->offset; \ + *q = sum; \ + q += ostride; \ + \ + for( x = 1; x < r->width; x++ ) { \ + sum = 0; \ + for( z = 0; z < lines->n_lines; z++ ) { \ + seq_sum[z] += p[seq->end[z]]; \ + seq_sum[z] -= p[seq->start[z]]; \ + sum += lines->factor[z] * seq_sum[z]; \ + } \ + p += istride; \ + sum = sum / lines->area + mask->offset; \ + *q = sum; \ + q += ostride; \ + } \ + } \ +} + +/* Do horizontal masks ... we scan the mask along scanlines. + */ +static int +lines_generate_horizontal( REGION *or, void *vseq, void *a, void *b ) +{ + LinesSequence *seq = (LinesSequence *) vseq; + IMAGE *in = (IMAGE *) a; + Lines *lines = (Lines *) b; + + REGION *ir = seq->ir; + const int n_lines = lines->n_lines; + DOUBLEMASK *mask = lines->mask; + Rect *r = &or->valid; + + /* Double the bands (notionally) for complex. + */ + int bands = vips_band_format_iscomplex( in->BandFmt ) ? + 2 * in->Bands : in->Bands; + + Rect s; + int x, y, z, i; + int istride; + int ostride; + + /* Prepare the section of the input image we need. A little larger + * than the section of the output image we are producing. + */ + s = *r; + s.width += mask->xsize - 1; + s.height += mask->ysize - 1; + if( im_prepare( ir, &s ) ) + return( -1 ); + + /* Stride can be different for the vertical case, keep this here for + * ease of direction change. + */ + istride = IM_IMAGE_SIZEOF_PEL( in ) / + IM_IMAGE_SIZEOF_ELEMENT( in ); + ostride = IM_IMAGE_SIZEOF_PEL( lines->out ) / + IM_IMAGE_SIZEOF_ELEMENT( lines->out ); + + /* Init offset array. + */ + if( seq->last_stride != istride ) { + seq->last_stride = istride; + + for( z = 0; z < n_lines; z++ ) { + seq->start[z] = lines->start[z] * istride; + seq->end[z] = lines->end[z] * istride; + } + } + + for( y = 0; y < r->height; y++ ) { + switch( in->BandFmt ) { + case IM_BANDFMT_UCHAR: + HCONV_INT( unsigned char, CLIP_UCHAR ); + break; + + case IM_BANDFMT_CHAR: + HCONV_INT( signed char, CLIP_UCHAR ); + break; + + case IM_BANDFMT_USHORT: + HCONV_INT( unsigned short, CLIP_USHORT ); + break; + + case IM_BANDFMT_SHORT: + HCONV_INT( signed short, CLIP_SHORT ); + break; + + case IM_BANDFMT_UINT: + HCONV_INT( unsigned int, CLIP_NONE ); + break; + + case IM_BANDFMT_INT: + HCONV_INT( signed int, CLIP_NONE ); + break; + + case IM_BANDFMT_FLOAT: + HCONV_FLOAT( float ); + break; + + case IM_BANDFMT_DOUBLE: + HCONV_FLOAT( double ); + break; + + case IM_BANDFMT_COMPLEX: + HCONV_FLOAT( float ); + break; + + case IM_BANDFMT_DPCOMPLEX: + HCONV_FLOAT( double ); + break; + + default: + g_assert( 0 ); + } + } + + return( 0 ); +} + +#define VCONV_INT( TYPE, CLIP ) { \ + for( x = 0; x < sz; x++ ) { \ + int *seq_sum = (int *) seq->sum; \ + \ + TYPE *q; \ + TYPE *p; \ + int sum; \ + \ + p = x + (TYPE *) IM_REGION_ADDR( ir, r->left, r->top ); \ + q = x + (TYPE *) IM_REGION_ADDR( or, r->left, r->top ); \ + \ + sum = 0; \ + for( z = 0; z < lines->n_lines; z++ ) { \ + seq_sum[z] = 0; \ + for( y = lines->start[z]; y < lines->end[z]; y++ ) \ + seq_sum[z] += p[y * istride]; \ + sum += lines->factor[z] * seq_sum[z]; \ + } \ + sum = (sum + lines->rounding) / lines->area; \ + CLIP( sum ); \ + *q = sum; \ + q += ostride; \ + \ + for( y = 1; y < r->height; y++ ) { \ + sum = 0; \ + for( z = 0; z < lines->n_lines; z++ ) { \ + seq_sum[z] += p[seq->end[z]]; \ + seq_sum[z] -= p[seq->start[z]]; \ + sum += lines->factor[z] * seq_sum[z]; \ + } \ + p += istride; \ + sum = (sum + lines->rounding) / lines->area; \ + CLIP( sum ); \ + *q = sum; \ + q += ostride; \ + } \ + } \ +} + +#define VCONV_FLOAT( TYPE ) { \ + for( x = 0; x < sz; x++ ) { \ + double *seq_sum = (double *) seq->sum; \ + \ + TYPE *q; \ + TYPE *p; \ + double sum; \ + \ + p = x + (TYPE *) IM_REGION_ADDR( ir, r->left, r->top ); \ + q = x + (TYPE *) IM_REGION_ADDR( or, r->left, r->top ); \ + \ + sum = 0; \ + for( z = 0; z < lines->n_lines; z++ ) { \ + seq_sum[z] = 0; \ + for( y = lines->start[z]; y < lines->end[z]; y++ ) \ + seq_sum[z] += p[y * istride]; \ + sum += lines->factor[z] * seq_sum[z]; \ + } \ + sum = sum / lines->area + mask->offset; \ + *q = sum; \ + q += ostride; \ + \ + for( y = 1; y < r->height; y++ ) { \ + sum = 0; \ + for( z = 0; z < lines->n_lines; z++ ) { \ + seq_sum[z] += p[seq->end[z]]; \ + seq_sum[z] -= p[seq->start[z]]; \ + sum += lines->factor[z] * seq_sum[z]; \ + } \ + p += istride; \ + sum = sum / lines->area + mask->offset; \ + *q = sum; \ + q += ostride; \ + } \ + } \ +} + +/* Do vertical masks ... we scan the mask down columns of pixels. Copy-paste + * from above with small changes. + */ +static int +lines_generate_vertical( REGION *or, void *vseq, void *a, void *b ) +{ + LinesSequence *seq = (LinesSequence *) vseq; + IMAGE *in = (IMAGE *) a; + Lines *lines = (Lines *) b; + + REGION *ir = seq->ir; + const int n_lines = lines->n_lines; + DOUBLEMASK *mask = lines->mask; + Rect *r = &or->valid; + + /* Double the width (notionally) for complex. + */ + int sz = vips_band_format_iscomplex( in->BandFmt ) ? + 2 * IM_REGION_N_ELEMENTS( or ) : IM_REGION_N_ELEMENTS( or ); + + Rect s; + int x, y, z; + int istride; + int ostride; + + /* Prepare the section of the input image we need. A little larger + * than the section of the output image we are producing. + */ + s = *r; + s.width += mask->xsize - 1; + s.height += mask->ysize - 1; + if( im_prepare( ir, &s ) ) + return( -1 ); + + /* Stride can be different for the vertical case, keep this here for + * ease of direction change. + */ + istride = IM_REGION_LSKIP( ir ) / + IM_IMAGE_SIZEOF_ELEMENT( lines->in ); + ostride = IM_REGION_LSKIP( or ) / + IM_IMAGE_SIZEOF_ELEMENT( lines->out ); + + /* Init offset array. + */ + if( seq->last_stride != istride ) { + seq->last_stride = istride; + + for( z = 0; z < n_lines; z++ ) { + seq->start[z] = lines->start[z] * istride; + seq->end[z] = lines->end[z] * istride; + } + } + + switch( in->BandFmt ) { + case IM_BANDFMT_UCHAR: + VCONV_INT( unsigned char, CLIP_UCHAR ); + break; + + case IM_BANDFMT_CHAR: + VCONV_INT( signed char, CLIP_UCHAR ); + break; + + case IM_BANDFMT_USHORT: + VCONV_INT( unsigned short, CLIP_USHORT ); + break; + + case IM_BANDFMT_SHORT: + VCONV_INT( signed short, CLIP_SHORT ); + break; + + case IM_BANDFMT_UINT: + VCONV_INT( unsigned int, CLIP_NONE ); + break; + + case IM_BANDFMT_INT: + VCONV_INT( signed int, CLIP_NONE ); + break; + + case IM_BANDFMT_FLOAT: + VCONV_FLOAT( float ); + break; + + case IM_BANDFMT_DOUBLE: + VCONV_FLOAT( double ); + break; + + case IM_BANDFMT_COMPLEX: + VCONV_FLOAT( float ); + break; + + case IM_BANDFMT_DPCOMPLEX: + VCONV_FLOAT( double ); + break; + + default: + g_assert( 0 ); + } + + return( 0 ); +} + +static int +aconv_raw( IMAGE *in, IMAGE *out, DOUBLEMASK *mask, int n_layers ) +{ + Lines *lines; + im_generate_fn generate; + +#ifdef DEBUG + printf( "aconv_raw: starting with matrix:\n" ); + im_print_dmask( mask ); +#endif /*DEBUG*/ + + if( !(lines = lines_new( in, out, mask, n_layers )) ) + return( -1 ); + + /* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output + * would be 1x1. + */ + if( im_cp_desc( out, in ) ) + return( -1 ); + out->Xsize -= mask->xsize - 1; + out->Ysize -= mask->ysize - 1; + if( out->Xsize <= 0 || out->Ysize <= 0 ) { + im_error( "im_aconv", "%s", _( "image too small for mask" ) ); + return( -1 ); + } + + if( mask->xsize == 1 ) + generate = lines_generate_vertical; + else + generate = lines_generate_horizontal; + + if( im_demand_hint( out, IM_SMALLTILE, in, NULL ) || + im_generate( out, + lines_start, generate, lines_stop, in, lines ) ) + return( -1 ); + + out->Xoffset = -mask->xsize / 2; + out->Yoffset = -mask->ysize / 2; + + return( 0 ); +} + +/** + * im_aconv: + * @in: input image + * @out: output image + * @mask: convolution mask + * @n_layers: number of layers for approximation + * + * Perform an approximate separable convolution of @in with @mask. + * + * The mask must be 1xn or nx1 elements. + * The output image + * always has the same #VipsBandFmt as the input image. + * + * The image is convolved twice: once with @mask and then again with @mask + * rotated by 90 degrees. + * + * Larger values for @n_layers give more accurate + * results, but are slower. As @n_layers approaches the mask radius, the + * accuracy will become close to exact convolution and the speed will drop to + * match. For many large masks, such as Gaussian, @n_layers need be only 10% of + * this value and accuracy will still be good. + * + * See also: im_convsep_f(), im_create_dmaskv(). + * + * Returns: 0 on success, -1 on error + */ +int +im_aconv( IMAGE *in, IMAGE *out, DOUBLEMASK *mask, int n_layers ) +{ + IMAGE *t[2]; + const int n_mask = mask->xsize * mask->ysize; + DOUBLEMASK *rmask; + + if( im_open_local_array( out, t, 2, "im_aconv", "p" ) || + !(rmask = (DOUBLEMASK *) im_local( out, + (im_construct_fn) im_dup_dmask, + (im_callback_fn) im_free_dmask, mask, mask->filename, NULL )) ) + return( -1 ); + + rmask->xsize = mask->ysize; + rmask->ysize = mask->xsize; + + /* + */ + if( im_embed( in, t[0], 1, n_mask / 2, n_mask / 2, + in->Xsize + n_mask - 1, in->Ysize + n_mask - 1 ) || + aconv_raw( t[0], t[1], mask, n_layers ) || + aconv_raw( t[1], out, rmask, n_layers ) ) + return( -1 ); + + /* For testing .. just try one direction. + if( aconv_raw( in, out, mask, n_layers ) ) + return( -1 ); + */ + + out->Xoffset = 0; + out->Yoffset = 0; + + return( 0 ); +} +