diff --git a/.travis.yml b/.travis.yml index f105dad2..27aab72d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,7 @@ language: cpp before_script: - - ./bootstrap.sh + - ./autogen.sh - ./configure --disable-dependency-tracking --with-jpeg-includes=$JPEG/include diff --git a/ChangeLog b/ChangeLog index dc416a0c..697dbd6a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -29,9 +29,14 @@ - better upsizing with vips_resize() - add imagemagick v7 support, thanks sachinwalia2k8 - added vips_worley(), vips_perlin() noise generators +- added vips_convf(), vips_convi(), vips_convasep(), vips_conva() ... + im_conv*() functions rewritten as classes +- vips_convsep() calls vips_convasep() for the approximate case +- new fixed-point vector path for convi is up to about 2x faster - gif loader can write 1, 2, 3, or 4 bands depending on file contents - support --strip for pngsave - add svgz support [Felix Bünemann] +- rename boostrap.sh -> autogen.sh to help snapcraft 30/7/16 started 8.3.3 - fix performance regression in 8.3.2, thanks Lovell diff --git a/README.md b/README.md index bb8c6e4f..bbac874b 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ and `gobject-introspection`, see the dependencies section below. For example: Then build the build system with: - $ ./bootstrap.sh + $ ./autogen.sh Debug build: diff --git a/TODO b/TODO index 4102778e..107925ec 100644 --- a/TODO +++ b/TODO @@ -16,18 +16,7 @@ all with file and buffer source -- try: - - $ vips avg broken.jpg[fail] - - about 50% of the time it'll trigger a range of out-of-order reads and lock - for 10s or so while seq times out - -- add more webp tests to py suite - -- try moving some more of the CLI tests to py - - +- add APPROX convsep test? - add more webp tests to py suite diff --git a/bootstrap.sh b/autogen.sh similarity index 100% rename from bootstrap.sh rename to autogen.sh diff --git a/libvips/conversion/sequential.c b/libvips/conversion/sequential.c index c6a6bbfc..62b19c35 100644 --- a/libvips/conversion/sequential.c +++ b/libvips/conversion/sequential.c @@ -184,7 +184,7 @@ vips_sequential_generate( VipsRegion *or, /* Exit the loop on timeout or condition passes. We have to * be wary of spurious wakeups. */ - while( r->top > sequential->y_pos ) + while( r->top > sequential->y_pos ) { #ifdef HAVE_COND_INIT if( !g_cond_wait_until( sequential->ready, sequential->lock, time ) ) @@ -195,6 +195,14 @@ vips_sequential_generate( VipsRegion *or, break; #endif + /* We may have woken up because of an eval error. + */ + if( sequential->error ) { + g_mutex_unlock( sequential->lock ); + return( -1 ); + } + } + VIPS_GATE_STOP( "vips_sequential_generate: wait" ); VIPS_DEBUG_MSG_GREEN( "thread %p awake again ...\n", @@ -220,7 +228,7 @@ vips_sequential_generate( VipsRegion *or, area.width = 1; area.height = r->top - sequential->y_pos; if( vips_region_prepare( ir, &area ) ) { - VIPS_DEBUG_MSG( "thread %p error, unlocking ...\n", + VIPS_DEBUG_MSG( "thread %p error, unlocking #1 ...\n", g_thread_self() ); sequential->error = -1; g_cond_broadcast( sequential->ready ); @@ -237,7 +245,7 @@ vips_sequential_generate( VipsRegion *or, VIPS_DEBUG_MSG_GREEN( "thread %p reading ...\n", g_thread_self() ); if( vips_region_prepare( ir, r ) || vips_region_region( or, ir, r, r->left, r->top ) ) { - VIPS_DEBUG_MSG( "thread %p error, unlocking ...\n", + VIPS_DEBUG_MSG( "thread %p error, unlocking #2 ...\n", g_thread_self() ); sequential->error = -1; g_cond_broadcast( sequential->ready ); diff --git a/libvips/convolution/Makefile.am b/libvips/convolution/Makefile.am index 32f6eec1..d6346aa5 100644 --- a/libvips/convolution/Makefile.am +++ b/libvips/convolution/Makefile.am @@ -6,15 +6,15 @@ libconvolution_la_SOURCES = \ correlation.c \ correlation.h \ conv.c \ + conva.c \ + convf.c \ + convi.c \ + convasep.c \ convsep.c \ compass.c \ fastcor.c \ spcor.c \ sharpen.c \ - gaussblur.c \ - im_aconv.c \ - im_aconvsep.c \ - im_conv.c \ - im_conv_f.c + gaussblur.c AM_CPPFLAGS = -I${top_srcdir}/libvips/include @VIPS_CFLAGS@ @VIPS_INCLUDES@ diff --git a/libvips/convolution/conv.c b/libvips/convolution/conv.c index ff7dfd51..1f247b6a 100644 --- a/libvips/convolution/conv.c +++ b/libvips/convolution/conv.c @@ -66,18 +66,15 @@ vips_conv_build( VipsObject *object ) VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( object ); VipsConvolution *convolution = (VipsConvolution *) object; VipsConv *conv = (VipsConv *) object; - VipsImage **t = (VipsImage **) - vips_object_local_array( object, 4 ); + VipsImage **t = (VipsImage **) vips_object_local_array( object, 4 ); VipsImage *in; - INTMASK *imsk; - DOUBLEMASK *dmsk; - - g_object_set( conv, "out", vips_image_new(), NULL ); if( VIPS_OBJECT_CLASS( vips_conv_parent_class )->build( object ) ) return( -1 ); + g_object_set( conv, "out", vips_image_new(), NULL ); + in = convolution->in; /* @@ -85,13 +82,6 @@ vips_conv_build( VipsObject *object ) vips_matrixprint( convolution->M, NULL ); */ - if( !(imsk = im_vips2imask( convolution->M, class->nickname )) || - !im_local_imask( convolution->out, imsk ) ) - return( -1 ); - if( !(dmsk = im_vips2mask( convolution->M, class->nickname )) || - !im_local_dmask( convolution->out, dmsk ) ) - return( -1 ); - /* Unpack for processing. */ if( vips_image_decode( in, &t[0] ) ) @@ -99,20 +89,30 @@ vips_conv_build( VipsObject *object ) in = t[0]; switch( conv->precision ) { - case VIPS_PRECISION_INTEGER: - if( im_conv( in, convolution->out, imsk ) ) + case VIPS_PRECISION_FLOAT: + if( vips_convf( in, &t[1], convolution->M, NULL ) || + vips_image_write( t[1], convolution->out ) ) return( -1 ); break; - case VIPS_PRECISION_FLOAT: - if( im_conv_f( in, convolution->out, dmsk ) ) + case VIPS_PRECISION_INTEGER: + if( vips_convi( in, &t[1], convolution->M, NULL ) || + vips_image_write( t[1], convolution->out ) ) return( -1 ); break; case VIPS_PRECISION_APPROXIMATE: +{ + DOUBLEMASK *dmsk; + + if( !(dmsk = im_vips2mask( convolution->M, class->nickname )) || + !im_local_dmask( convolution->out, dmsk ) ) + return( -1 ); + if( im_aconv( in, convolution->out, dmsk, conv->layers, conv->cluster ) ) return( -1 ); +} break; default: @@ -175,31 +175,40 @@ vips_conv_init( VipsConv *conv ) * * Optional arguments: * - * * @precision: calculation accuracy - * * @layers: number of layers for approximation - * * @cluster: cluster lines closer than this distance + * * @precision: #VipsPrecision, calculation accuracy + * * @layers: %gint, number of layers for approximation + * * @cluster: %gint, cluster lines closer than this distance * * Convolution. * * Perform a convolution of @in with @mask. - * Each output pixel is - * calculated as sigma[i]{pixel[i] * mask[i]} / scale + offset, where scale - * and offset are part of @mask. + * Each output pixel is calculated as: * - * If @precision is #VIPS_PRECISION_INTEGER then the convolution is performed - * with integer arithmetic and the output image + * |[ + * sigma[i]{pixel[i] * mask[i]} / scale + offset + * ]| + * + * where scale and offset are part of @mask. + * + * If @precision is #VIPS_PRECISION_INTEGER, then + * elements of @mask are converted to + * integers before convolution, using rint(), + * and the output image * always has the same #VipsBandFormat as the input image. * - * Convolutions on unsigned 8-bit images are calculated with the - * processor's vector unit, if possible. Disable this with --vips-novector or - * IM_NOVECTOR. + * For #VIPS_FORMAT_UCHAR images, vips_conv() uses a fast vector path based on + * fixed-point arithmetic. This can produce slightly different results. + * Disable the vector path with `--vips-novector` or `VIPS_NOVECTOR` or + * vips_vector_set_enabled(). * * If @precision is #VIPS_PRECISION_FLOAT then the convolution is performed * with floating-point arithmetic. The output image - * is always %VIPS_FORMAT_FLOAT unless @in is %VIPS_FORMAT_DOUBLE, in which case - * @out is also %VIPS_FORMAT_DOUBLE. + * is always #VIPS_FORMAT_FLOAT unless @in is #VIPS_FORMAT_DOUBLE, in which case + * @out is also #VIPS_FORMAT_DOUBLE. * - * If @precision is #VIPS_PRECISION_APPROXIMATE then the output image + * If @precision is #VIPS_PRECISION_APPROXIMATE then, like + * #VIPS_PRECISION_INTEGER, @mask is converted to int before convolution, and + * the output image * always has the same #VipsBandFormat as the input image. * * Larger values for @layers give more accurate @@ -211,6 +220,8 @@ vips_conv_init( VipsConv *conv ) * Smaller values of @cluster will give more accurate results, but be slower * and use more memory. 10% of the mask radius is a good rule of thumb. * + * See also: vips_convsep(). + * * Returns: 0 on success, -1 on error */ int diff --git a/libvips/convolution/conva.c b/libvips/convolution/conva.c new file mode 100644 index 00000000..a98b7552 --- /dev/null +++ b/libvips/convolution/conva.c @@ -0,0 +1,1360 @@ +/* conva ... approximate convolution + * + * This operation does an approximate convolution. + * + * Author: John Cupitt & Nicolas Robidoux + * Written on: 31/5/11 + * Modified on: + * 31/5/11 + * - from im_aconvsep() + * 10/7/16 + * - redone as a class + */ + +/* + + This file is part of VIPS. + + VIPS is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA + + */ + +/* + + These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk + + */ + +/* + + See: + + http://incubator.quasimondo.com/processing/stackblur.pde + + This thing is a little like stackblur, but generalised to any 2D + convolution. + + */ + +/* + + TODO + +timing: + +$ time vips im_conv_f img_0075.jpg x2.v g2d201.con +real 5m3.359s +user 9m34.700s +sys 0m1.500s + +$ time vips im_aconv img_0075.jpg x.v g2d201.con 10 10 +real 0m3.151s +user 0m5.640s +sys 0m0.100s + +$ vips im_subtract x.v x2.v diff.v +$ vips im_abs diff.v abs.v +$ vips im_max abs.v +2.70833 + + - are we handling mask offset correctly? + + - could we do better with an h and a v cumulativization image? we might + not need the huge intermediate we have now, since any line sum an be + found with simple indexing + + */ + +/* + */ +#define DEBUG +#define VIPS_DEBUG + +#ifdef HAVE_CONFIG_H +#include +#endif /*HAVE_CONFIG_H*/ +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "pconvolution.h" + +/* Maximum number of boxes we can break the mask into. Don't have this too + * high, it'll make the instance huge, and gobject has a 64kb limit. + */ +#define MAX_LINES (1000) + +/* The number of edges we consider at once in clustering. Higher values are + * faster, but risk pushing up average error in the result. + */ +#define MAX_EDGES (1000) + +/* A horizontal line in the mask. + */ +typedef struct _HLine { + /* Start is the left-most pixel in the line, end is one beyond the + * right-most pixel. + */ + int start; + int end; + + /* The hlines have weights. weight 0 means this line is unused. + */ + int weight; +} HLine; + +/* For clustering. A pair of hlines and their distance. An edge in a graph. + */ +typedef struct _Edge { + /* The index into boxes->hline[]. + */ + int a; + int b; + + /* The distance between them, see boxes_distance(). + */ + int d; +} Edge; + +/* An element of a vline. + */ +typedef struct _VElement { + /* band is the index into hline[] we add, row is the row we take + * it from. + */ + int band; + int row; + + /* Negative lobes are made with factor -1, we also common-up repeated + * additions of the same line. + */ + int factor; +} VElement; + +/* A vline. + */ +typedef struct _VLine { + int band; + int factor; + int start; + int end; +} VLine; + +/* A set of boxes. + */ +typedef struct { + VipsConvolution parent_instance; + + VipsImage *iM; + + int layers; + int cluster; + + int divisor; + int rounding; + int offset; + + /* The horizontal lines we gather. hline[3] writes to band 3 in the + * intermediate image. max_line is the length of the longest hline: + * over 256 and we need to use an int intermediate for 8-bit images. + */ + int n_hline; + HLine hline[MAX_LINES]; + int max_line; + + /* During clustering, the top few edges we are considering. + */ + Edge edge[MAX_EDGES]; + + /* Scale and sum a set of hlines to make the final value. + */ + int n_velement; + VElement velement[MAX_LINES]; + + /* And group those velements as vlines. + */ + int n_vline; + VLine vline[MAX_LINES]; + +} VipsConva; + +typedef VipsConvolutionClass VipsConvaClass; + +G_DEFINE_TYPE( VipsConva, vips_conva, VIPS_TYPE_CONVOLUTION ); + +/* Euclid's algorithm. Use this to common up mults. + */ +static int +gcd( int a, int b ) +{ + if( b == 0 ) + return( abs( a ) ); + else + return( gcd( b, a % b ) ); +} + +static void +vips_conva_hline_start( VipsConva *conva, int x ) +{ + conva->hline[conva->n_hline].start = x; + conva->hline[conva->n_hline].weight = 1; +} + +static int +vips_conva_hline_end( VipsConva *conva, int x, int y, int factor ) +{ + VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( conva ); + + conva->hline[conva->n_hline].end = x; + + conva->velement[conva->n_velement].row = y; + conva->velement[conva->n_velement].band = conva->n_hline; + conva->velement[conva->n_velement].factor = factor; + + if( conva->n_hline >= MAX_LINES - 1 ) { + vips_error( class->nickname, "%s", _( "mask too complex" ) ); + return( -1 ); + } + conva->n_hline += 1; + + if( conva->n_velement >= MAX_LINES - 1 ) { + vips_error( class->nickname, "%s", _( "mask too complex" ) ); + return( -1 ); + } + conva->n_velement += 1; + + return( 0 ); +} + +#ifdef DEBUG +static void +vips_conva_hprint( VipsConva *conva ) +{ + int x, y; + + printf( "hlines:\n" ); + printf( " n b r f w\n" ); + for( y = 0; y < conva->n_velement; y++ ) { + int b = conva->velement[y].band; + + printf( "%4d %3d %3d %2d %3d ", + y, b, + conva->velement[y].row, + conva->velement[y].factor, + conva->hline[b].weight ); + for( x = 0; x < 45; x++ ) { + int rx = x * (conva->iM->Xsize + 1) / 45; + + if( rx >= conva->hline[b].start && + rx < conva->hline[b].end ) + printf( "#" ); + else + printf( " " ); + } + printf( " %3d .. %3d\n", + conva->hline[b].start, conva->hline[b].end ); + } +} + +static void +vips_conva_vprint( VipsConva *conva ) +{ + int y; + + printf( "%d vlines:\n", conva->n_vline ); + printf( " n b f s e\n" ); + for( y = 0; y < conva->n_vline; y++ ) + printf( "%4d %2d %2d == %3d .. %3d\n", y, + conva->vline[y].band, + conva->vline[y].factor, + conva->vline[y].start, + conva->vline[y].end ); + + printf( "divisor = %d\n", conva->divisor ); + printf( "rounding = %d\n", conva->rounding ); + printf( "offset = %d\n", conva->offset ); + printf( "max_line = %d\n", conva->max_line ); +} +#endif /*DEBUG*/ + +/* Break the mask into a set of hlines. + */ +static int +vips_conva_decompose_hlines( VipsConva *conva ) +{ + VipsImage *iM = conva->iM; + const int size = iM->Xsize * iM->Ysize; + double *coeff = VIPS_MATRIX( iM, 0, 0 ); + + double max; + double min; + double depth; + int layers_above; + int layers_below; + int z, n, x, y; + + /* Find mask range. We must always include the zero axis in the mask. + */ + max = 0; + min = 0; + for( n = 0; n < size; n++ ) { + max = VIPS_MAX( max, coeff[n] ); + min = VIPS_MIN( min, coeff[n] ); + } + + VIPS_DEBUG_MSG( "vips_conva_decompose_hlines: min = %g, max = %g\n", + min, max ); + + /* The zero axis must fall on a layer boundary. Estimate the + * depth, find n-lines-above-zero, get exact depth, then calculate a + * fixed n-lines which includes any negative parts. + */ + depth = (max - min) / conva->layers; + layers_above = VIPS_CEIL( max / depth ); + depth = max / layers_above; + layers_below = VIPS_FLOOR( min / depth ); + conva->layers = layers_above - layers_below; + + VIPS_DEBUG_MSG( "vips_conva_decompose_hlines: depth = %g, layers = %d\n", + depth, conva->layers ); + + /* For each layer, generate a set of lines which are inside the + * perimeter. Work down from the top. + */ + for( z = 0; z < conva->layers; z++ ) { + /* How deep we are into the mask, as a double we can test + * against. Add half the layer depth so we can easily find >50% + * mask elements. + */ + double z_ph = max - (1 + z) * depth + depth / 2; + + /* Odd, but we must avoid rounding errors that make us miss 0 + * in the line above. + */ + int z_positive = z < layers_above; + + for( y = 0; y < iM->Ysize; y++ ) { + int inside; + + /* Start outside the perimeter. + */ + inside = 0; + + for( x = 0; x < iM->Xsize; x++ ) { + double c = coeff[x + y * iM->Xsize]; + + /* The vertical line from mask[x, y] to 0 is + * inside. Is our current square (x, y) part + * of that line? + */ + if( (z_positive && c >= z_ph) || + (!z_positive && c <= z_ph) ) { + if( !inside ) { + vips_conva_hline_start( conva, + x ); + inside = 1; + } + } + else { + if( inside ) { + if( vips_conva_hline_end( conva, + x, y, + z_positive ? 1 : -1 ) ) + return( -1 ); + inside = 0; + } + } + } + + if( inside && + vips_conva_hline_end( conva, + iM->Xsize, y, z_positive ? 1 : -1 ) ) + return( -1 ); + } + } + +#ifdef DEBUG + VIPS_DEBUG_MSG( "vips_conva_decompose_hlines: generated %d hlines\n", + conva->n_hline ); + vips_conva_hprint( conva ); +#endif /*DEBUG*/ + + return( 0 ); +} + +/* The 'distance' between a pair of hlines. + */ +static int +vips_conva_distance( VipsConva *conva, int a, int b ) +{ + g_assert( conva->hline[a].weight > 0 && conva->hline[b].weight > 0 ); + + return( abs( conva->hline[a].start - conva->hline[b].start ) + + abs( conva->hline[a].end - conva->hline[b].end ) ); +} + +/* Merge two hlines. Line b is deleted, and any refs to b in vlines updated to + * point at a. + */ +static void +vips_conva_merge( VipsConva *conva, int a, int b ) +{ + int i; + + /* Scale weights. + */ + int fa = conva->hline[a].weight; + int fb = conva->hline[b].weight; + double w = (double) fb / (fa + fb); + + /* New endpoints. + */ + conva->hline[a].start += w * + (conva->hline[b].start - conva->hline[a].start); + conva->hline[a].end += w * + (conva->hline[b].end - conva->hline[a].end); + conva->hline[a].weight += conva->hline[b].weight; + + /* Update velement refs to b to refer to a instead. + */ + for( i = 0; i < conva->n_velement; i++ ) + if( conva->velement[i].band == b ) + conva->velement[i].band = a; + + /* Mark b to be deleted. + */ + conva->hline[b].weight = 0; +} + +static int +edge_sortfn( const void *p1, const void *p2 ) +{ + Edge *a = (Edge *) p1; + Edge *b = (Edge *) p2; + + return( a->d - b->d ); +} + +/* Cluster in batches. Return non-zero if we merged some lines. + * + * This is not as accurate as rescanning the whole space on every merge, but + * it's far faster. + */ +static int +vips_conva_cluster2( VipsConva *conva ) +{ + int i, j, k; + int worst; + int worst_i; + int merged; + + for( i = 0; i < MAX_EDGES; i++ ) { + conva->edge[i].a = -1; + conva->edge[i].b = -1; + conva->edge[i].d = 99999; + } + worst_i = 0; + worst = conva->edge[worst_i].d; + + for( i = 0; i < conva->n_hline; i++ ) { + if( conva->hline[i].weight == 0 ) + continue; + + for( j = i + 1; j < conva->n_hline; j++ ) { + int distance; + + if( conva->hline[j].weight == 0 ) + continue; + + distance = vips_conva_distance( conva, i, j ); + if( distance < worst ) { + conva->edge[worst_i].a = i; + conva->edge[worst_i].b = j; + conva->edge[worst_i].d = distance; + + worst_i = 0; + worst = conva->edge[worst_i].d; + for( k = 0; k < MAX_EDGES; k++ ) + if( conva->edge[k].d > worst ) { + worst = conva->edge[k].d; + worst_i = k; + } + } + } + } + + /* Sort to get closest first. + */ + qsort( conva->edge, MAX_EDGES, sizeof( Edge ), edge_sortfn ); + + /* + printf( "edges:\n" ); + printf( " n a b d:\n" ); + for( i = 0; i < MAX_EDGES; i++ ) + printf( "%2i) %3d %3d %3d\n", i, + conva->edge[i].a, conva->edge[i].b, conva->edge[i].d ); + */ + + /* Merge from the top down. + */ + merged = 0; + for( k = 0; k < MAX_EDGES; k++ ) { + Edge *edge = &conva->edge[k]; + + if( edge->d > conva->cluster ) + break; + + /* Has been removed, see loop below. + */ + if( edge->a == -1 ) + continue; + + vips_conva_merge( conva, edge->a, edge->b ); + merged = 1; + + /* Nodes a and b have vanished or been moved. Remove any edges + * which refer to them from the edge list, + */ + for( i = k; i < MAX_EDGES; i++ ) { + Edge *edgei = &conva->edge[i]; + + if( edgei->a == edge->a || + edgei->b == edge->a || + edgei->a == edge->b || + edgei->b == edge->b ) + edgei->a = -1; + } + } + + return( merged ); +} + +/* Renumber after clustering. We will have removed a lot of hlines ... shuffle + * the rest down, adjust all the vline references. + */ +static void +vips_conva_renumber( VipsConva *conva ) +{ + int i, j; + + VIPS_DEBUG_MSG( "vips_conva_renumber: renumbering ...\n" ); + + /* Loop for all zero-weight hlines. + */ + for( i = 0; i < conva->n_hline; ) { + if( conva->hline[i].weight > 0 ) { + i++; + continue; + } + + /* We move hlines i + 1 down, so we need to adjust all + * band[] refs to match. + */ + for( j = 0; j < conva->n_velement; j++ ) + if( conva->velement[j].band > i ) + conva->velement[j].band -= 1; + + memmove( conva->hline + i, conva->hline + i + 1, + sizeof( HLine ) * (conva->n_hline - i - 1) ); + conva->n_hline -= 1; + } + + + VIPS_DEBUG_MSG( "vips_conva_renumber: ... %d hlines remain\n", + conva->n_hline ); +} + +/* Sort by band, then factor, then row. + */ +static int +velement_sortfn( const void *p1, const void *p2 ) +{ + VElement *a = (VElement *) p1; + VElement *b = (VElement *) p2; + + if( a->band != b->band ) + return( a->band - b->band ); + + if( a->factor != b->factor ) + return( a->factor - b->factor ); + + return( a->row - b->row ); +} + +static void +vips_conva_vline( VipsConva *conva ) +{ + int y, z; + + VIPS_DEBUG_MSG( "vips_conva_vline: forming vlines ...\n" ); + + /* Sort to get elements which could form a vline together. + */ + qsort( conva->velement, conva->n_velement, sizeof( VElement ), + velement_sortfn ); + +#ifdef DEBUG + vips_conva_hprint( conva ); +#endif /*DEBUG*/ + + /* If two lines have the same row and band, we can join them and knock + * up the factor instead. + */ + for( y = 0; y < conva->n_velement; y++ ) { + for( z = y + 1; z < conva->n_velement; z++ ) + if( conva->velement[z].band != + conva->velement[y].band || + conva->velement[z].row != + conva->velement[y].row ) + break; + + /* We need to keep the sign of the old factor. + */ + if( conva->velement[y].factor > 0 ) + conva->velement[y].factor = z - y; + else + conva->velement[y].factor = y - z; + memmove( conva->velement + y + 1, conva->velement + z, + sizeof( VElement ) * (conva->n_velement - z) ); + conva->n_velement -= z - y - 1; + } + +#ifdef DEBUG + printf( "after commoning up, %d velement remain\n", conva->n_velement ); + vips_conva_hprint( conva ); +#endif /*DEBUG*/ + + conva->n_vline = 0; + for( y = 0; y < conva->n_velement; ) { + int n = conva->n_vline; + + /* Start of a line. + */ + conva->vline[n].band = conva->velement[y].band; + conva->vline[n].factor = conva->velement[y].factor; + conva->vline[n].start = conva->velement[y].row; + + /* Search for the end of this line. + */ + for( z = y + 1; z < conva->n_velement; z++ ) + if( conva->velement[z].band != + conva->vline[n].band || + conva->velement[z].factor != + conva->vline[n].factor || + conva->velement[z].row != + conva->vline[n].start + z - y ) + break; + + /* So the line ends at the previously examined element. We + * want 'end' to be one beyond that (non-inclusive). + */ + conva->vline[n].end = conva->velement[z - 1].row + 1; + + conva->n_vline += 1; + y = z; + } + + VIPS_DEBUG_MSG( "vips_conva_vline: found %d vlines\n", conva->n_vline ); +} + +/* Break a mask into boxes. + */ +static int +vips_conva_decompose_boxes( VipsConva *conva ) +{ + VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( conva ); + VipsImage *iM = conva->iM; + double *coeff = VIPS_MATRIX( iM, 0, 0 ); + const int size = iM->Xsize * iM->Ysize; + double scale = vips_image_get_scale( iM ); + double offset = vips_image_get_offset( iM ); + + double sum; + double area; + int x, y, z; + + if( vips_conva_decompose_hlines( conva ) ) + return( -1 ); + + /* Cluster to find groups of lines. + */ + VIPS_DEBUG_MSG( "vips_conva_decompose_boxes: " + "clustering hlines with thresh %d ...\n", conva->cluster ); + while( vips_conva_cluster2( conva ) ) + ; + + /* Renumber to remove holes created by clustering. + */ + vips_conva_renumber( conva ); + + /* Find a set of vlines for the remaining hlines. + */ + vips_conva_vline( conva ); + + /* Find the area of the lines and the length of the longest hline. We + * find the absolute area, we don't want -ves to cancel. + */ + area = 0; + conva->max_line = 0; + for( y = 0; y < conva->n_velement; y++ ) { + x = conva->velement[y].band; + z = conva->hline[x].end - conva->hline[x].start; + + area += abs( conva->velement[y].factor * z ); + if( z > conva->max_line ) + conva->max_line = z; + } + + /* Strength reduction: if all lines are divisible by n, we can move + * that n out into the area factor. The aim is to produce as many + * factor 1 lines as we can and to reduce the chance of overflow. + */ + x = conva->velement[0].factor; + for( y = 1; y < conva->n_velement; y++ ) + x = gcd( x, conva->velement[y].factor ); + for( y = 0; y < conva->n_velement; y++ ) + conva->velement[y].factor /= x; + area *= x; + + /* Find the area of the original mask. Again, don't let -ves cancel. + */ + sum = 0; + for( z = 0; z < size; z++ ) + sum += abs( coeff[z] ); + + conva->divisor = VIPS_RINT( area * scale / sum ); + conva->rounding = (conva->divisor + 1) / 2; + conva->offset = offset; + +#ifdef DEBUG + vips_conva_hprint( conva ); + vips_conva_vprint( conva ); +#endif /*DEBUG*/ + + /* With 512x512 tiles, each hline requires 3mb of intermediate per + * thread ... 300 lines is about a gb per thread, ouch. + */ + if( conva->n_hline > 150 ) { + vips_error( class->nickname, "%s", _( "mask too complex" ) ); + return( -1 ); + } + + return( 0 ); +} + +/* Our sequence value. + */ +typedef struct { + VipsConva *conva; + + VipsRegion *ir; /* Input region */ + + /* Offsets for start and stop. + */ + int *start; + int *end; + + int last_stride; /* Avoid recalcing offsets, if we can */ + + /* The rolling sums. int for integer types, double for floating point + * types. + */ + void *sum; +} VipsConvaSeq; + +/* Free a sequence value. + */ +static int +vips_conva_stop( void *vseq, void *a, void *b ) +{ + VipsConvaSeq *seq = (VipsConvaSeq *) vseq; + + VIPS_UNREF( seq->ir ); + + return( 0 ); +} + +/* Convolution start function. + */ +static void * +vips_conva_start( VipsImage *out, void *a, void *b ) +{ + VipsImage *in = (VipsImage *) a; + VipsConva *conva = (VipsConva *) b; + + VipsConvaSeq *seq; + + seq = VIPS_NEW( out, VipsConvaSeq ); + seq->conva = conva; + seq->ir = vips_region_new( in ); + + /* n_velement should be the largest possible dimension. + */ + g_assert( conva->n_velement >= conva->n_hline ); + g_assert( conva->n_velement >= conva->n_vline ); + + seq->start = VIPS_ARRAY( out, conva->n_velement, int ); + seq->end = VIPS_ARRAY( out, conva->n_velement, int ); + + if( vips_band_format_isint( out->BandFmt ) ) + seq->sum = VIPS_ARRAY( out, conva->n_velement, int ); + else + seq->sum = VIPS_ARRAY( out, conva->n_velement, double ); + seq->last_stride = -1; + + return( seq ); +} + +/* The h and v loops are very similar, but also annoyingly different. Keep + * them separate for easy debugging. + */ + +#define HCONV( IN, OUT ) \ +G_STMT_START { \ + for( i = 0; i < bands; i++ ) { \ + OUT *seq_sum = (OUT *) seq->sum; \ + \ + IN *p; \ + OUT *q; \ + \ + p = i + (IN *) VIPS_REGION_ADDR( ir, r->left, r->top + y ); \ + q = i * n_hline + \ + (OUT *) VIPS_REGION_ADDR( or, r->left, r->top + y ); \ + \ + for( z = 0; z < n_hline; z++ ) { \ + seq_sum[z] = 0; \ + for( x = conva->hline[z].start; \ + x < conva->hline[z].end; x++ ) \ + seq_sum[z] += p[x * istride]; \ + q[z] = seq_sum[z]; \ + } \ + q += ostride; \ + \ + for( x = 1; x < r->width; x++ ) { \ + for( z = 0; z < n_hline; z++ ) { \ + seq_sum[z] += p[seq->end[z]]; \ + seq_sum[z] -= p[seq->start[z]]; \ + q[z] = seq_sum[z]; \ + } \ + p += istride; \ + q += ostride; \ + } \ + } \ +} G_STMT_END + +/* Do horizontal masks ... we scan the mask along scanlines. + */ +static int +vips_conva_hgenerate( VipsRegion *or, void *vseq, + void *a, void *b, gboolean *stop ) +{ + VipsConvaSeq *seq = (VipsConvaSeq *) vseq; + VipsImage *in = (VipsImage *) a; + VipsConva *conva = (VipsConva *) b; + + VipsRegion *ir = seq->ir; + const int n_hline = conva->n_hline; + VipsImage *iM = conva->iM; + VipsRect *r = &or->valid; + + /* Double the bands (notionally) for complex. + */ + int bands = vips_band_format_iscomplex( in->BandFmt ) ? + 2 * in->Bands : in->Bands; + + VipsRect s; + int x, y, z, i; + int istride; + int ostride; + + /* Prepare the section of the input image we need. A little larger + * than the section of the output image we are producing. + */ + s = *r; + s.width += iM->Xsize - 1; + if( vips_region_prepare( ir, &s ) ) + return( -1 ); + + istride = VIPS_IMAGE_SIZEOF_PEL( in ) / + VIPS_IMAGE_SIZEOF_ELEMENT( in ); + ostride = VIPS_IMAGE_SIZEOF_PEL( or->im ) / + VIPS_IMAGE_SIZEOF_ELEMENT( or->im ); + + /* Init offset array. + */ + if( seq->last_stride != istride ) { + seq->last_stride = istride; + + for( z = 0; z < n_hline; z++ ) { + seq->start[z] = conva->hline[z].start * istride; + seq->end[z] = conva->hline[z].end * istride; + } + } + + for( y = 0; y < r->height; y++ ) { + switch( in->BandFmt ) { + case VIPS_FORMAT_UCHAR: + if( conva->max_line < 256 ) + HCONV( unsigned char, unsigned short ); + else + HCONV( unsigned char, unsigned int ); + break; + + case VIPS_FORMAT_CHAR: + if( conva->max_line < 256 ) + HCONV( signed char, signed short ); + else + HCONV( signed char, signed int ); + break; + + case VIPS_FORMAT_USHORT: + HCONV( unsigned short, unsigned int ); + break; + + case VIPS_FORMAT_SHORT: + HCONV( signed short, signed int ); + break; + + case VIPS_FORMAT_UINT: + HCONV( unsigned int, unsigned int ); + break; + + case VIPS_FORMAT_INT: + HCONV( signed int, signed int ); + break; + + case VIPS_FORMAT_FLOAT: + HCONV( float, float ); + break; + + case VIPS_FORMAT_DOUBLE: + HCONV( double, double ); + break; + + case VIPS_FORMAT_COMPLEX: + HCONV( float, float ); + break; + + case VIPS_FORMAT_DPCOMPLEX: + HCONV( double, double ); + break; + + default: + g_assert_not_reached(); + } + } + + return( 0 ); +} + +static int +vips_conva_horizontal( VipsConva *conva, VipsImage *in, VipsImage **out ) +{ + VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( conva ); + + /* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output + * would be 1x1. + */ + *out = vips_image_new(); + if( vips_image_pipelinev( *out, + VIPS_DEMAND_STYLE_SMALLTILE, in, NULL ) ) + return( -1 ); + + (*out)->Xsize -= conva->iM->Xsize - 1; + if( (*out)->Xsize <= 0 ) { + vips_error( class->nickname, + "%s", _( "image too small for mask" ) ); + return( -1 ); + } + (*out)->Bands *= conva->n_hline; + + /* Short u?char lines can use u?short intermediate. + */ + if( vips_band_format_isuint( in->BandFmt ) ) + (*out)->BandFmt = conva->max_line < 256 ? + VIPS_FORMAT_USHORT : VIPS_FORMAT_UINT; + else if( vips_band_format_isint( in->BandFmt ) ) + (*out)->BandFmt = conva->max_line < 256 ? + VIPS_FORMAT_SHORT : VIPS_FORMAT_INT; + + if( vips_image_generate( *out, + vips_conva_start, vips_conva_hgenerate, vips_conva_stop, + in, conva ) ) + return( -1 ); + + return( 0 ); +} + +#define CLIP_UCHAR( V ) \ +G_STMT_START { \ + if( (V) < 0 ) \ + (V) = 0; \ + else if( (V) > UCHAR_MAX ) \ + (V) = UCHAR_MAX; \ +} G_STMT_END + +#define CLIP_CHAR( V ) \ +G_STMT_START { \ + if( (V) < SCHAR_MIN ) \ + (V) = SCHAR_MIN; \ + else if( (V) > SCHAR_MAX ) \ + (V) = SCHAR_MAX; \ +} G_STMT_END + +#define CLIP_USHORT( V ) \ +G_STMT_START { \ + if( (V) < 0 ) \ + (V) = 0; \ + else if( (V) > USHRT_MAX ) \ + (V) = USHRT_MAX; \ +} G_STMT_END + +#define CLIP_SHORT( V ) \ +G_STMT_START { \ + if( (V) < SHRT_MIN ) \ + (V) = SHRT_MIN; \ + else if( (V) > SHRT_MAX ) \ + (V) = SHRT_MAX; \ +} G_STMT_END + +#define CLIP_NONE( V ) {} + +#define VCONV( ACC, IN, OUT, CLIP ) \ +G_STMT_START { \ + for( x = 0; x < sz; x++ ) { \ + ACC *seq_sum = (ACC *) seq->sum; \ + \ + IN *p; \ + OUT *q; \ + ACC sum; \ + \ + p = x * conva->n_hline + \ + (IN *) VIPS_REGION_ADDR( ir, r->left, r->top ); \ + q = x + (OUT *) VIPS_REGION_ADDR( or, r->left, r->top ); \ + \ + sum = 0; \ + for( z = 0; z < n_vline; z++ ) { \ + seq_sum[z] = 0; \ + for( k = conva->vline[z].start; \ + k < conva->vline[z].end; k++ ) \ + seq_sum[z] += p[k * istride + \ + conva->vline[z].band]; \ + sum += conva->vline[z].factor * seq_sum[z]; \ + } \ + sum = (sum + conva->rounding) / conva->divisor + conva->offset; \ + CLIP( sum ); \ + *q = sum; \ + q += ostride; \ + \ + for( y = 1; y < r->height; y++ ) { \ + sum = 0;\ + for( z = 0; z < n_vline; z++ ) { \ + seq_sum[z] += p[seq->end[z]]; \ + seq_sum[z] -= p[seq->start[z]]; \ + sum += conva->vline[z].factor * seq_sum[z]; \ + } \ + p += istride; \ + sum = (sum + conva->rounding) / conva->divisor + \ + conva->offset; \ + CLIP( sum ); \ + *q = sum; \ + q += ostride; \ + } \ + } \ +} G_STMT_END + +/* Do vertical masks ... we scan the mask down columns of pixels. + */ +static int +vips_conva_vgenerate( VipsRegion *or, void *vseq, + void *a, void *b, gboolean *stop ) +{ + VipsConvaSeq *seq = (VipsConvaSeq *) vseq; + VipsImage *in = (VipsImage *) a; + VipsConva *conva = (VipsConva *) b; + VipsConvolution *convolution = (VipsConvolution *) conva; + + VipsRegion *ir = seq->ir; + const int n_vline = conva->n_vline; + VipsImage *iM = conva->iM; + VipsRect *r = &or->valid; + + /* Double the width (notionally) for complex. + */ + int sz = vips_band_format_iscomplex( in->BandFmt ) ? + 2 * VIPS_REGION_N_ELEMENTS( or ) : VIPS_REGION_N_ELEMENTS( or ); + + VipsRect s; + int x, y, z, k; + int istride; + int ostride; + + /* Prepare the section of the input image we need. A little larger + * than the section of the output image we are producing. + */ + s = *r; + s.height += iM->Ysize - 1; + if( vips_region_prepare( ir, &s ) ) + return( -1 ); + + istride = VIPS_REGION_LSKIP( ir ) / + VIPS_IMAGE_SIZEOF_ELEMENT( in ); + ostride = VIPS_REGION_LSKIP( or ) / + VIPS_IMAGE_SIZEOF_ELEMENT( convolution->out ); + + /* Init offset array. + */ + if( seq->last_stride != istride ) { + seq->last_stride = istride; + + for( z = 0; z < n_vline; z++ ) { + seq->start[z] = conva->vline[z].band + + conva->vline[z].start * istride; + seq->end[z] = conva->vline[z].band + + conva->vline[z].end * istride; + } + } + + switch( convolution->in->BandFmt ) { + case VIPS_FORMAT_UCHAR: + if( conva->max_line < 256 ) + VCONV( unsigned int, \ + unsigned short, unsigned char, CLIP_UCHAR ); + else + VCONV( unsigned int, \ + unsigned int, unsigned char, CLIP_UCHAR ); + break; + + case VIPS_FORMAT_CHAR: + if( conva->max_line < 256 ) + VCONV( signed int, \ + signed short, signed char, CLIP_CHAR ); + else + VCONV( signed int, \ + signed int, signed char, CLIP_CHAR ); + break; + + case VIPS_FORMAT_USHORT: + VCONV( unsigned int, \ + unsigned int, unsigned short, CLIP_USHORT ); + break; + + case VIPS_FORMAT_SHORT: + VCONV( signed int, signed int, signed short, CLIP_SHORT ); + break; + + case VIPS_FORMAT_UINT: + VCONV( unsigned int, unsigned int, unsigned int, CLIP_NONE ); + break; + + case VIPS_FORMAT_INT: + VCONV( signed int, signed int, signed int, CLIP_NONE ); + break; + + case VIPS_FORMAT_FLOAT: + VCONV( float, float, float, CLIP_NONE ); + break; + + case VIPS_FORMAT_DOUBLE: + VCONV( double, double, double, CLIP_NONE ); + break; + + case VIPS_FORMAT_COMPLEX: + VCONV( float, float, float, CLIP_NONE ); + break; + + case VIPS_FORMAT_DPCOMPLEX: + VCONV( double, double, double, CLIP_NONE ); + break; + + default: + g_assert_not_reached(); + } + + return( 0 ); +} + +static int +vips_conva_vertical( VipsConva *conva, VipsImage *in, VipsImage **out ) +{ + VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( conva ); + VipsConvolution *convolution = (VipsConvolution *) conva; + + /* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output + * would be 1x1. + */ + *out = vips_image_new(); + if( vips_image_pipelinev( *out, + VIPS_DEMAND_STYLE_SMALLTILE, in, NULL ) ) + return( -1 ); + + (*out)->Ysize -= conva->iM->Ysize - 1; + if( (*out)->Ysize <= 0 ) { + vips_error( class->nickname, + "%s", _( "image too small for mask" ) ); + return( -1 ); + } + (*out)->Bands = convolution->in->Bands; + (*out)->BandFmt = convolution->in->BandFmt; + + if( vips_image_generate( *out, + vips_conva_start, vips_conva_vgenerate, vips_conva_stop, + in, conva ) ) + return( -1 ); + + return( 0 ); +} + +static int +vips_conva_build( VipsObject *object ) +{ + VipsConvolution *convolution = (VipsConvolution *) object; + VipsConva *conva = (VipsConva *) object; + VipsImage **t = (VipsImage **) vips_object_local_array( object, 4 ); + + VipsImage *in; + + if( VIPS_OBJECT_CLASS( vips_conva_parent_class )->build( object ) ) + return( -1 ); + + /* An int version of our mask. + */ + if( vips__image_intize( convolution->M, &t[0] ) ) + return( -1 ); + conva->iM = t[0]; + +#ifdef DEBUG + printf( "vips_conva_build: iM =\n" ); + vips_matrixprint( conva->iM, NULL ); +#endif /*DEBUG*/ + + in = convolution->in; + + if( vips_conva_decompose_boxes( conva ) ) + return( -1 ); + + g_object_set( conva, "out", vips_image_new(), NULL ); + if( + vips_embed( in, &t[1], + t[0]->Xsize / 2, + t[0]->Ysize / 2, + in->Xsize + t[0]->Xsize - 1, + in->Ysize + t[0]->Ysize - 1, + "extend", VIPS_EXTEND_COPY, + NULL ) || + vips_conva_horizontal( conva, t[1], &t[2] ) || + vips_conva_vertical( conva, t[2], &t[3] ) || + vips_image_write( t[3], convolution->out ) ) + return( -1 ); + + convolution->out->Xoffset = 0; + convolution->out->Yoffset = 0; + + return( 0 ); +} + +static void +vips_conva_class_init( VipsConvaClass *class ) +{ + GObjectClass *gobject_class = G_OBJECT_CLASS( class ); + VipsObjectClass *object_class = (VipsObjectClass *) class; + + gobject_class->set_property = vips_object_set_property; + gobject_class->get_property = vips_object_get_property; + + object_class->nickname = "conva"; + object_class->description = _( "approximate integer convolution" ); + object_class->build = vips_conva_build; + + VIPS_ARG_INT( class, "layers", 104, + _( "Layers" ), + _( "Use this many layers in approximation" ), + VIPS_ARGUMENT_OPTIONAL_INPUT, + G_STRUCT_OFFSET( VipsConva, layers ), + 1, 1000, 5 ); + + VIPS_ARG_INT( class, "cluster", 105, + _( "Cluster" ), + _( "Cluster lines closer than this in approximation" ), + VIPS_ARGUMENT_OPTIONAL_INPUT, + G_STRUCT_OFFSET( VipsConva, cluster ), + 1, 100, 1 ); + +} + +static void +vips_conva_init( VipsConva *conva ) +{ + conva->layers = 5; + conva->cluster = 1; +} + +/** + * vips_conva: + * @in: input image + * @out: output image + * @mask: convolution mask + * @...: %NULL-terminated list of optional named arguments + * + * Optional arguments: + * + * * @layers: %gint, number of layers for approximation + * * @cluster: %gint, cluster lines closer than this distance + * + * Perform an approximate integer convolution of @in with @mask. + * This is a low-level operation, see + * vips_conv() for something more convenient. + * + * The output image + * always has the same #VipsBandFormat as the input image. + * Elements of @mask are converted to + * integers before convolution. + * + * Larger values for @layers give more accurate + * results, but are slower. As @layers approaches the mask radius, the + * accuracy will become close to exact convolution and the speed will drop to + * match. For many large masks, such as Gaussian, @layers need be only 10% of + * this value and accuracy will still be good. + * + * Smaller values of @cluster will give more accurate results, but be slower + * and use more memory. 10% of the mask radius is a good rule of thumb. + * + * See also: vips_conv(). + * + * Returns: 0 on success, -1 on error + */ +int +vips_conva( VipsImage *in, VipsImage **out, VipsImage *mask, ... ) +{ + va_list ap; + int result; + + va_start( ap, mask ); + result = vips_call_split( "conva", ap, in, out, mask ); + va_end( ap ); + + return( result ); +} + diff --git a/libvips/convolution/convasep.c b/libvips/convolution/convasep.c new file mode 100644 index 00000000..5f5209a6 --- /dev/null +++ b/libvips/convolution/convasep.c @@ -0,0 +1,958 @@ +/* convasep ... separable approximate convolution + * + * This operation does an approximate, seperable convolution. + * + * Author: John Cupitt & Nicolas Robidoux + * Written on: 31/5/11 + * Modified on: + * 31/5/11 + * - from im_conv() + * 5/7/16 + * - redone as a class + */ + +/* + + This file is part of VIPS. + + VIPS is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA + + */ + +/* + + These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk + + */ + +/* + + See: + + http://incubator.quasimondo.com/processing/stackblur.pde + + This thing is a little like stackblur, but generalised to any separable + mask. + + */ + +/* + + TODO + + - how about making a cumulative image and then subtracting points in + that, rather than keeping a set of running totals + + faster? + + we could then use orc to write a bit of code to implement this set + of lines + + stackoverflow has an algorithm for cumulativization using SIMD and + threads, see that font rasterization with rust piece on medium by + ralph levien + + */ + +/* +#define DEBUG +#define VIPS_DEBUG + */ + +#ifdef HAVE_CONFIG_H +#include +#endif /*HAVE_CONFIG_H*/ +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "pconvolution.h" + +/* Maximum number of lines we can break the mask into. + */ +#define MAX_LINES (1000) + +/* Euclid's algorithm. Use this to common up mults. + */ +static int +gcd( int a, int b ) +{ + if( b == 0 ) + return( abs( a ) ); + else + return( gcd( b, a % b ) ); +} + +typedef struct { + VipsConvolution parent_instance; + + int layers; + + int divisor; + int rounding; + int offset; + + /* The "width" of the mask, ie. n for our 1xn or nx1 argument, plus + * an int version of our mask. + */ + int width; + VipsImage *iM; + + /* The mask broken into a set of lines. + * + * Start is the left-most pixel in the line, end is one beyond the + * right-most pixel. + */ + int n_lines; + int start[MAX_LINES]; + int end[MAX_LINES]; + int factor[MAX_LINES]; +} VipsConvasep; + +typedef VipsConvolutionClass VipsConvasepClass; + +G_DEFINE_TYPE( VipsConvasep, vips_convasep, VIPS_TYPE_CONVOLUTION ); + +static void +vips_convasep_line_start( VipsConvasep *convasep, int x, int factor ) +{ + convasep->start[convasep->n_lines] = x; + convasep->factor[convasep->n_lines] = factor; +} + +static int +vips_convasep_line_end( VipsConvasep *convasep, int x ) +{ + VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( convasep ); + + convasep->end[convasep->n_lines] = x; + + if( convasep->n_lines >= MAX_LINES - 1 ) { + vips_error( class->nickname, "%s", _( "mask too complex" ) ); + return( -1 ); + } + convasep->n_lines += 1; + + return( 0 ); +} + +/* Break a mask into lines. + */ +static int +vips_convasep_decompose( VipsConvasep *convasep ) +{ + VipsImage *iM = convasep->iM; + double *coeff = (double *) VIPS_IMAGE_ADDR( iM, 0, 0 ); + double scale = vips_image_get_scale( iM ); + double offset = vips_image_get_offset( iM ); + + double max; + double min; + double depth; + double sum; + double area; + int layers; + int layers_above; + int layers_below; + int z, n, x; + + VIPS_DEBUG_MSG( "vips_convasep_decompose: " + "breaking into %d layers ...\n", convasep->layers ); + + /* Find mask range. We must always include the zero axis in the mask. + */ + max = 0; + min = 0; + for( x = 0; x < convasep->width; x++ ) { + if( coeff[x] > max ) + max = coeff[x]; + if( coeff[x] < min ) + min = coeff[x]; + } + + /* The zero axis must fall on a layer boundary. Estimate the + * depth, find n-lines-above-zero, get exact depth, then calculate a + * fixed n-lines which includes any negative parts. + */ + depth = (max - min) / convasep->layers; + layers_above = ceil( max / depth ); + depth = max / layers_above; + layers_below = floor( min / depth ); + layers = layers_above - layers_below; + + VIPS_DEBUG_MSG( "depth = %g, layers = %d\n", depth, layers ); + + /* For each layer, generate a set of lines which are inside the + * perimeter. Work down from the top. + */ + for( z = 0; z < layers; z++ ) { + double y = max - (1 + z) * depth; + + /* y plus half depth ... ie. the layer midpoint. + */ + double y_ph = y + depth / 2; + + /* Odd, but we must avoid rounding errors that make us miss 0 + * in the line above. + */ + int y_positive = z < layers_above; + + int inside; + + /* Start outside the perimeter. + */ + inside = 0; + + for( x = 0; x < convasep->width; x++ ) { + /* The vertical line from mask[z] to 0 is inside. Is + * our current square (x, y) part of that line? + */ + if( (y_positive && coeff[x] >= y_ph) || + (!y_positive && coeff[x] <= y_ph) ) { + if( !inside ) { + vips_convasep_line_start( convasep, x, + y_positive ? 1 : -1 ); + inside = 1; + } + } + else if( inside ) { + if( vips_convasep_line_end( convasep, x ) ) + return( -1 ); + inside = 0; + } + } + + if( inside && + vips_convasep_line_end( convasep, convasep->width ) ) + return( -1 ); + } + + /* Can we common up any lines? Search for lines with identical + * start/end. + */ + for( z = 0; z < convasep->n_lines; z++ ) { + for( n = z + 1; n < convasep->n_lines; n++ ) { + if( convasep->start[z] == convasep->start[n] && + convasep->end[z] == convasep->end[n] ) { + convasep->factor[z] += convasep->factor[n]; + + /* n can be deleted. Do this in a separate + * pass below. + */ + convasep->factor[n] = 0; + } + } + } + + /* Now we can remove all factor 0 lines. + */ + for( z = 0; z < convasep->n_lines; z++ ) { + if( convasep->factor[z] == 0 ) { + for( x = z; x < convasep->n_lines; x++ ) { + convasep->start[x] = convasep->start[x + 1]; + convasep->end[x] = convasep->end[x + 1]; + convasep->factor[x] = convasep->factor[x + 1]; + } + convasep->n_lines -= 1; + } + } + + /* Find the area of the lines. + */ + area = 0; + for( z = 0; z < convasep->n_lines; z++ ) + area += convasep->factor[z] * + (convasep->end[z] - convasep->start[z]); + + /* Strength reduction: if all lines are divisible by n, we can move + * that n out into the ->area factor. The aim is to produce as many + * factor 1 lines as we can and to reduce the chance of overflow. + */ + x = convasep->factor[0]; + for( z = 1; z < convasep->n_lines; z++ ) + x = gcd( x, convasep->factor[z] ); + for( z = 0; z < convasep->n_lines; z++ ) + convasep->factor[z] /= x; + area *= x; + + /* Find the area of the original mask. + */ + sum = 0; + for( z = 0; z < convasep->width; z++ ) + sum += coeff[z]; + + convasep->divisor = VIPS_RINT( sum * area / scale ); + if( convasep->divisor == 0 ) + convasep->divisor = 1; + convasep->rounding = (convasep->divisor + 1) / 2; + convasep->offset = offset; + +#ifdef DEBUG + /* ASCII-art layer drawing. + */ + printf( "lines:\n" ); + for( z = 0; z < convasep->n_lines; z++ ) { + printf( "%3d - %2d x ", z, convasep->factor[z] ); + for( x = 0; x < 55; x++ ) { + int rx = x * (convasep->width + 1) / 55; + + if( rx >= convasep->start[z] && rx < convasep->end[z] ) + printf( "#" ); + else + printf( " " ); + } + printf( " %3d .. %3d\n", convasep->start[z], convasep->end[z] ); + } + printf( "divisor = %d\n", convasep->divisor ); + printf( "rounding = %d\n", convasep->rounding ); + printf( "offset = %d\n", convasep->offset ); +#endif /*DEBUG*/ + + return( 0 ); +} + +/* Our sequence value. + */ +typedef struct { + VipsConvasep *convasep; + + VipsRegion *ir; /* Input region */ + + int *start; /* Offsets for start and stop */ + int *end; + + /* The sums for each line. int for integer types, double for floating + * point types. + */ + int *isum; + double *dsum; + + int last_stride; /* Avoid recalcing offsets, if we can */ +} VipsConvasepSeq; + +/* Free a sequence value. + */ +static int +vips_convasep_stop( void *vseq, void *a, void *b ) +{ + VipsConvasepSeq *seq = (VipsConvasepSeq *) vseq; + + VIPS_UNREF( seq->ir ); + VIPS_FREE( seq->start ); + VIPS_FREE( seq->end ); + VIPS_FREE( seq->isum ); + VIPS_FREE( seq->dsum ); + + return( 0 ); +} + +/* Convolution start function. + */ +static void * +vips_convasep_start( VipsImage *out, void *a, void *b ) +{ + VipsImage *in = (IMAGE *) a; + VipsConvasep *convasep = (VipsConvasep *) b; + + VipsConvasepSeq *seq; + + if( !(seq = VIPS_NEW( out, VipsConvasepSeq )) ) + return( NULL ); + + /* Init! + */ + seq->convasep = convasep; + seq->ir = vips_region_new( in ); + seq->start = VIPS_ARRAY( NULL, convasep->n_lines, int ); + seq->end = VIPS_ARRAY( NULL, convasep->n_lines, int ); + seq->isum = NULL; + seq->dsum = NULL; + if( vips_band_format_isint( out->BandFmt ) ) + seq->isum = VIPS_ARRAY( NULL, convasep->n_lines, int ); + else + seq->dsum = VIPS_ARRAY( NULL, convasep->n_lines, double ); + seq->last_stride = -1; + + if( !seq->ir || + !seq->start || + !seq->end || + (!seq->isum && !seq->dsum) ) { + vips_convasep_stop( seq, in, convasep ); + return( NULL ); + } + + return( seq ); +} + +#define CLIP_UCHAR( V ) \ +G_STMT_START { \ + if( (V) < 0 ) \ + (V) = 0; \ + else if( (V) > UCHAR_MAX ) \ + (V) = UCHAR_MAX; \ +} G_STMT_END + +#define CLIP_CHAR( V ) \ +G_STMT_START { \ + if( (V) < SCHAR_MIN ) \ + (V) = SCHAR_MIN; \ + else if( (V) > SCHAR_MAX ) \ + (V) = SCHAR_MAX; \ +} G_STMT_END + +#define CLIP_USHORT( V ) \ +G_STMT_START { \ + if( (V) < 0 ) \ + (V) = 0; \ + else if( (V) > USHRT_MAX ) \ + (V) = USHRT_MAX; \ +} G_STMT_END + +#define CLIP_SHORT( V ) \ +G_STMT_START { \ + if( (V) < SHRT_MIN ) \ + (V) = SHRT_MIN; \ + else if( (V) > SHRT_MAX ) \ + (V) = SHRT_MAX; \ +} G_STMT_END + +#define CLIP_NONE( V ) {} + +/* The h and v loops are very similar, but also annoyingly different. Keep + * them separate for easy debugging. + */ + +#define HCONV_INT( TYPE, CLIP ) { \ + for( i = 0; i < bands; i++ ) { \ + int *isum = seq->isum; \ + \ + TYPE *q; \ + TYPE *p; \ + int sum; \ + \ + p = i + (TYPE *) VIPS_REGION_ADDR( ir, r->left, r->top + y ); \ + q = i + (TYPE *) VIPS_REGION_ADDR( or, r->left, r->top + y ); \ + \ + sum = 0; \ + for( z = 0; z < n_lines; z++ ) { \ + isum[z] = 0; \ + for( x = seq->start[z]; x < seq->end[z]; x += istride ) \ + isum[z] += p[x]; \ + sum += convasep->factor[z] * isum[z]; \ + } \ + \ + /* Don't add offset ... we only want to do that once, do it on \ + * the vertical pass. \ + */ \ + sum = (sum + convasep->rounding) / convasep->divisor; \ + CLIP( sum ); \ + *q = sum; \ + q += ostride; \ + \ + for( x = 1; x < r->width; x++ ) { \ + sum = 0; \ + for( z = 0; z < n_lines; z++ ) { \ + isum[z] += p[seq->end[z]]; \ + isum[z] -= p[seq->start[z]]; \ + sum += convasep->factor[z] * isum[z]; \ + } \ + p += istride; \ + sum = (sum + convasep->rounding) / convasep->divisor; \ + CLIP( sum ); \ + *q = sum; \ + q += ostride; \ + } \ + } \ +} + +#define HCONV_FLOAT( TYPE ) { \ + for( i = 0; i < bands; i++ ) { \ + double *dsum = seq->dsum; \ + \ + TYPE *q; \ + TYPE *p; \ + double sum; \ + \ + p = i + (TYPE *) VIPS_REGION_ADDR( ir, r->left, r->top + y ); \ + q = i + (TYPE *) VIPS_REGION_ADDR( or, r->left, r->top + y ); \ + \ + sum = 0; \ + for( z = 0; z < n_lines; z++ ) { \ + dsum[z] = 0; \ + for( x = seq->start[z]; x < seq->end[z]; x += istride ) \ + dsum[z] += p[x]; \ + sum += convasep->factor[z] * dsum[z]; \ + } \ + \ + /* Don't add offset ... we only want to do that once, do it on \ + * the vertical pass. \ + */ \ + sum = sum / convasep->divisor; \ + *q = sum; \ + q += ostride; \ + \ + for( x = 1; x < r->width; x++ ) { \ + sum = 0; \ + for( z = 0; z < n_lines; z++ ) { \ + dsum[z] += p[seq->end[z]]; \ + dsum[z] -= p[seq->start[z]]; \ + sum += convasep->factor[z] * dsum[z]; \ + } \ + p += istride; \ + sum = sum / convasep->divisor; \ + *q = sum; \ + q += ostride; \ + } \ + } \ +} + +/* Do horizontal masks ... we scan the mask along scanlines. + */ +static int +vips_convasep_generate_horizontal( VipsRegion *or, + void *vseq, void *a, void *b, gboolean *stop ) +{ + VipsConvasepSeq *seq = (VipsConvasepSeq *) vseq; + VipsImage *in = (VipsImage *) a; + VipsConvasep *convasep = (VipsConvasep *) b; + VipsConvolution *convolution = (VipsConvolution *) convasep; + + VipsRegion *ir = seq->ir; + const int n_lines = convasep->n_lines; + VipsRect *r = &or->valid; + + /* Double the bands (notionally) for complex. + */ + int bands = vips_band_format_iscomplex( in->BandFmt ) ? + 2 * in->Bands : in->Bands; + + VipsRect s; + int x, y, z, i; + int istride; + int ostride; + + /* Prepare the section of the input image we need. A little larger + * than the section of the output image we are producing. + */ + s = *r; + s.width += convasep->width - 1; + if( vips_region_prepare( ir, &s ) ) + return( -1 ); + + /* Stride can be different for the vertical case, keep this here for + * ease of direction change. + */ + istride = VIPS_IMAGE_SIZEOF_PEL( in ) / + VIPS_IMAGE_SIZEOF_ELEMENT( in ); + ostride = VIPS_IMAGE_SIZEOF_PEL( convolution->out ) / + VIPS_IMAGE_SIZEOF_ELEMENT( convolution->out ); + + /* Init offset array. + */ + if( seq->last_stride != istride ) { + seq->last_stride = istride; + + for( z = 0; z < n_lines; z++ ) { + seq->start[z] = convasep->start[z] * istride; + seq->end[z] = convasep->end[z] * istride; + } + } + + for( y = 0; y < r->height; y++ ) { + switch( in->BandFmt ) { + case VIPS_FORMAT_UCHAR: + HCONV_INT( unsigned char, CLIP_UCHAR ); + break; + + case VIPS_FORMAT_CHAR: + HCONV_INT( signed char, CLIP_CHAR ); + break; + + case VIPS_FORMAT_USHORT: + HCONV_INT( unsigned short, CLIP_USHORT ); + break; + + case VIPS_FORMAT_SHORT: + HCONV_INT( signed short, CLIP_SHORT ); + break; + + case VIPS_FORMAT_UINT: + HCONV_INT( unsigned int, CLIP_NONE ); + break; + + case VIPS_FORMAT_INT: + HCONV_INT( signed int, CLIP_NONE ); + break; + + case VIPS_FORMAT_FLOAT: + case VIPS_FORMAT_COMPLEX: + HCONV_FLOAT( float ); + break; + + case VIPS_FORMAT_DOUBLE: + case VIPS_FORMAT_DPCOMPLEX: + HCONV_FLOAT( double ); + break; + + default: + g_assert_not_reached(); + } + } + + return( 0 ); +} + +#define VCONV_INT( TYPE, CLIP ) { \ + for( x = 0; x < sz; x++ ) { \ + int *isum = seq->isum; \ + \ + TYPE *q; \ + TYPE *p; \ + int sum; \ + \ + p = x + (TYPE *) VIPS_REGION_ADDR( ir, r->left, r->top ); \ + q = x + (TYPE *) VIPS_REGION_ADDR( or, r->left, r->top ); \ + \ + sum = 0; \ + for( z = 0; z < n_lines; z++ ) { \ + isum[z] = 0; \ + for( y = seq->start[z]; y < seq->end[z]; y += istride ) \ + isum[z] += p[y]; \ + sum += convasep->factor[z] * isum[z]; \ + } \ + sum = (sum + convasep->rounding) / convasep->divisor + \ + convasep->offset; \ + CLIP( sum ); \ + *q = sum; \ + q += ostride; \ + \ + for( y = 1; y < r->height; y++ ) { \ + sum = 0; \ + for( z = 0; z < n_lines; z++ ) { \ + isum[z] += p[seq->end[z]]; \ + isum[z] -= p[seq->start[z]]; \ + sum += convasep->factor[z] * isum[z]; \ + } \ + p += istride; \ + sum = (sum + convasep->rounding) / convasep->divisor + \ + convasep->offset; \ + CLIP( sum ); \ + *q = sum; \ + q += ostride; \ + } \ + } \ +} + +#define VCONV_FLOAT( TYPE ) { \ + for( x = 0; x < sz; x++ ) { \ + double *dsum = seq->dsum; \ + \ + TYPE *q; \ + TYPE *p; \ + double sum; \ + \ + p = x + (TYPE *) VIPS_REGION_ADDR( ir, r->left, r->top ); \ + q = x + (TYPE *) VIPS_REGION_ADDR( or, r->left, r->top ); \ + \ + sum = 0; \ + for( z = 0; z < n_lines; z++ ) { \ + dsum[z] = 0; \ + for( y = seq->start[z]; y < seq->end[z]; y += istride ) \ + dsum[z] += p[y]; \ + sum += convasep->factor[z] * dsum[z]; \ + } \ + sum = sum / convasep->divisor + convasep->offset; \ + *q = sum; \ + q += ostride; \ + \ + for( y = 1; y < r->height; y++ ) { \ + sum = 0; \ + for( z = 0; z < n_lines; z++ ) { \ + dsum[z] += p[seq->end[z]]; \ + dsum[z] -= p[seq->start[z]]; \ + sum += convasep->factor[z] * dsum[z]; \ + } \ + p += istride; \ + sum = sum / convasep->divisor + convasep->offset; \ + *q = sum; \ + q += ostride; \ + } \ + } \ +} + +/* Do vertical masks ... we scan the mask down columns of pixels. Copy-paste + * from above with small changes. + */ +static int +vips_convasep_generate_vertical( VipsRegion *or, + void *vseq, void *a, void *b, gboolean *stop ) +{ + VipsConvasepSeq *seq = (VipsConvasepSeq *) vseq; + VipsImage *in = (VipsImage *) a; + VipsConvasep *convasep = (VipsConvasep *) b; + VipsConvolution *convolution = (VipsConvolution *) convasep; + + VipsRegion *ir = seq->ir; + const int n_lines = convasep->n_lines; + VipsRect *r = &or->valid; + + /* Double the width (notionally) for complex. + */ + int sz = vips_band_format_iscomplex( in->BandFmt ) ? + 2 * VIPS_REGION_N_ELEMENTS( or ) : VIPS_REGION_N_ELEMENTS( or ); + + VipsRect s; + int x, y, z; + int istride; + int ostride; + + /* Prepare the section of the input image we need. A little larger + * than the section of the output image we are producing. + */ + s = *r; + s.height += convasep->width - 1; + if( vips_region_prepare( ir, &s ) ) + return( -1 ); + + /* Stride can be different for the vertical case, keep this here for + * ease of direction change. + */ + istride = VIPS_REGION_LSKIP( ir ) / VIPS_IMAGE_SIZEOF_ELEMENT( in ); + ostride = VIPS_REGION_LSKIP( or ) / + VIPS_IMAGE_SIZEOF_ELEMENT( convolution->out ); + + /* Init offset array. + */ + if( seq->last_stride != istride ) { + seq->last_stride = istride; + + for( z = 0; z < n_lines; z++ ) { + seq->start[z] = convasep->start[z] * istride; + seq->end[z] = convasep->end[z] * istride; + } + } + + switch( in->BandFmt ) { + case VIPS_FORMAT_UCHAR: + VCONV_INT( unsigned char, CLIP_UCHAR ); + break; + + case VIPS_FORMAT_CHAR: + VCONV_INT( signed char, CLIP_CHAR ); + break; + + case VIPS_FORMAT_USHORT: + VCONV_INT( unsigned short, CLIP_USHORT ); + break; + + case VIPS_FORMAT_SHORT: + VCONV_INT( signed short, CLIP_SHORT ); + break; + + case VIPS_FORMAT_UINT: + VCONV_INT( unsigned int, CLIP_NONE ); + break; + + case VIPS_FORMAT_INT: + VCONV_INT( signed int, CLIP_NONE ); + break; + + case VIPS_FORMAT_FLOAT: + case VIPS_FORMAT_COMPLEX: + VCONV_FLOAT( float ); + break; + + case VIPS_FORMAT_DOUBLE: + case VIPS_FORMAT_DPCOMPLEX: + VCONV_FLOAT( double ); + break; + + default: + g_assert_not_reached(); + } + + return( 0 ); +} + +static int +vips_convasep_pass( VipsConvasep *convasep, + VipsImage *in, VipsImage **out, VipsDirection direction ) +{ + VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( convasep ); + + VipsGenerateFn gen; + + *out = vips_image_new(); + if( vips_image_pipelinev( *out, + VIPS_DEMAND_STYLE_SMALLTILE, in, NULL ) ) + return( -1 ); + + if( direction == VIPS_DIRECTION_HORIZONTAL ) { + (*out)->Xsize -= convasep->width - 1; + gen = vips_convasep_generate_horizontal; + } + else { + (*out)->Ysize -= convasep->width - 1; + gen = vips_convasep_generate_vertical; + } + + if( (*out)->Xsize <= 0 || + (*out)->Ysize <= 0 ) { + vips_error( class->nickname, + "%s", _( "image too small for mask" ) ); + return( -1 ); + } + + if( vips_image_generate( *out, + vips_convasep_start, gen, vips_convasep_stop, in, convasep ) ) + return( -1 ); + + return( 0 ); +} + +static int +vips_convasep_build( VipsObject *object ) +{ + VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( object ); + VipsConvolution *convolution = (VipsConvolution *) object; + VipsConvasep *convasep = (VipsConvasep *) object; + VipsImage **t = (VipsImage **) vips_object_local_array( object, 4 ); + + VipsImage *in; + + if( VIPS_OBJECT_CLASS( vips_convasep_parent_class )->build( object ) ) + return( -1 ); + + if( vips_check_separable( class->nickname, convolution->M ) ) + return( -1 ); + + /* An int version of our mask. + */ + if( vips__image_intize( convolution->M, &t[3] ) ) + return( -1 ); + convasep->iM = t[3]; + convasep->width = convasep->iM->Xsize * convasep->iM->Ysize; + in = convolution->in; + + if( vips_convasep_decompose( convasep ) ) + return( -1 ); + + g_object_set( convasep, "out", vips_image_new(), NULL ); + if( + vips_embed( in, &t[0], + convasep->width / 2, + convasep->width / 2, + in->Xsize + convasep->width - 1, + in->Ysize + convasep->width - 1, + "extend", VIPS_EXTEND_COPY, + NULL ) || + vips_convasep_pass( convasep, + t[0], &t[1], VIPS_DIRECTION_HORIZONTAL ) || + vips_convasep_pass( convasep, + t[1], &t[2], VIPS_DIRECTION_VERTICAL ) || + vips_image_write( t[2], convolution->out ) ) + return( -1 ); + + convolution->out->Xoffset = 0; + convolution->out->Yoffset = 0; + + return( 0 ); +} + +static void +vips_convasep_class_init( VipsConvasepClass *class ) +{ + GObjectClass *gobject_class = G_OBJECT_CLASS( class ); + VipsObjectClass *object_class = (VipsObjectClass *) class; + + gobject_class->set_property = vips_object_set_property; + gobject_class->get_property = vips_object_get_property; + + object_class->nickname = "convasep"; + object_class->description = + _( "approximate separable integer convolution" ); + object_class->build = vips_convasep_build; + + VIPS_ARG_INT( class, "layers", 104, + _( "Layers" ), + _( "Use this many layers in approximation" ), + VIPS_ARGUMENT_OPTIONAL_INPUT, + G_STRUCT_OFFSET( VipsConvasep, layers ), + 1, 1000, 5 ); + +} + +static void +vips_convasep_init( VipsConvasep *convasep ) +{ + convasep->layers = 5; + convasep->n_lines = 0; +} + +/** + * vips_convasep: + * @in: input image + * @out: output image + * @mask: convolve with this mask + * @...: %NULL-terminated list of optional named arguments + * + * Optional arguments: + * + * * @layers: %gint, number of layers for approximation + * + * Approximate separable integer convolution. This is a low-level operation, see + * vips_convsep() for something more convenient. + * + * The image is convolved twice: once with @mask and then again with @mask + * rotated by 90 degrees. + * @mask must be 1xn or nx1 elements. + * Elements of @mask are converted to + * integers before convolution. + * + * Larger values for @layers give more accurate + * results, but are slower. As @layers approaches the mask radius, the + * accuracy will become close to exact convolution and the speed will drop to + * match. For many large masks, such as Gaussian, @layers need be only 10% of + * this value and accuracy will still be good. + * + * The output image + * always has the same #VipsBandFormat as the input image. + * + * See also: vips_convsep(). + * + * Returns: 0 on success, -1 on error + */ +int +vips_convasep( VipsImage *in, VipsImage **out, VipsImage *mask, ... ) +{ + va_list ap; + int result; + + va_start( ap, mask ); + result = vips_call_split( "convasep", ap, in, out, mask ); + va_end( ap ); + + return( result ); +} + diff --git a/libvips/convolution/convf.c b/libvips/convolution/convf.c new file mode 100644 index 00000000..be7b59b2 --- /dev/null +++ b/libvips/convolution/convf.c @@ -0,0 +1,406 @@ +/* convf + * + * Copyright: 1990, N. Dessipris. + * + * Author: Nicos Dessipris & Kirk Martinez + * Written on: 29/04/1991 + * Modified on: 19/05/1991 + * 8/7/93 JC + * - adapted for partial v2 + * - memory leaks fixed + * - ANSIfied + * 12/7/93 JC + * - adapted im_convbi() to im_convbf() + * 7/10/94 JC + * - new IM_ARRAY() macro + * - evalend callbacks + * - more typedef + * 9/3/01 JC + * - redone from im_conv() + * 27/7/01 JC + * - rejects masks with scale == 0 + * 7/4/04 + * - now uses im_embed() with edge stretching on the input, not + * the output + * - sets Xoffset / Yoffset + * 11/11/05 + * - simpler inner loop avoids gcc4 bug + * 12/11/09 + * - only rebuild the buffer offsets if bpl changes + * - tiny speedups and cleanups + * - add restrict, though it doesn't seem to help gcc + * - add mask-all-zero check + * 13/11/09 + * - rename as im_conv_f() to make it easier for vips.c to make the + * overloaded version + * 3/2/10 + * - gtkdoc + * - more cleanups + * 1/10/10 + * - support complex (just double the bands) + * 29/10/10 + * - get rid of im_convsep_f(), just call this twice, no longer worth + * keeping two versions + * 15/10/11 Nicolas + * - handle offset correctly in seperable convolutions + * 26/1/16 Lovell Fuller + * - remove Duff for a 25% speedup + * 23/6/16 + * - redone as a class + */ + +/* + + This file is part of VIPS. + + VIPS is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA + + */ + +/* + + These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk + + */ + +#ifdef HAVE_CONFIG_H +#include +#endif /*HAVE_CONFIG_H*/ +#include + +#include +#include +#include + +#include + +#include "pconvolution.h" + +typedef struct { + VipsConvolution parent_instance; + + /* We make a smaller version of the mask with the zeros squeezed out. + */ + int nnz; /* Number of non-zero mask elements */ + double *coeff; /* Array of non-zero mask coefficients */ + int *coeff_pos; /* Index of each nnz element in mask->coeff */ +} VipsConvf; + +typedef VipsConvolutionClass VipsConvfClass; + +G_DEFINE_TYPE( VipsConvf, vips_convf, VIPS_TYPE_CONVOLUTION ); + +/* Our sequence value. + */ +typedef struct { + VipsConvf *convf; + VipsRegion *ir; /* Input region */ + + int *offsets; /* Offsets for each non-zero matrix element */ + VipsPel **pts; /* Per-non-zero mask element image pointers */ + + int last_bpl; /* Avoid recalcing offsets, if we can */ +} VipsConvfSequence; + +/* Free a sequence value. + */ +static int +vips_convf_stop( void *vseq, void *a, void *b ) +{ + VipsConvfSequence *seq = (VipsConvfSequence *) vseq; + + VIPS_UNREF( seq->ir ); + + return( 0 ); +} + +/* Convolution start function. + */ +static void * +vips_convf_start( VipsImage *out, void *a, void *b ) +{ + VipsImage *in = (VipsImage *) a; + VipsConvf *convf = (VipsConvf *) b; + VipsConvfSequence *seq; + + if( !(seq = VIPS_NEW( out, VipsConvfSequence )) ) + return( NULL ); + + seq->convf = convf; + seq->ir = NULL; + seq->pts = NULL; + seq->last_bpl = -1; + + seq->ir = vips_region_new( in ); + if( !(seq->offsets = VIPS_ARRAY( out, convf->nnz, int )) || + !(seq->pts = VIPS_ARRAY( out, convf->nnz, VipsPel * )) ) { + vips_convf_stop( seq, in, convf ); + return( NULL ); + } + + return( (void *) seq ); +} + +#define CONV_FLOAT( ITYPE, OTYPE ) { \ + ITYPE ** restrict p = (ITYPE **) seq->pts; \ + OTYPE * restrict q = (OTYPE *) VIPS_REGION_ADDR( or, le, y ); \ + \ + for( x = 0; x < sz; x++ ) { \ + double sum; \ + int i; \ + \ + sum = 0; \ + for ( i = 0; i < nnz; i++ ) \ + sum += t[i] * p[i][x]; \ + \ + sum = (sum / scale) + offset; \ + \ + q[x] = sum; \ + } \ +} + +/* Convolve! + */ +static int +vips_convf_gen( REGION *or, void *vseq, void *a, void *b, gboolean *stop ) +{ + VipsConvfSequence *seq = (VipsConvfSequence *) vseq; + VipsConvf *convf = (VipsConvf *) b; + VipsConvolution *convolution = (VipsConvolution *) convf; + VipsImage *M = convolution->M; + double scale = vips_image_get_scale( M ); + double offset = vips_image_get_offset( M ); + VipsImage *in = (VipsImage *) a; + VipsRegion *ir = seq->ir; + double * restrict t = convf->coeff; + const int nnz = convf->nnz; + VipsRect *r = &or->valid; + int le = r->left; + int to = r->top; + int bo = VIPS_RECT_BOTTOM( r ); + int sz = VIPS_REGION_N_ELEMENTS( or ) * + (vips_band_format_iscomplex( in->BandFmt ) ? 2 : 1); + + VipsRect s; + int x, y, z, i; + + /* Prepare the section of the input image we need. A little larger + * than the section of the output image we are producing. + */ + s = *r; + s.width += M->Xsize - 1; + s.height += M->Ysize - 1; + if( vips_region_prepare( ir, &s ) ) + return( -1 ); + + /* Fill offset array. Only do this if the bpl has changed since the + * previous vips_region_prepare(). + */ + if( seq->last_bpl != VIPS_REGION_LSKIP( ir ) ) { + seq->last_bpl = VIPS_REGION_LSKIP( ir ); + + for( i = 0; i < nnz; i++ ) { + z = convf->coeff_pos[i]; + x = z % M->Xsize; + y = z / M->Xsize; + + seq->offsets[i] = + VIPS_REGION_ADDR( ir, x + le, y + to ) - + VIPS_REGION_ADDR( ir, le, to ); + } + } + + for( y = to; y < bo; y++ ) { + /* Init pts for this line of PELs. + */ + for( z = 0; z < nnz; z++ ) + seq->pts[z] = seq->offsets[z] + + VIPS_REGION_ADDR( ir, le, y ); + + switch( in->BandFmt ) { + case VIPS_FORMAT_UCHAR: + CONV_FLOAT( unsigned char, float ); + break; + + case VIPS_FORMAT_CHAR: + CONV_FLOAT( signed char, float ); + break; + + case VIPS_FORMAT_USHORT: + CONV_FLOAT( unsigned short, float ); + break; + + case VIPS_FORMAT_SHORT: + CONV_FLOAT( signed short, float ); + break; + + case VIPS_FORMAT_UINT: + CONV_FLOAT( unsigned int, float ); + break; + + case VIPS_FORMAT_INT: + CONV_FLOAT( signed int, float ); + break; + + case VIPS_FORMAT_FLOAT: + case VIPS_FORMAT_COMPLEX: + CONV_FLOAT( float, float ); + break; + + case VIPS_FORMAT_DOUBLE: + case VIPS_FORMAT_DPCOMPLEX: + CONV_FLOAT( double, double ); + break; + + default: + g_assert_not_reached(); + } + } + + return( 0 ); +} + +static int +vips_convf_build( VipsObject *object ) +{ + VipsConvolution *convolution = (VipsConvolution *) object; + VipsConvf *convf = (VipsConvf *) object; + VipsImage **t = (VipsImage **) vips_object_local_array( object, 4 ); + + VipsImage *in; + VipsImage *M; + double *coeff; + int ne; + int i; + + if( VIPS_OBJECT_CLASS( vips_convf_parent_class )->build( object ) ) + return( -1 ); + + M = convolution->M; + coeff = (double *) VIPS_IMAGE_ADDR( M, 0, 0 ); + ne = M->Xsize * M->Ysize; + if( !(convf->coeff = VIPS_ARRAY( object, ne, double )) || + !(convf->coeff_pos = VIPS_ARRAY( object, ne, int )) ) + return( -1 ); + + /* Find non-zero mask elements. + */ + for( i = 0; i < ne; i++ ) + if( coeff[i] ) { + convf->coeff[convf->nnz] = coeff[i]; + convf->coeff_pos[convf->nnz] = i; + convf->nnz += 1; + } + + /* Was the whole mask zero? We must have at least 1 element in there: + * set it to zero. + */ + if( convf->nnz == 0 ) { + convf->coeff[0] = 0; + convf->coeff_pos[0] = 0; + convf->nnz = 1; + } + + in = convolution->in; + + if( vips_embed( in, &t[0], + M->Xsize / 2, M->Ysize / 2, + in->Xsize + M->Xsize - 1, in->Ysize + M->Ysize - 1, + "extend", VIPS_EXTEND_COPY, + NULL ) ) + return( -1 ); + in = t[0]; + + g_object_set( convf, "out", vips_image_new(), NULL ); + if( vips_image_pipelinev( convolution->out, + VIPS_DEMAND_STYLE_SMALLTILE, in, NULL ) ) + return( -1 ); + + convolution->out->Xoffset = 0; + convolution->out->Yoffset = 0; + + /* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output + * would be 1x1. + */ + if( vips_bandfmt_isint( in->BandFmt ) ) + convolution->out->BandFmt = IM_BANDFMT_FLOAT; + convolution->out->Xsize -= M->Xsize - 1; + convolution->out->Ysize -= M->Ysize - 1; + + if( vips_image_generate( convolution->out, + vips_convf_start, vips_convf_gen, vips_convf_stop, in, convf ) ) + return( -1 ); + + convolution->out->Xoffset = -M->Xsize / 2; + convolution->out->Yoffset = -M->Ysize / 2; + + return( 0 ); +} + +static void +vips_convf_class_init( VipsConvfClass *class ) +{ + VipsObjectClass *object_class = (VipsObjectClass *) class; + + object_class->nickname = "convf"; + object_class->description = _( "float convolution operation" ); + object_class->build = vips_convf_build; +} + +static void +vips_convf_init( VipsConvf *convf ) +{ + convf->nnz = 0; + convf->coeff = NULL; + convf->coeff_pos = NULL; +} + +/** + * vips_convf: + * @in: input image + * @out: output image + * @mask: convolve with this mask + * @...: %NULL-terminated list of optional named arguments + * + * Convolution. This is a low-level operation, see vips_conv() for something + * more convenient. + * + * Perform a convolution of @in with @mask. + * Each output pixel is + * calculated as sigma[i]{pixel[i] * mask[i]} / scale + offset, where scale + * and offset are part of @mask. + * + * The convolution is performed with floating-point arithmetic. The output image + * is always #VIPS_FORMAT_FLOAT unless @in is #VIPS_FORMAT_DOUBLE, in which case + * @out is also #VIPS_FORMAT_DOUBLE. + * + * See also: vips_conv(). + * + * Returns: 0 on success, -1 on error + */ +int +vips_convf( VipsImage *in, VipsImage **out, VipsImage *mask, ... ) +{ + va_list ap; + int result; + + va_start( ap, mask ); + result = vips_call_split( "convf", ap, in, out, mask ); + va_end( ap ); + + return( result ); +} + diff --git a/libvips/convolution/convi.c b/libvips/convolution/convi.c new file mode 100644 index 00000000..85817b5e --- /dev/null +++ b/libvips/convolution/convi.c @@ -0,0 +1,1066 @@ +/* convi + * + * Copyright: 1990, N. Dessipris. + * + * Author: Nicos Dessipris & Kirk Martinez + * Written on: 29/04/1991 + * Modified on: 19/05/1991 + * 8/7/93 JC + * - adapted for partial v2 + * - memory leaks fixed + * - ANSIfied + * 23/7/93 JC + * - inner loop unrolled with a switch - 25% speed-up! + * 13/12/93 JC + * - tiny rounding error removed + * 7/10/94 JC + * - new IM_ARRAY() macro + * - various simplifications + * - evalend callback added + * 1/2/95 JC + * - use of IM_REGION_ADDR() updated + * - output size was incorrect! see comment below + * - bug with large non-square matricies fixed too + * - uses new im_embed() function + * 13/7/98 JC + * - wierd bug ... im_free_imask is no longer directly called for close + * callback, caused SIGKILL on solaris 2.6 ... linker bug? + * 9/3/01 JC + * - reworked and simplified, about 10% faster + * - slightly better range clipping + * 27/7/01 JC + * - reject masks with scale == 0 + * 7/4/04 + * - im_conv() now uses im_embed() with edge stretching on the input, not + * the output + * - sets Xoffset / Yoffset + * 11/11/05 + * - simpler inner loop avoids gcc4 bug + * 7/11/07 + * - new evalstart/end callbacks + * 12/5/08 + * - int rounding was +1 too much, argh + * - only rebuild the buffer offsets if bpl changes + * 5/4/09 + * - tiny speedups and cleanups + * - add restrict, though it doesn't seem to help gcc + * 12/11/09 + * - only check for non-zero elements once + * - add mask-all-zero check + * - cleanups + * 3/2/10 + * - gtkdoc + * - more cleanups + * 23/08/10 + * - add a special case for 3x3 masks, about 20% faster + * 1/10/10 + * - support complex (just double the bands) + * 18/10/10 + * - add experimental Orc path + * 29/10/10 + * - use VipsVector + * - get rid of im_convsep(), just call this twice, no longer worth + * keeping two versions + * 8/11/10 + * - add array tiling + * 9/5/11 + * - argh typo in overflow estimation could cause errors + * 15/10/11 Nicolas + * - handle offset correctly in seperable convolutions + * 26/1/16 Lovell Fuller + * - remove Duff for a 25% speedup + * 23/6/16 + * - rewritten as a class + * - new fixed-point vector path, up to 2x faster + */ + +/* + + This file is part of VIPS. + + VIPS is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA + + */ + +/* + + These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk + + */ + +/* +#define DEBUG +#define DEBUG_PIXELS +#define DEBUG_COMPILE + */ + +#ifdef HAVE_CONFIG_H +#include +#endif /*HAVE_CONFIG_H*/ +#include + +#include +#include +#include + +#include + +#include "pconvolution.h" + +/* We do the 8-bit vector path with fixed-point arithmetic. We use 2.6 bits + * for the mask coefficients, so our range is -2 to +1.99, after using scale + * on the mask. + */ +#define FIXED_BITS (6) +#define FIXED_SCALE (1 << FIXED_BITS) + +/* Larger than this and we fall back to C. + */ +#define MAX_PASS (20) + +/* A pass with a vector. + */ +typedef struct { + int first; /* The index of the first mask coff we use */ + int last; /* The index of the last mask coff we use */ + + int r; /* Set previous result in this var */ + + /* The code we generate for this section of the mask. + */ + VipsVector *vector; +} Pass; + +typedef struct { + VipsConvolution parent_instance; + + /* An int version of M. + */ + VipsImage *iM; + + /* We make a smaller version of the mask with the zeros squeezed out. + */ + int nnz; /* Number of non-zero mask elements */ + int *coeff; /* Array of non-zero mask coefficients */ + int *coeff_pos; /* Index of each nnz element in mask->coeff */ + + /* And a fixed-point version for a vector path. + */ + int *fixed; + int n_point; /* Number of points in fixed-point array */ + + /* The set of passes we need for this mask. + */ + int n_pass; + Pass pass[MAX_PASS]; + + /* Code for the final clip back to 8 bits. + */ + int r; + VipsVector *vector; +} VipsConvi; + +typedef VipsConvolutionClass VipsConviClass; + +G_DEFINE_TYPE( VipsConvi, vips_convi, VIPS_TYPE_CONVOLUTION ); + +/* Our sequence value. + */ +typedef struct { + VipsConvi *convi; + VipsRegion *ir; /* Input region */ + + int *offsets; /* Offsets for each non-zero matrix element */ + VipsPel **pts; /* Per-non-zero mask element image pointers */ + + int last_bpl; /* Avoid recalcing offsets, if we can */ + + /* We need a pair of intermediate buffers to keep the results of each + * vector conv pass. + */ + short *t1; + short *t2; +} VipsConviSequence; + +/* Free a sequence value. + */ +static int +vips_convi_stop( void *vseq, void *a, void *b ) +{ + VipsConviSequence *seq = (VipsConviSequence *) vseq; + + VIPS_UNREF( seq->ir ); + VIPS_FREE( seq->offsets ); + VIPS_FREE( seq->pts ); + VIPS_FREE( seq->t1 ); + VIPS_FREE( seq->t2 ); + + return( 0 ); +} + +/* Convolution start function. + */ +static void * +vips_convi_start( VipsImage *out, void *a, void *b ) +{ + VipsImage *in = (VipsImage *) a; + VipsConvi *convi = (VipsConvi *) b; + VipsConviSequence *seq; + + if( !(seq = VIPS_NEW( out, VipsConviSequence )) ) + return( NULL ); + + seq->convi = convi; + seq->ir = NULL; + seq->offsets = NULL; + seq->pts = NULL; + seq->last_bpl = -1; + seq->t1 = NULL; + seq->t2 = NULL; + + seq->ir = vips_region_new( in ); + + /* C mode. + */ + if( convi->nnz ) { + seq->offsets = VIPS_ARRAY( NULL, convi->nnz, int ); + seq->pts = VIPS_ARRAY( NULL, convi->nnz, VipsPel * ); + + if( !seq->offsets || + !seq->pts ) { + vips_convi_stop( seq, in, convi ); + return( NULL ); + } + } + + /* Vector mode. + */ + if( convi->n_pass ) { + seq->t1 = VIPS_ARRAY( NULL, VIPS_IMAGE_N_ELEMENTS( in ), short ); + seq->t2 = VIPS_ARRAY( NULL, VIPS_IMAGE_N_ELEMENTS( in ), short ); + + if( !seq->t1 || + !seq->t2 ) { + vips_convi_stop( seq, in, convi ); + return( NULL ); + } + } + + return( (void *) seq ); +} + +static void +vips_convi_compile_free( VipsConvi *convi ) +{ + int i; + + for( i = 0; i < convi->n_pass; i++ ) + VIPS_FREEF( vips_vector_free, convi->pass[i].vector ); + convi->n_pass = 0; + VIPS_FREEF( vips_vector_free, convi->vector ); +} + +#define TEMP( N, S ) vips_vector_temporary( v, (char *) N, S ) +#define PARAM( N, S ) vips_vector_parameter( v, (char *) N, S ) +#define SCANLINE( N, P, S ) vips_vector_source_scanline( v, (char *) N, P, S ) +#define CONST( N, V, S ) vips_vector_constant( v, (char *) N, V, S ) +#define ASM2( OP, A, B ) vips_vector_asm2( v, (char *) OP, A, B ) +#define ASM3( OP, A, B, C ) vips_vector_asm3( v, (char *) OP, A, B, C ) + +/* Generate code for a section of the mask. first is the index we start + * at, we set last to the index of the last one we use before we run + * out of intermediates / constants / parameters / sources or mask + * coefficients. + * + * 0 for success, -1 on error. + */ +static int +vips_convi_compile_section( VipsConvi *convi, VipsImage *in, Pass *pass ) +{ + VipsConvolution *convolution = (VipsConvolution *) convi; + VipsImage *M = convolution->M; + + VipsVector *v; + int i; + +#ifdef DEBUG_COMPILE + printf( "starting pass %d\n", pass->first ); +#endif /*DEBUG_COMPILE*/ + + pass->vector = v = vips_vector_new( "convi", 2 ); + + /* "r" is the array of sums from the previous pass (if any). + */ + pass->r = vips_vector_source_name( v, "r", 2 ); + + /* The value we fetch from the image, the accumulated sum. + */ + TEMP( "value", 2 ); + TEMP( "valueb", 1 ); + TEMP( "sum", 2 ); + + /* Init the sum. If this is the first pass, it's a constant. If this + * is a later pass, we have to init the sum from the result + * of the previous pass. + */ + if( pass->first == 0 ) { + char c0[256]; + + CONST( c0, 0, 2 ); + ASM2( "loadpw", "sum", c0 ); + } + else + ASM2( "loadw", "sum", "r" ); + + for( i = pass->first; i < convi->n_point; i++ ) { + int x = i % M->Xsize; + int y = i / M->Xsize; + + char source[256]; + char off[256]; + char coeff[256]; + + /* Exclude zero elements. + */ + if( !convi->fixed[i] ) + continue; + + /* The source. sl0 is the first scanline in the mask. + */ + SCANLINE( source, y, 1 ); + + /* Load with an offset. Only for non-first-columns though. + */ + if( x == 0 ) + ASM2( "convubw", "value", source ); + else { + CONST( off, in->Bands * x, 1 ); + ASM3( "loadoffb", "valueb", source, off ); + ASM2( "convubw", "value", "valueb" ); + } + + /* Mask coefficients are 2.6 bits fixed point, so -2 to +1.99. + * + * We need a signed multiply, so the image pixel needs to + * become a signed 16-bit value. We know only the bottom 8 bits + * of the image and coefficient are interesting, so we can take + * the bottom half of a 16x16->32 multiply. + */ + CONST( coeff, convi->fixed[i], 2 ); + ASM3( "mullw", "value", "value", coeff ); + + /* We accumulate the signed 16-bit result in sum. Saturated + * add. + */ + ASM3( "addssw", "sum", "sum", "value" ); + + if( vips_vector_full( v ) ) + break; + } + + pass->last = i; + + /* And write to our intermediate buffer. + */ + ASM2( "copyw", "d1", "sum" ); + + if( !vips_vector_compile( v ) ) + return( -1 ); + +#ifdef DEBUG_COMPILE + printf( "done coeffs %d to %d\n", pass->first, pass->last ); + vips_vector_print( v ); +#endif /*DEBUG_COMPILE*/ + + return( 0 ); +} + +/* Generate code for the final 16->8 conversion. + * + * 0 for success, -1 on error. + */ +static int +vips_convi_compile_clip( VipsConvi *convi ) +{ + VipsConvolution *convolution = (VipsConvolution *) convi; + VipsImage *M = convolution->M; + int offset = VIPS_RINT( vips_image_get_offset( M ) ); + + VipsVector *v; + char c32[256]; + char c6[256]; + char c0[256]; + char c255[256]; + char off[256]; + + convi->vector = v = vips_vector_new( "convi", 1 ); + + /* "r" is the array of sums we clip down. + */ + convi->r = vips_vector_source_name( v, "r", 2 ); + + /* The value we fetch from the image. + */ + TEMP( "value", 2 ); + + CONST( c32, 32, 2 ); + ASM3( "addw", "value", "r", c32 ); + CONST( c6, 6, 2 ); + ASM3( "shrsw", "value", "value", c6 ); + + CONST( off, offset, 2 ); + ASM3( "addw", "value", "value", off ); + + /* You'd think "convsuswb" (convert signed 16-bit to unsigned + * 8-bit with saturation) would be quicker, but it's a lot + * slower. + */ + CONST( c0, 0, 2 ); + ASM3( "maxsw", "value", c0, "value" ); + CONST( c255, 255, 2 ); + ASM3( "minsw", "value", c255, "value" ); + + ASM2( "convwb", "d1", "value" ); + + if( !vips_vector_compile( v ) ) + return( -1 ); + + return( 0 ); +} + +static int +vips_convi_compile( VipsConvi *convi, VipsImage *in ) +{ + int i; + Pass *pass; + + /* Generate passes until we've used up the whole mask. + */ + for( i = 0;; ) { + /* Allocate space for another pass. + */ + if( convi->n_pass == MAX_PASS ) + return( -1 ); + pass = &convi->pass[convi->n_pass]; + convi->n_pass += 1; + + pass->first = i; + pass->r = -1; + + if( vips_convi_compile_section( convi, in, pass ) ) + return( -1 ); + i = pass->last + 1; + + if( i >= convi->n_point ) + break; + } + + if( vips_convi_compile_clip( convi ) ) + return( -1 ); + + return( 0 ); +} + +static int +vips_convi_generate_vector( VipsRegion *or, + void *vseq, void *a, void *b, gboolean *stop ) +{ + VipsConviSequence *seq = (VipsConviSequence *) vseq; + VipsConvi *convi = (VipsConvi *) b; + VipsConvolution *convolution = (VipsConvolution *) convi; + VipsImage *M = convolution->M; + VipsImage *in = (VipsImage *) a; + VipsRegion *ir = seq->ir; + VipsRect *r = &or->valid; + int ne = r->width * in->Bands; + + VipsRect s; + int i, y; + VipsExecutor executor[MAX_PASS]; + VipsExecutor clip; + +#ifdef DEBUG_PIXELS + printf( "vips_convi_generate_vector: generating %d x %d at %d x %d\n", + r->width, r->height, r->left, r->top ); +#endif /*DEBUG_PIXELS*/ + + /* Prepare the section of the input image we need. A little larger + * than the section of the output image we are producing. + */ + s = *r; + s.width += M->Xsize - 1; + s.height += M->Ysize - 1; + if( vips_region_prepare( ir, &s ) ) + return( -1 ); + + for( i = 0; i < convi->n_pass; i++ ) + vips_executor_set_program( &executor[i], + convi->pass[i].vector, ne ); + vips_executor_set_program( &clip, convi->vector, ne ); + + VIPS_GATE_START( "vips_convi_generate_vector: work" ); + + for( y = 0; y < r->height; y ++ ) { + VipsPel *q = VIPS_REGION_ADDR( or, r->left, r->top + y ); + +#ifdef DEBUG_PIXELS +{ + int h, v; + + printf( "before convolve: x = %d, y = %d\n", + r->left, r->top + y ); + for( v = 0; v < M->Ysize; v++ ) { + for( h = 0; h < M->Xsize; h++ ) + printf( "%3d ", *VIPS_REGION_ADDR( ir, + r->left + h, r->top + y + v ) ); + printf( "\n" ); + } +} +#endif /*DEBUG_PIXELS*/ + + /* We run our n passes to generate this scanline. + */ + for( i = 0; i < convi->n_pass; i++ ) { + Pass *pass = &convi->pass[i]; + + vips_executor_set_scanline( &executor[i], + ir, r->left, r->top + y ); + vips_executor_set_array( &executor[i], + pass->r, seq->t1 ); + vips_executor_set_destination( &executor[i], seq->t2 ); + vips_executor_run( &executor[i] ); + + VIPS_SWAP( signed short *, seq->t1, seq->t2 ); + } + +#ifdef DEBUG_PIXELS + printf( "before clip: %d\n", ((signed short *) seq->t1)[0] ); +#endif /*DEBUG_PIXELS*/ + + vips_executor_set_array( &clip, convi->r, seq->t1 ); + vips_executor_set_destination( &clip, q ); + vips_executor_run( &clip ); + +#ifdef DEBUG_PIXELS + printf( "after clip: %d\n", + *VIPS_REGION_ADDR( or, r->left, r->top + y ) ); +#endif /*DEBUG_PIXELS*/ + } + + VIPS_GATE_STOP( "vips_convi_generate_vector: work" ); + + VIPS_COUNT_PIXELS( or, "vips_convi_generate_vector" ); + + return( 0 ); +} + +/* INT inner loops. + */ +#define CONV_INT( TYPE, CLIP ) { \ + TYPE ** restrict p = (TYPE **) seq->pts; \ + TYPE * restrict q = (TYPE *) VIPS_REGION_ADDR( or, le, y ); \ + \ + for( x = 0; x < sz; x++ ) { \ + int sum; \ + int i; \ + \ + sum = 0; \ + for ( i = 0; i < nnz; i++ ) \ + sum += t[i] * p[i][x]; \ + \ + sum = ((sum + rounding) / scale) + offset; \ + \ + CLIP; \ + \ + q[x] = sum; \ + } \ +} + +/* FLOAT inner loops. + */ +#define CONV_FLOAT( TYPE ) { \ + TYPE ** restrict p = (TYPE **) seq->pts; \ + TYPE * restrict q = (TYPE *) VIPS_REGION_ADDR( or, le, y ); \ + \ + for( x = 0; x < sz; x++ ) { \ + double sum; \ + int i; \ + \ + sum = 0; \ + for ( i = 0; i < nnz; i++ ) \ + sum += t[i] * p[i][x]; \ + \ + sum = (sum / scale) + offset; \ + \ + q[x] = sum; \ + } \ +} + +/* Various integer range clips. Record over/under flows. + */ +#define CLIP_UCHAR( V ) \ +G_STMT_START { \ + if( (V) < 0 ) \ + (V) = 0; \ + else if( (V) > UCHAR_MAX ) \ + (V) = UCHAR_MAX; \ +} G_STMT_END + +#define CLIP_CHAR( V ) \ +G_STMT_START { \ + if( (V) < SCHAR_MIN ) \ + (V) = SCHAR_MIN; \ + else if( (V) > SCHAR_MAX ) \ + (V) = SCHAR_MAX; \ +} G_STMT_END + +#define CLIP_USHORT( V ) \ +G_STMT_START { \ + if( (V) < 0 ) \ + (V) = 0; \ + else if( (V) > USHRT_MAX ) \ + (V) = USHRT_MAX; \ +} G_STMT_END + +#define CLIP_SHORT( V ) \ +G_STMT_START { \ + if( (V) < SHRT_MIN ) \ + (V) = SHRT_MIN; \ + else if( (V) > SHRT_MAX ) \ + (V) = SHRT_MAX; \ +} G_STMT_END + +#define CLIP_NONE( V ) {} + +/* Convolve! + */ +static int +vips_convi_generate( VipsRegion *or, + void *vseq, void *a, void *b, gboolean *stop ) +{ + VipsConviSequence *seq = (VipsConviSequence *) vseq; + VipsConvi *convi = (VipsConvi *) b; + VipsConvolution *convolution = (VipsConvolution *) convi; + VipsImage *M = convolution->M; + int scale = VIPS_RINT( vips_image_get_scale( M ) ); + int rounding = scale / 2; + int offset = VIPS_RINT( vips_image_get_offset( M ) ); + VipsImage *in = (VipsImage *) a; + VipsRegion *ir = seq->ir; + int * restrict t = convi->coeff; + const int nnz = convi->nnz; + VipsRect *r = &or->valid; + int le = r->left; + int to = r->top; + int bo = VIPS_RECT_BOTTOM( r ); + int sz = VIPS_REGION_N_ELEMENTS( or ) * + (vips_band_format_iscomplex( in->BandFmt ) ? 2 : 1); + + VipsRect s; + int x, y, z, i; + + /* Prepare the section of the input image we need. A little larger + * than the section of the output image we are producing. + */ + s = *r; + s.width += M->Xsize - 1; + s.height += M->Ysize - 1; + if( vips_region_prepare( ir, &s ) ) + return( -1 ); + + /* Fill offset array. Only do this if the bpl has changed since the + * previous vips_region_prepare(). + */ + if( seq->last_bpl != VIPS_REGION_LSKIP( ir ) ) { + seq->last_bpl = VIPS_REGION_LSKIP( ir ); + + for( i = 0; i < nnz; i++ ) { + z = convi->coeff_pos[i]; + x = z % M->Xsize; + y = z / M->Xsize; + + seq->offsets[i] = + VIPS_REGION_ADDR( ir, x + le, y + to ) - + VIPS_REGION_ADDR( ir, le, to ); + } + } + + for( y = to; y < bo; y++ ) { + /* Init pts for this line of PELs. + */ + for( z = 0; z < nnz; z++ ) + seq->pts[z] = seq->offsets[z] + + VIPS_REGION_ADDR( ir, le, y ); + + switch( in->BandFmt ) { + case VIPS_FORMAT_UCHAR: + CONV_INT( unsigned char, CLIP_UCHAR( sum ) ); + break; + + case VIPS_FORMAT_CHAR: + CONV_INT( signed char, CLIP_CHAR( sum ) ); + break; + + case VIPS_FORMAT_USHORT: + CONV_INT( unsigned short, CLIP_USHORT( sum ) ); + break; + + case VIPS_FORMAT_SHORT: + CONV_INT( signed short, CLIP_SHORT( sum ) ); + break; + + case VIPS_FORMAT_UINT: + CONV_INT( unsigned int, CLIP_NONE( sum ) ); + break; + + case VIPS_FORMAT_INT: + CONV_INT( signed int, CLIP_NONE( sum ) ); + break; + + case VIPS_FORMAT_FLOAT: + case VIPS_FORMAT_COMPLEX: + CONV_FLOAT( float ); + break; + + case VIPS_FORMAT_DOUBLE: + case VIPS_FORMAT_DPCOMPLEX: + CONV_FLOAT( double ); + break; + + default: + g_assert_not_reached(); + } + } + + return( 0 ); +} + +/* Make an int version of a mask. + * + * We rint() everything, then adjust the scale try to match the overall + * effect. + */ +int +vips__image_intize( VipsImage *in, VipsImage **out ) +{ + VipsImage *t; + int x, y; + double double_result; + double out_scale; + double out_offset; + int int_result; + + if( vips_check_matrix( "vips2imask", in, &t ) ) + return( -1 ); + if( !(*out = vips_image_new_matrix( t->Xsize, t->Ysize )) ) { + g_object_unref( t ); + return( -1 ); + } + + /* We want to make an intmask which has the same input to output ratio + * as the double image. + * + * Imagine convolving with the double image, what's the ratio of + * brightness between input and output? We want the same ratio for the + * int version, if we can. + * + * Imagine an input image where every pixel is 1, what will the output + * be? + */ + double_result = 0; + for( y = 0; y < t->Ysize; y++ ) + for( x = 0; x < t->Xsize; x++ ) + double_result += *VIPS_MATRIX( t, x, y ); + double_result /= vips_image_get_scale( t ); + + for( y = 0; y < t->Ysize; y++ ) + for( x = 0; x < t->Xsize; x++ ) + *VIPS_MATRIX( *out, x, y ) = + VIPS_RINT( *VIPS_MATRIX( t, x, y ) ); + + out_scale = VIPS_RINT( vips_image_get_scale( t ) ); + if( out_scale == 0 ) + out_scale = 1; + out_offset = VIPS_RINT( vips_image_get_offset( t ) ); + + /* Now convolve a 1 everywhere image with the int version we've made, + * what do we get? + */ + int_result = 0; + for( y = 0; y < t->Ysize; y++ ) + for( x = 0; x < t->Xsize; x++ ) + int_result += *VIPS_MATRIX( *out, x, y ); + int_result /= out_scale; + + /* And adjust the scale to get as close to a match as we can. + */ + out_scale = VIPS_RINT( out_scale + (int_result - double_result) ); + if( out_scale == 0 ) + out_scale = 1; + + vips_image_set_double( *out, "scale", out_scale ); + vips_image_set_double( *out, "offset", out_offset ); + + g_object_unref( t ); + + return( 0 ); +} + +/* Make an int version of a mask. + * + * This makes a fixed-point version ready for the vector path: instead of an + * output scale, we have x.y for each element. + * + * @out is a w x h int array. + */ +static int +intize_to_fixed_point( VipsImage *in, int *out ) +{ + VipsImage *t; + double scale; + int ne; + double *scaled; + double total_error; + int i; + + if( vips_check_matrix( "vips2imask", in, &t ) ) + return( -1 ); + + /* Bake the scale into the mask. + */ + scale = vips_image_get_scale( t ); + ne = t->Xsize * t->Ysize; + if( !(scaled = VIPS_ARRAY( in, ne, double )) ) { + g_object_unref( t ); + return( -1 ); + } + for( i = 0; i < ne; i++ ) + scaled[i] = VIPS_MATRIX( t, 0, 0 )[i] / scale; + g_object_unref( t ); + + /* The scaled mask must fit in 2.6 bits, so we can handle -2 to +1.99 + */ + for( i = 0; i < ne; i++ ) + if( scaled[i] >= 2.0 || + scaled[i] < -2 ) { +#ifdef DEBUG_COMPILE + printf( "intize_to_fixed_point: out of range\n" ); +#endif /*DEBUG_COMPILE*/ + + return( -1 ); + } + + /* The smallest coefficient we can manage is 1/64th, we'll just turn + * that into zero. + * + * Find the total error we'll get by rounding down to zero and bail if + * it's significant. + */ + total_error = 0.0; + for( i = 0; i < ne; i++ ) + if( scaled[i] != 0.0 && + VIPS_FABS( scaled[i] ) < 1.0 / FIXED_SCALE ) + total_error += VIPS_FABS( scaled[i] ); + + /* 0.1 is a 10% error. + */ + if( total_error > 0.1 ) { +#ifdef DEBUG_COMPILE + printf( "intize_to_fixed_point: too many underflows\n" ); +#endif /*DEBUG_COMPILE*/ + + return( -1 ); + } + + vips_vector_to_fixed_point( scaled, out, ne, FIXED_SCALE ); + +#ifdef DEBUG_COMPILE +{ + int x, y; + + printf( "intize_to_fixed_point:\n" ); + for( y = 0; y < t->Ysize; y++ ) { + printf( "\t" ); + for( x = 0; x < t->Xsize; x++ ) + printf( "%4d ", out[y * t->Xsize + x] ); + printf( "\n" ); + } +} +#endif /*DEBUG_COMPILE*/ + + return( 0 ); +} + +static int +vips_convi_build( VipsObject *object ) +{ + VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( object ); + VipsConvolution *convolution = (VipsConvolution *) object; + VipsConvi *convi = (VipsConvi *) object; + VipsImage **t = (VipsImage **) vips_object_local_array( object, 4 ); + + VipsImage *in; + VipsImage *M; + int n_point; + VipsGenerateFn generate; + double *coeff; + int i; + + if( VIPS_OBJECT_CLASS( vips_convi_parent_class )->build( object ) ) + return( -1 ); + + in = convolution->in; + M = convolution->M; + convi->n_point = n_point = M->Xsize * M->Ysize; + + if( vips_embed( in, &t[0], + M->Xsize / 2, M->Ysize / 2, + in->Xsize + M->Xsize - 1, in->Ysize + M->Ysize - 1, + "extend", VIPS_EXTEND_COPY, + NULL ) ) + return( -1 ); + in = t[0]; + + /* For uchar input, try to make a vector path. + */ + if( vips_vector_isenabled() && + in->BandFmt == VIPS_FORMAT_UCHAR ) { + if( (convi->fixed = VIPS_ARRAY( object, n_point, int )) && + !intize_to_fixed_point( M, convi->fixed ) && + !vips_convi_compile( convi, in ) ) { + generate = vips_convi_generate_vector; + vips_info( class->nickname, "using vector path" ); + } + else + vips_convi_compile_free( convi ); + } + + /* If there's no vector path, fall back to C. + */ + if( !convi->n_pass ) { + /* Make an int version of our mask. + */ + if( vips__image_intize( M, &t[1] ) ) + return( -1 ); + convi->iM = M = t[1]; + + coeff = VIPS_MATRIX( M, 0, 0 ); + if( !(convi->coeff = VIPS_ARRAY( object, n_point, int )) || + !(convi->coeff_pos = VIPS_ARRAY( object, n_point, int )) ) + return( -1 ); + + /* Squeeze out zero mask elements. + */ + for( i = 0; i < n_point; i++ ) + if( coeff[i] ) { + convi->coeff[convi->nnz] = coeff[i]; + convi->coeff_pos[convi->nnz] = i; + convi->nnz += 1; + } + + /* Was the whole mask zero? We must have at least 1 element + * in there: set it to zero. + */ + if( convi->nnz == 0 ) { + convi->coeff[0] = 0; + convi->coeff_pos[0] = 0; + convi->nnz = 1; + } + + generate = vips_convi_generate; + vips_info( class->nickname, "using C path" ); + } + + g_object_set( convi, "out", vips_image_new(), NULL ); + if( vips_image_pipelinev( convolution->out, + VIPS_DEMAND_STYLE_SMALLTILE, in, NULL ) ) + return( -1 ); + + /* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output + * would be 1x1. + */ + convolution->out->Xsize -= M->Xsize - 1; + convolution->out->Ysize -= M->Ysize - 1; + + if( vips_image_generate( convolution->out, + vips_convi_start, generate, vips_convi_stop, in, convi ) ) + return( -1 ); + + convolution->out->Xoffset = -M->Xsize / 2; + convolution->out->Yoffset = -M->Ysize / 2; + + return( 0 ); +} + +static void +vips_convi_class_init( VipsConviClass *class ) +{ + VipsObjectClass *object_class = (VipsObjectClass *) class; + + object_class->nickname = "convi"; + object_class->description = _( "int convolution operation" ); + object_class->build = vips_convi_build; +} + +static void +vips_convi_init( VipsConvi *convi ) +{ + convi->nnz = 0; + convi->coeff = NULL; + convi->coeff_pos = NULL; +} + +/** + * vips_convi: + * @in: input image + * @out: output image + * @mask: convolve with this mask + * @...: %NULL-terminated list of optional named arguments + * + * Integer convolution. This is a low-level operation, see vips_conv() for + * something more convenient. + * + * @mask is converted to an integer mask with rint() of each element, rint of + * scale and rint of offset. Each output pixel is then calculated as + * + * |[ + * sigma[i]{pixel[i] * mask[i]} / scale + offset + * ]| + * + * The output image always has the same #VipsBandFormat as the input image. + * + * For #VIPS_FORMAT_UCHAR images, vips_convi() uses a fast vector path based on + * fixed-point arithmetic. This can produce slightly different results. + * Disable the vector path with `--vips-novector` or `VIPS_NOVECTOR` or + * vips_vector_set_enabled(). + * + * See also: vips_conv(). + * + * Returns: 0 on success, -1 on error + */ +int +vips_convi( VipsImage *in, VipsImage **out, VipsImage *mask, ... ) +{ + va_list ap; + int result; + + va_start( ap, mask ); + result = vips_call_split( "convi", ap, in, out, mask ); + va_end( ap ); + + return( result ); +} + diff --git a/libvips/convolution/convolution.c b/libvips/convolution/convolution.c index a2caf0d9..2650a40c 100644 --- a/libvips/convolution/convolution.c +++ b/libvips/convolution/convolution.c @@ -157,16 +157,24 @@ void vips_convolution_operation_init( void ) { extern int vips_conv_get_type( void ); - extern int vips_compass_get_type( void ); + extern int vips_conva_get_type( void ); + extern int vips_convf_get_type( void ); + extern int vips_convi_get_type( void ); extern int vips_convsep_get_type( void ); + extern int vips_convasep_get_type( void ); + extern int vips_compass_get_type( void ); extern int vips_fastcor_get_type( void ); extern int vips_spcor_get_type( void ); extern int vips_sharpen_get_type( void ); extern int vips_gaussblur_get_type( void ); vips_conv_get_type(); + vips_conva_get_type(); + vips_convf_get_type(); + vips_convi_get_type(); vips_compass_get_type(); vips_convsep_get_type(); + vips_convasep_get_type(); vips_fastcor_get_type(); vips_spcor_get_type(); vips_sharpen_get_type(); diff --git a/libvips/convolution/convsep.c b/libvips/convolution/convsep.c index 7838ec41..d428e734 100644 --- a/libvips/convolution/convsep.c +++ b/libvips/convolution/convsep.c @@ -63,6 +63,8 @@ vips_convsep_build( VipsObject *object ) VipsImage **t = (VipsImage **) vips_object_local_array( object, 3 ); + VipsImage *in; + g_object_set( convsep, "out", vips_image_new(), NULL ); if( VIPS_OBJECT_CLASS( vips_convsep_parent_class )->build( object ) ) @@ -71,20 +73,38 @@ vips_convsep_build( VipsObject *object ) if( vips_check_separable( class->nickname, convolution->M ) ) return( -1 ); - if( vips_rot( convolution->M, &t[0], VIPS_ANGLE_D90, NULL ) || - vips_conv( convolution->in, &t[1], convolution->M, - "precision", convsep->precision, - "layers", convsep->layers, - "cluster", convsep->cluster, - NULL ) || - vips_conv( t[1], &t[2], t[0], - "precision", convsep->precision, - "layers", convsep->layers, - "cluster", convsep->cluster, - NULL ) ) - return( -1 ); + in = convolution->in; - if( vips_image_write( t[2], convolution->out ) ) + if( convsep->precision == VIPS_PRECISION_APPROXIMATE ) { + if( vips_convasep( convolution->in, &t[0], convolution->M, + "layers", convsep->layers, + NULL ) ) + return( -1 ); + in = t[0]; + } + else { + if( vips_rot( convolution->M, &t[0], VIPS_ANGLE_D90, NULL ) ) + return( -1 ); + + /* We must only add the offset once. + */ + vips_image_set_double( t[0], "offset", 0 ); + + if( vips_conv( convolution->in, &t[1], convolution->M, + "precision", convsep->precision, + "layers", convsep->layers, + "cluster", convsep->cluster, + NULL ) || + vips_conv( t[1], &t[2], t[0], + "precision", convsep->precision, + "layers", convsep->layers, + "cluster", convsep->cluster, + NULL ) ) + return( -1 ); + in = t[2]; + } + + if( vips_image_write( in, convolution->out ) ) return( -1 ); return( 0 ); diff --git a/libvips/convolution/im_aconv.c b/libvips/convolution/im_aconv.c deleted file mode 100644 index 901f846a..00000000 --- a/libvips/convolution/im_aconv.c +++ /dev/null @@ -1,1273 +0,0 @@ -/* im_aconv ... approximate convolution - * - * This operation does an approximate convolution. - * - * Author: John Cupitt & Nicolas Robidoux - * Written on: 31/5/11 - * Modified on: - * 31/5/11 - * - from im_aconvsep() - */ - -/* - - This file is part of VIPS. - - VIPS is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301 USA - - */ - -/* - - These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk - - */ - -/* - - See: - - http://incubator.quasimondo.com/processing/stackblur.pde - - This thing is a little like stackblur, but generalised to any 2D - convolution. - - */ - -/* - - TODO - -timing: - -$ time vips im_conv_f img_0075.jpg x2.v g2d201.con -real 5m3.359s -user 9m34.700s -sys 0m1.500s - -$ time vips im_aconv img_0075.jpg x.v g2d201.con 10 10 -real 0m3.151s -user 0m5.640s -sys 0m0.100s - -$ vips im_subtract x.v x2.v diff.v -$ vips im_abs diff.v abs.v -$ vips im_max abs.v -2.70833 - - - are we handling mask offset correctly? - - */ - -/* -#define DEBUG -#define VIPS_DEBUG - */ - -#ifdef HAVE_CONFIG_H -#include -#endif /*HAVE_CONFIG_H*/ -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - -/* Maximum number of boxes we can break the mask into. - */ -#define MAX_LINES (10000) - -/* The number of edges we consider at once in clustering. Higher values are - * faster, but risk pushing up average error in the result. - */ -#define MAX_EDGES (1000) - -/* Get an (x,y) value from a mask. - */ -#define MASK( M, X, Y ) ((M)->coeff[(X) + (Y) * (M)->xsize]) - -/* A horizontal line in the mask. - */ -typedef struct _HLine { - /* Start is the left-most pixel in the line, end is one beyond the - * right-most pixel. - */ - int start; - int end; - - /* The hlines have weights. weight 0 means this line is unused. - */ - int weight; -} HLine; - -/* For clustering. A pair of hlines and their distance. An edge in a graph. - */ -typedef struct _Edge { - /* The index into boxes->hline[]. - */ - int a; - int b; - - /* The distance between them, see boxes_distance(). - */ - int d; -} Edge; - -/* An element of a vline. - */ -typedef struct _VElement { - /* band is the index into hline[] we add, row is the row we take - * it from. - */ - int band; - int row; - - /* Negative lobes are made with factor -1, we also common-up repeated - * additions of the same line. - */ - int factor; -} VElement; - -/* A vline. - */ -typedef struct _VLine { - int band; - int factor; - int start; - int end; -} VLine; - -/* A set of boxes. - */ -typedef struct _Boxes { - /* Copy of our arguments. - */ - IMAGE *in; - IMAGE *out; - DOUBLEMASK *mask; - int n_layers; - int cluster; - - int area; - int rounding; - - /* The horizontal lines we gather. hline[3] writes to band 3 in the - * intermediate image. max_line is the length of the longest hline: - * over 256 and we need to use an int intermediate for 8-bit images. - */ - int n_hline; - HLine hline[MAX_LINES]; - int max_line; - - /* During clustering, the top few edges we are considering. - */ - Edge edge[MAX_EDGES]; - - /* Scale and sum a set of hlines to make the final value. - */ - int n_velement; - VElement velement[MAX_LINES]; - - /* And group those velements as vlines. - */ - int n_vline; - VLine vline[MAX_LINES]; -} Boxes; - -/* Euclid's algorithm. Use this to common up mults. - */ -static int -gcd( int a, int b ) -{ - if( b == 0 ) - return( abs( a ) ); - else - return( gcd( b, a % b ) ); -} - -static void -boxes_start( Boxes *boxes, int x ) -{ - boxes->hline[boxes->n_hline].start = x; - boxes->hline[boxes->n_hline].weight = 1; -} - -static int -boxes_end( Boxes *boxes, int x, int y, int factor ) -{ - boxes->hline[boxes->n_hline].end = x; - - boxes->velement[boxes->n_velement].row = y; - boxes->velement[boxes->n_velement].band = boxes->n_hline; - boxes->velement[boxes->n_velement].factor = factor; - - if( boxes->n_hline >= MAX_LINES - 1 ) { - vips_error( "im_aconv", "%s", _( "mask too complex" ) ); - return( -1 ); - } - boxes->n_hline += 1; - - if( boxes->n_velement >= MAX_LINES - 1 ) { - vips_error( "im_aconv", "%s", _( "mask too complex" ) ); - return( -1 ); - } - boxes->n_velement += 1; - - return( 0 ); -} - -#ifdef DEBUG -static void -boxes_hprint( Boxes *boxes ) -{ - int x, y; - - printf( "hlines:\n" ); - printf( " n b r f w\n" ); - for( y = 0; y < boxes->n_velement; y++ ) { - int b = boxes->velement[y].band; - - printf( "%4d %3d %3d %2d %3d ", - y, b, - boxes->velement[y].row, - boxes->velement[y].factor, - boxes->hline[b].weight ); - for( x = 0; x < 45; x++ ) { - int rx = x * (boxes->mask->xsize + 1) / 45; - - if( rx >= boxes->hline[b].start && - rx < boxes->hline[b].end ) - printf( "#" ); - else - printf( " " ); - } - printf( " %3d .. %3d\n", - boxes->hline[b].start, boxes->hline[b].end ); - } -} - -static void -boxes_vprint( Boxes *boxes ) -{ - int y; - - printf( "%d vlines:\n", boxes->n_vline ); - printf( " n b f s e\n" ); - for( y = 0; y < boxes->n_vline; y++ ) - printf( "%4d %2d %2d == %3d .. %3d\n", y, - boxes->vline[y].band, - boxes->vline[y].factor, - boxes->vline[y].start, - boxes->vline[y].end ); - - printf( "area = %d\n", boxes->area ); - printf( "rounding = %d\n", boxes->rounding ); - printf( "max_line = %d\n", boxes->max_line ); -} -#endif /*DEBUG*/ - -/* Break the mask into a set of lines. - */ -static int -boxes_break( Boxes *boxes ) -{ - DOUBLEMASK *mask = boxes->mask; - const int size = mask->xsize * mask->ysize; - - double max; - double min; - double depth; - int layers_above; - int layers_below; - int z, n, x, y; - - /* Find mask range. We must always include the zero axis in the mask. - */ - max = 0; - min = 0; - for( n = 0; n < size; n++ ) { - max = IM_MAX( max, mask->coeff[n] ); - min = IM_MIN( min, mask->coeff[n] ); - } - - VIPS_DEBUG_MSG( "boxes_new: min = %g, max = %g\n", min, max ); - - /* The zero axis must fall on a layer boundary. Estimate the - * depth, find n-lines-above-zero, get exact depth, then calculate a - * fixed n-lines which includes any negative parts. - */ - depth = (max - min) / boxes->n_layers; - layers_above = VIPS_CEIL( max / depth ); - depth = max / layers_above; - layers_below = VIPS_FLOOR( min / depth ); - - boxes->n_layers = layers_above - layers_below; - - VIPS_DEBUG_MSG( "boxes_new: depth = %g, n_layers = %d\n", - depth, boxes->n_layers ); - - /* For each layer, generate a set of lines which are inside the - * perimeter. Work down from the top. - */ - for( z = 0; z < boxes->n_layers; z++ ) { - /* How deep we are into the mask, as a double we can test - * against. Add half the layer depth so we can easily find >50% - * mask elements. - */ - double z_ph = max - (1 + z) * depth + depth / 2; - - /* Odd, but we must avoid rounding errors that make us miss 0 - * in the line above. - */ - int z_positive = z < layers_above; - - for( y = 0; y < mask->ysize; y++ ) { - int inside; - - /* Start outside the perimeter. - */ - inside = 0; - - for( x = 0; x < mask->xsize; x++ ) { - double coeff = MASK( mask, x, y ); - - /* The vertical line from mask[x, y] to 0 is - * inside. Is our current square (x, y) part - * of that line? - */ - if( (z_positive && coeff >= z_ph) || - (!z_positive && coeff <= z_ph) ) { - if( !inside ) { - boxes_start( boxes, x ); - inside = 1; - } - } - else { - if( inside ) { - if( boxes_end( boxes, x, y, - z_positive ? 1 : -1 ) ) - return( -1 ); - inside = 0; - } - } - } - - if( inside && - boxes_end( boxes, mask->xsize, y, - z_positive ? 1 : -1 ) ) - return( -1 ); - } - } - -#ifdef DEBUG - VIPS_DEBUG_MSG( "boxes_new: generated %d boxes\n", boxes->n_hline ); - boxes_hprint( boxes ); -#endif /*DEBUG*/ - - return( 0 ); -} - -/* The 'distance' between a pair of hlines. - */ -static int -boxes_distance( Boxes *boxes, int a, int b ) -{ - g_assert( boxes->hline[a].weight > 0 && boxes->hline[b].weight > 0 ); - - return( abs( boxes->hline[a].start - boxes->hline[b].start ) + - abs( boxes->hline[a].end - boxes->hline[b].end ) ); -} - -/* Merge two hlines. Line b is deleted, and any refs to b in vlines updated to - * point at a. - */ -static void -boxes_merge( Boxes *boxes, int a, int b ) -{ - int i; - - /* Scale weights. - */ - int fa = boxes->hline[a].weight; - int fb = boxes->hline[b].weight; - double w = (double) fb / (fa + fb); - - /* New endpoints. - */ - boxes->hline[a].start += w * - (boxes->hline[b].start - boxes->hline[a].start); - boxes->hline[a].end += w * - (boxes->hline[b].end - boxes->hline[a].end); - boxes->hline[a].weight += boxes->hline[b].weight; - - /* Update velement refs to b to refer to a instead. - */ - for( i = 0; i < boxes->n_velement; i++ ) - if( boxes->velement[i].band == b ) - boxes->velement[i].band = a; - - /* Mark b to be deleted. - */ - boxes->hline[b].weight = 0; -} - -static int -edge_sortfn( const void *p1, const void *p2 ) -{ - Edge *a = (Edge *) p1; - Edge *b = (Edge *) p2; - - return( a->d - b->d ); -} - -/* Cluster in batches. Return non-zero if we merged some lines. - * - * This is not as accurate as rescanning the whole space on every merge, but - * it's far faster. - */ -static int -boxes_cluster2( Boxes *boxes, int cluster ) -{ - int i, j, k; - int worst; - int worst_i; - int merged; - - for( i = 0; i < MAX_EDGES; i++ ) { - boxes->edge[i].a = -1; - boxes->edge[i].b = -1; - boxes->edge[i].d = 99999; - } - worst_i = 0; - worst = boxes->edge[worst_i].d; - - for( i = 0; i < boxes->n_hline; i++ ) { - if( boxes->hline[i].weight == 0 ) - continue; - - for( j = i + 1; j < boxes->n_hline; j++ ) { - int distance; - - if( boxes->hline[j].weight == 0 ) - continue; - - distance = boxes_distance( boxes, i, j ); - if( distance < worst ) { - boxes->edge[worst_i].a = i; - boxes->edge[worst_i].b = j; - boxes->edge[worst_i].d = distance; - - worst_i = 0; - worst = boxes->edge[worst_i].d; - for( k = 0; k < MAX_EDGES; k++ ) - if( boxes->edge[k].d > worst ) { - worst = boxes->edge[k].d; - worst_i = k; - } - } - } - } - - /* Sort to get closest first. - */ - qsort( boxes->edge, MAX_EDGES, sizeof( Edge ), edge_sortfn ); - - /* - printf( "edges:\n" ); - printf( " n a b d:\n" ); - for( i = 0; i < MAX_EDGES; i++ ) - printf( "%2i) %3d %3d %3d\n", i, - boxes->edge[i].a, boxes->edge[i].b, boxes->edge[i].d ); - */ - - /* Merge from the top down. - */ - merged = 0; - for( k = 0; k < MAX_EDGES; k++ ) { - Edge *edge = &boxes->edge[k]; - - if( edge->d > cluster ) - break; - - /* Has been removed, see loop below. - */ - if( edge->a == -1 ) - continue; - - boxes_merge( boxes, edge->a, edge->b ); - merged = 1; - - /* Nodes a and b have vanished or been moved. Remove any edges - * which refer to them from the edge list, - */ - for( i = k; i < MAX_EDGES; i++ ) { - Edge *edgei = &boxes->edge[i]; - - if( edgei->a == edge->a || - edgei->b == edge->a || - edgei->a == edge->b || - edgei->b == edge->b ) - edgei->a = -1; - } - } - - return( merged ); -} - -/* Renumber after clustering. We will have removed a lot of hlines ... shuffle - * the rest down, adjust all the vline references. - */ -static void -boxes_renumber( Boxes *boxes ) -{ - int i, j; - - VIPS_DEBUG_MSG( "boxes_renumber: renumbering ...\n" ); - - /* Loop for all zero-weight hlines. - */ - for( i = 0; i < boxes->n_hline; ) { - if( boxes->hline[i].weight > 0 ) { - i++; - continue; - } - - /* We move hlines i + 1 down, so we need to adjust all - * band[] refs to match. - */ - for( j = 0; j < boxes->n_velement; j++ ) - if( boxes->velement[j].band > i ) - boxes->velement[j].band -= 1; - - memmove( boxes->hline + i, boxes->hline + i + 1, - sizeof( HLine ) * (boxes->n_hline - i - 1) ); - boxes->n_hline -= 1; - } - - VIPS_DEBUG_MSG( "boxes_renumber: ... %d hlines remain\n", - boxes->n_hline ); -} - -/* Sort by band, then factor, then row. - */ -static int -velement_sortfn( const void *p1, const void *p2 ) -{ - VElement *a = (VElement *) p1; - VElement *b = (VElement *) p2; - - if( a->band != b->band ) - return( a->band - b->band ); - - if( a->factor != b->factor ) - return( a->factor - b->factor ); - - return( a->row - b->row ); -} - -static void -boxes_vline( Boxes *boxes ) -{ - int y, z; - - VIPS_DEBUG_MSG( "boxes_vline: forming vlines ...\n" ); - - /* Sort to get elements which could form a vline together. - */ - qsort( boxes->velement, boxes->n_velement, sizeof( VElement ), - velement_sortfn ); - -#ifdef DEBUG - boxes_hprint( boxes ); -#endif /*DEBUG*/ - - /* If two lines have the same row and band, we can join them and knock - * up the factor instead. - */ - for( y = 0; y < boxes->n_velement; y++ ) { - for( z = y + 1; z < boxes->n_velement; z++ ) - if( boxes->velement[z].band != - boxes->velement[y].band || - boxes->velement[z].row != - boxes->velement[y].row ) - break; - - boxes->velement[y].factor = z - y; - memmove( boxes->velement + y + 1, boxes->velement + z, - sizeof( VElement ) * (boxes->n_velement - z) ); - boxes->n_velement -= z - y - 1; - } - -#ifdef DEBUG - printf( "after commoning up, %d velement remain\n", boxes->n_velement ); - boxes_hprint( boxes ); -#endif /*DEBUG*/ - - boxes->n_vline = 0; - for( y = 0; y < boxes->n_velement; ) { - int n = boxes->n_vline; - - /* Start of a line. - */ - boxes->vline[n].band = boxes->velement[y].band; - boxes->vline[n].factor = boxes->velement[y].factor; - boxes->vline[n].start = boxes->velement[y].row; - - /* Search for the end of this line. - */ - for( z = y + 1; z < boxes->n_velement; z++ ) - if( boxes->velement[z].band != - boxes->vline[n].band || - boxes->velement[z].factor != - boxes->vline[n].factor || - boxes->velement[z].row != - boxes->vline[n].start + z - y ) - break; - - /* So the line ends at the previously examined element. We - * want 'end' to be one beyond that (non-inclusive). - */ - boxes->vline[n].end = boxes->velement[z - 1].row + 1; - - boxes->n_vline += 1; - y = z; - } - - VIPS_DEBUG_MSG( "boxes_vline: found %d vlines\n", boxes->n_vline ); -} - -/* Break a mask into boxes. - */ -static Boxes * -boxes_new( IMAGE *in, IMAGE *out, DOUBLEMASK *mask, int n_layers, int cluster ) -{ - const int size = mask->xsize * mask->ysize; - - Boxes *boxes; - double sum; - int x, y, z; - - /* Check parameters. - */ - if( im_piocheck( in, out ) || - im_check_uncoded( "im_aconv", in ) || - vips_check_dmask( "im_aconv", mask ) ) - return( NULL ); - - boxes = VIPS_NEW( out, Boxes ); - boxes->in = in; - boxes->out = out; - if( !(boxes->mask = (DOUBLEMASK *) im_local( out, - (im_construct_fn) im_dup_dmask, - (im_callback_fn) im_free_dmask, mask, mask->filename, NULL )) ) - return( NULL ); - boxes->n_layers = n_layers; - boxes->cluster = cluster; - - boxes->n_hline = 0; - boxes->n_velement = 0; - boxes->n_vline = 0; - - /* Break into a set of hlines. - */ - if( boxes_break( boxes ) ) - return( NULL ); - - /* Cluster to find groups of lines. - */ - VIPS_DEBUG_MSG( "boxes_new: clustering with thresh %d ...\n", cluster ); - while( boxes_cluster2( boxes, cluster ) ) - ; - - /* Renumber to remove holes created by clustering. - */ - boxes_renumber( boxes ); - - /* Find a set of vlines for the remaining hlines. - */ - boxes_vline( boxes ); - - /* Find the area of the lines and the length of the longest hline. - */ - boxes->area = 0; - boxes->max_line = 0; - for( y = 0; y < boxes->n_velement; y++ ) { - x = boxes->velement[y].band; - z = boxes->hline[x].end - boxes->hline[x].start; - - boxes->area += boxes->velement[y].factor * z; - if( z > boxes->max_line ) - boxes->max_line = z; - } - - /* Strength reduction: if all lines are divisible by n, we can move - * that n out into the ->area factor. The aim is to produce as many - * factor 1 lines as we can and to reduce the chance of overflow. - */ - x = boxes->velement[0].factor; - for( y = 1; y < boxes->n_velement; y++ ) - x = gcd( x, boxes->velement[y].factor ); - for( y = 0; y < boxes->n_velement; y++ ) - boxes->velement[y].factor /= x; - boxes->area *= x; - - /* Find the area of the original mask. - */ - sum = 0; - for( z = 0; z < size; z++ ) - sum += mask->coeff[z]; - - boxes->area = rint( sum * boxes->area / mask->scale ); - boxes->rounding = (boxes->area + 1) / 2 + mask->offset * boxes->area; - -#ifdef DEBUG - boxes_hprint( boxes ); - boxes_vprint( boxes ); -#endif /*DEBUG*/ - - /* With 512x512 tiles, each hline requires 3mb of intermediate per - * thread ... 300 lines is about a gb per thread, ouch. - */ - if( boxes->n_hline > 150 ) { - im_error( "im_aconv", "%s", _( "mask too complex" ) ); - return( NULL ); - } - - return( boxes ); -} - -/* Our sequence value. - */ -typedef struct { - Boxes *boxes; - - REGION *ir; /* Input region */ - - /* Offsets for start and stop. - */ - int *start; - int *end; - - int last_stride; /* Avoid recalcing offsets, if we can */ - - /* The rolling sums. int for integer types, double for floating point - * types. - */ - void *sum; -} AConvSequence; - -/* Free a sequence value. - */ -static int -aconv_stop( void *vseq, void *a, void *b ) -{ - AConvSequence *seq = (AConvSequence *) vseq; - - IM_FREEF( im_region_free, seq->ir ); - - return( 0 ); -} - -/* Convolution start function. - */ -static void * -aconv_start( IMAGE *out, void *a, void *b ) -{ - IMAGE *in = (IMAGE *) a; - Boxes *boxes = (Boxes *) b; - - AConvSequence *seq; - - if( !(seq = IM_NEW( out, AConvSequence )) ) - return( NULL ); - - /* Init! - */ - seq->boxes = boxes; - seq->ir = im_region_create( in ); - - /* n_velement should be the largest possible dimension. - */ - g_assert( boxes->n_velement >= boxes->n_hline ); - g_assert( boxes->n_velement >= boxes->n_vline ); - - seq->start = IM_ARRAY( out, boxes->n_velement, int ); - seq->end = IM_ARRAY( out, boxes->n_velement, int ); - - if( vips_band_format_isint( out->BandFmt ) ) - seq->sum = IM_ARRAY( out, boxes->n_velement, int ); - else - seq->sum = IM_ARRAY( out, boxes->n_velement, double ); - seq->last_stride = -1; - - if( !seq->ir || !seq->start || !seq->end || !seq->sum ) { - aconv_stop( seq, in, boxes ); - return( NULL ); - } - - return( seq ); -} - -/* The h and v loops are very similar, but also annoyingly different. Keep - * them separate for easy debugging. - */ - -#define HCONV( IN, OUT ) \ -G_STMT_START { \ - for( i = 0; i < bands; i++ ) { \ - OUT *seq_sum = (OUT *) seq->sum; \ - \ - IN *p; \ - OUT *q; \ - \ - p = i + (IN *) IM_REGION_ADDR( ir, r->left, r->top + y ); \ - q = i * n_hline + \ - (OUT *) IM_REGION_ADDR( or, r->left, r->top + y ); \ - \ - for( z = 0; z < n_hline; z++ ) { \ - seq_sum[z] = 0; \ - for( x = boxes->hline[z].start; \ - x < boxes->hline[z].end; x++ ) \ - seq_sum[z] += p[x * istride]; \ - q[z] = seq_sum[z]; \ - } \ - q += ostride; \ - \ - for( x = 1; x < r->width; x++ ) { \ - for( z = 0; z < n_hline; z++ ) { \ - seq_sum[z] += p[seq->end[z]]; \ - seq_sum[z] -= p[seq->start[z]]; \ - q[z] = seq_sum[z]; \ - } \ - p += istride; \ - q += ostride; \ - } \ - } \ -} G_STMT_END - -/* Do horizontal masks ... we scan the mask along scanlines. - */ -static int -aconv_hgenerate( REGION *or, void *vseq, void *a, void *b ) -{ - AConvSequence *seq = (AConvSequence *) vseq; - IMAGE *in = (IMAGE *) a; - Boxes *boxes = (Boxes *) b; - - REGION *ir = seq->ir; - const int n_hline = boxes->n_hline; - DOUBLEMASK *mask = boxes->mask; - Rect *r = &or->valid; - - /* Double the bands (notionally) for complex. - */ - int bands = vips_band_format_iscomplex( in->BandFmt ) ? - 2 * in->Bands : in->Bands; - - Rect s; - int x, y, z, i; - int istride; - int ostride; - - /* Prepare the section of the input image we need. A little larger - * than the section of the output image we are producing. - */ - s = *r; - s.width += mask->xsize - 1; - if( im_prepare( ir, &s ) ) - return( -1 ); - - istride = IM_IMAGE_SIZEOF_PEL( in ) / - IM_IMAGE_SIZEOF_ELEMENT( in ); - ostride = IM_IMAGE_SIZEOF_PEL( or->im ) / - IM_IMAGE_SIZEOF_ELEMENT( or->im ); - - /* Init offset array. - */ - if( seq->last_stride != istride ) { - seq->last_stride = istride; - - for( z = 0; z < n_hline; z++ ) { - seq->start[z] = boxes->hline[z].start * istride; - seq->end[z] = boxes->hline[z].end * istride; - } - } - - for( y = 0; y < r->height; y++ ) { - switch( in->BandFmt ) { - case IM_BANDFMT_UCHAR: - if( boxes->max_line > 256 ) - HCONV( unsigned char, unsigned int ); - else - HCONV( unsigned char, unsigned short ); - break; - - case IM_BANDFMT_CHAR: - if( boxes->max_line > 256 ) - HCONV( signed char, signed int ); - else - HCONV( signed char, signed short ); - break; - - case IM_BANDFMT_USHORT: - HCONV( unsigned short, unsigned int ); - break; - - case IM_BANDFMT_SHORT: - HCONV( signed short, signed int ); - break; - - case IM_BANDFMT_UINT: - HCONV( unsigned int, unsigned int ); - break; - - case IM_BANDFMT_INT: - HCONV( signed int, signed int ); - break; - - case IM_BANDFMT_FLOAT: - HCONV( float, float ); - break; - - case IM_BANDFMT_DOUBLE: - HCONV( double, double ); - break; - - case IM_BANDFMT_COMPLEX: - HCONV( float, float ); - break; - - case IM_BANDFMT_DPCOMPLEX: - HCONV( double, double ); - break; - - default: - g_assert_not_reached(); - } - } - - return( 0 ); -} - -static int -aconv_horizontal( Boxes *boxes, IMAGE *in, IMAGE *out ) -{ - /* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output - * would be 1x1. - */ - if( im_cp_desc( out, in ) ) - return( -1 ); - out->Xsize -= boxes->mask->xsize - 1; - if( out->Xsize <= 0 ) { - im_error( "im_aconv", "%s", _( "image too small for mask" ) ); - return( -1 ); - } - out->Bands *= boxes->n_hline; - - /* Short u?char lines can use u?short intermediate. - */ - if( vips_band_format_isuint( in->BandFmt ) ) - out->BandFmt = boxes->max_line < 256 ? - IM_BANDFMT_USHORT : IM_BANDFMT_UINT; - else if( vips_band_format_isint( in->BandFmt ) ) - out->BandFmt = boxes->max_line < 256 ? - IM_BANDFMT_SHORT : IM_BANDFMT_INT; - - if( im_demand_hint( out, IM_SMALLTILE, in, NULL ) || - im_generate( out, - aconv_start, aconv_hgenerate, aconv_stop, in, boxes ) ) - return( -1 ); - - out->Xoffset = -boxes->mask->xsize / 2; - out->Yoffset = -boxes->mask->ysize / 2; - - return( 0 ); -} - -#define CLIP_UCHAR( V ) \ -G_STMT_START { \ - if( (V) < 0 ) \ - (V) = 0; \ - else if( (V) > UCHAR_MAX ) \ - (V) = UCHAR_MAX; \ -} G_STMT_END - -#define CLIP_CHAR( V ) \ -G_STMT_START { \ - if( (V) < SCHAR_MIN ) \ - (V) = SCHAR_MIN; \ - else if( (V) > SCHAR_MAX ) \ - (V) = SCHAR_MAX; \ -} G_STMT_END - -#define CLIP_USHORT( V ) \ -G_STMT_START { \ - if( (V) < 0 ) \ - (V) = 0; \ - else if( (V) > USHRT_MAX ) \ - (V) = USHRT_MAX; \ -} G_STMT_END - -#define CLIP_SHORT( V ) \ -G_STMT_START { \ - if( (V) < SHRT_MIN ) \ - (V) = SHRT_MIN; \ - else if( (V) > SHRT_MAX ) \ - (V) = SHRT_MAX; \ -} G_STMT_END - -#define CLIP_NONE( V ) {} - -#define VCONV( ACC, IN, OUT, CLIP ) \ -G_STMT_START { \ - for( x = 0; x < sz; x++ ) { \ - ACC *seq_sum = (ACC *) seq->sum; \ - \ - IN *p; \ - OUT *q; \ - ACC sum; \ - \ - p = x * boxes->n_hline + \ - (IN *) IM_REGION_ADDR( ir, r->left, r->top ); \ - q = x + (OUT *) IM_REGION_ADDR( or, r->left, r->top ); \ - \ - sum = 0; \ - for( z = 0; z < n_vline; z++ ) { \ - seq_sum[z] = 0; \ - for( k = boxes->vline[z].start; \ - k < boxes->vline[z].end; k++ ) \ - seq_sum[z] += p[k * istride + \ - boxes->vline[z].band]; \ - sum += boxes->vline[z].factor * seq_sum[z]; \ - } \ - sum = (sum + boxes->rounding) / boxes->area; \ - CLIP( sum ); \ - *q = sum; \ - q += ostride; \ - \ - for( y = 1; y < r->height; y++ ) { \ - sum = 0;\ - for( z = 0; z < n_vline; z++ ) { \ - seq_sum[z] += p[seq->end[z]]; \ - seq_sum[z] -= p[seq->start[z]]; \ - sum += boxes->vline[z].factor * seq_sum[z]; \ - } \ - p += istride; \ - sum = (sum + boxes->rounding) / boxes->area; \ - CLIP( sum ); \ - *q = sum; \ - q += ostride; \ - } \ - } \ -} G_STMT_END - -/* Do vertical masks ... we scan the mask down columns of pixels. - */ -static int -aconv_vgenerate( REGION *or, void *vseq, void *a, void *b ) -{ - AConvSequence *seq = (AConvSequence *) vseq; - IMAGE *in = (IMAGE *) a; - Boxes *boxes = (Boxes *) b; - - REGION *ir = seq->ir; - const int n_vline = boxes->n_vline; - DOUBLEMASK *mask = boxes->mask; - Rect *r = &or->valid; - - /* Double the width (notionally) for complex. - */ - int sz = vips_band_format_iscomplex( in->BandFmt ) ? - 2 * IM_REGION_N_ELEMENTS( or ) : IM_REGION_N_ELEMENTS( or ); - - Rect s; - int x, y, z, k; - int istride; - int ostride; - - /* Prepare the section of the input image we need. A little larger - * than the section of the output image we are producing. - */ - s = *r; - s.height += mask->ysize - 1; - if( im_prepare( ir, &s ) ) - return( -1 ); - - istride = IM_REGION_LSKIP( ir ) / - IM_IMAGE_SIZEOF_ELEMENT( in ); - ostride = IM_REGION_LSKIP( or ) / - IM_IMAGE_SIZEOF_ELEMENT( boxes->out ); - - /* Init offset array. - */ - if( seq->last_stride != istride ) { - seq->last_stride = istride; - - for( z = 0; z < n_vline; z++ ) { - seq->start[z] = boxes->vline[z].band + - boxes->vline[z].start * istride; - seq->end[z] = boxes->vline[z].band + - boxes->vline[z].end * istride; - } - } - - switch( boxes->in->BandFmt ) { - case IM_BANDFMT_UCHAR: - if( boxes->max_line > 256 ) - VCONV( unsigned int, \ - unsigned int, unsigned char, CLIP_UCHAR ); - else - VCONV( unsigned int, \ - unsigned short, unsigned char, CLIP_UCHAR ); - break; - - case IM_BANDFMT_CHAR: - if( boxes->max_line > 256 ) - VCONV( signed int, \ - signed int, signed char, CLIP_UCHAR ); - else - VCONV( signed int, \ - signed short, signed char, CLIP_UCHAR ); - break; - - case IM_BANDFMT_USHORT: - VCONV( unsigned int, \ - unsigned int, unsigned short, CLIP_USHORT ); - break; - - case IM_BANDFMT_SHORT: - VCONV( signed int, signed int, signed short, CLIP_SHORT ); - break; - - case IM_BANDFMT_UINT: - VCONV( unsigned int, unsigned int, unsigned int, CLIP_NONE ); - break; - - case IM_BANDFMT_INT: - VCONV( signed int, signed int, signed int, CLIP_NONE ); - break; - - case IM_BANDFMT_FLOAT: - VCONV( float, float, float, CLIP_NONE ); - break; - - case IM_BANDFMT_DOUBLE: - VCONV( double, double, double, CLIP_NONE ); - break; - - case IM_BANDFMT_COMPLEX: - VCONV( float, float, float, CLIP_NONE ); - break; - - case IM_BANDFMT_DPCOMPLEX: - VCONV( double, double, double, CLIP_NONE ); - break; - - default: - g_assert_not_reached(); - } - - return( 0 ); -} - -static int -aconv_vertical( Boxes *boxes, IMAGE *in, IMAGE *out ) -{ - /* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output - * would be 1x1. - */ - if( im_cp_desc( out, in ) ) - return( -1 ); - out->Ysize -= boxes->mask->ysize - 1; - if( out->Ysize <= 0 ) { - im_error( "im_aconv", "%s", _( "image too small for mask" ) ); - return( -1 ); - } - out->Bands = boxes->in->Bands; - out->BandFmt = boxes->in->BandFmt; - - if( im_demand_hint( out, IM_SMALLTILE, in, NULL ) || - im_generate( out, - aconv_start, aconv_vgenerate, aconv_stop, in, boxes ) ) - return( -1 ); - - out->Xoffset = -boxes->mask->xsize / 2; - out->Yoffset = -boxes->mask->ysize / 2; - - return( 0 ); -} - -/** - * im_aconv: - * @in: input image - * @out: output image - * @mask: convolution mask - * @n_layers: number of layers for approximation - * @cluster: cluster lines closer than this distance - * - * Perform an approximate convolution of @in with @mask. - * - * The output image - * always has the same #VipsBandFormat as the input image. - * - * Larger values for @n_layers give more accurate - * results, but are slower. As @n_layers approaches the mask radius, the - * accuracy will become close to exact convolution and the speed will drop to - * match. For many large masks, such as Gaussian, @n_layers need be only 10% of - * this value and accuracy will still be good. - * - * Smaller values of @cluster will give more accurate results, but be slower - * and use more memory. 10% of the mask radius is a good rule of thumb. - * - * See also: im_convsep_f(), im_create_dmaskv(). - * - * Returns: 0 on success, -1 on error - */ -int -im_aconv( IMAGE *in, IMAGE *out, DOUBLEMASK *mask, int n_layers, int cluster ) -{ - IMAGE *t[2]; - Boxes *boxes; - - if( !(boxes = boxes_new( in, out, mask, n_layers, cluster )) || - im_open_local_array( out, t, 2, "im_aconv", "p" ) ) - return( -1 ); - - /* - */ - if( im_embed( in, t[0], 1, mask->xsize / 2, mask->ysize / 2, - in->Xsize + mask->xsize - 1, in->Ysize + mask->ysize - 1 ) || - aconv_horizontal( boxes, t[0], t[1] ) || - aconv_vertical( boxes, t[1], out ) ) - return( -1 ); - - /* For testing .. just try one direction. - if( aconv_horizontal( boxes, in, out ) ) - return( -1 ); - */ - - out->Xoffset = 0; - out->Yoffset = 0; - - return( 0 ); -} - diff --git a/libvips/convolution/im_aconvsep.c b/libvips/convolution/im_aconvsep.c deleted file mode 100644 index 4bc0a810..00000000 --- a/libvips/convolution/im_aconvsep.c +++ /dev/null @@ -1,879 +0,0 @@ -/* im_aconvsep ... separable approximate convolution - * - * This operation does an approximate, seperable convolution. - * - * Author: John Cupitt & Nicolas Robidoux - * Written on: 31/5/11 - * Modified on: - * 31/5/11 - * - from im_conv() - */ - -/* - - This file is part of VIPS. - - VIPS is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301 USA - - */ - -/* - - These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk - - */ - -/* - - See: - - http://incubator.quasimondo.com/processing/stackblur.pde - - This thing is a little like stackblur, but generalised to any separable - mask. - - */ - -/* - - TODO - - - are we handling mask offset correctly? - - */ - -/* Show sample pixels as they are transformed. -#define DEBUG_PIXELS - */ - -/* -#define DEBUG -#define VIPS_DEBUG - */ - -#ifdef HAVE_CONFIG_H -#include -#endif /*HAVE_CONFIG_H*/ -#include - -#include -#include -#include -#include - -#include -#include -#include - -/* Maximum number of lines we can break the mask into. - */ -#define MAX_LINES (1000) - -/* Euclid's algorithm. Use this to common up mults. - */ -static int -gcd( int a, int b ) -{ - if( b == 0 ) - return( abs( a ) ); - else - return( gcd( b, a % b ) ); -} - -/* A set of lines. - */ -typedef struct _Lines { - /* Copy of our arguments. - */ - IMAGE *in; - IMAGE *out; - DOUBLEMASK *mask; - int n_layers; - - int area; - int rounding; - - /* Start is the left-most pixel in the line, end is one beyond the - * right-most pixel. - */ - int n_lines; - int start[MAX_LINES]; - int end[MAX_LINES]; - int factor[MAX_LINES]; -} Lines; - -static void -lines_start( Lines *lines, int x, int factor ) -{ - lines->start[lines->n_lines] = x; - lines->factor[lines->n_lines] = factor; -} - -static int -lines_end( Lines *lines, int x ) -{ - lines->end[lines->n_lines] = x; - - if( lines->n_lines >= MAX_LINES - 1 ) { - vips_error( "im_aconvsep", "%s", _( "mask too complex" ) ); - return( -1 ); - } - lines->n_lines += 1; - - return( 0 ); -} - -/* Break a mask into lines. - */ -static Lines * -lines_new( IMAGE *in, IMAGE *out, DOUBLEMASK *mask, int n_layers ) -{ - const int width = mask->xsize * mask->ysize; - - Lines *lines; - double max; - double min; - double depth; - double sum; - int layers_above; - int layers_below; - int z, n, x; - - /* Check parameters. - */ - if( im_piocheck( in, out ) || - im_check_uncoded( "im_aconvsep", in ) || - vips_check_dmask_1d( "im_aconvsep", mask ) ) - return( NULL ); - - lines = VIPS_NEW( out, Lines ); - lines->in = in; - lines->out = out; - if( !(lines->mask = (DOUBLEMASK *) im_local( out, - (im_construct_fn) im_dup_dmask, - (im_callback_fn) im_free_dmask, mask, mask->filename, NULL )) ) - return( NULL ); - lines->n_layers = n_layers; - lines->n_lines = 0; - - VIPS_DEBUG_MSG( "lines_new: breaking into %d layers ...\n", n_layers ); - - /* Find mask range. We must always include the zero axis in the mask. - */ - max = 0; - min = 0; - for( x = 0; x < width; x++ ) { - if( mask->coeff[x] > max ) - max = mask->coeff[x]; - if( mask->coeff[x] < min ) - min = mask->coeff[x]; - } - - /* The zero axis must fall on a layer boundary. Estimate the - * depth, find n-lines-above-zero, get exact depth, then calculate a - * fixed n-lines which includes any negative parts. - */ - depth = (max - min) / n_layers; - layers_above = ceil( max / depth ); - depth = max / layers_above; - layers_below = floor( min / depth ); - n_layers = layers_above - layers_below; - - VIPS_DEBUG_MSG( "depth = %g, n_layers = %d\n", depth, n_layers ); - - /* For each layer, generate a set of lines which are inside the - * perimeter. Work down from the top. - */ - for( z = 0; z < n_layers; z++ ) { - double y = max - (1 + z) * depth; - - /* y plus half depth ... ie. the layer midpoint. - */ - double y_ph = y + depth / 2; - - /* Odd, but we must avoid rounding errors that make us miss 0 - * in the line above. - */ - int y_positive = z < layers_above; - - int inside; - - /* Start outside the perimeter. - */ - inside = 0; - - for( x = 0; x < width; x++ ) { - /* The vertical line from mask[z] to 0 is inside. Is - * our current square (x, y) part of that line? - */ - if( (y_positive && mask->coeff[x] >= y_ph) || - (!y_positive && mask->coeff[x] <= y_ph) ) { - if( !inside ) { - lines_start( lines, x, - y_positive ? 1 : -1 ); - inside = 1; - } - } - else { - if( inside ) { - if( lines_end( lines, x ) ) - return( NULL ); - inside = 0; - } - } - } - - if( inside && - lines_end( lines, width ) ) - return( NULL ); - } - - /* Can we common up any lines? Search for lines with identical - * start/end. - */ - for( z = 0; z < lines->n_lines; z++ ) { - for( n = z + 1; n < lines->n_lines; n++ ) { - if( lines->start[z] == lines->start[n] && - lines->end[z] == lines->end[n] ) { - lines->factor[z] += lines->factor[n]; - - /* n can be deleted. Do this in a separate - * pass below. - */ - lines->factor[n] = 0; - } - } - } - - /* Now we can remove all factor 0 lines. - */ - for( z = 0; z < lines->n_lines; z++ ) { - if( lines->factor[z] == 0 ) { - for( x = z; x < lines->n_lines; x++ ) { - lines->start[x] = lines->start[x + 1]; - lines->end[x] = lines->end[x + 1]; - lines->factor[x] = lines->factor[x + 1]; - } - lines->n_lines -= 1; - } - } - - /* Find the area of the lines. - */ - lines->area = 0; - for( z = 0; z < lines->n_lines; z++ ) - lines->area += lines->factor[z] * - (lines->end[z] - lines->start[z]); - - /* Strength reduction: if all lines are divisible by n, we can move - * that n out into the ->area factor. The aim is to produce as many - * factor 1 lines as we can and to reduce the chance of overflow. - */ - x = lines->factor[0]; - for( z = 1; z < lines->n_lines; z++ ) - x = gcd( x, lines->factor[z] ); - for( z = 0; z < lines->n_lines; z++ ) - lines->factor[z] /= x; - lines->area *= x; - - /* Find the area of the original mask. - */ - sum = 0; - for( z = 0; z < width; z++ ) - sum += mask->coeff[z]; - - lines->area = rint( sum * lines->area / mask->scale ); - lines->rounding = (lines->area + 1) / 2 + mask->offset * lines->area; - - /* ASCII-art layer drawing. - printf( "lines:\n" ); - for( z = 0; z < lines->n_lines; z++ ) { - printf( "%3d - %2d x ", z, lines->factor[z] ); - for( x = 0; x < 55; x++ ) { - int rx = x * (width + 1) / 55; - - if( rx >= lines->start[z] && rx < lines->end[z] ) - printf( "#" ); - else - printf( " " ); - } - printf( " %3d .. %3d\n", lines->start[z], lines->end[z] ); - } - printf( "area = %d\n", lines->area ); - printf( "rounding = %d\n", lines->rounding ); - */ - - return( lines ); -} - -/* Our sequence value. - */ -typedef struct { - Lines *lines; - REGION *ir; /* Input region */ - - int *start; /* Offsets for start and stop */ - int *end; - - /* The sums for each line. int for integer types, double for floating - * point types. - */ - void *sum; - - int last_stride; /* Avoid recalcing offsets, if we can */ -} AConvSep; - -/* Free a sequence value. - */ -static int -aconvsep_stop( void *vseq, void *a, void *b ) -{ - AConvSep *seq = (AConvSep *) vseq; - - IM_FREEF( im_region_free, seq->ir ); - - return( 0 ); -} - -/* Convolution start function. - */ -static void * -aconvsep_start( IMAGE *out, void *a, void *b ) -{ - IMAGE *in = (IMAGE *) a; - Lines *lines = (Lines *) b; - - AConvSep *seq; - - if( !(seq = IM_NEW( out, AConvSep )) ) - return( NULL ); - - /* Init! - */ - seq->lines = lines; - seq->ir = im_region_create( in ); - seq->start = IM_ARRAY( out, lines->n_lines, int ); - seq->end = IM_ARRAY( out, lines->n_lines, int ); - if( vips_band_format_isint( out->BandFmt ) ) - seq->sum = IM_ARRAY( out, lines->n_lines, int ); - else - seq->sum = IM_ARRAY( out, lines->n_lines, double ); - seq->last_stride = -1; - - if( !seq->ir || !seq->start || !seq->end || !seq->sum ) { - aconvsep_stop( seq, in, lines ); - return( NULL ); - } - - return( seq ); -} - -#define CLIP_UCHAR( V ) \ -G_STMT_START { \ - if( (V) < 0 ) \ - (V) = 0; \ - else if( (V) > UCHAR_MAX ) \ - (V) = UCHAR_MAX; \ -} G_STMT_END - -#define CLIP_CHAR( V ) \ -G_STMT_START { \ - if( (V) < SCHAR_MIN ) \ - (V) = SCHAR_MIN; \ - else if( (V) > SCHAR_MAX ) \ - (V) = SCHAR_MAX; \ -} G_STMT_END - -#define CLIP_USHORT( V ) \ -G_STMT_START { \ - if( (V) < 0 ) \ - (V) = 0; \ - else if( (V) > USHRT_MAX ) \ - (V) = USHRT_MAX; \ -} G_STMT_END - -#define CLIP_SHORT( V ) \ -G_STMT_START { \ - if( (V) < SHRT_MIN ) \ - (V) = SHRT_MIN; \ - else if( (V) > SHRT_MAX ) \ - (V) = SHRT_MAX; \ -} G_STMT_END - -#define CLIP_NONE( V ) {} - -/* The h and v loops are very similar, but also annoyingly different. Keep - * them separate for easy debugging. - */ - -#define HCONV_INT( TYPE, CLIP ) { \ - for( i = 0; i < bands; i++ ) { \ - int *seq_sum = (int *) seq->sum; \ - \ - TYPE *q; \ - TYPE *p; \ - int sum; \ - \ - p = i + (TYPE *) IM_REGION_ADDR( ir, r->left, r->top + y ); \ - q = i + (TYPE *) IM_REGION_ADDR( or, r->left, r->top + y ); \ - \ - sum = 0; \ - for( z = 0; z < n_lines; z++ ) { \ - seq_sum[z] = 0; \ - for( x = lines->start[z]; x < lines->end[z]; x++ ) \ - seq_sum[z] += p[x * istride]; \ - sum += lines->factor[z] * seq_sum[z]; \ - } \ - sum = (sum + lines->rounding) / lines->area; \ - CLIP( sum ); \ - *q = sum; \ - q += ostride; \ - \ - for( x = 1; x < r->width; x++ ) { \ - sum = 0; \ - for( z = 0; z < n_lines; z++ ) { \ - seq_sum[z] += p[seq->end[z]]; \ - seq_sum[z] -= p[seq->start[z]]; \ - sum += lines->factor[z] * seq_sum[z]; \ - } \ - p += istride; \ - sum = (sum + lines->rounding) / lines->area; \ - CLIP( sum ); \ - *q = sum; \ - q += ostride; \ - } \ - } \ -} - -#define HCONV_FLOAT( TYPE ) { \ - for( i = 0; i < bands; i++ ) { \ - double *seq_sum = (double *) seq->sum; \ - \ - TYPE *q; \ - TYPE *p; \ - double sum; \ - \ - p = i + (TYPE *) IM_REGION_ADDR( ir, r->left, r->top + y ); \ - q = i + (TYPE *) IM_REGION_ADDR( or, r->left, r->top + y ); \ - \ - sum = 0; \ - for( z = 0; z < lines->n_lines; z++ ) { \ - seq_sum[z] = 0; \ - for( x = lines->start[z]; x < lines->end[z]; x++ ) \ - seq_sum[z] += p[x * istride]; \ - sum += lines->factor[z] * seq_sum[z]; \ - } \ - sum = sum / lines->area + mask->offset; \ - *q = sum; \ - q += ostride; \ - \ - for( x = 1; x < r->width; x++ ) { \ - sum = 0; \ - for( z = 0; z < lines->n_lines; z++ ) { \ - seq_sum[z] += p[seq->end[z]]; \ - seq_sum[z] -= p[seq->start[z]]; \ - sum += lines->factor[z] * seq_sum[z]; \ - } \ - p += istride; \ - sum = sum / lines->area + mask->offset; \ - *q = sum; \ - q += ostride; \ - } \ - } \ -} - -/* Do horizontal masks ... we scan the mask along scanlines. - */ -static int -aconvsep_generate_horizontal( REGION *or, void *vseq, void *a, void *b ) -{ - AConvSep *seq = (AConvSep *) vseq; - IMAGE *in = (IMAGE *) a; - Lines *lines = (Lines *) b; - - REGION *ir = seq->ir; - const int n_lines = lines->n_lines; - DOUBLEMASK *mask = lines->mask; - Rect *r = &or->valid; - - /* Double the bands (notionally) for complex. - */ - int bands = vips_band_format_iscomplex( in->BandFmt ) ? - 2 * in->Bands : in->Bands; - - Rect s; - int x, y, z, i; - int istride; - int ostride; - - /* Prepare the section of the input image we need. A little larger - * than the section of the output image we are producing. - */ - s = *r; - s.width += mask->xsize - 1; - s.height += mask->ysize - 1; - if( im_prepare( ir, &s ) ) - return( -1 ); - - /* Stride can be different for the vertical case, keep this here for - * ease of direction change. - */ - istride = IM_IMAGE_SIZEOF_PEL( in ) / - IM_IMAGE_SIZEOF_ELEMENT( in ); - ostride = IM_IMAGE_SIZEOF_PEL( lines->out ) / - IM_IMAGE_SIZEOF_ELEMENT( lines->out ); - - /* Init offset array. - */ - if( seq->last_stride != istride ) { - seq->last_stride = istride; - - for( z = 0; z < n_lines; z++ ) { - seq->start[z] = lines->start[z] * istride; - seq->end[z] = lines->end[z] * istride; - } - } - - for( y = 0; y < r->height; y++ ) { - switch( in->BandFmt ) { - case IM_BANDFMT_UCHAR: - HCONV_INT( unsigned char, CLIP_UCHAR ); - break; - - case IM_BANDFMT_CHAR: - HCONV_INT( signed char, CLIP_UCHAR ); - break; - - case IM_BANDFMT_USHORT: - HCONV_INT( unsigned short, CLIP_USHORT ); - break; - - case IM_BANDFMT_SHORT: - HCONV_INT( signed short, CLIP_SHORT ); - break; - - case IM_BANDFMT_UINT: - HCONV_INT( unsigned int, CLIP_NONE ); - break; - - case IM_BANDFMT_INT: - HCONV_INT( signed int, CLIP_NONE ); - break; - - case IM_BANDFMT_FLOAT: - HCONV_FLOAT( float ); - break; - - case IM_BANDFMT_DOUBLE: - HCONV_FLOAT( double ); - break; - - case IM_BANDFMT_COMPLEX: - HCONV_FLOAT( float ); - break; - - case IM_BANDFMT_DPCOMPLEX: - HCONV_FLOAT( double ); - break; - - default: - g_assert_not_reached(); - } - } - - return( 0 ); -} - -#define VCONV_INT( TYPE, CLIP ) { \ - for( x = 0; x < sz; x++ ) { \ - int *seq_sum = (int *) seq->sum; \ - \ - TYPE *q; \ - TYPE *p; \ - int sum; \ - \ - p = x + (TYPE *) IM_REGION_ADDR( ir, r->left, r->top ); \ - q = x + (TYPE *) IM_REGION_ADDR( or, r->left, r->top ); \ - \ - sum = 0; \ - for( z = 0; z < lines->n_lines; z++ ) { \ - seq_sum[z] = 0; \ - for( y = lines->start[z]; y < lines->end[z]; y++ ) \ - seq_sum[z] += p[y * istride]; \ - sum += lines->factor[z] * seq_sum[z]; \ - } \ - sum = (sum + lines->rounding) / lines->area; \ - CLIP( sum ); \ - *q = sum; \ - q += ostride; \ - \ - for( y = 1; y < r->height; y++ ) { \ - sum = 0; \ - for( z = 0; z < lines->n_lines; z++ ) { \ - seq_sum[z] += p[seq->end[z]]; \ - seq_sum[z] -= p[seq->start[z]]; \ - sum += lines->factor[z] * seq_sum[z]; \ - } \ - p += istride; \ - sum = (sum + lines->rounding) / lines->area; \ - CLIP( sum ); \ - *q = sum; \ - q += ostride; \ - } \ - } \ -} - -#define VCONV_FLOAT( TYPE ) { \ - for( x = 0; x < sz; x++ ) { \ - double *seq_sum = (double *) seq->sum; \ - \ - TYPE *q; \ - TYPE *p; \ - double sum; \ - \ - p = x + (TYPE *) IM_REGION_ADDR( ir, r->left, r->top ); \ - q = x + (TYPE *) IM_REGION_ADDR( or, r->left, r->top ); \ - \ - sum = 0; \ - for( z = 0; z < lines->n_lines; z++ ) { \ - seq_sum[z] = 0; \ - for( y = lines->start[z]; y < lines->end[z]; y++ ) \ - seq_sum[z] += p[y * istride]; \ - sum += lines->factor[z] * seq_sum[z]; \ - } \ - sum = sum / lines->area + mask->offset; \ - *q = sum; \ - q += ostride; \ - \ - for( y = 1; y < r->height; y++ ) { \ - sum = 0; \ - for( z = 0; z < lines->n_lines; z++ ) { \ - seq_sum[z] += p[seq->end[z]]; \ - seq_sum[z] -= p[seq->start[z]]; \ - sum += lines->factor[z] * seq_sum[z]; \ - } \ - p += istride; \ - sum = sum / lines->area + mask->offset; \ - *q = sum; \ - q += ostride; \ - } \ - } \ -} - -/* Do vertical masks ... we scan the mask down columns of pixels. Copy-paste - * from above with small changes. - */ -static int -aconvsep_generate_vertical( REGION *or, void *vseq, void *a, void *b ) -{ - AConvSep *seq = (AConvSep *) vseq; - IMAGE *in = (IMAGE *) a; - Lines *lines = (Lines *) b; - - REGION *ir = seq->ir; - const int n_lines = lines->n_lines; - DOUBLEMASK *mask = lines->mask; - Rect *r = &or->valid; - - /* Double the width (notionally) for complex. - */ - int sz = vips_band_format_iscomplex( in->BandFmt ) ? - 2 * IM_REGION_N_ELEMENTS( or ) : IM_REGION_N_ELEMENTS( or ); - - Rect s; - int x, y, z; - int istride; - int ostride; - - /* Prepare the section of the input image we need. A little larger - * than the section of the output image we are producing. - */ - s = *r; - s.width += mask->xsize - 1; - s.height += mask->ysize - 1; - if( im_prepare( ir, &s ) ) - return( -1 ); - - /* Stride can be different for the vertical case, keep this here for - * ease of direction change. - */ - istride = IM_REGION_LSKIP( ir ) / - IM_IMAGE_SIZEOF_ELEMENT( lines->in ); - ostride = IM_REGION_LSKIP( or ) / - IM_IMAGE_SIZEOF_ELEMENT( lines->out ); - - /* Init offset array. - */ - if( seq->last_stride != istride ) { - seq->last_stride = istride; - - for( z = 0; z < n_lines; z++ ) { - seq->start[z] = lines->start[z] * istride; - seq->end[z] = lines->end[z] * istride; - } - } - - switch( in->BandFmt ) { - case IM_BANDFMT_UCHAR: - VCONV_INT( unsigned char, CLIP_UCHAR ); - break; - - case IM_BANDFMT_CHAR: - VCONV_INT( signed char, CLIP_UCHAR ); - break; - - case IM_BANDFMT_USHORT: - VCONV_INT( unsigned short, CLIP_USHORT ); - break; - - case IM_BANDFMT_SHORT: - VCONV_INT( signed short, CLIP_SHORT ); - break; - - case IM_BANDFMT_UINT: - VCONV_INT( unsigned int, CLIP_NONE ); - break; - - case IM_BANDFMT_INT: - VCONV_INT( signed int, CLIP_NONE ); - break; - - case IM_BANDFMT_FLOAT: - VCONV_FLOAT( float ); - break; - - case IM_BANDFMT_DOUBLE: - VCONV_FLOAT( double ); - break; - - case IM_BANDFMT_COMPLEX: - VCONV_FLOAT( float ); - break; - - case IM_BANDFMT_DPCOMPLEX: - VCONV_FLOAT( double ); - break; - - default: - g_assert_not_reached(); - } - - return( 0 ); -} - -static int -aconvsep_raw( IMAGE *in, IMAGE *out, DOUBLEMASK *mask, int n_layers ) -{ - Lines *lines; - im_generate_fn generate; - -#ifdef DEBUG - printf( "aconvsep_raw: starting with matrix:\n" ); - im_print_dmask( mask ); -#endif /*DEBUG*/ - - if( !(lines = lines_new( in, out, mask, n_layers )) ) - return( -1 ); - - /* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output - * would be 1x1. - */ - if( im_cp_desc( out, in ) ) - return( -1 ); - out->Xsize -= mask->xsize - 1; - out->Ysize -= mask->ysize - 1; - if( out->Xsize <= 0 || out->Ysize <= 0 ) { - im_error( "im_aconvsep", "%s", _( "image too small for mask" ) ); - return( -1 ); - } - - if( mask->xsize == 1 ) - generate = aconvsep_generate_vertical; - else - generate = aconvsep_generate_horizontal; - - if( im_demand_hint( out, IM_SMALLTILE, in, NULL ) || - im_generate( out, - aconvsep_start, generate, aconvsep_stop, in, lines ) ) - return( -1 ); - - out->Xoffset = -mask->xsize / 2; - out->Yoffset = -mask->ysize / 2; - - return( 0 ); -} - -/** - * im_aconvsep: - * @in: input image - * @out: output image - * @mask: convolution mask - * @n_layers: number of layers for approximation - * - * Perform an approximate separable convolution of @in with @mask. - * - * The mask must be 1xn or nx1 elements. - * The output image - * always has the same #VipsBandFormat as the input image. - * - * The image is convolved twice: once with @mask and then again with @mask - * rotated by 90 degrees. - * - * Larger values for @n_layers give more accurate - * results, but are slower. As @n_layers approaches the mask radius, the - * accuracy will become close to exact convolution and the speed will drop to - * match. For many large masks, such as Gaussian, @n_layers need be only 10% of - * this value and accuracy will still be good. - * - * See also: im_convsep_f(), im_create_dmaskv(). - * - * Returns: 0 on success, -1 on error - */ -int -im_aconvsep( IMAGE *in, IMAGE *out, DOUBLEMASK *mask, int n_layers ) -{ - IMAGE *t[2]; - const int n_mask = mask->xsize * mask->ysize; - DOUBLEMASK *rmask; - - if( im_open_local_array( out, t, 2, "im_aconvsep", "p" ) || - !(rmask = (DOUBLEMASK *) im_local( out, - (im_construct_fn) im_dup_dmask, - (im_callback_fn) im_free_dmask, mask, mask->filename, NULL )) ) - return( -1 ); - - rmask->xsize = mask->ysize; - rmask->ysize = mask->xsize; - - /* - */ - if( im_embed( in, t[0], 1, n_mask / 2, n_mask / 2, - in->Xsize + n_mask - 1, in->Ysize + n_mask - 1 ) || - aconvsep_raw( t[0], t[1], mask, n_layers ) || - aconvsep_raw( t[1], out, rmask, n_layers ) ) - return( -1 ); - - /* For testing .. just try one direction. - if( aconvsep_raw( in, out, mask, n_layers ) ) - return( -1 ); - */ - - out->Xoffset = 0; - out->Yoffset = 0; - - return( 0 ); -} - diff --git a/libvips/convolution/im_conv.c b/libvips/convolution/im_conv.c deleted file mode 100644 index 1dc477de..00000000 --- a/libvips/convolution/im_conv.c +++ /dev/null @@ -1,1106 +0,0 @@ -/* im_conv - * - * Copyright: 1990, N. Dessipris. - * - * Author: Nicos Dessipris & Kirk Martinez - * Written on: 29/04/1991 - * Modified on: 19/05/1991 - * 8/7/93 JC - * - adapted for partial v2 - * - memory leaks fixed - * - ANSIfied - * 23/7/93 JC - * - inner loop unrolled with a switch - 25% speed-up! - * 13/12/93 JC - * - tiny rounding error removed - * 7/10/94 JC - * - new IM_ARRAY() macro - * - various simplifications - * - evalend callback added - * 1/2/95 JC - * - use of IM_REGION_ADDR() updated - * - output size was incorrect! see comment below - * - bug with large non-square matricies fixed too - * - uses new im_embed() function - * 13/7/98 JC - * - wierd bug ... im_free_imask is no longer directly called for close - * callback, caused SIGKILL on solaris 2.6 ... linker bug? - * 9/3/01 JC - * - reworked and simplified, about 10% faster - * - slightly better range clipping - * 27/7/01 JC - * - reject masks with scale == 0 - * 7/4/04 - * - im_conv() now uses im_embed() with edge stretching on the input, not - * the output - * - sets Xoffset / Yoffset - * 11/11/05 - * - simpler inner loop avoids gcc4 bug - * 7/11/07 - * - new evalstart/end callbacks - * 12/5/08 - * - int rounding was +1 too much, argh - * - only rebuild the buffer offsets if bpl changes - * 5/4/09 - * - tiny speedups and cleanups - * - add restrict, though it doesn't seem to help gcc - * 12/11/09 - * - only check for non-zero elements once - * - add mask-all-zero check - * - cleanups - * 3/2/10 - * - gtkdoc - * - more cleanups - * 23/08/10 - * - add a special case for 3x3 masks, about 20% faster - * 1/10/10 - * - support complex (just double the bands) - * 18/10/10 - * - add experimental Orc path - * 29/10/10 - * - use VipsVector - * - get rid of im_convsep(), just call this twice, no longer worth - * keeping two versions - * 8/11/10 - * - add array tiling - * 9/5/11 - * - argh typo in overflow estimation could cause errors - * 15/10/11 Nicolas - * - handle offset correctly in seperable convolutions - * 26/1/16 Lovell Fuller - * - remove Duff for a 25% speedup - */ - -/* - - This file is part of VIPS. - - VIPS is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301 USA - - */ - -/* - - These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk - - */ - -/* Show sample pixels as they are transformed. -#define DEBUG_PIXELS - */ - -/* -#define DEBUG - */ - -/* - - TODO - - - tried 8-bit data with a 32-bit intermediate, but it was only - slightly faster than C - - 16-bit data would be even slower, no speed advantage - - - make up a signed 8-bit code path? - - - don't use divluw, it's insanely slow, instead scale coefficients so - that we can just do >>8 at the end - - */ - -#ifdef HAVE_CONFIG_H -#include -#endif /*HAVE_CONFIG_H*/ -#include - -#include -#include -#include - -#include -#include - -/* We can't run more than this many passes. Larger than this and we - * fall back to C. - */ -#define MAX_PASS (10) - -/* A pass with a vector. - */ -typedef struct { - int first; /* The index of the first mask coff we use */ - int last; /* The index of the last mask coff we use */ - - int r; /* Set previous result in this var */ - - /* The code we generate for this section of this mask. - */ - VipsVector *vector; -} Pass; - -/* Our parameters ... we take a copy of the mask argument, plus we make a - * smaller version with the zeros squeezed out. - */ -typedef struct { - IMAGE *in; - IMAGE *out; - INTMASK *mask; /* Copy of mask arg */ - - int nnz; /* Number of non-zero mask elements */ - int *coeff; /* Array of non-zero mask coefficients */ - int *coeff_pos; /* Index of each nnz element in mask->coeff */ - - int underflow; /* Global underflow/overflow counts */ - int overflow; - - /* The convolver we generate for this mask. We have to split the - * convolve and clip into two phases. - */ - int n_pass; - Pass pass[MAX_PASS]; - int s1; /* Input to clip */ - VipsVector *clip; -} Conv; - -static void -conv_vector_free( Conv *conv ) -{ - int i; - - for( i = 0; i < conv->n_pass; i++ ) - IM_FREEF( vips_vector_free, conv->pass[i].vector ); - conv->n_pass = 0; - - IM_FREEF( vips_vector_free, conv->clip ); -} - -static int -conv_close( Conv *conv ) -{ - IM_FREEF( im_free_imask, conv->mask ); - conv_vector_free( conv ); - - return( 0 ); -} - -static int -conv_evalstart( Conv *conv ) -{ - /* Reset underflow/overflow count. - * - * This often doesn't get called until eval has already finished, so - * resetting here just wipes all records. - * - conv->overflow = 0; - conv->underflow = 0; - * - */ - - return( 0 ); -} - -static int -conv_evalend( Conv *conv ) -{ - if( conv->overflow ) - vips_info( "im_conv", - _( "%d overflows detected" ), conv->overflow ); - if( conv->underflow ) - vips_info( "im_conv", - _( "%d underflows detected" ), conv->underflow ); - - return( 0 ); -} - -#define TEMP( N, S ) vips_vector_temporary( v, N, S ) -#define SCANLINE( N, P, S ) vips_vector_source_scanline( v, N, P, S ) -#define CONST( N, V, S ) vips_vector_constant( v, N, V, S ) -#define ASM2( OP, A, B ) vips_vector_asm2( v, OP, A, B ) -#define ASM3( OP, A, B, C ) vips_vector_asm3( v, OP, A, B, C ) - -/* Generate code for a section of the mask. - * - * 0 for success, -1 on error. - */ -static int -conv_compile_convolution_u8s16_section( Pass *pass, - Conv *conv, gboolean first_pass ) -{ - INTMASK *mask = conv->mask; - const int n_mask = mask->xsize * mask->ysize; - - int i; - VipsVector *v; - char zero[256]; - char offset[256]; - char source[256]; - char coeff[256]; - - pass->vector = v = vips_vector_new( "conv", 2 ); - - /* The value we fetch from the image, the product with the matrix - * value, the accumulated sum. - */ - TEMP( "value", 1 ); - TEMP( "product", 2 ); - TEMP( "sum", 2 ); - - /* Init the sum. If this is the first pass, it's a constant. If this - * is a later pass, we have to init the sum from the result - * of the previous pass. - */ - if( first_pass ) { - CONST( zero, 0, 2 ); - ASM2( "copyw", "sum", zero ); - } - else { - /* "r" is the result of the previous pass. - */ - pass->r = vips_vector_source_name( v, "r", 2 ); - ASM2( "loadw", "sum", "r" ); - } - - for( i = pass->first; i < n_mask; i++ ) { - int x = i % mask->xsize; - int y = i / mask->xsize; - - if( !mask->coeff[i] ) - /* Exclude zero elements. - */ - continue; - - /* The source. sl0 is the first scanline in the mask. - */ - SCANLINE( source, y, 1 ); - - /* The offset, only for non-first-columns though. - */ - if( x > 0 ) - CONST( offset, conv->in->Bands * x, 1 ); - - /* The coefficient. Only for non-1 coeffs though, we skip the - * mul for them. - * - * We need to do 8-bit unsigned pixel * signed mask, so we - * have to cast the pixel up to 16-bit then do a mult against a - * 16-bit constant. We know the result will fit in the bottom - * 16 bits. - */ - if( mask->coeff[i] != 1 ) - CONST( coeff, mask->coeff[i], 2 ); - - /* Two factors: - * - element is in the first column, ie. has a zero offset - * - mask coeff is 1, ie. we can skip the multiply - * - * We could combine some of these cases, but it's simpler - * and safer to spell them all out. - */ - if( x == 0 ) - ASM2( "loadb", "value", source ); - else - ASM3( "loadoffb", "value", source, offset ); - - ASM2( "convubw", "product", "value" ); - - if( mask->coeff[i] != 1 ) - ASM3( "mullw", "product", "product", coeff ); - - ASM3( "addssw", "sum", "sum", "product" ); - - if( vips_vector_full( v ) ) - break; - } - - pass->last = i; - - ASM2( "copyw", "d1", "sum" ); - -#ifdef DEBUG - vips_vector_print( v ); - printf( "compiling ...\n" ); -#endif /*DEBUG*/ - - if( !vips_vector_compile( v ) ) - return( -1 ); - - return( 0 ); -} - -/* Generate the convolution pass for u8 data with an s16 accumulator. - * - * 0 for success, -1 on error. - */ -static int -conv_compile_convolution_u8s16( Conv *conv ) -{ - INTMASK *mask = conv->mask; - const int n_mask = mask->xsize * mask->ysize; - - double min, max; - int i; - - if( conv->in->BandFmt != IM_BANDFMT_UCHAR ) - return( -1 ); - - /* Can the accumulator overflow or underflow at any stage? Since - * matrix elements are signed, we need to calculate a running - * possible min and max. - */ - min = 0; - max = 0; - for( i = 0; i < n_mask; i++ ) { - int v = 255 * mask->coeff[i]; - - min = IM_MIN( min, min + v ); - max = IM_MAX( max, max + v ); - - if( max > SHRT_MAX ) - return( -1 ); - if( min < SHRT_MIN ) - return( -1 ); - } - - /* Generate passes until we've used up the whole mask. - */ - for( i = 0;;) { - Pass *pass; - - /* Skip any zero coefficients at the start of the mask - * region. - */ - for( ; i < n_mask && !mask->coeff[i]; i++ ) - ; - if( i == n_mask ) - break; - - /* Allocate space for another pass. - */ - if( conv->n_pass == MAX_PASS ) - return( -1 ); - pass = &conv->pass[conv->n_pass]; - conv->n_pass += 1; - - pass->first = i; - pass->last = i; - pass->r = -1; - - if( conv_compile_convolution_u8s16_section( pass, - conv, conv->n_pass == 1 ) ) - return( -1 ); - i = pass->last + 1; - -#ifdef DEBUG - printf( "conv_compile_convolution_u8s16: " - "first = %d, last = %d\n", - pass->first, pass->last ); -#endif /*DEBUG*/ - - if( i >= n_mask ) - break; - } - - return( 0 ); -} - -/* Generate the program that does (pass + rounding) / scale + offset - * from a s16 intermediate back to a u8 output. - */ -static int -conv_compile_scale_s16u8( Conv *conv ) -{ - INTMASK *mask = conv->mask; - - VipsVector *v; - char scale[256]; - char offset[256]; - char zero[256]; - - /* Scale and offset must be in range. - */ - if( mask->scale > 255 || - mask->scale < 0 || - mask->offset > SHRT_MAX || - mask->offset < SHRT_MIN ) - return( -1 ); - - conv->clip = v = vips_vector_new( "clip", 1 ); - conv->s1 = vips_vector_source_name( v, "s1", 2 ); - - TEMP( "t1", 2 ); - TEMP( "t2", 2 ); - - /* We can only do unsigned divide, so we must add the offset before - * dividing by the scale. We need to scale the offset up. - * - * We can build the rounding into the offset as well. - * You might think this should be (scale + 1) / 2, but then we'd be - * adding one for scale == 1. - */ - CONST( scale, mask->scale, 1 ); - CONST( offset, mask->offset * mask->scale + mask->scale / 2, 2 ); - CONST( zero, 0, 2 ); - - /* Offset and scale. - */ - ASM3( "addssw", "t1", "s1", offset ); - - /* We need to convert the signed result of the - * offset to unsigned for the div, ie. we want to set anything <0 to 0. - */ - ASM3( "cmpgtsw", "t2", "t1", zero ); - ASM3( "andw", "t1", "t1", "t2" ); - - ASM3( "divluw", "t1", "t1", scale ); - ASM2( "convuuswb", "d1", "t1" ); - - if( !vips_vector_compile( v ) ) - return( -1 ); - -#ifdef DEBUG - vips_vector_print( v ); -#endif /*DEBUG*/ - - return( 0 ); -} - -static Conv * -conv_new( IMAGE *in, IMAGE *out, INTMASK *mask ) -{ - Conv *conv = IM_NEW( out, Conv ); - const int n_mask = mask->xsize * mask->ysize; - int i; - - if( !conv ) - return( NULL ); - - conv->in = in; - conv->out = out; - conv->mask = NULL; - conv->nnz = 0; - conv->coeff = NULL; - conv->coeff_pos = NULL; - conv->underflow = 0; - conv->overflow = 0; - - conv->n_pass = 0; - conv->s1 = -1; - conv->clip = NULL; - - if( im_add_close_callback( out, - (im_callback_fn) conv_close, conv, NULL ) || - im_add_close_callback( out, - (im_callback_fn) conv_evalstart, conv, NULL ) || - im_add_close_callback( out, - (im_callback_fn) conv_evalend, conv, NULL ) || - !(conv->coeff = IM_ARRAY( out, n_mask, int )) || - !(conv->coeff_pos = IM_ARRAY( out, n_mask, int )) || - !(conv->mask = im_dup_imask( mask, "conv_mask" )) ) - return( NULL ); - - /* Find non-zero mask elements. - */ - for( i = 0; i < n_mask; i++ ) - if( mask->coeff[i] ) { - conv->coeff[conv->nnz] = mask->coeff[i]; - conv->coeff_pos[conv->nnz] = i; - conv->nnz += 1; - } - - /* Was the whole mask zero? We must have at least 1 element in there: - * set it to zero. - */ - if( conv->nnz == 0 ) { - conv->coeff[0] = mask->coeff[0]; - conv->coeff_pos[0] = 0; - conv->nnz = 1; - } - - /* Generate code for this mask / image, if possible. - */ - if( vips_vector_isenabled() ) { - if( conv_compile_convolution_u8s16( conv ) || - conv_compile_scale_s16u8( conv ) ) - conv_vector_free( conv ); - } - - return( conv ); -} - -/* Our sequence value. - */ -typedef struct { - Conv *conv; - REGION *ir; /* Input region */ - - int *offsets; /* Offsets for each non-zero matrix element */ - VipsPel **pts; /* Per-non-zero mask element pointers */ - - int underflow; /* Underflow/overflow counts */ - int overflow; - - int last_bpl; /* Avoid recalcing offsets, if we can */ - - /* We need a pair of intermediate buffers to keep the results of each - * conv pass in. - */ - void *t1; - void *t2; -} ConvSequence; - -/* Free a sequence value. - */ -static int -conv_stop( void *vseq, void *a, void *b ) -{ - ConvSequence *seq = (ConvSequence *) vseq; - Conv *conv = (Conv *) b; - - /* Add local under/over counts to global counts. - */ - conv->overflow += seq->overflow; - conv->underflow += seq->underflow; - - IM_FREEF( im_region_free, seq->ir ); - IM_FREE( seq->t1 ); - IM_FREE( seq->t2 ); - - return( 0 ); -} - -/* Convolution start function. - */ -static void * -conv_start( IMAGE *out, void *a, void *b ) -{ - IMAGE *in = (IMAGE *) a; - Conv *conv = (Conv *) b; - - ConvSequence *seq; - - if( !(seq = IM_NEW( out, ConvSequence )) ) - return( NULL ); - - /* Init! - */ - seq->conv = conv; - seq->ir = NULL; - seq->pts = NULL; - seq->underflow = 0; - seq->overflow = 0; - seq->last_bpl = -1; - seq->t1 = NULL; - seq->t2 = NULL; - - /* Attach region and arrays. - */ - seq->ir = im_region_create( in ); - seq->offsets = IM_ARRAY( out, conv->nnz, int ); - seq->pts = IM_ARRAY( out, conv->nnz, VipsPel * ); - if( !seq->ir || !seq->offsets || !seq->pts ) { - conv_stop( seq, in, conv ); - return( NULL ); - } - - if( vips_vector_isenabled() && - conv->n_pass ) { - seq->t1 = IM_ARRAY( NULL, IM_IMAGE_N_ELEMENTS( in ), short ); - seq->t2 = IM_ARRAY( NULL, IM_IMAGE_N_ELEMENTS( in ), short ); - - if( !seq->t1 || !seq->t2 ) { - conv_stop( seq, in, conv ); - return( NULL ); - } - } - - return( seq ); -} - -/* INT inner loops. - */ -#define CONV_INT( TYPE, IM_CLIP ) { \ - TYPE ** restrict p = (TYPE **) seq->pts; \ - TYPE * restrict q = (TYPE *) IM_REGION_ADDR( or, le, y ); \ - \ - for( x = 0; x < sz; x++ ) { \ - int sum; \ - int i; \ - \ - sum = 0; \ - for ( i = 0; i < nnz; i++ ) \ - sum += t[i] * p[i][x]; \ - \ - sum = ((sum + rounding) / mask->scale) + mask->offset; \ - \ - IM_CLIP; \ - \ - q[x] = sum; \ - } \ -} - -/* FLOAT inner loops. - */ -#define CONV_FLOAT( TYPE ) { \ - TYPE ** restrict p = (TYPE **) seq->pts; \ - TYPE * restrict q = (TYPE *) IM_REGION_ADDR( or, le, y ); \ - \ - for( x = 0; x < sz; x++ ) { \ - double sum; \ - int i; \ - \ - sum = 0; \ - for ( i = 0; i < nnz; i++ ) \ - sum += t[i] * p[i][x]; \ - \ - sum = (sum / mask->scale) + mask->offset; \ - \ - q[x] = sum; \ - } \ -} - -/* Convolve! See below for the special-case 3x3 path. - */ -static int -conv_gen( REGION *or, void *vseq, void *a, void *b ) -{ - ConvSequence *seq = (ConvSequence *) vseq; - IMAGE *in = (IMAGE *) a; - Conv *conv = (Conv *) b; - REGION *ir = seq->ir; - INTMASK *mask = conv->mask; - int * restrict t = conv->coeff; - const int nnz = conv->nnz; - - /* You might think this should be (scale + 1) / 2, but then we'd be - * adding one for scale == 1. - */ - int rounding = mask->scale / 2; - - Rect *r = &or->valid; - Rect s; - int le = r->left; - int to = r->top; - int bo = IM_RECT_BOTTOM( r ); - int sz = IM_REGION_N_ELEMENTS( or ) * (im_iscomplex( in ) ? 2 : 1); - - int x, y, z, i; - - /* Prepare the section of the input image we need. A little larger - * than the section of the output image we are producing. - */ - s = *r; - s.width += mask->xsize - 1; - s.height += mask->ysize - 1; - if( im_prepare( ir, &s ) ) - return( -1 ); - - VIPS_GATE_START( "conv_gen: work" ); - - /* Fill offset array. Only do this if the bpl has changed since the - * previous im_prepare(). - */ - if( seq->last_bpl != IM_REGION_LSKIP( ir ) ) { - seq->last_bpl = IM_REGION_LSKIP( ir ); - - for( i = 0; i < nnz; i++ ) { - z = conv->coeff_pos[i]; - x = z % conv->mask->xsize; - y = z / conv->mask->xsize; - - seq->offsets[i] = - IM_REGION_ADDR( ir, x + le, y + to ) - - IM_REGION_ADDR( ir, le, to ); - } - } - - for( y = to; y < bo; y++ ) { - /* Init pts for this line of PELs. - */ - for( z = 0; z < nnz; z++ ) - seq->pts[z] = seq->offsets[z] + - IM_REGION_ADDR( ir, le, y ); - - switch( in->BandFmt ) { - case IM_BANDFMT_UCHAR: - CONV_INT( unsigned char, IM_CLIP_UCHAR( sum, seq ) ); - break; - - case IM_BANDFMT_CHAR: - CONV_INT( signed char, IM_CLIP_CHAR( sum, seq ) ); - break; - - case IM_BANDFMT_USHORT: - CONV_INT( unsigned short, IM_CLIP_USHORT( sum, seq ) ); - break; - - case IM_BANDFMT_SHORT: - CONV_INT( signed short, IM_CLIP_SHORT( sum, seq ) ); - break; - - case IM_BANDFMT_UINT: - CONV_INT( unsigned int, IM_CLIP_NONE( sum, seq ) ); - break; - - case IM_BANDFMT_INT: - CONV_INT( signed int, IM_CLIP_NONE( sum, seq ) ); - break; - - case IM_BANDFMT_FLOAT: - case IM_BANDFMT_COMPLEX: - CONV_FLOAT( float ); - break; - - case IM_BANDFMT_DOUBLE: - case IM_BANDFMT_DPCOMPLEX: - CONV_FLOAT( double ); - break; - - default: - g_assert_not_reached(); - } - } - - VIPS_GATE_STOP( "conv_gen: work" ); - - return( 0 ); -} - -/* INT inner loops. - */ -#define CONV3x3_INT( TYPE, IM_CLIP ) { \ - TYPE * restrict p0 = (TYPE *) IM_REGION_ADDR( ir, le, y ); \ - TYPE * restrict p1 = (TYPE *) IM_REGION_ADDR( ir, le, y + 1 ); \ - TYPE * restrict p2 = (TYPE *) IM_REGION_ADDR( ir, le, y + 2 ); \ - TYPE * restrict q = (TYPE *) IM_REGION_ADDR( or, le, y ); \ - \ - for( x = 0; x < sz; x++ ) { \ - int sum; \ - \ - sum = 0; \ - sum += m[0] * p0[0]; \ - sum += m[1] * p0[bands]; \ - sum += m[2] * p0[bands * 2]; \ - sum += m[3] * p1[0]; \ - sum += m[4] * p1[bands]; \ - sum += m[5] * p1[bands * 2]; \ - sum += m[6] * p2[0]; \ - sum += m[7] * p2[bands]; \ - sum += m[8] * p2[bands * 2]; \ - \ - p0 += 1; \ - p1 += 1; \ - p2 += 1; \ - \ - sum = ((sum + rounding) / mask->scale) + mask->offset; \ - \ - IM_CLIP; \ - \ - q[x] = sum; \ - } \ -} - -/* FLOAT inner loops. - */ -#define CONV3x3_FLOAT( TYPE ) { \ - TYPE * restrict p0 = (TYPE *) IM_REGION_ADDR( ir, le, y ); \ - TYPE * restrict p1 = (TYPE *) IM_REGION_ADDR( ir, le, y + 1 ); \ - TYPE * restrict p2 = (TYPE *) IM_REGION_ADDR( ir, le, y + 2 ); \ - TYPE * restrict q = (TYPE *) IM_REGION_ADDR( or, le, y ); \ - \ - for( x = 0; x < sz; x++ ) { \ - double sum; \ - \ - sum = 0; \ - sum += m[0] * p0[0]; \ - sum += m[1] * p0[bands]; \ - sum += m[2] * p0[bands * 2]; \ - sum += m[3] * p1[0]; \ - sum += m[4] * p1[bands]; \ - sum += m[5] * p1[bands * 2]; \ - sum += m[6] * p2[0]; \ - sum += m[7] * p2[bands]; \ - sum += m[8] * p2[bands * 2]; \ - \ - p0 += 1; \ - p1 += 1; \ - p2 += 1; \ - \ - sum = (sum / mask->scale) + mask->offset; \ - \ - q[x] = sum; \ - } \ -} - -/* 3x3 masks are very common, so we have a special path for them. This is - * about 20% faster than the general convolver above. - */ -static int -conv3x3_gen( REGION *or, void *vseq, void *a, void *b ) -{ - ConvSequence *seq = (ConvSequence *) vseq; - IMAGE *in = (IMAGE *) a; - Conv *conv = (Conv *) b; - REGION *ir = seq->ir; - INTMASK *mask = conv->mask; - int * restrict m = mask->coeff; - - /* You might think this should be (scale + 1) / 2, but then we'd be - * adding one for scale == 1. - */ - int rounding = mask->scale / 2; - - Rect *r = &or->valid; - int le = r->left; - int to = r->top; - int bo = IM_RECT_BOTTOM( r ); - int sz = IM_REGION_N_ELEMENTS( or ) * (im_iscomplex( in ) ? 2 : 1); - int bands = in->Bands; - - Rect s; - int x, y; - - /* Prepare the section of the input image we need. A little larger - * than the section of the output image we are producing. - */ - s = *r; - s.width += 2; - s.height += 2; - if( im_prepare( ir, &s ) ) - return( -1 ); - - VIPS_GATE_START( "conv3x3_gen: work" ); - - for( y = to; y < bo; y++ ) { - switch( in->BandFmt ) { - case IM_BANDFMT_UCHAR: - CONV3x3_INT( unsigned char, - IM_CLIP_UCHAR( sum, seq ) ); - break; - - case IM_BANDFMT_CHAR: - CONV3x3_INT( signed char, - IM_CLIP_CHAR( sum, seq ) ); - break; - - case IM_BANDFMT_USHORT: - CONV3x3_INT( unsigned short, - IM_CLIP_USHORT( sum, seq ) ); - break; - - case IM_BANDFMT_SHORT: - CONV3x3_INT( signed short, - IM_CLIP_SHORT( sum, seq ) ); - break; - - case IM_BANDFMT_UINT: - CONV3x3_INT( unsigned int, - IM_CLIP_NONE( sum, seq ) ); - break; - - case IM_BANDFMT_INT: - CONV3x3_INT( signed int, - IM_CLIP_NONE( sum, seq ) ); - break; - - case IM_BANDFMT_FLOAT: - case IM_BANDFMT_COMPLEX: - CONV3x3_FLOAT( float ); - break; - - case IM_BANDFMT_DOUBLE: - case IM_BANDFMT_DPCOMPLEX: - CONV3x3_FLOAT( double ); - break; - - default: - g_assert_not_reached(); - } - } - - VIPS_GATE_STOP( "conv3x3_gen: work" ); - - return( 0 ); -} - -/* The VipsVector codepath. - */ -static int -convvec_gen( REGION *or, void *vseq, void *a, void *b ) -{ - ConvSequence *seq = (ConvSequence *) vseq; - IMAGE *in = (IMAGE *) a; - Conv *conv = (Conv *) b; - INTMASK *mask = conv->mask; - REGION *ir = seq->ir; - - Rect *r = &or->valid; - int sz = IM_REGION_N_ELEMENTS( or ) * (im_iscomplex( in ) ? 2 : 1); - - Rect s; - int j, y; - VipsExecutor convolve[MAX_PASS]; - VipsExecutor clip; - - /* Prepare the section of the input image we need. A little larger - * than the section of the output image we are producing. - */ - s = *r; - s.width += mask->xsize - 1; - s.height += mask->ysize - 1; - if( im_prepare( ir, &s ) ) - return( -1 ); - - VIPS_GATE_START( "convvec_gen: work" ); - - for( j = 0; j < conv->n_pass; j++ ) - vips_executor_set_program( &convolve[j], - conv->pass[j].vector, sz ); - vips_executor_set_program( &clip, conv->clip, sz ); - - for( y = 0; y < r->height; y++ ) { -#ifdef DEBUG_PIXELS -{ - int h, v; - - printf( "before convolve: %d, %d\n", r->left, r->top + y ); - for( v = 0; v < mask->ysize; v++ ) { - for( h = 0; h < mask->xsize; h++ ) - printf( "%3d ", *IM_REGION_ADDR( ir, - r->left + h, r->top + y + v ) ); - printf( "\n" ); - } -} -#endif /*DEBUG_PIXELS*/ - - for( j = 0; j < conv->n_pass; j++ ) { - /* We always read from t1 and write to t2. - */ - vips_executor_set_scanline( &convolve[j], - ir, r->left, r->top + y ); - vips_executor_set_array( &convolve[j], - conv->pass[j].r, seq->t1 ); - vips_executor_set_destination( &convolve[j], seq->t2 ); - vips_executor_run( &convolve[j] ); - - IM_SWAP( void *, seq->t1, seq->t2 ); - } - -#ifdef DEBUG_PIXELS - printf( "before clip: %d\n", ((signed short *) seq->t1)[0] ); -#endif /*DEBUG_PIXELS*/ - - vips_executor_set_array( &clip, conv->s1, seq->t1 ); - vips_executor_set_destination( &clip, - IM_REGION_ADDR( or, r->left, r->top + y ) ); - vips_executor_run( &clip ); - -#ifdef DEBUG_PIXELS - printf( "after clip: %d\n", - *IM_REGION_ADDR( or, r->left, r->top + y ) ); -#endif /*DEBUG_PIXELS*/ - } - - VIPS_GATE_STOP( "convvec_gen: work" ); - - return( 0 ); -} - -int -im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask ) -{ - Conv *conv; - im_generate_fn generate; - -#ifdef DEBUG - printf( "im_conv_raw: starting with matrix:\n" ); - im_print_imask( mask ); -#endif /*DEBUG*/ - - /* Check parameters. - */ - if( im_piocheck( in, out ) || - im_check_uncoded( "im_conv", in ) || - im_check_imask( "im_conv", mask ) ) - return( -1 ); - if( mask->scale == 0 ) { - im_error( "im_conv", "%s", "mask scale must be non-zero" ); - return( -1 ); - } - if( !(conv = conv_new( in, out, mask )) ) - return( -1 ); - - /* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output - * would be 1x1. - */ - if( im_cp_desc( out, in ) ) - return( -1 ); - out->Xsize -= mask->xsize - 1; - out->Ysize -= mask->ysize - 1; - if( out->Xsize <= 0 || out->Ysize <= 0 ) { - im_error( "im_conv", "%s", _( "image too small for mask" ) ); - return( -1 ); - } - - if( conv->n_pass ) { - generate = convvec_gen; - -#ifdef DEBUG - printf( "im_conv_raw: using vector path\n" ); -#endif /*DEBUG*/ - } - else if( mask->xsize == 3 && mask->ysize == 3 ) { - generate = conv3x3_gen; - -#ifdef DEBUG - printf( "im_conv_raw: using 3x3 path\n" ); -#endif /*DEBUG*/ - } - else { - generate = conv_gen; - -#ifdef DEBUG - printf( "im_conv_raw: using general path\n" ); -#endif /*DEBUG*/ - } - - if( im_demand_hint( out, IM_SMALLTILE, in, NULL ) || - im_generate( out, conv_start, generate, conv_stop, in, conv ) ) - return( -1 ); - - out->Xoffset = -mask->xsize / 2; - out->Yoffset = -mask->ysize / 2; - - return( 0 ); -} - -int -im_conv( IMAGE *in, IMAGE *out, INTMASK *mask ) -{ - IMAGE *t1 = im_open_local( out, "im_conv intermediate", "p" ); - - if( !t1 || - im_embed( in, t1, 1, mask->xsize / 2, mask->ysize / 2, - in->Xsize + mask->xsize - 1, - in->Ysize + mask->ysize - 1 ) || - im_conv_raw( t1, out, mask ) ) - return( -1 ); - - out->Xoffset = 0; - out->Yoffset = 0; - - return( 0 ); -} diff --git a/libvips/convolution/im_conv_f.c b/libvips/convolution/im_conv_f.c deleted file mode 100644 index 0cbe104b..00000000 --- a/libvips/convolution/im_conv_f.c +++ /dev/null @@ -1,389 +0,0 @@ -/* im_conv_f - * - * Copyright: 1990, N. Dessipris. - * - * Author: Nicos Dessipris & Kirk Martinez - * Written on: 29/04/1991 - * Modified on: 19/05/1991 - * 8/7/93 JC - * - adapted for partial v2 - * - memory leaks fixed - * - ANSIfied - * 12/7/93 JC - * - adapted im_convbi() to im_convbf() - * 7/10/94 JC - * - new IM_ARRAY() macro - * - evalend callbacks - * - more typedef - * 9/3/01 JC - * - redone from im_conv() - * 27/7/01 JC - * - rejects masks with scale == 0 - * 7/4/04 - * - now uses im_embed() with edge stretching on the input, not - * the output - * - sets Xoffset / Yoffset - * 11/11/05 - * - simpler inner loop avoids gcc4 bug - * 12/11/09 - * - only rebuild the buffer offsets if bpl changes - * - tiny speedups and cleanups - * - add restrict, though it doesn't seem to help gcc - * - add mask-all-zero check - * 13/11/09 - * - rename as im_conv_f() to make it easier for vips.c to make the - * overloaded version - * 3/2/10 - * - gtkdoc - * - more cleanups - * 1/10/10 - * - support complex (just double the bands) - * 29/10/10 - * - get rid of im_convsep_f(), just call this twice, no longer worth - * keeping two versions - * 15/10/11 Nicolas - * - handle offset correctly in seperable convolutions - * 26/1/16 Lovell Fuller - * - remove Duff for a 25% speedup - */ - -/* - - This file is part of VIPS. - - VIPS is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301 USA - - */ - -/* - - These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk - - */ - -#ifdef HAVE_CONFIG_H -#include -#endif /*HAVE_CONFIG_H*/ -#include - -#include -#include -#include - -#include - -/* Our parameters ... we take a copy of the mask argument, plus we make a - * smaller version with the zeros squeezed out. - */ -typedef struct { - IMAGE *in; - IMAGE *out; - DOUBLEMASK *mask; /* Copy of mask arg */ - - int nnz; /* Number of non-zero mask elements */ - double *coeff; /* Array of non-zero mask coefficients */ - int *coeff_pos; /* Index of each nnz element in mask->coeff */ -} Conv; - -static int -conv_close( Conv *conv ) -{ - IM_FREEF( im_free_dmask, conv->mask ); - - return( 0 ); -} - -static Conv * -conv_new( IMAGE *in, IMAGE *out, DOUBLEMASK *mask ) -{ - Conv *conv = IM_NEW( out, Conv ); - const int ne = mask->xsize * mask->ysize; - int i; - - if( !conv ) - return( NULL ); - - conv->in = in; - conv->out = out; - conv->mask = NULL; - conv->nnz = 0; - conv->coeff = NULL; - - if( im_add_close_callback( out, - (im_callback_fn) conv_close, conv, NULL ) || - !(conv->coeff = IM_ARRAY( out, ne, double )) || - !(conv->coeff_pos = IM_ARRAY( out, ne, int )) || - !(conv->mask = im_dup_dmask( mask, "conv_mask" )) ) - return( NULL ); - - /* Find non-zero mask elements. - */ - for( i = 0; i < ne; i++ ) - if( mask->coeff[i] ) { - conv->coeff[conv->nnz] = mask->coeff[i]; - conv->coeff_pos[conv->nnz] = i; - conv->nnz += 1; - } - - /* Was the whole mask zero? We must have at least 1 element in there: - * set it to zero. - */ - if( conv->nnz == 0 ) { - conv->coeff[0] = mask->coeff[0]; - conv->coeff_pos[0] = 0; - conv->nnz = 1; - } - - return( conv ); -} - -/* Our sequence value. - */ -typedef struct { - Conv *conv; - REGION *ir; /* Input region */ - - int *offsets; /* Offsets for each non-zero matrix element */ - VipsPel **pts; /* Per-non-zero mask element image pointers */ - - int last_bpl; /* Avoid recalcing offsets, if we can */ -} ConvSequence; - -/* Free a sequence value. - */ -static int -conv_stop( void *vseq, void *a, void *b ) -{ - ConvSequence *seq = (ConvSequence *) vseq; - - IM_FREEF( im_region_free, seq->ir ); - - return( 0 ); -} - -/* Convolution start function. - */ -static void * -conv_start( IMAGE *out, void *a, void *b ) -{ - IMAGE *in = (IMAGE *) a; - Conv *conv = (Conv *) b; - ConvSequence *seq; - - if( !(seq = IM_NEW( out, ConvSequence )) ) - return( NULL ); - - /* Init! - */ - seq->conv = conv; - seq->ir = NULL; - seq->pts = NULL; - seq->last_bpl = -1; - - /* Attach region and arrays. - */ - seq->ir = im_region_create( in ); - seq->offsets = IM_ARRAY( out, conv->nnz, int ); - seq->pts = IM_ARRAY( out, conv->nnz, VipsPel * ); - if( !seq->ir || !seq->offsets || !seq->pts ) { - conv_stop( seq, in, conv ); - return( NULL ); - } - - return( (void *) seq ); -} - -#define CONV_FLOAT( ITYPE, OTYPE ) { \ - ITYPE ** restrict p = (ITYPE **) seq->pts; \ - OTYPE * restrict q = (OTYPE *) IM_REGION_ADDR( or, le, y ); \ - \ - for( x = 0; x < sz; x++ ) { \ - double sum; \ - int i; \ - \ - sum = 0; \ - for ( i = 0; i < nnz; i++ ) \ - sum += t[i] * p[i][x]; \ - \ - sum = (sum / mask->scale) + mask->offset; \ - \ - q[x] = sum; \ - } \ -} - -/* Convolve! - */ -static int -conv_gen( REGION *or, void *vseq, void *a, void *b ) -{ - ConvSequence *seq = (ConvSequence *) vseq; - IMAGE *in = (IMAGE *) a; - Conv *conv = (Conv *) b; - REGION *ir = seq->ir; - DOUBLEMASK *mask = conv->mask; - double * restrict t = conv->coeff; - const int nnz = conv->nnz; - - Rect *r = &or->valid; - Rect s; - int le = r->left; - int to = r->top; - int bo = IM_RECT_BOTTOM(r); - int sz = IM_REGION_N_ELEMENTS( or ) * - (vips_band_format_iscomplex( in->BandFmt ) ? 2 : 1); - - int x, y, z, i; - - /* Prepare the section of the input image we need. A little larger - * than the section of the output image we are producing. - */ - s = *r; - s.width += mask->xsize - 1; - s.height += mask->ysize - 1; - if( im_prepare( ir, &s ) ) - return( -1 ); - - /* Fill offset array. Only do this if the bpl has changed since the - * previous im_prepare(). - */ - if( seq->last_bpl != IM_REGION_LSKIP( ir ) ) { - seq->last_bpl = IM_REGION_LSKIP( ir ); - - for( i = 0; i < nnz; i++ ) { - z = conv->coeff_pos[i]; - x = z % conv->mask->xsize; - y = z / conv->mask->xsize; - - seq->offsets[i] = - IM_REGION_ADDR( ir, x + le, y + to ) - - IM_REGION_ADDR( ir, le, to ); - } - } - - for( y = to; y < bo; y++ ) { - /* Init pts for this line of PELs. - */ - for( z = 0; z < nnz; z++ ) - seq->pts[z] = seq->offsets[z] + - IM_REGION_ADDR( ir, le, y ); - - switch( in->BandFmt ) { - case IM_BANDFMT_UCHAR: - CONV_FLOAT( unsigned char, float ); break; - case IM_BANDFMT_CHAR: - CONV_FLOAT( signed char, float ); break; - case IM_BANDFMT_USHORT: - CONV_FLOAT( unsigned short, float ); break; - case IM_BANDFMT_SHORT: - CONV_FLOAT( signed short, float ); break; - case IM_BANDFMT_UINT: - CONV_FLOAT( unsigned int, float ); break; - case IM_BANDFMT_INT: - CONV_FLOAT( signed int, float ); break; - case IM_BANDFMT_FLOAT: - case IM_BANDFMT_COMPLEX: - CONV_FLOAT( float, float ); break; - case IM_BANDFMT_DOUBLE: - case IM_BANDFMT_DPCOMPLEX: - CONV_FLOAT( double, double ); break; - - default: - g_assert_not_reached(); - } - } - - return( 0 ); -} - -int -im_conv_f_raw( IMAGE *in, IMAGE *out, DOUBLEMASK *mask ) -{ - Conv *conv; - - /* Check parameters. - */ - if( im_piocheck( in, out ) || - im_check_uncoded( "im_conv", in ) || - im_check_dmask( "im_conv", mask ) ) - return( -1 ); - if( mask->scale == 0 ) { - im_error( "im_conv_f", "%s", "mask scale must be non-zero" ); - return( -1 ); - } - if( !(conv = conv_new( in, out, mask )) ) - return( -1 ); - - /* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output - * would be 1x1. - */ - if( im_cp_desc( out, in ) ) - return( -1 ); - if( vips_bandfmt_isint( in->BandFmt ) ) - out->BandFmt = IM_BANDFMT_FLOAT; - out->Xsize -= mask->xsize - 1; - out->Ysize -= mask->ysize - 1; - if( out->Xsize <= 0 || out->Ysize <= 0 ) { - im_error( "im_conv_f", "%s", _( "image too small for mask" ) ); - return( -1 ); - } - - if( im_demand_hint( out, IM_SMALLTILE, in, NULL ) ) - return( -1 ); - - if( im_generate( out, conv_start, conv_gen, conv_stop, in, conv ) ) - return( -1 ); - - out->Xoffset = -mask->xsize / 2; - out->Yoffset = -mask->ysize / 2; - - return( 0 ); -} - -/** - * im_conv_f: - * @in: input image - * @out: output image - * @mask: convolution mask - * - * Convolve @in with @mask using floating-point arithmetic. The output image - * is always %IM_BANDFMT_FLOAT unless @in is %IM_BANDFMT_DOUBLE, in which case - * @out is also %IM_BANDFMT_DOUBLE. - * - * Each output pixel is - * calculated as sigma[i]{pixel[i] * mask[i]} / scale + offset, where scale - * and offset are part of @mask. - * - * See also: im_conv(), im_convsep_f(), im_create_dmaskv(). - * - * Returns: 0 on success, -1 on error - */ -int -im_conv_f( IMAGE *in, IMAGE *out, DOUBLEMASK *mask ) -{ - IMAGE *t1 = im_open_local( out, "im_conv_f intermediate", "p" ); - - if( !t1 || - im_embed( in, t1, 1, mask->xsize / 2, mask->ysize / 2, - in->Xsize + mask->xsize - 1, - in->Ysize + mask->ysize - 1 ) || - im_conv_f_raw( t1, out, mask ) ) - return( -1 ); - - out->Xoffset = 0; - out->Yoffset = 0; - - return( 0 ); -} diff --git a/libvips/create/worley.c b/libvips/create/worley.c index dc509731..514b7258 100644 --- a/libvips/create/worley.c +++ b/libvips/create/worley.c @@ -1,6 +1,9 @@ /* Worley noise generator. * * 19/7/16 + * + * 11/8/16 + * - float output */ /* @@ -215,7 +218,7 @@ vips_worley_start( VipsImage *out, void *a, void *b ) return( seq ); } -static int +static float vips_hypot( int x, int y ) { /* Faster than hypot() for int args. @@ -223,10 +226,10 @@ vips_hypot( int x, int y ) return( sqrt( x * x + y * y ) ); } -static int +static float vips_worley_distance( VipsWorley *worley, Cell cells[9], int x, int y ) { - int distance; + float distance; int i, j; @@ -236,7 +239,7 @@ vips_worley_distance( VipsWorley *worley, Cell cells[9], int x, int y ) Cell *cell = &cells[i]; for( j = 0; j < cell->n_features; j++ ) { - int d = vips_hypot( + float d = vips_hypot( x - cell->feature_x[j], y - cell->feature_y[j] ); @@ -258,7 +261,7 @@ vips_worley_gen( VipsRegion *or, void *vseq, void *a, void *b, int x, y; for( y = 0; y < r->height; y++ ) { - int *q = (int *) VIPS_REGION_ADDR( or, r->left, r->top + y ); + float *q = (float *) VIPS_REGION_ADDR( or, r->left, r->top + y ); for( x = 0; x < r->width; x++ ) { int cell_x = (r->left + x) / worley->cell_size; @@ -300,7 +303,7 @@ vips_worley_build( VipsObject *object ) vips_image_init_fields( create->out, worley->width, worley->height, 1, - VIPS_FORMAT_INT, VIPS_CODING_NONE, VIPS_INTERPRETATION_B_W, + VIPS_FORMAT_FLOAT, VIPS_CODING_NONE, VIPS_INTERPRETATION_B_W, 1.0, 1.0 ); vips_image_pipelinev( create->out, VIPS_DEMAND_STYLE_ANY, NULL ); @@ -365,7 +368,7 @@ vips_worley_init( VipsWorley *worley ) * * * @cell_size: %gint, size of Worley cells * - * Create a one-band int image of Worley noise. See: + * Create a one-band float image of Worley noise. See: * * https://en.wikipedia.org/wiki/Worley_noise * diff --git a/libvips/deprecated/vips7compat.c b/libvips/deprecated/vips7compat.c index 18713e38..10eec9d0 100644 --- a/libvips/deprecated/vips7compat.c +++ b/libvips/deprecated/vips7compat.c @@ -2404,6 +2404,116 @@ im_convsep_f( IMAGE *in, IMAGE *out, DOUBLEMASK *mask ) return( 0 ); } +int +im_conv( VipsImage *in, VipsImage *out, INTMASK *mask ) +{ + VipsImage *t1, *t2; + + if( !(t1 = vips_image_new()) || + im_imask2vips( mask, t1 ) ) + return( -1 ); + if( vips_convi( in, &t2, t1, + NULL ) ) { + g_object_unref( t1 ); + return( -1 ); + } + g_object_unref( t1 ); + if( vips_image_write( t2, out ) ) { + g_object_unref( t2 ); + return( -1 ); + } + g_object_unref( t2 ); + + return( 0 ); +} + +int +im_conv_raw( VipsImage *in, VipsImage *out, INTMASK *mask ) +{ + im_error( "im_conv_raw", "no compat function" ); + return( -1 ); +} + +int +im_conv_f( VipsImage *in, VipsImage *out, DOUBLEMASK *mask ) +{ + VipsImage *t1, *t2; + + if( !(t1 = vips_image_new()) || + im_mask2vips( mask, t1 ) ) + return( -1 ); + if( vips_convf( in, &t2, t1, + NULL ) ) { + g_object_unref( t1 ); + return( -1 ); + } + g_object_unref( t1 ); + if( vips_image_write( t2, out ) ) { + g_object_unref( t2 ); + return( -1 ); + } + g_object_unref( t2 ); + + return( 0 ); +} + +int +im_aconvsep( VipsImage *in, VipsImage *out, DOUBLEMASK *mask, int n_layers ) +{ + VipsImage *t1, *t2; + + if( !(t1 = vips_image_new()) || + im_mask2vips( mask, t1 ) ) + return( -1 ); + if( vips_convasep( in, &t2, t1, + "layers", n_layers, + NULL ) ) { + g_object_unref( t1 ); + return( -1 ); + } + g_object_unref( t1 ); + if( vips_image_write( t2, out ) ) { + g_object_unref( t2 ); + return( -1 ); + } + g_object_unref( t2 ); + + return( 0 ); +} + +int +im_aconv( VipsImage *in, VipsImage *out, + DOUBLEMASK *mask, int n_layers, int cluster ) +{ + VipsImage *t1, *t2; + + if( !(t1 = vips_image_new()) || + im_mask2vips( mask, t1 ) ) + return( -1 ); + if( vips_conva( in, &t2, t1, + "layers", n_layers, + "cluster", cluster, + NULL ) ) { + g_object_unref( t1 ); + return( -1 ); + } + g_object_unref( t1 ); + if( vips_image_write( t2, out ) ) { + g_object_unref( t2 ); + return( -1 ); + } + g_object_unref( t2 ); + + return( 0 ); +} + +int +im_conv_f_raw( VipsImage *in, VipsImage *out, DOUBLEMASK *mask ) +{ + im_error( "im_conv_f_raw", "no compat function" ); + return( -1 ); +} + int im_addgnoise( IMAGE *in, IMAGE *out, double sigma ) { diff --git a/libvips/foreign/csv.c b/libvips/foreign/csv.c index 2e35cf16..f72d76f1 100644 --- a/libvips/foreign/csv.c +++ b/libvips/foreign/csv.c @@ -30,6 +30,8 @@ * 4/6/15 * - try to support DOS files under linux ... we have to look for \r\n * linebreaks + * 12/8/16 + * - allow missing offset in matrix header */ /* @@ -522,6 +524,8 @@ read_ascii_double( FILE *fp, const char whitemap[256], double *out ) /* Read the header. Two numbers for width and height, and two optional * numbers for scale and offset. + * + * We can have scale and no offset, in which case we assume offset = 0. */ static int vips__matrix_header( char *whitemap, FILE *fp, @@ -532,6 +536,11 @@ vips__matrix_header( char *whitemap, FILE *fp, int i; int ch; + /* Offset defaults to zero. + */ + header[2] = 1.0; + header[3] = 0.0; + for( i = 0; i < 4 && (ch = read_ascii_double( fp, whitemap, &header[i] )) == 0; i++ ) @@ -556,22 +565,17 @@ vips__matrix_header( char *whitemap, FILE *fp, "%s", _( "width / height out of range" ) ); return( -1 ); } - if( i == 3 ) { - vips_error( "mask2vips", "%s", _( "bad scale / offset" ) ); - return( -1 ); - } if( (ch = read_ascii_double( fp, whitemap, &d )) != '\n' ) { vips_error( "mask2vips", "%s", _( "extra chars in header" ) ); return( -1 ); } - if( i > 2 && - header[2] == 0.0 ) { + if( header[2] == 0.0 ) { vips_error( "mask2vips", "%s", _( "zero scale" ) ); return( -1 ); } - *scale = i > 2 ? header[2] : 1.0; - *offset = i > 2 ? header[3] : 0.0; + *scale = header[2]; + *offset = header[3]; skip_line( fp ); diff --git a/libvips/include/vips/convolution.h b/libvips/include/vips/convolution.h index 1240301e..a79df95b 100644 --- a/libvips/include/vips/convolution.h +++ b/libvips/include/vips/convolution.h @@ -31,8 +31,8 @@ */ -#ifndef IM_CONVOLUTION_H -#define IM_CONVOLUTION_H +#ifndef VIPS_CONVOLUTION_H +#define VIPS_CONVOLUTION_H #ifdef __cplusplus extern "C" { @@ -46,15 +46,23 @@ typedef enum { int vips_conv( VipsImage *in, VipsImage **out, VipsImage *mask, ... ) __attribute__((sentinel)); -int vips_compass( VipsImage *in, VipsImage **out, VipsImage *mask, ... ) +int vips_convf( VipsImage *in, VipsImage **out, VipsImage *mask, ... ) + __attribute__((sentinel)); +int vips_convi( VipsImage *in, VipsImage **out, VipsImage *mask, ... ) + __attribute__((sentinel)); +int vips_conva( VipsImage *in, VipsImage **out, VipsImage *mask, ... ) __attribute__((sentinel)); int vips_convsep( VipsImage *in, VipsImage **out, VipsImage *mask, ... ) __attribute__((sentinel)); +int vips_convasep( VipsImage *in, VipsImage **out, VipsImage *mask, ... ) + __attribute__((sentinel)); -int vips_sharpen( VipsImage *in, VipsImage **out, ... ) +int vips_compass( VipsImage *in, VipsImage **out, VipsImage *mask, ... ) __attribute__((sentinel)); int vips_gaussblur( VipsImage *in, VipsImage **out, double sigma, ... ) __attribute__((sentinel)); +int vips_sharpen( VipsImage *in, VipsImage **out, ... ) + __attribute__((sentinel)); int vips_spcor( VipsImage *in, VipsImage *ref, VipsImage **out, ... ) __attribute__((sentinel)); @@ -65,4 +73,4 @@ int vips_fastcor( VipsImage *in, VipsImage *ref, VipsImage **out, ... ) } #endif /*__cplusplus*/ -#endif /*IM_CONVOLUTION_H*/ +#endif /*VIPS_CONVOLUTION_H*/ diff --git a/libvips/include/vips/internal.h b/libvips/include/vips/internal.h index bc832d2a..6d32afa2 100644 --- a/libvips/include/vips/internal.h +++ b/libvips/include/vips/internal.h @@ -246,11 +246,12 @@ typedef struct _VipsImagePixels { gint64 npels; /* Number of pels calculated so far */ } VipsImagePixels; -int -vips__foreign_convert_saveable( VipsImage *in, VipsImage **ready, +int vips__foreign_convert_saveable( VipsImage *in, VipsImage **ready, VipsSaveable saveable, VipsBandFormat *format, VipsCoding *coding, VipsArrayDouble *background ); +int vips__image_intize( VipsImage *in, VipsImage **out ); + #ifdef __cplusplus } #endif /*__cplusplus*/ diff --git a/libvips/include/vips/vector.h b/libvips/include/vips/vector.h index 8ce714cb..88e7eb20 100644 --- a/libvips/include/vips/vector.h +++ b/libvips/include/vips/vector.h @@ -136,6 +136,8 @@ void vips_executor_set_array( VipsExecutor *executor, int var, void *value ); void vips_executor_run( VipsExecutor *executor ); +void vips_vector_to_fixed_point( double *in, int *out, int n, int scale ); + #ifdef __cplusplus } #endif /*__cplusplus*/ diff --git a/libvips/iofuncs/memory.c b/libvips/iofuncs/memory.c index 21a11741..ca5e2cca 100644 --- a/libvips/iofuncs/memory.c +++ b/libvips/iofuncs/memory.c @@ -109,7 +109,15 @@ static GMutex *vips_tracked_mutex = NULL; * @OBJ: allocate memory local to @OBJ, or %NULL for no auto-free * @T: type of thing to allocate * - * Returns: A pointer of type @T *, or %NULL on error. + * Allocate memory for a thing of type @T. The memory is not + * cleared. + * + * This macro cannot fail. See vips_tracked_malloc() if you are + * allocating large amounts of memory. + * + * See also: vips_malloc(). + * + * Returns: A pointer of type @T *. */ /** @@ -118,7 +126,15 @@ static GMutex *vips_tracked_mutex = NULL; * @N: number of @T 's to allocate * @T: type of thing to allocate * - * Returns: A pointer of type @T *, or %NULL on error. + * Allocate memory for an array of objects of type @T. The memory is not + * cleared. + * + * This macro cannot fail. See vips_tracked_malloc() if you are + * allocating large amounts of memory. + * + * See also: vips_malloc(). + * + * Returns: A pointer of type @T *. */ static void @@ -141,7 +157,7 @@ vips_malloc_cb( VipsObject *object, char *buf ) * * See also: vips_tracked_malloc(). * - * Returns: (transfer full): a pointer to the allocated memory + * Returns: (transfer full): a pointer to the allocated memory. */ void * vips_malloc( VipsObject *object, size_t size ) @@ -166,7 +182,7 @@ vips_malloc( VipsObject *object, size_t size ) * * g_strdup() a string. When @object is freed, the string will be freed for * you. If @object is %NULL, you need to - * free the memory explicitly with g_free(). + * free the memory yourself with g_free(). * * This function cannot fail. * diff --git a/libvips/iofuncs/vector.c b/libvips/iofuncs/vector.c index b96ac01f..f405c3ad 100644 --- a/libvips/iofuncs/vector.c +++ b/libvips/iofuncs/vector.c @@ -522,3 +522,84 @@ vips_executor_run( VipsExecutor *executor ) orc_executor_run( &executor->executor ); #endif /*HAVE_ORC*/ } + +/* Make a fixed-point version of a matrix. Each + * out[i] = rint(in[i] * adj_scale), where adj_scale is selected so that + * sum(out) = sum(in) * scale. + * + * Because of the vagaries of rint(), we can't just calc this, we have to + * iterate and converge on the best value for adj_scale. + */ +void +vips_vector_to_fixed_point( double *in, int *out, int n, int scale ) +{ + double fsum; + int i; + int target; + int sum; + double high; + double low; + double guess; + + fsum = 0.0; + for( i = 0; i < n; i++ ) + fsum += in[i]; + target = VIPS_RINT( fsum * scale ); + + /* As we rint() each scale element, we can get up to 0.5 error. + * Therefore, by the end of the mask, we can be off by up to n/2. Our + * high and low guesses are therefore n/2 either side of the obvious + * answer. + */ + high = scale + (n + 1) / 2; + low = scale - (n + 1) / 2; + + do { + guess = (high + low) / 2.0; + + for( i = 0; i < n; i++ ) + out[i] = VIPS_RINT( in[i] * guess ); + + sum = 0; + for( i = 0; i < n; i++ ) + sum += out[i]; + + if( sum == target ) + break; + if( sum < target ) + low = guess; + if( sum > target ) + high = guess; + + /* This will typically produce about 5 iterations. + */ + } while( high - low > 0.01 ); + + if( sum != target ) { + /* Spread the error out thinly over the whole array. For + * example, consider the matrix: + * + * 3 3 9 0 + * 1 1 1 + * 1 1 1 + * 1 1 1 + * + * being converted with scale = 64 (convi does this). We want + * to generate a mix of 7s and 8s. + */ + int each_error = (target - sum) / n; + int extra_error = (target - sum) % n; + + /* To share the residual error, we add or subtract 1 from the + * first abs(extra_error) elements. + */ + int direction = extra_error > 0 ? 1 : -1; + int n_elements = VIPS_ABS( extra_error ); + + for( i = 0; i < n; i++ ) + out[i] += each_error; + + for( i = 0; i < n_elements; i++ ) + out[i] += direction; + } +} diff --git a/libvips/resample/reducev.cpp b/libvips/resample/reducev.cpp index 760b3b0b..362ca861 100644 --- a/libvips/resample/reducev.cpp +++ b/libvips/resample/reducev.cpp @@ -702,65 +702,6 @@ vips_reducev_vector_gen( VipsRegion *out_region, void *vseq, return( 0 ); } -/* Make a fixed-point version of a mask. Each out[i] = rint(in[i] * adj_scale), - * where adj_scale is selected so that sum(out) = sum(in) * scale. - * - * Because of the vagaries of rint(), we can't just calc this, we have to - * iterate and converge on the best value for adj_scale. - */ -static void -vips_reducev_intize( double *in, int *out, int n, int scale ) -{ - double fsum; - int target; - int sum; - double high; - double low; - double guess; - - fsum = 0.0; - for( int i = 0; i < n; i++ ) - fsum += in[i]; - target = VIPS_RINT( fsum * scale ); - - /* As we rint() each scale element, we can get up to 0.5 error. - * Therefore, by the end of the mask, we can be off by up to n/2. Our - * high and low guesses are therefore n/2 either side of the obvious - * answer. - */ - high = scale + (n + 1) / 2; - low = scale - (n + 1) / 2; - - do { - guess = (high + low) / 2.0; - - for( int i = 0; i < n; i++ ) - out[i] = VIPS_RINT( in[i] * guess ); - - sum = 0; - for( int i = 0; i < n; i++ ) - sum += out[i]; - - if( sum == target ) - break; - if( sum < target ) - low = guess; - if( sum > target ) - high = guess; - - /* This will typically produce about 5 iterations. - */ - } while( high - low > 0.01 ); - - if( sum != target ) - /* We're as close as we can get ... add the remaining error to - * the centre element. Hopefully we'll get slight sharpness - * changes rather than slight brightness changes and it'll - * be less visible. - */ - out[n / 2] += target - sum; -} - static int vips_reducev_raw( VipsReducev *reducev, VipsImage *in ) { @@ -798,7 +739,7 @@ vips_reducev_raw( VipsReducev *reducev, VipsImage *in ) if( !reducev->matrixi[y] ) return( -1 ); - vips_reducev_intize( + vips_vector_to_fixed_point( reducev->matrixf[y], reducev->matrixi[y], reducev->n_point, VIPS_INTERPOLATE_SCALE ); } @@ -813,7 +754,7 @@ vips_reducev_raw( VipsReducev *reducev, VipsImage *in ) if( !reducev->matrixo[y] ) return( -1 ); - vips_reducev_intize( + vips_vector_to_fixed_point( reducev->matrixf[y], reducev->matrixo[y], reducev->n_point, 64 ); } diff --git a/test/test_convolution.py b/test/test_convolution.py index 03e62651..ea516db6 100755 --- a/test/test_convolution.py +++ b/test/test_convolution.py @@ -96,6 +96,12 @@ class TestConvolution(unittest.TestCase): for x, y in zip_expand(a, b): self.assertAlmostEqual(x, y, places = places, msg = msg) + # test a pair of things which can be lists for difference less than a + # threshold + def assertLessThreshold(self, a, b, diff): + for x, y in zip_expand(a, b): + self.assertLess(abs(x - y), diff) + def setUp(self): im = Vips.Image.mask_ideal(100, 100, 0.5, reject = True, optical = True) self.colour = im * [1, 2, 3] + [2, 3, 4] @@ -131,6 +137,26 @@ class TestConvolution(unittest.TestCase): true = conv(im, msk, 49, 49) self.assertAlmostEqualObjects(result, true) + # don't test conva, it's still not done + def dont_test_conva(self): + for im in self.all_images: + for msk in self.all_masks: + print("msk:") + msk.matrixprint() + print("im.bands = %s" % im.bands) + + convolved = im.conv(msk, precision = Vips.Precision.APPROXIMATE) + + result = convolved(25, 50) + true = conv(im, msk, 24, 49) + print("result = %s, true = %s" % (result, true)) + self.assertLessThreshold(result, true, 5) + + result = convolved(50, 50) + true = conv(im, msk, 49, 49) + print("result = %s, true = %s" % (result, true)) + self.assertLessThreshold(result, true, 5) + def test_compass(self): for im in self.all_images: for msk in self.all_masks: diff --git a/test/test_create.py b/test/test_create.py index 861ee07d..9641efa8 100755 --- a/test/test_create.py +++ b/test/test_create.py @@ -446,7 +446,7 @@ class TestCreate(unittest.TestCase): self.assertEqual(im.width, 512) self.assertEqual(im.height, 512) self.assertEqual(im.bands, 1) - self.assertEqual(im.format, Vips.BandFormat.INT) + self.assertEqual(im.format, Vips.BandFormat.FLOAT) def test_perlin(self): im = Vips.Image.perlin(512, 512)