diff --git a/libvips/convolution/Makefile.am b/libvips/convolution/Makefile.am
index a0d2e1ab..88bb3e2e 100644
--- a/libvips/convolution/Makefile.am
+++ b/libvips/convolution/Makefile.am
@@ -15,7 +15,6 @@ libconvolution_la_SOURCES = \
 	sharpen.c \
 	gaussblur.c \
 	im_aconv.c \
-	im_aconvsep.c \
-	im_conv.c 
+	im_aconvsep.c 
 
 AM_CPPFLAGS = -I${top_srcdir}/libvips/include @VIPS_CFLAGS@ @VIPS_INCLUDES@ 
diff --git a/libvips/convolution/im_conv.c b/libvips/convolution/im_conv.c
deleted file mode 100644
index 234739b5..00000000
--- a/libvips/convolution/im_conv.c
+++ /dev/null
@@ -1,1097 +0,0 @@
-/* im_conv
- *
- * Copyright: 1990, N. Dessipris.
- *
- * Author: Nicos Dessipris & Kirk Martinez
- * Written on: 29/04/1991
- * Modified on: 19/05/1991
- * 8/7/93 JC
- *      - adapted for partial v2
- *      - memory leaks fixed
- *      - ANSIfied
- * 23/7/93 JC
- *	- inner loop unrolled with a switch - 25% speed-up!
- * 13/12/93 JC
- *	- tiny rounding error removed
- * 7/10/94 JC
- *	- new IM_ARRAY() macro
- *	- various simplifications
- *	- evalend callback added
- * 1/2/95 JC
- *	- use of IM_REGION_ADDR() updated
- *	- output size was incorrect! see comment below
- *	- bug with large non-square matricies fixed too
- *	- uses new im_embed() function
- * 13/7/98 JC
- *	- wierd bug ... im_free_imask is no longer directly called for close
- *	  callback, caused SIGKILL on solaris 2.6 ... linker bug?
- * 9/3/01 JC
- *	- reworked and simplified, about 10% faster
- *	- slightly better range clipping
- * 27/7/01 JC
- *	- reject masks with scale == 0
- * 7/4/04 
- *	- im_conv() now uses im_embed() with edge stretching on the input, not
- *	  the output
- *	- sets Xoffset / Yoffset
- * 11/11/05
- * 	- simpler inner loop avoids gcc4 bug 
- * 7/11/07
- * 	- new evalstart/end callbacks
- * 12/5/08
- * 	- int rounding was +1 too much, argh
- * 	- only rebuild the buffer offsets if bpl changes
- * 5/4/09
- * 	- tiny speedups and cleanups
- * 	- add restrict, though it doesn't seem to help gcc
- * 12/11/09
- * 	- only check for non-zero elements once
- * 	- add mask-all-zero check
- * 	- cleanups
- * 3/2/10
- * 	- gtkdoc
- * 	- more cleanups
- * 23/08/10
- * 	- add a special case for 3x3 masks, about 20% faster
- * 1/10/10
- * 	- support complex (just double the bands)
- * 18/10/10
- * 	- add experimental Orc path
- * 29/10/10
- * 	- use VipsVector
- * 	- get rid of im_convsep(), just call this twice, no longer worth
- * 	  keeping two versions
- * 8/11/10
- * 	- add array tiling
- * 9/5/11
- * 	- argh typo in overflow estimation could cause errors
- * 15/10/11 Nicolas
- * 	- handle offset correctly in seperable convolutions
- * 26/1/16 Lovell Fuller
- * 	- remove Duff for a 25% speedup
- */
-
-/*
-
-    This file is part of VIPS.
-    
-    VIPS is free software; you can redistribute it and/or modify
-    it under the terms of the GNU Lesser General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-    02110-1301  USA
-
- */
-
-/*
-
-    These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk
-
- */
-
-/* Show sample pixels as they are transformed.
-#define DEBUG_PIXELS
- */
-
-/*
-#define DEBUG
- */
-
-/* 
-
- 	TODO
-
-	- tried 8-bit data with a 32-bit intermediate, but it was only 
-	  slightly faster than C
-
-	  16-bit data would be even slower, no speed advantage
-
-	- make up a signed 8-bit code path?
-
-	- don't use divluw, it's insanely slow, instead scale coefficients so 
-	  that we can just do >>8 at the end
-
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif /*HAVE_CONFIG_H*/
-#include <vips/intl.h>
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <limits.h>
-
-#include <vips/vips.h>
-#include <vips/vector.h>
-
-/* We can't run more than this many passes. Larger than this and we
- * fall back to C.
- */
-#define MAX_PASS (10)
-
-/* A pass with a vector. 
- */
-typedef struct {
-	int first;		/* The index of the first mask coff we use */
-	int last;		/* The index of the last mask coff we use */
-
-	int r;			/* Set previous result in this var */
-
-        /* The code we generate for this section of this mask. 
-	 */
-        VipsVector *vector;
-} Pass;
-
-/* Our parameters ... we take a copy of the mask argument, plus we make a
- * smaller version with the zeros squeezed out. 
- */
-typedef struct {
-	IMAGE *in;
-	IMAGE *out;
-	INTMASK *mask;		/* Copy of mask arg */
-
-	int nnz;		/* Number of non-zero mask elements */
-	int *coeff;		/* Array of non-zero mask coefficients */
-	int *coeff_pos;		/* Index of each nnz element in mask->coeff */
-
-	int underflow;		/* Global underflow/overflow counts */
-	int overflow;
-
-	/* The convolver we generate for this mask. We have to split the
-	 * convolve and clip into two phases.
-	 */
-	int n_pass;	
-	Pass pass[MAX_PASS];
-	int s1;			/* Input to clip */
-	VipsVector *clip;
-} Conv;
-
-static void
-conv_vector_free( Conv *conv )
-{
-	int i;
-
-	for( i = 0; i < conv->n_pass; i++ )
-		IM_FREEF( vips_vector_free, conv->pass[i].vector );
-	conv->n_pass = 0;
-
-	IM_FREEF( vips_vector_free, conv->clip );
-}
-
-static int
-conv_close( Conv *conv )
-{
-	IM_FREEF( im_free_imask, conv->mask );
-	conv_vector_free( conv );
-
-        return( 0 );
-}
-
-static int
-conv_evalstart( Conv *conv )
-{
-	/* Reset underflow/overflow count.
-	 *
-	 * This often doesn't get called until eval has already finished, so
-	 * resetting here just wipes all records. 
-	 *
-	conv->overflow = 0;
-	conv->underflow = 0;
-	 *
-	 */
-
-        return( 0 );
-}
-
-static int
-conv_evalend( Conv *conv )
-{
-	if( conv->overflow ) 
-		vips_info( "im_conv", 
-			_( "%d overflows detected" ), conv->overflow ); 
-	if( conv->underflow )
-		vips_info( "im_conv", 
-			_( "%d underflows detected" ), conv->underflow );
-
-        return( 0 );
-}
-
-#define TEMP( N, S ) vips_vector_temporary( v, N, S )
-#define SCANLINE( N, P, S ) vips_vector_source_scanline( v, N, P, S )
-#define CONST( N, V, S ) vips_vector_constant( v, N, V, S )
-#define ASM2( OP, A, B ) vips_vector_asm2( v, OP, A, B )
-#define ASM3( OP, A, B, C ) vips_vector_asm3( v, OP, A, B, C )
-
-/* Generate code for a section of the mask.
- *
- * 0 for success, -1 on error.
- */
-static int
-conv_compile_convolution_u8s16_section( Pass *pass, 
-	Conv *conv, gboolean first_pass )
-{
-	INTMASK *mask = conv->mask;
-	const int n_mask = mask->xsize * mask->ysize; 
-
-	int i;
-	VipsVector *v;
-	char zero[256];
-	char offset[256];
-	char source[256];
-	char coeff[256];
-
-	pass->vector = v = vips_vector_new( "conv", 2 );
-
-	/* The value we fetch from the image, the product with the matrix
-	 * value, the accumulated sum.
-	 */
-	TEMP( "value", 1 );
-	TEMP( "product", 2 );
-	TEMP( "sum", 2 );
-
-	/* Init the sum. If this is the first pass, it's a constant. If this
-	 * is a later pass, we have to init the sum from the result 
-	 * of the previous pass. 
-	 */
-	if( first_pass ) {
-		CONST( zero, 0, 2 );
-		ASM2( "copyw", "sum", zero );
-	}
-	else {
-		/* "r" is the result of the previous pass. 
-		 */
-		pass->r = vips_vector_source_name( v, "r", 2 );
-		ASM2( "loadw", "sum", "r" );
-	}
-
-	for( i = pass->first; i < n_mask; i++ ) {
-		int x = i % mask->xsize;
-		int y = i / mask->xsize;
-
-		if( !mask->coeff[i] )
-			/* Exclude zero elements.
-			 */
-			continue;
-
-		/* The source. sl0 is the first scanline in the mask.
-		 */
-		SCANLINE( source, y, 1 );
-
-		/* The offset, only for non-first-columns though.
-		 */
-		if( x > 0 ) 
-			CONST( offset, conv->in->Bands * x, 1 );
-
-		/* The coefficient. Only for non-1 coeffs though, we skip the
-		 * mul for them.
-		 *
-		 * We need to do 8-bit unsigned pixel * signed mask, so we
-		 * have to cast the pixel up to 16-bit then do a mult against a
-		 * 16-bit constant. We know the result will fit in the bottom
-		 * 16 bits.
-		 */
-		if( mask->coeff[i] != 1 ) 
-			CONST( coeff, mask->coeff[i], 2 );
-
-		/* Two factors: 
-		 * - element is in the first column, ie. has a zero offset
-		 * - mask coeff is 1, ie. we can skip the multiply
-		 *
-		 * We could combine some of these cases, but it's simpler
-		 * and safer to spell them all out.
-		 */
-		if( x == 0 ) 
-			ASM2( "loadb", "value", source );
-		else 
-			ASM3( "loadoffb", "value", source, offset );
-
-		ASM2( "convubw", "product", "value" );
-
-		if( mask->coeff[i] != 1 ) 
-			ASM3( "mullw", "product", "product", coeff );
-
-		ASM3( "addssw", "sum", "sum", "product" );
-
-		if( vips_vector_full( v ) )
-			break;
-	}
-
-	pass->last = i;
-
-	ASM2( "copyw", "d1", "sum" );
-
-#ifdef DEBUG
-	vips_vector_print( v );
-	printf( "compiling ...\n" );
-#endif /*DEBUG*/
-
-	if( !vips_vector_compile( v ) ) 
-		return( -1 );
-
-	return( 0 );
-}
-
-/* Generate the convolution pass for u8 data with an s16 accumulator.
- *
- * 0 for success, -1 on error.
- */
-static int
-conv_compile_convolution_u8s16( Conv *conv )
-{
-	INTMASK *mask = conv->mask;
-	const int n_mask = mask->xsize * mask->ysize; 
-
-	double min, max;
-	int i;
-
-	if( conv->in->BandFmt != IM_BANDFMT_UCHAR )
-		return( -1 );
-
-	/* Can the accumulator overflow or underflow at any stage? Since
-	 * matrix elements are signed, we need to calculate a running 
-	 * possible min and max.
-	 */
-	min = 0;
-	max = 0;
-	for( i = 0; i < n_mask; i++ ) {
-		int v = 255 * mask->coeff[i];
-
-		min = IM_MIN( min, min + v );
-		max = IM_MAX( max, max + v );
-
-		if( max > SHRT_MAX )
-			return( -1 );
-		if( min < SHRT_MIN )
-			return( -1 );
-	}
-
-	/* Generate passes until we've used up the whole mask.
-	 */
-	for( i = 0;;) {
-		Pass *pass;
-
-		/* Skip any zero coefficients at the start of the mask 
-		 * region.
-		 */
-		for( ; i < n_mask && !mask->coeff[i]; i++ )
-			;
-		if( i == n_mask )
-			break;
-
-		/* Allocate space for another pass.
-		 */
-		if( conv->n_pass == MAX_PASS ) 
-			return( -1 );
-		pass = &conv->pass[conv->n_pass];
-		conv->n_pass += 1;
-
-		pass->first = i;
-		pass->last = i;
-		pass->r = -1;
-
-		if( conv_compile_convolution_u8s16_section( pass, 
-			conv, conv->n_pass == 1 ) )
-			return( -1 );
-		i = pass->last + 1;
-
-#ifdef DEBUG
-		printf( "conv_compile_convolution_u8s16: "
-			"first = %d, last = %d\n", 
-			pass->first, pass->last ); 
-#endif /*DEBUG*/
-
-		if( i >= n_mask )
-			break;
-	}
-
-	return( 0 );
-}
-
-/* Generate the program that does (pass + rounding) / scale + offset 
- * from a s16 intermediate back to a u8 output.
- */
-static int
-conv_compile_scale_s16u8( Conv *conv )
-{
-	INTMASK *mask = conv->mask;
-
-	VipsVector *v;
-	char scale[256];
-	char offset[256];
-	char zero[256];
-
-	/* Scale and offset must be in range.
-	 */
-	if( mask->scale > 255 ||
-		mask->scale < 0 ||
-		mask->offset > SHRT_MAX ||
-		mask->offset < SHRT_MIN ) 
-		return( -1 );
-
-	conv->clip = v = vips_vector_new( "clip", 1 );
-	conv->s1 = vips_vector_source_name( v, "s1", 2 );
-
-	TEMP( "t1", 2 );
-	TEMP( "t2", 2 );
-
-	/* We can only do unsigned divide, so we must add the offset before
-	 * dividing by the scale. We need to scale the offset up.
-	 *
-	 * We can build the rounding into the offset as well.
-	 * You might think this should be (scale + 1) / 2, but then we'd be 
-	 * adding one for scale == 1.
-	 */
-	CONST( scale, mask->scale, 1 );
-	CONST( offset, mask->offset * mask->scale + mask->scale / 2, 2 );
-	CONST( zero, 0, 2 );
-
-	/* Offset and scale. 
-	 */
-	ASM3( "addssw", "t1", "s1", offset );
-
-	/* We need to convert the signed result of the
-	 * offset to unsigned for the div, ie. we want to set anything <0 to 0.
-	 */
-	ASM3( "cmpgtsw", "t2", "t1", zero );
-	ASM3( "andw", "t1", "t1", "t2" );
-
-	ASM3( "divluw", "t1", "t1", scale );
-	ASM2( "convuuswb", "d1", "t1" );
-
-	if( !vips_vector_compile( v ) ) 
-		return( -1 );
-
-#ifdef DEBUG
-	vips_vector_print( v );
-#endif /*DEBUG*/
-
-	return( 0 );
-}
-
-static Conv *
-conv_new( IMAGE *in, IMAGE *out, INTMASK *mask )
-{
-        Conv *conv = IM_NEW( out, Conv );
-	const int n_mask = mask->xsize * mask->ysize;
-        int i;
-
-        if( !conv )
-                return( NULL );
-
-        conv->in = in;
-        conv->out = out;
-        conv->mask = NULL;
-        conv->nnz = 0;
-        conv->coeff = NULL;
-        conv->coeff_pos = NULL;
-        conv->underflow = 0;
-        conv->overflow = 0;
-
-	conv->n_pass = 0;
-	conv->s1 = -1;
-	conv->clip = NULL;
-
-        if( im_add_close_callback( out, 
-		(im_callback_fn) conv_close, conv, NULL ) ||
-		im_add_close_callback( out, 
-			(im_callback_fn) conv_evalstart, conv, NULL ) ||
-		im_add_close_callback( out, 
-			(im_callback_fn) conv_evalend, conv, NULL ) ||
-        	!(conv->coeff = IM_ARRAY( out, n_mask, int )) ||
-        	!(conv->coeff_pos = IM_ARRAY( out, n_mask, int )) ||
-        	!(conv->mask = im_dup_imask( mask, "conv_mask" )) )
-                return( NULL );
-
-        /* Find non-zero mask elements.
-         */
-        for( i = 0; i < n_mask; i++ )
-                if( mask->coeff[i] ) {
-			conv->coeff[conv->nnz] = mask->coeff[i];
-			conv->coeff_pos[conv->nnz] = i;
-			conv->nnz += 1;
-		}
-
-	/* Was the whole mask zero? We must have at least 1 element in there:
-	 * set it to zero.
-	 */
-	if( conv->nnz == 0 ) {
-		conv->coeff[0] = mask->coeff[0];
-		conv->coeff_pos[0] = 0;
-		conv->nnz = 1;
-	}
-
-	/* Generate code for this mask / image, if possible.
-	 */
-	if( vips_vector_isenabled() ) {
-		if( conv_compile_convolution_u8s16( conv ) ||
-			conv_compile_scale_s16u8( conv ) ) 
-			conv_vector_free( conv );
-	}
-
-        return( conv );
-}
-
-/* Our sequence value.
- */
-typedef struct {
-	Conv *conv;
-	REGION *ir;		/* Input region */
-
-	int *offsets;		/* Offsets for each non-zero matrix element */
-	VipsPel **pts;		/* Per-non-zero mask element pointers */
-
-	int underflow;		/* Underflow/overflow counts */
-	int overflow;
-
-	int last_bpl;		/* Avoid recalcing offsets, if we can */
-
-	/* We need a pair of intermediate buffers to keep the results of each
-	 * conv pass in. 
-	 */
-	void *t1;
-	void *t2;
-} ConvSequence;
-
-/* Free a sequence value.
- */
-static int
-conv_stop( void *vseq, void *a, void *b )
-{
-	ConvSequence *seq = (ConvSequence *) vseq;
-	Conv *conv = (Conv *) b;
-
-	/* Add local under/over counts to global counts.
-	 */
-	conv->overflow += seq->overflow;
-	conv->underflow += seq->underflow;
-
-	IM_FREEF( im_region_free, seq->ir );
-	IM_FREE( seq->t1 );
-	IM_FREE( seq->t2 );
-
-	return( 0 );
-}
-
-/* Convolution start function.
- */
-static void *
-conv_start( IMAGE *out, void *a, void *b )
-{
-	IMAGE *in = (IMAGE *) a;
-	Conv *conv = (Conv *) b;
-
-	ConvSequence *seq;
-
-	if( !(seq = IM_NEW( out, ConvSequence )) )
-		return( NULL );
-
-	/* Init!
-	 */
-	seq->conv = conv;
-	seq->ir = NULL;
-	seq->pts = NULL;
-	seq->underflow = 0;
-	seq->overflow = 0;
-	seq->last_bpl = -1;
-	seq->t1 = NULL;
-	seq->t2 = NULL;
-
-	/* Attach region and arrays.
-	 */
-	seq->ir = im_region_create( in );
-	seq->offsets = IM_ARRAY( out, conv->nnz, int );
-	seq->pts = IM_ARRAY( out, conv->nnz, VipsPel * );
-	if( !seq->ir || !seq->offsets || !seq->pts ) {
-		conv_stop( seq, in, conv );
-		return( NULL );
-	}
-
-	if( vips_vector_isenabled() && 
-		conv->n_pass ) {
-		seq->t1 = IM_ARRAY( NULL, IM_IMAGE_N_ELEMENTS( in ), short );
-		seq->t2 = IM_ARRAY( NULL, IM_IMAGE_N_ELEMENTS( in ), short );
-
-		if( !seq->t1 || !seq->t2 ) {
-			conv_stop( seq, in, conv );
-			return( NULL );
-		}
-	}
-
-	return( seq );
-}
-
-/* INT inner loops.
- */
-#define CONV_INT( TYPE, IM_CLIP ) { \
-	TYPE ** restrict p = (TYPE **) seq->pts; \
-	TYPE * restrict q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
-	\
-	for( x = 0; x < sz; x++ ) {  \
-		int sum; \
-		int i; \
-		\
-		sum = 0; \
-		for ( i = 0; i < nnz; i++ ) \
-			sum += t[i] * p[i][x]; \
-		\
-		sum = ((sum + rounding) / mask->scale) + mask->offset; \
-		\
-		IM_CLIP; \
-		\
-		q[x] = sum;  \
-	}  \
-} 
-
-/* FLOAT inner loops.
- */
-#define CONV_FLOAT( TYPE ) { \
-	TYPE ** restrict p = (TYPE **) seq->pts; \
-	TYPE * restrict q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
-	\
-	for( x = 0; x < sz; x++ ) {  \
-		double sum; \
-		int i; \
-		\
-		sum = 0; \
-		for ( i = 0; i < nnz; i++ ) \
-			sum += t[i] * p[i][x]; \
- 		\
-		sum = (sum / mask->scale) + mask->offset; \
-		\
-		q[x] = sum;  \
-	}  \
-} 
-
-/* Convolve! See below for the special-case 3x3 path.
- */
-static int
-conv_gen( REGION *or, void *vseq, void *a, void *b )
-{
-	ConvSequence *seq = (ConvSequence *) vseq;
-	IMAGE *in = (IMAGE *) a;
-	Conv *conv = (Conv *) b;
-	REGION *ir = seq->ir;
-	INTMASK *mask = conv->mask;
-	int * restrict t = conv->coeff; 
-	const int nnz = conv->nnz;
-
-	/* You might think this should be (scale + 1) / 2, but then we'd be 
-	 * adding one for scale == 1.
-	 */
-	int rounding = mask->scale / 2;
-
-	Rect *r = &or->valid;
-	Rect s;
-	int le = r->left;
-	int to = r->top;
-	int bo = IM_RECT_BOTTOM( r );
-	int sz = IM_REGION_N_ELEMENTS( or ) * (im_iscomplex( in ) ? 2 : 1);
-
-	int x, y, z, i;
-
-	/* Prepare the section of the input image we need. A little larger
-	 * than the section of the output image we are producing.
-	 */
-	s = *r;
-	s.width += mask->xsize - 1;
-	s.height += mask->ysize - 1;
-	if( im_prepare( ir, &s ) )
-		return( -1 );
-
-	VIPS_GATE_START( "conv_gen: work" ); 
-
-        /* Fill offset array. Only do this if the bpl has changed since the 
-	 * previous im_prepare().
-	 */
-	if( seq->last_bpl != IM_REGION_LSKIP( ir ) ) {
-		seq->last_bpl = IM_REGION_LSKIP( ir );
-
-		for( i = 0; i < nnz; i++ ) {
-			z = conv->coeff_pos[i];
-			x = z % conv->mask->xsize;
-			y = z / conv->mask->xsize;
-
-			seq->offsets[i] = 
-				IM_REGION_ADDR( ir, x + le, y + to ) -
-				IM_REGION_ADDR( ir, le, to );
-		}
-	}
-
-	for( y = to; y < bo; y++ ) { 
-		/* Init pts for this line of PELs.
-		 */
-                for( z = 0; z < nnz; z++ )
-                        seq->pts[z] = seq->offsets[z] +  
-                                IM_REGION_ADDR( ir, le, y ); 
-
-		switch( in->BandFmt ) {
-		case IM_BANDFMT_UCHAR: 	
-			CONV_INT( unsigned char, IM_CLIP_UCHAR( sum, seq ) ); 
-			break;
-
-		case IM_BANDFMT_CHAR:   
-			CONV_INT( signed char, IM_CLIP_CHAR( sum, seq ) ); 
-			break;
-
-		case IM_BANDFMT_USHORT: 
-			CONV_INT( unsigned short, IM_CLIP_USHORT( sum, seq ) ); 
-			break;
-
-		case IM_BANDFMT_SHORT:  
-			CONV_INT( signed short, IM_CLIP_SHORT( sum, seq ) ); 
-			break;
-
-		case IM_BANDFMT_UINT:   
-			CONV_INT( unsigned int, IM_CLIP_NONE( sum, seq ) ); 
-			break;
-
-		case IM_BANDFMT_INT:    
-			CONV_INT( signed int, IM_CLIP_NONE( sum, seq ) ); 
-			break;
-
-		case IM_BANDFMT_FLOAT:  
-		case IM_BANDFMT_COMPLEX:  
-			CONV_FLOAT( float ); 
-			break;
-
-		case IM_BANDFMT_DOUBLE: 
-		case IM_BANDFMT_DPCOMPLEX:  
-			CONV_FLOAT( double ); 
-			break;
-
-		default:
-			g_assert_not_reached();
-		}
-	}
-
-	VIPS_GATE_STOP( "conv_gen: work" ); 
-
-	return( 0 );
-}
-
-/* INT inner loops.
- */
-#define CONV3x3_INT( TYPE, IM_CLIP ) { \
-	TYPE * restrict p0 = (TYPE *) IM_REGION_ADDR( ir, le, y ); \
-	TYPE * restrict p1 = (TYPE *) IM_REGION_ADDR( ir, le, y + 1 ); \
-	TYPE * restrict p2 = (TYPE *) IM_REGION_ADDR( ir, le, y + 2 ); \
-	TYPE * restrict q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
-	\
-	for( x = 0; x < sz; x++ ) {  \
-		int sum; \
- 		\
-		sum = 0; \
-		sum += m[0] * p0[0]; \
-		sum += m[1] * p0[bands]; \
-		sum += m[2] * p0[bands * 2]; \
-		sum += m[3] * p1[0]; \
-		sum += m[4] * p1[bands]; \
-		sum += m[5] * p1[bands * 2]; \
-		sum += m[6] * p2[0]; \
-		sum += m[7] * p2[bands]; \
-		sum += m[8] * p2[bands * 2]; \
-		\
-		p0 += 1; \
-		p1 += 1; \
-		p2 += 1; \
- 		\
-		sum = ((sum + rounding) / mask->scale) + mask->offset; \
- 		\
-		IM_CLIP; \
-		\
-		q[x] = sum;  \
-	}  \
-} 
-
-/* FLOAT inner loops.
- */
-#define CONV3x3_FLOAT( TYPE ) { \
-	TYPE * restrict p0 = (TYPE *) IM_REGION_ADDR( ir, le, y ); \
-	TYPE * restrict p1 = (TYPE *) IM_REGION_ADDR( ir, le, y + 1 ); \
-	TYPE * restrict p2 = (TYPE *) IM_REGION_ADDR( ir, le, y + 2 ); \
-	TYPE * restrict q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
-	\
-	for( x = 0; x < sz; x++ ) {  \
-		double sum; \
- 		\
-		sum = 0; \
-		sum += m[0] * p0[0]; \
-		sum += m[1] * p0[bands]; \
-		sum += m[2] * p0[bands * 2]; \
-		sum += m[3] * p1[0]; \
-		sum += m[4] * p1[bands]; \
-		sum += m[5] * p1[bands * 2]; \
-		sum += m[6] * p2[0]; \
-		sum += m[7] * p2[bands]; \
-		sum += m[8] * p2[bands * 2]; \
- 		\
-		p0 += 1; \
-		p1 += 1; \
-		p2 += 1; \
- 		\
-		sum = (sum / mask->scale) + mask->offset; \
-		\
-		q[x] = sum;  \
-	}  \
-} 
-
-/* 3x3 masks are very common, so we have a special path for them. This is
- * about 20% faster than the general convolver above.
- */
-static int
-conv3x3_gen( REGION *or, void *vseq, void *a, void *b )
-{
-	ConvSequence *seq = (ConvSequence *) vseq;
-	IMAGE *in = (IMAGE *) a;
-	Conv *conv = (Conv *) b;
-	REGION *ir = seq->ir;
-	INTMASK *mask = conv->mask;
-	int * restrict m = mask->coeff; 
-
-	/* You might think this should be (scale + 1) / 2, but then we'd be 
-	 * adding one for scale == 1.
-	 */
-	int rounding = mask->scale / 2;
-
-	Rect *r = &or->valid;
-	int le = r->left;
-	int to = r->top;
-	int bo = IM_RECT_BOTTOM( r );
-	int sz = IM_REGION_N_ELEMENTS( or ) * (im_iscomplex( in ) ? 2 : 1);
-	int bands = in->Bands;
-
-	Rect s;
-	int x, y;
-
-	/* Prepare the section of the input image we need. A little larger
-	 * than the section of the output image we are producing.
-	 */
-	s = *r;
-	s.width += 2;
-	s.height += 2;
-	if( im_prepare( ir, &s ) )
-		return( -1 );
-
-	VIPS_GATE_START( "conv3x3_gen: work" ); 
-
-	for( y = to; y < bo; y++ ) { 
-		switch( in->BandFmt ) {
-		case IM_BANDFMT_UCHAR: 	
-			CONV3x3_INT( unsigned char, 
-				IM_CLIP_UCHAR( sum, seq ) ); 
-			break;
-
-		case IM_BANDFMT_CHAR:   
-			CONV3x3_INT( signed char, 
-				IM_CLIP_CHAR( sum, seq ) ); 
-			break;
-
-		case IM_BANDFMT_USHORT: 
-			CONV3x3_INT( unsigned short, 
-				IM_CLIP_USHORT( sum, seq ) ); 
-			break;
-
-		case IM_BANDFMT_SHORT:  
-			CONV3x3_INT( signed short, 
-				IM_CLIP_SHORT( sum, seq ) ); 
-			break;
-
-		case IM_BANDFMT_UINT:   
-			CONV3x3_INT( unsigned int, 
-				IM_CLIP_NONE( sum, seq ) ); 
-			break;
-
-		case IM_BANDFMT_INT:    
-			CONV3x3_INT( signed int, 
-				IM_CLIP_NONE( sum, seq ) ); 
-			break;
-
-		case IM_BANDFMT_FLOAT:  
-		case IM_BANDFMT_COMPLEX:  
-			CONV3x3_FLOAT( float ); 
-			break;
-
-		case IM_BANDFMT_DOUBLE: 
-		case IM_BANDFMT_DPCOMPLEX: 
-			CONV3x3_FLOAT( double ); 
-			break;
-
-		default:
-			g_assert_not_reached();
-		}
-	}
-
-	VIPS_GATE_STOP( "conv3x3_gen: work" ); 
-
-	return( 0 );
-}
-
-/* The VipsVector codepath.
- */
-static int
-convvec_gen( REGION *or, void *vseq, void *a, void *b )
-{
-	ConvSequence *seq = (ConvSequence *) vseq;
-	IMAGE *in = (IMAGE *) a;
-	Conv *conv = (Conv *) b;
-	INTMASK *mask = conv->mask;
-	REGION *ir = seq->ir;
-
-	Rect *r = &or->valid;
-	int sz = IM_REGION_N_ELEMENTS( or ) * (im_iscomplex( in ) ? 2 : 1);
-
-	Rect s;
-	int j, y;
-	VipsExecutor convolve[MAX_PASS];
-	VipsExecutor clip;
-
-	/* Prepare the section of the input image we need. A little larger
-	 * than the section of the output image we are producing.
-	 */
-	s = *r;
-	s.width += mask->xsize - 1;
-	s.height += mask->ysize - 1;
-	if( im_prepare( ir, &s ) )
-		return( -1 );
-
-	VIPS_GATE_START( "convvec_gen: work" ); 
-
-	for( j = 0; j < conv->n_pass; j++ ) 
-		vips_executor_set_program( &convolve[j], 
-			conv->pass[j].vector, sz );
-	vips_executor_set_program( &clip, conv->clip, sz );
-
-	for( y = 0; y < r->height; y++ ) { 
-#ifdef DEBUG_PIXELS
-{
-		int h, v;
-
-		printf( "before convolve: %d, %d\n", r->left, r->top + y );
-		for( v = 0; v < mask->ysize; v++ ) {
-			for( h = 0; h < mask->xsize; h++ )
-				printf( "%3d ", *IM_REGION_ADDR( ir, 
-					r->left + h, r->top + y + v ) );
-			printf( "\n" );
-		}
-}
-#endif /*DEBUG_PIXELS*/
-
-		for( j = 0; j < conv->n_pass; j++ ) {
-			/* We always read from t1 and write to t2.
-			 */
-			vips_executor_set_scanline( &convolve[j], 
-				ir, r->left, r->top + y );
-			vips_executor_set_array( &convolve[j],
-				conv->pass[j].r, seq->t1 );
-			vips_executor_set_destination( &convolve[j], seq->t2 );
-			vips_executor_run( &convolve[j] );
-
-			IM_SWAP( void *, seq->t1, seq->t2 );
-		}
-
-#ifdef DEBUG_PIXELS
-		printf( "before clip: %d\n", ((signed short *) seq->t1)[0] );
-#endif /*DEBUG_PIXELS*/
-
-		vips_executor_set_array( &clip, conv->s1, seq->t1 );
-		vips_executor_set_destination( &clip, 
-			IM_REGION_ADDR( or, r->left, r->top + y ) );
-		vips_executor_run( &clip );
-
-#ifdef DEBUG_PIXELS
-		printf( "after clip: %d\n", 
-			*IM_REGION_ADDR( or, r->left, r->top + y ) );
-#endif /*DEBUG_PIXELS*/
-	}
-
-	VIPS_GATE_STOP( "convvec_gen: work" ); 
-
-	return( 0 );
-}
-
-int
-im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask )
-{
-	Conv *conv;
-	im_generate_fn generate;
-
-#ifdef DEBUG
-	printf( "im_conv_raw: starting with matrix:\n" );
-	im_print_imask( mask );
-#endif /*DEBUG*/
-
-	/* Check parameters.
-	 */
-	if( im_piocheck( in, out ) ||
-		im_check_uncoded( "im_conv", in ) ||
-		im_check_imask( "im_conv", mask ) ) 
-		return( -1 );
-	if( mask->scale == 0 ) {
-		im_error( "im_conv", "%s", "mask scale must be non-zero" );
-		return( -1 );
-	}
-	if( !(conv = conv_new( in, out, mask )) )
-		return( -1 );
-
-	/* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output
-	 * would be 1x1.
-	 */
-	if( im_cp_desc( out, in ) )
-		return( -1 );
-	out->Xsize -= mask->xsize - 1;
-	out->Ysize -= mask->ysize - 1;
-	if( out->Xsize <= 0 || out->Ysize <= 0 ) {
-		im_error( "im_conv", "%s", _( "image too small for mask" ) );
-		return( -1 );
-	}
-
-	if( conv->n_pass ) {
-		generate = convvec_gen;
-		vips_info( "im_conv_raw", "using vec path" ); 
-	}
-	else if( mask->xsize == 3 && mask->ysize == 3 ) {
-		generate = conv3x3_gen;
-		vips_info( "im_conv_raw", "using 3x3 path" ); 
-	}
-	else {
-		generate = conv_gen;
-		vips_info( "im_conv_raw", "using C path" ); 
-	}
-
-	if( im_demand_hint( out, IM_SMALLTILE, in, NULL ) ||
-		im_generate( out, conv_start, generate, conv_stop, in, conv ) )
-		return( -1 );
-
-	out->Xoffset = -mask->xsize / 2;
-	out->Yoffset = -mask->ysize / 2;
-
-	return( 0 );
-}
-
-int 
-im_conv( IMAGE *in, IMAGE *out, INTMASK *mask )
-{
-	IMAGE *t1 = im_open_local( out, "im_conv intermediate", "p" );
-
-	if( !t1 || 
-		im_embed( in, t1, 1, mask->xsize / 2, mask->ysize / 2, 
-			in->Xsize + mask->xsize - 1, 
-			in->Ysize + mask->ysize - 1 ) ||
-		im_conv_raw( t1, out, mask ) )
-		return( -1 );
-
-	out->Xoffset = 0;
-	out->Yoffset = 0;
-
-	return( 0 );
-}
diff --git a/libvips/deprecated/vips7compat.c b/libvips/deprecated/vips7compat.c
index 009178d8..cefff34d 100644
--- a/libvips/deprecated/vips7compat.c
+++ b/libvips/deprecated/vips7compat.c
@@ -2404,6 +2404,51 @@ im_convsep_f( IMAGE *in, IMAGE *out, DOUBLEMASK *mask )
 	return( 0 );
 }
 
+/* vips7 compat wrapper: convolve @in with the integer @mask and write the
+ * result to @out. The mask is turned into an image with im_imask2vips() and
+ * the convolution itself is done by vips_convi().
+ *
+ * Returns 0 on success, -1 on error.
+ */
+int
+im_conv( VipsImage *in, VipsImage *out, INTMASK *mask )
+{
+	VipsImage *t1, *t2;
+
+	if( !(t1 = vips_image_new()) )
+		return( -1 );
+
+	/* t1 is ours from here on: drop it on every failure path, including a
+	 * failed mask conversion.
+	 */
+	if( im_imask2vips( mask, t1 ) ||
+		vips_convi( in, &t2, t1, NULL ) ) {
+		g_object_unref( t1 );
+		return( -1 );
+	}
+	g_object_unref( t1 );
+
+	/* Write the convolved image t2 to @out, then release our reference.
+	 */
+	if( vips_image_write( t2, out ) ) {
+		g_object_unref( t2 );
+		return( -1 );
+	}
+	g_object_unref( t2 );
+
+	return( 0 );
+}
+
+/* No compatibility implementation is provided for im_conv_raw(): this stub
+ * always sets an error and returns -1.
+ */
+int
+im_conv_raw( VipsImage *in, VipsImage *out, INTMASK *mask )
+{
+	im_error( "im_conv_raw", "no compat function" );
+	return( -1 );
+}
+
 int 
 im_conv_f( VipsImage *in, VipsImage *out, DOUBLEMASK *mask )
 {