diff --git a/ChangeLog b/ChangeLog index b4324789..609a5217 100644 --- a/ChangeLog +++ b/ChangeLog @@ -39,6 +39,8 @@ - mask gtk-doc done - add cfitsio dependancy - add FITS reader +- land the vector branmch ... we have SSE erode/dilate/add/conv +- add IM_SWAP 12/5/10 started 7.22.2 - the conditional image of ifthenelse can be any format, a (!=0) is added if diff --git a/TODO b/TODO index da6942f8..e3b58958 100644 --- a/TODO +++ b/TODO @@ -1,4 +1,9 @@ -- add IM_SWAP, see orc branch util.h +- lab [100,0,0] -> srgb [255, 255, 254]? how odd + +- scrap im_convsep_f() ... just use im_conv_f() twice + +- make im_rank() use IM_SWAP() + - test fits reader more ... colour? diff --git a/configure.in b/configure.in index 6eabda21..4982de2a 100644 --- a/configure.in +++ b/configure.in @@ -375,6 +375,21 @@ if test x"$with_magick" != "xno"; then LIBS=$save_LIBS fi +# orc +AC_ARG_WITH([orc], + AS_HELP_STRING([--without-orc], [build without orc (default: test)])) + +if test x"$with_orc" != "xno"; then + # we use loadpw etc. + PKG_CHECK_MODULES(ORC, orc-0.4 >= 0.4.11, + [AC_DEFINE(HAVE_ORC,1,[define if you have orc-0.4.11 or later installed.]) + with_orc=yes + PACKAGES_USED="$PACKAGES_USED orc-0.4"], + [AC_MSG_WARN([orc-0.4.11 or later not found; disabling orc support]) + with_orc=no + ]) +fi + # lcms ... 
look for lcms2 first, it has better threading support AC_ARG_WITH([lcms], AS_HELP_STRING([--without-lcms], [build without lcms (default: test)])) @@ -570,14 +585,14 @@ fi # Gather all up for VIPS_CFLAGS, VIPS_INCLUDES, VIPS_LIBS and VIPS_CXX_LIBS # sort includes to get longer, more specific dirs first # helps, for example, selecting graphicsmagick over imagemagick -VIPS_CFLAGS=`for i in $VIPS_CFLAGS $GTHREAD_CFLAGS $REQUIRED_CFLAGS $PANGOFT2_CFLAGS $FFTW3_CFLAGS $MAGICK_CFLAGS $PNG_CFLAGS $EXIF_CFLAGS $MATIO_CFLAGS $CFITSIO_CFLAGS $OPENEXR_CFLAGS +VIPS_CFLAGS=`for i in $VIPS_CFLAGS $GTHREAD_CFLAGS $REQUIRED_CFLAGS $PANGOFT2_CFLAGS $FFTW3_CFLAGS $MAGICK_CFLAGS $PNG_CFLAGS $EXIF_CFLAGS $MATIO_CFLAGS $CFITSIO_CFLAGS $OPENEXR_CFLAGS $ORC_CFLAGS do echo $i done | sort -ru` VIPS_CFLAGS=`echo $VIPS_CFLAGS` VIPS_CFLAGS="$VIPS_DEBUG_FLAGS $VIPS_CFLAGS" VIPS_INCLUDES="$PNG_INCLUDES $TIFF_INCLUDES $ZIP_INCLUDES $JPEG_INCLUDES $FFTW_INCLUDES $LCMS_INCLUDES" -VIPS_LIBS="$MAGICK_LIBS $PNG_LIBS $TIFF_LIBS $ZIP_LIBS $JPEG_LIBS $GTHREAD_LIBS $REQUIRED_LIBS $PANGOFT2_LIBS $FFTW3_LIBS $FFTW_LIBS $LCMS_LIBS $OPENEXR_LIBS $CFITSIO_LIBS $MATIO_LIBS $EXIF_LIBS -lm" +VIPS_LIBS="$MAGICK_LIBS $PNG_LIBS $TIFF_LIBS $ZIP_LIBS $JPEG_LIBS $GTHREAD_LIBS $REQUIRED_LIBS $PANGOFT2_LIBS $FFTW3_LIBS $FFTW_LIBS $ORC_LIBS $LCMS_LIBS $OPENEXR_LIBS $CFITSIO_LIBS $MATIO_LIBS $EXIF_LIBS -lm" # need -lstdc++ for (eg.) 
the C++ format loaders VIPS_CXX_LIBS="-lstdc++" @@ -660,6 +675,8 @@ build docs with gtkdoc $enable_gtk_doc use fftw3 for FFT: $with_fftw3 Magick package: $with_magickpackage file import with libMagick: $with_magick +accelerate loops with orc: $with_orc + (needs orc-0.4.11 or later) ICC profile support with lcms: $with_lcms (version $with_lcms_ver) file import with OpenEXR: $with_OpenEXR file import with matio: $with_matio diff --git a/libvips/arithmetic/im_add.c b/libvips/arithmetic/im_add.c index bf94ed42..5252a754 100644 --- a/libvips/arithmetic/im_add.c +++ b/libvips/arithmetic/im_add.c @@ -30,6 +30,8 @@ * - more of operation scaffold moved inside * 25/7/10 * - remove oil support again ... we'll try Orc instead + * 29/10/10 + * - move to VipsVector for Orc support */ /* @@ -69,6 +71,7 @@ #include #include +#include #ifdef WITH_DMALLOC #include @@ -83,6 +86,8 @@ q[x] = p1[x] + p2[x]; \ } +static VipsVector *add_vectors[IM_BANDFMT_LAST] = { NULL }; + static void add_buffer( PEL **in, PEL *out, int width, IMAGE *im ) { @@ -91,32 +96,49 @@ add_buffer( PEL **in, PEL *out, int width, IMAGE *im ) const int sz = width * im->Bands * (vips_bandfmt_iscomplex( im->BandFmt ) ? 2 : 1); - int x; + if( vips_vector_get_enabled() && + add_vectors[im->BandFmt] ) { + VipsExecutor ex; - /* Add all input types. Keep types here in sync with bandfmt_add[] - * below. 
- */ - switch( im->BandFmt ) { - case IM_BANDFMT_UCHAR: LOOP( unsigned char, unsigned short ); break; - case IM_BANDFMT_CHAR: LOOP( signed char, signed short ); break; - case IM_BANDFMT_USHORT: LOOP( unsigned short, unsigned int ); break; - case IM_BANDFMT_SHORT: LOOP( signed short, signed int ); break; - case IM_BANDFMT_UINT: LOOP( unsigned int, unsigned int ); break; - case IM_BANDFMT_INT: LOOP( signed int, signed int ); break; + vips_executor_set_program( &ex, add_vectors[im->BandFmt], sz ); + vips_executor_set_source( &ex, 1, in[0] ); + vips_executor_set_source( &ex, 2, in[1] ); + vips_executor_set_destination( &ex, out ); - case IM_BANDFMT_FLOAT: - case IM_BANDFMT_COMPLEX: - LOOP( float, float ); - break; + vips_executor_run( &ex ); + } + else { + int x; - case IM_BANDFMT_DOUBLE: - case IM_BANDFMT_DPCOMPLEX: - LOOP( double, double ); - break; + /* Add all input types. Keep types here in sync with + * bandfmt_add[] below. + */ + switch( im->BandFmt ) { + case IM_BANDFMT_UCHAR: + LOOP( unsigned char, unsigned short ); break; + case IM_BANDFMT_CHAR: + LOOP( signed char, signed short ); break; + case IM_BANDFMT_USHORT: + LOOP( unsigned short, unsigned int ); break; + case IM_BANDFMT_SHORT: + LOOP( signed short, signed int ); break; + case IM_BANDFMT_UINT: + LOOP( unsigned int, unsigned int ); break; + case IM_BANDFMT_INT: + LOOP( signed int, signed int ); break; - default: - g_assert( 0 ); - } + case IM_BANDFMT_FLOAT: + case IM_BANDFMT_COMPLEX: + LOOP( float, float ); break; + + case IM_BANDFMT_DOUBLE: + case IM_BANDFMT_DPCOMPLEX: + LOOP( double, double ); break; + + default: + g_assert( 0 ); + } + } } /* Save a bit of typing. 
@@ -311,6 +333,106 @@ static int bandfmt_add[10] = { US, S, UI, I, UI, I, F, X, D, DX }; +void +im__init_programs( VipsVector *vectors[IM_BANDFMT_LAST], + int format_table[IM_BANDFMT_LAST] ) +{ + int fmt; + + for( fmt = 0; fmt < IM_BANDFMT_LAST; fmt++ ) { + int isize = im__sizeof_bandfmt[fmt]; + int osize = im__sizeof_bandfmt[format_table[fmt]]; + + char source[256]; + VipsVector *v; + + /* float and double are not handled (well) by ORC. + */ + if( fmt == IM_BANDFMT_DOUBLE || + fmt == IM_BANDFMT_FLOAT || + fmt == IM_BANDFMT_COMPLEX || + fmt == IM_BANDFMT_DPCOMPLEX ) + continue; + + v = vectors[fmt] = + vips_vector_new_ds( "binary arith", osize, isize ); + vips_vector_source( v, source, 2, isize ); + + vips_vector_temporary( v, "t1", osize ); + vips_vector_temporary( v, "t2", osize ); + } +} + +void +im__compile_programs( VipsVector *vectors[IM_BANDFMT_LAST] ) +{ + int fmt; + + for( fmt = 0; fmt < IM_BANDFMT_LAST; fmt++ ) { + if( vectors[fmt] && + !vips_vector_compile( vectors[fmt] ) ) + IM_FREEF( vips_vector_free, vectors[fmt] ); + } + +#ifdef DEBUG + printf( "im__compile_programs: " ); + for( fmt = 0; fmt < IM_BANDFMT_LAST; fmt++ ) + if( vectors[fmt] ) + printf( "%s ", im_BandFmt2char( fmt ) ); + printf( "\n" ); +#endif /*DEBUG*/ +} + +static void +build_programs( void ) +{ + static gboolean done = FALSE; + + VipsVector *v; + + if( done ) + return; + done = TRUE; + + im__init_programs( add_vectors, bandfmt_add ); + + v = add_vectors[IM_BANDFMT_UCHAR]; + vips_vector_asm2( v, "convubw", "t1", "s1" ); + vips_vector_asm2( v, "convubw", "t2", "s2" ); + vips_vector_asm3( v, "addw", "d1", "t1", "t2" ); + + v = add_vectors[IM_BANDFMT_CHAR]; + vips_vector_asm2( v, "convsbw", "t1", "s1" ); + vips_vector_asm2( v, "convsbw", "t2", "s2" ); + vips_vector_asm3( v, "addw", "d1", "t1", "t2" ); + + /* + + only the 8-bit ones have a useful speedup, with orc-0.4.11 + on a c2d anyway + + test this again at some point I guess + + v = add_vectors[IM_BANDFMT_USHORT]; + vips_vector_asm2( 
v, "convuwl", "t1", "s1" ); + vips_vector_asm2( v, "convuwl", "t2", "s2" ); + vips_vector_asm3( v, "addl", "d1", "t1", "t2" ); + + v = add_vectors[IM_BANDFMT_SHORT]; + vips_vector_asm2( v, "convswl", "t1", "s1" ); + vips_vector_asm2( v, "convswl", "t2", "s2" ); + vips_vector_asm3( v, "addl", "d1", "t1", "t2" ); + + v = add_vectors[IM_BANDFMT_UINT]; + vips_vector_asm3( v, "addl", "d1", "s1", "s2" ); + + v = add_vectors[IM_BANDFMT_INT]; + vips_vector_asm3( v, "addl", "d1", "s1", "s2" ); + */ + + im__compile_programs( add_vectors ); +} + /** * im_add: * @in1: input image @@ -387,6 +509,9 @@ static int bandfmt_add[10] = { * In other words, the output type is just large enough to hold the whole * range of possible values. * + * Operations on 8-bit images are performed using the processor's vector unit, + * if possible. Disable this with --vips-novector or IM_NOVECTOR. + * * See also: im_subtract(), im_lintra(). * * Returns: 0 on success, -1 on error @@ -394,6 +519,9 @@ static int bandfmt_add[10] = { int im_add( IMAGE *in1, IMAGE *in2, IMAGE *out ) { + if( vips_vector_get_enabled() ) + build_programs(); + return( im__arith_binary( "im_add", in1, in2, out, bandfmt_add, diff --git a/libvips/convolution/Makefile.am b/libvips/convolution/Makefile.am index c5c2b42c..6c40e01d 100644 --- a/libvips/convolution/Makefile.am +++ b/libvips/convolution/Makefile.am @@ -6,7 +6,6 @@ libconvolution_la_SOURCES = \ im_compass.c \ im_conv.c \ im_conv_f.c \ - im_convsep.c \ im_convsep_f.c \ im_contrast_surface.c \ im_fastcor.c \ diff --git a/libvips/convolution/im_conv.c b/libvips/convolution/im_conv.c index f9c2003d..c8a20d01 100644 --- a/libvips/convolution/im_conv.c +++ b/libvips/convolution/im_conv.c @@ -55,6 +55,12 @@ * - add a special case for 3x3 masks, about 20% faster * 1/10/10 * - support complex (just double the bands) + * 18/10/10 + * - add experimental Orc path + * 29/10/10 + * - use VipsVector + * - get rid of im_convsep(), just call this twice, no longer worth + * keeping two 
versions */ /* @@ -83,6 +89,43 @@ */ +/* Show sample pixels as they are transformed. +#define DEBUG_PIXELS + */ + +/* +#define DEBUG + */ + +/* + + TODO + + - will this change make much difference to the vips benchmark? + + - would setting params by index rather than name be any quicker? + + - fix up a signed 8-bit code path? + + - try a path with a 32-bit sum for larger matrices / scale / offset, + much slower? + + - try a 16-bit path, though the speedup might not be worthwhile + + - with a 5x5 matrix: + + 5 5 62 0 + 0 1 1 1 0 + 1 4 6 4 1 + 1 6 10 6 1 + 1 4 6 4 1 + 0 1 1 1 0 + + Orc is no faster than C, argh, multipass is not worthwhile for + large matrices + + */ + #ifdef HAVE_CONFIG_H #include #endif /*HAVE_CONFIG_H*/ @@ -93,6 +136,7 @@ #include #include +#include #ifdef WITH_DMALLOC #include @@ -112,12 +156,26 @@ typedef struct { int underflow; /* Global underflow/overflow counts */ int overflow; + + /* The convolver we generate for this mask. We have to split the + * convolve and clip into two phases. + */ + VipsVector *convolve; + VipsVector *clip; } Conv; +static void +conv_vector_free( Conv *conv ) +{ + IM_FREEF( vips_vector_free, conv->convolve ); + IM_FREEF( vips_vector_free, conv->clip ); +} + static int conv_close( Conv *conv ) { IM_FREEF( im_free_imask, conv->mask ); + conv_vector_free( conv ); return( 0 ); } @@ -146,6 +204,197 @@ conv_evalend( Conv *conv ) return( 0 ); } +#define TEMP( N, S ) vips_vector_temporary( v, N, S ) +#define SRC( N, P, S ) vips_vector_source( v, N, P, S ) +#define CONST( N, V, S ) vips_vector_constant( v, N, V, S ) +#define ASM2( OP, A, B ) vips_vector_asm2( v, OP, A, B ) +#define ASM3( OP, A, B, C ) vips_vector_asm3( v, OP, A, B, C ) + +/* Generate code for a 3x3 mask. Just do multiply-add, a second pass does the + * round and clip. + * + * 0 for success, -1 on error. 
+ */ +static int +conv_compile_convolution_u8s16( Conv *conv ) +{ + INTMASK *mask = conv->mask; + + double min, max; + int i; + VipsVector *v; + char zero[256]; + char offset[256]; + char source[256]; + char coeff[256]; + + if( conv->in->BandFmt != IM_BANDFMT_UCHAR ) + return( -1 ); + + /* Don't test mask size, it's very hard to predict when we will + * exhaust the program space. + */ + + /* Can the accumulator overflow or underflow at any stage? Since + * matrix elements are signed, we need to calculate a running + * possible min and max. + */ + min = 0; + max = 0; + for( i = 0; i < mask->xsize * mask->ysize; i++ ) { + int v = 255 * mask->coeff[i]; + + if( min + v < min ) + min += v; + else if( min + v > max ) + max += v; + + if( max > SHRT_MAX ) + return( -1 ); + if( min < SHRT_MIN ) + return( -1 ); + } + + /* Start with a single source scanline, we add more as we need them. + */ + conv->convolve = v = vips_vector_new_ds( "conv", 2, 1 ); + + /* The value we fetch from the image, the product with the matrix + * value, the accumulated sum. + */ + TEMP( "value", 1 ); + TEMP( "product", 2 ); + TEMP( "sum", 2 ); + + CONST( zero, 0, 2 ); + ASM2( "copyw", "sum", zero ); + + for( i = 0; i < mask->xsize * mask->ysize; i++ ) { + int x = i % mask->xsize; + int y = i / mask->xsize; + + if( !mask->coeff[i] ) + /* Exclude zero elements. + */ + continue; + + /* The source. s1 is the first scanline in the mask. + */ + SRC( source, y + 1, 1 ); + + /* The offset, only for non-first-columns though. + */ + if( x > 0 ) + CONST( offset, conv->in->Bands * x, 1 ); + + /* The coefficient. Only for non-1 coeffs though, we skip the + * mul for them. + * + * We need to do 8-bit unsigned pixel * signed mask, so we + * have to cast the pixel up to 16-bit then do a mult against a + * 16-bit constant. We know the result will fit in the botom + * 16 bits. + */ + if( mask->coeff[i] != 1 ) + CONST( coeff, mask->coeff[i], 2 ); + + /* Two factors: + * - element is in the first column, ie. 
has a zero offset + * - mask coeff is 1, ie. we can skip the multiply + * + * We could combine some of these cases, but it's simpler + * and safer to spell them all out. + */ + if( x == 0 ) + ASM2( "loadb", "value", source ); + else + ASM3( "loadoffb", "value", source, offset ); + + ASM2( "convubw", "product", "value" ); + + if( mask->coeff[i] != 1 ) + ASM3( "mullw", "product", "product", coeff ); + + ASM3( "addssw", "sum", "sum", "product" ); + + /* If we run out of space, fall back to C. + */ + if( vips_vector_full( v ) ) + return( -1 ); + } + + ASM2( "copyw", "d1", "sum" ); + + if( !vips_vector_compile( v ) ) + return( -1 ); + +#ifdef DEBUG + vips_vector_print( v ); +#endif /*DEBUG*/ + + return( 0 ); +} + +/* Generate the program that does (sum + rounding) / scale + offset + * from a s16 intermediate back to a u8 output. + */ +static int +conv_compile_scale_s16u8( Conv *conv ) +{ + INTMASK *mask = conv->mask; + + VipsVector *v; + char scale[256]; + char offset[256]; + char zero[256]; + + /* Scale and offset must be in range. + */ + if( mask->scale > 255 || + mask->scale < 0 || + mask->offset > SHRT_MAX || + mask->offset < SHRT_MIN ) + return( -1 ); + + conv->clip = v = vips_vector_new_ds( "clip", 1, 2 ); + + TEMP( "t1", 2 ); + TEMP( "t2", 2 ); + + /* We can only do unsigned divide, so we must add the offset before + * dividing by the scale. We need to scale the offset up. + * + * We can build the rounding into the offset as well. + * You might think this should be (scale + 1) / 2, but then we'd be + * adding one for scale == 1. + */ + CONST( scale, mask->scale, 1 ); + CONST( offset, mask->offset * mask->scale + mask->scale / 2, 2 ); + CONST( zero, 0, 2 ); + + /* Offset and scale. + */ + ASM3( "addssw", "t1", "s1", offset ); + + /* We need to convert the signed result of the + * offset to unsigned for the div, ie. we want to set anything <0 to 0. 
+ */ + ASM3( "cmpgtsw", "t2", "t1", zero ); + ASM3( "andw", "t1", "t1", "t2" ); + + ASM3( "divluw", "t1", "t1", scale ); + ASM2( "convuuswb", "d1", "t1" ); + + if( !vips_vector_compile( v ) ) + return( -1 ); + +#ifdef DEBUG + vips_vector_print( v ); +#endif /*DEBUG*/ + + return( 0 ); +} + static Conv * conv_new( IMAGE *in, IMAGE *out, INTMASK *mask ) { @@ -165,6 +414,9 @@ conv_new( IMAGE *in, IMAGE *out, INTMASK *mask ) conv->underflow = 0; conv->overflow = 0; + conv->convolve = NULL; + conv->clip = NULL; + if( im_add_close_callback( out, (im_callback_fn) conv_close, conv, NULL ) || im_add_close_callback( out, @@ -194,6 +446,14 @@ conv_new( IMAGE *in, IMAGE *out, INTMASK *mask ) conv->nnz = 1; } + /* Generate code for this mask / image, if possible. + */ + if( vips_vector_get_enabled() ) { + if( conv_compile_convolution_u8s16( conv ) || + conv_compile_scale_s16u8( conv ) ) + conv_vector_free( conv ); + } + return( conv ); } @@ -210,6 +470,11 @@ typedef struct { int overflow; int last_bpl; /* Avoid recalcing offsets, if we can */ + + /* We need an intermediate buffer to keep the result of the conv in + * before we clip it. + */ + void *sum; } ConvSequence; /* Free a sequence value. @@ -227,6 +492,8 @@ conv_stop( void *vseq, void *a, void *b ) IM_FREEF( im_region_free, seq->ir ); + IM_FREE( seq->sum ); + return( 0 ); } @@ -250,13 +517,15 @@ conv_start( IMAGE *out, void *a, void *b ) seq->underflow = 0; seq->overflow = 0; seq->last_bpl = -1; + seq->sum = NULL; /* Attach region and arrays. 
*/ seq->ir = im_region_create( in ); seq->offsets = IM_ARRAY( out, conv->nnz, int ); seq->pts = IM_ARRAY( out, conv->nnz, PEL * ); - if( !seq->ir || !seq->offsets || !seq->pts ) { + seq->sum = IM_ARRAY( NULL, IM_IMAGE_N_ELEMENTS( in ), short ); + if( !seq->ir || !seq->offsets || !seq->pts || !seq->sum ) { conv_stop( seq, in, conv ); return( NULL ); } @@ -333,8 +602,7 @@ conv_gen( REGION *or, void *vseq, void *a, void *b ) int le = r->left; int to = r->top; int bo = IM_RECT_BOTTOM( r ); - int sz = IM_REGION_N_ELEMENTS( or ) * - (vips_bandfmt_iscomplex( in->BandFmt ) ? 2 : 1); + int sz = IM_REGION_N_ELEMENTS( or ) * (im_iscomplex( in ) ? 2 : 1); int x, y, z, i; @@ -428,13 +696,13 @@ conv_gen( REGION *or, void *vseq, void *a, void *b ) sum = 0; \ sum += m[0] * p0[0]; \ sum += m[1] * p0[bands]; \ - sum += m[2] * p0[bands << 1]; \ + sum += m[2] * p0[bands * 2]; \ sum += m[3] * p1[0]; \ sum += m[4] * p1[bands]; \ - sum += m[5] * p1[bands << 1]; \ + sum += m[5] * p1[bands * 2]; \ sum += m[6] * p2[0]; \ sum += m[7] * p2[bands]; \ - sum += m[8] * p2[bands << 1]; \ + sum += m[8] * p2[bands * 2]; \ \ p0 += 1; \ p1 += 1; \ @@ -462,13 +730,13 @@ conv_gen( REGION *or, void *vseq, void *a, void *b ) sum = 0; \ sum += m[0] * p0[0]; \ sum += m[1] * p0[bands]; \ - sum += m[2] * p0[bands << 1]; \ + sum += m[2] * p0[bands * 2]; \ sum += m[3] * p1[0]; \ sum += m[4] * p1[bands]; \ - sum += m[5] * p1[bands << 1]; \ + sum += m[5] * p1[bands * 2]; \ sum += m[6] * p2[0]; \ sum += m[7] * p2[bands]; \ - sum += m[8] * p2[bands << 1]; \ + sum += m[8] * p2[bands * 2]; \ \ p0 += 1; \ p1 += 1; \ @@ -502,8 +770,7 @@ conv3x3_gen( REGION *or, void *vseq, void *a, void *b ) int le = r->left; int to = r->top; int bo = IM_RECT_BOTTOM( r ); - int sz = IM_REGION_N_ELEMENTS( or ) * - (vips_bandfmt_iscomplex( in->BandFmt ) ? 2 : 1); + int sz = IM_REGION_N_ELEMENTS( or ) * (im_iscomplex( in ) ? 
2 : 1); int bands = in->Bands; Rect s; @@ -568,6 +835,79 @@ conv3x3_gen( REGION *or, void *vseq, void *a, void *b ) return( 0 ); } +/* The VipsVector codepath. + */ +static int +convvec_gen( REGION *or, void *vseq, void *a, void *b ) +{ + ConvSequence *seq = (ConvSequence *) vseq; + IMAGE *in = (IMAGE *) a; + Conv *conv = (Conv *) b; + INTMASK *mask = conv->mask; + REGION *ir = seq->ir; + + Rect *r = &or->valid; + int sz = IM_REGION_N_ELEMENTS( or ) * (im_iscomplex( in ) ? 2 : 1); + + Rect s; + int y, j; + VipsExecutor convolve; + VipsExecutor clip; + + /* Prepare the section of the input image we need. A little larger + * than the section of the output image we are producing. + */ + s = *r; + s.width += mask->xsize - 1; + s.height += mask->ysize - 1; + if( im_prepare( ir, &s ) ) + return( -1 ); + + vips_executor_set_program( &convolve, conv->convolve, sz ); + vips_executor_set_program( &clip, conv->clip, sz ); + + /* Link the combiner to the intermediate buffer. + */ + vips_executor_set_array( &convolve, "d1", seq->sum ); + vips_executor_set_array( &clip, "s1", seq->sum ); + + for( y = 0; y < r->height; y++ ) { +#ifdef DEBUG_PIXELS +{ + int h, v; + + printf( "before convolve: %d, %d\n", r->left, r->top + y ); + for( v = 0; v < mask->ysize; v++ ) { + for( h = 0; h < mask->xsize; h++ ) + printf( "%3d ", *((PEL *) IM_REGION_ADDR( ir, + r->left + h, r->top + y + v )) ); + printf( "\n" ); + } +} +#endif /*DEBUG_PIXELS*/ + + for( j = 0; j < mask->ysize; j++ ) + vips_executor_set_source( &convolve, j + 1, + IM_REGION_ADDR( ir, r->left, r->top + y + j ) ); + vips_executor_run( &convolve ); + +#ifdef DEBUG_PIXELS + printf( "before clip: %3d\n", *((signed short *) seq->sum) ); +#endif /*DEBUG_PIXELS*/ + + vips_executor_set_array( &clip, "d1", + IM_REGION_ADDR( or, r->left, r->top + y ) ); + vips_executor_run( &clip ); + +#ifdef DEBUG_PIXELS + printf( "after clip: %d\n", + *((PEL *) IM_REGION_ADDR( or, r->left, r->top + y )) ); +#endif /*DEBUG_PIXELS*/ + } + + return( 0 ); 
+} + int im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask ) { @@ -599,7 +939,14 @@ im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask ) return( -1 ); } - if( mask->xsize == 3 && mask->ysize == 3 ) + if( conv->convolve ) { + generate = convvec_gen; + +#ifdef DEBUG + printf( "im_conv_raw: using vector path\n" ); +#endif /*DEBUG*/ + } + else if( mask->xsize == 3 && mask->ysize == 3 ) generate = conv3x3_gen; else generate = conv_gen; @@ -631,6 +978,10 @@ im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask ) * and offset are part of @mask. For integer @in, the division by scale * includes round-to-nearest. * + * Small convolutions on unsigned 8-bit images are performed using the + * processor's vector unit, + * if possible. Disable this with --vips-novector or IM_NOVECTOR. + * * See also: im_conv_f(), im_convsep(), im_create_imaskv(). * * Returns: 0 on success, -1 on error @@ -652,3 +1003,75 @@ im_conv( IMAGE *in, IMAGE *out, INTMASK *mask ) return( 0 ); } + +int +im_convsep_raw( IMAGE *in, IMAGE *out, INTMASK *mask ) +{ + IMAGE *t; + INTMASK *rmask; + + if( mask->xsize != 1 && mask->ysize != 1 ) { + im_error( "im_convsep", + "%s", _( "expect 1xN or Nx1 input mask" ) ); + return( -1 ); + } + + if( !(t = im_open_local( out, "im_convsep", "p" )) || + !(rmask = (INTMASK *) im_local( out, + (im_construct_fn) im_dup_imask, + (im_callback_fn) im_free_imask, mask, mask->filename, NULL )) ) + return( -1 ); + + rmask->xsize = mask->ysize; + rmask->ysize = mask->xsize; + + if( im_conv_raw( in, t, mask ) || + im_conv_raw( t, out, rmask ) ) + return( -1 ); + + return( 0 ); +} + +/** + * im_convsep: + * @in: input image + * @out: output image + * @mask: convolution mask + * + * Perform a separable convolution of @in with @mask using integer arithmetic. + * + * The mask must be 1xn or nx1 elements. + * The output image + * always has the same #VipsBandFmt as the input image. + * + * The image is convolved twice: once with @mask and then again with @mask + * rotated by 90 degrees. 
This is much faster for certain types of mask + * (gaussian blur, for example) than doing a full 2D convolution. + * + * Each output pixel is + * calculated as sigma[i]{pixel[i] * mask[i]} / scale + offset, where scale + * and offset are part of @mask. For integer @in, the division by scale + * includes round-to-nearest. + * + * See also: im_convsep_f(), im_conv(), im_create_imaskv(). + * + * Returns: 0 on success, -1 on error + */ +int +im_convsep( IMAGE *in, IMAGE *out, INTMASK *mask ) +{ + IMAGE *t1 = im_open_local( out, "im_convsep intermediate", "p" ); + int size = mask->xsize * mask->ysize; + + if( !t1 || + im_embed( in, t1, 1, size / 2, size / 2, + in->Xsize + size - 1, + in->Ysize + size - 1 ) || + im_convsep_raw( t1, out, mask ) ) + return( -1 ); + + out->Xoffset = 0; + out->Yoffset = 0; + + return( 0 ); +} diff --git a/libvips/convolution/im_convsep.c b/libvips/convolution/im_convsep.c deleted file mode 100644 index 30dedccc..00000000 --- a/libvips/convolution/im_convsep.c +++ /dev/null @@ -1,461 +0,0 @@ -/* im_convsep - * - * Copyright: 1990, N. Dessipris. - * - * Author: Nicos Dessipris - * Written on: 29/04/1991 - * Modified on: 29/4/93 K.Martinez for Sys5 - * 9/3/01 JC - * - rewritten using im_conv() - * 27/7/01 JC - * - rejects masks with scale == 0 - * 7/4/04 - * - now uses im_embed() with edge stretching on the input, not - * the output - * - sets Xoffset / Yoffset - * 21/4/04 - * - scale down int convolves at 1/2 way mark, much less likely to integer - * overflow on intermediates - * 12/5/08 - * - int rounding was +1 too much, argh - * 3/2/10 - * - gtkdoc - * - more cleanups - * 1/10/10 - * - support complex (just double the bands) - */ - -/* - - This file is part of VIPS. - - VIPS is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - */ - -/* - - These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk - - */ - -#ifdef HAVE_CONFIG_H -#include -#endif /*HAVE_CONFIG_H*/ -#include - -#include -#include -#include - -#include - -#ifdef WITH_DMALLOC -#include -#endif /*WITH_DMALLOC*/ - -/* Our parameters ... we take a copy of the mask argument. - */ -typedef struct { - IMAGE *in; - IMAGE *out; - INTMASK *mask; /* Copy of mask arg */ - - int size; /* N for our 1xN or Nx1 mask */ - int scale; /* Our scale ... we have to square mask->scale */ - - int underflow; /* Global underflow/overflow counts */ - int overflow; -} Conv; - -/* End of evaluation --- print overflows and underflows. - */ -static int -conv_destroy( Conv *conv ) -{ - /* Print underflow/overflow count. 
- */ - if( conv->overflow || conv->underflow ) - im_warn( "im_convsep", _( "%d overflows and %d underflows " - "detected" ), conv->overflow, conv->underflow ); - - if( conv->mask ) { - (void) im_free_imask( conv->mask ); - conv->mask = NULL; - } - - return( 0 ); -} - -static Conv * -conv_new( IMAGE *in, IMAGE *out, INTMASK *mask ) -{ - Conv *conv = IM_NEW( out, Conv ); - - if( !conv ) - return( NULL ); - - conv->in = in; - conv->out = out; - conv->mask = NULL; - conv->size = mask->xsize * mask->ysize; - conv->scale = mask->scale * mask->scale; - conv->underflow = 0; - conv->overflow = 0; - - if( im_add_close_callback( out, - (im_callback_fn) conv_destroy, conv, NULL ) || - !(conv->mask = im_dup_imask( mask, "conv_mask" )) ) - return( NULL ); - - return( conv ); -} - -/* Our sequence value. - */ -typedef struct { - Conv *conv; - REGION *ir; /* Input region */ - - PEL *sum; /* Line buffer */ - - int underflow; /* Underflow/overflow counts */ - int overflow; -} ConvSequence; - -/* Free a sequence value. - */ -static int -conv_stop( void *vseq, void *a, void *b ) -{ - ConvSequence *seq = (ConvSequence *) vseq; - Conv *conv = (Conv *) b; - - /* Add local under/over counts to global counts. - */ - conv->overflow += seq->overflow; - conv->underflow += seq->underflow; - - IM_FREEF( im_region_free, seq->ir ); - - return( 0 ); -} - -/* Convolution start function. - */ -static void * -conv_start( IMAGE *out, void *a, void *b ) -{ - IMAGE *in = (IMAGE *) a; - Conv *conv = (Conv *) b; - ConvSequence *seq; - - if( !(seq = IM_NEW( out, ConvSequence )) ) - return( NULL ); - - /* Init! - */ - seq->conv = conv; - seq->ir = NULL; - seq->sum = NULL; - seq->underflow = 0; - seq->overflow = 0; - - /* Attach region and arrays. 
- */ - seq->ir = im_region_create( in ); - if( vips_bandfmt_isint( conv->out->BandFmt ) ) - seq->sum = (PEL *) - IM_ARRAY( out, IM_IMAGE_N_ELEMENTS( in ), int ); - else - seq->sum = (PEL *) - IM_ARRAY( out, IM_IMAGE_N_ELEMENTS( in ), double ); - if( !seq->ir || !seq->sum ) { - conv_stop( seq, in, conv ); - return( NULL ); - } - - return( (void *) seq ); -} - -/* What we do for every point in the mask, for each pixel. - */ -#define VERTICAL_CONV { z -= 1; li -= lskip; sum += coeff[z] * vfrom[li]; } -#define HORIZONTAL_CONV { z -= 1; li -= bands; sum += coeff[z] * hfrom[li]; } - -/* INT and FLOAT inner loops. - */ -#define CONV_INT( TYPE, IM_CLIP ) { \ - TYPE *vfrom; \ - int *vto; \ - int *hfrom; \ - TYPE *hto; \ - \ - /* Convolve to sum array. We convolve the full width of \ - * this input line. \ - */ \ - vfrom = (TYPE *) IM_REGION_ADDR( ir, le, y ); \ - vto = (int *) seq->sum; \ - for( x = 0; x < isz; x++ ) { \ - int sum; \ - \ - z = conv->size; \ - li = lskip * z; \ - sum = 0; \ - \ - IM_UNROLL( z, VERTICAL_CONV ); \ - \ - sum = ((sum + rounding) / mask->scale) + mask->offset; \ - \ - vto[x] = sum; \ - vfrom += 1; \ - } \ - \ - /* Convolve sums to output. \ - */ \ - hfrom = (int *) seq->sum; \ - hto = (TYPE *) IM_REGION_ADDR( or, le, y ); \ - for( x = 0; x < osz; x++ ) { \ - int sum; \ - \ - z = conv->size; \ - li = bands * z; \ - sum = 0; \ - \ - IM_UNROLL( z, HORIZONTAL_CONV ); \ - \ - sum = ((sum + rounding) / mask->scale) + mask->offset; \ - \ - IM_CLIP; \ - \ - hto[x] = sum; \ - hfrom += 1; \ - } \ -} - -#define CONV_FLOAT( TYPE ) { \ - TYPE *vfrom; \ - double *vto; \ - double *hfrom; \ - TYPE *hto; \ - \ - /* Convolve to sum array. We convolve the full width of \ - * this input line. 
\ - */ \ - vfrom = (TYPE *) IM_REGION_ADDR( ir, le, y ); \ - vto = (double *) seq->sum; \ - for( x = 0; x < isz; x++ ) { \ - double sum; \ - \ - z = conv->size; \ - li = lskip * z; \ - sum = 0; \ - \ - IM_UNROLL( z, VERTICAL_CONV ); \ - \ - vto[x] = sum; \ - vfrom += 1; \ - } \ - \ - /* Convolve sums to output. \ - */ \ - hfrom = (double *) seq->sum; \ - hto = (TYPE *) IM_REGION_ADDR( or, le, y ); \ - for( x = 0; x < osz; x++ ) { \ - double sum; \ - \ - z = conv->size; \ - li = bands * z; \ - sum = 0; \ - \ - IM_UNROLL( z, HORIZONTAL_CONV ); \ - \ - sum = (sum / conv->scale) + mask->offset; \ - \ - hto[x] = sum; \ - hfrom += 1; \ - } \ -} - -/* Convolve! - */ -static int -conv_gen( REGION *or, void *vseq, void *a, void *b ) -{ - ConvSequence *seq = (ConvSequence *) vseq; - IMAGE *in = (IMAGE *) a; - Conv *conv = (Conv *) b; - REGION *ir = seq->ir; - INTMASK *mask = conv->mask; - - /* You might think this should be (scale+1)/2, but then we'd be adding - * one for scale == 1. - */ - int rounding = mask->scale / 2; - - int bands = in->Bands; - int *coeff = conv->mask->coeff; - - Rect *r = &or->valid; - int le = r->left; - int to = r->top; - int bo = IM_RECT_BOTTOM(r); - int osz = IM_REGION_N_ELEMENTS( or ) * - (vips_bandfmt_iscomplex( in->BandFmt ) ? 2 : 1); - - Rect s; - int lskip; - int isz; - int x, y, z, li; - - /* Prepare the section of the input image we need. A little larger - * than the section of the output image we are producing. 
- */ - s = *r; - s.width += conv->size - 1; - s.height += conv->size - 1; - if( im_prepare( ir, &s ) ) - return( -1 ); - lskip = IM_REGION_LSKIP( ir ) / IM_IMAGE_SIZEOF_ELEMENT( in ); - isz = IM_REGION_N_ELEMENTS( ir ); - - for( y = to; y < bo; y++ ) { - switch( in->BandFmt ) { - case IM_BANDFMT_UCHAR: - CONV_INT( unsigned char, IM_CLIP_UCHAR( sum, seq ) ); - break; - case IM_BANDFMT_CHAR: - CONV_INT( signed char, IM_CLIP_CHAR( sum, seq ) ); - break; - case IM_BANDFMT_USHORT: - CONV_INT( unsigned short, IM_CLIP_USHORT( sum, seq ) ); - break; - case IM_BANDFMT_SHORT: - CONV_INT( signed short, IM_CLIP_SHORT( sum, seq ) ); - break; - case IM_BANDFMT_UINT: - CONV_INT( unsigned int, IM_CLIP_NONE( sum, seq ) ); - break; - case IM_BANDFMT_INT: - CONV_INT( signed int, IM_CLIP_NONE( sum, seq ) ); - break; - case IM_BANDFMT_FLOAT: - case IM_BANDFMT_COMPLEX: - CONV_FLOAT( float ); - break; - case IM_BANDFMT_DOUBLE: - case IM_BANDFMT_DPCOMPLEX: - CONV_FLOAT( double ); - break; - - default: - g_assert( 0 ); - } - } - - return( 0 ); -} - -int -im_convsep_raw( IMAGE *in, IMAGE *out, INTMASK *mask ) -{ - Conv *conv; - - /* Check parameters. - */ - if( im_piocheck( in, out ) || - im_check_uncoded( "im_convsep", in ) || - im_check_imask( "im_convsep", mask ) ) - return( -1 ); - if( mask->xsize != 1 && mask->ysize != 1 ) { - im_error( "im_convsep", - "%s", _( "expect 1xN or Nx1 input mask" ) ); - return( -1 ); - } - if( mask->scale == 0 ) { - im_error( "im_convsep", "%s", "mask scale must be non-zero" ); - return( -1 ); - } - if( !(conv = conv_new( in, out, mask )) ) - return( -1 ); - - /* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output - * would be 1x1. - */ - if( im_cp_desc( out, in ) ) - return( -1 ); - out->Xsize -= conv->size - 1; - out->Ysize -= conv->size - 1; - if( out->Xsize <= 0 || out->Ysize <= 0 ) { - im_error( "im_convsep", "%s", _( "image too small for mask" ) ); - return( -1 ); - } - - /* SMALLTILE seems the fastest in benchmarks. 
- */ - if( im_demand_hint( out, IM_SMALLTILE, in, NULL ) || - im_generate( out, conv_start, conv_gen, conv_stop, in, conv ) ) - return( -1 ); - - out->Xoffset = -mask->xsize / 2; - out->Yoffset = -mask->ysize / 2; - - return( 0 ); -} - - -/** - * im_convsep: - * @in: input image - * @out: output image - * @mask: convolution mask - * - * Perform a separable convolution of @in with @mask using integer arithmetic. - * - * The mask must be 1xn or nx1 elements. - * The output image - * always has the same #VipsBandFmt as the input image. - * - * The image is convolved twice: once with @mask and then again with @mask - * rotated by 90 degrees. This is much faster for certain types of mask - * (gaussian blur, for example) than doing a full 2D convolution. - * - * Each output pixel is - * calculated as sigma[i]{pixel[i] * mask[i]} / scale + offset, where scale - * and offset are part of @mask. For integer @in, the division by scale - * includes round-to-nearest. - * - * See also: im_convsep_f(), im_conv(), im_create_imaskv(). 
- * - * Returns: 0 on success, -1 on error - */ -int -im_convsep( IMAGE *in, IMAGE *out, INTMASK *mask ) -{ - IMAGE *t1 = im_open_local( out, "im_convsep intermediate", "p" ); - int size = mask->xsize * mask->ysize; - - if( !t1 || - im_embed( in, t1, 1, size / 2, size / 2, - in->Xsize + size - 1, - in->Ysize + size - 1 ) || - im_convsep_raw( t1, out, mask ) ) - return( -1 ); - - out->Xoffset = 0; - out->Yoffset = 0; - - return( 0 ); -} diff --git a/libvips/include/vips/Makefile.am b/libvips/include/vips/Makefile.am index b4fa2c8b..94e1b056 100644 --- a/libvips/include/vips/Makefile.am +++ b/libvips/include/vips/Makefile.am @@ -43,6 +43,7 @@ pkginclude_HEADERS = \ transform.h \ util.h \ version.h \ + vector.h \ vips.h vipsc++.h: diff --git a/libvips/include/vips/image.h b/libvips/include/vips/image.h index 574de6f8..cbb8c067 100644 --- a/libvips/include/vips/image.h +++ b/libvips/include/vips/image.h @@ -87,7 +87,8 @@ typedef enum { IM_BANDFMT_FLOAT = 6, IM_BANDFMT_COMPLEX = 7, IM_BANDFMT_DOUBLE = 8, - IM_BANDFMT_DPCOMPLEX = 9 + IM_BANDFMT_DPCOMPLEX = 9, + IM_BANDFMT_LAST = 10 } VipsBandFmt; typedef enum { diff --git a/libvips/include/vips/util.h b/libvips/include/vips/util.h index 27080247..efdf0e7f 100644 --- a/libvips/include/vips/util.h +++ b/libvips/include/vips/util.h @@ -56,6 +56,13 @@ extern "C" { #define IM_CLIP(A,V,B) IM_MAX( (A), IM_MIN( (B), (V) ) ) #define IM_NUMBER(R) ((int)(sizeof(R)/sizeof(R[0]))) +#define IM_SWAP( TYPE, A, B ) \ +G_STMT_START { \ + TYPE t = (A); \ + (A) = (B); \ + (B) = t; \ +} G_STMT_END + #define IM_FREEF( F, S ) \ G_STMT_START \ if( S ) { \ @@ -90,7 +97,8 @@ G_STMT_START { \ /* Duff's device. Do OPERation N times in a 16-way unrolled loop. */ -#define IM_UNROLL( N, OPER ) { \ +#define IM_UNROLL( N, OPER ) \ +G_STMT_START \ if( (N) ) { \ int duff_count = ((N) + 15) / 16; \ \ @@ -114,7 +122,7 @@ G_STMT_START { \ } while( --duff_count > 0 ); \ } \ } \ -} +G_STMT_END /* Round a float to the nearest integer. Much faster than rint(). 
*/ @@ -122,7 +130,8 @@ G_STMT_START { \ /* Various integer range clips. Record over/under flows. */ -#define IM_CLIP_UCHAR( V, SEQ ) { \ +#define IM_CLIP_UCHAR( V, SEQ ) \ +G_STMT_START \ if( (V) < 0 ) { \ (SEQ)->underflow++; \ (V) = 0; \ @@ -131,9 +140,10 @@ G_STMT_START { \ (SEQ)->overflow++; \ (V) = UCHAR_MAX; \ } \ -} +G_STMT_END -#define IM_CLIP_USHORT( V, SEQ ) { \ +#define IM_CLIP_USHORT( V, SEQ ) \ +G_STMT_START \ if( (V) < 0 ) { \ (SEQ)->underflow++; \ (V) = 0; \ @@ -142,9 +152,10 @@ G_STMT_START { \ (SEQ)->overflow++; \ (V) = USHRT_MAX; \ } \ -} +G_STMT_END -#define IM_CLIP_CHAR( V, SEQ ) { \ +#define IM_CLIP_CHAR( V, SEQ ) \ +G_STMT_START \ if( (V) < SCHAR_MIN ) { \ (SEQ)->underflow++; \ (V) = SCHAR_MIN; \ @@ -153,9 +164,10 @@ G_STMT_START { \ (SEQ)->overflow++; \ (V) = SCHAR_MAX; \ } \ -} +G_STMT_END -#define IM_CLIP_SHORT( V, SEQ ) { \ +#define IM_CLIP_SHORT( V, SEQ ) \ +G_STMT_START \ if( (V) < SHRT_MIN ) { \ (SEQ)->underflow++; \ (V) = SHRT_MIN; \ @@ -164,7 +176,7 @@ G_STMT_START { \ (SEQ)->overflow++; \ (V) = SHRT_MAX; \ } \ -} +G_STMT_END #define IM_CLIP_NONE( V, SEQ ) {} diff --git a/libvips/include/vips/vector.h b/libvips/include/vips/vector.h new file mode 100644 index 00000000..a07691e7 --- /dev/null +++ b/libvips/include/vips/vector.h @@ -0,0 +1,115 @@ +/* helper stuff for Orc + * + * 29/10/10 + * - from im_dilate hackery + */ + +/* + + This file is part of VIPS. + + VIPS is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + */ + +/* + + These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk + + */ + +#ifndef IM_VECTOR_H +#define IM_VECTOR_H + +#ifdef HAVE_ORC +#include +#endif /*HAVE_ORC*/ + +#ifdef __cplusplus +extern "C" { +#endif /*__cplusplus*/ + +/* An Orc program. + */ +typedef struct { + /* Handy for debugging. + */ + const char *name; + + /* How many resources we've used so far in this codegen. + */ + int n_temp; + int n_source; + int n_destination; + int n_constant; + int n_parameter; + int n_instruction; + +#ifdef HAVE_ORC + /* The code we have generated. + */ + OrcProgram *program; +#endif /*HAVE_ORC*/ + + /* Compiled successfully. + */ + gboolean compiled; +} VipsVector; + +#ifdef HAVE_ORC +typedef OrcExecutor VipsExecutor; +#else /*!HAVE_ORC*/ +typedef int VipsExecutor; +#endif /*HAVE_ORC*/ + +/* Set from the command-line. 
+ */ +extern gboolean im__vector_enabled; + +void vips_vector_init( void ); +gboolean vips_vector_get_enabled( void ); +void vips_vector_set_enabled( gboolean enabled ); + +void vips_vector_free( VipsVector *vector ); +VipsVector *vips_vector_new_ds( const char *name, int size1, int size2 ); + +void vips_vector_constant( VipsVector *vector, + char *name, int value, int size ); +void vips_vector_source_name( VipsVector *vector, char *name, int size ); +void vips_vector_source( VipsVector *vector, char *name, int number, int size ); +void vips_vector_temporary( VipsVector *vector, char *name, int size ); +void vips_vector_asm2( VipsVector *vector, + const char *op, const char *a, const char *b ); +void vips_vector_asm3( VipsVector *vector, + const char *op, const char *a, const char *b, const char *c ); +gboolean vips_vector_full( VipsVector *vector ); + +gboolean vips_vector_compile( VipsVector *vector ); + +void vips_vector_print( VipsVector *vector ); + +void vips_executor_set_program( VipsExecutor *executor, + VipsVector *vector, int n ); +void vips_executor_set_source( VipsExecutor *executor, int n, void *value ); +void vips_executor_set_destination( VipsExecutor *executor, void *value ); +void vips_executor_set_array( VipsExecutor *executor, char *name, void *value ); + +void vips_executor_run( VipsExecutor *executor ); + +#ifdef __cplusplus +} +#endif /*__cplusplus*/ + +#endif /*IM_VECTOR_H*/ diff --git a/libvips/inplace/flood.c b/libvips/inplace/flood.c index d62967bf..b00ecd74 100644 --- a/libvips/inplace/flood.c +++ b/libvips/inplace/flood.c @@ -73,12 +73,6 @@ #include #endif /*WITH_DMALLOC*/ -#define SWAP( TYPE, A, B ) { \ - TYPE t = (A); \ - (A) = (B); \ - (B) = t; \ -} - /* Size of a scanline buffer. We allocate a list of these to hold scanlines * we need to visit. 
*/ @@ -346,7 +340,7 @@ flood_all( Flood *flood, int x, int y ) p->n = 0; } - SWAP( Buffer *, flood->in, flood->out ); + IM_SWAP( Buffer *, flood->in, flood->out ); } } diff --git a/libvips/inplace/im_draw_line.c b/libvips/inplace/im_draw_line.c index 9ee5548b..8572ffa3 100644 --- a/libvips/inplace/im_draw_line.c +++ b/libvips/inplace/im_draw_line.c @@ -64,8 +64,6 @@ #include #endif /*WITH_DMALLOC*/ -#define SWAP(A,B) {int t; t = (A); (A) = (B); (B) = t;} - typedef struct _Line { Draw draw; @@ -112,14 +110,14 @@ line_new( VipsImage *im, int x1, int y1, int x2, int y2, PEL *ink ) * right. Do diagonals here .. just have up and right and down * and right now. */ - SWAP( x1, x2 ); - SWAP( y1, y2 ); + IM_SWAP( int, x1, x2 ); + IM_SWAP( int, y1, y2 ); } else if( abs( line->dx ) < abs( line->dy ) && line->dy < 0 ) { /* Swap to get all y greater cases going down the screen. */ - SWAP( x1, x2 ); - SWAP( y1, y2 ); + IM_SWAP( int, x1, x2 ); + IM_SWAP( int, y1, y2 ); } /* Recalculate dx, dy. diff --git a/libvips/iofuncs/Makefile.am b/libvips/iofuncs/Makefile.am index 6c30108e..e1e1b456 100644 --- a/libvips/iofuncs/Makefile.am +++ b/libvips/iofuncs/Makefile.am @@ -44,6 +44,7 @@ libiofuncs_la_SOURCES = \ im_init_world.c \ buf.c \ window.c \ + vector.c \ buffer.c \ time.c diff --git a/libvips/iofuncs/im_init_world.c b/libvips/iofuncs/im_init_world.c index 3874a8d4..cb883d16 100644 --- a/libvips/iofuncs/im_init_world.c +++ b/libvips/iofuncs/im_init_world.c @@ -63,6 +63,7 @@ #include #include #include +#include #ifdef WITH_DMALLOC #include @@ -223,6 +224,10 @@ im_init_world( const char *argv0 ) */ im__buffer_init(); + /* Get the run-time compiler going. 
+ */ + vips_vector_init(); + done = TRUE; return( 0 ); @@ -268,6 +273,9 @@ static GOptionEntry option_entries[] = { { "vips-disc-threshold", 'd', 0, G_OPTION_ARG_STRING, &im__disc_threshold, N_( "image size above which to decompress to disc" ), NULL }, + { "vips-novector", 't', G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, + &im__vector_enabled, + N_( "disable vectorised versions of operations" ), NULL }, { NULL } }; diff --git a/libvips/iofuncs/vector.c b/libvips/iofuncs/vector.c new file mode 100644 index 00000000..395ec429 --- /dev/null +++ b/libvips/iofuncs/vector.c @@ -0,0 +1,330 @@ +/* helper functions for Orc + * + * 29/10/10 + * - from morph hacking + */ + +/* + + This file is part of VIPS. + + VIPS is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + */ + +/* + + These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk + + */ + +/* + + TODO + + - would setting params by index rather than name be any quicker? + + */ + +/* Verbose messages from Orc (or use ORC_DEBUG=99 on the command-line). +#define DEBUG_ORC + */ + +/* +#define DEBUG + */ + +#ifdef HAVE_CONFIG_H +#include +#endif /*HAVE_CONFIG_H*/ +#include + +#include +#include + +#ifdef WITH_DMALLOC +#include +#endif /*WITH_DMALLOC*/ + +/* Cleared by the command-line --vips-novector switch and the IM_NOVECTOR env + * var. 
+ */ +gboolean im__vector_enabled = TRUE; + +void +vips_vector_init( void ) +{ +#ifdef HAVE_ORC + orc_init(); + +#ifdef DEBUG_ORC + /* You can also do ORC_DEBUG=99 at the command-line. + */ + orc_debug_set_level( 99 ); +#endif /*DEBUG_ORC*/ + + /* Look for the environment variable IM_NOVECTOR and use that to turn + * off as well. + */ + if( g_getenv( "IM_NOVECTOR" ) ) + im__vector_enabled = FALSE; +#endif /*HAVE_ORC*/ +} + +gboolean +vips_vector_get_enabled( void ) +{ + return( im__vector_enabled ); +} + +void +vips_vector_set_enabled( gboolean enabled ) +{ + im__vector_enabled = enabled; +} + +void +vips_vector_free( VipsVector *vector ) +{ +#ifdef HAVE_ORC + IM_FREEF( orc_program_free, vector->program ); +#endif /*HAVE_ORC*/ + IM_FREE( vector ); +} + +VipsVector * +vips_vector_new_ds( const char *name, int size1, int size2 ) +{ + VipsVector *vector; + + if( !(vector = IM_NEW( NULL, VipsVector )) ) + return( NULL ); + vector->name = name; + vector->n_temp = 0; + vector->n_source = 0; + vector->n_destination = 0; + vector->n_constant = 0; + vector->n_parameter = 0; + vector->n_instruction = 0; + vector->compiled = FALSE; + +#ifdef HAVE_ORC + vector->program = orc_program_new_ds( size1, size2 ); +#endif /*HAVE_ORC*/ + vector->n_source += 1; + vector->n_destination += 1; + + return( vector ); +} + +void +vips_vector_asm2( VipsVector *vector, + const char *op, const char *a, const char *b ) +{ + vector->n_instruction += 1; + +#ifdef DEBUG + printf( " %s %s %s\n", op, a, b ); +#endif /*DEBUG*/ + +#ifdef HAVE_ORC + orc_program_append_ds_str( vector->program, op, a, b ); +#endif /*HAVE_ORC*/ +} + +void +vips_vector_asm3( VipsVector *vector, + const char *op, const char *a, const char *b, const char *c ) +{ + vector->n_instruction += 1; + +#ifdef DEBUG + printf( " %s %s %s %s\n", op, a, b, c ); +#endif /*DEBUG*/ + +#ifdef HAVE_ORC + orc_program_append_str( vector->program, op, a, b, c ); +#endif /*HAVE_ORC*/ +} + +void +vips_vector_constant( VipsVector *vector, char 
*name, int value, int size ) +{ +#ifdef HAVE_ORC + char *sname; + + if( size == 1 ) + sname = "b"; + else if( size == 2 ) + sname = "w"; + else if( size == 4 ) + sname = "l"; + else { + printf( "vips_vector_constant: bad constant size\n" ); + + /* Not really correct, heh. + */ + sname = "x"; + } + + if( value > 0 ) + im_snprintf( name, 256, "c%d%s", value, sname ); + else + im_snprintf( name, 256, "cm%d%s", -value, sname ); + + if( orc_program_find_var_by_name( vector->program, name ) == -1 ) { + orc_program_add_constant( vector->program, size, value, name ); + vector->n_constant += 1; + } +#endif /*HAVE_ORC*/ +} + +void +vips_vector_source_name( VipsVector *vector, char *name, int size ) +{ +#ifdef HAVE_ORC +#ifdef DEBUG + if( orc_program_find_var_by_name( vector->program, name ) != -1 ) + printf( "argh! source %s defined twice\n", name ); +#endif /*DEBUG*/ + + orc_program_add_source( vector->program, size, name ); + vector->n_source += 1; +#endif /*HAVE_ORC*/ +} + +void +vips_vector_source( VipsVector *vector, char *name, int number, int size ) +{ +#ifdef HAVE_ORC + im_snprintf( name, 256, "s%d", number ); + + if( orc_program_find_var_by_name( vector->program, name ) == -1 ) + vips_vector_source_name( vector, name, size ); +#endif /*HAVE_ORC*/ +} + +void +vips_vector_temporary( VipsVector *vector, char *name, int size ) +{ +#ifdef HAVE_ORC + orc_program_add_temporary( vector->program, size, name ); + vector->n_temp += 1; +#endif /*HAVE_ORC*/ +} + +gboolean +vips_vector_full( VipsVector *vector ) +{ + /* We can need a max of 2 constants plus one source per + * coefficient, so stop if we're sure we don't have enough. + * We need to stay under the 100 instruction limit too. 
+ */ + if( vector->n_constant > 16 - 2 ) + return( TRUE ); + if( vector->n_source > 8 - 1 ) + return( TRUE ); + if( vector->n_instruction > 50 ) + return( TRUE ); + + return( FALSE ); +} + +gboolean +vips_vector_compile( VipsVector *vector ) +{ +#ifdef HAVE_ORC + OrcCompileResult result; + + result = orc_program_compile( vector->program ); + if( !ORC_COMPILE_RESULT_IS_SUCCESSFUL( result ) ) { +#ifdef DEBUG + printf( "*** error compiling %s\n", vector->name ); +#endif /*DEBUG*/ + + return( FALSE ); + } + + vector->compiled = TRUE; +#endif /*HAVE_ORC*/ + + return( TRUE ); +} + +void +vips_vector_print( VipsVector *vector ) +{ + printf( "%s: ", vector->name ); + if( vector->compiled ) + printf( "successfully compiled\n" ); + else + printf( "not compiled successfully\n" ); + printf( " n_source = %d\n", vector->n_source ); + printf( " n_parameter = %d\n", vector->n_parameter ); + printf( " n_destination = %d\n", vector->n_destination ); + printf( " n_constant = %d\n", vector->n_constant ); + printf( " n_temp = %d\n", vector->n_temp ); + printf( " n_instruction = %d\n", vector->n_instruction ); +} + +void +vips_executor_set_program( VipsExecutor *executor, VipsVector *vector, int n ) +{ +#ifdef HAVE_ORC + orc_executor_set_program( executor, vector->program ); + orc_executor_set_n( executor, n ); +#endif /*HAVE_ORC*/ +} + +void +vips_executor_set_source( VipsExecutor *executor, int n, void *value ) +{ +#ifdef HAVE_ORC + char name[256]; + OrcProgram *program = executor->program; + + im_snprintf( name, 256, "s%d", n ); + if( orc_program_find_var_by_name( program, name ) != -1 ) + orc_executor_set_array_str( executor, name, value ); +#endif /*HAVE_ORC*/ +} + +void +vips_executor_set_destination( VipsExecutor *executor, void *value ) +{ +#ifdef HAVE_ORC + orc_executor_set_array_str( executor, "d1", value ); +#endif /*HAVE_ORC*/ +} + +void +vips_executor_set_array( VipsExecutor *executor, char *name, void *value ) +{ +#ifdef HAVE_ORC + OrcProgram *program = executor->program; 
+ + if( orc_program_find_var_by_name( program, name ) != -1 ) + orc_executor_set_array_str( executor, name, value ); +#endif /*HAVE_ORC*/ +} + +void +vips_executor_run( VipsExecutor *executor ) +{ +#ifdef HAVE_ORC + orc_executor_run( executor ); +#endif /*HAVE_ORC*/ +} + diff --git a/libvips/morphology/Makefile.am b/libvips/morphology/Makefile.am index 9de3bb12..1c32ef82 100644 --- a/libvips/morphology/Makefile.am +++ b/libvips/morphology/Makefile.am @@ -2,8 +2,7 @@ noinst_LTLIBRARIES = libmorphology.la libmorphology_la_SOURCES = \ im_cntlines.c \ - im_dilate.c\ - im_erode.c\ + morphology.c\ im_rank.c \ im_rank_image.c \ im_zerox.c \ diff --git a/libvips/morphology/im_dilate.c b/libvips/morphology/im_dilate.c deleted file mode 100644 index 3df3c666..00000000 --- a/libvips/morphology/im_dilate.c +++ /dev/null @@ -1,349 +0,0 @@ -/* @(#) Function which dilates a binary VASARI format picture with a mask. - * @(#) The mask coefficients are either 255 (object) or 0 (bk) or 128 (any). - * @(#) Input image are binary images with either 0 or 255 values, one channel - * @(#) only. The program dilates a white object on a black background. - * @(#) The center of the mask is at location (m->xsize/2, m->ysize/2) - * @(#) integer division. The mask is expected to have an odd width and - * @(#) height. - * @(#) - * @(#) int im_dilate(in, out, m) - * @(#) IMAGE *in, *out; - * @(#) INTMASK *m; - * @(#) - * @(#) Returns either 0 (sucess) or -1 (fail) - * - * 19/9/95 JC - * - rewritten - * 6/7/99 JC - * - small tidies - * 7/4/04 - * - now uses im_embed() with edge stretching on the input, not - * the output - * - sets Xoffset / Yoffset - * 21/4/08 - * - only rebuild the buffer offsets if bpl changes - * - small cleanups - */ - -/* - - This file is part of VIPS. 
- - VIPS is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - */ - -/* - - These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk - - */ - -/* -#define DEBUG - */ - -#ifdef HAVE_CONFIG_H -#include -#endif /*HAVE_CONFIG_H*/ -#include - -#include -#include - -#include - -#ifdef WITH_DMALLOC -#include -#endif /*WITH_DMALLOC*/ - -/* Our sequence value. - */ -typedef struct { - REGION *ir; /* Input region */ - - int *soff; /* Offsets we check for set */ - int ss; /* ... and number we check for set */ - int *coff; /* Offsets we check for clear */ - int cs; /* ... and number we check for clear */ - int last_bpl; /* Avoid recalcing offsets, if we can */ -} SeqInfo; - -/* Stop function. - */ -static int -dilate_stop( void *vseq, void *a, void *b ) -{ - SeqInfo *seq = (SeqInfo *) vseq; - - IM_FREEF( im_region_free, seq->ir ); - - return( 0 ); -} - -/* Start function. - */ -static void * -dilate_start( IMAGE *out, void *a, void *b ) -{ - IMAGE *in = (IMAGE *) a; - INTMASK *msk = (INTMASK *) b; - int sz = msk->xsize * msk->ysize; - SeqInfo *seq; - - if( !(seq = IM_NEW( out, SeqInfo )) ) - return( NULL ); - - /* Init! - */ - seq->ir = NULL; - seq->soff = NULL; - seq->ss = 0; - seq->coff = NULL; - seq->cs = 0; - seq->last_bpl = -1; - - /* Attach region and arrays. 
- */ - seq->ir = im_region_create( in ); - seq->soff = IM_ARRAY( out, sz, int ); - seq->coff = IM_ARRAY( out, sz, int ); - if( !seq->ir || !seq->soff || !seq->coff ) { - dilate_stop( seq, in, NULL ); - return( NULL ); - } - - return( seq ); -} - -/* Dilate! - */ -static int -dilate_gen( REGION *or, void *vseq, void *a, void *b ) -{ - SeqInfo *seq = (SeqInfo *) vseq; - INTMASK *msk = (INTMASK *) b; - REGION *ir = seq->ir; - - int *soff = seq->soff; - int *coff = seq->coff; - - Rect *r = &or->valid; - Rect s; - int le = r->left; - int to = r->top; - int bo = IM_RECT_BOTTOM(r); - int sz = IM_REGION_N_ELEMENTS( or ); - - int *t; - - int x, y; - int result, i; - - /* Prepare the section of the input image we need. A little larger - * than the section of the output image we are producing. - */ - s = *r; - s.width += msk->xsize - 1; - s.height += msk->ysize - 1; - if( im_prepare( ir, &s ) ) - return( -1 ); - -#ifdef DEBUG - printf( "erode_gen: preparing %dx%d pixels\n", s.width, s.height ); -#endif /*DEBUG*/ - - /* Scan mask, building offsets we check when processing. Only do this - * if the bpl has changed since the previous im_prepare(). - */ - if( seq->last_bpl != IM_REGION_LSKIP( ir ) ) { - seq->last_bpl = IM_REGION_LSKIP( ir ); - - seq->ss = 0; - seq->cs = 0; - for( t = msk->coeff, y = 0; y < msk->ysize; y++ ) - for( x = 0; x < msk->xsize; x++, t++ ) - switch( *t ) { - case 255: - soff[seq->ss++] = - IM_REGION_ADDR( ir, - x + le, y + to ) - - IM_REGION_ADDR( ir, le, to ); - break; - - case 128: - break; - - case 0: - coff[seq->cs++] = - IM_REGION_ADDR( ir, - x + le, y + to ) - - IM_REGION_ADDR( ir, le, to ); - break; - - default: - im_error( "im_dilate", - _( "bad mask element (%d " - "should be 0, 128 or 255)" ), - *t ); - return( -1 ); - } - } - - /* Dilate! - */ - for( y = to; y < bo; y++ ) { - PEL *p = (PEL *) IM_REGION_ADDR( ir, le, y ); - PEL *q = (PEL *) IM_REGION_ADDR( or, le, y ); - - /* Loop along line. 
- */ - for( x = 0; x < sz; x++, q++, p++ ) { - /* Search for a hit on the set list. - */ - result = 0; - for( i = 0; i < seq->ss; i++ ) - if( p[soff[i]] ) { - /* Found a match! - */ - result = 255; - break; - } - - /* No set pixels ... search for a hit in the clear - * pixels. - */ - if( !result ) - for( i = 0; i < seq->cs; i++ ) - if( !p[coff[i]] ) { - /* Found a match! - */ - result = 255; - break; - } - - *q = result; - - } - } - - return( 0 ); -} - -/* Dilate an image. - */ -int -im_dilate_raw( IMAGE *in, IMAGE *out, INTMASK *m ) -{ - INTMASK *msk; - - /* Check mask has odd number of elements in width and height. - */ - if( m->xsize < 1 || !(m->xsize & 0x1) || - m->ysize < 1 || !(m->ysize & 0x1) ) { - im_error( "im_dilate", "%s", _( "mask size not odd" ) ); - return( -1 ); - } - - /* Standard checks. - */ - if( im_piocheck( in, out ) ) - return( -1 ); - if( in->Coding != IM_CODING_NONE || - in->BandFmt != IM_BANDFMT_UCHAR ) { - im_error( "im_dilate", "%s", _( "uchar uncoded only" ) ); - return( -1 ); - } - if( im_cp_desc( out, in ) ) - return( -1 ); - - /* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output - * would be 1x1. - */ - if( im_cp_desc( out, in ) ) - return( -1 ); - out->Xsize -= m->xsize - 1; - out->Ysize -= m->ysize - 1; - if( out->Xsize <= 0 || out->Ysize <= 0 ) { - im_error( "im_dilate", "%s", _( "image too small for mask" ) ); - return( -1 ); - } - - /* Take a copy of m. - */ - if( !(msk = im_dup_imask( m, "conv_mask" )) ) - return( -1 ); - if( im_add_close_callback( out, - (im_callback_fn) im_free_imask, msk, NULL ) ) { - im_free_imask( msk ); - return( -1 ); - } - - /* Set demand hints. FATSTRIP is good for us, as THINSTRIP will cause - * too many recalculations on overlaps. - */ - if( im_demand_hint( out, IM_FATSTRIP, in, NULL ) ) - return( -1 ); - - /* Generate! 
- */ - if( im_generate( out, dilate_start, dilate_gen, dilate_stop, in, msk ) ) - return( -1 ); - - out->Xoffset = -m->xsize / 2; - out->Yoffset = -m->ysize / 2; - - return( 0 ); -} - - -/** - * im_dilate: - * @in: input image - * @out: output image - * @m: dilate mask - * - * Dilate an image with a mask. - * The mask coefficients are either 255 (object) or 0 (black) or 128 (don't - * care). - * Input image are binary images with either 0 or 255 values, one channel - * only. The program dilates a white object on a black background. - * The center of the mask is at location (m->xsize/2, m->ysize/2) - * integer division. The mask is expected to have an odd width and - * height. - -sets pixels in the output if - * - * See also: im_erode(). - * - * Returns: 0 on success, -1 on error - */ -int -im_dilate( IMAGE *in, IMAGE *out, INTMASK *m ) -{ - IMAGE *t1 = im_open_local( out, "im_dilate:1", "p" ); - - if( !t1 || - im_embed( in, t1, 1, m->xsize / 2, m->ysize / 2, - in->Xsize + m->xsize - 1, - in->Ysize + m->ysize - 1 ) || - im_dilate_raw( t1, out, m ) ) - return( -1 ); - - out->Xoffset = 0; - out->Yoffset = 0; - - return( 0 ); -} diff --git a/libvips/morphology/im_erode.c b/libvips/morphology/im_erode.c deleted file mode 100644 index ccf983c3..00000000 --- a/libvips/morphology/im_erode.c +++ /dev/null @@ -1,325 +0,0 @@ -/* @(#) Function which erodes a binary VASARI format picture with a mask. - * @(#) The mask coefficients are either 255 (object) or 0 (bk) or 128 (any). - * @(#) Input image are binary images with either 0 or 255 values, one channel - * @(#) only. The program erodes a white object on a black background. - * @(#) The center of the mask is at location (m->xsize/2, m->ysize/2) - * @(#) integer division. The mask is expected to have an odd width and - * @(#) height. 
- * @(#) - * @(#) int im_erode(in, out, m) - * @(#) IMAGE *in, *out; - * @(#) INTMASK *m; - * @(#) - * @(#) Returns either 0 (sucess) or -1 (fail) - * - * 19/9/95 JC - * - rewrite - * 6/7/99 JC - * - checks and small tidies - * 7/4/04 - * - now uses im_embed() with edge stretching on the input, not - * the output - * - sets Xoffset / Yoffset - * 21/4/08 - * - only rebuild the buffer offsets if bpl changes - * - small cleanups - */ - -/* - - This file is part of VIPS. - - VIPS is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - */ - -/* - - These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk - - */ - -/* -#define DEBUG - */ - -#ifdef HAVE_CONFIG_H -#include -#endif /*HAVE_CONFIG_H*/ -#include - -#include -#include - -#include - -#ifdef WITH_DMALLOC -#include -#endif /*WITH_DMALLOC*/ - -/* Our sequence value. - */ -typedef struct { - REGION *ir; /* Input region */ - - int *soff; /* Offsets we check for set */ - int ss; /* ... and number we check for set */ - int *coff; /* Offsets we check for clear */ - int cs; /* ... and number we check for clear */ - int last_bpl; /* Avoid recalcing offsets, if we can */ -} SeqInfo; - -/* Stop function. 
- */ -static int -erode_stop( void *vseq, void *a, void *b ) -{ - SeqInfo *seq = (SeqInfo *) vseq; - - IM_FREEF( im_region_free, seq->ir ); - - return( 0 ); -} - -/* Start function. - */ -static void * -erode_start( IMAGE *out, void *a, void *b ) -{ - IMAGE *in = (IMAGE *) a; - INTMASK *msk = (INTMASK *) b; - SeqInfo *seq; - int sz = msk->xsize * msk->ysize; - - if( !(seq = IM_NEW( out, SeqInfo )) ) - return( NULL ); - - /* Init! - */ - seq->ir = NULL; - seq->soff = NULL; - seq->ss = 0; - seq->coff = NULL; - seq->cs = 0; - seq->last_bpl = -1; - - /* Attach region and arrays. - */ - seq->ir = im_region_create( in ); - seq->soff = IM_ARRAY( out, sz, int ); - seq->coff = IM_ARRAY( out, sz, int ); - if( !seq->ir || !seq->soff || !seq->coff ) { - erode_stop( seq, in, NULL ); - return( NULL ); - } - - return( (void *) seq ); -} - -/* Erode! - */ -static int -erode_gen( REGION *or, void *vseq, void *a, void *b ) -{ - SeqInfo *seq = (SeqInfo *) vseq; - INTMASK *msk = (INTMASK *) b; - REGION *ir = seq->ir; - - int *soff = seq->soff; - int *coff = seq->coff; - - Rect *r = &or->valid; - Rect s; - int le = r->left; - int to = r->top; - int bo = IM_RECT_BOTTOM(r); - int sz = IM_REGION_N_ELEMENTS( or ); - - int *t; - - int x, y; - int result, i; - - /* Prepare the section of the input image we need. A little larger - * than the section of the output image we are producing. - */ - s = *r; - s.width += msk->xsize - 1; - s.height += msk->ysize - 1; - if( im_prepare( ir, &s ) ) - return( -1 ); - -#ifdef DEBUG - printf( "erode_gen: preparing %dx%d pixels\n", s.width, s.height ); -#endif /*DEBUG*/ - - /* Scan mask, building offsets we check when processing. Only do this - * if the bpl has changed since the previous im_prepare(). 
- */ - if( seq->last_bpl != IM_REGION_LSKIP( ir ) ) { - seq->last_bpl = IM_REGION_LSKIP( ir ); - - seq->ss = 0; - seq->cs = 0; - for( t = msk->coeff, y = 0; y < msk->ysize; y++ ) - for( x = 0; x < msk->xsize; x++, t++ ) - switch( *t ) { - case 255: - soff[seq->ss++] = - IM_REGION_ADDR( ir, - x + le, y + to ) - - IM_REGION_ADDR( ir, le, to ); - break; - - case 128: - break; - - case 0: - coff[seq->cs++] = - IM_REGION_ADDR( ir, - x + le, y + to ) - - IM_REGION_ADDR( ir, le, to ); - break; - - default: - im_error( "im_erode", - _( "bad mask element (%d " - "should be 0, 128 or 255)" ), - *t ); - return( -1 ); - } - } - - /* Erode! - */ - for( y = to; y < bo; y++ ) { - PEL *p = (PEL *) IM_REGION_ADDR( ir, le, y ); - PEL *q = (PEL *) IM_REGION_ADDR( or, le, y ); - - /* Loop along line. - */ - for( x = 0; x < sz; x++, q++, p++ ) { - /* Check all set pixels are set. - */ - result = 255; - for( i = 0; i < seq->ss; i++ ) - if( !p[soff[i]] ) { - /* Found a mismatch! - */ - result = 0; - break; - } - - /* Check all clear pixels are clear. - */ - if( result ) - for( i = 0; i < seq->cs; i++ ) - if( p[coff[i]] ) { - result = 0; - break; - } - - *q = result; - } - } - - return( 0 ); -} - -/* Erode an image. - */ -int -im_erode_raw( IMAGE *in, IMAGE *out, INTMASK *m ) -{ - INTMASK *msk; - - /* Check mask has odd number of elements in width and height. - */ - if( m->xsize < 1 || !(m->xsize & 0x1) || - m->ysize < 1 || !(m->ysize & 0x1) ) { - im_error( "im_erode", "%s", _( "mask size not odd" ) ); - return( -1 ); - } - - /* Standard checks. - */ - if( im_piocheck( in, out ) ) - return( -1 ); - if( in->Coding != IM_CODING_NONE || - in->BandFmt != IM_BANDFMT_UCHAR ) { - im_error( "im_erode", "%s", _( "1-band uchar uncoded only" ) ); - return( -1 ); - } - if( im_cp_desc( out, in ) ) - return( -1 ); - - /* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output - * would be 1x1. 
- */ - if( im_cp_desc( out, in ) ) - return( -1 ); - out->Xsize -= m->xsize - 1; - out->Ysize -= m->ysize - 1; - if( out->Xsize <= 0 || out->Ysize <= 0 ) { - im_error( "im_erode", "%s", _( "image too small for mask" ) ); - return( -1 ); - } - - /* Take a copy of m. - */ - if( !(msk = im_dup_imask( m, "conv_mask" )) ) - return( -1 ); - if( im_add_close_callback( out, - (im_callback_fn) im_free_imask, msk, NULL ) ) { - im_free_imask( msk ); - return( -1 ); - } - - /* Set demand hints. FATSTRIP is good for us, as THINSTRIP will cause - * too many recalculations on overlaps. - */ - if( im_demand_hint( out, IM_FATSTRIP, in, NULL ) ) - return( -1 ); - - /* Generate! - */ - if( im_generate( out, erode_start, erode_gen, erode_stop, in, msk ) ) - return( -1 ); - - out->Xoffset = -m->xsize / 2; - out->Yoffset = -m->ysize / 2; - - return( 0 ); -} - -/* The above, with a border to make out the same size as in. - */ -int -im_erode( IMAGE *in, IMAGE *out, INTMASK *m ) -{ - IMAGE *t1 = im_open_local( out, "im_erode:1", "p" ); - - if( !t1 || - im_embed( in, t1, 1, m->xsize / 2, m->ysize / 2, - in->Xsize + m->xsize - 1, - in->Ysize + m->ysize - 1 ) || - im_erode_raw( t1, out, m ) ) - return( -1 ); - - out->Xoffset = 0; - out->Yoffset = 0; - - return( 0 ); -} diff --git a/libvips/morphology/morphology.c b/libvips/morphology/morphology.c new file mode 100644 index 00000000..64339355 --- /dev/null +++ b/libvips/morphology/morphology.c @@ -0,0 +1,823 @@ +/* morphological operators + * + * 19/9/95 JC + * - rewritten + * 6/7/99 JC + * - small tidies + * 7/4/04 + * - now uses im_embed() with edge stretching on the input, not + * the output + * - sets Xoffset / Yoffset + * 21/4/08 + * - only rebuild the buffer offsets if bpl changes + * - small cleanups + * 25/10/10 + * - start again from the Orc'd im_conv + * 29/10/10 + * - use VipsVector + * - do erode as well + */ + +/* + + This file is part of VIPS. 
+
+    VIPS is free software; you can redistribute it and/or modify
+    it under the terms of the GNU Lesser General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ */
+
+/*
+
+    These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk
+
+ */
+
+/*
+#define DEBUG
+ */
+
+/* NOTE(review): the header names below were lost when this patch was
+ * extracted; they are restored from the usual VIPS layout -- confirm
+ * against the repository.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /*HAVE_CONFIG_H*/
+#include <vips/intl.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+
+#include <vips/vips.h>
+#include <vips/vector.h>
+
+#ifdef WITH_DMALLOC
+#include <dmalloc.h>
+#endif /*WITH_DMALLOC*/
+
+/* The two operators we implement. They are more hit-miss, really.
+ */
+typedef enum {
+	ERODE,
+	DILATE
+} MorphOp;
+
+/* We can't run more than this many passes. Larger than this and we
+ * fall back to C.
+ */
+#define MAX_PASSES (10)
+
+/* A pass with a vector.
+ */
+typedef struct {
+	int first;		/* The index of the first mask coff we use */
+	int last;		/* The index of the last mask coff we use */
+
+	/* The code we generate for this section of this mask.
+	 */
+	VipsVector *vector;
+} Pass;
+
+/* Our parameters.
+ */
+typedef struct {
+	IMAGE *in;
+	IMAGE *out;
+	INTMASK *mask;		/* Copy of mask arg */
+	MorphOp op;
+
+	/* The passes we generate for this mask.
+	 */
+	int n_pass;
+	Pass pass[MAX_PASSES];
+} Morph;
+
+/* Free the vector program held by every pass and reset the pass count.
+ * morphology() only picks the vector path when n_pass is non-zero, so
+ * after this we run the C code.
+ */
+static void
+pass_free( Morph *morph )
+{
+	int i;
+
+	for( i = 0; i < morph->n_pass; i++ )
+		IM_FREEF( vips_vector_free, morph->pass[i].vector );
+	morph->n_pass = 0;
+}
+
+/* Close callback: drop our copy of the mask and any compiled passes.
+ */
+static int
+morph_close( Morph *morph )
+{
+	IM_FREEF( im_free_imask, morph->mask );
+	pass_free( morph );
+
+	return( 0 );
+}
+
+#define TEMP( N, S ) vips_vector_temporary( v, N, S )
+#define SRC( N, P, S ) vips_vector_source( v, N, P, S )
+#define CONST( N, V, S ) vips_vector_constant( v, N, V, S )
+#define ASM2( OP, A, B ) vips_vector_asm2( v, OP, A, B )
+#define ASM3( OP, A, B, C ) vips_vector_asm3( v, OP, A, B, C )
+
+/* Generate code for a section of the mask. first is the index we start
+ * at, we set last to the index of the last one we use before we run
+ * out of intermediates / constants / parameters / sources or mask
+ * coefficients.
+ *
+ * If every coefficient from first onwards is don't-care, no pass is
+ * added and last is set to the final mask index, so the caller's
+ * "last + 1" steps past the end of the mask.
+ *
+ * 0 for success, -1 on error.
+ */
+static int
+pass_compile_section( Morph *morph, int first, int *last )
+{
+	INTMASK *mask = morph->mask;
+	const int n_mask = mask->xsize * mask->ysize;
+
+	Pass *pass;
+	VipsVector *v;
+	char offset[256];
+	char source[256];
+	char zero[256];
+	char one[256];
+	int i;
+
+	/* Skip any don't-care coefficients at the start of the mask region.
+	 * Test the index first so we never read past the end of coeff[].
+	 */
+	for( ; first < n_mask && mask->coeff[first] == 128; first++ )
+		;
+	if( first >= n_mask ) {
+		/* Nothing left to compile, but the caller still reads *last.
+		 */
+		*last = n_mask - 1;
+		return( 0 );
+	}
+
+	/* Allocate space for another pass.
+	 */
+	if( morph->n_pass == MAX_PASSES )
+		return( -1 );
+	pass = &morph->pass[morph->n_pass];
+	morph->n_pass += 1;
+	pass->first = first;
+
+	/* Start with a single source scanline, we add more as we need them.
+	 */
+	pass->vector = v = vips_vector_new_ds( "morph", 1, 1 );
+
+	/* The value we fetch from the image,
+	 * the accumulated sum.
+	 */
+	TEMP( "value", 1 );
+	TEMP( "sum", 1 );
+
+	CONST( zero, 0, 1 );
+	CONST( one, 255, 1 );
+
+	/* Init the sum. If this is the first pass, it's a constant. If this
+	 * is a later pass, we have to init the sum from the result
+	 * of the previous pass.
+	 */
+	if( morph->n_pass == 1 ) {
+		if( morph->op == DILATE )
+			ASM2( "copyb", "sum", zero );
+		else
+			ASM2( "copyb", "sum", one );
+	}
+	else {
+		/* "r" is the result of the previous pass.
+		 */
+		vips_vector_source_name( v, "r", 1 );
+		ASM2( "loadb", "sum", "r" );
+	}
+
+	for( i = first; i < n_mask; i++ ) {
+		int x = i % mask->xsize;
+		int y = i / mask->xsize;
+
+		/* Exclude don't-care elements.
+		 */
+		if( mask->coeff[i] == 128 )
+			continue;
+
+		/* The source. s1 is the first scanline in the mask.
+		 */
+		vips_vector_source( v, source, y + 1, 1 );
+
+		/* The offset, only for non-first-columns though.
+		 */
+		if( x > 0 ) {
+			CONST( offset, morph->in->Bands * x, 1 );
+			ASM3( "loadoffb", "value", source, offset );
+		}
+		else
+			ASM2( "loadb", "value", source );
+
+		/* Join to our sum. If the mask element is zero, we have to
+		 * add an extra negate.
+		 *
+		 * We negate the pixel explicitly for both operators. Orc's
+		 * andnb is (~a) & b, so "andnb sum, sum, value" would
+		 * complement the accumulator, not the pixel.
+		 */
+		if( !mask->coeff[i] )
+			ASM3( "xorb", "value", "value", one );
+		if( morph->op == DILATE )
+			ASM3( "orb", "sum", "sum", "value" );
+		else
+			ASM3( "andb", "sum", "sum", "value" );
+
+		if( vips_vector_full( v ) )
+			break;
+	}
+
+	/* If we stopped because the vector filled up, i is the last
+	 * element we used. If we ran off the end of the mask, the last
+	 * element we can have used is n_mask - 1.
+	 */
+	pass->last = i < n_mask ? i : n_mask - 1;
+	*last = i;
+
+	ASM2( "copyb", "d1", "sum" );
+
+	if( !vips_vector_compile( v ) )
+		return( -1 );
+
+#ifdef DEBUG
+	printf( "done matrix coeffs %d to %d\n", pass->first, pass->last );
+	vips_vector_print( v );
+#endif /*DEBUG*/
+
+	return( 0 );
+}
+
+/* Generate a set of passes.
+ *
+ * 0 for success, -1 if any section fails to compile or we need more
+ * than MAX_PASSES passes.
+ */
+static int
+pass_compile( Morph *morph )
+{
+	INTMASK *mask = morph->mask;
+	const int n_mask = mask->xsize * mask->ysize;
+
+	int i;
+
+#ifdef DEBUG
+	printf( "morph: generating vector code\n" );
+#endif /*DEBUG*/
+
+	/* Generate passes until we've used up the whole mask.
+	 */
+	for( i = 0;;) {
+		int last;
+
+		if( pass_compile_section( morph, i, &last ) )
+			return( -1 );
+		i = last + 1;
+
+		if( i >= n_mask )
+			break;
+	}
+
+	return( 0 );
+}
+
+/* Check the arguments, then build a Morph for this image / mask / op.
+ * The Morph is allocated local to out and cleaned up by a close
+ * callback on out.
+ *
+ * Returns NULL on error.
+ */
+static Morph *
+morph_new( IMAGE *in, IMAGE *out, INTMASK *mask, MorphOp op )
+{
+	Morph *morph;
+	int n_mask;
+	int i;
+
+	if( im_piocheck( in, out ) ||
+		im_check_uncoded( "morph", in ) ||
+		im_check_format( "morph", in, IM_BANDFMT_UCHAR ) ||
+		im_check_imask( "morph", mask ) )
+		return( NULL );
+
+	/* Only look inside the mask once it has been checked.
+	 */
+	n_mask = mask->xsize * mask->ysize;
+	for( i = 0; i < n_mask; i++ )
+		if( mask->coeff[i] != 0 &&
+			mask->coeff[i] != 128 &&
+			mask->coeff[i] != 255 ) {
+			im_error( "morph",
+				_( "bad mask element (%d "
+				"should be 0, 128 or 255)" ),
+				mask->coeff[i] );
+			return( NULL );
+		}
+
+	if( !(morph = IM_NEW( out, Morph )) )
+		return( NULL );
+
+	morph->in = in;
+	morph->out = out;
+	morph->mask = NULL;
+	morph->op = op;
+
+	morph->n_pass = 0;
+	for( i = 0; i < MAX_PASSES; i++ )
+		morph->pass[i].vector = NULL;
+
+	if( im_add_close_callback( out,
+		(im_callback_fn) morph_close, morph, NULL ) ||
+		!(morph->mask = im_dup_imask( mask, "morph" )) )
+		return( NULL );
+
+	/* Generate code for this mask / image, if possible. A compile
+	 * failure is not an error: we throw the passes away and the C
+	 * path is used instead.
+	 */
+	if( vips_vector_get_enabled() ) {
+		if( pass_compile( morph ) )
+			pass_free( morph );
+	}
+
+	return( morph );
+}
+
+/* Our sequence value.
+ */
+typedef struct {
+	Morph *morph;
+	REGION *ir;		/* Input region */
+
+	int *soff;		/* Offsets we check for set */
+	int ss;			/* ... and number we check for set */
+	int *coff;		/* Offsets we check for clear */
+	int cs;			/* ... and number we check for clear */
+
+	int last_bpl;		/* Avoid recalcing offsets, if we can */
+
+	/* In vector mode we need a pair of intermediate buffers to keep the
+	 * results of each pass in.
+	 */
+	void *t1;
+	void *t2;
+} MorphSequence;
+
+/* Free a sequence value.
+ */
+static int
+morph_stop( void *vseq, void *a, void *b )
+{
+	MorphSequence *seq = (MorphSequence *) vseq;
+
+	IM_FREEF( im_region_free, seq->ir );
+	IM_FREE( seq->t1 );
+	IM_FREE( seq->t2 );
+
+	return( 0 );
+}
+
+/* Morph start function. a is the input image, b is our Morph.
+ *
+ * Returns a new MorphSequence, or NULL on error.
+ */
+static void *
+morph_start( IMAGE *out, void *a, void *b )
+{
+	IMAGE *in = (IMAGE *) a;
+	Morph *morph = (Morph *) b;
+	int n_mask = morph->mask->xsize * morph->mask->ysize;
+	int sz = IM_IMAGE_N_ELEMENTS( in );
+
+	MorphSequence *seq;
+
+	if( !(seq = IM_NEW( out, MorphSequence )) )
+		return( NULL );
+
+	/* Init!
+	 */
+	seq->morph = morph;
+	seq->ir = NULL;
+	seq->soff = NULL;
+	seq->ss = 0;
+	seq->coff = NULL;
+	seq->cs = 0;
+	seq->last_bpl = -1;
+	seq->t1 = NULL;
+	seq->t2 = NULL;
+
+	/* Attach region and arrays. The offset arrays are local to out;
+	 * t1 / t2 are not, so morph_stop() has to free them.
+	 */
+	seq->ir = im_region_create( in );
+	seq->soff = IM_ARRAY( out, n_mask, int );
+	seq->coff = IM_ARRAY( out, n_mask, int );
+	seq->t1 = IM_ARRAY( NULL, sz, PEL );
+	seq->t2 = IM_ARRAY( NULL, sz, PEL );
+	if( !seq->ir || !seq->soff || !seq->coff || !seq->t1 || !seq->t2 ) {
+		morph_stop( seq, in, NULL );
+		return( NULL );
+	}
+
+	return( seq );
+}
+
+/* Scan mask, building offsets we check when processing. Only do this
+ * if the bpl has changed since the previous im_prepare().
+ *
+ * le / to are the left / top of the area being generated; offsets are
+ * relative to the input pel at that position. Shared by the C dilate
+ * and erode paths.
+ */
+static void
+morph_update_offsets( MorphSequence *seq, INTMASK *mask, int le, int to )
+{
+	REGION *ir = seq->ir;
+
+	int *t;
+	int x, y;
+
+	if( seq->last_bpl == IM_REGION_LSKIP( ir ) )
+		return;
+	seq->last_bpl = IM_REGION_LSKIP( ir );
+
+	seq->ss = 0;
+	seq->cs = 0;
+	for( t = mask->coeff, y = 0; y < mask->ysize; y++ )
+		for( x = 0; x < mask->xsize; x++, t++ )
+			switch( *t ) {
+			case 255:
+				seq->soff[seq->ss++] =
+					IM_REGION_ADDR( ir,
+						x + le, y + to ) -
+					IM_REGION_ADDR( ir, le, to );
+				break;
+
+			case 128:
+				break;
+
+			case 0:
+				seq->coff[seq->cs++] =
+					IM_REGION_ADDR( ir,
+						x + le, y + to ) -
+					IM_REGION_ADDR( ir, le, to );
+				break;
+
+			default:
+				/* morph_new() has already checked the mask.
+				 */
+				g_assert( 0 );
+			}
+}
+
+/* Dilate!
+ */
+static int
+dilate_gen( REGION *or, void *vseq, void *a, void *b )
+{
+	MorphSequence *seq = (MorphSequence *) vseq;
+	Morph *morph = (Morph *) b;
+	INTMASK *mask = morph->mask;
+	REGION *ir = seq->ir;
+
+	int *soff = seq->soff;
+	int *coff = seq->coff;
+
+	Rect *r = &or->valid;
+	Rect s;
+	int le = r->left;
+	int to = r->top;
+	int bo = IM_RECT_BOTTOM( r );
+	int sz = IM_REGION_N_ELEMENTS( or );
+
+	int x, y;
+	int result, i;
+
+	/* Prepare the section of the input image we need. A little larger
+	 * than the section of the output image we are producing.
+	 */
+	s = *r;
+	s.width += mask->xsize - 1;
+	s.height += mask->ysize - 1;
+	if( im_prepare( ir, &s ) )
+		return( -1 );
+
+	morph_update_offsets( seq, mask, le, to );
+
+	/* Dilate!
+	 */
+	for( y = to; y < bo; y++ ) {
+		PEL *p = (PEL *) IM_REGION_ADDR( ir, le, y );
+		PEL *q = (PEL *) IM_REGION_ADDR( or, le, y );
+
+		/* Loop along line.
+		 */
+		for( x = 0; x < sz; x++, q++, p++ ) {
+			/* Search for a hit on the set list.
+			 */
+			result = 0;
+			for( i = 0; i < seq->ss; i++ )
+				if( p[soff[i]] ) {
+					/* Found a match!
+					 */
+					result = 255;
+					break;
+				}
+
+			/* No set pixels ... search for a hit in the clear
+			 * pixels.
+			 */
+			if( !result )
+				for( i = 0; i < seq->cs; i++ )
+					if( !p[coff[i]] ) {
+						/* Found a match!
+						 */
+						result = 255;
+						break;
+					}
+
+			*q = result;
+		}
+	}
+
+	return( 0 );
+}
+
+/* Erode!
+ */
+static int
+erode_gen( REGION *or, void *vseq, void *a, void *b )
+{
+	MorphSequence *seq = (MorphSequence *) vseq;
+
+	/* morphology() hands im_generate() the Morph as b, not the mask.
+	 */
+	Morph *morph = (Morph *) b;
+	INTMASK *mask = morph->mask;
+	REGION *ir = seq->ir;
+
+	int *soff = seq->soff;
+	int *coff = seq->coff;
+
+	Rect *r = &or->valid;
+	Rect s;
+	int le = r->left;
+	int to = r->top;
+	int bo = IM_RECT_BOTTOM( r );
+	int sz = IM_REGION_N_ELEMENTS( or );
+
+	int x, y;
+	int result, i;
+
+	/* Prepare the section of the input image we need. A little larger
+	 * than the section of the output image we are producing.
+	 */
+	s = *r;
+	s.width += mask->xsize - 1;
+	s.height += mask->ysize - 1;
+	if( im_prepare( ir, &s ) )
+		return( -1 );
+
+#ifdef DEBUG
+	printf( "erode_gen: preparing %dx%d pixels\n", s.width, s.height );
+#endif /*DEBUG*/
+
+	morph_update_offsets( seq, mask, le, to );
+
+	/* Erode!
+	 */
+	for( y = to; y < bo; y++ ) {
+		PEL *p = (PEL *) IM_REGION_ADDR( ir, le, y );
+		PEL *q = (PEL *) IM_REGION_ADDR( or, le, y );
+
+		/* Loop along line.
+		 */
+		for( x = 0; x < sz; x++, q++, p++ ) {
+			/* Check all set pixels are set.
+			 */
+			result = 255;
+			for( i = 0; i < seq->ss; i++ )
+				if( !p[soff[i]] ) {
+					/* Found a mismatch!
+					 */
+					result = 0;
+					break;
+				}
+
+			/* Check all clear pixels are clear.
+			 */
+			if( result )
+				for( i = 0; i < seq->cs; i++ )
+					if( p[coff[i]] ) {
+						result = 0;
+						break;
+					}
+
+			*q = result;
+		}
+	}
+
+	return( 0 );
+}
+
+/* Run one pass for one output scanline. x, y is the position in ir of
+ * the top-left pel under the mask, t1 is the result of the previous
+ * pass, t2 is where this pass writes.
+ */
+static void
+pass_run( Morph *morph, Pass *pass, VipsExecutor *executor,
+	REGION *ir, void *t1, void *t2, int x, int y )
+{
+	INTMASK *mask = morph->mask;
+	int top = pass->first / mask->xsize;
+	int bottom = pass->last / mask->xsize;
+
+	PEL *p = (PEL *) IM_REGION_ADDR( ir, x, y );
+	int lsk = IM_REGION_LSKIP( ir );
+
+	int i;
+
+	/* Generate all the scanline pointers this prog needs.
+	 */
+	for( i = top; i <= bottom; i++ )
+		vips_executor_set_source( executor, i + 1, p + i * lsk );
+
+	/* It might need the result from a previous pass.
+	 */
+	vips_executor_set_array( executor, "r", t1 );
+
+	vips_executor_set_array( executor, "d1", t2 );
+
+	vips_executor_run( executor );
+}
+
+/* The vector codepath.
+ */
+static int
+morph_vector_gen( REGION *or, void *vseq, void *a, void *b )
+{
+	MorphSequence *seq = (MorphSequence *) vseq;
+	Morph *morph = (Morph *) b;
+	INTMASK *mask = morph->mask;
+	REGION *ir = seq->ir;
+	Rect *r = &or->valid;
+	int sz = IM_REGION_N_ELEMENTS( or );
+
+	Rect s;
+	int y, j;
+	VipsExecutor executor[MAX_PASSES];
+
+	/* Prepare the section of the input image we need. A little larger
+	 * than the section of the output image we are producing.
+	 */
+	s = *r;
+	s.width += mask->xsize - 1;
+	s.height += mask->ysize - 1;
+	if( im_prepare( ir, &s ) )
+		return( -1 );
+
+	for( j = 0; j < morph->n_pass; j++ )
+		vips_executor_set_program( &executor[j],
+			morph->pass[j].vector, sz );
+
+	for( y = 0; y < r->height; y++ ) {
+		for( j = 0; j < morph->n_pass; j++ ) {
+			void *d;
+
+			/* The last pass goes to the output image,
+			 * intermediate passes go to t2.
+			 */
+			if( j == morph->n_pass - 1 )
+				d = IM_REGION_ADDR( or, r->left, r->top + y );
+			else
+				d = seq->t2;
+
+			pass_run( morph, &morph->pass[j], &executor[j],
+				ir, seq->t1, d, r->left, r->top + y );
+
+			/* What we just wrote to t2 is the next pass's
+			 * input.
+			 */
+			IM_SWAP( void *, seq->t1, seq->t2 );
+		}
+	}
+
+	return( 0 );
+}
+
+/* Morph an image. out is smaller than in by the size of the mask, less
+ * one, in each direction.
+ */
+static int
+morphology( IMAGE *in, IMAGE *out, INTMASK *mask, MorphOp op )
+{
+	Morph *morph;
+	im_generate_fn generate;
+
+	/* Check parameters.
+	 */
+	if( !(morph = morph_new( in, out, mask, op )) )
+		return( -1 );
+
+	/* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output
+	 * would be 1x1.
+	 */
+	if( im_cp_desc( out, in ) )
+		return( -1 );
+	out->Xsize -= mask->xsize - 1;
+	out->Ysize -= mask->ysize - 1;
+	if( out->Xsize <= 0 || out->Ysize <= 0 ) {
+		im_error( "morph", "%s", _( "image too small for mask" ) );
+		return( -1 );
+	}
+
+	if( morph->n_pass ) {
+		generate = morph_vector_gen;
+
+#ifdef DEBUG
+		printf( "morph_vector_gen: %d passes\n", morph->n_pass );
+#endif /*DEBUG*/
+	}
+	else if( op == DILATE )
+		generate = dilate_gen;
+	else
+		generate = erode_gen;
+
+	/* Set demand hints. FATSTRIP is good for us, as THINSTRIP will cause
+	 * too many recalculations on overlaps.
+	 */
+	if( im_demand_hint( out, IM_FATSTRIP, in, NULL ) ||
+		im_generate( out,
+			morph_start, generate, morph_stop, in, morph ) )
+		return( -1 );
+
+	out->Xoffset = -mask->xsize / 2;
+	out->Yoffset = -mask->ysize / 2;
+
+	return( 0 );
+}
+
+/* Dilate without adding a border: out is smaller than in.
+ */
+int
+im_dilate_raw( IMAGE *in, IMAGE *out, INTMASK *mask )
+{
+	return( morphology( in, out, mask, DILATE ) );
+}
+
+/* Erode without adding a border: out is smaller than in.
+ */
+int
+im_erode_raw( IMAGE *in, IMAGE *out, INTMASK *mask )
+{
+	return( morphology( in, out, mask, ERODE ) );
+}
+
+/**
+ * im_dilate:
+ * @in: input image
+ * @out: output image
+ * @mask: mask
+ *
+ * Dilate @in with @mask. @in must be uncoded uchar and @mask elements
+ * must be 0, 128 or 255. An output pel is 255 if any 255 mask element
+ * lies over a non-zero pel, or any 0 mask element lies over a zero pel;
+ * 128 mask elements are ignored. @in is expanded with im_embed() first
+ * so that @out is the same size as @in.
+ *
+ * Operations are performed using the processor's vector unit,
+ * if possible. Disable this with --vips-novector or IM_NOVECTOR.
+ *
+ * See also: im_erode().
+ *
+ * Returns: 0 on success, -1 on error
+ */
+int
+im_dilate( IMAGE *in, IMAGE *out, INTMASK *mask )
+{
+	IMAGE *t1 = im_open_local( out, "im_dilate:1", "p" );
+
+	/* Morph the embedded image, not in, or the border is wasted and
+	 * out comes out smaller than in.
+	 */
+	if( !t1 ||
+		im_embed( in, t1, 1, mask->xsize / 2, mask->ysize / 2,
+			in->Xsize + mask->xsize - 1,
+			in->Ysize + mask->ysize - 1 ) ||
+		morphology( t1, out, mask, DILATE ) )
+		return( -1 );
+
+	out->Xoffset = 0;
+	out->Yoffset = 0;
+
+	return( 0 );
+}
+
+/**
+ * im_erode:
+ * @in: input image
+ * @out: output image
+ * @mask: mask
+ *
+ * Operations are performed using the processor's vector unit,
+ * if possible. Disable this with --vips-novector or IM_NOVECTOR.
+ *
+ * See also: im_dilate().
+ *
+ * Returns: 0 on success, -1 on error
+ */
+int
+im_erode( IMAGE *in, IMAGE *out, INTMASK *mask )
+{
+	IMAGE *t1 = im_open_local( out, "im_erode:1", "p" );
+
+	/* Morph the embedded image, not in, or the border is wasted and
+	 * out comes out smaller than in.
+	 */
+	if( !t1 ||
+		im_embed( in, t1, 1, mask->xsize / 2, mask->ysize / 2,
+			in->Xsize + mask->xsize - 1,
+			in->Ysize + mask->ysize - 1 ) ||
+		morphology( t1, out, mask, ERODE ) )
+		return( -1 );
+
+	out->Xoffset = 0;
+	out->Yoffset = 0;
+
+	return( 0 );
+}
diff --git a/libvips/mosaicing/im_improve.c b/libvips/mosaicing/im_improve.c index 3b1ca9a7..a9907e80 100644 --- a/libvips/mosaicing/im_improve.c +++ b/libvips/mosaicing/im_improve.c @@ -156,8 +156,6 @@ copydevpoints( TIE_POINTS *pnew, TIE_POINTS *pold ) return( 0 ); } -#define SWAP( A, B ) { void *t = (A); A = B; B = t; } - int im__improve( TIE_POINTS *inpoints, TIE_POINTS *outpoints ) { @@ -182,7 +180,7 @@ im__improve( TIE_POINTS *inpoints, TIE_POINTS *outpoints ) /* And loop. */ - SWAP( p, q ); + IM_SWAP( void *, p, q ); } /* q has the output - copy to outpoints.