multipass mode for im_conv()
This commit is contained in:
parent
593af5a8d8
commit
425795a8c8
@ -42,6 +42,7 @@
|
|||||||
- land the vector branch ... we have SSE erode/dilate/add/conv
|
- land the vector branch ... we have SSE erode/dilate/add/conv
|
||||||
- add IM_SWAP
|
- add IM_SWAP
|
||||||
- dilate/erode do (!=0) on non-uchar images
|
- dilate/erode do (!=0) on non-uchar images
|
||||||
|
- add multipass Orc to im_conv(), 3.5x faster for 5x5 mask
|
||||||
|
|
||||||
12/5/10 started 7.22.2
|
12/5/10 started 7.22.2
|
||||||
- the conditional image of ifthenelse can be any format, a (!=0) is added if
|
- the conditional image of ifthenelse can be any format, a (!=0) is added if
|
||||||
|
4
TODO
4
TODO
@ -1,6 +1,6 @@
|
|||||||
- im_conv() might now give a speedup on larger masks and with 16 bits?
|
- try a 32-bit intermediate for im_conv()
|
||||||
|
|
||||||
- im_profile() next
|
- gtkdoc for im_profile() next
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -61,6 +61,8 @@
|
|||||||
* - use VipsVector
|
* - use VipsVector
|
||||||
* - get rid of im_convsep(), just call this twice, no longer worth
|
* - get rid of im_convsep(), just call this twice, no longer worth
|
||||||
* keeping two versions
|
* keeping two versions
|
||||||
|
* 8/11/10
|
||||||
|
* - add array tiling
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -103,26 +105,11 @@
|
|||||||
|
|
||||||
- will this change make much difference to the vips benchmark?
|
- will this change make much difference to the vips benchmark?
|
||||||
|
|
||||||
- would setting params by index rather than name be any quicker?
|
|
||||||
|
|
||||||
- fix up a signed 8-bit code path?
|
|
||||||
|
|
||||||
- try a path with a 32-bit sum for larger matrices / scale / offset,
|
- try a path with a 32-bit sum for larger matrices / scale / offset,
|
||||||
much slower?
|
|
||||||
|
|
||||||
- try a 16-bit path, though the speedup might not be worthwhile
|
- make up a signed 8-bit code path?
|
||||||
|
|
||||||
- with a 5x5 matrix:
|
- try a 16-bit path
|
||||||
|
|
||||||
5 5 62 0
|
|
||||||
0 1 1 1 0
|
|
||||||
1 4 6 4 1
|
|
||||||
1 6 10 6 1
|
|
||||||
1 4 6 4 1
|
|
||||||
0 1 1 1 0
|
|
||||||
|
|
||||||
Orc is no faster than C, argh, multipass is not worthwhile for
|
|
||||||
large matrices
|
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -142,6 +129,22 @@
|
|||||||
#include <dmalloc.h>
|
#include <dmalloc.h>
|
||||||
#endif /*WITH_DMALLOC*/
|
#endif /*WITH_DMALLOC*/
|
||||||
|
|
||||||
|
/* We can't run more than this many passes. Larger than this and we
|
||||||
|
* fall back to C.
|
||||||
|
*/
|
||||||
|
#define MAX_PASS (10)
|
||||||
|
|
||||||
|
/* A pass with a vector.
|
||||||
|
*/
|
||||||
|
typedef struct {
|
||||||
|
int first; /* The index of the first mask coff we use */
|
||||||
|
int last; /* The index of the last mask coff we use */
|
||||||
|
|
||||||
|
/* The code we generate for this section of this mask.
|
||||||
|
*/
|
||||||
|
VipsVector *vector;
|
||||||
|
} Pass;
|
||||||
|
|
||||||
/* Our parameters ... we take a copy of the mask argument, plus we make a
|
/* Our parameters ... we take a copy of the mask argument, plus we make a
|
||||||
* smaller version with the zeros squeezed out.
|
* smaller version with the zeros squeezed out.
|
||||||
*/
|
*/
|
||||||
@ -160,14 +163,20 @@ typedef struct {
|
|||||||
/* The convolver we generate for this mask. We have to split the
|
/* The convolver we generate for this mask. We have to split the
|
||||||
* convolve and clip into two phases.
|
* convolve and clip into two phases.
|
||||||
*/
|
*/
|
||||||
VipsVector *convolve;
|
int n_pass;
|
||||||
|
Pass pass[MAX_PASS];
|
||||||
VipsVector *clip;
|
VipsVector *clip;
|
||||||
} Conv;
|
} Conv;
|
||||||
|
|
||||||
static void
|
static void
|
||||||
conv_vector_free( Conv *conv )
|
conv_vector_free( Conv *conv )
|
||||||
{
|
{
|
||||||
IM_FREEF( vips_vector_free, conv->convolve );
|
int i;
|
||||||
|
|
||||||
|
for( i = 0; i < conv->n_pass; i++ )
|
||||||
|
IM_FREEF( vips_vector_free, conv->pass[i].vector );
|
||||||
|
conv->n_pass = 0;
|
||||||
|
|
||||||
IM_FREEF( vips_vector_free, conv->clip );
|
IM_FREEF( vips_vector_free, conv->clip );
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -210,17 +219,16 @@ conv_evalend( Conv *conv )
|
|||||||
#define ASM2( OP, A, B ) vips_vector_asm2( v, OP, A, B )
|
#define ASM2( OP, A, B ) vips_vector_asm2( v, OP, A, B )
|
||||||
#define ASM3( OP, A, B, C ) vips_vector_asm3( v, OP, A, B, C )
|
#define ASM3( OP, A, B, C ) vips_vector_asm3( v, OP, A, B, C )
|
||||||
|
|
||||||
/* Generate code for a 3x3 mask. Just do multiply-add, a second pass does the
|
/* Generate code for a section of the mask.
|
||||||
* round and clip.
|
|
||||||
*
|
*
|
||||||
* 0 for success, -1 on error.
|
* 0 for success, -1 on error.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
conv_compile_convolution_u8s16( Conv *conv )
|
conv_compile_convolution_u8s16_section( Pass *pass, Conv *conv )
|
||||||
{
|
{
|
||||||
INTMASK *mask = conv->mask;
|
INTMASK *mask = conv->mask;
|
||||||
|
const int n_mask = mask->xsize * mask->ysize;
|
||||||
|
|
||||||
double min, max;
|
|
||||||
int i;
|
int i;
|
||||||
VipsVector *v;
|
VipsVector *v;
|
||||||
char zero[256];
|
char zero[256];
|
||||||
@ -228,34 +236,7 @@ conv_compile_convolution_u8s16( Conv *conv )
|
|||||||
char source[256];
|
char source[256];
|
||||||
char coeff[256];
|
char coeff[256];
|
||||||
|
|
||||||
if( conv->in->BandFmt != IM_BANDFMT_UCHAR )
|
pass->vector = v = vips_vector_new( "conv", 2 );
|
||||||
return( -1 );
|
|
||||||
|
|
||||||
/* Don't test mask size, it's very hard to predict when we will
|
|
||||||
* exhaust the program space.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Can the accumulator overflow or underflow at any stage? Since
|
|
||||||
* matrix elements are signed, we need to calculate a running
|
|
||||||
* possible min and max.
|
|
||||||
*/
|
|
||||||
min = 0;
|
|
||||||
max = 0;
|
|
||||||
for( i = 0; i < mask->xsize * mask->ysize; i++ ) {
|
|
||||||
int v = 255 * mask->coeff[i];
|
|
||||||
|
|
||||||
if( min + v < min )
|
|
||||||
min += v;
|
|
||||||
else if( min + v > max )
|
|
||||||
max += v;
|
|
||||||
|
|
||||||
if( max > SHRT_MAX )
|
|
||||||
return( -1 );
|
|
||||||
if( min < SHRT_MIN )
|
|
||||||
return( -1 );
|
|
||||||
}
|
|
||||||
|
|
||||||
conv->convolve = v = vips_vector_new( "conv", 2 );
|
|
||||||
|
|
||||||
/* The value we fetch from the image, the product with the matrix
|
/* The value we fetch from the image, the product with the matrix
|
||||||
* value, the accumulated sum.
|
* value, the accumulated sum.
|
||||||
@ -267,7 +248,7 @@ conv_compile_convolution_u8s16( Conv *conv )
|
|||||||
CONST( zero, 0, 2 );
|
CONST( zero, 0, 2 );
|
||||||
ASM2( "copyw", "sum", zero );
|
ASM2( "copyw", "sum", zero );
|
||||||
|
|
||||||
for( i = 0; i < mask->xsize * mask->ysize; i++ ) {
|
for( i = pass->first; i < n_mask; i++ ) {
|
||||||
int x = i % mask->xsize;
|
int x = i % mask->xsize;
|
||||||
int y = i / mask->xsize;
|
int y = i / mask->xsize;
|
||||||
|
|
||||||
@ -315,12 +296,12 @@ conv_compile_convolution_u8s16( Conv *conv )
|
|||||||
|
|
||||||
ASM3( "addssw", "sum", "sum", "product" );
|
ASM3( "addssw", "sum", "sum", "product" );
|
||||||
|
|
||||||
/* If we run out of space, fall back to C.
|
|
||||||
*/
|
|
||||||
if( vips_vector_full( v ) )
|
if( vips_vector_full( v ) )
|
||||||
return( -1 );
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pass->last = i;
|
||||||
|
|
||||||
ASM2( "copyw", "d1", "sum" );
|
ASM2( "copyw", "d1", "sum" );
|
||||||
|
|
||||||
if( !vips_vector_compile( v ) )
|
if( !vips_vector_compile( v ) )
|
||||||
@ -333,7 +314,83 @@ conv_compile_convolution_u8s16( Conv *conv )
|
|||||||
return( 0 );
|
return( 0 );
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Generate the program that does (sum + rounding) / scale + offset
|
/* Generate the convolution pass for u8 data with an s16 accumulator.
|
||||||
|
*
|
||||||
|
* 0 for success, -1 on error.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
conv_compile_convolution_u8s16( Conv *conv )
|
||||||
|
{
|
||||||
|
INTMASK *mask = conv->mask;
|
||||||
|
const int n_mask = mask->xsize * mask->ysize;
|
||||||
|
|
||||||
|
double min, max;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if( conv->in->BandFmt != IM_BANDFMT_UCHAR )
|
||||||
|
return( -1 );
|
||||||
|
|
||||||
|
/* Can the accumulator overflow or underflow at any stage? Since
|
||||||
|
* matrix elements are signed, we need to calculate a running
|
||||||
|
* possible min and max.
|
||||||
|
*/
|
||||||
|
min = 0;
|
||||||
|
max = 0;
|
||||||
|
for( i = 0; i < n_mask; i++ ) {
|
||||||
|
int v = 255 * mask->coeff[i];
|
||||||
|
|
||||||
|
if( min + v < min )
|
||||||
|
min += v;
|
||||||
|
else if( min + v > max )
|
||||||
|
max += v;
|
||||||
|
|
||||||
|
if( max > SHRT_MAX )
|
||||||
|
return( -1 );
|
||||||
|
if( min < SHRT_MIN )
|
||||||
|
return( -1 );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Generate passes until we've used up the whole mask.
|
||||||
|
*/
|
||||||
|
for( i = 0;;) {
|
||||||
|
Pass *pass;
|
||||||
|
|
||||||
|
/* Skip any zero coefficients at the start of the mask
|
||||||
|
* region.
|
||||||
|
*/
|
||||||
|
for( ; i < n_mask && !mask->coeff[i]; i++ )
|
||||||
|
;
|
||||||
|
if( i == n_mask )
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Allocate space for another pass.
|
||||||
|
*/
|
||||||
|
if( conv->n_pass == MAX_PASS )
|
||||||
|
return( -1 );
|
||||||
|
pass = &conv->pass[conv->n_pass];
|
||||||
|
conv->n_pass += 1;
|
||||||
|
|
||||||
|
pass->first = i;
|
||||||
|
pass->last = i;
|
||||||
|
|
||||||
|
if( conv_compile_convolution_u8s16_section( pass, conv ) )
|
||||||
|
return( -1 );
|
||||||
|
i = pass->last + 1;
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
|
printf( "conv_compile_convolution_u8s16: "
|
||||||
|
"first = %d, last = %d\n",
|
||||||
|
pass->first, pass->last );
|
||||||
|
#endif /*DEBUG*/
|
||||||
|
|
||||||
|
if( i >= n_mask )
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return( 0 );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Generate the program that does (sum(passes) + rounding) / scale + offset
|
||||||
* from a s16 intermediate back to a u8 output.
|
* from a s16 intermediate back to a u8 output.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
@ -341,6 +398,7 @@ conv_compile_scale_s16u8( Conv *conv )
|
|||||||
{
|
{
|
||||||
INTMASK *mask = conv->mask;
|
INTMASK *mask = conv->mask;
|
||||||
|
|
||||||
|
int i;
|
||||||
VipsVector *v;
|
VipsVector *v;
|
||||||
char scale[256];
|
char scale[256];
|
||||||
char offset[256];
|
char offset[256];
|
||||||
@ -355,7 +413,12 @@ conv_compile_scale_s16u8( Conv *conv )
|
|||||||
return( -1 );
|
return( -1 );
|
||||||
|
|
||||||
conv->clip = v = vips_vector_new( "clip", 1 );
|
conv->clip = v = vips_vector_new( "clip", 1 );
|
||||||
vips_vector_source_name( v, "s1", 2 );
|
for( i = 0; i < conv->n_pass; i++ ) {
|
||||||
|
char source[10];
|
||||||
|
|
||||||
|
im_snprintf( source, 10, "s%d", i );
|
||||||
|
vips_vector_source_name( v, source, 2 );
|
||||||
|
}
|
||||||
|
|
||||||
TEMP( "t1", 2 );
|
TEMP( "t1", 2 );
|
||||||
TEMP( "t2", 2 );
|
TEMP( "t2", 2 );
|
||||||
@ -371,9 +434,19 @@ conv_compile_scale_s16u8( Conv *conv )
|
|||||||
CONST( offset, mask->offset * mask->scale + mask->scale / 2, 2 );
|
CONST( offset, mask->offset * mask->scale + mask->scale / 2, 2 );
|
||||||
CONST( zero, 0, 2 );
|
CONST( zero, 0, 2 );
|
||||||
|
|
||||||
|
/* Sum the passes into t1.
|
||||||
|
*/
|
||||||
|
ASM2( "loadw", "t1", "s0" );
|
||||||
|
for( i = 1; i < conv->n_pass; i++ ) {
|
||||||
|
char source[10];
|
||||||
|
|
||||||
|
im_snprintf( source, 10, "s%d", i );
|
||||||
|
ASM3( "addssw", "t1", "t1", source );
|
||||||
|
}
|
||||||
|
|
||||||
/* Offset and scale.
|
/* Offset and scale.
|
||||||
*/
|
*/
|
||||||
ASM3( "addssw", "t1", "s1", offset );
|
ASM3( "addssw", "t1", "t1", offset );
|
||||||
|
|
||||||
/* We need to convert the signed result of the
|
/* We need to convert the signed result of the
|
||||||
* offset to unsigned for the div, ie. we want to set anything <0 to 0.
|
* offset to unsigned for the div, ie. we want to set anything <0 to 0.
|
||||||
@ -384,7 +457,8 @@ conv_compile_scale_s16u8( Conv *conv )
|
|||||||
ASM3( "divluw", "t1", "t1", scale );
|
ASM3( "divluw", "t1", "t1", scale );
|
||||||
ASM2( "convuuswb", "d1", "t1" );
|
ASM2( "convuuswb", "d1", "t1" );
|
||||||
|
|
||||||
if( !vips_vector_compile( v ) )
|
if( vips_vector_full( v ) ||
|
||||||
|
!vips_vector_compile( v ) )
|
||||||
return( -1 );
|
return( -1 );
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
@ -398,7 +472,7 @@ static Conv *
|
|||||||
conv_new( IMAGE *in, IMAGE *out, INTMASK *mask )
|
conv_new( IMAGE *in, IMAGE *out, INTMASK *mask )
|
||||||
{
|
{
|
||||||
Conv *conv = IM_NEW( out, Conv );
|
Conv *conv = IM_NEW( out, Conv );
|
||||||
const int ne = mask->xsize * mask->ysize;
|
const int n_mask = mask->xsize * mask->ysize;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
if( !conv )
|
if( !conv )
|
||||||
@ -413,7 +487,7 @@ conv_new( IMAGE *in, IMAGE *out, INTMASK *mask )
|
|||||||
conv->underflow = 0;
|
conv->underflow = 0;
|
||||||
conv->overflow = 0;
|
conv->overflow = 0;
|
||||||
|
|
||||||
conv->convolve = NULL;
|
conv->n_pass = 0;
|
||||||
conv->clip = NULL;
|
conv->clip = NULL;
|
||||||
|
|
||||||
if( im_add_close_callback( out,
|
if( im_add_close_callback( out,
|
||||||
@ -422,14 +496,14 @@ conv_new( IMAGE *in, IMAGE *out, INTMASK *mask )
|
|||||||
(im_callback_fn) conv_evalstart, conv, NULL ) ||
|
(im_callback_fn) conv_evalstart, conv, NULL ) ||
|
||||||
im_add_close_callback( out,
|
im_add_close_callback( out,
|
||||||
(im_callback_fn) conv_evalend, conv, NULL ) ||
|
(im_callback_fn) conv_evalend, conv, NULL ) ||
|
||||||
!(conv->coeff = IM_ARRAY( out, ne, int )) ||
|
!(conv->coeff = IM_ARRAY( out, n_mask, int )) ||
|
||||||
!(conv->coeff_pos = IM_ARRAY( out, ne, int )) ||
|
!(conv->coeff_pos = IM_ARRAY( out, n_mask, int )) ||
|
||||||
!(conv->mask = im_dup_imask( mask, "conv_mask" )) )
|
!(conv->mask = im_dup_imask( mask, "conv_mask" )) )
|
||||||
return( NULL );
|
return( NULL );
|
||||||
|
|
||||||
/* Find non-zero mask elements.
|
/* Find non-zero mask elements.
|
||||||
*/
|
*/
|
||||||
for( i = 0; i < ne; i++ )
|
for( i = 0; i < n_mask; i++ )
|
||||||
if( mask->coeff[i] ) {
|
if( mask->coeff[i] ) {
|
||||||
conv->coeff[conv->nnz] = mask->coeff[i];
|
conv->coeff[conv->nnz] = mask->coeff[i];
|
||||||
conv->coeff_pos[conv->nnz] = i;
|
conv->coeff_pos[conv->nnz] = i;
|
||||||
@ -470,10 +544,10 @@ typedef struct {
|
|||||||
|
|
||||||
int last_bpl; /* Avoid recalcing offsets, if we can */
|
int last_bpl; /* Avoid recalcing offsets, if we can */
|
||||||
|
|
||||||
/* We need an intermediate buffer to keep the result of the conv in
|
/* We need a set of intermediate buffers to keep the result of the
|
||||||
* before we clip it.
|
* conv in before we clip it.
|
||||||
*/
|
*/
|
||||||
void *sum;
|
void **sum;
|
||||||
} ConvSequence;
|
} ConvSequence;
|
||||||
|
|
||||||
/* Free a sequence value.
|
/* Free a sequence value.
|
||||||
@ -484,6 +558,8 @@ conv_stop( void *vseq, void *a, void *b )
|
|||||||
ConvSequence *seq = (ConvSequence *) vseq;
|
ConvSequence *seq = (ConvSequence *) vseq;
|
||||||
Conv *conv = (Conv *) b;
|
Conv *conv = (Conv *) b;
|
||||||
|
|
||||||
|
int i;
|
||||||
|
|
||||||
/* Add local under/over counts to global counts.
|
/* Add local under/over counts to global counts.
|
||||||
*/
|
*/
|
||||||
conv->overflow += seq->overflow;
|
conv->overflow += seq->overflow;
|
||||||
@ -491,6 +567,8 @@ conv_stop( void *vseq, void *a, void *b )
|
|||||||
|
|
||||||
IM_FREEF( im_region_free, seq->ir );
|
IM_FREEF( im_region_free, seq->ir );
|
||||||
|
|
||||||
|
for( i = 0; i < conv->n_pass; i++ )
|
||||||
|
IM_FREE( seq->sum[i] );
|
||||||
IM_FREE( seq->sum );
|
IM_FREE( seq->sum );
|
||||||
|
|
||||||
return( 0 );
|
return( 0 );
|
||||||
@ -503,7 +581,9 @@ conv_start( IMAGE *out, void *a, void *b )
|
|||||||
{
|
{
|
||||||
IMAGE *in = (IMAGE *) a;
|
IMAGE *in = (IMAGE *) a;
|
||||||
Conv *conv = (Conv *) b;
|
Conv *conv = (Conv *) b;
|
||||||
|
|
||||||
ConvSequence *seq;
|
ConvSequence *seq;
|
||||||
|
int i;
|
||||||
|
|
||||||
if( !(seq = IM_NEW( out, ConvSequence )) )
|
if( !(seq = IM_NEW( out, ConvSequence )) )
|
||||||
return( NULL );
|
return( NULL );
|
||||||
@ -523,12 +603,27 @@ conv_start( IMAGE *out, void *a, void *b )
|
|||||||
seq->ir = im_region_create( in );
|
seq->ir = im_region_create( in );
|
||||||
seq->offsets = IM_ARRAY( out, conv->nnz, int );
|
seq->offsets = IM_ARRAY( out, conv->nnz, int );
|
||||||
seq->pts = IM_ARRAY( out, conv->nnz, PEL * );
|
seq->pts = IM_ARRAY( out, conv->nnz, PEL * );
|
||||||
seq->sum = IM_ARRAY( NULL, IM_IMAGE_N_ELEMENTS( in ), short );
|
if( !seq->ir || !seq->offsets || !seq->pts ) {
|
||||||
if( !seq->ir || !seq->offsets || !seq->pts || !seq->sum ) {
|
|
||||||
conv_stop( seq, in, conv );
|
conv_stop( seq, in, conv );
|
||||||
return( NULL );
|
return( NULL );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if( vips_vector_get_enabled() && conv->n_pass ) {
|
||||||
|
if( !(seq->sum = IM_ARRAY( NULL, conv->n_pass, void * )) ) {
|
||||||
|
conv_stop( seq, in, conv );
|
||||||
|
return( NULL );
|
||||||
|
}
|
||||||
|
for( i = 0; i < conv->n_pass; i++ )
|
||||||
|
seq->sum[i] = NULL;
|
||||||
|
|
||||||
|
for( i = 0; i < conv->n_pass; i++ )
|
||||||
|
if( !(seq->sum[i] = IM_ARRAY( NULL,
|
||||||
|
IM_IMAGE_N_ELEMENTS( in ), short )) ) {
|
||||||
|
conv_stop( seq, in, conv );
|
||||||
|
return( NULL );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return( seq );
|
return( seq );
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -849,8 +944,8 @@ convvec_gen( REGION *or, void *vseq, void *a, void *b )
|
|||||||
int sz = IM_REGION_N_ELEMENTS( or ) * (im_iscomplex( in ) ? 2 : 1);
|
int sz = IM_REGION_N_ELEMENTS( or ) * (im_iscomplex( in ) ? 2 : 1);
|
||||||
|
|
||||||
Rect s;
|
Rect s;
|
||||||
int y;
|
int j, y;
|
||||||
VipsExecutor convolve;
|
VipsExecutor convolve[MAX_PASS];
|
||||||
VipsExecutor clip;
|
VipsExecutor clip;
|
||||||
|
|
||||||
/* Prepare the section of the input image we need. A little larger
|
/* Prepare the section of the input image we need. A little larger
|
||||||
@ -862,13 +957,18 @@ convvec_gen( REGION *or, void *vseq, void *a, void *b )
|
|||||||
if( im_prepare( ir, &s ) )
|
if( im_prepare( ir, &s ) )
|
||||||
return( -1 );
|
return( -1 );
|
||||||
|
|
||||||
vips_executor_set_program( &convolve, conv->convolve, sz );
|
for( j = 0; j < conv->n_pass; j++ )
|
||||||
|
vips_executor_set_program( &convolve[j],
|
||||||
|
conv->pass[j].vector, sz );
|
||||||
vips_executor_set_program( &clip, conv->clip, sz );
|
vips_executor_set_program( &clip, conv->clip, sz );
|
||||||
|
|
||||||
/* Link the combiner to the intermediate buffer.
|
/* Link the conv output to the intermediate buffer, and to the
|
||||||
|
* clipper's input.
|
||||||
*/
|
*/
|
||||||
vips_executor_set_destination( &convolve, seq->sum );
|
for( j = 0; j < conv->n_pass; j++ ) {
|
||||||
vips_executor_set_array( &clip, conv->clip->s[0], seq->sum );
|
vips_executor_set_destination( &convolve[j], seq->sum[j] );
|
||||||
|
vips_executor_set_array( &clip, conv->clip->s[j], seq->sum[j] );
|
||||||
|
}
|
||||||
|
|
||||||
for( y = 0; y < r->height; y++ ) {
|
for( y = 0; y < r->height; y++ ) {
|
||||||
#ifdef DEBUG_PIXELS
|
#ifdef DEBUG_PIXELS
|
||||||
@ -885,12 +985,17 @@ convvec_gen( REGION *or, void *vseq, void *a, void *b )
|
|||||||
}
|
}
|
||||||
#endif /*DEBUG_PIXELS*/
|
#endif /*DEBUG_PIXELS*/
|
||||||
|
|
||||||
vips_executor_set_scanline( &convolve,
|
for( j = 0; j < conv->n_pass; j++ ) {
|
||||||
|
vips_executor_set_scanline( &convolve[j],
|
||||||
ir, r->left, r->top + y );
|
ir, r->left, r->top + y );
|
||||||
vips_executor_run( &convolve );
|
vips_executor_run( &convolve[j] );
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef DEBUG_PIXELS
|
#ifdef DEBUG_PIXELS
|
||||||
printf( "before clip: %3d\n", *((signed short *) seq->sum) );
|
printf( "before clip:\n" );
|
||||||
|
for( j = 0; j < conv->n_pass; j++ )
|
||||||
|
printf( " %d) %3d\n",
|
||||||
|
j, ((signed short *) seq->sum[j])[0] );
|
||||||
#endif /*DEBUG_PIXELS*/
|
#endif /*DEBUG_PIXELS*/
|
||||||
|
|
||||||
vips_executor_set_destination( &clip,
|
vips_executor_set_destination( &clip,
|
||||||
@ -937,7 +1042,7 @@ im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask )
|
|||||||
return( -1 );
|
return( -1 );
|
||||||
}
|
}
|
||||||
|
|
||||||
if( conv->convolve ) {
|
if( conv->n_pass ) {
|
||||||
generate = convvec_gen;
|
generate = convvec_gen;
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
@ -1059,12 +1164,12 @@ int
|
|||||||
im_convsep( IMAGE *in, IMAGE *out, INTMASK *mask )
|
im_convsep( IMAGE *in, IMAGE *out, INTMASK *mask )
|
||||||
{
|
{
|
||||||
IMAGE *t1 = im_open_local( out, "im_convsep intermediate", "p" );
|
IMAGE *t1 = im_open_local( out, "im_convsep intermediate", "p" );
|
||||||
int size = mask->xsize * mask->ysize;
|
int n_mask = mask->xsize * mask->ysize;
|
||||||
|
|
||||||
if( !t1 ||
|
if( !t1 ||
|
||||||
im_embed( in, t1, 1, size / 2, size / 2,
|
im_embed( in, t1, 1, n_mask / 2, n_mask / 2,
|
||||||
in->Xsize + size - 1,
|
in->Xsize + n_mask - 1,
|
||||||
in->Ysize + size - 1 ) ||
|
in->Ysize + n_mask - 1 ) ||
|
||||||
im_convsep_raw( t1, out, mask ) )
|
im_convsep_raw( t1, out, mask ) )
|
||||||
return( -1 );
|
return( -1 );
|
||||||
|
|
||||||
|
@ -372,4 +372,3 @@ vips_executor_run( VipsExecutor *executor )
|
|||||||
orc_executor_run( &executor->executor );
|
orc_executor_run( &executor->executor );
|
||||||
#endif /*HAVE_ORC*/
|
#endif /*HAVE_ORC*/
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -77,7 +77,7 @@ typedef enum {
|
|||||||
/* We can't run more than this many passes. Larger than this and we
|
/* We can't run more than this many passes. Larger than this and we
|
||||||
* fall back to C.
|
* fall back to C.
|
||||||
*/
|
*/
|
||||||
#define MAX_PASSES (10)
|
#define MAX_PASS (10)
|
||||||
|
|
||||||
/* A pass with a vector.
|
/* A pass with a vector.
|
||||||
*/
|
*/
|
||||||
@ -103,7 +103,7 @@ typedef struct {
|
|||||||
/* The passes we generate for this mask.
|
/* The passes we generate for this mask.
|
||||||
*/
|
*/
|
||||||
int n_pass;
|
int n_pass;
|
||||||
Pass pass[MAX_PASSES];
|
Pass pass[MAX_PASS];
|
||||||
} Morph;
|
} Morph;
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -262,7 +262,7 @@ pass_compile( Morph *morph )
|
|||||||
|
|
||||||
/* Allocate space for another pass.
|
/* Allocate space for another pass.
|
||||||
*/
|
*/
|
||||||
if( morph->n_pass == MAX_PASSES )
|
if( morph->n_pass == MAX_PASS )
|
||||||
return( -1 );
|
return( -1 );
|
||||||
pass = &morph->pass[morph->n_pass];
|
pass = &morph->pass[morph->n_pass];
|
||||||
morph->n_pass += 1;
|
morph->n_pass += 1;
|
||||||
@ -327,7 +327,7 @@ morph_new( IMAGE *in, IMAGE *out, INTMASK *mask, MorphOp op )
|
|||||||
morph->op = op;
|
morph->op = op;
|
||||||
|
|
||||||
morph->n_pass = 0;
|
morph->n_pass = 0;
|
||||||
for( i = 0; i < MAX_PASSES; i++ )
|
for( i = 0; i < MAX_PASS; i++ )
|
||||||
morph->pass[i].vector = NULL;
|
morph->pass[i].vector = NULL;
|
||||||
|
|
||||||
if( im_add_close_callback( out,
|
if( im_add_close_callback( out,
|
||||||
@ -655,7 +655,7 @@ morph_vector_gen( REGION *or, void *vseq, void *a, void *b )
|
|||||||
|
|
||||||
Rect s;
|
Rect s;
|
||||||
int y, j;
|
int y, j;
|
||||||
VipsExecutor executor[MAX_PASSES];
|
VipsExecutor executor[MAX_PASS];
|
||||||
|
|
||||||
/* Prepare the section of the input image we need. A little larger
|
/* Prepare the section of the input image we need. A little larger
|
||||||
* than the section of the output image we are producing.
|
* than the section of the output image we are producing.
|
||||||
|
Loading…
Reference in New Issue
Block a user