more im_conv improvements
This commit is contained in:
parent
313a2dcef4
commit
b0f162478f
9
TODO
9
TODO
@ -1,11 +1,4 @@
|
|||||||
- conv should load the acc at start from the previous run
|
- note in docs that all conv backends are expected to give identical results
|
||||||
|
|
||||||
conv should do conv+round+clip in the same bit of code, if it can --- should
|
|
||||||
often be possible because coefficients repeat so much
|
|
||||||
|
|
||||||
test_conv.ws should test negative coefficients
|
|
||||||
|
|
||||||
note in docs that all conv backends are expected to give identical results
|
|
||||||
|
|
||||||
try macros for vips_executor_set() etc.
|
try macros for vips_executor_set() etc.
|
||||||
|
|
||||||
|
@ -110,9 +110,6 @@
|
|||||||
|
|
||||||
- make up a signed 8-bit code path?
|
- make up a signed 8-bit code path?
|
||||||
|
|
||||||
- make it more like morphology.c: have a param for the result of the
|
|
||||||
previous pass rather than a separate combining pass
|
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef HAVE_CONFIG_H
|
#ifdef HAVE_CONFIG_H
|
||||||
@ -142,6 +139,8 @@ typedef struct {
|
|||||||
int first; /* The index of the first mask coff we use */
|
int first; /* The index of the first mask coff we use */
|
||||||
int last; /* The index of the last mask coff we use */
|
int last; /* The index of the last mask coff we use */
|
||||||
|
|
||||||
|
int r; /* Set previous result in this var */
|
||||||
|
|
||||||
/* The code we generate for this section of this mask.
|
/* The code we generate for this section of this mask.
|
||||||
*/
|
*/
|
||||||
VipsVector *vector;
|
VipsVector *vector;
|
||||||
@ -167,6 +166,7 @@ typedef struct {
|
|||||||
*/
|
*/
|
||||||
int n_pass;
|
int n_pass;
|
||||||
Pass pass[MAX_PASS];
|
Pass pass[MAX_PASS];
|
||||||
|
int s1; /* Input to clip */
|
||||||
VipsVector *clip;
|
VipsVector *clip;
|
||||||
} Conv;
|
} Conv;
|
||||||
|
|
||||||
@ -226,7 +226,8 @@ conv_evalend( Conv *conv )
|
|||||||
* 0 for success, -1 on error.
|
* 0 for success, -1 on error.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
conv_compile_convolution_u8s16_section( Pass *pass, Conv *conv )
|
conv_compile_convolution_u8s16_section( Pass *pass,
|
||||||
|
Conv *conv, gboolean first_pass )
|
||||||
{
|
{
|
||||||
INTMASK *mask = conv->mask;
|
INTMASK *mask = conv->mask;
|
||||||
const int n_mask = mask->xsize * mask->ysize;
|
const int n_mask = mask->xsize * mask->ysize;
|
||||||
@ -247,8 +248,20 @@ conv_compile_convolution_u8s16_section( Pass *pass, Conv *conv )
|
|||||||
TEMP( "product", 2 );
|
TEMP( "product", 2 );
|
||||||
TEMP( "sum", 2 );
|
TEMP( "sum", 2 );
|
||||||
|
|
||||||
|
/* Init the sum. If this is the first pass, it's a constant. If this
|
||||||
|
* is a later pass, we have to init the sum from the result
|
||||||
|
* of the previous pass.
|
||||||
|
*/
|
||||||
|
if( first_pass ) {
|
||||||
CONST( zero, 0, 2 );
|
CONST( zero, 0, 2 );
|
||||||
ASM2( "copyw", "sum", zero );
|
ASM2( "copyw", "sum", zero );
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* "r" is the result of the previous pass.
|
||||||
|
*/
|
||||||
|
pass->r = vips_vector_source_name( v, "r", 2 );
|
||||||
|
ASM2( "loadw", "sum", "r" );
|
||||||
|
}
|
||||||
|
|
||||||
for( i = pass->first; i < n_mask; i++ ) {
|
for( i = pass->first; i < n_mask; i++ ) {
|
||||||
int x = i % mask->xsize;
|
int x = i % mask->xsize;
|
||||||
@ -306,13 +319,14 @@ conv_compile_convolution_u8s16_section( Pass *pass, Conv *conv )
|
|||||||
|
|
||||||
ASM2( "copyw", "d1", "sum" );
|
ASM2( "copyw", "d1", "sum" );
|
||||||
|
|
||||||
if( !vips_vector_compile( v ) )
|
|
||||||
return( -1 );
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
vips_vector_print( v );
|
vips_vector_print( v );
|
||||||
|
printf( "compiling ...\n" );
|
||||||
#endif /*DEBUG*/
|
#endif /*DEBUG*/
|
||||||
|
|
||||||
|
if( !vips_vector_compile( v ) )
|
||||||
|
return( -1 );
|
||||||
|
|
||||||
return( 0 );
|
return( 0 );
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -374,8 +388,10 @@ conv_compile_convolution_u8s16( Conv *conv )
|
|||||||
|
|
||||||
pass->first = i;
|
pass->first = i;
|
||||||
pass->last = i;
|
pass->last = i;
|
||||||
|
pass->r = -1;
|
||||||
|
|
||||||
if( conv_compile_convolution_u8s16_section( pass, conv ) )
|
if( conv_compile_convolution_u8s16_section( pass,
|
||||||
|
conv, conv->n_pass == 1 ) )
|
||||||
return( -1 );
|
return( -1 );
|
||||||
i = pass->last + 1;
|
i = pass->last + 1;
|
||||||
|
|
||||||
@ -392,7 +408,7 @@ conv_compile_convolution_u8s16( Conv *conv )
|
|||||||
return( 0 );
|
return( 0 );
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Generate the program that does (sum(passes) + rounding) / scale + offset
|
/* Generate the program that does (pass + rounding) / scale + offset
|
||||||
* from a s16 intermediate back to a u8 output.
|
* from a s16 intermediate back to a u8 output.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
@ -400,7 +416,6 @@ conv_compile_scale_s16u8( Conv *conv )
|
|||||||
{
|
{
|
||||||
INTMASK *mask = conv->mask;
|
INTMASK *mask = conv->mask;
|
||||||
|
|
||||||
int i;
|
|
||||||
VipsVector *v;
|
VipsVector *v;
|
||||||
char scale[256];
|
char scale[256];
|
||||||
char offset[256];
|
char offset[256];
|
||||||
@ -415,12 +430,7 @@ conv_compile_scale_s16u8( Conv *conv )
|
|||||||
return( -1 );
|
return( -1 );
|
||||||
|
|
||||||
conv->clip = v = vips_vector_new( "clip", 1 );
|
conv->clip = v = vips_vector_new( "clip", 1 );
|
||||||
for( i = 0; i < conv->n_pass; i++ ) {
|
conv->s1 = vips_vector_source_name( v, "s1", 2 );
|
||||||
char source[10];
|
|
||||||
|
|
||||||
im_snprintf( source, 10, "s%d", i );
|
|
||||||
vips_vector_source_name( v, source, 2 );
|
|
||||||
}
|
|
||||||
|
|
||||||
TEMP( "t1", 2 );
|
TEMP( "t1", 2 );
|
||||||
TEMP( "t2", 2 );
|
TEMP( "t2", 2 );
|
||||||
@ -436,19 +446,9 @@ conv_compile_scale_s16u8( Conv *conv )
|
|||||||
CONST( offset, mask->offset * mask->scale + mask->scale / 2, 2 );
|
CONST( offset, mask->offset * mask->scale + mask->scale / 2, 2 );
|
||||||
CONST( zero, 0, 2 );
|
CONST( zero, 0, 2 );
|
||||||
|
|
||||||
/* Sum the passes into t1.
|
|
||||||
*/
|
|
||||||
ASM2( "loadw", "t1", "s0" );
|
|
||||||
for( i = 1; i < conv->n_pass; i++ ) {
|
|
||||||
char source[10];
|
|
||||||
|
|
||||||
im_snprintf( source, 10, "s%d", i );
|
|
||||||
ASM3( "addssw", "t1", "t1", source );
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Offset and scale.
|
/* Offset and scale.
|
||||||
*/
|
*/
|
||||||
ASM3( "addssw", "t1", "t1", offset );
|
ASM3( "addssw", "t1", "s1", offset );
|
||||||
|
|
||||||
/* We need to convert the signed result of the
|
/* We need to convert the signed result of the
|
||||||
* offset to unsigned for the div, ie. we want to set anything <0 to 0.
|
* offset to unsigned for the div, ie. we want to set anything <0 to 0.
|
||||||
@ -459,8 +459,7 @@ conv_compile_scale_s16u8( Conv *conv )
|
|||||||
ASM3( "divluw", "t1", "t1", scale );
|
ASM3( "divluw", "t1", "t1", scale );
|
||||||
ASM2( "convuuswb", "d1", "t1" );
|
ASM2( "convuuswb", "d1", "t1" );
|
||||||
|
|
||||||
if( vips_vector_full( v ) ||
|
if( !vips_vector_compile( v ) )
|
||||||
!vips_vector_compile( v ) )
|
|
||||||
return( -1 );
|
return( -1 );
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
@ -490,6 +489,7 @@ conv_new( IMAGE *in, IMAGE *out, INTMASK *mask )
|
|||||||
conv->overflow = 0;
|
conv->overflow = 0;
|
||||||
|
|
||||||
conv->n_pass = 0;
|
conv->n_pass = 0;
|
||||||
|
conv->s1 = -1;
|
||||||
conv->clip = NULL;
|
conv->clip = NULL;
|
||||||
|
|
||||||
if( im_add_close_callback( out,
|
if( im_add_close_callback( out,
|
||||||
@ -546,10 +546,11 @@ typedef struct {
|
|||||||
|
|
||||||
int last_bpl; /* Avoid recalcing offsets, if we can */
|
int last_bpl; /* Avoid recalcing offsets, if we can */
|
||||||
|
|
||||||
/* We need a set of intermediate buffers to keep the result of the
|
/* We need a pair of intermediate buffers to keep the results of each
|
||||||
* conv in before we clip it.
|
* conv pass in.
|
||||||
*/
|
*/
|
||||||
void **sum;
|
void *t1;
|
||||||
|
void *t2;
|
||||||
} ConvSequence;
|
} ConvSequence;
|
||||||
|
|
||||||
/* Free a sequence value.
|
/* Free a sequence value.
|
||||||
@ -560,18 +561,14 @@ conv_stop( void *vseq, void *a, void *b )
|
|||||||
ConvSequence *seq = (ConvSequence *) vseq;
|
ConvSequence *seq = (ConvSequence *) vseq;
|
||||||
Conv *conv = (Conv *) b;
|
Conv *conv = (Conv *) b;
|
||||||
|
|
||||||
int i;
|
|
||||||
|
|
||||||
/* Add local under/over counts to global counts.
|
/* Add local under/over counts to global counts.
|
||||||
*/
|
*/
|
||||||
conv->overflow += seq->overflow;
|
conv->overflow += seq->overflow;
|
||||||
conv->underflow += seq->underflow;
|
conv->underflow += seq->underflow;
|
||||||
|
|
||||||
IM_FREEF( im_region_free, seq->ir );
|
IM_FREEF( im_region_free, seq->ir );
|
||||||
|
IM_FREE( seq->t1 );
|
||||||
for( i = 0; i < conv->n_pass; i++ )
|
IM_FREE( seq->t2 );
|
||||||
IM_FREE( seq->sum[i] );
|
|
||||||
IM_FREE( seq->sum );
|
|
||||||
|
|
||||||
return( 0 );
|
return( 0 );
|
||||||
}
|
}
|
||||||
@ -585,7 +582,6 @@ conv_start( IMAGE *out, void *a, void *b )
|
|||||||
Conv *conv = (Conv *) b;
|
Conv *conv = (Conv *) b;
|
||||||
|
|
||||||
ConvSequence *seq;
|
ConvSequence *seq;
|
||||||
int i;
|
|
||||||
|
|
||||||
if( !(seq = IM_NEW( out, ConvSequence )) )
|
if( !(seq = IM_NEW( out, ConvSequence )) )
|
||||||
return( NULL );
|
return( NULL );
|
||||||
@ -598,7 +594,8 @@ conv_start( IMAGE *out, void *a, void *b )
|
|||||||
seq->underflow = 0;
|
seq->underflow = 0;
|
||||||
seq->overflow = 0;
|
seq->overflow = 0;
|
||||||
seq->last_bpl = -1;
|
seq->last_bpl = -1;
|
||||||
seq->sum = NULL;
|
seq->t1 = NULL;
|
||||||
|
seq->t2 = NULL;
|
||||||
|
|
||||||
/* Attach region and arrays.
|
/* Attach region and arrays.
|
||||||
*/
|
*/
|
||||||
@ -610,17 +607,12 @@ conv_start( IMAGE *out, void *a, void *b )
|
|||||||
return( NULL );
|
return( NULL );
|
||||||
}
|
}
|
||||||
|
|
||||||
if( vips_vector_get_enabled() && conv->n_pass ) {
|
if( vips_vector_get_enabled() &&
|
||||||
if( !(seq->sum = IM_ARRAY( NULL, conv->n_pass, void * )) ) {
|
conv->n_pass ) {
|
||||||
conv_stop( seq, in, conv );
|
seq->t1 = IM_ARRAY( NULL, IM_IMAGE_N_ELEMENTS( in ), short );
|
||||||
return( NULL );
|
seq->t2 = IM_ARRAY( NULL, IM_IMAGE_N_ELEMENTS( in ), short );
|
||||||
}
|
|
||||||
for( i = 0; i < conv->n_pass; i++ )
|
|
||||||
seq->sum[i] = NULL;
|
|
||||||
|
|
||||||
for( i = 0; i < conv->n_pass; i++ )
|
if( !seq->t1 || !seq->t2 ) {
|
||||||
if( !(seq->sum[i] = IM_ARRAY( NULL,
|
|
||||||
IM_IMAGE_N_ELEMENTS( in ), short )) ) {
|
|
||||||
conv_stop( seq, in, conv );
|
conv_stop( seq, in, conv );
|
||||||
return( NULL );
|
return( NULL );
|
||||||
}
|
}
|
||||||
@ -964,14 +956,6 @@ convvec_gen( REGION *or, void *vseq, void *a, void *b )
|
|||||||
conv->pass[j].vector, sz );
|
conv->pass[j].vector, sz );
|
||||||
vips_executor_set_program( &clip, conv->clip, sz );
|
vips_executor_set_program( &clip, conv->clip, sz );
|
||||||
|
|
||||||
/* Link the conv output to the intermediate buffer, and to the
|
|
||||||
* clipper's input.
|
|
||||||
*/
|
|
||||||
for( j = 0; j < conv->n_pass; j++ ) {
|
|
||||||
vips_executor_set_destination( &convolve[j], seq->sum[j] );
|
|
||||||
vips_executor_set_array( &clip, conv->clip->s[j], seq->sum[j] );
|
|
||||||
}
|
|
||||||
|
|
||||||
for( y = 0; y < r->height; y++ ) {
|
for( y = 0; y < r->height; y++ ) {
|
||||||
#ifdef DEBUG_PIXELS
|
#ifdef DEBUG_PIXELS
|
||||||
{
|
{
|
||||||
@ -988,18 +972,23 @@ convvec_gen( REGION *or, void *vseq, void *a, void *b )
|
|||||||
#endif /*DEBUG_PIXELS*/
|
#endif /*DEBUG_PIXELS*/
|
||||||
|
|
||||||
for( j = 0; j < conv->n_pass; j++ ) {
|
for( j = 0; j < conv->n_pass; j++ ) {
|
||||||
|
/* We always read from t1 and write to t2.
|
||||||
|
*/
|
||||||
vips_executor_set_scanline( &convolve[j],
|
vips_executor_set_scanline( &convolve[j],
|
||||||
ir, r->left, r->top + y );
|
ir, r->left, r->top + y );
|
||||||
|
vips_executor_set_array( &convolve[j],
|
||||||
|
conv->pass[j].r, seq->t1 );
|
||||||
|
vips_executor_set_destination( &convolve[j], seq->t2 );
|
||||||
vips_executor_run( &convolve[j] );
|
vips_executor_run( &convolve[j] );
|
||||||
|
|
||||||
|
IM_SWAP( void *, seq->t1, seq->t2 );
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DEBUG_PIXELS
|
#ifdef DEBUG_PIXELS
|
||||||
printf( "before clip:\n" );
|
printf( "before clip: %d\n", ((signed short *) seq->t1)[0] );
|
||||||
for( j = 0; j < conv->n_pass; j++ )
|
|
||||||
printf( " %d) %3d\n",
|
|
||||||
j, ((signed short *) seq->sum[j])[0] );
|
|
||||||
#endif /*DEBUG_PIXELS*/
|
#endif /*DEBUG_PIXELS*/
|
||||||
|
|
||||||
|
vips_executor_set_array( &clip, conv->s1, seq->t1 );
|
||||||
vips_executor_set_destination( &clip,
|
vips_executor_set_destination( &clip,
|
||||||
IM_REGION_ADDR( or, r->left, r->top + y ) );
|
IM_REGION_ADDR( or, r->left, r->top + y ) );
|
||||||
vips_executor_run( &clip );
|
vips_executor_run( &clip );
|
||||||
|
@ -264,7 +264,6 @@ vips_vector_full( VipsVector *vector )
|
|||||||
{
|
{
|
||||||
/* We can need a max of 2 constants plus one source per
|
/* We can need a max of 2 constants plus one source per
|
||||||
* coefficient, so stop if we're sure we don't have enough.
|
* coefficient, so stop if we're sure we don't have enough.
|
||||||
* We need to stay under the 100 instruction limit too.
|
|
||||||
*/
|
*/
|
||||||
if( vector->n_constant > 16 - 2 )
|
if( vector->n_constant > 16 - 2 )
|
||||||
return( TRUE );
|
return( TRUE );
|
||||||
@ -275,7 +274,9 @@ vips_vector_full( VipsVector *vector )
|
|||||||
if( vector->n_source + vector->n_scanline + 1 > 7 )
|
if( vector->n_source + vector->n_scanline + 1 > 7 )
|
||||||
return( TRUE );
|
return( TRUE );
|
||||||
|
|
||||||
if( vector->n_instruction > 50 )
|
/* I seem to get segvs with instruction counts over about 50 :-( argh.
|
||||||
|
*/
|
||||||
|
if( vector->n_instruction > 45 )
|
||||||
return( TRUE );
|
return( TRUE );
|
||||||
|
|
||||||
return( FALSE );
|
return( FALSE );
|
||||||
|
Loading…
Reference in New Issue
Block a user