This commit is contained in:
John Cupitt 2009-11-12 15:47:41 +00:00
parent 6879f66734
commit 2b3a198f9b
4 changed files with 101 additions and 50 deletions

View File

@ -3,6 +3,9 @@
- bumped version to 7.20 - bumped version to 7.20
- fixes to get "make dist" working again - fixes to get "make dist" working again
- oop, im_clip2fmt() was missing PTOP flag, should get a small speedup - oop, im_clip2fmt() was missing PTOP flag, should get a small speedup
- im_conv() / im_convf() didn't like all-zero masks
- small updates to im_convf() from im_conv()
- im_read_imask() produced an incorrect error message if passed a doublemask
3/4/09 started 7.19.0 3/4/09 started 7.19.0
- version bump - version bump

View File

@ -67,6 +67,10 @@
* 5/4/09 * 5/4/09
* - tiny speedups and cleanups * - tiny speedups and cleanups
* - add restrict, though it doesn't seem to help gcc * - add restrict, though it doesn't seem to help gcc
* 12/11/09
* - only check for non-zero elements once
* - add mask-all-zero check
* - cleanups
*/ */
/* /*
@ -103,7 +107,6 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <limits.h> #include <limits.h>
#include <assert.h>
#include <vips/vips.h> #include <vips/vips.h>
@ -121,6 +124,7 @@ typedef struct {
int nnz; /* Number of non-zero mask elements */ int nnz; /* Number of non-zero mask elements */
int *coeff; /* Array of non-zero mask coefficients */ int *coeff; /* Array of non-zero mask coefficients */
int *coeff_pos; /* Index of each nnz element in mask->coeff */
int underflow; /* Global underflow/overflow counts */ int underflow; /* Global underflow/overflow counts */
int overflow; int overflow;
@ -173,6 +177,7 @@ conv_new( IMAGE *in, IMAGE *out, INTMASK *mask )
conv->mask = NULL; conv->mask = NULL;
conv->nnz = 0; conv->nnz = 0;
conv->coeff = NULL; conv->coeff = NULL;
conv->coeff_pos = NULL;
conv->underflow = 0; conv->underflow = 0;
conv->overflow = 0; conv->overflow = 0;
@ -183,14 +188,27 @@ conv_new( IMAGE *in, IMAGE *out, INTMASK *mask )
im_add_close_callback( out, im_add_close_callback( out,
(im_callback_fn) conv_evalend, conv, NULL ) || (im_callback_fn) conv_evalend, conv, NULL ) ||
!(conv->coeff = IM_ARRAY( out, ne, int )) || !(conv->coeff = IM_ARRAY( out, ne, int )) ||
!(conv->coeff_pos = IM_ARRAY( out, ne, int )) ||
!(conv->mask = im_dup_imask( mask, "conv_mask" )) ) !(conv->mask = im_dup_imask( mask, "conv_mask" )) )
return( NULL ); return( NULL );
/* Find non-zero mask elements. /* Find non-zero mask elements.
*/ */
for( i = 0; i < ne; i++ ) for( i = 0; i < ne; i++ )
if( mask->coeff[i] ) if( mask->coeff[i] ) {
conv->coeff[conv->nnz++] = mask->coeff[i]; conv->coeff[conv->nnz] = mask->coeff[i];
conv->coeff_pos[conv->nnz] = i;
conv->nnz += 1;
}
/* Was the whole mask zero? We must have at least 1 element in there:
* set it to zero.
*/
if( conv->nnz == 0 ) {
conv->coeff[0] = mask->coeff[0];
conv->coeff_pos[0] = 0;
conv->nnz = 1;
}
return( conv ); return( conv );
} }
@ -319,8 +337,8 @@ conv_gen( REGION *or, void *vseq, void *a, void *b )
INTMASK *mask = conv->mask; INTMASK *mask = conv->mask;
int * restrict t = conv->coeff; int * restrict t = conv->coeff;
/* You might think this should be (scale+1)/2, but then we'd be adding /* You might think this should be (scale + 1) / 2, but then we'd be
* one for scale == 1. * adding one for scale == 1.
*/ */
int rounding = mask->scale / 2; int rounding = mask->scale / 2;
@ -348,15 +366,16 @@ conv_gen( REGION *or, void *vseq, void *a, void *b )
if( seq->last_bpl != IM_REGION_LSKIP( ir ) ) { if( seq->last_bpl != IM_REGION_LSKIP( ir ) ) {
seq->last_bpl = IM_REGION_LSKIP( ir ); seq->last_bpl = IM_REGION_LSKIP( ir );
z = 0; for( i = 0; i < conv->nnz; i++ ) {
for( i = 0, y = 0; y < mask->ysize; y++ ) z = conv->coeff_pos[i];
for( x = 0; x < mask->xsize; x++, i++ ) x = z % conv->mask->xsize;
if( mask->coeff[i] ) y = z / conv->mask->xsize;
seq->offsets[z++] =
IM_REGION_ADDR( ir, seq->offsets[i] =
x + le, y + to ) - IM_REGION_ADDR( ir, x + le, y + to ) -
IM_REGION_ADDR( ir, le, to ); IM_REGION_ADDR( ir, le, to );
} }
}
for( y = to; y < bo; y++ ) { for( y = to; y < bo; y++ ) {
/* Init pts for this line of PELs. /* Init pts for this line of PELs.
@ -399,7 +418,7 @@ conv_gen( REGION *or, void *vseq, void *a, void *b )
break; break;
default: default:
assert( 0 ); g_assert( 0 );
} }
} }
@ -413,18 +432,16 @@ im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask )
/* Check parameters. /* Check parameters.
*/ */
if( !in || in->Coding != IM_CODING_NONE || im_iscomplex( in ) ) { if( im_piocheck( in, out ) ||
im_error( "im_conv", "%s", _( "non-complex uncoded only" ) ); im_check_uncoded( "im_conv", in ) ||
im_check_noncomplex( "im_conv", in ) )
return( -1 ); return( -1 );
}
if( !mask || mask->xsize > 1000 || mask->ysize > 1000 || if( !mask || mask->xsize > 1000 || mask->ysize > 1000 ||
mask->xsize <= 0 || mask->ysize <= 0 || !mask->coeff || mask->xsize <= 0 || mask->ysize <= 0 || !mask->coeff ||
mask->scale == 0 ) { mask->scale == 0 ) {
im_error( "im_conv", "%s", _( "nonsense mask parameters" ) ); im_error( "im_conv", "%s", _( "nonsense mask parameters" ) );
return( -1 ); return( -1 );
} }
if( im_piocheck( in, out ) )
return( -1 );
if( !(conv = conv_new( in, out, mask )) ) if( !(conv = conv_new( in, out, mask )) )
return( -1 ); return( -1 );

View File

@ -35,6 +35,11 @@
* - sets Xoffset / Yoffset * - sets Xoffset / Yoffset
* 11/11/05 * 11/11/05
* - simpler inner loop avoids gcc4 bug * - simpler inner loop avoids gcc4 bug
* 12/11/09
* - only rebuild the buffer offsets if bpl changes
* - tiny speedups and cleanups
* - add restrict, though it doesn't seem to help gcc
* - add mask-all-zero check
*/ */
/* /*
@ -89,15 +94,13 @@ typedef struct {
int nnz; /* Number of non-zero mask elements */ int nnz; /* Number of non-zero mask elements */
double *coeff; /* Array of non-zero mask coefficients */ double *coeff; /* Array of non-zero mask coefficients */
int *coeff_pos; /* Index of each nnz element in mask->coeff */
} Conv; } Conv;
static int static int
conv_close( Conv *conv ) conv_close( Conv *conv )
{ {
if( conv->mask ) { IM_FREEF( im_free_dmask, conv->mask );
(void) im_free_dmask( conv->mask );
conv->mask = NULL;
}
return( 0 ); return( 0 );
} }
@ -121,14 +124,27 @@ conv_new( IMAGE *in, IMAGE *out, DOUBLEMASK *mask )
if( im_add_close_callback( out, if( im_add_close_callback( out,
(im_callback_fn) conv_close, conv, NULL ) || (im_callback_fn) conv_close, conv, NULL ) ||
!(conv->coeff = IM_ARRAY( out, ne, double )) || !(conv->coeff = IM_ARRAY( out, ne, double )) ||
!(conv->coeff_pos = IM_ARRAY( out, ne, int )) ||
!(conv->mask = im_dup_dmask( mask, "conv_mask" )) ) !(conv->mask = im_dup_dmask( mask, "conv_mask" )) )
return( NULL ); return( NULL );
/* Find non-zero mask elements. /* Find non-zero mask elements.
*/ */
for( i = 0; i < ne; i++ ) for( i = 0; i < ne; i++ )
if( mask->coeff[i] ) if( mask->coeff[i] ) {
conv->coeff[conv->nnz++] = mask->coeff[i]; conv->coeff[conv->nnz] = mask->coeff[i];
conv->coeff_pos[conv->nnz] = i;
conv->nnz += 1;
}
/* Was the whole mask zero? We must have at least 1 element in there:
* set it to zero.
*/
if( conv->nnz == 0 ) {
conv->coeff[0] = mask->coeff[0];
conv->coeff_pos[0] = 0;
conv->nnz = 1;
}
return( conv ); return( conv );
} }
@ -141,6 +157,8 @@ typedef struct {
int *offsets; /* Offsets for each non-zero matrix element */ int *offsets; /* Offsets for each non-zero matrix element */
PEL **pts; /* Per-non-zero mask element image pointers */ PEL **pts; /* Per-non-zero mask element image pointers */
int last_bpl; /* Avoid recalcing offsets, if we can */
} ConvSequence; } ConvSequence;
/* Free a sequence value. /* Free a sequence value.
@ -172,6 +190,7 @@ conv_start( IMAGE *out, void *a, void *b )
seq->conv = conv; seq->conv = conv;
seq->ir = NULL; seq->ir = NULL;
seq->pts = NULL; seq->pts = NULL;
seq->last_bpl = -1;
/* Attach region and arrays. /* Attach region and arrays.
*/ */
@ -186,17 +205,21 @@ conv_start( IMAGE *out, void *a, void *b )
return( (void *) seq ); return( (void *) seq );
} }
#define INNER sum += *t++ * (*p++)[x] #define INNER { \
sum += t[i] * p[i][x]; \
i += 1; \
}
#define CONV_FLOAT( ITYPE, OTYPE ) { \ #define CONV_FLOAT( ITYPE, OTYPE ) { \
OTYPE *q = (OTYPE *) IM_REGION_ADDR( or, le, y ); \ ITYPE ** restrict p = (ITYPE **) seq->pts; \
OTYPE * restrict q = (OTYPE *) IM_REGION_ADDR( or, le, y ); \
\ \
for( x = 0; x < sz; x++ ) { \ for( x = 0; x < sz; x++ ) { \
double sum = 0; \ double sum; \
double *t = conv->coeff; \ int i; \
ITYPE **p = (ITYPE **) seq->pts; \
\ \
z = 0; \ sum = 0; \
i = 0; \
IM_UNROLL( conv->nnz, INNER ); \ IM_UNROLL( conv->nnz, INNER ); \
\ \
sum = (sum / mask->scale) + mask->offset; \ sum = (sum / mask->scale) + mask->offset; \
@ -215,6 +238,7 @@ conv_gen( REGION *or, void *vseq, void *a, void *b )
Conv *conv = (Conv *) b; Conv *conv = (Conv *) b;
REGION *ir = seq->ir; REGION *ir = seq->ir;
DOUBLEMASK *mask = conv->mask; DOUBLEMASK *mask = conv->mask;
double * restrict t = conv->coeff;
Rect *r = &or->valid; Rect *r = &or->valid;
Rect s; Rect s;
@ -234,15 +258,22 @@ conv_gen( REGION *or, void *vseq, void *a, void *b )
if( im_prepare( ir, &s ) ) if( im_prepare( ir, &s ) )
return( -1 ); return( -1 );
/* Fill offset array. /* Fill offset array. Only do this if the bpl has changed since the
* previous im_prepare().
*/ */
z = 0; if( seq->last_bpl != IM_REGION_LSKIP( ir ) ) {
for( i = 0, y = 0; y < mask->ysize; y++ ) seq->last_bpl = IM_REGION_LSKIP( ir );
for( x = 0; x < mask->xsize; x++, i++ )
if( mask->coeff[i] ) for( i = 0; i < conv->nnz; i++ ) {
seq->offsets[z++] = z = conv->coeff_pos[i];
x = z % conv->mask->xsize;
y = z / conv->mask->xsize;
seq->offsets[i] =
IM_REGION_ADDR( ir, x + le, y + to ) - IM_REGION_ADDR( ir, x + le, y + to ) -
IM_REGION_ADDR( ir, le, to ); IM_REGION_ADDR( ir, le, to );
}
}
for( y = to; y < bo; y++ ) { for( y = to; y < bo; y++ ) {
/* Init pts for this line of PELs. /* Init pts for this line of PELs.
@ -284,19 +315,16 @@ im_convf_raw( IMAGE *in, IMAGE *out, DOUBLEMASK *mask )
/* Check parameters. /* Check parameters.
*/ */
if( !in || in->Coding != IM_CODING_NONE || im_iscomplex( in ) ) { if( im_piocheck( in, out ) ||
im_error( "im_convf", im_check_uncoded( "im_conv", in ) ||
"%s", _( "non-complex uncoded only" ) ); im_check_noncomplex( "im_conv", in ) )
return( -1 ); return( -1 );
}
if( !mask || mask->xsize > 1000 || mask->ysize > 1000 || if( !mask || mask->xsize > 1000 || mask->ysize > 1000 ||
mask->xsize <= 0 || mask->ysize <= 0 || !mask->coeff || mask->xsize <= 0 || mask->ysize <= 0 || !mask->coeff ||
mask->scale == 0 ) { mask->scale == 0 ) {
im_error( "im_convf", "%s", _( "nonsense mask parameters" ) ); im_error( "im_conv", "%s", _( "nonsense mask parameters" ) );
return( -1 ); return( -1 );
} }
if( im_piocheck( in, out ) )
return( -1 );
if( !(conv = conv_new( in, out, mask )) ) if( !(conv = conv_new( in, out, mask )) )
return( -1 ); return( -1 );

View File

@ -57,6 +57,9 @@
* - add im_norm_dmask() * - add im_norm_dmask()
* 1/9/09 * 1/9/09
* - move im_print_*mask() here * - move im_print_*mask() here
* 12/11/09
* - reading a float mask with im_read_imask() produced an incorrect
* error message
*/ */
/* /*
@ -399,20 +402,20 @@ im_read_imask( const char *maskfile )
if( ceil( dmask->scale ) != dmask->scale || if( ceil( dmask->scale ) != dmask->scale ||
ceil( dmask->offset ) != dmask->offset ) { ceil( dmask->offset ) != dmask->offset ) {
im_free_dmask( dmask );
im_error( "im_read_imask", im_error( "im_read_imask",
"%s", _( "scale and offset should be int" ) ); "%s", _( "scale and offset should be int" ) );
im_free_dmask( dmask );
return( NULL ); return( NULL );
} }
for( i = 0; i < dmask->xsize * dmask->ysize; i++ ) for( i = 0; i < dmask->xsize * dmask->ysize; i++ )
if( ceil( dmask->coeff[i] ) != dmask->coeff[i] ) { if( ceil( dmask->coeff[i] ) != dmask->coeff[i] ) {
im_free_dmask( dmask );
im_error( "im_read_imask", _( "cofficient at " im_error( "im_read_imask", _( "cofficient at "
"position (%d, %d) is not int" ), "position (%d, %d) is not int" ),
i % dmask->xsize, i % dmask->xsize,
i / dmask->xsize ); i / dmask->xsize );
im_free_dmask( dmask );
return( NULL ); return( NULL );
} }