stuff
This commit is contained in:
parent
bfac02d427
commit
90ecad14fc
@ -1,5 +1,6 @@
|
|||||||
3/4/09 started 7.19.0
|
3/4/09 started 7.19.0
|
||||||
- version bump
|
- version bump
|
||||||
|
- tiny conv speedup
|
||||||
|
|
||||||
25/3/09 started 7.18.0
|
25/3/09 started 7.18.0
|
||||||
- revised version numbers
|
- revised version numbers
|
||||||
|
15
TODO
15
TODO
@ -1,8 +1,23 @@
|
|||||||
|
- restrict in im_conv
|
||||||
|
|
||||||
|
before
|
||||||
|
|
||||||
|
$ time vips im_conv wtc.v t.v mask.con
|
||||||
|
vips warning: im_conv: 59722 overflows and 675925 underflows detected
|
||||||
|
|
||||||
|
real 0m13.303s
|
||||||
|
user 0m11.013s
|
||||||
|
sys 0m1.120s
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
WONTFIX for 7.18
|
WONTFIX for 7.18
|
||||||
================
|
================
|
||||||
|
|
||||||
- try adding "restrict" to im_conv? other interpolators?
|
- try adding "restrict" to im_conv? other interpolators?
|
||||||
|
|
||||||
|
- can we use conv_sep to speed up the memuse benchmarks?
|
||||||
|
|
||||||
- move im_shrink & friends to resample?
|
- move im_shrink & friends to resample?
|
||||||
|
|
||||||
match_linear, match_linear_search?
|
match_linear, match_linear_search?
|
||||||
|
@ -64,6 +64,9 @@
|
|||||||
* 12/5/08
|
* 12/5/08
|
||||||
* - int rounding was +1 too much, argh
|
* - int rounding was +1 too much, argh
|
||||||
* - only rebuild the buffer offsets if bpl changes
|
* - only rebuild the buffer offsets if bpl changes
|
||||||
|
* 5/4/09
|
||||||
|
* - tiny speedup ... change ++ to +=1 in inner loop
|
||||||
|
* - add restrict, though it doesn't seem to help gcc
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -117,7 +120,7 @@ typedef struct {
|
|||||||
INTMASK *mask; /* Copy of mask arg */
|
INTMASK *mask; /* Copy of mask arg */
|
||||||
|
|
||||||
int nnz; /* Number of non-zero mask elements */
|
int nnz; /* Number of non-zero mask elements */
|
||||||
int *coeff; /* Array of non-zero mask coefficients */
|
int * restrict coeff; /* Array of non-zero mask coefficients */
|
||||||
|
|
||||||
int underflow; /* Global underflow/overflow counts */
|
int underflow; /* Global underflow/overflow counts */
|
||||||
int overflow;
|
int overflow;
|
||||||
@ -199,7 +202,7 @@ typedef struct {
|
|||||||
REGION *ir; /* Input region */
|
REGION *ir; /* Input region */
|
||||||
|
|
||||||
int *offsets; /* Offsets for each non-zero matrix element */
|
int *offsets; /* Offsets for each non-zero matrix element */
|
||||||
PEL **pts; /* Per-non-zero mask element image pointers */
|
PEL * restrict * restrict pts; /* Per-non-zero mask element pointers */
|
||||||
|
|
||||||
int underflow; /* Underflow/overflow counts */
|
int underflow; /* Underflow/overflow counts */
|
||||||
int overflow;
|
int overflow;
|
||||||
@ -259,7 +262,11 @@ conv_start( IMAGE *out, void *a, void *b )
|
|||||||
return( seq );
|
return( seq );
|
||||||
}
|
}
|
||||||
|
|
||||||
#define INNER sum += *t++ * (*p++)[x]
|
#define INNER { \
|
||||||
|
sum += *t * (*p)[x]; \
|
||||||
|
t += 1; \
|
||||||
|
p += 1; \
|
||||||
|
}
|
||||||
|
|
||||||
/* INT and FLOAT inner loops.
|
/* INT and FLOAT inner loops.
|
||||||
*/
|
*/
|
||||||
@ -267,11 +274,14 @@ conv_start( IMAGE *out, void *a, void *b )
|
|||||||
TYPE *q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
|
TYPE *q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
|
||||||
\
|
\
|
||||||
for( x = 0; x < sz; x++ ) { \
|
for( x = 0; x < sz; x++ ) { \
|
||||||
int sum = 0; \
|
int * restrict t; \
|
||||||
int *t = conv->coeff; \
|
TYPE ** restrict p; \
|
||||||
TYPE **p = (TYPE **) seq->pts; \
|
int sum; \
|
||||||
\
|
\
|
||||||
z = 0; \
|
z = 0; \
|
||||||
|
sum = 0; \
|
||||||
|
t = conv->coeff; \
|
||||||
|
p = (TYPE **) seq->pts; \
|
||||||
IM_UNROLL( conv->nnz, INNER ); \
|
IM_UNROLL( conv->nnz, INNER ); \
|
||||||
\
|
\
|
||||||
sum = ((sum + rounding) / mask->scale) + mask->offset; \
|
sum = ((sum + rounding) / mask->scale) + mask->offset; \
|
||||||
@ -286,11 +296,14 @@ conv_start( IMAGE *out, void *a, void *b )
|
|||||||
TYPE *q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
|
TYPE *q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
|
||||||
\
|
\
|
||||||
for( x = 0; x < sz; x++ ) { \
|
for( x = 0; x < sz; x++ ) { \
|
||||||
double sum = 0; \
|
int * restrict t; \
|
||||||
int *t = conv->coeff; \
|
TYPE ** restrict p; \
|
||||||
TYPE **p = (TYPE **) seq->pts; \
|
double sum; \
|
||||||
\
|
\
|
||||||
z = 0; \
|
z = 0; \
|
||||||
|
sum = 0; \
|
||||||
|
t = conv->coeff; \
|
||||||
|
p = (TYPE **) seq->pts; \
|
||||||
IM_UNROLL( conv->nnz, INNER ); \
|
IM_UNROLL( conv->nnz, INNER ); \
|
||||||
\
|
\
|
||||||
sum = (sum / mask->scale) + mask->offset; \
|
sum = (sum / mask->scale) + mask->offset; \
|
||||||
@ -405,13 +418,13 @@ im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask )
|
|||||||
/* Check parameters.
|
/* Check parameters.
|
||||||
*/
|
*/
|
||||||
if( !in || in->Coding != IM_CODING_NONE || im_iscomplex( in ) ) {
|
if( !in || in->Coding != IM_CODING_NONE || im_iscomplex( in ) ) {
|
||||||
im_errormsg( "im_conv: input non-complex uncoded please!");
|
im_error( "im_conv", "%s", _( "non-complex uncoded only" ) );
|
||||||
return( -1 );
|
return( -1 );
|
||||||
}
|
}
|
||||||
if( !mask || mask->xsize > 1000 || mask->ysize > 1000 ||
|
if( !mask || mask->xsize > 1000 || mask->ysize > 1000 ||
|
||||||
mask->xsize <= 0 || mask->ysize <= 0 || !mask->coeff ||
|
mask->xsize <= 0 || mask->ysize <= 0 || !mask->coeff ||
|
||||||
mask->scale == 0 ) {
|
mask->scale == 0 ) {
|
||||||
im_errormsg( "im_conv: nonsense mask parameters" );
|
im_error( "im_conv", "%s", _( "nonsense mask parameters" ) );
|
||||||
return( -1 );
|
return( -1 );
|
||||||
}
|
}
|
||||||
if( im_piocheck( in, out ) )
|
if( im_piocheck( in, out ) )
|
||||||
@ -427,7 +440,7 @@ im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask )
|
|||||||
out->Xsize -= mask->xsize - 1;
|
out->Xsize -= mask->xsize - 1;
|
||||||
out->Ysize -= mask->ysize - 1;
|
out->Ysize -= mask->ysize - 1;
|
||||||
if( out->Xsize <= 0 || out->Ysize <= 0 ) {
|
if( out->Xsize <= 0 || out->Ysize <= 0 ) {
|
||||||
im_errormsg( "im_conv: image too small for mask" );
|
im_error( "im_conv", "%s", _( "image too small for mask" ) );
|
||||||
return( -1 );
|
return( -1 );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user