im_aconv() works for a large 2d mask

It still needs some tuning; we should be able to speed it up further.
This commit is contained in:
John Cupitt 2011-06-09 11:39:31 +01:00
parent a9e305787b
commit 21fce2ab9e

View File

@ -50,18 +50,37 @@
TODO TODO
- tried a 201x201 mask, sigma 44.6, minamp 0.1, does not seem to timing:
read the whole mask? we get only 37638 elements, max and min are
messed up $ time vips im_conv_f img_0075.jpg x2.v g2d201.con
real 11m58.769s
user 22m46.390s
sys 0m3.270s
$ time vips im_aconv img_0075.jpg x.v g2d201.con 10 10
boxes_new: min = 0, max = 1
boxes_new: depth = 0.1, n_layers = 10
boxes_new: generated 1130 boxes
boxes_new: clustering with thresh 10 ...
boxes_new: renumbering ...
boxes_new: after renumbering, 14 boxes remain
real 0m34.377s
user 1m0.440s
sys 0m0.370s
$ vips im_subtract x.v x2.v diff.v
$ vips im_abs diff.v abs.v
$ vips im_max abs.v
2.70833
- can we use rolling averages for the vertical pass?
- add more bandfmt
- are we handling mask offset correctly? - are we handling mask offset correctly?
*/ */
/* Show sample pixels as they are transformed.
#define DEBUG_PIXELS
*/
/* /*
*/ */
#define DEBUG #define DEBUG
@ -87,7 +106,7 @@
/* Maximum number of boxes we can break the mask into. /* Maximum number of boxes we can break the mask into.
*/ */
#define MAX_LINES (1000) #define MAX_LINES (10000)
/* Get an (x,y) value from a mask. /* Get an (x,y) value from a mask.
*/ */
@ -181,6 +200,8 @@ boxes_end( Boxes *boxes, int x, int y, int factor )
static int static int
boxes_distance( Boxes *boxes, int a, int b ) boxes_distance( Boxes *boxes, int a, int b )
{ {
g_assert( boxes->weight[a] > 0 && boxes->weight[b] > 0 );
return( abs( boxes->start[a] - boxes->start[b] ) + return( abs( boxes->start[a] - boxes->start[b] ) +
abs( boxes->end[a] - boxes->end[b] ) ); abs( boxes->end[a] - boxes->end[b] ) );
} }
@ -264,13 +285,19 @@ boxes_renumber( Boxes *boxes )
{ {
int i, j; int i, j;
for( i = 0; i < boxes->n_hlines; i++ ) { /* Loop for all zero-weight hlines.
if( boxes->weight[i] == 0 ) { */
for( i = 0; i < boxes->n_hlines; ) {
if( boxes->weight[i] > 0 ) {
i++;
continue;
}
/* We move hlines i + 1 down, so we need to adjust all /* We move hlines i + 1 down, so we need to adjust all
* band[] refs to match. * band[] refs to match.
*/ */
for( j = 0; j < boxes->n_vlines; j++ ) for( j = 0; j < boxes->n_vlines; j++ )
if( boxes->band[j] <= i ) if( boxes->band[j] > i )
boxes->band[j] -= 1; boxes->band[j] -= 1;
for( j = i; j < boxes->n_hlines; j++ ) { for( j = i; j < boxes->n_hlines; j++ ) {
@ -281,9 +308,38 @@ boxes_renumber( Boxes *boxes )
boxes->n_hlines -= 1; boxes->n_hlines -= 1;
} }
}
} }
#ifdef DEBUG
/* Debug helper: dump the contents of a Boxes struct to stdout.
 *
 * Prints one text row per vline, followed by the area and rounding
 * fields. Each row shows, in order: the vline index (n), the band it
 * refers to (b), its row (r), its factor (f), the weight of that band (w),
 * a 50-character wide bar, and the start .. end values of the band.
 */
static void
boxes_print( Boxes *boxes )
{
int x, y;
printf( "lines:\n" );
/* Column headings for the per-vline fields printed below.
 */
printf( " n b r f w\n" );
for( y = 0; y < boxes->n_vlines; y++ ) {
/* band[] maps this vline to an entry in the start[] / end[] /
 * weight[] arrays.
 */
int b = boxes->band[y];
printf( "%3d %3d %3d %2d %2d ",
y, b,
boxes->row[y], boxes->factor[y],
boxes->weight[b] );
/* Draw the bar: 50 character cells, each scaled to a position rx
 * in the range 0 .. mask->xsize. A cell is drawn as '#' when rx
 * falls in the half-open range [start[b], end[b]), otherwise as a
 * space.
 */
for( x = 0; x < 50; x++ ) {
int rx = x * (boxes->mask->xsize + 1) / 50;
if( rx >= boxes->start[b] && rx < boxes->end[b] )
printf( "#" );
else
printf( " " );
}
printf( " %3d .. %3d\n", boxes->start[b], boxes->end[b] );
}
printf( "area = %d\n", boxes->area );
printf( "rounding = %d\n", boxes->rounding );
}
#endif /*DEBUG*/
/* Break a mask into boxes. /* Break a mask into boxes.
*/ */
static Boxes * static Boxes *
@ -399,13 +455,13 @@ boxes_new( IMAGE *in, IMAGE *out, DOUBLEMASK *mask, int n_layers, int cluster )
VIPS_DEBUG_MSG( "boxes_new: generated %d boxes\n", VIPS_DEBUG_MSG( "boxes_new: generated %d boxes\n",
boxes->n_hlines ); boxes->n_hlines );
VIPS_DEBUG_MSG( "boxes_new: clustering with thresh %d ...\n", VIPS_DEBUG_MSG( "boxes_new: clustering with thresh %d ...\n",
cluster ); cluster );
while( boxes_cluster( boxes, cluster ) ) while( boxes_cluster( boxes, cluster ) )
; ;
VIPS_DEBUG_MSG( "boxes_new: renumbering ...\n" );
boxes_renumber( boxes ); boxes_renumber( boxes );
VIPS_DEBUG_MSG( "boxes_new: after clustering, %d boxes remain\n", VIPS_DEBUG_MSG( "boxes_new: after renumbering, %d boxes remain\n",
boxes->n_hlines ); boxes->n_hlines );
/* Find the area of the lines. /* Find the area of the lines.
@ -438,27 +494,17 @@ boxes_new( IMAGE *in, IMAGE *out, DOUBLEMASK *mask, int n_layers, int cluster )
boxes->area = rint( sum * boxes->area / mask->scale ); boxes->area = rint( sum * boxes->area / mask->scale );
boxes->rounding = (boxes->area + 1) / 2 + mask->offset * boxes->area; boxes->rounding = (boxes->area + 1) / 2 + mask->offset * boxes->area;
/* ASCII-art layer drawing. #ifdef DEBUG
boxes_print( boxes );
#endif /*DEBUG*/
/* With 512x512 tiles, each hline requires 3mb of intermediate per
* thread ... 300 lines is about a gb per thread, ouch.
*/ */
printf( "lines:\n" ); if( boxes->n_hlines > 150 ) {
printf( " n b r f\n" ); im_error( "im_aconv", "%s", _( "mask too complex" ) );
for( y = 0; y < boxes->n_vlines; y++ ) { return( NULL );
int b = boxes->band[y];
printf( "%3d %3d %3d %2d ",
y, b, boxes->row[y], boxes->factor[y] );
for( x = 0; x < 50; x++ ) {
int rx = x * (mask->xsize + 1) / 50;
if( rx >= boxes->start[b] && rx < boxes->end[b] )
printf( "#" );
else
printf( " " );
} }
printf( " %3d .. %3d\n", boxes->start[b], boxes->end[b] );
}
printf( "area = %d\n", boxes->area );
printf( "rounding = %d\n", boxes->rounding );
return( boxes ); return( boxes );
} }
@ -768,9 +814,6 @@ aconv_vgenerate( REGION *or, void *vseq, void *a, void *b )
if( im_prepare( ir, &s ) ) if( im_prepare( ir, &s ) )
return( -1 ); return( -1 );
/* Stride can be different for the vertical case, keep this here for
* ease of direction change.
*/
istride = IM_REGION_LSKIP( ir ) / istride = IM_REGION_LSKIP( ir ) /
IM_IMAGE_SIZEOF_ELEMENT( in ); IM_IMAGE_SIZEOF_ELEMENT( in );
ostride = IM_REGION_LSKIP( or ) / ostride = IM_REGION_LSKIP( or ) /
@ -789,23 +832,24 @@ aconv_vgenerate( REGION *or, void *vseq, void *a, void *b )
switch( boxes->in->BandFmt ) { switch( boxes->in->BandFmt ) {
case IM_BANDFMT_UCHAR: case IM_BANDFMT_UCHAR:
for( y = 0; y < r->height; y++ ) { for( x = 0; x < sz; x++ ) {
int *p; int *p;
PEL *q; PEL *q;
int sum; int sum;
p = (int *) IM_REGION_ADDR( ir, r->left, r->top + y ); p = x * boxes->n_hlines +
q = (PEL *) IM_REGION_ADDR( or, r->left, r->top + y ); (int *) IM_REGION_ADDR( ir, r->left, r->top );
q = x + (PEL *) IM_REGION_ADDR( or, r->left, r->top );
for( x = 0; x < sz; x++ ) { for( y = 0; y < r->height; y++ ) {
sum = 0; sum = 0;
for( z = 0; z < n_vlines; z++ ) for( z = 0; z < n_vlines; z++ )
sum += boxes->factor[z] * p[seq->start[z]]; sum += boxes->factor[z] * p[seq->start[z]];
p += istride;
sum = (sum + boxes->rounding) / boxes->area; sum = (sum + boxes->rounding) / boxes->area;
CLIP_UCHAR( sum ); CLIP_UCHAR( sum );
*q = sum; *q = sum;
q += 1; q += ostride;
p += boxes->n_hlines;
} }
} }