refactor im_aconv

get ready for rolling vertical sums too
This commit is contained in:
John Cupitt 2011-06-09 15:21:42 +01:00
parent 21fce2ab9e
commit 4e3e0cee6c

View File

@ -73,7 +73,10 @@ $ vips im_abs diff.v abs.v
$ vips im_max abs.v $ vips im_max abs.v
2.70833 2.70833
- can we use rolling averages for the vertical pass? - can we use rolling averages for the vertical pass?
we need to search for groups with the same band and adjacent row
- clustering could be much faster
- add more bandfmt - add more bandfmt
@ -92,6 +95,7 @@ $ vips im_max abs.v
#include <vips/intl.h> #include <vips/intl.h>
#include <stdio.h> #include <stdio.h>
#include <string.h>
#include <stdlib.h> #include <stdlib.h>
#include <limits.h> #include <limits.h>
#include <math.h> #include <math.h>
@ -112,16 +116,43 @@ $ vips im_max abs.v
*/ */
#define MASK( M, X, Y ) ((M)->coeff[(X) + (Y) * (M)->xsize]) #define MASK( M, X, Y ) ((M)->coeff[(X) + (Y) * (M)->xsize])
/* Euclid's algorithm. Use this to common up mults. /* A horizontal line in the mask.
*/ */
static int typedef struct _HLine {
gcd( int a, int b ) /* Start is the left-most pixel in the line, end is one beyond the
{ * right-most pixel.
if( b == 0 ) */
return( abs( a ) ); int start;
else int end;
return( gcd( b, a % b ) );
} /* The hlines have weights. weight 0 means this line is unused.
*/
int weight;
} HLine;
/* An element of a vline: one term of the vertical sum. aconv_vgenerate()
 * adds up factor * (the sample in band 'band' at row offset 'row') over
 * every velement to make an output value.
 */
typedef struct _VElement {
/* band is the index into hline[] we add, row is the row we take
 * it from. boxes_merge() and boxes_renumber() rewrite band when
 * hlines are merged or removed.
 */
int band;
int row;
/* Negative lobes are made with factor -1, we also common-up repeated
 * additions of the same line. boxes_new() later divides every factor
 * by their common divisor and moves that divisor into ->area.
 */
int factor;
} VElement;
/* A vline: a run of velements that share the same band and factor and
 * sit on consecutive rows. Built by boxes_vline() from the sorted
 * velement array.
 */
typedef struct _VLine {
/* band and factor are copied from the first velement of the run.
 */
int band;
int factor;
/* start is the row of the first velement in the run, end is one beyond
 * the row of the last velement (non-inclusive).
 */
int start;
int end;
} VLine;
/* A set of boxes. /* A set of boxes.
*/ */
@ -137,60 +168,61 @@ typedef struct _Boxes {
int area; int area;
int rounding; int rounding;
/* The horizontal lines we gather. /* The horizontal lines we gather. hline[3] writes to band 3 in the
*/
int n_hlines;
/* Start is the left-most pixel in the line, end is one beyond the
* right-most pixel. start[3]/end[3] writes to band 3 in the
* intermediate image. * intermediate image.
*/ */
int start[MAX_LINES]; int n_hline;
int end[MAX_LINES]; HLine hline[MAX_LINES];
/* The hlines have weights. weight 0 means this line is unused. /* Scale and sum a set of hlines to make the final value.
*/ */
int weight[MAX_LINES]; int n_velement;
VElement velement[MAX_LINES];
/* Scale and sum a set of hlines to make the final value. band[] is /* And group those velements as vlines.
* the index into start/end we add, row[] is the row we take it from.
*/ */
int n_vlines; int n_vline;
int band[MAX_LINES]; VLine vline[MAX_LINES];
int row[MAX_LINES];
/* Each hline has a factor during gather, eg. -1 for -ve lobes.
*/
int factor[MAX_LINES];
} Boxes; } Boxes;
/* Greatest common divisor of a and b by Euclid's algorithm. Use this to
 * common up mults. The result is passed through abs(), and gcd( a, 0 )
 * is abs( a ).
 */
static int
gcd( int a, int b )
{
	/* Iterative form of the classic recurrence gcd( a, b ) ==
	 * gcd( b, a % b ), stopping once the remainder reaches zero.
	 */
	while( b != 0 ) {
		int remainder = a % b;

		a = b;
		b = remainder;
	}

	return( abs( a ) );
}
static void static void
boxes_start( Boxes *boxes, int x ) boxes_start( Boxes *boxes, int x )
{ {
boxes->start[boxes->n_hlines] = x; boxes->hline[boxes->n_hline].start = x;
boxes->weight[boxes->n_hlines] = 1; boxes->hline[boxes->n_hline].weight = 1;
} }
static int static int
boxes_end( Boxes *boxes, int x, int y, int factor ) boxes_end( Boxes *boxes, int x, int y, int factor )
{ {
boxes->end[boxes->n_hlines] = x; boxes->hline[boxes->n_hline].end = x;
boxes->row[boxes->n_vlines] = y; boxes->velement[boxes->n_velement].row = y;
boxes->band[boxes->n_vlines] = boxes->n_hlines; boxes->velement[boxes->n_velement].band = boxes->n_hline;
boxes->factor[boxes->n_vlines] = factor; boxes->velement[boxes->n_velement].factor = factor;
if( boxes->n_hlines >= MAX_LINES - 1 ) { if( boxes->n_hline >= MAX_LINES - 1 ) {
vips_error( "im_aconv", "%s", _( "mask too complex" ) ); vips_error( "im_aconv", "%s", _( "mask too complex" ) );
return( -1 ); return( -1 );
} }
boxes->n_hlines += 1; boxes->n_hline += 1;
if( boxes->n_vlines >= MAX_LINES - 1 ) { if( boxes->n_velement >= MAX_LINES - 1 ) {
vips_error( "im_aconv", "%s", _( "mask too complex" ) ); vips_error( "im_aconv", "%s", _( "mask too complex" ) );
return( -1 ); return( -1 );
} }
boxes->n_vlines += 1; boxes->n_velement += 1;
return( 0 ); return( 0 );
} }
@ -200,10 +232,10 @@ boxes_end( Boxes *boxes, int x, int y, int factor )
static int static int
boxes_distance( Boxes *boxes, int a, int b ) boxes_distance( Boxes *boxes, int a, int b )
{ {
g_assert( boxes->weight[a] > 0 && boxes->weight[b] > 0 ); g_assert( boxes->hline[a].weight > 0 && boxes->hline[b].weight > 0 );
return( abs( boxes->start[a] - boxes->start[b] ) + return( abs( boxes->hline[a].start - boxes->hline[b].start ) +
abs( boxes->end[a] - boxes->end[b] ) ); abs( boxes->hline[a].end - boxes->hline[b].end ) );
} }
/* Merge two hlines. Line b is deleted, and any refs to b in vlines updated to /* Merge two hlines. Line b is deleted, and any refs to b in vlines updated to
@ -216,25 +248,27 @@ boxes_merge( Boxes *boxes, int a, int b )
/* Scale weights. /* Scale weights.
*/ */
int fa = boxes->weight[a]; int fa = boxes->hline[a].weight;
int fb = boxes->weight[b]; int fb = boxes->hline[b].weight;
double w = (double) fb / (fa + fb); double w = (double) fb / (fa + fb);
/* New endpoints. /* New endpoints.
*/ */
boxes->start[a] += w * (boxes->start[b] - boxes->start[a]); boxes->hline[a].start += w *
boxes->end[a] += w * (boxes->end[b] - boxes->end[a]); (boxes->hline[b].start - boxes->hline[a].start);
boxes->weight[a] += boxes->weight[b]; boxes->hline[a].end += w *
(boxes->hline[b].end - boxes->hline[a].end);
boxes->hline[a].weight += boxes->hline[b].weight;
/* Update refs to b in vlines to refer to a instead. /* Update velement refs to b to refer to a instead.
*/ */
for( i = 0; i < boxes->n_vlines; i++ ) for( i = 0; i < boxes->n_velement; i++ )
if( boxes->band[i] == b ) if( boxes->velement[i].band == b )
boxes->band[i] = a; boxes->velement[i].band = a;
/* Mark b to be deleted. /* Mark b to be deleted.
*/ */
boxes->weight[b] = 0; boxes->hline[b].weight = 0;
} }
/* Find the closest pair of hlines, join them up if the distance is less than /* Find the closest pair of hlines, join them up if the distance is less than
@ -249,14 +283,14 @@ boxes_cluster( Boxes *boxes, int cluster )
best = 9999999; best = 9999999;
for( i = 0; i < boxes->n_hlines; i++ ) { for( i = 0; i < boxes->n_hline; i++ ) {
if( boxes->weight[i] == 0 ) if( boxes->hline[i].weight == 0 )
continue; continue;
for( j = i + 1; j < boxes->n_hlines; j++ ) { for( j = i + 1; j < boxes->n_hline; j++ ) {
int d; int d;
if( boxes->weight[j] == 0 ) if( boxes->hline[j].weight == 0 )
continue; continue;
d = boxes_distance( boxes, i, j ); d = boxes_distance( boxes, i, j );
@ -285,10 +319,16 @@ boxes_renumber( Boxes *boxes )
{ {
int i, j; int i, j;
j = 0;
for( i = 0; i < boxes->n_hline; i++ )
if( boxes->hline[i].weight == 0 )
j++;
printf( "%d weight 0 hlines\n", j );
/* Loop for all zero-weight hlines. /* Loop for all zero-weight hlines.
*/ */
for( i = 0; i < boxes->n_hlines; ) { for( i = 0; i < boxes->n_hline; ) {
if( boxes->weight[i] > 0 ) { if( boxes->hline[i].weight > 0 ) {
i++; i++;
continue; continue;
} }
@ -296,17 +336,70 @@ boxes_renumber( Boxes *boxes )
/* We move hlines i + 1 down, so we need to adjust all /* We move hlines i + 1 down, so we need to adjust all
* band[] refs to match. * band[] refs to match.
*/ */
for( j = 0; j < boxes->n_vlines; j++ ) for( j = 0; j < boxes->n_velement; j++ )
if( boxes->band[j] > i ) if( boxes->velement[j].band > i )
boxes->band[j] -= 1; boxes->velement[j].band -= 1;
for( j = i; j < boxes->n_hlines; j++ ) { memmove( boxes->hline + i, boxes->hline + i + 1,
boxes->start[j] = boxes->start[j + 1]; sizeof( HLine ) * (boxes->n_hline - i - 1) );
boxes->end[j] = boxes->end[j + 1]; boxes->n_hline -= 1;
boxes->weight[j] = boxes->weight[j + 1]; }
} }
boxes->n_hlines -= 1; /* Sort by band, then factor, then row.
*/
static int
sortfn( const void *p1, const void *p2 )
{
	/* qsort() comparator over VElement. The primary key is band; ties
	 * are broken by factor, and remaining ties by row.
	 */
	const VElement *lhs = p1;
	const VElement *rhs = p2;
	int delta;

	delta = lhs->band - rhs->band;
	if( delta == 0 )
		delta = lhs->factor - rhs->factor;
	if( delta == 0 )
		delta = lhs->row - rhs->row;

	return( delta );
}
static void
boxes_vline( Boxes *boxes )
{
int y, z;
/* Sort to get elements which could form a vline together.
*/
qsort( boxes->velement, boxes->n_velement, sizeof( VElement ), sortfn );
boxes->n_vline = 0;
for( y = 0; y < boxes->n_velement; ) {
int n = boxes->n_vline;
/* Start of a line.
*/
boxes->vline[n].band = boxes->velement[y].band;
boxes->vline[n].factor = boxes->velement[y].factor;
boxes->vline[n].start = boxes->velement[y].row;
/* Search for the end of this line.
*/
for( z = y + 1; z < boxes->n_velement; z++ )
if( boxes->velement[z].band !=
boxes->vline[n].band ||
boxes->velement[z].factor !=
boxes->vline[n].factor ||
boxes->velement[z].row !=
boxes->vline[n].start + z - y )
break;
/* So the line ends at the previously examined element. We
* want 'end' to be one beyond that (non-inclusive).
*/
boxes->vline[n].end = boxes->velement[z - 1].row + 1;
boxes->n_vline += 1;
y = z;
} }
} }
@ -316,25 +409,38 @@ boxes_print( Boxes *boxes )
{ {
int x, y; int x, y;
printf( "lines:\n" ); printf( "hlines:\n" );
printf( " n b r f w\n" ); printf( " n b r f w\n" );
for( y = 0; y < boxes->n_vlines; y++ ) { for( y = 0; y < boxes->n_velement; y++ ) {
int b = boxes->band[y]; int b = boxes->velement[y].band;
printf( "%3d %3d %3d %2d %2d ", printf( "%4d %3d %3d %2d %3d ",
y, b, y, b,
boxes->row[y], boxes->factor[y], boxes->velement[y].row,
boxes->weight[b] ); boxes->velement[y].factor,
for( x = 0; x < 50; x++ ) { boxes->hline[b].weight );
int rx = x * (boxes->mask->xsize + 1) / 50; for( x = 0; x < 45; x++ ) {
int rx = x * (boxes->mask->xsize + 1) / 45;
if( rx >= boxes->start[b] && rx < boxes->end[b] ) if( rx >= boxes->hline[b].start &&
rx < boxes->hline[b].end )
printf( "#" ); printf( "#" );
else else
printf( " " ); printf( " " );
} }
printf( " %3d .. %3d\n", boxes->start[b], boxes->end[b] ); printf( " %3d .. %3d\n",
boxes->hline[b].start, boxes->hline[b].end );
} }
printf( "%d vlines:\n", boxes->n_vline );
printf( " n b f s e\n" );
for( y = 0; y < boxes->n_vline; y++ )
printf( "%4d %2d %2d == %3d .. %3d\n", y,
boxes->vline[y].band,
boxes->vline[y].factor,
boxes->vline[y].start,
boxes->vline[y].end );
printf( "area = %d\n", boxes->area ); printf( "area = %d\n", boxes->area );
printf( "rounding = %d\n", boxes->rounding ); printf( "rounding = %d\n", boxes->rounding );
} }
@ -374,8 +480,9 @@ boxes_new( IMAGE *in, IMAGE *out, DOUBLEMASK *mask, int n_layers, int cluster )
boxes->n_layers = n_layers; boxes->n_layers = n_layers;
boxes->cluster = cluster; boxes->cluster = cluster;
boxes->n_hlines = 0; boxes->n_hline = 0;
boxes->n_vlines = 0; boxes->n_velement = 0;
boxes->n_vline = 0;
/* Find mask range. We must always include the zero axis in the mask. /* Find mask range. We must always include the zero axis in the mask.
*/ */
@ -453,36 +560,41 @@ boxes_new( IMAGE *in, IMAGE *out, DOUBLEMASK *mask, int n_layers, int cluster )
} }
} }
VIPS_DEBUG_MSG( "boxes_new: generated %d boxes\n", VIPS_DEBUG_MSG( "boxes_new: generated %d boxes\n", boxes->n_hline );
boxes->n_hlines ); boxes_print( boxes );
VIPS_DEBUG_MSG( "boxes_new: clustering with thresh %d ...\n", VIPS_DEBUG_MSG( "boxes_new: clustering with thresh %d ...\n",
cluster ); cluster );
while( boxes_cluster( boxes, cluster ) ) while( boxes_cluster( boxes, cluster ) )
; ;
VIPS_DEBUG_MSG( "boxes_new: renumbering ...\n" ); VIPS_DEBUG_MSG( "boxes_new: renumbering ...\n" );
boxes_renumber( boxes ); boxes_renumber( boxes );
VIPS_DEBUG_MSG( "boxes_new: after renumbering, %d boxes remain\n", VIPS_DEBUG_MSG( "boxes_new: after renumbering, %d hlines remain\n",
boxes->n_hlines ); boxes->n_hline );
VIPS_DEBUG_MSG( "boxes_new: forming vlines ...\n" );
boxes_vline( boxes );
VIPS_DEBUG_MSG( "boxes_new: found %d vlines\n", boxes->n_vline );
/* Find the area of the lines. /* Find the area of the lines.
*/ */
boxes->area = 0; boxes->area = 0;
for( y = 0; y < boxes->n_vlines; y++ ) { for( y = 0; y < boxes->n_velement; y++ ) {
int x = boxes->band[y]; int x = boxes->velement[y].band;
boxes->area += boxes->factor[y] * boxes->area += boxes->velement[y].factor *
(boxes->end[x] - boxes->start[x]); (boxes->hline[x].end - boxes->hline[x].start);
} }
/* Strength reduction: if all lines are divisible by n, we can move /* Strength reduction: if all lines are divisible by n, we can move
* that n out into the ->area factor. The aim is to produce as many * that n out into the ->area factor. The aim is to produce as many
* factor 1 lines as we can and to reduce the chance of overflow. * factor 1 lines as we can and to reduce the chance of overflow.
*/ */
x = boxes->factor[0]; x = boxes->velement[0].factor;
for( y = 1; y < boxes->n_vlines; y++ ) for( y = 1; y < boxes->n_velement; y++ )
x = gcd( x, boxes->factor[y] ); x = gcd( x, boxes->velement[y].factor );
for( y = 0; y < boxes->n_vlines; y++ ) for( y = 0; y < boxes->n_velement; y++ )
boxes->factor[y] /= x; boxes->velement[y].factor /= x;
boxes->area *= x; boxes->area *= x;
/* Find the area of the original mask. /* Find the area of the original mask.
@ -501,7 +613,7 @@ boxes_new( IMAGE *in, IMAGE *out, DOUBLEMASK *mask, int n_layers, int cluster )
/* With 512x512 tiles, each hline requires 3mb of intermediate per /* With 512x512 tiles, each hline requires 3mb of intermediate per
* thread ... 300 lines is about a gb per thread, ouch. * thread ... 300 lines is about a gb per thread, ouch.
*/ */
if( boxes->n_hlines > 150 ) { if( boxes->n_hline > 150 ) {
im_error( "im_aconv", "%s", _( "mask too complex" ) ); im_error( "im_aconv", "%s", _( "mask too complex" ) );
return( NULL ); return( NULL );
} }
@ -560,18 +672,17 @@ aconv_start( IMAGE *out, void *a, void *b )
seq->boxes = boxes; seq->boxes = boxes;
seq->ir = im_region_create( in ); seq->ir = im_region_create( in );
/* There will always be more vlines than hlines, so make the arrays /* n_velement should be the largest possible dimension.
* vlines big and we'll have room for both.
*/ */
g_assert( boxes->n_vlines >= boxes->n_hlines ); g_assert( boxes->n_velement >= boxes->n_hline );
seq->start = IM_ARRAY( out, boxes->n_vlines, int ); seq->start = IM_ARRAY( out, boxes->n_velement, int );
seq->end = IM_ARRAY( out, boxes->n_vlines, int ); seq->end = IM_ARRAY( out, boxes->n_velement, int );
if( vips_band_format_isint( out->BandFmt ) ) if( vips_band_format_isint( out->BandFmt ) )
seq->sum = IM_ARRAY( out, boxes->n_vlines, int ); seq->sum = IM_ARRAY( out, boxes->n_velement, int );
else else
seq->sum = IM_ARRAY( out, boxes->n_vlines, double ); seq->sum = IM_ARRAY( out, boxes->n_velement, double );
seq->last_stride = -1; seq->last_stride = -1;
if( !seq->ir || !seq->start || !seq->end || !seq->sum ) { if( !seq->ir || !seq->start || !seq->end || !seq->sum ) {
@ -630,7 +741,7 @@ aconv_hgenerate( REGION *or, void *vseq, void *a, void *b )
Boxes *boxes = (Boxes *) b; Boxes *boxes = (Boxes *) b;
REGION *ir = seq->ir; REGION *ir = seq->ir;
const int n_hlines = boxes->n_hlines; const int n_hline = boxes->n_hline;
DOUBLEMASK *mask = boxes->mask; DOUBLEMASK *mask = boxes->mask;
Rect *r = &or->valid; Rect *r = &or->valid;
@ -662,9 +773,9 @@ aconv_hgenerate( REGION *or, void *vseq, void *a, void *b )
if( seq->last_stride != istride ) { if( seq->last_stride != istride ) {
seq->last_stride = istride; seq->last_stride = istride;
for( z = 0; z < n_hlines; z++ ) { for( z = 0; z < n_hline; z++ ) {
seq->start[z] = boxes->start[z] * istride; seq->start[z] = boxes->hline[z].start * istride;
seq->end[z] = boxes->end[z] * istride; seq->end[z] = boxes->hline[z].end * istride;
} }
} }
@ -679,19 +790,20 @@ aconv_hgenerate( REGION *or, void *vseq, void *a, void *b )
int *q; int *q;
p = i + (PEL *) IM_REGION_ADDR( ir, r->left, r->top + y ); p = i + (PEL *) IM_REGION_ADDR( ir, r->left, r->top + y );
q = i * n_hlines + q = i * n_hline +
(int *) IM_REGION_ADDR( or, r->left, r->top + y ); (int *) IM_REGION_ADDR( or, r->left, r->top + y );
for( z = 0; z < n_hlines; z++ ) { for( z = 0; z < n_hline; z++ ) {
seq_sum[z] = 0; seq_sum[z] = 0;
for( x = boxes->start[z]; x < boxes->end[z]; x++ ) for( x = boxes->hline[z].start;
x < boxes->hline[z].end; x++ )
seq_sum[z] += p[x * istride]; seq_sum[z] += p[x * istride];
q[z] = seq_sum[z]; q[z] = seq_sum[z];
} }
q += ostride; q += ostride;
for( x = 1; x < r->width; x++ ) { for( x = 1; x < r->width; x++ ) {
for( z = 0; z < n_hlines; z++ ) { for( z = 0; z < n_hline; z++ ) {
seq_sum[z] += p[seq->end[z]]; seq_sum[z] += p[seq->end[z]];
seq_sum[z] -= p[seq->start[z]]; seq_sum[z] -= p[seq->start[z]];
q[z] = seq_sum[z]; q[z] = seq_sum[z];
@ -766,7 +878,7 @@ aconv_horizontal( Boxes *boxes, IMAGE *in, IMAGE *out )
im_error( "im_aconv", "%s", _( "image too small for mask" ) ); im_error( "im_aconv", "%s", _( "image too small for mask" ) );
return( -1 ); return( -1 );
} }
out->Bands *= boxes->n_hlines; out->Bands *= boxes->n_hline;
out->BandFmt = vips_band_format_isfloat( in->BandFmt ) ? out->BandFmt = vips_band_format_isfloat( in->BandFmt ) ?
VIPS_FORMAT_DOUBLE : VIPS_FORMAT_INT; VIPS_FORMAT_DOUBLE : VIPS_FORMAT_INT;
@ -792,7 +904,7 @@ aconv_vgenerate( REGION *or, void *vseq, void *a, void *b )
Boxes *boxes = (Boxes *) b; Boxes *boxes = (Boxes *) b;
REGION *ir = seq->ir; REGION *ir = seq->ir;
const int n_vlines = boxes->n_vlines; const int n_velement = boxes->n_velement;
DOUBLEMASK *mask = boxes->mask; DOUBLEMASK *mask = boxes->mask;
Rect *r = &or->valid; Rect *r = &or->valid;
@ -824,9 +936,9 @@ aconv_vgenerate( REGION *or, void *vseq, void *a, void *b )
if( seq->last_stride != istride ) { if( seq->last_stride != istride ) {
seq->last_stride = istride; seq->last_stride = istride;
for( z = 0; z < n_vlines; z++ ) for( z = 0; z < n_velement; z++ )
seq->start[z] = boxes->band[z] + seq->start[z] = boxes->velement[z].band +
boxes->row[z] * istride; boxes->velement[z].row * istride;
} }
switch( boxes->in->BandFmt ) { switch( boxes->in->BandFmt ) {
@ -837,14 +949,15 @@ aconv_vgenerate( REGION *or, void *vseq, void *a, void *b )
PEL *q; PEL *q;
int sum; int sum;
p = x * boxes->n_hlines + p = x * boxes->n_hline +
(int *) IM_REGION_ADDR( ir, r->left, r->top ); (int *) IM_REGION_ADDR( ir, r->left, r->top );
q = x + (PEL *) IM_REGION_ADDR( or, r->left, r->top ); q = x + (PEL *) IM_REGION_ADDR( or, r->left, r->top );
for( y = 0; y < r->height; y++ ) { for( y = 0; y < r->height; y++ ) {
sum = 0; sum = 0;
for( z = 0; z < n_vlines; z++ ) for( z = 0; z < n_velement; z++ )
sum += boxes->factor[z] * p[seq->start[z]]; sum += boxes->velement[z].factor *
p[seq->start[z]];
p += istride; p += istride;
sum = (sum + boxes->rounding) / boxes->area; sum = (sum + boxes->rounding) / boxes->area;
CLIP_UCHAR( sum ); CLIP_UCHAR( sum );