composite in scanlines, not pixels

try compositing a scanline at a time, should improve cache locality for
deep image stacks

seems to work, needs benchmarking
This commit is contained in:
John Cupitt 2018-11-27 18:27:38 +00:00
parent e6f271df10
commit 2c26c23163
3 changed files with 1764 additions and 140 deletions

View File

@ -9,6 +9,7 @@
- add bandand()/or()/eor() to cplusplus binding [clcaalu]
- implement shrink-on-load for tiff pyramids [jcupitt]
- added vips_image_set_blob_copy() [jcupitt]
- don't stop composite on first non-transparent image [felixbuenemann]
23/9/18 started 8.7.1
- update function list in docs [janko-m]

File diff suppressed because it is too large Load Diff

View File

@ -11,6 +11,7 @@
* - x/y params let you position images
* 27/11/18
* - don't stop on first non-transparent image [felixbuenemann]
* - composite in scanlines, not pixels [jcupitt]
*/
/*
@ -163,6 +164,13 @@ typedef struct {
*/
VipsPel **p;
/* We composite images a scanline at a time, working up the image
* stack. Keep intermediate pixels in this scanline buffer.
*
* This will be float for the vector path, double otherwise.
*/
double *scanline;
} VipsCompositeSequence;
static int
@ -179,6 +187,7 @@ vips_composite_stop( void *vseq, void *a, void *b )
}
VIPS_FREE( seq->p );
VIPS_FREE( seq->scanline );
VIPS_FREE( seq );
@ -230,6 +239,14 @@ vips_composite_start( VipsImage *out, void *a, void *b )
return( NULL );
}
/* Scanline buffer.
*/
if( !(seq->scanline =
VIPS_ARRAY( NULL, in[0]->Xsize * in[0]->Bands, double )) ) {
vips_composite_stop( seq, NULL, NULL );
return( NULL );
}
return( seq );
}
@ -329,13 +346,13 @@ vips_composite_base_max_band( VipsCompositeBase *composite, double *max_band )
* xB colour band of source B
*/
/* A is the new pixel coming in, of any non-complex type T.
/* A is the new scaline coming in, of any non-complex type T.
*
* We must scale incoming pixels to 0 - 1 by dividing by the scale[] vector.
*
* If premultipled is not set, we premultiply incoming pixels before blending.
*
* B is the double pixel we are accumulating.
* B is the double scanline we are accumulating.
*/
template <typename T>
static void
@ -524,7 +541,8 @@ vips_composite_base_blend( VipsCompositeBase *composite,
double g;
if( B[b] <= 0.25 )
g = ((16 * B[b] - 12) * B[b] + 4) * B[b];
g = ((16 * B[b] - 12) *
B[b] + 4) * B[b];
else
g = sqrt( B[b] );
@ -786,7 +804,8 @@ vips_composite_base_blend3( VipsCompositeBase *composite,
*/
template <typename T, gint64 min_T, gint64 max_T>
static void
vips_combine_pixels( VipsCompositeBase *composite, VipsPel *q, VipsPel **p )
vips_combine_pixels( VipsCompositeBase *composite,
VipsPel *q, VipsPel **p, double *scanline, int width )
{
VipsBlendMode *mode = (VipsBlendMode *) composite->mode->area.data;
int n_mode = composite->mode->area.n;
@ -795,61 +814,85 @@ vips_combine_pixels( VipsCompositeBase *composite, VipsPel *q, VipsPel **p )
T * restrict tq = (T * restrict) q;
T ** restrict tp = (T ** restrict) p;
double B[MAX_BANDS + 1];
double aB;
double *B1;
T * restrict tp1;
T * restrict tq1;
/* Load and scale the base pixel to 0 - 1.
/* Load and scale the base scanline to 0 - 1.
*/
for( int b = 0; b <= bands; b++ )
B[b] = tp[0][b] / composite->max_band[b];
B1 = scanline;
tp1 = tp[0];
for( int x = 0; x < width; x++ ) {
for( int b = 0; b <= bands; b++ )
B1[b] = tp1[b] / composite->max_band[b];
aB = B[bands];
if( !composite->premultiplied )
for( int b = 0; b < bands; b++ )
B[b] *= aB;
if( !composite->premultiplied ) {
double aB = B1[bands];
for( int b = 0; b < bands; b++ )
B1[b] *= aB;
}
B1 += bands + 1;
tp1 += bands + 1;
}
for( int i = 1; i < n; i++ ) {
VipsBlendMode m = n_mode == 1 ? mode[0] : mode[i - 1];
vips_composite_base_blend<T>( composite, m, B, tp[i] );
B1 = scanline;
tp1 = tp[i];
for( int x = 0; x < width; x++ ) {
vips_composite_base_blend<T>( composite, m, B1, tp1 );
B1 += bands + 1;
tp1 += bands + 1;
}
}
/* Unpremultiply, if necessary.
/* Write back as a full range pixel, clipping to range. Unpremultiply,
* if necessary.
*/
if( !composite->premultiplied ) {
double aR = B[bands];
B1 = scanline;
tq1 = tq;
for( int x = 0; x < width; x++ ) {
if( !composite->premultiplied ) {
double aR = B1[bands];
if( aR == 0 )
for( int b = 0; b < bands; b++ )
B[b] = 0;
else
for( int b = 0; b < bands; b++ )
B[b] = B[b] / aR;
}
/* Write back as a full range pixel, clipping to range.
*/
for( int b = 0; b <= bands; b++ ) {
double v;
v = B[b] * composite->max_band[b];
if( min_T != 0 ||
max_T != 0 ) {
v = VIPS_CLIP( min_T, v, max_T );
if( aR == 0 )
for( int b = 0; b < bands; b++ )
B1[b] = 0;
else
for( int b = 0; b < bands; b++ )
B1[b] = B1[b] / aR;
}
tq[b] = v;
for( int b = 0; b <= bands; b++ ) {
double v;
v = B1[b] * composite->max_band[b];
if( min_T != 0 ||
max_T != 0 ) {
v = VIPS_CLIP( min_T, v, max_T );
}
tq1[b] = v;
}
B1 += bands + 1;
tq1 += bands + 1;
}
}
#ifdef HAVE_VECTOR_ARITH
/* Three band (four with alpha) vecvtior case. Non-double output. min_T and
/* Three band (four with alpha) vector case. Non-double output. min_T and
* max_T are the numeric range for this type. 0, 0 means no limit,
* for example float.
*/
template <typename T, gint64 min_T, gint64 max_T>
static void
vips_combine_pixels3( VipsCompositeBase *composite, VipsPel *q, VipsPel **p )
vips_combine_pixels3( VipsCompositeBase *composite,
VipsPel *q, VipsPel **p, double *scanline, int width )
{
VipsBlendMode *mode = (VipsBlendMode *) composite->mode->area.data;
int n_mode = composite->mode->area.n;
@ -857,59 +900,108 @@ vips_combine_pixels3( VipsCompositeBase *composite, VipsPel *q, VipsPel **p )
T * restrict tq = (T * restrict) q;
T ** restrict tp = (T ** restrict) p;
v4f B;
float aB;
float *B1;
T * restrict tp1;
T * restrict tq1;
B[0] = tp[0][0];
B[1] = tp[0][1];
B[2] = tp[0][2];
B[3] = tp[0][3];
printf( "on vector path\n" );
/* Scale the base pixel to 0 - 1.
*/
B /= composite->max_band_vec;
aB = B[3];
B1 = (float *) scanline;
tp1 = tp[0];
for( int x = 0; x < width; x++ ) {
v4f B;
float aB;
if( !composite->premultiplied ) {
B *= aB;
B[3] = aB;
B[0] = tp1[0];
B[1] = tp1[1];
B[2] = tp1[2];
B[3] = tp1[3];
/* Scale the base pixel to 0 - 1.
*/
B /= composite->max_band_vec;
aB = B[3];
if( !composite->premultiplied ) {
B *= aB;
B[3] = aB;
}
B1[0] = B[0];
B1[1] = B[1];
B1[2] = B[2];
B1[3] = B[3];
B1 += 4;
tp1 += 4;
}
for( int i = 1; i < n; i++ ) {
VipsBlendMode m = n_mode == 1 ? mode[0] : mode[i - 1];
vips_composite_base_blend3<T>( composite, m, B, tp[i] );
}
B1 = (float *) scanline;
tp1 = tp[i];
for( int x = 0; x < width; x++ ) {
v4f B;
/* Unpremultiply, if necessary.
*/
if( !composite->premultiplied ) {
float aR = B[3];
B[0] = B1[0];
B[1] = B1[1];
B[2] = B1[2];
B[3] = B1[3];
if( aR == 0 )
for( int b = 0; b < 3; b++ )
B[b] = 0;
else {
B /= aR;
B[3] = aR;
vips_composite_base_blend3<T>( composite, m, B, tp1 );
B1[0] = B[0];
B1[1] = B[1];
B1[2] = B[2];
B1[3] = B[3];
B1 += 4;
tp1 += 4;
}
}
/* Write back as a full range pixel, clipping to range.
*/
B *= composite->max_band_vec;
if( min_T != 0 ||
max_T != 0 ) {
float low = min_T;
float high = max_T;
B1 = (float *) scanline;
tq1 = tq;
for( int x = 0; x < width; x++ ) {
v4f B;
B = VIPS_CLIP( low, B, high );
B[0] = B1[0];
B[1] = B1[1];
B[2] = B1[2];
B[3] = B1[3];
/* Unpremultiply, if necessary.
*/
if( !composite->premultiplied ) {
float aR = B[3];
if( aR == 0 )
for( int b = 0; b < 3; b++ )
B[b] = 0;
else {
B /= aR;
B[3] = aR;
}
}
B *= composite->max_band_vec;
if( min_T != 0 ||
max_T != 0 ) {
float low = min_T;
float high = max_T;
B = VIPS_CLIP( low, B, high );
}
tq1[0] = B[0];
tq1[1] = B[1];
tq1[2] = B[2];
tq1[3] = B[3];
B1 += 4;
tq1 += 4;
}
tq[0] = B[0];
tq[1] = B[1];
tq[2] = B[2];
tq[3] = B[3];
}
#endif /*HAVE_VECTOR_ARITH*/
@ -920,7 +1012,6 @@ vips_composite_base_gen( VipsRegion *output_region,
VipsCompositeSequence *seq = (VipsCompositeSequence *) vseq;
VipsCompositeBase *composite = (VipsCompositeBase *) b;
VipsRect *r = &output_region->valid;
int ps = VIPS_IMAGE_SIZEOF_PEL( output_region->im );
int n = composite->in->area.n;
if( vips_reorder_prepare_many( output_region->im, seq->ir, r ) )
@ -937,85 +1028,90 @@ vips_composite_base_gen( VipsRegion *output_region,
seq->p[n] = NULL;
q = VIPS_REGION_ADDR( output_region, r->left, r->top + y );
for( int x = 0; x < r->width; x++ ) {
switch( seq->ir[0]->im->BandFmt ) {
case VIPS_FORMAT_UCHAR:
switch( seq->ir[0]->im->BandFmt ) {
case VIPS_FORMAT_UCHAR:
#ifdef HAVE_VECTOR_ARITH
if( composite->bands == 3 )
vips_combine_pixels3
<unsigned char, 0, UCHAR_MAX>
( composite, q, seq->p );
else
if( composite->bands == 3 )
vips_combine_pixels3
<unsigned char, 0, UCHAR_MAX>
( composite, q, seq->p, seq->scanline,
r->width );
else
#endif
vips_combine_pixels
<unsigned char, 0, UCHAR_MAX>
( composite, q, seq->p );
break;
case VIPS_FORMAT_CHAR:
vips_combine_pixels
<signed char, SCHAR_MIN, SCHAR_MAX>
( composite, q, seq->p );
break;
<unsigned char, 0, UCHAR_MAX>
( composite, q, seq->p, seq->scanline,
r->width );
break;
case VIPS_FORMAT_USHORT:
case VIPS_FORMAT_CHAR:
vips_combine_pixels
<signed char, SCHAR_MIN, SCHAR_MAX>
( composite, q, seq->p, seq->scanline,
r->width );
break;
case VIPS_FORMAT_USHORT:
#ifdef HAVE_VECTOR_ARITH
if( composite->bands == 3 )
vips_combine_pixels3
<unsigned short, 0, USHRT_MAX>
( composite, q, seq->p );
else
if( composite->bands == 3 )
vips_combine_pixels3
<unsigned short, 0, USHRT_MAX>
( composite, q, seq->p, seq->scanline,
r->width );
else
#endif
vips_combine_pixels
<unsigned short, 0, USHRT_MAX>
( composite, q, seq->p );
break;
case VIPS_FORMAT_SHORT:
vips_combine_pixels
<signed short, SHRT_MIN, SHRT_MAX>
( composite, q, seq->p );
break;
<unsigned short, 0, USHRT_MAX>
( composite, q, seq->p, seq->scanline,
r->width );
break;
case VIPS_FORMAT_UINT:
vips_combine_pixels
<unsigned int, 0, UINT_MAX>
( composite, q, seq->p );
break;
case VIPS_FORMAT_SHORT:
vips_combine_pixels
<signed short, SHRT_MIN, SHRT_MAX>
( composite, q, seq->p, seq->scanline,
r->width );
break;
case VIPS_FORMAT_INT:
vips_combine_pixels
<signed int, INT_MIN, INT_MAX>
( composite, q, seq->p );
break;
case VIPS_FORMAT_UINT:
vips_combine_pixels
<unsigned int, 0, UINT_MAX>
( composite, q, seq->p, seq->scanline,
r->width );
break;
case VIPS_FORMAT_FLOAT:
case VIPS_FORMAT_INT:
vips_combine_pixels
<signed int, INT_MIN, INT_MAX>
( composite, q, seq->p, seq->scanline,
r->width );
break;
case VIPS_FORMAT_FLOAT:
#ifdef HAVE_VECTOR_ARITH
if( composite->bands == 3 )
vips_combine_pixels3
<float, 0, USHRT_MAX>
( composite, q, seq->p );
else
if( composite->bands == 3 )
vips_combine_pixels3
<float, 0, USHRT_MAX>
( composite, q, seq->p, seq->scanline,
r->width );
else
#endif
vips_combine_pixels
<float, 0, 0>
( composite, q, seq->p );
break;
case VIPS_FORMAT_DOUBLE:
vips_combine_pixels
<double, 0, 0>
( composite, q, seq->p );
break;
<float, 0, 0>
( composite, q, seq->p, seq->scanline,
r->width );
break;
default:
g_assert_not_reached();
return( -1 );
}
case VIPS_FORMAT_DOUBLE:
vips_combine_pixels
<double, 0, 0>
( composite, q, seq->p, seq->scanline,
r->width );
break;
for( int i = 0; i < n; i++ )
seq->p[i] += ps;
q += ps;
default:
g_assert_not_reached();
return( -1 );
}
}