composite in scanlines, not pixels

try compositing a scanline at a time, should improve cache locality for
deep image stacks

seems to work, needs benchmarking
This commit is contained in:
John Cupitt 2018-11-27 18:27:38 +00:00
parent e6f271df10
commit 2c26c23163
3 changed files with 1764 additions and 140 deletions

View File

@ -9,6 +9,7 @@
- add bandand()/or()/eor() to cplusplus binding [clcaalu] - add bandand()/or()/eor() to cplusplus binding [clcaalu]
- implement shrink-on-load for tiff pyramids [jcupitt] - implement shrink-on-load for tiff pyramids [jcupitt]
- added vips_image_set_blob_copy() [jcupitt] - added vips_image_set_blob_copy() [jcupitt]
- don't stop composite on first non-transparent image [felixbuenemann]
23/9/18 started 8.7.1 23/9/18 started 8.7.1
- update function list in docs [janko-m] - update function list in docs [janko-m]

File diff suppressed because it is too large Load Diff

View File

@ -11,6 +11,7 @@
* - x/y params let you position images * - x/y params let you position images
* 27/11/18 * 27/11/18
* - don't stop on first non-transparent image [felixbuenemann] * - don't stop on first non-transparent image [felixbuenemann]
* - composite in scanlines, not pixels [jcupitt]
*/ */
/* /*
@ -163,6 +164,13 @@ typedef struct {
*/ */
VipsPel **p; VipsPel **p;
/* We composite images a scanline at a time, working up the image
* stack. Keep intermediate pixels in this scanline buffer.
*
* This will be float for the vector path, double otherwise.
*/
double *scanline;
} VipsCompositeSequence; } VipsCompositeSequence;
static int static int
@ -179,6 +187,7 @@ vips_composite_stop( void *vseq, void *a, void *b )
} }
VIPS_FREE( seq->p ); VIPS_FREE( seq->p );
VIPS_FREE( seq->scanline );
VIPS_FREE( seq ); VIPS_FREE( seq );
@ -230,6 +239,14 @@ vips_composite_start( VipsImage *out, void *a, void *b )
return( NULL ); return( NULL );
} }
/* Scanline buffer.
*/
if( !(seq->scanline =
VIPS_ARRAY( NULL, in[0]->Xsize * in[0]->Bands, double )) ) {
vips_composite_stop( seq, NULL, NULL );
return( NULL );
}
return( seq ); return( seq );
} }
@ -329,13 +346,13 @@ vips_composite_base_max_band( VipsCompositeBase *composite, double *max_band )
* xB colour band of source B * xB colour band of source B
*/ */
/* A is the new pixel coming in, of any non-complex type T. /* A is the new scaline coming in, of any non-complex type T.
* *
* We must scale incoming pixels to 0 - 1 by dividing by the scale[] vector. * We must scale incoming pixels to 0 - 1 by dividing by the scale[] vector.
* *
* If premultipled is not set, we premultiply incoming pixels before blending. * If premultipled is not set, we premultiply incoming pixels before blending.
* *
* B is the double pixel we are accumulating. * B is the double scanline we are accumulating.
*/ */
template <typename T> template <typename T>
static void static void
@ -524,7 +541,8 @@ vips_composite_base_blend( VipsCompositeBase *composite,
double g; double g;
if( B[b] <= 0.25 ) if( B[b] <= 0.25 )
g = ((16 * B[b] - 12) * B[b] + 4) * B[b]; g = ((16 * B[b] - 12) *
B[b] + 4) * B[b];
else else
g = sqrt( B[b] ); g = sqrt( B[b] );
@ -786,7 +804,8 @@ vips_composite_base_blend3( VipsCompositeBase *composite,
*/ */
template <typename T, gint64 min_T, gint64 max_T> template <typename T, gint64 min_T, gint64 max_T>
static void static void
vips_combine_pixels( VipsCompositeBase *composite, VipsPel *q, VipsPel **p ) vips_combine_pixels( VipsCompositeBase *composite,
VipsPel *q, VipsPel **p, double *scanline, int width )
{ {
VipsBlendMode *mode = (VipsBlendMode *) composite->mode->area.data; VipsBlendMode *mode = (VipsBlendMode *) composite->mode->area.data;
int n_mode = composite->mode->area.n; int n_mode = composite->mode->area.n;
@ -795,61 +814,85 @@ vips_combine_pixels( VipsCompositeBase *composite, VipsPel *q, VipsPel **p )
T * restrict tq = (T * restrict) q; T * restrict tq = (T * restrict) q;
T ** restrict tp = (T ** restrict) p; T ** restrict tp = (T ** restrict) p;
double B[MAX_BANDS + 1]; double *B1;
double aB; T * restrict tp1;
T * restrict tq1;
/* Load and scale the base pixel to 0 - 1. /* Load and scale the base scanline to 0 - 1.
*/ */
for( int b = 0; b <= bands; b++ ) B1 = scanline;
B[b] = tp[0][b] / composite->max_band[b]; tp1 = tp[0];
for( int x = 0; x < width; x++ ) {
for( int b = 0; b <= bands; b++ )
B1[b] = tp1[b] / composite->max_band[b];
aB = B[bands]; if( !composite->premultiplied ) {
if( !composite->premultiplied ) double aB = B1[bands];
for( int b = 0; b < bands; b++ )
B[b] *= aB; for( int b = 0; b < bands; b++ )
B1[b] *= aB;
}
B1 += bands + 1;
tp1 += bands + 1;
}
for( int i = 1; i < n; i++ ) { for( int i = 1; i < n; i++ ) {
VipsBlendMode m = n_mode == 1 ? mode[0] : mode[i - 1]; VipsBlendMode m = n_mode == 1 ? mode[0] : mode[i - 1];
vips_composite_base_blend<T>( composite, m, B, tp[i] ); B1 = scanline;
tp1 = tp[i];
for( int x = 0; x < width; x++ ) {
vips_composite_base_blend<T>( composite, m, B1, tp1 );
B1 += bands + 1;
tp1 += bands + 1;
}
} }
/* Unpremultiply, if necessary. /* Write back as a full range pixel, clipping to range. Unpremultiply,
* if necessary.
*/ */
if( !composite->premultiplied ) { B1 = scanline;
double aR = B[bands]; tq1 = tq;
for( int x = 0; x < width; x++ ) {
if( !composite->premultiplied ) {
double aR = B1[bands];
if( aR == 0 ) if( aR == 0 )
for( int b = 0; b < bands; b++ ) for( int b = 0; b < bands; b++ )
B[b] = 0; B1[b] = 0;
else else
for( int b = 0; b < bands; b++ ) for( int b = 0; b < bands; b++ )
B[b] = B[b] / aR; B1[b] = B1[b] / aR;
}
/* Write back as a full range pixel, clipping to range.
*/
for( int b = 0; b <= bands; b++ ) {
double v;
v = B[b] * composite->max_band[b];
if( min_T != 0 ||
max_T != 0 ) {
v = VIPS_CLIP( min_T, v, max_T );
} }
tq[b] = v; for( int b = 0; b <= bands; b++ ) {
double v;
v = B1[b] * composite->max_band[b];
if( min_T != 0 ||
max_T != 0 ) {
v = VIPS_CLIP( min_T, v, max_T );
}
tq1[b] = v;
}
B1 += bands + 1;
tq1 += bands + 1;
} }
} }
#ifdef HAVE_VECTOR_ARITH #ifdef HAVE_VECTOR_ARITH
/* Three band (four with alpha) vecvtior case. Non-double output. min_T and /* Three band (four with alpha) vector case. Non-double output. min_T and
* max_T are the numeric range for this type. 0, 0 means no limit, * max_T are the numeric range for this type. 0, 0 means no limit,
* for example float. * for example float.
*/ */
template <typename T, gint64 min_T, gint64 max_T> template <typename T, gint64 min_T, gint64 max_T>
static void static void
vips_combine_pixels3( VipsCompositeBase *composite, VipsPel *q, VipsPel **p ) vips_combine_pixels3( VipsCompositeBase *composite,
VipsPel *q, VipsPel **p, double *scanline, int width )
{ {
VipsBlendMode *mode = (VipsBlendMode *) composite->mode->area.data; VipsBlendMode *mode = (VipsBlendMode *) composite->mode->area.data;
int n_mode = composite->mode->area.n; int n_mode = composite->mode->area.n;
@ -857,59 +900,108 @@ vips_combine_pixels3( VipsCompositeBase *composite, VipsPel *q, VipsPel **p )
T * restrict tq = (T * restrict) q; T * restrict tq = (T * restrict) q;
T ** restrict tp = (T ** restrict) p; T ** restrict tp = (T ** restrict) p;
v4f B; float *B1;
float aB; T * restrict tp1;
T * restrict tq1;
B[0] = tp[0][0]; printf( "on vector path\n" );
B[1] = tp[0][1];
B[2] = tp[0][2];
B[3] = tp[0][3];
/* Scale the base pixel to 0 - 1. B1 = (float *) scanline;
*/ tp1 = tp[0];
B /= composite->max_band_vec; for( int x = 0; x < width; x++ ) {
aB = B[3]; v4f B;
float aB;
if( !composite->premultiplied ) { B[0] = tp1[0];
B *= aB; B[1] = tp1[1];
B[3] = aB; B[2] = tp1[2];
B[3] = tp1[3];
/* Scale the base pixel to 0 - 1.
*/
B /= composite->max_band_vec;
aB = B[3];
if( !composite->premultiplied ) {
B *= aB;
B[3] = aB;
}
B1[0] = B[0];
B1[1] = B[1];
B1[2] = B[2];
B1[3] = B[3];
B1 += 4;
tp1 += 4;
} }
for( int i = 1; i < n; i++ ) { for( int i = 1; i < n; i++ ) {
VipsBlendMode m = n_mode == 1 ? mode[0] : mode[i - 1]; VipsBlendMode m = n_mode == 1 ? mode[0] : mode[i - 1];
vips_composite_base_blend3<T>( composite, m, B, tp[i] ); B1 = (float *) scanline;
} tp1 = tp[i];
for( int x = 0; x < width; x++ ) {
v4f B;
/* Unpremultiply, if necessary. B[0] = B1[0];
*/ B[1] = B1[1];
if( !composite->premultiplied ) { B[2] = B1[2];
float aR = B[3]; B[3] = B1[3];
if( aR == 0 ) vips_composite_base_blend3<T>( composite, m, B, tp1 );
for( int b = 0; b < 3; b++ )
B[b] = 0; B1[0] = B[0];
else { B1[1] = B[1];
B /= aR; B1[2] = B[2];
B[3] = aR; B1[3] = B[3];
B1 += 4;
tp1 += 4;
} }
} }
/* Write back as a full range pixel, clipping to range. B1 = (float *) scanline;
*/ tq1 = tq;
B *= composite->max_band_vec; for( int x = 0; x < width; x++ ) {
if( min_T != 0 || v4f B;
max_T != 0 ) {
float low = min_T;
float high = max_T;
B = VIPS_CLIP( low, B, high ); B[0] = B1[0];
B[1] = B1[1];
B[2] = B1[2];
B[3] = B1[3];
/* Unpremultiply, if necessary.
*/
if( !composite->premultiplied ) {
float aR = B[3];
if( aR == 0 )
for( int b = 0; b < 3; b++ )
B[b] = 0;
else {
B /= aR;
B[3] = aR;
}
}
B *= composite->max_band_vec;
if( min_T != 0 ||
max_T != 0 ) {
float low = min_T;
float high = max_T;
B = VIPS_CLIP( low, B, high );
}
tq1[0] = B[0];
tq1[1] = B[1];
tq1[2] = B[2];
tq1[3] = B[3];
B1 += 4;
tq1 += 4;
} }
tq[0] = B[0];
tq[1] = B[1];
tq[2] = B[2];
tq[3] = B[3];
} }
#endif /*HAVE_VECTOR_ARITH*/ #endif /*HAVE_VECTOR_ARITH*/
@ -920,7 +1012,6 @@ vips_composite_base_gen( VipsRegion *output_region,
VipsCompositeSequence *seq = (VipsCompositeSequence *) vseq; VipsCompositeSequence *seq = (VipsCompositeSequence *) vseq;
VipsCompositeBase *composite = (VipsCompositeBase *) b; VipsCompositeBase *composite = (VipsCompositeBase *) b;
VipsRect *r = &output_region->valid; VipsRect *r = &output_region->valid;
int ps = VIPS_IMAGE_SIZEOF_PEL( output_region->im );
int n = composite->in->area.n; int n = composite->in->area.n;
if( vips_reorder_prepare_many( output_region->im, seq->ir, r ) ) if( vips_reorder_prepare_many( output_region->im, seq->ir, r ) )
@ -937,85 +1028,90 @@ vips_composite_base_gen( VipsRegion *output_region,
seq->p[n] = NULL; seq->p[n] = NULL;
q = VIPS_REGION_ADDR( output_region, r->left, r->top + y ); q = VIPS_REGION_ADDR( output_region, r->left, r->top + y );
for( int x = 0; x < r->width; x++ ) { switch( seq->ir[0]->im->BandFmt ) {
switch( seq->ir[0]->im->BandFmt ) { case VIPS_FORMAT_UCHAR:
case VIPS_FORMAT_UCHAR:
#ifdef HAVE_VECTOR_ARITH #ifdef HAVE_VECTOR_ARITH
if( composite->bands == 3 ) if( composite->bands == 3 )
vips_combine_pixels3 vips_combine_pixels3
<unsigned char, 0, UCHAR_MAX> <unsigned char, 0, UCHAR_MAX>
( composite, q, seq->p ); ( composite, q, seq->p, seq->scanline,
else r->width );
else
#endif #endif
vips_combine_pixels
<unsigned char, 0, UCHAR_MAX>
( composite, q, seq->p );
break;
case VIPS_FORMAT_CHAR:
vips_combine_pixels vips_combine_pixels
<signed char, SCHAR_MIN, SCHAR_MAX> <unsigned char, 0, UCHAR_MAX>
( composite, q, seq->p ); ( composite, q, seq->p, seq->scanline,
break; r->width );
break;
case VIPS_FORMAT_USHORT: case VIPS_FORMAT_CHAR:
vips_combine_pixels
<signed char, SCHAR_MIN, SCHAR_MAX>
( composite, q, seq->p, seq->scanline,
r->width );
break;
case VIPS_FORMAT_USHORT:
#ifdef HAVE_VECTOR_ARITH #ifdef HAVE_VECTOR_ARITH
if( composite->bands == 3 ) if( composite->bands == 3 )
vips_combine_pixels3 vips_combine_pixels3
<unsigned short, 0, USHRT_MAX> <unsigned short, 0, USHRT_MAX>
( composite, q, seq->p ); ( composite, q, seq->p, seq->scanline,
else r->width );
else
#endif #endif
vips_combine_pixels
<unsigned short, 0, USHRT_MAX>
( composite, q, seq->p );
break;
case VIPS_FORMAT_SHORT:
vips_combine_pixels vips_combine_pixels
<signed short, SHRT_MIN, SHRT_MAX> <unsigned short, 0, USHRT_MAX>
( composite, q, seq->p ); ( composite, q, seq->p, seq->scanline,
break; r->width );
break;
case VIPS_FORMAT_UINT: case VIPS_FORMAT_SHORT:
vips_combine_pixels vips_combine_pixels
<unsigned int, 0, UINT_MAX> <signed short, SHRT_MIN, SHRT_MAX>
( composite, q, seq->p ); ( composite, q, seq->p, seq->scanline,
break; r->width );
break;
case VIPS_FORMAT_INT: case VIPS_FORMAT_UINT:
vips_combine_pixels vips_combine_pixels
<signed int, INT_MIN, INT_MAX> <unsigned int, 0, UINT_MAX>
( composite, q, seq->p ); ( composite, q, seq->p, seq->scanline,
break; r->width );
break;
case VIPS_FORMAT_FLOAT: case VIPS_FORMAT_INT:
vips_combine_pixels
<signed int, INT_MIN, INT_MAX>
( composite, q, seq->p, seq->scanline,
r->width );
break;
case VIPS_FORMAT_FLOAT:
#ifdef HAVE_VECTOR_ARITH #ifdef HAVE_VECTOR_ARITH
if( composite->bands == 3 ) if( composite->bands == 3 )
vips_combine_pixels3 vips_combine_pixels3
<float, 0, USHRT_MAX> <float, 0, USHRT_MAX>
( composite, q, seq->p ); ( composite, q, seq->p, seq->scanline,
else r->width );
else
#endif #endif
vips_combine_pixels
<float, 0, 0>
( composite, q, seq->p );
break;
case VIPS_FORMAT_DOUBLE:
vips_combine_pixels vips_combine_pixels
<double, 0, 0> <float, 0, 0>
( composite, q, seq->p ); ( composite, q, seq->p, seq->scanline,
break; r->width );
break;
default: case VIPS_FORMAT_DOUBLE:
g_assert_not_reached(); vips_combine_pixels
return( -1 ); <double, 0, 0>
} ( composite, q, seq->p, seq->scanline,
r->width );
break;
for( int i = 0; i < n; i++ ) default:
seq->p[i] += ps; g_assert_not_reached();
q += ps; return( -1 );
} }
} }