speedups, add missing paths, docs

This commit is contained in:
John Cupitt 2017-10-02 15:34:49 +01:00
parent fddd277995
commit ce4a3bc5f6
4 changed files with 342 additions and 66 deletions

View File

@ -138,7 +138,7 @@ Clang dynamic analysis:
Build with the GCC auto-vectorizer and diagnostics (or just -O3): Build with the GCC auto-vectorizer and diagnostics (or just -O3):
$ FLAGS="-O2 -msse4.2 -ffast-math" $ FLAGS="-O2 -march=native -ffast-math"
$ FLAGS="$FLAGS -ftree-vectorize -fdump-tree-vect-details" $ FLAGS="$FLAGS -ftree-vectorize -fdump-tree-vect-details"
$ CFLAGS="$FLAGS" CXXFLAGS="$FLAGS" \ $ CFLAGS="$FLAGS" CXXFLAGS="$FLAGS" \
./configure --prefix=/home/john/vips ./configure --prefix=/home/john/vips

View File

@ -322,17 +322,35 @@ vips_bandjoin_const_buffer( VipsBandary *bandary,
q1 = q; q1 = q;
p1 = p[0]; p1 = p[0];
for( x = 0; x < width; x++ ) { /* Special path for 8-bit RGB -> RGBA ... it's a common case.
for( z = 0; z < ips; z++ ) */
q1[z] = p1[z]; if( ips == 3 &&
ebs == 1 ) {
int c = bandjoin->c_ready[0];
p1 += ips; for( x = 0; x < width; x++ ) {
q1 += ips; q1[0] = p1[0];
q1[1] = p1[1];
q1[2] = p1[2];
q1[3] = c;
for( z = 0; z < ebs; z++ ) p1 += 3;
q1[z] = bandjoin->c_ready[z]; q1 += 4;
}
}
else {
for( x = 0; x < width; x++ ) {
for( z = 0; z < ips; z++ )
q1[z] = p1[z];
q1 += ebs; p1 += ips;
q1 += ips;
for( z = 0; z < ebs; z++ )
q1[z] = bandjoin->c_ready[z];
q1 += ebs;
}
} }
} }

View File

@ -89,9 +89,6 @@
* *
* The various Porter-Duff and PDF blend modes. See vips_composite(), * The various Porter-Duff and PDF blend modes. See vips_composite(),
* for example. * for example.
*
* The PDF blend modes (MULTPLY onwards) require channels all in [0, 1], so
* they only work for spaces like RGB where all channels have the same range.
*/ */
/* References: /* References:
@ -101,18 +98,6 @@
* https://en.wikipedia.org/wiki/Alpha_compositing * https://en.wikipedia.org/wiki/Alpha_compositing
* *
* https://www.cairographics.org/operators/ * https://www.cairographics.org/operators/
*
* Benchmark:
*
* vips replicate PNG_transparency_demonstration_1.png x.png 15 15
* vips crop x.png wtc_overlay.png 0 0 9372 9372
*
* composite -compose over wtc_overlay.png.png wtc.jpg x.jpg
*
* vips composite "wtc_overlay.png wtc.jpg" x.jpg 2
*
* convert -compose over -composite wtc.jpg wtc_overlay.png x.jpg
*
*/ */
typedef struct _VipsComposite { typedef struct _VipsComposite {
@ -159,9 +144,9 @@ G_DEFINE_TYPE( VipsComposite, vips_composite, VIPS_TYPE_CONVERSION );
* aR alpha of result * aR alpha of result
* aA alpha of source A (the new pixel) * aA alpha of source A (the new pixel)
* aB alpha of source B (the thing we accumulate) * aB alpha of source B (the thing we accumulate)
* xR colour channel of result * xR colour band of result
* xA colour channel of source A * xA colour band of source A
* xB colour channel of source B * xB colour band of source B
*/ */
static double inline static double inline
@ -380,10 +365,6 @@ vips_composite_blend_mul( VipsBlendMode mode,
default: default:
/* The PDF modes are a bit different. /* The PDF modes are a bit different.
*/ */
t1 = (1 - aB) * aA;
t2 = (1 - aA) * aB;
t3 = aA * aB;
switch( mode ) { switch( mode ) {
case VIPS_BLEND_MODE_MULTIPLY: case VIPS_BLEND_MODE_MULTIPLY:
for( b = 0; b < bands; b++ ) for( b = 0; b < bands; b++ )
@ -478,6 +459,10 @@ vips_composite_blend_mul( VipsBlendMode mode,
B[b] = 0; B[b] = 0;
} }
else { else {
t1 = (1 - aB) * aA;
t2 = (1 - aA) * aB;
t3 = aA * aB;
for( b = 0; b < bands; b++ ) for( b = 0; b < bands; b++ )
B[b] = (t1 * A[b] + t2 * B[b] + t3 * f[b]) / aR; B[b] = (t1 * A[b] + t2 * B[b] + t3 * f[b]) / aR;
} }
@ -518,15 +503,6 @@ vips_composite_blend_mul_3float( VipsBlendMode mode, v4f &B, float *A_memory )
aB = B[3]; aB = B[3];
aR = vips_composite_alpha( mode, aA, aB ); aR = vips_composite_alpha( mode, aA, aB );
if( aR == 0 ) {
B[0] = 0;
B[1] = 0;
B[2] = 0;
B[3] = 0;
return;
}
switch( mode ) { switch( mode ) {
case VIPS_BLEND_MODE_CLEAR: case VIPS_BLEND_MODE_CLEAR:
B[0] = 0; B[0] = 0;
@ -539,8 +515,15 @@ vips_composite_blend_mul_3float( VipsBlendMode mode, v4f &B, float *A_memory )
break; break;
case VIPS_BLEND_MODE_OVER: case VIPS_BLEND_MODE_OVER:
t1 = aB * (1 - aA); if( aR == 0 ) {
B = (aA * A + t1 * B) / aR; B[0] = 0;
B[1] = 0;
B[2] = 0;
}
else {
t1 = aB * (1 - aA);
B = (aA * A + t1 * B) / aR;
}
break; break;
case VIPS_BLEND_MODE_IN: case VIPS_BLEND_MODE_IN:
@ -559,8 +542,15 @@ vips_composite_blend_mul_3float( VipsBlendMode mode, v4f &B, float *A_memory )
break; break;
case VIPS_BLEND_MODE_DEST_OVER: case VIPS_BLEND_MODE_DEST_OVER:
t1 = aA * (1 - aB); if( aR == 0 ) {
B = (t1 * A + aB * B) / aR; B[0] = 0;
B[1] = 0;
B[2] = 0;
}
else {
t1 = aA * (1 - aB);
B = (t1 * A + aB * B) / aR;
}
break; break;
case VIPS_BLEND_MODE_DEST_IN: case VIPS_BLEND_MODE_DEST_IN:
@ -576,27 +566,43 @@ vips_composite_blend_mul_3float( VipsBlendMode mode, v4f &B, float *A_memory )
break; break;
case VIPS_BLEND_MODE_XOR: case VIPS_BLEND_MODE_XOR:
t1 = aA * (1 - aB); if( aR == 0 ) {
t2 = aB * (1 - aA); B[0] = 0;
B = (t1 * A + t2 * B) / aR; B[1] = 0;
B[2] = 0;
}
else {
t1 = aA * (1 - aB);
t2 = aB * (1 - aA);
B = (t1 * A + t2 * B) / aR;
}
break; break;
case VIPS_BLEND_MODE_ADD: case VIPS_BLEND_MODE_ADD:
B = (aA * A + aB * B) / aR; if( aR == 0 ) {
B[0] = 0;
B[1] = 0;
B[2] = 0;
}
else
B = (aA * A + aB * B) / aR;
break; break;
case VIPS_BLEND_MODE_SATURATE: case VIPS_BLEND_MODE_SATURATE:
t1 = VIPS_MIN( aA, 1 - aB ); if( aR == 0 ) {
B = (t1 * A + aB * B) / aR; B[0] = 0;
B[1] = 0;
B[2] = 0;
}
else {
t1 = VIPS_MIN( aA, 1 - aB );
B = (t1 * A + aB * B) / aR;
}
break; break;
default: default:
/* The PDF modes are a bit different. /* The PDF modes are a bit different.
*/ */
t1 = (1 - aB) * aA;
t2 = (1 - aA) * aB;
t3 = aA * aB;
switch( mode ) { switch( mode ) {
case VIPS_BLEND_MODE_MULTIPLY: case VIPS_BLEND_MODE_MULTIPLY:
f = A * B; f = A * B;
@ -643,8 +649,10 @@ vips_composite_blend_mul_3float( VipsBlendMode mode, v4f &B, float *A_memory )
if( B[b] <= 0.25 ) if( B[b] <= 0.25 )
g[b] = ((16 * B[b] - 12) * g[b] = ((16 * B[b] - 12) *
B[b] + 4) * B[b]; B[b] + 4) * B[b];
else else if( B[b] >= 0 )
g[b] = sqrt( B[b] ); g[b] = sqrt( B[b] );
else
g[b] = 0;
if( A[b] <= 0.5 ) if( A[b] <= 0.5 )
f[b] = B[b] - (1 - 2 * A[b]) * f[b] = B[b] - (1 - 2 * A[b]) *
@ -672,7 +680,17 @@ vips_composite_blend_mul_3float( VipsBlendMode mode, v4f &B, float *A_memory )
break; break;
} }
B = (t1 * A + t2 * B + t3 * f) / aR; if( aR == 0 ) {
B[0] = 0;
B[1] = 0;
B[2] = 0;
}
else {
t1 = (1 - aB) * aA;
t2 = (1 - aA) * aB;
t3 = aA * aB;
B = (t1 * A + t2 * B + t3 * f) / aR;
}
} }
B[3] = aR; B[3] = aR;
@ -680,14 +698,208 @@ vips_composite_blend_mul_3float( VipsBlendMode mode, v4f &B, float *A_memory )
#endif /*HAVE_VECTOR_ARITH*/ #endif /*HAVE_VECTOR_ARITH*/
/* A is the new pixel coming in, B is the double pixel we are accumulating. /* A is the new pixel coming in, B is the double pixel we are accumulating.
* Assume pixels are premultiplied.
*/ */
template <typename T> template <typename T>
static void static void
vips_composite_blend_premul( VipsBlendMode mode, vips_composite_blend_premul( VipsBlendMode mode,
double * restrict B, T * restrict A, int bands ) double * restrict B, T * restrict A, int bands )
{ {
// adapt multiply case once it's done double aA;
g_assert_not_reached(); double aB;
double aR;
double t1;
double t2;
double t3;
double f[MAX_BANDS + 1];
int b;
aA = A[bands];
aB = B[bands];
aR = vips_composite_alpha( mode, aA, aB );
switch( mode ) {
case VIPS_BLEND_MODE_CLEAR:
for( b = 0; b < bands; b++ )
B[b] = 1 - aA;
break;
case VIPS_BLEND_MODE_SOURCE:
for( b = 0; b < bands; b++ )
B[b] = A[b];
break;
case VIPS_BLEND_MODE_OVER:
t1 = 1 - aA;
for( b = 0; b < bands; b++ )
B[b] = A[b] + t1 * B[b];
break;
case VIPS_BLEND_MODE_IN:
for( b = 0; b < bands; b++ )
B[b] = A[b];
break;
case VIPS_BLEND_MODE_OUT:
for( b = 0; b < bands; b++ )
B[b] = A[b];
break;
case VIPS_BLEND_MODE_ATOP:
if( aB == 0 )
for( b = 0; b < bands; b++ )
B[b] = A[b];
else
for( b = 0; b < bands; b++ )
B[b] = A[b] + (B[b] / aB) * (1 - aA);
break;
case VIPS_BLEND_MODE_DEST:
// B = B
break;
case VIPS_BLEND_MODE_DEST_OVER:
t1 = 1 - aB;
for( b = 0; b < bands; b++ )
B[b] = B[b] + t1 * A[b];
break;
case VIPS_BLEND_MODE_DEST_IN:
// B = B
break;
case VIPS_BLEND_MODE_DEST_OUT:
// B = B
break;
case VIPS_BLEND_MODE_DEST_ATOP:
if( aB != 0 )
for( b = 0; b < bands; b++ )
B[b] = (A[b] / aB) * (1 - aB) + B[b];
break;
case VIPS_BLEND_MODE_XOR:
t1 = 1 - aB;
t2 = 1 - aA;
for( b = 0; b < bands; b++ )
B[b] = t1 * A[b] + t2 * B[b];
break;
case VIPS_BLEND_MODE_ADD:
for( b = 0; b < bands; b++ )
B[b] = A[b] + B[b];
break;
case VIPS_BLEND_MODE_SATURATE:
if( aA != 0 ) {
t1 = VIPS_MIN( aA, 1 - aB );
for( b = 0; b < bands; b++ )
B[b] = t1 * (A[b] / aA) + B[b];
}
break;
default:
/* The PDF modes are a bit different.
*/
switch( mode ) {
case VIPS_BLEND_MODE_MULTIPLY:
for( b = 0; b < bands; b++ )
f[b] = A[b] * B[b];
break;
case VIPS_BLEND_MODE_SCREEN:
for( b = 0; b < bands; b++ )
f[b] = A[b] + B[b] - A[b] * B[b];
break;
case VIPS_BLEND_MODE_OVERLAY:
for( b = 0; b < bands; b++ )
if( B[b] <= 0.5 )
f[b] = 2 * A[b] * B[b];
else
f[b] = 1 - 2 * (1 - A[b]) * (1 - B[b]);
break;
case VIPS_BLEND_MODE_DARKEN:
for( b = 0; b < bands; b++ )
f[b] = VIPS_MIN( A[b], B[b] );
break;
case VIPS_BLEND_MODE_LIGHTEN:
for( b = 0; b < bands; b++ )
f[b] = VIPS_MAX( A[b], B[b] );
break;
case VIPS_BLEND_MODE_COLOUR_DODGE:
for( b = 0; b < bands; b++ )
if( A[b] < 1 )
f[b] = VIPS_MIN( 1, B[b] / (1 - A[b]) );
else
f[b] = 1;
break;
case VIPS_BLEND_MODE_COLOUR_BURN:
for( b = 0; b < bands; b++ )
if( A[b] > 0 )
f[b] = 1 - VIPS_MIN( 1,
(1 - B[b]) / A[b] );
else
f[b] = 0;
break;
case VIPS_BLEND_MODE_HARD_LIGHT:
for( b = 0; b < bands; b++ )
if( A[b] < 0.5 )
f[b] = 2 * A[b] * B[b];
else
f[b] = 1 - 2 * (1 - A[b]) * (1 - B[b]);
break;
case VIPS_BLEND_MODE_SOFT_LIGHT:
for( b = 0; b < bands; b++ ) {
double g;
if( B[b] <= 0.25 )
g = ((16 * B[b] - 12) * B[b] + 4) *
B[b];
else
g = sqrt( B[b] );
if( A[b] <= 0.5 )
f[b] = B[b] - (1 - 2 * A[b]) *
B[b] * (1 - B[b]);
else
f[b] = B[b] + (2 * A[b] - 1) *
(g - B[b]);
}
break;
case VIPS_BLEND_MODE_DIFFERENCE:
for( b = 0; b < bands; b++ )
f[b] = abs( B[b] - A[b] );
break;
case VIPS_BLEND_MODE_EXCLUSION:
for( b = 0; b < bands; b++ )
f[b] = A[b] + B[b] - 2 * A[b] * B[b];
break;
default:
g_assert_not_reached();
for( b = 0; b < bands; b++ )
B[b] = 0;
}
t1 = 1 - aB;
t2 = 1 - aA;
t3 = aA * aB;
for( b = 0; b < bands; b++ )
B[b] = t1 * A[b] + t2 * B[b] + t3 * f[b];
break;
}
B[bands] = aR;
} }
template <typename T> template <typename T>
@ -973,10 +1185,12 @@ vips_composite_build( VipsObject *object )
return( -1 ); return( -1 );
} }
/* Transform to compositing space. It defaults to sRGB or B_W. /* Transform to compositing space. It defaults to sRGB or B_W, usually
* 8 bit, but 16 bit if any inputs are 16 bit.
*/ */
if( !vips_object_argument_isset( object, "compositing_space" ) ) { if( !vips_object_argument_isset( object, "compositing_space" ) ) {
gboolean all_grey; gboolean all_grey;
gboolean any_16;
all_grey = TRUE; all_grey = TRUE;
for( i = 0; i < composite->n; i++ ) for( i = 0; i < composite->n; i++ )
@ -985,8 +1199,21 @@ vips_composite_build( VipsObject *object )
break; break;
} }
composite->compositing_space = all_grey ? any_16 = FALSE;
VIPS_INTERPRETATION_B_W : VIPS_INTERPRETATION_sRGB; for( i = 0; i < composite->n; i++ )
if( in[i]->Type == VIPS_INTERPRETATION_GREY16 ||
in[i]->Type == VIPS_INTERPRETATION_RGB16 ) {
any_16 = TRUE;
break;
}
composite->compositing_space = any_16 ?
(all_grey ?
VIPS_INTERPRETATION_GREY16 :
VIPS_INTERPRETATION_RGB16) :
(all_grey ?
VIPS_INTERPRETATION_B_W :
VIPS_INTERPRETATION_sRGB);
} }
compositing = (VipsImage **) compositing = (VipsImage **)
@ -1150,8 +1377,40 @@ vips_compositev( VipsImage **in, VipsImage **out, int n, int *mode, va_list ap )
* @mode: array of (@n - 1) #VipsBlendMode * @mode: array of (@n - 1) #VipsBlendMode
* @...: %NULL-terminated list of optional named arguments * @...: %NULL-terminated list of optional named arguments
* *
* Optional arguments:
*
* * @compositing_space: #VipsInterpretation to composite in
* * @premultiplied: %gboolean, images are already premultiplied
*
* Composite an array of images together. * Composite an array of images together.
* *
* Images are placed in a stack, with @in[@n - 1] at the bottom and @in[0] at
* the top. Pixels are blended together working from the bottom upwards, with
* the blend mode at each step being set by the corresponding #VipsBlendMode
* in @mode.
*
* Images are transformed to a compositing space before processing. This is
* #VIPS_INTERPRETATION_sRGB, #VIPS_INTERPRETATION_B_W,
* #VIPS_INTERPRETATION_RGB16, or #VIPS_INTERPRETATION_GREY16
* by default, depending on
* how many bands and bits the input images have. You can select any other space,
* such as #VIPS_INTERPRETATION_LAB or #VIPS_INTERPRETATION_scRGB.
*
* The output image will always be #VIPS_FORMAT_FLOAT unless one of the inputs
* is #VIPS_FORMAT_DOUBLE, in which case the output will be double as well.
*
* Complex images are not supported.
*
* The output image will always have an alpha band. A solid alpha is
* added to any input missing an alpha.
*
* The images do not need to match in size or format. They will be expanded to
* the smallest common size and format in the usual way.
*
* Images are normally treated as unpremultiplied, so this operation can be used
* directly on PNG images. If your images have been through vips_premultiply(),
* set @premultiplied.
*
* See also: vips_insert(). * See also: vips_insert().
* *
* Returns: 0 on success, -1 on error * Returns: 0 on success, -1 on error

View File

@ -112,8 +112,7 @@ G_DEFINE_TYPE( VipsFlatten, vips_flatten, VIPS_TYPE_CONVERSION );
int b; \ int b; \
\ \
for( b = 0; b < bands - 1; b++ ) \ for( b = 0; b < bands - 1; b++ ) \
q[b] = (p[b] * alpha) / max_alpha + \ q[b] = (p[b] * alpha + bg[b] * nalpha) / max_alpha; \
(bg[b] * nalpha) / max_alpha; \
\ \
p += bands; \ p += bands; \
q += bands - 1; \ q += bands - 1; \
@ -150,8 +149,8 @@ G_DEFINE_TYPE( VipsFlatten, vips_flatten, VIPS_TYPE_CONVERSION );
int b; \ int b; \
\ \
for( b = 0; b < bands - 1; b++ ) \ for( b = 0; b < bands - 1; b++ ) \
q[b] = ((double) p[b] * alpha) / max_alpha + \ q[b] = ((double) p[b] * alpha + \
((double) bg[b] * nalpha) / max_alpha; \ (double) bg[b] * nalpha) / max_alpha; \
\ \
p += bands; \ p += bands; \
q += bands - 1; \ q += bands - 1; \