Ensure max_band vector is aligned on a 16-byte boundary

See https://github.com/mstorsjo/llvm-mingw/issues/190
This commit is contained in:
Kleis Auke Wolthuizen 2021-03-12 12:57:36 +01:00
parent 6666b941bf
commit a55513a194

View File

@ -130,12 +130,6 @@ typedef struct _VipsCompositeBase {
*/
gboolean skippable;
#ifdef HAVE_VECTOR_ARITH
/* max_band as a vector, for the RGBA case.
*/
v4f max_band_vec;
#endif /*HAVE_VECTOR_ARITH*/
} VipsCompositeBase;
typedef VipsConversionClass VipsCompositeBaseClass;
@ -194,6 +188,16 @@ typedef struct {
*/
VipsPel **p;
#ifdef HAVE_VECTOR_ARITH
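/* The unaligned pointer returned by the allocator. This struct sits
* at a 16-byte-aligned address inside that block, so this is the
* pointer that has to be freed, not the struct pointer itself.
*/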
void *mem;
/* max_band as a vector, for the RGBA case.
*/
v4f max_band_vec;
#endif /*HAVE_VECTOR_ARITH*/
} VipsCompositeSequence;
static int
@ -216,7 +220,14 @@ vips_composite_stop( void *vseq, void *a, void *b )
VIPS_FREE( seq->enabled );
VIPS_FREE( seq->p );
#ifdef HAVE_VECTOR_ARITH
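/* Must use g_free() here rather than VIPS_FREE(): seq itself lives
* inside the block that seq->mem points to, so resetting seq->mem
* after the free would write to memory we have just released.
*/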
g_free( seq->mem );
#else /*!defined(HAVE_VECTOR_ARITH)*/
VIPS_FREE( seq );
#endif /*HAVE_VECTOR_ARITH*/
return( 0 );
}
@ -227,12 +238,38 @@ vips_composite_start( VipsImage *out, void *a, void *b )
VipsImage **in = (VipsImage **) a;
VipsCompositeBase *composite = (VipsCompositeBase *) b;
void *mem;
VipsCompositeSequence *seq;
int i, n;
int i, n, size;
if( !(seq = VIPS_NEW( NULL, VipsCompositeSequence )) )
/* The size of our struct.
*/
size = sizeof( VipsCompositeSequence );
#ifdef HAVE_VECTOR_ARITH
/* Over-allocate by 15 bytes so that a 16-byte-aligned address for
* the struct can always be found inside the block.
*/
size += 16 - 1;
#endif /*HAVE_VECTOR_ARITH*/
/* Allocate a new chunk of memory.
*/
if( !(mem = vips_malloc( NULL, size )) )
return( NULL );
#ifdef HAVE_VECTOR_ARITH
/* Round mem up to the next 16-byte boundary (mem itself if it is
* already aligned); the struct lives at that address.
*/
seq = (VipsCompositeSequence *)
(((guintptr) mem + 15) & ~(guintptr) 0x0F);
/* Store the pointer to the 'real' memory.
*/
seq->mem = mem;
#else /*!defined(HAVE_VECTOR_ARITH)*/
seq = (VipsCompositeSequence *) mem;
#endif /*HAVE_VECTOR_ARITH*/
seq->composite = composite;
seq->input_regions = NULL;
seq->enabled = NULL;
@ -280,7 +317,19 @@ vips_composite_start( VipsImage *out, void *a, void *b )
return( NULL );
}
}
#ifdef HAVE_VECTOR_ARITH
/* We need a float version for the vector path.
*/
if( composite->bands == 3 )
seq->max_band_vec = (v4f){
(float) composite->max_band[0],
(float) composite->max_band[1],
(float) composite->max_band[2],
(float) composite->max_band[3]
};
#endif
return( seq );
}
@ -664,9 +713,11 @@ vips_composite_base_blend( VipsCompositeBase *composite,
*/
template <typename T>
static void
vips_composite_base_blend3( VipsCompositeBase *composite,
vips_composite_base_blend3( VipsCompositeSequence *seq,
VipsBlendMode mode, v4f &B, T * restrict p )
{
VipsCompositeBase *composite = seq->composite;
v4f A;
float aA;
float aB;
@ -684,7 +735,7 @@ vips_composite_base_blend3( VipsCompositeBase *composite,
A[2] = p[2];
A[3] = p[3];
A /= composite->max_band_vec;
A /= seq->max_band_vec;
aA = A[3];
aB = B[3];
@ -975,7 +1026,7 @@ vips_combine_pixels3( VipsCompositeSequence *seq, VipsPel *q )
/* Scale the base pixel to 0 - 1.
*/
B /= composite->max_band_vec;
B /= seq->max_band_vec;
aB = B[3];
if( !composite->premultiplied ) {
@ -987,7 +1038,7 @@ vips_combine_pixels3( VipsCompositeSequence *seq, VipsPel *q )
int j = seq->enabled[i];
VipsBlendMode m = n_mode == 1 ? mode[0] : mode[j - 1];
vips_composite_base_blend3<T>( composite, m, B, tp[i] );
vips_composite_base_blend3<T>( seq, m, B, tp[i] );
}
/* Unpremultiply, if necessary.
@ -1006,7 +1057,7 @@ vips_combine_pixels3( VipsCompositeSequence *seq, VipsPel *q )
/* Write back as a full range pixel, clipping to range.
*/
B *= composite->max_band_vec;
B *= seq->max_band_vec;
if( min_T != 0 ||
max_T != 0 ) {
float low = min_T;
@ -1386,14 +1437,6 @@ vips_composite_base_build( VipsObject *object )
return( -1 );
}
#ifdef HAVE_VECTOR_ARITH
/* We need a float version for the vector path.
*/
if( composite->bands == 3 )
for( int b = 0; b <= 3; b++ )
composite->max_band_vec[b] = composite->max_band[b];
#endif /*HAVE_VECTOR_ARITH*/
/* Transform the input images to match in format. We may have
* mixed float and double, for example.
*/