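Ensure the max_band vector is aligned on a 16-byte boundary.
The vector moves from VipsCompositeBase into VipsCompositeSequence, which is now allocated with 15 bytes of padding and rounded up to the next 16-byte boundary; the original pointer is kept in seq->mem so it can be freed later.
See https://github.com/mstorsjo/llvm-mingw/issues/190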
This commit is contained in:
parent
6666b941bf
commit
a55513a194
@ -130,12 +130,6 @@ typedef struct _VipsCompositeBase {
|
||||
*/
|
||||
gboolean skippable;
|
||||
|
||||
#ifdef HAVE_VECTOR_ARITH
|
||||
/* max_band as a vector, for the RGBA case.
|
||||
*/
|
||||
v4f max_band_vec;
|
||||
#endif /*HAVE_VECTOR_ARITH*/
|
||||
|
||||
} VipsCompositeBase;
|
||||
|
||||
typedef VipsConversionClass VipsCompositeBaseClass;
|
||||
@ -194,6 +188,16 @@ typedef struct {
|
||||
*/
|
||||
VipsPel **p;
|
||||
|
||||
#ifdef HAVE_VECTOR_ARITH
|
||||
/* A pointer to the 'real' memory.
|
||||
*/
|
||||
void *mem;
|
||||
|
||||
/* max_band as a vector, for the RGBA case.
|
||||
*/
|
||||
v4f max_band_vec;
|
||||
#endif /*HAVE_VECTOR_ARITH*/
|
||||
|
||||
} VipsCompositeSequence;
|
||||
|
||||
static int
|
||||
@ -216,7 +220,14 @@ vips_composite_stop( void *vseq, void *a, void *b )
|
||||
VIPS_FREE( seq->enabled );
|
||||
VIPS_FREE( seq->p );
|
||||
|
||||
#ifdef HAVE_VECTOR_ARITH
|
||||
/* Must use g_free here, otherwise we end up writing to a
|
||||
* pointer that we just freed.
|
||||
*/
|
||||
g_free( seq->mem );
|
||||
#else /*!defined(HAVE_VECTOR_ARITH)*/
|
||||
VIPS_FREE( seq );
|
||||
#endif /*HAVE_VECTOR_ARITH*/
|
||||
|
||||
return( 0 );
|
||||
}
|
||||
@ -227,12 +238,38 @@ vips_composite_start( VipsImage *out, void *a, void *b )
|
||||
VipsImage **in = (VipsImage **) a;
|
||||
VipsCompositeBase *composite = (VipsCompositeBase *) b;
|
||||
|
||||
void *mem;
|
||||
VipsCompositeSequence *seq;
|
||||
int i, n;
|
||||
int i, n, size;
|
||||
|
||||
if( !(seq = VIPS_NEW( NULL, VipsCompositeSequence )) )
|
||||
/* The size of our struct.
|
||||
*/
|
||||
size = sizeof( VipsCompositeSequence );
|
||||
|
||||
#ifdef HAVE_VECTOR_ARITH
|
||||
/* Ensure that the memory is aligned on a 16-byte boundary.
|
||||
*/
|
||||
size += 16 - 1;
|
||||
#endif /*HAVE_VECTOR_ARITH*/
|
||||
|
||||
/* Allocate a new chunk of memory.
|
||||
*/
|
||||
if( !(mem = vips_malloc( NULL, size )) )
|
||||
return( NULL );
|
||||
|
||||
#ifdef HAVE_VECTOR_ARITH
|
||||
/* Our aligned pointer.
|
||||
*/
|
||||
seq = (VipsCompositeSequence *)
|
||||
(((guintptr) mem + 15) & ~(guintptr) 0x0F);
|
||||
|
||||
/* Store the pointer to the 'real' memory.
|
||||
*/
|
||||
seq->mem = mem;
|
||||
#else /*!defined(HAVE_VECTOR_ARITH)*/
|
||||
seq = (VipsCompositeSequence *) mem;
|
||||
#endif /*HAVE_VECTOR_ARITH*/
|
||||
|
||||
seq->composite = composite;
|
||||
seq->input_regions = NULL;
|
||||
seq->enabled = NULL;
|
||||
@ -280,7 +317,19 @@ vips_composite_start( VipsImage *out, void *a, void *b )
|
||||
return( NULL );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef HAVE_VECTOR_ARITH
|
||||
/* We need a float version for the vector path.
|
||||
*/
|
||||
if( composite->bands == 3 )
|
||||
seq->max_band_vec = (v4f){
|
||||
(float) composite->max_band[0],
|
||||
(float) composite->max_band[1],
|
||||
(float) composite->max_band[2],
|
||||
(float) composite->max_band[3]
|
||||
};
|
||||
#endif
|
||||
|
||||
return( seq );
|
||||
}
|
||||
|
||||
@ -664,9 +713,11 @@ vips_composite_base_blend( VipsCompositeBase *composite,
|
||||
*/
|
||||
template <typename T>
|
||||
static void
|
||||
vips_composite_base_blend3( VipsCompositeBase *composite,
|
||||
vips_composite_base_blend3( VipsCompositeSequence *seq,
|
||||
VipsBlendMode mode, v4f &B, T * restrict p )
|
||||
{
|
||||
VipsCompositeBase *composite = seq->composite;
|
||||
|
||||
v4f A;
|
||||
float aA;
|
||||
float aB;
|
||||
@ -684,7 +735,7 @@ vips_composite_base_blend3( VipsCompositeBase *composite,
|
||||
A[2] = p[2];
|
||||
A[3] = p[3];
|
||||
|
||||
A /= composite->max_band_vec;
|
||||
A /= seq->max_band_vec;
|
||||
|
||||
aA = A[3];
|
||||
aB = B[3];
|
||||
@ -975,7 +1026,7 @@ vips_combine_pixels3( VipsCompositeSequence *seq, VipsPel *q )
|
||||
|
||||
/* Scale the base pixel to 0 - 1.
|
||||
*/
|
||||
B /= composite->max_band_vec;
|
||||
B /= seq->max_band_vec;
|
||||
aB = B[3];
|
||||
|
||||
if( !composite->premultiplied ) {
|
||||
@ -987,7 +1038,7 @@ vips_combine_pixels3( VipsCompositeSequence *seq, VipsPel *q )
|
||||
int j = seq->enabled[i];
|
||||
VipsBlendMode m = n_mode == 1 ? mode[0] : mode[j - 1];
|
||||
|
||||
vips_composite_base_blend3<T>( composite, m, B, tp[i] );
|
||||
vips_composite_base_blend3<T>( seq, m, B, tp[i] );
|
||||
}
|
||||
|
||||
/* Unpremultiply, if necessary.
|
||||
@ -1006,7 +1057,7 @@ vips_combine_pixels3( VipsCompositeSequence *seq, VipsPel *q )
|
||||
|
||||
/* Write back as a full range pixel, clipping to range.
|
||||
*/
|
||||
B *= composite->max_band_vec;
|
||||
B *= seq->max_band_vec;
|
||||
if( min_T != 0 ||
|
||||
max_T != 0 ) {
|
||||
float low = min_T;
|
||||
@ -1386,14 +1437,6 @@ vips_composite_base_build( VipsObject *object )
|
||||
return( -1 );
|
||||
}
|
||||
|
||||
#ifdef HAVE_VECTOR_ARITH
|
||||
/* We need a float version for the vector path.
|
||||
*/
|
||||
if( composite->bands == 3 )
|
||||
for( int b = 0; b <= 3; b++ )
|
||||
composite->max_band_vec[b] = composite->max_band[b];
|
||||
#endif /*HAVE_VECTOR_ARITH*/
|
||||
|
||||
/* Transform the input images to match in format. We may have
|
||||
* mixed float and double, for example.
|
||||
*/
|
||||
|
Loading…
Reference in New Issue
Block a user