diff --git a/configure.ac b/configure.ac index 2b928842..176bee11 100644 --- a/configure.ac +++ b/configure.ac @@ -409,7 +409,7 @@ fi AC_FUNC_MEMCMP AC_FUNC_MMAP AC_FUNC_VPRINTF -AC_CHECK_FUNCS([getcwd gettimeofday getwd memset munmap putenv realpath strcasecmp strchr strcspn strdup strerror strrchr strspn vsnprintf realpath mkstemp mktemp random rand sysconf atexit]) +AC_CHECK_FUNCS([getcwd gettimeofday getwd memset munmap putenv realpath strcasecmp strchr strcspn strdup strerror strrchr strspn vsnprintf realpath mkstemp mktemp random rand sysconf atexit _aligned_malloc posix_memalign memalign]) AC_CHECK_LIB(m,cbrt,[AC_DEFINE(HAVE_CBRT,1,[have cbrt() in libm.])]) AC_CHECK_LIB(m,hypot,[AC_DEFINE(HAVE_HYPOT,1,[have hypot() in libm.])]) AC_CHECK_LIB(m,atan2,[AC_DEFINE(HAVE_ATAN2,1,[have atan2() in libm.])]) diff --git a/libvips/conversion/composite.cpp b/libvips/conversion/composite.cpp index b9292d60..f083d60e 100644 --- a/libvips/conversion/composite.cpp +++ b/libvips/conversion/composite.cpp @@ -55,13 +55,17 @@ #include #include -#if _MSC_VER +#ifdef _MSC_VER #include #else #include #endif #include +#if defined(HAVE__ALIGNED_MALLOC) || defined(HAVE_MEMALIGN) +#include +#endif + #include #include #include @@ -159,7 +163,8 @@ vips_composite_base_dispose( GObject *gobject ) G_OBJECT_CLASS( vips_composite_base_parent_class )->dispose( gobject ); } -/* Our sequence value. +/* Our sequence value. This must be aligned on a 16-byte boundary when + * HAVE_VECTOR_ARITH is defined (it then holds the v4f max_band_vec member). */ typedef struct { VipsCompositeBase *composite; @@ -189,10 +194,6 @@ typedef struct { VipsPel **p; #ifdef HAVE_VECTOR_ARITH - /* A pointer to the 'real' memory. - */ - void *mem; - /* max_band as a vector, for the RGBA case. */ v4f max_band_vec; @@ -200,6 +201,39 @@ typedef struct { } VipsCompositeSequence; +#ifdef HAVE_VECTOR_ARITH +/* Allocate sz bytes aligned to align bytes. 
align must be a power of two + * (asserted); may return NULL. Free with vips_free_aligned(), for example: + * VIPS_FREEF( vips_free_aligned, ptr ); + */ +static inline void * +vips_alloc_aligned( size_t sz, size_t align ) +{ + g_assert( !(align & (align - 1)) ); +#ifdef HAVE__ALIGNED_MALLOC + return _aligned_malloc( sz, align ); +#elif defined(HAVE_POSIX_MEMALIGN) + void *ptr; + if( posix_memalign( &ptr, align, sz ) ) return NULL; + return ptr; +#elif defined(HAVE_MEMALIGN) + return memalign( align, sz ); +#else +#error Missing aligned alloc implementation +#endif +} + +static inline void +vips_free_aligned( void* ptr ) +{ +#ifdef HAVE__ALIGNED_MALLOC + _aligned_free( ptr ); +#else /*defined(HAVE_POSIX_MEMALIGN) || defined(HAVE_MEMALIGN)*/ + free( ptr ); +#endif +} +#endif /*HAVE_VECTOR_ARITH*/ + static int vips_composite_stop( void *vseq, void *a, void *b ) { @@ -221,10 +255,7 @@ vips_composite_stop( void *vseq, void *a, void *b ) VIPS_FREE( seq->p ); #ifdef HAVE_VECTOR_ARITH - /* Must use g_free here, otherwise we end up writing to a - * pointer that we just freed. - */ - g_free( seq->mem ); + VIPS_FREEF( vips_free_aligned, seq ); #else /*!defined(HAVE_VECTOR_ARITH)*/ VIPS_FREE( seq ); #endif /*HAVE_VECTOR_ARITH*/ @@ -238,37 +269,18 @@ vips_composite_start( VipsImage *out, void *a, void *b ) VipsImage **in = (VipsImage **) a; VipsCompositeBase *composite = (VipsCompositeBase *) b; - void *mem; VipsCompositeSequence *seq; - int i, n, size; - - /* The size of our struct. - */ - size = sizeof( VipsCompositeSequence ); + int i, n; #ifdef HAVE_VECTOR_ARITH /* Ensure that the memory is aligned on a 16-byte boundary. */ - size += 16 - 1; -#endif /*HAVE_VECTOR_ARITH*/ - - /* Allocate a new chunk of memory. - */ - if( !(mem = vips_malloc( NULL, size )) ) - return( NULL ); - -#ifdef HAVE_VECTOR_ARITH - /* Our aligned pointer. - */ - seq = (VipsCompositeSequence *) - (((guintptr) mem + 15) & ~(guintptr) 0x0F); - - /* Store the pointer to the 'real' memory. 
- */ - seq->mem = mem; + if( !(seq = ((VipsCompositeSequence *) vips_alloc_aligned( + sizeof( VipsCompositeSequence ), 16 ))) ) #else /*!defined(HAVE_VECTOR_ARITH)*/ - seq = (VipsCompositeSequence *) mem; + if( !(seq = VIPS_NEW( NULL, VipsCompositeSequence )) ) #endif /*HAVE_VECTOR_ARITH*/ + return( NULL ); seq->composite = composite; seq->input_regions = NULL;