Introduce support for target_clones attribute (#3280)
This requires GLIBC 2.23+, plus either gcc 6+ or clang 14+.

- Provides build-time feature detection
- Use with (un)premultiply for ~10% perf gain on AVX CPUs
- Slightly increases binary size, so best to use sparingly
This commit is contained in:
parent
f8c06d8a36
commit
7eba4ee43f
@ -121,6 +121,7 @@ G_DEFINE_TYPE( VipsPremultiply, vips_premultiply, VIPS_TYPE_CONVERSION );
|
|||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VIPS_TARGET_CLONES("default,avx")
|
||||||
static int
|
static int
|
||||||
vips_premultiply_gen( VipsRegion *or, void *vseq, void *a, void *b,
|
vips_premultiply_gen( VipsRegion *or, void *vseq, void *a, void *b,
|
||||||
gboolean *stop )
|
gboolean *stop )
|
||||||
|
@ -174,6 +174,7 @@ G_DEFINE_TYPE( VipsUnpremultiply, vips_unpremultiply, VIPS_TYPE_CONVERSION );
|
|||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VIPS_TARGET_CLONES("default,avx")
|
||||||
static int
|
static int
|
||||||
vips_unpremultiply_gen( VipsRegion *or, void *vseq, void *a, void *b,
|
vips_unpremultiply_gen( VipsRegion *or, void *vseq, void *a, void *b,
|
||||||
gboolean *stop )
|
gboolean *stop )
|
||||||
|
@ -215,6 +215,16 @@ G_STMT_START { \
|
|||||||
*/
|
*/
|
||||||
#define VIPS_PATH_MAX (4096)
|
#define VIPS_PATH_MAX (4096)
|
||||||
|
|
||||||
|
/* Create multiple copies of a function targeted at groups of SIMD intrinsics,
|
||||||
|
* with the most suitable selected at runtime via dynamic dispatch.
|
||||||
|
*/
|
||||||
|
#ifdef HAVE_TARGET_CLONES
|
||||||
|
#define VIPS_TARGET_CLONES( TARGETS ) \
|
||||||
|
__attribute__(( target_clones( TARGETS ) ))
|
||||||
|
#else
|
||||||
|
#define VIPS_TARGET_CLONES( TARGETS )
|
||||||
|
#endif
|
||||||
|
|
||||||
VIPS_API
|
VIPS_API
|
||||||
const char *vips_enum_string( GType enm, int value );
|
const char *vips_enum_string( GType enm, int value );
|
||||||
VIPS_API
|
VIPS_API
|
||||||
|
14
meson.build
14
meson.build
@ -131,6 +131,20 @@ if cpp.compiles(vector_arithmetic_check, name: 'Has vector arithmetic', dependen
|
|||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
# HAVE_TARGET_CLONES
|
||||||
|
target_clones_check = '''
|
||||||
|
static int __attribute__((target_clones("default,avx")))
|
||||||
|
has_target_clones(void) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
int main(void) {
|
||||||
|
return has_target_clones();
|
||||||
|
}
|
||||||
|
'''
|
||||||
|
if cc.compiles(target_clones_check, args: '-Werror', name: 'Has target_clones attribute')
|
||||||
|
cfg_var.set('HAVE_TARGET_CLONES', '1')
|
||||||
|
endif
|
||||||
|
|
||||||
func_names = [ 'vsnprintf', '_aligned_malloc', 'posix_memalign', 'memalign', 'cbrt', 'hypot', 'atan2', 'asinh' ]
|
func_names = [ 'vsnprintf', '_aligned_malloc', 'posix_memalign', 'memalign', 'cbrt', 'hypot', 'atan2', 'asinh' ]
|
||||||
foreach func_name : func_names
|
foreach func_name : func_names
|
||||||
if cc.has_function(func_name, dependencies: m_dep)
|
if cc.has_function(func_name, dependencies: m_dep)
|
||||||
|
Loading…
Reference in New Issue
Block a user