Introduce support for target_clones attribute (#3280)

This requires GLIBC 2.23+, plus either gcc 6+ or clang 14+.

- Provides build-time feature detection
- Use with (un)premultiply for ~10% perf gain on AVX CPUs
- Slightly increases binary size, so best to use sparingly
This commit is contained in:
Lovell Fuller 2023-01-16 09:45:37 +00:00 committed by GitHub
parent f8c06d8a36
commit 7eba4ee43f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 26 additions and 0 deletions

View File

@ -121,6 +121,7 @@ G_DEFINE_TYPE( VipsPremultiply, vips_premultiply, VIPS_TYPE_CONVERSION );
} \
}
VIPS_TARGET_CLONES("default,avx")
static int
vips_premultiply_gen( VipsRegion *or, void *vseq, void *a, void *b,
gboolean *stop )

View File

@ -174,6 +174,7 @@ G_DEFINE_TYPE( VipsUnpremultiply, vips_unpremultiply, VIPS_TYPE_CONVERSION );
} \
}
VIPS_TARGET_CLONES("default,avx")
static int
vips_unpremultiply_gen( VipsRegion *or, void *vseq, void *a, void *b,
gboolean *stop )

View File

@ -215,6 +215,16 @@ G_STMT_START { \
*/
#define VIPS_PATH_MAX (4096)
/* Function multi-versioning helper. Put VIPS_TARGET_CLONES( "a,b" ) in front
 * of a function definition to get one copy of that function per listed SIMD
 * target, with the most suitable copy picked at runtime via dynamic dispatch.
 *
 * The macro expands to nothing unless HAVE_TARGET_CLONES is defined, so
 * annotated functions still build as a single plain copy without it.
 */
#ifndef HAVE_TARGET_CLONES
#define VIPS_TARGET_CLONES( TARGETS )
#else
#define VIPS_TARGET_CLONES( TARGETS ) \
	__attribute__(( target_clones( TARGETS ) ))
#endif
VIPS_API
const char *vips_enum_string( GType enm, int value );
VIPS_API

View File

@ -131,6 +131,20 @@ if cpp.compiles(vector_arithmetic_check, name: 'Has vector arithmetic', dependen
endif
endif
# HAVE_TARGET_CLONES
# Build-time probe for the target_clones function attribute. The snippet is a
# minimal C program: a static helper tagged with
# __attribute__((target_clones("default,avx"))) plus a main() that calls it.
target_clones_check = '''
static int __attribute__((target_clones("default,avx")))
has_target_clones(void) {
return 0;
}
int main(void) {
return has_target_clones();
}
'''
# The snippet is compiled with -Werror, so any warning fails the probe
# (presumably so that a compiler which merely warns about an unrecognised
# attribute is treated as lacking support -- confirm).
# NOTE(review): cc.compiles() only compiles the snippet; it is not linked or
# run here.
# On success, HAVE_TARGET_CLONES is set to '1' in cfg_var.
if cc.compiles(target_clones_check, args: '-Werror', name: 'Has target_clones attribute')
cfg_var.set('HAVE_TARGET_CLONES', '1')
endif
func_names = [ 'vsnprintf', '_aligned_malloc', 'posix_memalign', 'memalign', 'cbrt', 'hypot', 'atan2', 'asinh' ]
foreach func_name : func_names
if cc.has_function(func_name, dependencies: m_dep)