arch/x86_64: add support for FMA, AVX-2 and AVX-512

add support for FMA, AVX-2 and AVX-512 instruction sets for x86_64

Signed-off-by: p-szafonimateusz <p-szafonimateusz@xiaomi.com>
This commit is contained in:
p-szafonimateusz 2024-04-22 15:49:18 +02:00 committed by Xiang Xiao
parent 2192d83200
commit bd73a915bd
6 changed files with 206 additions and 20 deletions

View File

@ -30,6 +30,9 @@ config ARCH_INTEL64
select ARCH_HAVE_SSE41
select ARCH_HAVE_SSE42
select ARCH_HAVE_SSE4A
select ARCH_HAVE_FMA if ARCH_X86_64_HAVE_XSAVE
select ARCH_HAVE_AVX if ARCH_X86_64_HAVE_XSAVE
select ARCH_HAVE_AVX512 if ARCH_X86_64_HAVE_XSAVE
select ARCH_ICACHE
select ARCH_DCACHE
select ARCH_HAVE_IRQTRIGGER
@ -78,6 +81,10 @@ config ARCH_HAVE_SSE4A
bool
default n
config ARCH_HAVE_FMA
bool
default n
config ARCH_HAVE_AVX
bool
default n

View File

@ -165,12 +165,14 @@
#define X86_64_CPUID_CAP 0x01
# define X86_64_CPUID_01_SSE3 (1 << 0)
# define X86_64_CPUID_01_SSSE3 (1 << 9)
# define X86_64_CPUID_01_FMA (1 << 12)
# define X86_64_CPUID_01_PCID (1 << 17)
# define X86_64_CPUID_01_SSE41 (1 << 19)
# define X86_64_CPUID_01_SSE42 (1 << 20)
# define X86_64_CPUID_01_X2APIC (1 << 21)
# define X86_64_CPUID_01_TSCDEA (1 << 24)
# define X86_64_CPUID_01_XSAVE (1 << 26)
# define X86_64_CPUID_01_AVX (1 << 28)
# define X86_64_CPUID_01_RDRAND (1 << 30)
# define X86_64_CPUID_01_APICID(ebx) ((ebx) >> 24)
#define X86_64_CPUID_EXTCAP 0x07

View File

@ -143,3 +143,43 @@ endif()
if(CONFIG_ARCH_X86_64_SSE4A)
add_compile_options(-msse4a)
endif()
# Mirror the Make-based toolchain flags, which pass -mfma for this option.
if(CONFIG_ARCH_X86_64_FMA)
add_compile_options(-mfma)
endif()
if(CONFIG_ARCH_X86_64_AVX)
add_compile_options(-mavx)
endif()
if(CONFIG_ARCH_X86_64_AVX512)
add_compile_options(-mavx512f)
endif()
if(CONFIG_ARCH_X86_64_AVX512PF)
add_compile_options(-mavx512pf)
endif()
if(CONFIG_ARCH_X86_64_AVX512ER)
add_compile_options(-mavx512er)
endif()
if(CONFIG_ARCH_X86_64_AVX512CD)
add_compile_options(-mavx512cd)
endif()
if(CONFIG_ARCH_X86_64_AVX512VL)
add_compile_options(-mavx512vl)
endif()
if(CONFIG_ARCH_X86_64_AVX512BW)
add_compile_options(-mavx512bw)
endif()
if(CONFIG_ARCH_X86_64_AVX512DQ)
add_compile_options(-mavx512dq)
endif()
if(CONFIG_ARCH_X86_64_AVX512IFMA)
add_compile_options(-mavx512ifma)
endif()
if(CONFIG_ARCH_X86_64_AVX512VBMI)
add_compile_options(-mavx512vbmi)
endif()

View File

@ -75,6 +75,11 @@ config ARCH_X86_64_SSE4A
depends on ARCH_HAVE_SSE4A
default n
config ARCH_X86_64_FMA
bool "FMA support"
depends on ARCH_HAVE_FMA && ARCH_X86_64_AVX
default n
config ARCH_X86_64_AVX
bool "AVX support"
depends on ARCH_HAVE_AVX
@ -85,4 +90,40 @@ config ARCH_X86_64_AVX512
depends on ARCH_HAVE_AVX512
default n
if ARCH_X86_64_AVX512
config ARCH_X86_64_AVX512PF
bool "AVX512 Prefetch Instructions (AVX512PF)"
default n
config ARCH_X86_64_AVX512ER
bool "AVX512 Exponential and Reciprocal Instructions (AVX512ER)"
default n
config ARCH_X86_64_AVX512CD
bool "AVX512 Conflict Detection Instructions (AVX512CD)"
default n
config ARCH_X86_64_AVX512VL
bool "AVX512 Vector Length Extensions (AVX512VL)"
default n
config ARCH_X86_64_AVX512BW
bool "AVX512 Byte and Word Instructions (AVX512BW)"
default n
config ARCH_X86_64_AVX512DQ
bool "AVX512 Doubleword and Quadword Instructions (AVX512DQ)"
default n
config ARCH_X86_64_AVX512IFMA
bool "AVX512 Integer Fused Multiply-Add Instructions (AVX512IFMA)"
default n
config ARCH_X86_64_AVX512VBMI
bool "AVX512 Vector Bit Manipulation Instructions (AVX512VBMI)"
default n
endif # ARCH_X86_64_AVX512
endif

View File

@ -56,16 +56,6 @@ NM = $(CROSSDEV)nm
OBJCOPY = $(CROSSDEV)objcopy
OBJDUMP = $(CROSSDEV)objdump
CFLAGS := $(ARCHWARNINGS) $(ARCHOPTIMIZATION) $(ARCHCPUFLAGS) $(ARCHINCLUDES) $(ARCHDEFINES) $(EXTRAFLAGS) -pipe
CPPFLAGS := $(ARCHINCLUDES) $(ARCHDEFINES) $(EXTRAFLAGS)
AFLAGS := $(CFLAGS) -D__ASSEMBLY__
ifeq ($(CONFIG_HOST_MACOS),y)
AFLAGS += -Wa,--divide
endif
EXEEXT = .elf
ifeq ($(CONFIG_ARCH_X86_64_SSE3),y)
ARCHCPUFLAGS += -msse3
endif
@ -75,13 +65,67 @@ ifeq ($(CONFIG_ARCH_X86_64_SSSE3),y)
endif
ifeq ($(CONFIG_ARCH_X86_64_SSE41),y)
ARCHCPUFLAGS += -msse41
ARCHCPUFLAGS += -msse4.1
endif
ifeq ($(CONFIG_ARCH_X86_64_SSE42),y)
ARCHCPUFLAGS += -msse42
ARCHCPUFLAGS += -msse4.2
endif
ifeq ($(CONFIG_ARCH_X86_64_SSE4a),y)
ifeq ($(CONFIG_ARCH_X86_64_SSE4A),y)
ARCHCPUFLAGS += -msse4a
endif
ifeq ($(CONFIG_ARCH_X86_64_FMA),y)
ARCHCPUFLAGS += -mfma
endif
ifeq ($(CONFIG_ARCH_X86_64_AVX),y)
ARCHCPUFLAGS += -mavx
endif
ifeq ($(CONFIG_ARCH_X86_64_AVX512),y)
ARCHCPUFLAGS += -mavx512f
endif
ifeq ($(CONFIG_ARCH_X86_64_AVX512PF),y)
ARCHCPUFLAGS += -mavx512pf
endif
ifeq ($(CONFIG_ARCH_X86_64_AVX512ER),y)
ARCHCPUFLAGS += -mavx512er
endif
ifeq ($(CONFIG_ARCH_X86_64_AVX512CD),y)
ARCHCPUFLAGS += -mavx512cd
endif
ifeq ($(CONFIG_ARCH_X86_64_AVX512VL),y)
ARCHCPUFLAGS += -mavx512vl
endif
ifeq ($(CONFIG_ARCH_X86_64_AVX512BW),y)
ARCHCPUFLAGS += -mavx512bw
endif
ifeq ($(CONFIG_ARCH_X86_64_AVX512DQ),y)
ARCHCPUFLAGS += -mavx512dq
endif
ifeq ($(CONFIG_ARCH_X86_64_AVX512IFMA),y)
ARCHCPUFLAGS += -mavx512ifma
endif
ifeq ($(CONFIG_ARCH_X86_64_AVX512VBMI),y)
ARCHCPUFLAGS += -mavx512vbmi
endif
CFLAGS := $(ARCHWARNINGS) $(ARCHOPTIMIZATION) $(ARCHCPUFLAGS) $(ARCHINCLUDES) $(ARCHDEFINES) $(EXTRAFLAGS) -pipe
CPPFLAGS := $(ARCHINCLUDES) $(ARCHDEFINES) $(EXTRAFLAGS)
AFLAGS := $(CFLAGS) -D__ASSEMBLY__
ifeq ($(CONFIG_HOST_MACOS),y)
AFLAGS += -Wa,--divide
endif
EXEEXT = .elf

View File

@ -58,9 +58,47 @@ void x86_64_check_and_enable_capability(void)
{
unsigned long ebx;
unsigned long ecx;
unsigned long require;
unsigned long require = 0;
require = X86_64_CPUID_01_X2APIC;
/* Check SSE3 instructions availability */
#ifdef CONFIG_ARCH_X86_64_SSE3
require |= X86_64_CPUID_01_SSE3;
#endif
/* Check Supplemental SSE3 (SSSE3) instructions availability */
#ifdef CONFIG_ARCH_X86_64_SSSE3
require |= X86_64_CPUID_01_SSSE3;
#endif
/* Check Fused multiply-add (FMA) instructions availability */
#ifdef CONFIG_ARCH_X86_64_FMA
require |= X86_64_CPUID_01_FMA;
#endif
/* Check process context identifiers availability */
#ifdef CONFIG_ARCH_INTEL64_HAVE_PCID
require |= X86_64_CPUID_01_PCID;
#endif
/* Check SSE4.1 instructions availability */
#ifdef CONFIG_ARCH_X86_64_SSE41
require |= X86_64_CPUID_01_SSE41;
#endif
/* Check SSE4.2 instructions availability */
#ifdef CONFIG_ARCH_X86_64_SSE42
require |= X86_64_CPUID_01_SSE42;
#endif
/* Check x2APIC availability */
require |= X86_64_CPUID_01_X2APIC;
/* Check timer availability */
@ -68,16 +106,22 @@ void x86_64_check_and_enable_capability(void)
require |= X86_64_CPUID_01_TSCDEA;
#endif
#ifdef CONFIG_ARCH_INTEL64_HAVE_XSAVE
/* Check XSAVE/XRSTOR availability */
#ifdef CONFIG_ARCH_X86_64_HAVE_XSAVE
require |= X86_64_CPUID_01_XSAVE;
#endif
#ifdef CONFIG_ARCH_INTEL64_HAVE_RDRAND
require |= X86_64_CPUID_01_RDRAND;
/* Check AVX instructions availability */
#ifdef CONFIG_ARCH_X86_64_AVX
require |= X86_64_CPUID_01_AVX;
#endif
#ifdef CONFIG_ARCH_INTEL64_HAVE_PCID
require |= X86_64_CPUID_01_PCID;
/* Check RDRAND feature availability */
#ifdef CONFIG_ARCH_INTEL64_HAVE_RDRAND
require |= X86_64_CPUID_01_RDRAND;
#endif
asm volatile("cpuid" : "=c" (ecx) : "a" (X86_64_CPUID_CAP)
@ -94,6 +138,14 @@ void x86_64_check_and_enable_capability(void)
require = 0;
/* Check AVX512 Foundation instructions availability */
#ifdef CONFIG_ARCH_X86_64_AVX512
require |= X86_64_CPUID_07_AVX512F;
#endif
/* Check CLWB instruction availability */
#ifdef CONFIG_ARCH_INTEL64_HAVE_CLWB
require |= X86_64_CPUID_07_CLWB;
#endif