From bd73a915bd016376405d8f318b3fa87277101cb4 Mon Sep 17 00:00:00 2001 From: p-szafonimateusz Date: Mon, 22 Apr 2024 15:49:18 +0200 Subject: [PATCH] arch/x86_64: add support for FMA, AVX-2 and AVX-512 add support for FMA, AVX-2 and AVX-512 instruction sets for x86_64 Signed-off-by: p-szafonimateusz --- arch/x86_64/Kconfig | 7 ++ arch/x86_64/include/intel64/arch.h | 2 + arch/x86_64/src/cmake/Toolchain.cmake | 40 +++++++++++ arch/x86_64/src/common/Kconfig | 41 +++++++++++ arch/x86_64/src/common/Toolchain.defs | 70 +++++++++++++++---- .../src/intel64/intel64_check_capability.c | 66 +++++++++++++++-- 6 files changed, 206 insertions(+), 20 deletions(-) diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 064cbf8b09..a22b9310b3 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -30,6 +30,9 @@ config ARCH_INTEL64 select ARCH_HAVE_SSE41 select ARCH_HAVE_SSE42 select ARCH_HAVE_SSE4A + select ARCH_HAVE_FMA if ARCH_X86_64_HAVE_XSAVE + select ARCH_HAVE_AVX if ARCH_X86_64_HAVE_XSAVE + select ARCH_HAVE_AVX512 if ARCH_X86_64_HAVE_XSAVE select ARCH_ICACHE select ARCH_DCACHE select ARCH_HAVE_IRQTRIGGER @@ -78,6 +81,10 @@ config ARCH_HAVE_SSE4A bool default n +config ARCH_HAVE_FMA + bool + default n + config ARCH_HAVE_AVX bool default n diff --git a/arch/x86_64/include/intel64/arch.h b/arch/x86_64/include/intel64/arch.h index d0e7a61607..34964e3c2c 100644 --- a/arch/x86_64/include/intel64/arch.h +++ b/arch/x86_64/include/intel64/arch.h @@ -165,12 +165,14 @@ #define X86_64_CPUID_CAP 0x01 # define X86_64_CPUID_01_SSE3 (1 << 0) # define X86_64_CPUID_01_SSSE3 (1 << 9) +# define X86_64_CPUID_01_FMA (1 << 12) # define X86_64_CPUID_01_PCID (1 << 17) # define X86_64_CPUID_01_SSE41 (1 << 19) # define X86_64_CPUID_01_SSE42 (1 << 20) # define X86_64_CPUID_01_X2APIC (1 << 21) # define X86_64_CPUID_01_TSCDEA (1 << 24) # define X86_64_CPUID_01_XSAVE (1 << 26) +# define X86_64_CPUID_01_AVX (1 << 28) # define X86_64_CPUID_01_RDRAND (1 << 30) # define X86_64_CPUID_01_APICID(ebx) 
((ebx) >> 24) #define X86_64_CPUID_EXTCAP 0x07 diff --git a/arch/x86_64/src/cmake/Toolchain.cmake b/arch/x86_64/src/cmake/Toolchain.cmake index 3d79887381..aae29d7d9b 100644 --- a/arch/x86_64/src/cmake/Toolchain.cmake +++ b/arch/x86_64/src/cmake/Toolchain.cmake @@ -143,3 +143,47 @@ endif() if(CONFIG_ARCH_X86_64_SSE4A) add_compile_options(-msse4a) endif() + +if(CONFIG_ARCH_X86_64_FMA) + add_compile_options(-mfma) +endif() + +if(CONFIG_ARCH_X86_64_AVX) + add_compile_options(-mavx) +endif() + +if(CONFIG_ARCH_X86_64_AVX512) + add_compile_options(-mavx512f) +endif() + +if(CONFIG_ARCH_X86_64_AVX512PF) + add_compile_options(-mavx512pf) +endif() + +if(CONFIG_ARCH_X86_64_AVX512ER) + add_compile_options(-mavx512er) +endif() + +if(CONFIG_ARCH_X86_64_AVX512CD) + add_compile_options(-mavx512cd) +endif() + +if(CONFIG_ARCH_X86_64_AVX512VL) + add_compile_options(-mavx512vl) +endif() + +if(CONFIG_ARCH_X86_64_AVX512BW) + add_compile_options(-mavx512bw) +endif() + +if(CONFIG_ARCH_X86_64_AVX512DQ) + add_compile_options(-mavx512dq) +endif() + +if(CONFIG_ARCH_X86_64_AVX512IFMA) + add_compile_options(-mavx512ifma) +endif() + +if(CONFIG_ARCH_X86_64_AVX512VBMI) + add_compile_options(-mavx512vbmi) +endif() diff --git a/arch/x86_64/src/common/Kconfig b/arch/x86_64/src/common/Kconfig index fbe6661d18..bd8dd435b3 100644 --- a/arch/x86_64/src/common/Kconfig +++ b/arch/x86_64/src/common/Kconfig @@ -75,6 +75,11 @@ config ARCH_X86_64_SSE4A depends on ARCH_HAVE_SSE4A default n +config ARCH_X86_64_FMA + bool "FMA support" + depends on ARCH_HAVE_FMA && ARCH_X86_64_AVX + default n + config ARCH_X86_64_AVX bool "AVX support" depends on ARCH_HAVE_AVX @@ -85,4 +90,40 @@ config ARCH_X86_64_AVX512 depends on ARCH_HAVE_AVX512 default n +if ARCH_X86_64_AVX512 + +config ARCH_X86_64_AVX512PF + bool "AVX512 Prefetch Instructions (AVX512PF)" + default n + +config ARCH_X86_64_AVX512ER + bool "AVX512 Exponential and Reciprocal Instructions (AVX512ER)" + default n + +config ARCH_X86_64_AVX512CD + bool "AVX512 Conflict Detection Instructions (AVX512CD)" + default n + +config 
ARCH_X86_64_AVX512VL + bool "AVX512 Vector Length Extensions (AVX512VL)" + default n + +config ARCH_X86_64_AVX512BW + bool "AVX512 Byte and Word Instructions (AVX512BW)" + default n + +config ARCH_X86_64_AVX512DQ + bool "AVX512 Doubleword and Quadword Instructions (AVX512DQ)" + default n + +config ARCH_X86_64_AVX512IFMA + bool "AVX512 Integer Fused Multiply-Add Instructions (AVX512IFMA)" + default n + +config ARCH_X86_64_AVX512VBMI + bool "AVX512 Vector Bit Manipulation Instructions (AVX512VBMI)" + default n + +endif # ARCH_X86_64_AVX512 + endif diff --git a/arch/x86_64/src/common/Toolchain.defs b/arch/x86_64/src/common/Toolchain.defs index 32264de3cc..2f0a4cdd6d 100644 --- a/arch/x86_64/src/common/Toolchain.defs +++ b/arch/x86_64/src/common/Toolchain.defs @@ -56,16 +56,6 @@ NM = $(CROSSDEV)nm OBJCOPY = $(CROSSDEV)objcopy OBJDUMP = $(CROSSDEV)objdump -CFLAGS := $(ARCHWARNINGS) $(ARCHOPTIMIZATION) $(ARCHCPUFLAGS) $(ARCHINCLUDES) $(ARCHDEFINES) $(EXTRAFLAGS) -pipe -CPPFLAGS := $(ARCHINCLUDES) $(ARCHDEFINES) $(EXTRAFLAGS) -AFLAGS := $(CFLAGS) -D__ASSEMBLY__ - -ifeq ($(CONFIG_HOST_MACOS),y) -AFLAGS += -Wa,--divide -endif - -EXEEXT = .elf - ifeq ($(CONFIG_ARCH_X86_64_SSE3),y) ARCHCPUFLAGS += -msse3 endif @@ -75,13 +65,67 @@ ifeq ($(CONFIG_ARCH_X86_64_SSSE3),y) endif ifeq ($(CONFIG_ARCH_X86_64_SSE41),y) - ARCHCPUFLAGS += -msse41 + ARCHCPUFLAGS += -msse4.1 endif ifeq ($(CONFIG_ARCH_X86_64_SSE42),y) - ARCHCPUFLAGS += -msse42 + ARCHCPUFLAGS += -msse4.2 endif -ifeq ($(CONFIG_ARCH_X86_64_SSE4a),y) +ifeq ($(CONFIG_ARCH_X86_64_SSE4A),y) ARCHCPUFLAGS += -msse4a endif + +ifeq ($(CONFIG_ARCH_X86_64_FMA),y) + ARCHCPUFLAGS += -mfma +endif + +ifeq ($(CONFIG_ARCH_X86_64_AVX),y) + ARCHCPUFLAGS += -mavx +endif + +ifeq ($(CONFIG_ARCH_X86_64_AVX512),y) + ARCHCPUFLAGS += -mavx512f +endif + +ifeq ($(CONFIG_ARCH_X86_64_AVX512PF),y) + ARCHCPUFLAGS += -mavx512pf +endif + +ifeq ($(CONFIG_ARCH_X86_64_AVX512ER),y) + ARCHCPUFLAGS += -mavx512er +endif + +ifeq ($(CONFIG_ARCH_X86_64_AVX512CD),y) + 
ARCHCPUFLAGS += -mavx512cd +endif + +ifeq ($(CONFIG_ARCH_X86_64_AVX512VL),y) + ARCHCPUFLAGS += -mavx512vl +endif + +ifeq ($(CONFIG_ARCH_X86_64_AVX512BW),y) + ARCHCPUFLAGS += -mavx512bw +endif + +ifeq ($(CONFIG_ARCH_X86_64_AVX512DQ),y) + ARCHCPUFLAGS += -mavx512dq +endif + +ifeq ($(CONFIG_ARCH_X86_64_AVX512IFMA),y) + ARCHCPUFLAGS += -mavx512ifma +endif + +ifeq ($(CONFIG_ARCH_X86_64_AVX512VBMI),y) + ARCHCPUFLAGS += -mavx512vbmi +endif + +CFLAGS := $(ARCHWARNINGS) $(ARCHOPTIMIZATION) $(ARCHCPUFLAGS) $(ARCHINCLUDES) $(ARCHDEFINES) $(EXTRAFLAGS) -pipe +CPPFLAGS := $(ARCHINCLUDES) $(ARCHDEFINES) $(EXTRAFLAGS) +AFLAGS := $(CFLAGS) -D__ASSEMBLY__ + +ifeq ($(CONFIG_HOST_MACOS),y) +AFLAGS += -Wa,--divide +endif + +EXEEXT = .elf diff --git a/arch/x86_64/src/intel64/intel64_check_capability.c b/arch/x86_64/src/intel64/intel64_check_capability.c index 87dad14a7e..067a5786e1 100644 --- a/arch/x86_64/src/intel64/intel64_check_capability.c +++ b/arch/x86_64/src/intel64/intel64_check_capability.c @@ -58,9 +58,47 @@ void x86_64_check_and_enable_capability(void) { unsigned long ebx; unsigned long ecx; - unsigned long require; + unsigned long require = 0; - require = X86_64_CPUID_01_X2APIC; + /* Check SSE3 instructions availability */ + +#ifdef CONFIG_ARCH_X86_64_SSE3 + require |= X86_64_CPUID_01_SSE3; +#endif + + /* Check Supplemental SSE3 instructions availability */ + +#ifdef CONFIG_ARCH_X86_64_SSSE3 + require |= X86_64_CPUID_01_SSSE3; +#endif + + /* Check Fused multiply-add (FMA) instructions availability */ + +#ifdef CONFIG_ARCH_X86_64_FMA + require |= X86_64_CPUID_01_FMA; +#endif + + /* Check process context identifiers availability */ + +#ifdef CONFIG_ARCH_INTEL64_HAVE_PCID + require |= X86_64_CPUID_01_PCID; +#endif + + /* Check SSE4.1 instructions availability */ + +#ifdef CONFIG_ARCH_X86_64_SSE41 + require |= X86_64_CPUID_01_SSE41; +#endif + + /* Check SSE4.2 instructions availability */ + +#ifdef CONFIG_ARCH_X86_64_SSE42 + require |= X86_64_CPUID_01_SSE42; +#endif + + /* Check 
x2APIC availability */ + + require |= X86_64_CPUID_01_X2APIC; /* Check timer availability */ @@ -68,16 +106,22 @@ void x86_64_check_and_enable_capability(void) require |= X86_64_CPUID_01_TSCDEA; #endif -#ifdef CONFIG_ARCH_INTEL64_HAVE_XSAVE + /* Check XSAVE/XRSTOR availability */ + +#ifdef CONFIG_ARCH_X86_64_HAVE_XSAVE require |= X86_64_CPUID_01_XSAVE; #endif -#ifdef CONFIG_ARCH_INTEL64_HAVE_RDRAND - require |= X86_64_CPUID_01_RDRAND; + /* Check AVX instructions availability */ + +#ifdef CONFIG_ARCH_X86_64_AVX + require |= X86_64_CPUID_01_AVX; #endif -#ifdef CONFIG_ARCH_INTEL64_HAVE_PCID - require |= X86_64_CPUID_01_PCID; + /* Check RDRAND feature availability */ + +#ifdef CONFIG_ARCH_INTEL64_HAVE_RDRAND + require |= X86_64_CPUID_01_RDRAND; #endif asm volatile("cpuid" : "=c" (ecx) : "a" (X86_64_CPUID_CAP) @@ -94,6 +138,14 @@ void x86_64_check_and_enable_capability(void) require = 0; + /* Check AVX512 Foundation instructions availability */ + +#ifdef CONFIG_ARCH_X86_64_AVX512 + require |= X86_64_CPUID_07_AVX512F; +#endif + + /* Check CLWB instruction availability */ + #ifdef CONFIG_ARCH_INTEL64_HAVE_CLWB require |= X86_64_CPUID_07_CLWB; #endif