arch/x86_64: add support for FMA, AVX-2 and AVX-512
Add support for the FMA, AVX-2 and AVX-512 instruction sets for x86_64.
Signed-off-by: p-szafonimateusz <p-szafonimateusz@xiaomi.com>
This commit is contained in:
parent
2192d83200
commit
bd73a915bd
@ -30,6 +30,9 @@ config ARCH_INTEL64
|
||||
select ARCH_HAVE_SSE41
|
||||
select ARCH_HAVE_SSE42
|
||||
select ARCH_HAVE_SSE4A
|
||||
select ARCH_HAVE_FMA if ARCH_X86_64_HAVE_XSAVE
|
||||
select ARCH_HAVE_AVX if ARCH_X86_64_HAVE_XSAVE
|
||||
select ARCH_HAVE_AVX512 if ARCH_X86_64_HAVE_XSAVE
|
||||
select ARCH_ICACHE
|
||||
select ARCH_DCACHE
|
||||
select ARCH_HAVE_IRQTRIGGER
|
||||
@ -78,6 +81,10 @@ config ARCH_HAVE_SSE4A
|
||||
bool
|
||||
default n
|
||||
|
||||
config ARCH_HAVE_FMA
|
||||
bool
|
||||
default n
|
||||
|
||||
config ARCH_HAVE_AVX
|
||||
bool
|
||||
default n
|
||||
|
@ -165,12 +165,14 @@
|
||||
#define X86_64_CPUID_CAP 0x01
|
||||
# define X86_64_CPUID_01_SSE3 (1 << 0)
|
||||
# define X86_64_CPUID_01_SSSE3 (1 << 9)
|
||||
# define X86_64_CPUID_01_FMA (1 << 12)
|
||||
# define X86_64_CPUID_01_PCID (1 << 17)
|
||||
# define X86_64_CPUID_01_SSE41 (1 << 19)
|
||||
# define X86_64_CPUID_01_SSE42 (1 << 20)
|
||||
# define X86_64_CPUID_01_X2APIC (1 << 21)
|
||||
# define X86_64_CPUID_01_TSCDEA (1 << 24)
|
||||
# define X86_64_CPUID_01_XSAVE (1 << 26)
|
||||
# define X86_64_CPUID_01_AVX (1 << 28)
|
||||
# define X86_64_CPUID_01_RDRAND (1 << 30)
|
||||
# define X86_64_CPUID_01_APICID(ebx) ((ebx) >> 24)
|
||||
#define X86_64_CPUID_EXTCAP 0x07
|
||||
|
@ -143,3 +143,43 @@ endif()
|
||||
if(CONFIG_ARCH_X86_64_SSE4A)
|
||||
add_compile_options(-msse4a)
|
||||
endif()
|
||||
|
||||
# Mirror Make.defs: enable FMA code generation when CONFIG_ARCH_X86_64_FMA is set.
if(CONFIG_ARCH_X86_64_FMA)
|
||||
add_compile_options(-mfma)
|
||||
endif()
|
||||
|
||||
# Enable AVX code generation when CONFIG_ARCH_X86_64_AVX is set.
if(CONFIG_ARCH_X86_64_AVX)
|
||||
add_compile_options(-mavx)
|
||||
endif()
|
||||
|
||||
if(CONFIG_ARCH_X86_64_AVX512)
|
||||
add_compile_options(-mavx512f)
|
||||
endif()
|
||||
|
||||
if(CONFIG_ARCH_X86_64_AVX512PF)
|
||||
add_compile_options(-mavx512pf)
|
||||
endif()
|
||||
|
||||
if(CONFIG_ARCH_X86_64_AVX512ER)
|
||||
add_compile_options(-mavx512er)
|
||||
endif()
|
||||
|
||||
if(CONFIG_ARCH_X86_64_AVX512CD)
|
||||
add_compile_options(-mavx512cd)
|
||||
endif()
|
||||
|
||||
if(CONFIG_ARCH_X86_64_AVX512VL)
|
||||
add_compile_options(-mavx512vl)
|
||||
endif()
|
||||
|
||||
if(CONFIG_ARCH_X86_64_AVX512BW)
|
||||
add_compile_options(-mavx512bw)
|
||||
endif()
|
||||
|
||||
if(CONFIG_ARCH_X86_64_AVX512DQ)
|
||||
add_compile_options(-mavx512dq)
|
||||
endif()
|
||||
|
||||
if(CONFIG_ARCH_X86_64_AVX512IFMA)
|
||||
add_compile_options(-mavx512ifma)
|
||||
endif()
|
||||
|
||||
if(CONFIG_ARCH_X86_64_AVX512VBMI)
|
||||
add_compile_options(-mavx512vbmi)
|
||||
endif()
|
||||
|
@ -75,6 +75,11 @@ config ARCH_X86_64_SSE4A
|
||||
depends on ARCH_HAVE_SSE4A
|
||||
default n
|
||||
|
||||
config ARCH_X86_64_FMA
|
||||
bool "FMA support"
|
||||
depends on ARCH_HAVE_FMA && ARCH_X86_64_AVX
|
||||
default n
|
||||
|
||||
config ARCH_X86_64_AVX
|
||||
bool "AVX support"
|
||||
depends on ARCH_HAVE_AVX
|
||||
@ -85,4 +90,40 @@ config ARCH_X86_64_AVX512
|
||||
depends on ARCH_HAVE_AVX512
|
||||
default n
|
||||
|
||||
if ARCH_X86_64_AVX512
|
||||
|
||||
config ARCH_X86_64_AVX512PF
|
||||
bool "AVX512 Prefetch Instructions (AVX512PF)"
|
||||
default n
|
||||
|
||||
config ARCH_X86_64_AVX512ER
|
||||
bool "AVX512 Exponential and Reciprocal Instructions (AVX512ER)"
|
||||
default n
|
||||
|
||||
config ARCH_X86_64_AVX512CD
|
||||
bool "AVX512 Conflict Detection Instructions (AVX512CD)"
|
||||
default n
|
||||
|
||||
config ARCH_X86_64_AVX512VL
|
||||
bool "AVX512 Vector Length Extensions (AVX512VL)"
|
||||
default n
|
||||
|
||||
config ARCH_X86_64_AVX512BW
|
||||
bool "AVX512 Byte and Word Instructions (AVX512BW)"
|
||||
default n
|
||||
|
||||
config ARCH_X86_64_AVX512DQ
|
||||
bool "AVX512 Doubleword and Quadword Instructions (AVX512DQ)"
|
||||
default n
|
||||
|
||||
config ARCH_X86_64_AVX512IFMA
|
||||
bool "AVX512 Integer Fused Multiply-Add Instructions (AVX512IFMA)"
|
||||
default n
|
||||
|
||||
config ARCH_X86_64_AVX512VBMI
|
||||
bool "AVX512 Vector Bit Manipulation Instructions (AVX512VBMI)"
|
||||
default n
|
||||
|
||||
endif # ARCH_X86_64_AVX512
|
||||
|
||||
endif
|
||||
|
@ -56,16 +56,6 @@ NM = $(CROSSDEV)nm
|
||||
OBJCOPY = $(CROSSDEV)objcopy
|
||||
OBJDUMP = $(CROSSDEV)objdump
|
||||
|
||||
CFLAGS := $(ARCHWARNINGS) $(ARCHOPTIMIZATION) $(ARCHCPUFLAGS) $(ARCHINCLUDES) $(ARCHDEFINES) $(EXTRAFLAGS) -pipe
|
||||
CPPFLAGS := $(ARCHINCLUDES) $(ARCHDEFINES) $(EXTRAFLAGS)
|
||||
AFLAGS := $(CFLAGS) -D__ASSEMBLY__
|
||||
|
||||
ifeq ($(CONFIG_HOST_MACOS),y)
|
||||
AFLAGS += -Wa,--divide
|
||||
endif
|
||||
|
||||
EXEEXT = .elf
|
||||
|
||||
ifeq ($(CONFIG_ARCH_X86_64_SSE3),y)
|
||||
ARCHCPUFLAGS += -msse3
|
||||
endif
|
||||
@ -75,13 +65,67 @@ ifeq ($(CONFIG_ARCH_X86_64_SSSE3),y)
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARCH_X86_64_SSE41),y)
|
||||
ARCHCPUFLAGS += -msse41
|
||||
ARCHCPUFLAGS += -msse4.1
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARCH_X86_64_SSE42),y)
|
||||
ARCHCPUFLAGS += -msse42
|
||||
ARCHCPUFLAGS += -msse4.2
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARCH_X86_64_SSE4a),y)
|
||||
ifeq ($(CONFIG_ARCH_X86_64_SSE4A),y)
|
||||
ARCHCPUFLAGS += -msse4a
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARCH_X86_64_FMA),y)
|
||||
ARCHCPUFLAGS += -mfma
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARCH_X86_64_AVX),y)
|
||||
ARCHCPUFLAGS += -mavx
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARCH_X86_64_AVX512),y)
|
||||
ARCHCPUFLAGS += -mavx512f
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARCH_X86_64_AVX512PF),y)
|
||||
ARCHCPUFLAGS += -mavx512pf
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARCH_X86_64_AVX512ER),y)
|
||||
ARCHCPUFLAGS += -mavx512er
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARCH_X86_64_AVX512CD),y)
|
||||
ARCHCPUFLAGS += -mavx512cd
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARCH_X86_64_AVX512VL),y)
|
||||
ARCHCPUFLAGS += -mavx512vl
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARCH_X86_64_AVX512BW),y)
|
||||
ARCHCPUFLAGS += -mavx512bw
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARCH_X86_64_AVX512DQ),y)
|
||||
ARCHCPUFLAGS += -mavx512dq
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARCH_X86_64_AVX512IFMA),y)
|
||||
ARCHCPUFLAGS += -mavx512ifma
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARCH_X86_64_AVX512VBMI),y)
|
||||
ARCHCPUFLAGS += -mavx512vbmi
|
||||
endif
|
||||
|
||||
# CFLAGS is assigned with ':=' (expanded immediately), so it must come after
# every 'ARCHCPUFLAGS +=' above; otherwise those CPU flags would not be included.
CFLAGS := $(ARCHWARNINGS) $(ARCHOPTIMIZATION) $(ARCHCPUFLAGS) $(ARCHINCLUDES) $(ARCHDEFINES) $(EXTRAFLAGS) -pipe
|
||||
CPPFLAGS := $(ARCHINCLUDES) $(ARCHDEFINES) $(EXTRAFLAGS)
|
||||
# Assembler flags reuse the C flags and additionally define __ASSEMBLY__.
AFLAGS := $(CFLAGS) -D__ASSEMBLY__
|
||||
|
||||
ifeq ($(CONFIG_HOST_MACOS),y)
|
||||
AFLAGS += -Wa,--divide
|
||||
endif
|
||||
|
||||
EXEEXT = .elf
|
||||
|
@ -58,9 +58,47 @@ void x86_64_check_and_enable_capability(void)
|
||||
{
|
||||
unsigned long ebx;
|
||||
unsigned long ecx;
|
||||
unsigned long require;
|
||||
unsigned long require = 0;
|
||||
|
||||
require = X86_64_CPUID_01_X2APIC;
|
||||
/* Check SSE3 instructions availability */
|
||||
|
||||
#ifdef CONFIG_ARCH_X86_64_SSE3
|
||||
require |= X86_64_CPUID_01_SSE3;
|
||||
#endif
|
||||
|
||||
/* Check Supplemental SSE3 (SSSE3) instructions availability */
|
||||
|
||||
#ifdef CONFIG_ARCH_X86_64_SSSE3
|
||||
require |= X86_64_CPUID_01_SSSE3;
|
||||
#endif
|
||||
|
||||
/* Check Fused multiply-add (FMA) instructions availability */
|
||||
|
||||
#ifdef CONFIG_ARCH_X86_64_FMA
|
||||
require |= X86_64_CPUID_01_FMA;
|
||||
#endif
|
||||
|
||||
/* Check process context identifiers availability */
|
||||
|
||||
#ifdef CONFIG_ARCH_INTEL64_HAVE_PCID
|
||||
require |= X86_64_CPUID_01_PCID;
|
||||
#endif
|
||||
|
||||
/* Check SSE4.1 instructions availability */
|
||||
|
||||
#ifdef CONFIG_ARCH_X86_64_SSE41
|
||||
require |= X86_64_CPUID_01_SSE41;
|
||||
#endif
|
||||
|
||||
/* Check SSE4.2 instructions availability */
|
||||
|
||||
#ifdef CONFIG_ARCH_X86_64_SSE42
|
||||
require |= X86_64_CPUID_01_SSE42;
|
||||
#endif
|
||||
|
||||
/* Check x2APIC availability */
|
||||
|
||||
require |= X86_64_CPUID_01_X2APIC;
|
||||
|
||||
/* Check timer availability */
|
||||
|
||||
@ -68,16 +106,22 @@ void x86_64_check_and_enable_capability(void)
|
||||
require |= X86_64_CPUID_01_TSCDEA;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_ARCH_INTEL64_HAVE_XSAVE
|
||||
/* Check XSAVE/XRSTOR availability */
|
||||
|
||||
#ifdef CONFIG_ARCH_X86_64_HAVE_XSAVE
|
||||
require |= X86_64_CPUID_01_XSAVE;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_ARCH_INTEL64_HAVE_RDRAND
|
||||
require |= X86_64_CPUID_01_RDRAND;
|
||||
/* Check AVX instructions availability */
|
||||
|
||||
#ifdef CONFIG_ARCH_X86_64_AVX
|
||||
require |= X86_64_CPUID_01_AVX;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_ARCH_INTEL64_HAVE_PCID
|
||||
require |= X86_64_CPUID_01_PCID;
|
||||
/* Check RDRAND feature availability */
|
||||
|
||||
#ifdef CONFIG_ARCH_INTEL64_HAVE_RDRAND
|
||||
require |= X86_64_CPUID_01_RDRAND;
|
||||
#endif
|
||||
|
||||
asm volatile("cpuid" : "=c" (ecx) : "a" (X86_64_CPUID_CAP)
|
||||
@ -94,6 +138,14 @@ void x86_64_check_and_enable_capability(void)
|
||||
|
||||
require = 0;
|
||||
|
||||
/* Check AVX512 Foundation instructions availability */
|
||||
|
||||
#ifdef CONFIG_ARCH_X86_64_AVX512
|
||||
require |= X86_64_CPUID_07_AVX512F;
|
||||
#endif
|
||||
|
||||
/* Check CLWB instruction availability */
|
||||
|
||||
#ifdef CONFIG_ARCH_INTEL64_HAVE_CLWB
|
||||
require |= X86_64_CPUID_07_CLWB;
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user