summary | refs | log | tree | commit | diff | stats
path: root/util
diff options
context:
space:
mode:
author: Thiago Macieira <thiago.macieira@intel.com>, 2023-05-19 22:37:52 -0700
committer: Thiago Macieira <thiago.macieira@intel.com>, 2023-05-26 16:22:22 -0700
commit: f89c4c4f8fd5afbb6a5480fe2fc13bb6dcaace47 (patch)
tree: 31429c50f70faf0832d2a4eca46f13048ec2ddd3 /util
parent: 718dae8e0be499c0c722fa3fcbd36f97aba3e22b (diff)
qsimd_x86: update from upstream and enable a few more features
After https://github.com/opendcdiag/opendcdiag/pull/223.

Enabled for Qt:
 * waitpkg
 * RAO (Remote Atomic Operations)
 * CMPccXADD
 * avxifma
 * LAM (Linear Address Masking)

Disabled:
 * AVX-512 VNNI

Change-Id: I5f7f427ded124479baa6fffd1760c35ed5b2adbb
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'util')
-rw-r--r-- util/x86simdgen/3rdparty/simd-intel.conf | 77
-rwxr-xr-x util/x86simdgen/3rdparty/x86simd_generate.pl | 5
2 files changed, 63 insertions, 19 deletions
diff --git a/util/x86simdgen/3rdparty/simd-intel.conf b/util/x86simdgen/3rdparty/simd-intel.conf
index 0db4259f97..3837855c19 100644
--- a/util/x86simdgen/3rdparty/simd-intel.conf
+++ b/util/x86simdgen/3rdparty/simd-intel.conf
@@ -50,13 +50,13 @@ avx512vl Leaf07_00EBX 31 avx512f # AVX512 Vector Length
avx512vbmi Leaf07_00ECX 1 avx512f # AVX512 Vector Byte Manipulation Instructions
#pku Leaf07_00ECX 3 # Protection Keys for User mode
#ospke Leaf07_00ECX 4 # Protection Keys Enabled by OS
-#waitpkg Leaf07_00ECX 5 # User-Level Monitor / Wait
+waitpkg Leaf07_00ECX 5 # User-Level Monitor / Wait
avx512vbmi2 Leaf07_00ECX 6 avx512f # AVX512 Vector Byte Manipulation Instructions 2
shstk Leaf07_00ECX 7 # Control Flow Enforcement Technology Shadow Stack
gfni Leaf07_00ECX 8 # Galois Field new instructions
vaes Leaf07_00ECX 9 avx2,avx,aes # 256- and 512-bit AES
#vpclmulqdq Leaf07_00ECX 10 avx # 256- and 512-bit Carryless Multiply
-avx512vnni Leaf07_00ECX 11 avx512f # AVX512 Vector Neural Network Instructions
+#avx512vnni Leaf07_00ECX 11 avx512f # AVX512 Vector Neural Network Instructions
avx512bitalg Leaf07_00ECX 12 avx512f # AVX512 Bit Algorithms
avx512vpopcntdq Leaf07_00ECX 14 avx512f # AVX512 Population Count
#la57 Leaf07_00ECX 16 # 5-level page tables
@@ -78,16 +78,24 @@ hybrid Leaf07_00EDX 15 # Hybrid processor
ibt Leaf07_00EDX 20 # Control Flow Enforcement Technology Indirect Branch Tracking
#amxbf16 Leaf07_00EDX 22 amxtile # AMX Tile multiplication in BFloat16
avx512fp16 Leaf07_00EDX 23 avx512f,f16c # AVX512 16-bit Floating Point
-#amxtile Leaf07_00EDX 24 # Advanced Matrix Extensions Tile support
-#amxint8 Leaf07_00EDX 25 amxtile # AMX Tile multiplication for Int8
+#amx-tile Leaf07_00EDX 24 # Advanced Matrix Extensions Tile support
+#amx-int8 Leaf07_00EDX 25 amx-tile # AMX Tile multiplication for Int8
+raoint Leaf07_01EAX 3 # Remote Atomic Operations, Integer
#avxvnni Leaf07_01EAX 4 avx # AVX (VEX-encoded) versions of the Vector Neural Network Instructions
#avx512bf16 Leaf07_01EAX 5 avx512f # AVX512 Brain Float16
+cmpccxadd Leaf07_01EAX 6 # CMPccXADD instructions
#zlmovsb Leaf07_01EAX 10 # Zero-length MOVSB
#fsrs Leaf07_01EAX 11 # Fast Short (REP?) STOSB
#fsrc Leaf07_01EAX 12 # Fast Short (REP?) CMPSB, SCASB
#fred Leaf07_01EAX 17 # Flexible Return and Event Delivery
#lkgs Leaf07_01EAX 18 # Load into Kernel GS
-#lam Leaf07_01EAX 26 # Linear Address Masking
+#amx-fp16 Leaf07_01EAX 21 amx-tile # AMX Tile multiplication in FP16
+avxifma Leaf07_01EAX 23 avx # AVX-IFMA instructions
+lam Leaf07_01EAX 26 # Linear Address Masking
+#avxvnniint8 Leaf07_01EDX 4 avx # AVX Vector Neural Network Instructions, Int8
+#avxneconvert Leaf07_01EDX 5 avx # AVX Non-Exception BF16/FP16/FP32 Conversion instructions
+#amx-complex Leaf07_01EDX 8 amx-tile # AMX Complex Matrix multiplication
+#prefetchiti Leaf07_01EDX 14 # PREFETCHIT0/1 instructions
#xsaveopt Leaf13_01EAX 0 # Optimized XSAVE
#xsavec Leaf13_01EAX 1 # XSAVE with Compaction
#xgetbv1 Leaf13_01EAX 2 # XGETBV with ECX=1
@@ -122,12 +130,12 @@ xsave=AvxState SseState|Ymm_Hi128 avx,fma,avx512f
xsave=MPXState Bndregs|Bndcsr mpx
xsave=Avx512State AvxState|OpMask|Zmm_Hi256|Hi16_Zmm avx512f
xsave=CetState CetUState|CetSState shstk
-xsave=AmxState Xtilecfg|Xtiledata amxtile
+xsave=AmxState Xtilecfg|Xtiledata amx-tile
# Processor/arch listing below this line
# Source: Intel Instruction Set Extension manual, section 1.2
# Source: GCC gcc/config/i386/i386.h, i386-c.c, i386-builtins.c
-# Architecture Based on New features Optional features
+# Architecture Based on New features
arch=x86_64 <> sse2
# Core line
arch=Core2 x86_64 sse3,ssse3,cx16
@@ -135,26 +143,44 @@ arch=NHM Core2 sse4.1,sse4.2,popcnt
arch=WSM NHM
arch=SNB WSM avx
arch=IVB SNB f16c,rdrnd,fsgsbase
-arch=HSW IVB avx2,fma,bmi,bmi2,lzcnt,movbe
+arch=HSW IVB avx2,fma,bmi,bmi2,lzcnt,movbe # hle,rtm
arch=BDW HSW adx,rdseed
arch=BDX BDW
arch=SKL BDW xsavec,xsaves
-arch=ADL SKL avxvnni,gfni,vaes,vpclmulqdq,serialize,shstk,cldemote,movdiri,movdir64b,ibt,waitpkg,keylocker rdpid
-arch=SKX SKL avx512f,avx512dq,avx512cd,avx512bw,avx512vl clwb
+arch=SKX SKL avx512f,avx512dq,avx512cd,avx512bw,avx512vl #clwb
arch=CLX SKX avx512vnni
arch=CPX CLX avx512bf16
-arch=CNL SKX avx512ifma,avx512vbmi sha
-arch=ICL CNL avx512vbmi2,gfni,vaes,vpclmulqdq,avx512vnni,avx512bitalg,avx512vpopcntdq fsrm,rdpid
-arch=ICX ICL pconfig
-arch=TGL ICL avx512vp2intersect,shstk,,movdiri,movdir64b,ibt,keylocker
-arch=SPR TGL avx512bf16,amxtile,amxbf16,amxint8,avxvnni,cldemote,pconfig,waitpkg,serialize,tsxldtrk,uintr
+arch=PLC SKX avx512ifma,avx512vbmi #sha
+arch=SNC PLC avx512vbmi2,gfni,vaes,vpclmulqdq,avx512vnni,avx512bitalg,avx512vpopcntdq #fsrm,rdpid
+arch=WLC SNC shstk,movdiri,movdir64b,ibt,keylocker # avx512vp2intersect
+arch=GLC WLC avx512bf16,avxvnni,cldemote,waitpkg,serialize,uintr # tsxldtrk
+arch=RPC GLC
+arch=RWC RPC prefetchiti
# Atom line
arch=SLM WSM rdrnd,movbe
arch=GLM SLM fsgsbase,rdseed,lzcnt,xsavec,xsaves
arch=TNT GLM clwb,gfni,cldemote,waitpkg,movdiri,movdir64b
+arch=GRT SKL avxvnni,gfni,vaes,vpclmulqdq,serialize,shstk,cldemote,movdiri,movdir64b,ibt,waitpkg,keylocker # rdpid
+arch=CMT GRT cmpccxadd,avxifma,avxneconvert,avxvnniint8
# Xeon Phi line
#arch=KNL SKL avx512f,avx512er,avx512pf,avx512cd
#arch=KNM KNL avx5124fmaps,avx5124vnniw,avx512vpopcntdq
+ # Hybrids and other names
+arch=CNL PLC
+arch=ICL SNC
+arch=TGL WLC
+arch=ADL GRT
+arch=RPL GRT
+arch=MTL CMT
+arch=ARL CMT
+arch=LNL CMT
+arch=ICX SNC pconfig
+arch=SPR GLC pconfig,amx-tile,amx-bf16,amx-int8
+arch=EMR SPR
+arch=GNR GLC pconfig,amx-tile,amx-bf16,amx-int8,amx-fp16,amx-complex
+arch=SRF CMT cmpccxadd,avxifma,avxneconvert,avxvnniint8
+arch=GRR SRF raoint
+arch=CWF SRF
# Longer names
arch=Nehalem NHM # Intel Core i3/i5/i7
arch=Westmere WSM # Intel Core i3/i5/i7
@@ -166,14 +192,31 @@ arch=Skylake SKL # Sixth Generation Intel Core i3/i5/i7
arch=Skylake-Avx512 SKX # Intel Xeon Scalable
arch=CascadeLake CLX # Second Generation Intel Xeon Scalable
arch=CooperLake CPX # Third Generation Intel Xeon Scalable
+arch=PalmCove PLC
arch=CannonLake CNL # Intel Core i3-8121U
+arch=SunnyCove SNC
arch=IceLake-Client ICL # Tenth Generation Intel Core i3/i5/i7
arch=IceLake-Server ICX # Third Generation Intel Xeon Scalable
-arch=AlderLake ADL
-arch=SapphireRapids SPR
+arch=WillowCove WLC
arch=TigerLake TGL # Eleventh Generation Intel Core i3/i5/i7
+arch=GoldenCove GLC
+arch=AlderLake ADL # Twelfth Generation Intel Core
+arch=RaptorCove RPC
+arch=RaptorLake RPL # Thirteenth Generation Intel Core
+arch=RedwoodCove RWC
+arch=MeteorLake MTL
+arch=ArrowLake ARL
+arch=LunarLake LNL
+arch=SapphireRapids SPR # Fourth Generation Intel Xeon Scalable
+arch=EmeraldRapids EMR # Fifth Generation Intel Xeon Scalable
+arch=GraniteRapids GNR
arch=Silvermont SLM
arch=Goldmont GLM
arch=Tremont TNT
+arch=Gracemont GRT
+arch=Crestmont CMT
+arch=GrandRidge GRR
+arch=SierraForest SRF
+arch=ClearwaterForest CWF
#arch=KnightsLanding KNL
#arch=KnightsMill KNM
diff --git a/util/x86simdgen/3rdparty/x86simd_generate.pl b/util/x86simdgen/3rdparty/x86simd_generate.pl
index 99236b6eec..66bb1bbd9c 100755
--- a/util/x86simdgen/3rdparty/x86simd_generate.pl
+++ b/util/x86simdgen/3rdparty/x86simd_generate.pl
@@ -13,6 +13,7 @@ my %leaves = (
Leaf07_00ECX => "CPUID Leaf 7, Sub-leaf 0, ECX",
Leaf07_00EDX => "CPUID Leaf 7, Sub-leaf 0, EDX",
Leaf07_01EAX => "CPUID Leaf 7, Sub-leaf 1, EAX",
+ Leaf07_01EDX => "CPUID Leaf 7, Sub-leaf 1, EDX",
Leaf13_01EAX => "CPUID Leaf 13, Sub-leaf 1, EAX",
Leaf80000001hECX => "CPUID Leaf 80000001h, ECX",
Leaf80000008hEBX => "CPUID Leaf 80000008h, EBX",
@@ -258,7 +259,7 @@ print "\nenum X86CpuidLeaves {";
map { print " $_," } @leafNames;
print " X86CpuidMaxLeaf\n};";
-my $type = scalar %leaves > 8 ? "uint16_t" : "uint8_t";
+my $type = scalar keys %leaves > 8 ? "uint16_t" : "uint8_t";
printf "\nstatic const %s x86_locators[] = {\n",
$type, $type;
for (my $j = 0; $j < scalar @features; ++$j) {
@@ -283,7 +284,7 @@ struct X86Architecture
};
static const struct X86Architecture x86_architectures[] = {|;
-for (sort { $b <=> $a } keys %sorted_archs) {
+for (sort keys %sorted_archs) {
my $arch = $sorted_archs{$_};
next if $arch->{base} eq "<>";
printf " { cpu_%s, \"%s\" },\n", $arch->{id}, $arch->{prettyname};