diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2023-05-19 22:37:52 -0700 |
---|---|---|
committer | Thiago Macieira <thiago.macieira@intel.com> | 2023-05-26 16:22:22 -0700 |
commit | f89c4c4f8fd5afbb6a5480fe2fc13bb6dcaace47 (patch) | |
tree | 31429c50f70faf0832d2a4eca46f13048ec2ddd3 /util | |
parent | 718dae8e0be499c0c722fa3fcbd36f97aba3e22b (diff) |
qsimd_x86: update from upstream and enable a few more features
Updated to the upstream state after https://github.com/opendcdiag/opendcdiag/pull/223.
Enabled for Qt:
* waitpkg
* RAO (Remote Atomic Operations)
* CMPccXADD
* avxifma
* LAM (Linear Address Masking)
Disabled:
* AVX-512 VNNI
Change-Id: I5f7f427ded124479baa6fffd1760c35ed5b2adbb
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'util')
-rw-r--r-- | util/x86simdgen/3rdparty/simd-intel.conf | 77 | ||||
-rwxr-xr-x | util/x86simdgen/3rdparty/x86simd_generate.pl | 5 |
2 files changed, 63 insertions, 19 deletions
diff --git a/util/x86simdgen/3rdparty/simd-intel.conf b/util/x86simdgen/3rdparty/simd-intel.conf index 0db4259f97..3837855c19 100644 --- a/util/x86simdgen/3rdparty/simd-intel.conf +++ b/util/x86simdgen/3rdparty/simd-intel.conf @@ -50,13 +50,13 @@ avx512vl Leaf07_00EBX 31 avx512f # AVX512 Vector Length avx512vbmi Leaf07_00ECX 1 avx512f # AVX512 Vector Byte Manipulation Instructions #pku Leaf07_00ECX 3 # Protection Keys for User mode #ospke Leaf07_00ECX 4 # Protection Keys Enabled by OS -#waitpkg Leaf07_00ECX 5 # User-Level Monitor / Wait +waitpkg Leaf07_00ECX 5 # User-Level Monitor / Wait avx512vbmi2 Leaf07_00ECX 6 avx512f # AVX512 Vector Byte Manipulation Instructions 2 shstk Leaf07_00ECX 7 # Control Flow Enforcement Technology Shadow Stack gfni Leaf07_00ECX 8 # Galois Field new instructions vaes Leaf07_00ECX 9 avx2,avx,aes # 256- and 512-bit AES #vpclmulqdq Leaf07_00ECX 10 avx # 256- and 512-bit Carryless Multiply -avx512vnni Leaf07_00ECX 11 avx512f # AVX512 Vector Neural Network Instructions +#avx512vnni Leaf07_00ECX 11 avx512f # AVX512 Vector Neural Network Instructions avx512bitalg Leaf07_00ECX 12 avx512f # AVX512 Bit Algorithms avx512vpopcntdq Leaf07_00ECX 14 avx512f # AVX512 Population Count #la57 Leaf07_00ECX 16 # 5-level page tables @@ -78,16 +78,24 @@ hybrid Leaf07_00EDX 15 # Hybrid processor ibt Leaf07_00EDX 20 # Control Flow Enforcement Technology Indirect Branch Tracking #amxbf16 Leaf07_00EDX 22 amxtile # AMX Tile multiplication in BFloat16 avx512fp16 Leaf07_00EDX 23 avx512f,f16c # AVX512 16-bit Floating Point -#amxtile Leaf07_00EDX 24 # Advanced Matrix Extensions Tile support -#amxint8 Leaf07_00EDX 25 amxtile # AMX Tile multiplication for Int8 +#amx-tile Leaf07_00EDX 24 # Advanced Matrix Extensions Tile support +#amx-int8 Leaf07_00EDX 25 amx-tile # AMX Tile multiplication for Int8 +raoint Leaf07_01EAX 3 # Remote Atomic Operations, Integer #avxvnni Leaf07_01EAX 4 avx # AVX (VEX-encoded) versions of the Vector Neural Network Instructions #avx512bf16 
Leaf07_01EAX 5 avx512f # AVX512 Brain Float16 +cmpccxadd Leaf07_01EAX 6 # CMPccXADD instructions #zlmovsb Leaf07_01EAX 10 # Zero-length MOVSB #fsrs Leaf07_01EAX 11 # Fast Short (REP?) STOSB #fsrc Leaf07_01EAX 12 # Fast Short (REP?) CMPSB, SCASB #fred Leaf07_01EAX 17 # Flexible Return and Event Delivery #lkgs Leaf07_01EAX 18 # Load into Kernel GS -#lam Leaf07_01EAX 26 # Linear Address Masking +#amx-fp16 Leaf07_01EAX 21 amx-tile # AMX Tile multiplication in FP16 +avxifma Leaf07_01EAX 23 avx # AVX-IFMA instructions +lam Leaf07_01EAX 26 # Linear Address Masking +#avxvnniint8 Leaf07_01EDX 4 avx # AVX Vector Neural Network Instructions, Int8 +#avxneconvert Leaf07_01EDX 5 avx # AVX Non-Exception BF16/FP16/FP32 Conversion instructions +#amx-complex Leaf07_01EDX 8 amx-tile # AMX Complex Matrix multiplication +#prefetchiti Leaf07_01EDX 14 # PREFETCHIT0/1 instructions #xsaveopt Leaf13_01EAX 0 # Optimized XSAVE #xsavec Leaf13_01EAX 1 # XSAVE with Compaction #xgetbv1 Leaf13_01EAX 2 # XGETBV with ECX=1 @@ -122,12 +130,12 @@ xsave=AvxState SseState|Ymm_Hi128 avx,fma,avx512f xsave=MPXState Bndregs|Bndcsr mpx xsave=Avx512State AvxState|OpMask|Zmm_Hi256|Hi16_Zmm avx512f xsave=CetState CetUState|CetSState shstk -xsave=AmxState Xtilecfg|Xtiledata amxtile +xsave=AmxState Xtilecfg|Xtiledata amx-tile # Processor/arch listing below this line # Source: Intel Instruction Set Extension manual, section 1.2 # Source: GCC gcc/config/i386/i386.h, i386-c.c, i386-builtins.c -# Architecture Based on New features Optional features +# Architecture Based on New features arch=x86_64 <> sse2 # Core line arch=Core2 x86_64 sse3,ssse3,cx16 @@ -135,26 +143,44 @@ arch=NHM Core2 sse4.1,sse4.2,popcnt arch=WSM NHM arch=SNB WSM avx arch=IVB SNB f16c,rdrnd,fsgsbase -arch=HSW IVB avx2,fma,bmi,bmi2,lzcnt,movbe +arch=HSW IVB avx2,fma,bmi,bmi2,lzcnt,movbe # hle,rtm arch=BDW HSW adx,rdseed arch=BDX BDW arch=SKL BDW xsavec,xsaves -arch=ADL SKL 
avxvnni,gfni,vaes,vpclmulqdq,serialize,shstk,cldemote,movdiri,movdir64b,ibt,waitpkg,keylocker rdpid -arch=SKX SKL avx512f,avx512dq,avx512cd,avx512bw,avx512vl clwb +arch=SKX SKL avx512f,avx512dq,avx512cd,avx512bw,avx512vl #clwb arch=CLX SKX avx512vnni arch=CPX CLX avx512bf16 -arch=CNL SKX avx512ifma,avx512vbmi sha -arch=ICL CNL avx512vbmi2,gfni,vaes,vpclmulqdq,avx512vnni,avx512bitalg,avx512vpopcntdq fsrm,rdpid -arch=ICX ICL pconfig -arch=TGL ICL avx512vp2intersect,shstk,,movdiri,movdir64b,ibt,keylocker -arch=SPR TGL avx512bf16,amxtile,amxbf16,amxint8,avxvnni,cldemote,pconfig,waitpkg,serialize,tsxldtrk,uintr +arch=PLC SKX avx512ifma,avx512vbmi #sha +arch=SNC PLC avx512vbmi2,gfni,vaes,vpclmulqdq,avx512vnni,avx512bitalg,avx512vpopcntdq #fsrm,rdpid +arch=WLC SNC shstk,movdiri,movdir64b,ibt,keylocker # avx512vp2intersect +arch=GLC WLC avx512bf16,avxvnni,cldemote,waitpkg,serialize,uintr # tsxldtrk +arch=RPC GLC +arch=RWC RPC prefetchiti # Atom line arch=SLM WSM rdrnd,movbe arch=GLM SLM fsgsbase,rdseed,lzcnt,xsavec,xsaves arch=TNT GLM clwb,gfni,cldemote,waitpkg,movdiri,movdir64b +arch=GRT SKL avxvnni,gfni,vaes,vpclmulqdq,serialize,shstk,cldemote,movdiri,movdir64b,ibt,waitpkg,keylocker # rdpid +arch=CMT GRT cmpccxadd,avxifma,avxneconvert,avxvnniint8 # Xeon Phi line #arch=KNL SKL avx512f,avx512er,avx512pf,avx512cd #arch=KNM KNL avx5124fmaps,avx5124vnniw,avx512vpopcntdq + # Hybrids and other names +arch=CNL PLC +arch=ICL SNC +arch=TGL WLC +arch=ADL GRT +arch=RPL GRT +arch=MTL CMT +arch=ARL CMT +arch=LNL CMT +arch=ICX SNC pconfig +arch=SPR GLC pconfig,amx-tile,amx-bf16,amx-int8 +arch=EMR SPR +arch=GNR GLC pconfig,amx-tile,amx-bf16,amx-int8,amx-fp16,amx-complex +arch=SRF CMT cmpccxadd,avxifma,avxneconvert,avxvnniint8 +arch=GRR SRF raoint +arch=CWF SRF # Longer names arch=Nehalem NHM # Intel Core i3/i5/i7 arch=Westmere WSM # Intel Core i3/i5/i7 @@ -166,14 +192,31 @@ arch=Skylake SKL # Sixth Generation Intel Core i3/i5/i7 arch=Skylake-Avx512 SKX # Intel Xeon Scalable 
arch=CascadeLake CLX # Second Generation Intel Xeon Scalable arch=CooperLake CPX # Third Generation Intel Xeon Scalable +arch=PalmCove PLC arch=CannonLake CNL # Intel Core i3-8121U +arch=SunnyCove SNC arch=IceLake-Client ICL # Tenth Generation Intel Core i3/i5/i7 arch=IceLake-Server ICX # Third Generation Intel Xeon Scalable -arch=AlderLake ADL -arch=SapphireRapids SPR +arch=WillowCove WLC arch=TigerLake TGL # Eleventh Generation Intel Core i3/i5/i7 +arch=GoldenCove GLC +arch=AlderLake ADL # Twelfth Generation Intel Core +arch=RaptorCove RPC +arch=RaptorLake RPL # Thirteenth Generation Intel Core +arch=RedwoodCove RWC +arch=MeteorLake MTL +arch=ArrowLake ARL +arch=LunarLake LNL +arch=SapphireRapids SPR # Fourth Generation Intel Xeon Scalable +arch=EmeraldRapids EMR # Fifth Generation Intel Xeon Scalable +arch=GraniteRapids GNR arch=Silvermont SLM arch=Goldmont GLM arch=Tremont TNT +arch=Gracemont GRT +arch=Crestmont CMT +arch=GrandRidge GRR +arch=SierraForest SRF +arch=ClearwaterForest CWF #arch=KnightsLanding KNL #arch=KnightsMill KNM diff --git a/util/x86simdgen/3rdparty/x86simd_generate.pl b/util/x86simdgen/3rdparty/x86simd_generate.pl index 99236b6eec..66bb1bbd9c 100755 --- a/util/x86simdgen/3rdparty/x86simd_generate.pl +++ b/util/x86simdgen/3rdparty/x86simd_generate.pl @@ -13,6 +13,7 @@ my %leaves = ( Leaf07_00ECX => "CPUID Leaf 7, Sub-leaf 0, ECX", Leaf07_00EDX => "CPUID Leaf 7, Sub-leaf 0, EDX", Leaf07_01EAX => "CPUID Leaf 7, Sub-leaf 1, EAX", + Leaf07_01EDX => "CPUID Leaf 7, Sub-leaf 1, EDX", Leaf13_01EAX => "CPUID Leaf 13, Sub-leaf 1, EAX", Leaf80000001hECX => "CPUID Leaf 80000001h, ECX", Leaf80000008hEBX => "CPUID Leaf 80000008h, EBX", @@ -258,7 +259,7 @@ print "\nenum X86CpuidLeaves {"; map { print " $_," } @leafNames; print " X86CpuidMaxLeaf\n};"; -my $type = scalar %leaves > 8 ? "uint16_t" : "uint8_t"; +my $type = scalar keys %leaves > 8 ? 
"uint16_t" : "uint8_t"; printf "\nstatic const %s x86_locators[] = {\n", $type, $type; for (my $j = 0; $j < scalar @features; ++$j) { @@ -283,7 +284,7 @@ struct X86Architecture }; static const struct X86Architecture x86_architectures[] = {|; -for (sort { $b <=> $a } keys %sorted_archs) { +for (sort keys %sorted_archs) { my $arch = $sorted_archs{$_}; next if $arch->{base} eq "<>"; printf " { cpu_%s, \"%s\" },\n", $arch->{id}, $arch->{prettyname}; |