diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2022-01-28 11:31:37 -0800 |
---|---|---|
committer | Thiago Macieira <thiago.macieira@intel.com> | 2022-02-18 15:46:41 -0800 |
commit | b852584556bec3750bad7fac984b6fc5af4c870f (patch) | |
tree | 57aa51a33b976c65a464cfc2141ad6beed3dca36 /util | |
parent | 05428d9b97669f962cc3a9991a43f9a870e144b5 (diff) |
qsimd: update the generator script from OpenDCDiag
I'd been making changes to that and improving it for the past 2 years
without bringing it back into Qt.
The list of features is mostly the same, except:
- removed TSX features
- removed features specific to Xeon Phi processors
- added CET and AVX512FP16 features
- added the bit for hybrid CPU detection
See matching update at https://github.com/opendcdiag/opendcdiag/pull/49
Change-Id: I6fcda969a9e9427198bffffd16ce860b5a38aece
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'util')
-rw-r--r-- | util/x86simdgen/.gitignore | 1 | ||||
-rw-r--r-- | util/x86simdgen/3rdparty/simd-amd.conf | 35 | ||||
-rw-r--r-- | util/x86simdgen/3rdparty/simd-intel.conf | 179 | ||||
-rwxr-xr-x | util/x86simdgen/3rdparty/x86simd_generate.pl | 329 | ||||
-rw-r--r-- | util/x86simdgen/Makefile | 19 | ||||
-rw-r--r-- | util/x86simdgen/README.md | 13 | ||||
-rwxr-xr-x | util/x86simdgen/generate.pl | 277 | ||||
-rw-r--r-- | util/x86simdgen/header | 39 | ||||
-rw-r--r-- | util/x86simdgen/simd.txt | 37 |
9 files changed, 615 insertions, 314 deletions
diff --git a/util/x86simdgen/.gitignore b/util/x86simdgen/.gitignore new file mode 100644 index 0000000000..e9c3a83dde --- /dev/null +++ b/util/x86simdgen/.gitignore @@ -0,0 +1 @@ +qsimd_x86_p.h diff --git a/util/x86simdgen/3rdparty/simd-amd.conf b/util/x86simdgen/3rdparty/simd-amd.conf new file mode 100644 index 0000000000..dd7c214207 --- /dev/null +++ b/util/x86simdgen/3rdparty/simd-amd.conf @@ -0,0 +1,35 @@ +# -*- mode: conf; indent-tabs-mode: t -*- +# Feature CPUID function Bit Required feature +#mmxext Leaf80000001hEDX 22 # AMD extensions to MMX +#rdtscp Leaf80000001hEDX 27 # RDTSCP instruction +#3dnow Leaf80000001hEDX 31 # 3DNow! instructions +#3dnowext Leaf80000001hEDX 30 # AMD extensions to 3DNow! +lzcnt Leaf80000001hECX 5 # Leading Zero Count +sse4a Leaf80000001hECX 6 # SSE4a +xop Leaf80000001hECX 11 # eXtended Operations +fma4 Leaf80000001hECX 16 # 4-operand Fused Multiply-Add +tbm Leaf80000001hECX 21 # Trailing Bit Manipulation +clzero Leaf80000008hEBX 0 # Cacheline clear and write zero +wbnoinvd Leaf80000008hEBX 9 # Write Back with No Invalidate + +# Processor/arch listing below this line +# Source: GCC gcc/common/config/i386/i386-common.c +# Source: Wikipedia +# Architecture Based on New features +arch=AmdFam10h x86_64 sse3,sse4a,cx16,popcnt,lzcnt # AMD K10 +arch=BtVer1 AmdFam10h xsave # AMD Bobcat v1 +arch=BtVer2 BtVer1 ssse3,sse4.1,sse4.2,avx,bmi,f16c,movbe,xsaveopt # AMD Bobcat v2 +arch=BdVer1 BtVer1 ssse3,sse4.1,sse4.2,avx,xop,fma4 +arch=BdVer2 BdVer1 bmi,f16c,fma,tbm +arch=BdVer3 BdVer2 fsgsbase,xsaveopt +arch=BdVer4 BdVer3 avx2,bmi2,rdrnd,movbe +arch=ZnVer1 BdVer4 adx,rdseed,clzero,clfushopt,xsavec,xsaves +arch=ZnVer2 ZnVer1 clwb,wbnoinvd + +arch=Barcelona AmdFam10h +arch=Bulldozer BdVer1 # AMD Bulldozer +arch=Piledriver BdVer2 # AMD Bulldozer v2 (Piledriver) +arch=Steamroller BdVer3 # AMD Bulldozer v3 (Steamroller) +arch=Excavator BdVer4 # AMD Bulldozer v4 (Excavator) +arch=Zen ZnVer1 # AMD Zen +arch=Zen2 ZnVer2 # AMD Zen2 diff --git 
a/util/x86simdgen/3rdparty/simd-intel.conf b/util/x86simdgen/3rdparty/simd-intel.conf new file mode 100644 index 0000000000..0db4259f97 --- /dev/null +++ b/util/x86simdgen/3rdparty/simd-intel.conf @@ -0,0 +1,179 @@ +# -*- mode: conf; indent-tabs-mode: t -*- +# Feature CPUID function Bit Required feature +#sep Leaf01EDX 11 # Sysenter/sysexit +#cmov Leaf01EDX 15 # Conditional Move +#clflush Leaf01EDX 19 # Cache-Line Flush +#mmx Leaf01EDX 23 # Multi Media Extensions +#fxsr Leaf01EDX 24 # FXSAVE instruction +#sse Leaf01EDX 25 # Streaming SIMD Extensions +sse2 Leaf01EDX 26 # Streaming SIMD Extensions 2 +# -- everything above this line is mandatory on x86-64 -- +sse3 Leaf01ECX 0 # Streaming SIMD Extensions 3 +#pclmul Leaf01ECX 1 # Carryless Multiply +ssse3 Leaf01ECX 9 # Supplemental Streaming SIMD Extensions 3 +fma Leaf01ECX 12 # Fused Multiply-Add +#cx16 Leaf01ECX 13 # Compare-Exchange 16 bytes +sse4.1 Leaf01ECX 19 # Streaming SIMD Extensions 4.1 +sse4.2 Leaf01ECX 20 # Streaming SIMD Extensions 4.2 +movbe Leaf01ECX 22 # MOV Big Endian +popcnt Leaf01ECX 23 # Population count +aes Leaf01ECX 25 sse4.2 # Advanced Encryption Standard +#xsave Leaf01ECX 26 # XSAVE, XGETBV instructions +#osxsave Leaf01ECX 27 # XSAVE enabled by OS +avx Leaf01ECX 28 # Advanced Vector Extensions +f16c Leaf01ECX 29 avx # 16-bit Float Conversion +rdrnd Leaf01ECX 30 # Random number generator +#hypervisor Leaf01ECX 31 # Running on a hypervisor +#fsgsbase Leaf07_00EBX 0 # FS/GS base access +bmi Leaf07_00EBX 3 # Bit Manipulation Instructions +#hle Leaf07_00EBX 4 # Hardware Lock Elision +avx2 Leaf07_00EBX 5 avx # Advanced Vector Extensions 2 +bmi2 Leaf07_00EBX 8 # Bit Manipulation Instructions 2 +#erms Leaf07_00EBX 9 # Enhanced REP MOVSB/STOSB +#rtm Leaf07_00EBX 11 # Restricted Transactional Memory +#rdt_m Leaf07_00EBX 12 # Resource Director Technology (RDT) Monitoring +#mpx Leaf07_00EBX 14 # Memory Protection Extensions +#rdt_a Leaf07_00EBX 15 # Resource Director Technology (RDT) Allocation +avx512f 
Leaf07_00EBX 16 avx # AVX512 Foundation +avx512dq Leaf07_00EBX 17 avx512f # AVX512 Double & Quadword +rdseed Leaf07_00EBX 18 # Random number generator for seeding +#adx Leaf07_00EBX 19 # Multi-Precision Add-Carry +avx512ifma Leaf07_00EBX 21 avx512f # AVX512 Integer Fused Multiply-Add +#clflushopt Leaf07_00EBX 23 # Cache-Line Flush Optimized +#clwb Leaf07_00EBX 24 # Cache-Line Write Back +#avx512pf Leaf07_00EBX 26 avx512f # AVX512 Prefetch +#avx512er Leaf07_00EBX 27 avx512f # AVX512 Exponential & Reciprocal +avx512cd Leaf07_00EBX 28 avx512f # AVX512 Conflict Detection +sha Leaf07_00EBX 29 # SHA-1 and SHA-256 instructions +avx512bw Leaf07_00EBX 30 avx512f # AVX512 Byte & Word +avx512vl Leaf07_00EBX 31 avx512f # AVX512 Vector Length +avx512vbmi Leaf07_00ECX 1 avx512f # AVX512 Vector Byte Manipulation Instructions +#pku Leaf07_00ECX 3 # Protection Keys for User mode +#ospke Leaf07_00ECX 4 # Protection Keys Enabled by OS +#waitpkg Leaf07_00ECX 5 # User-Level Monitor / Wait +avx512vbmi2 Leaf07_00ECX 6 avx512f # AVX512 Vector Byte Manipulation Instructions 2 +shstk Leaf07_00ECX 7 # Control Flow Enforcement Technology Shadow Stack +gfni Leaf07_00ECX 8 # Galois Field new instructions +vaes Leaf07_00ECX 9 avx2,avx,aes # 256- and 512-bit AES +#vpclmulqdq Leaf07_00ECX 10 avx # 256- and 512-bit Carryless Multiply +avx512vnni Leaf07_00ECX 11 avx512f # AVX512 Vector Neural Network Instructions +avx512bitalg Leaf07_00ECX 12 avx512f # AVX512 Bit Algorithms +avx512vpopcntdq Leaf07_00ECX 14 avx512f # AVX512 Population Count +#la57 Leaf07_00ECX 16 # 5-level page tables +#rdpid Leaf07_00ECX 22 # RDPID instruction +#cldemote Leaf07_00ECX 25 # Cache Line Demotion +#movdiri Leaf07_00ECX 27 # Move Direct-store Integer +#movdir64b Leaf07_00ECX 28 # Move Direct-store 64 bytes +#enqcmd Leaf07_00ECX 29 # Enqueue Command +#pks Leaf07_00ECX 31 # Protection Keys for Supervisor mode +#avx5124vnniw Leaf07_00EDX 2 avx512f # AVX512 4-iteration Vector Neural Network Instructions +#avx5124fmaps 
Leaf07_00EDX 3 avx512f # AVX512 4-iteration Fused Multiply Accumulation +#fsrm Leaf07_00EDX 4 # Fast Short REP MOV +#uintr Leaf07_00EDX 5 # User interrupts +#avx512vp2intersect Leaf07_00EDX 8 avx512f # AVX512 Intersection computation +#serialize Leaf07_00EDX 14 # SERIALIZE instruction +hybrid Leaf07_00EDX 15 # Hybrid processor +#tsxldtrk Leaf07_00EDX 16 # TSX (RTM) Suspend Load Address Tracking +#pconfig Leaf07_00EDX 18 # Platform configuration +ibt Leaf07_00EDX 20 # Control Flow Enforcement Technology Indirect Branch Tracking +#amxbf16 Leaf07_00EDX 22 amxtile # AMX Tile multiplication in BFloat16 +avx512fp16 Leaf07_00EDX 23 avx512f,f16c # AVX512 16-bit Floating Point +#amxtile Leaf07_00EDX 24 # Advanced Matrix Extensions Tile support +#amxint8 Leaf07_00EDX 25 amxtile # AMX Tile multiplication for Int8 +#avxvnni Leaf07_01EAX 4 avx # AVX (VEX-encoded) versions of the Vector Neural Network Instructions +#avx512bf16 Leaf07_01EAX 5 avx512f # AVX512 Brain Float16 +#zlmovsb Leaf07_01EAX 10 # Zero-length MOVSB +#fsrs Leaf07_01EAX 11 # Fast Short (REP?) STOSB +#fsrc Leaf07_01EAX 12 # Fast Short (REP?) CMPSB, SCASB +#fred Leaf07_01EAX 17 # Flexible Return and Event Delivery +#lkgs Leaf07_01EAX 18 # Load into Kernel GS +#lam Leaf07_01EAX 26 # Linear Address Masking +#xsaveopt Leaf13_01EAX 0 # Optimized XSAVE +#xsavec Leaf13_01EAX 1 # XSAVE with Compaction +#xgetbv1 Leaf13_01EAX 2 # XGETBV with ECX=1 +#xsaves Leaf13_01EAX 3 # XSAVE Supervisor mode +#xfd Leaf13_01EAX 4 # eXtended Feature Disable MSR +#lzcnt Leaf80000001hECX 5 # Leading Zero Count + +# XSAVE states +# Source: Intel Software Development Manual, Volume 1, Chapter 13 +# Source: Intel Instruction Set Extensions Manual (ed. 
041), Chapter 3, "Intel AMX Instruction Set" +# Grouping Value Required for +xsave=X87 0x0001 # X87 and MMX state +xsave=SseState 0x0002 sse # SSE: 128 bits of XMM registers +xsave=Ymm_Hi128 0x0004 # AVX: high 128 bits in YMM registers +xsave=Bndregs 0x0008 # Memory Protection Extensions +xsave=Bndcsr 0x0010 # Memory Protection Extensions +xsave=OpMask 0x0020 # AVX512: k0 through k7 +xsave=Zmm_Hi256 0x0040 # AVX512: high 256 bits of ZMM0-15 +xsave=Hi16_Zmm 0x0080 # AVX512: all 512 bits of ZMM16-31 +xsave=PTState 0x0100 # Processor Trace +xsave=PKRUState 0x0200 pku # Protection Key +# ??? 0x0400 +xsave=CetUState 0x0800 # CET: user mode +xsave=CetSState 0x1000 # CET: supervisor mode +xsave=HdcState 0x2000 # Hardware Duty Cycle +xsave=UintrState 0x4000 uintr # User Interrupts +# ??? 0x8000 +xsave=HwpState 0x10000 # Hardware P-State +xsave=Xtilecfg 0x20000 # AMX: XTILECFG register +xsave=Xtiledata 0x40000 # AMX: data in the tiles +xsave=AvxState SseState|Ymm_Hi128 avx,fma,avx512f +xsave=MPXState Bndregs|Bndcsr mpx +xsave=Avx512State AvxState|OpMask|Zmm_Hi256|Hi16_Zmm avx512f +xsave=CetState CetUState|CetSState shstk +xsave=AmxState Xtilecfg|Xtiledata amxtile + +# Processor/arch listing below this line +# Source: Intel Instruction Set Extension manual, section 1.2 +# Source: GCC gcc/config/i386/i386.h, i386-c.c, i386-builtins.c +# Architecture Based on New features Optional features +arch=x86_64 <> sse2 + # Core line +arch=Core2 x86_64 sse3,ssse3,cx16 +arch=NHM Core2 sse4.1,sse4.2,popcnt +arch=WSM NHM +arch=SNB WSM avx +arch=IVB SNB f16c,rdrnd,fsgsbase +arch=HSW IVB avx2,fma,bmi,bmi2,lzcnt,movbe +arch=BDW HSW adx,rdseed +arch=BDX BDW +arch=SKL BDW xsavec,xsaves +arch=ADL SKL avxvnni,gfni,vaes,vpclmulqdq,serialize,shstk,cldemote,movdiri,movdir64b,ibt,waitpkg,keylocker rdpid +arch=SKX SKL avx512f,avx512dq,avx512cd,avx512bw,avx512vl clwb +arch=CLX SKX avx512vnni +arch=CPX CLX avx512bf16 +arch=CNL SKX avx512ifma,avx512vbmi sha +arch=ICL CNL 
avx512vbmi2,gfni,vaes,vpclmulqdq,avx512vnni,avx512bitalg,avx512vpopcntdq fsrm,rdpid +arch=ICX ICL pconfig +arch=TGL ICL avx512vp2intersect,shstk,,movdiri,movdir64b,ibt,keylocker +arch=SPR TGL avx512bf16,amxtile,amxbf16,amxint8,avxvnni,cldemote,pconfig,waitpkg,serialize,tsxldtrk,uintr + # Atom line +arch=SLM WSM rdrnd,movbe +arch=GLM SLM fsgsbase,rdseed,lzcnt,xsavec,xsaves +arch=TNT GLM clwb,gfni,cldemote,waitpkg,movdiri,movdir64b + # Xeon Phi line +#arch=KNL SKL avx512f,avx512er,avx512pf,avx512cd +#arch=KNM KNL avx5124fmaps,avx5124vnniw,avx512vpopcntdq + # Longer names +arch=Nehalem NHM # Intel Core i3/i5/i7 +arch=Westmere WSM # Intel Core i3/i5/i7 +arch=SandyBridge SNB # Second Generation Intel Core i3/i5/i7 +arch=IvyBridge IVB # Third Generation Intel Core i3/i5/i7 +arch=Haswell HSW # Fourth Generation Intel Core i3/i5/i7 +arch=Broadwell BDW # Fifth Generation Intel Core i3/i5/i7 +arch=Skylake SKL # Sixth Generation Intel Core i3/i5/i7 +arch=Skylake-Avx512 SKX # Intel Xeon Scalable +arch=CascadeLake CLX # Second Generation Intel Xeon Scalable +arch=CooperLake CPX # Third Generation Intel Xeon Scalable +arch=CannonLake CNL # Intel Core i3-8121U +arch=IceLake-Client ICL # Tenth Generation Intel Core i3/i5/i7 +arch=IceLake-Server ICX # Third Generation Intel Xeon Scalable +arch=AlderLake ADL +arch=SapphireRapids SPR +arch=TigerLake TGL # Eleventh Generation Intel Core i3/i5/i7 +arch=Silvermont SLM +arch=Goldmont GLM +arch=Tremont TNT +#arch=KnightsLanding KNL +#arch=KnightsMill KNM diff --git a/util/x86simdgen/3rdparty/x86simd_generate.pl b/util/x86simdgen/3rdparty/x86simd_generate.pl new file mode 100755 index 0000000000..a07f858dcc --- /dev/null +++ b/util/x86simdgen/3rdparty/x86simd_generate.pl @@ -0,0 +1,329 @@ +#!/usr/bin/env perl + +# SPDX-License-Identifier: Apache-2.0 + +use strict; +$\ = "\n"; +$/ = "\n"; +my $debug = 0; +my %leaves = ( + Leaf01ECX => "CPUID Leaf 1, ECX", + Leaf07_00EBX => "CPUID Leaf 7, Sub-leaf 0, EBX", + Leaf07_00ECX => "CPUID Leaf 7, 
Sub-leaf 0, ECX", + Leaf07_00EDX => "CPUID Leaf 7, Sub-leaf 0, EDX", + Leaf07_01EAX => "CPUID Leaf 7, Sub-leaf 1, EAX", + Leaf13_01EAX => "CPUID Leaf 13, Sub-leaf 1, EAX", + Leaf80000001hECX => "CPUID Leaf 80000001h, ECX", + Leaf80000008hEBX => "CPUID Leaf 80000008h, EBX", +); +my @leafNames = sort keys %leaves; + +# out of order (we want it first) +unshift @leafNames, "Leaf01EDX"; +$leaves{Leaf01EDX} = "CPUID Leaf 1, EDX"; + +# Read input from file specified by first argument +my $input_conf_file = shift @ARGV; +open(FH, '<', $input_conf_file) or die $!; + +my $i = 0; +my @features; +my @architecture_names; +my %architectures; +my @xsaveStates; +my $maxarchnamelen = 0; +while (<FH>) { + chomp $_; + m/#\s*(.*)\s*/; + my $comment = $1; + + s/#.*$//; + s/^\s+//; + next if $_ eq ""; + + if (s/^arch=//) { + my ($arch, $based, $f) = split /\s+/; + die("Unknown base architecture \"$based\"") + unless $based eq "<>" or grep {$_ eq $based} @architecture_names; + my $id = lc($arch); + $id =~ s/[^A-Za-z0-9_]/_/g; + + my $prettyname = $arch; + $prettyname =~ s/\B([A-Z])/ $1/g; + $prettyname =~ s/-(\w+)/ ($1)/g; + $maxarchnamelen = length($prettyname) if length($prettyname) > $maxarchnamelen; + + my @basefeatures; + my @extrafeatures; + @basefeatures = @{$architectures{$based}->{allfeatures}} if $based ne "<>"; + @extrafeatures = @{$architectures{$arch}{features}} if defined($architectures{$arch}); + @extrafeatures = (@extrafeatures, split(',', $f)); + my @allfeatures = sort (@basefeatures, @extrafeatures); + + $architectures{$arch} = { + name => $arch, + prettyname => $prettyname, + id => $id, + base => $based, + features => \@extrafeatures, + allfeatures => \@allfeatures, + comment => $comment + }; + push @architecture_names, $arch + unless grep {$_ eq $arch} @architecture_names; + } elsif (s/^xsave=//) { + my ($name, $value, $required) = split /\s+/; + push @xsaveStates, + { id => $name, value => $value, required_for => $required, comment => $comment }; + } else { + my 
($name, $function, $bit, $depends) = split /\s+/; + die("Unknown CPUID function \"$function\"") + unless grep {$_ eq $function} @leafNames; + if (my @match = grep { $_->{name} eq $name } @features) { + # A feature may be listed again as long as every detail is identical. + # Entries keep the CPUID function under the "leaf" key (see the push below). + die("internal error") if scalar @match != 1; + next if $match[0]->{leaf} eq $function && + $match[0]->{bit} eq $bit && $match[0]->{depends} eq $depends; + die("Duplicate feature \"$name\" with different details. " . + "Previously was $match[0]->{leaf} bit $match[0]->{bit}."); + } + + my $id = uc($name); + $id =~ s/[^A-Z0-9_]/_/g; + push @features, + { name => $name, depends => $depends, id => $id, bit => $bit, leaf => $function, comment => $comment }; + ++$i; + die("Too many features to fit a 64-bit integer") if $i > 64; + } +} +close FH; + +# Print the header output +my $headername = ""; +my $headerguard = ""; +if ($headername = shift @ARGV) { + + $headerguard = uc($headername); + $headerguard =~ s/[^A-Z0-9_]/_/g; + + print qq|// This is a generated file. DO NOT EDIT. +// Please see $0 +#ifndef $headerguard +#define $headerguard + +#include <stdint.h>|; +} else { + $debug = 1; +} + +# Print the feature list +my $lastleaf; +for (my $i = 0; $i < scalar @features; ++$i) { + my $feature = $features[$i]; + # Leaf header: + printf "\n// in %s:\n", $leaves{$feature->{leaf}} + if $feature->{leaf} ne $lastleaf; + $lastleaf = $feature->{leaf}; + + # Feature + printf "#define cpu_feature_%-31s (UINT64_C(1) << %d)\n", lc($feature->{id}), $i; + + # Feature string names for Clang and GCC + my $str = $feature->{name} . ',' . $feature->{depends}; + $str =~ s/,$//; + printf "#define QT_FUNCTION_TARGET_STRING_%-17s \"%s\"\n", + $feature->{id}, $str; +} + +# Print the architecture list +print "\n// CPU architectures"; +for (@architecture_names) { + my $arch = $architectures{$_}; + my $base = $arch->{base}; + if ($base eq "<>") { + $base = "0"; + } else { + $base =~ s/[^A-Za-z0-9_]/_/g; + $base = "cpu_" . 
$base; + } + + printf "#define cpu_%-19s (%s", lc($arch->{id}), lc($base); + + for my $f (@{$arch->{features}}) { + my @match = grep { $_->{name} eq $f } @features; + if (scalar @match == 1) { + printf " \\\n%33s| cpu_feature_%s", " ", lc($match[0]->{id}); + } else { + printf STDERR "%s: unknown feature '%s' for CPU '%s'\n", $0, $f, $arch->{name} + if $debug; + } + } + print ")"; +} + +print q{ +static const uint64_t _compilerCpuFeatures = 0}; + +# And print the compiler-enabled features part: +for (my $i = 0; $i < scalar @features; ++$i) { + my $feature = $features[$i]; + printf + "#ifdef __%s__\n" . + " | cpu_feature_%s\n" . + "#endif\n", + $feature->{id}, lc($feature->{id}); +} + +print ' ;'; +if ($headerguard ne "") { + print q| +#if (defined __cplusplus) && __cplusplus >= 201103L +enum X86CpuFeatures : uint64_t {|; + + for (@features) { + my $line = sprintf "CpuFeature%s = cpu_feature_%s,", $_->{id}, lc($_->{id}); + if ($_->{comment} ne "") { + printf " %-56s ///< %s\n", $line, $_->{comment}; + } else { + print " $line"; + } + } + +print qq|}; // enum X86CpuFeatures + +enum X86CpuArchitectures : uint64_t {|; + + for (@architecture_names) { + my $arch = $architectures{$_}; + my $name = $arch->{name}; + $name =~ s/[^A-Za-z0-9]//g; + my $line = sprintf "CpuArch%s = cpu_%s,", $name, lc($arch->{id}); + if ($arch->{comment} ne "") { + printf " %-56s ///< %s\n", $line, $arch->{comment}; + } else { + print " $line"; + } + } + + print qq|}; // enum X86cpuArchitectures +#endif /* C++11 */\n|; +}; + +print "// -- implementation start --\n"; +# Now generate the string table and bit-location array +my $offset = 0; +my @offsets; +print "static const char features_string[] ="; +for my $feature (@features) { + print " \" $feature->{name}\\0\""; + push @offsets, $offset; + $offset += 2 + length($feature->{name}); +} +print " \"\\0\";"; + +# Print the string offset table +printf "\nstatic const %s features_indices[] = {", + $offset > 255 ? 
"uint16_t" : "uint8_t"; +for (my $j = 0; $j < scalar @offsets; ++$j) { + printf "%s%3d,", + $j % 8 ? " " : "\n ", $offsets[$j]; +} +print "\n};"; + +# Print the locator enum and table +print "\nenum X86CpuidLeaves {"; +map { print " $_," } @leafNames; +print " X86CpuidMaxLeaf\n};"; + +# Each locator is leaf*32 + bit, so more than 8 leaves no longer fit in 8 bits. +# Count the keys explicitly: a hash in scalar context is only a key count on Perl >= 5.26. +my $type = scalar(keys %leaves) > 8 ? "uint16_t" : "uint8_t"; +printf "\nstatic const %s x86_locators[] = {\n", + $type; +for (my $j = 0; $j < scalar @features; ++$j) { + my $feature = $features[$j]; + printf " %s*32 + %2d, %s// %s\n", + $feature->{leaf}, $feature->{bit}, ' ' x (24 - length($feature->{leaf})), $feature->{name}; +} +print '};'; + +# Generate the processor name listing, sorted by feature length +my %sorted_archs; +for (@architecture_names) { + my $arch = $architectures{$_}; + my $key = sprintf "%02d_%s", scalar(@{$arch->{allfeatures}}), join(',', @{$arch->{allfeatures}}); + $sorted_archs{$key} = $arch; +} +print qq| +struct X86Architecture +{ + uint64_t features; + char name[$maxarchnamelen + 1]; +}; + +static const struct X86Architecture x86_architectures[] = {|; +for (sort { $b <=> $a } keys %sorted_archs) { + my $arch = $sorted_archs{$_}; + next if $arch->{base} eq "<>"; + printf " { cpu_%s, \"%s\" },\n", $arch->{id}, $arch->{prettyname}; +} +print "};"; + +# Produce the list of XSAVE states +print "\nenum XSaveBits {"; +my $xsaveEnumPrefix = "XSave_"; +for my $state (@xsaveStates) { + my $value = $state->{value}; + unless ($value =~ /^0x/) { + # Compound value + $value = join(" | ", map { $xsaveEnumPrefix . 
$_ } split(/\|/, $value)); + } + printf " %s%-12s = %s,", $xsaveEnumPrefix, $state->{id}, $value; + printf "%s// %s", ' ' x (18 - length($value)), $state->{comment} + if $state->{comment} ne ''; + printf "\n"; +}; +print "};"; + +# Produce a list of features require extended XSAVE state +my $xsaveRequirementMapping; +for my $state (@xsaveStates) { + my $xsaveReqPrefix = "XSaveReq_"; + my @required_for = split /,/, $state->{required_for}; + next unless scalar @required_for; + + my $prefix = sprintf "\n// List of features requiring %s%s\nstatic const uint64_t %s%s = 0", + $xsaveEnumPrefix, $state->{id}, $xsaveReqPrefix, $state->{id}; + + # match either the feature name or one of its requirements against list + # of features that this state is required for + for my $feature (@features) { + my $id = lc($feature->{id}); + my $required = 0; + for my $requirement (@required_for) { + my @depends = split /,/, "$id," . $feature->{depends}; + $required = grep { $_ eq $requirement } @depends; + last if $required; + } + printf "$prefix\n | cpu_feature_%s", $id if $required; + $prefix = "" if $required; + } + + if ($prefix eq "") { + # we printed something + print ";"; + $xsaveRequirementMapping .= sprintf " { %s%s, %s%s },\n", + $xsaveReqPrefix, $state->{id}, $xsaveEnumPrefix, $state->{id}; + } +} + +# Finally, make a table +printf qq| +struct XSaveRequirementMapping +{ + uint64_t cpu_features; + uint64_t xsave_state; +}; + +static const struct XSaveRequirementMapping xsave_requirements[] = { +%s}; + +// -- implementation end -- +#endif /* $headerguard */\n|, $xsaveRequirementMapping if $xsaveRequirementMapping ne ""; diff --git a/util/x86simdgen/Makefile b/util/x86simdgen/Makefile new file mode 100644 index 0000000000..a727af4217 --- /dev/null +++ b/util/x86simdgen/Makefile @@ -0,0 +1,19 @@ +GENERATOR = 3rdparty/x86simd_generate.pl +TARGETDIR = ../../src/corelib/global/ +TARGETCPP = qsimd_x86.cpp +TARGETHEADER = qsimd_x86_p.h + +CONF_FILES = 3rdparty/simd-intel.conf +# We 
don't currently use any feature from simd-amd.conf +# CONF_FILES += 3rdparty/simd-amd.conf + +all: $(TARGETDIR)/$(TARGETHEADER) $(TARGETDIR)/$(TARGETCPP) +$(TARGETHEADER): $(CONF_FILES) | $(GENERATOR) + cat $^ | perl $(GENERATOR) /dev/stdin $@ > $@ +$(TARGETDIR)/$(TARGETHEADER): header $(TARGETHEADER) + sed '/-- implementation start --/,/-- implementation end --/d' $^ > $@ +$(TARGETDIR)/$(TARGETCPP): $(TARGETHEADER) header + (cat header; echo '#include "$(TARGETHEADER)"'; sed '1,/-- implementation start --/d;/-- implementation end --/,$$d' $<) > $@ + +clean: + -$(RM) $(TARGETHEADER) diff --git a/util/x86simdgen/README.md b/util/x86simdgen/README.md new file mode 100644 index 0000000000..83f554a84e --- /dev/null +++ b/util/x86simdgen/README.md @@ -0,0 +1,13 @@ +# Scripts to regenerate the x86 SIMD flags + +Upstream: https://github.com/opendcdiag/opendcdiag +License: Apache-2.0 + +The .conf files are meant to be edited and the options we want to use +are uncommented. + +To regenerate: + make + +Note: the license of the script does not affect the produced output's +license. Therefore, no qt_attribution.json file is provided. diff --git a/util/x86simdgen/generate.pl b/util/x86simdgen/generate.pl deleted file mode 100755 index b3e7e99298..0000000000 --- a/util/x86simdgen/generate.pl +++ /dev/null @@ -1,277 +0,0 @@ -#!/usr/bin/env perl -############################################################################# -## -## Copyright (C) 2018 Intel Corporation. -## Contact: https://www.qt.io/licensing/ -## -## This file is part of the build configuration tools of the Qt Toolkit. 
-## -## $QT_BEGIN_LICENSE:MIT$ -## Permission is hereby granted, free of charge, to any person obtaining a copy -## of this software and associated documentation files (the "Software"), to deal -## in the Software without restriction, including without limitation the rights -## to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -## copies of the Software, and to permit persons to whom the Software is -## furnished to do so, subject to the following conditions: -## -## The above copyright notice and this permission notice shall be included in -## all copies or substantial portions of the Software. -## -## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -## THE SOFTWARE. 
-## $QT_END_LICENSE$ -## -############################################################################# - -use strict; -$\ = "\n"; -$/ = "\n"; -my %leaves = ( - Leaf1EDX => "CPUID Leaf 1, EDX", - Leaf1ECX => "CPUID Leaf 1, ECX", - Leaf7_0EBX => "CPUID Leaf 7, Sub-leaf 0, EBX", - Leaf7_0ECX => "CPUID Leaf 7, Sub-leaf 0, ECX", - Leaf7_0EDX => "CPUID Leaf 7, Sub-leaf 0, EDX", -); -my @leafNames = sort keys %leaves; - -# Read data from stdin -my $i = 1; -my @features; -while (<STDIN>) { - s/#.*$//; - chomp; - next if $_ eq ""; - - my ($name, $function, $bit, $depends) = split /\s+/; - die("Unknown CPUID function \"$function\"") - unless grep $function, @leafNames; - - my $id = uc($name); - $id =~ s/[^A-Z0-9_]/_/g; - push @features, - { name => $name, depends => $depends, id => $id, bit => $bit, leaf => $function }; - ++$i; -} - -if (my $h = shift @ARGV) { - open HEADER, ">", $h; - select HEADER; -} - -# Print the qsimd_x86_p.h output -print q{/**************************************************************************** -** -** Copyright (C) 2018 Intel Corporation. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtCore module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. 
Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -// This is a generated file. DO NOT EDIT. -// Please see util/x86simdgen/generate.pl"; -#ifndef QSIMD_P_H -# error "Please include <private/qsimd_p.h> instead" -#endif -#ifndef QSIMD_X86_P_H -#define QSIMD_X86_P_H - -#include "qsimd_p.h" - -// -// W A R N I N G -// ------------- -// -// This file is not part of the Qt API. It exists purely as an -// implementation detail. This header file may change from version to -// version without notice, or even be removed. -// -// We mean it. 
-// - -QT_BEGIN_NAMESPACE - -// used only to indicate that the CPU detection was initialized -#define QSimdInitialized (Q_UINT64_C(1) << 0)}; - -# Print the enum -my $lastleaf; -for (my $i = 0; $i < scalar @features; ++$i) { - my $feature = $features[$i]; - # Leaf header: - printf "\n// in %s:\n", $leaves{$feature->{leaf}} - if $feature->{leaf} ne $lastleaf; - $lastleaf = $feature->{leaf}; - - # Feature - printf "#define CpuFeature%-33s (Q_UINT64_C(1) << %d)\n", $feature->{id}, $i + 1; - - # Feature string names for Clang and GCC - my $str = $feature->{name}; - $str .= ",$feature->{depends}" if defined($feature->{depends}); - printf "#define QT_FUNCTION_TARGET_STRING_%-17s \"%s\"\n", - $feature->{id}, $str; -} - -print q{ -static const quint64 qCompilerCpuFeatures = 0}; - -# And print the compiler-enabled features part: -for (my $i = 0; $i < scalar @features; ++$i) { - my $feature = $features[$i]; - printf - "#ifdef __%s__\n" . - " | CpuFeature%s\n" . - "#endif\n", - $feature->{id}, $feature->{id}; -} - -print q{ ; - -QT_END_NAMESPACE - -#endif // QSIMD_X86_P_H -}; - -if (my $cpp = shift @ARGV) { - open CPP, ">", $cpp; - select CPP; -} else { - print q{ - ----- cut here, paste the rest into qsimd_x86.cpp --- - - -}; -}; - -print q{/**************************************************************************** -** -** Copyright (C) 2018 Intel Corporation. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtCore module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. 
-** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -// This is a generated file. DO NOT EDIT. -// Please see util/x86simdgen/generate.pl"; -#include "qsimd_p.h" -}; - -# Now generate the string table and bit-location array -my $offset = 0; -my @offsets; -print "static const char features_string[] ="; -for my $feature (@features) { - print " \" $feature->{name}\\0\""; - push @offsets, $offset; - $offset += 2 + length($feature->{name}); -} -print " \"\\0\";"; - -# Print the string offset table -printf "\nstatic const %s features_indices[] = {\n %3d", - $offset > 255 ? "quint16" : "quint8", $offset; -for (my $j = 0; $j < scalar @offsets; ++$j) { - printf ",%s%3d", - ($j + 1) % 8 ? 
" " : "\n ", $offsets[$j]; -} -print "\n};"; - -# Print the locator enum and table -print "\nenum X86CpuidLeaves {"; -map { print " $_," } @leafNames; -print " X86CpuidMaxLeaf\n};"; - -my $type = scalar %leaves > 8 ? "quint16" : "quint8"; -printf "\nstatic const %s x86_locators[] = {", - $type, $type; -my $lastname; -for (my $j = 0; $j < scalar @features; ++$j) { - my $feature = $features[$j]; - printf ", // %s", $lastname - if defined($lastname); - printf "\n %s*32 + %2d", - $feature->{leaf}, $feature->{bit}; - $lastname = $feature->{name}; -} -printf qq{ // $lastname -\}; - -// List of AVX512 features (see detectProcessorFeatures()) -static const quint64 AllAVX512 = 0}; - -# Print AVX512 features -for (my $j = 0; $j < scalar @features; ++$j) { - my $feature = $features[$j]; - $_ = $feature->{id}; - printf "\n | CpuFeature%s", $_ if /AVX512/; -} -print ";"; diff --git a/util/x86simdgen/header b/util/x86simdgen/header new file mode 100644 index 0000000000..163f045408 --- /dev/null +++ b/util/x86simdgen/header @@ -0,0 +1,39 @@ +/**************************************************************************** +** +** Copyright (C) 2022 Intel Corporation. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. 
+** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. 
+** +** $QT_END_LICENSE$ +** +****************************************************************************/ + diff --git a/util/x86simdgen/simd.txt b/util/x86simdgen/simd.txt deleted file mode 100644 index 1fce7b9497..0000000000 --- a/util/x86simdgen/simd.txt +++ /dev/null @@ -1,37 +0,0 @@ -# Feature CPUID function Bit Required feature -sse2 Leaf1EDX 26 -sse3 Leaf1ECX 0 -ssse3 Leaf1ECX 9 -fma Leaf1ECX 12 -sse4.1 Leaf1ECX 19 -sse4.2 Leaf1ECX 20 -movbe Leaf1ECX 22 -popcnt Leaf1ECX 23 -aes Leaf1ECX 25 sse4.2 -avx Leaf1ECX 28 -f16c Leaf1ECX 29 -rdrnd Leaf1ECX 30 -bmi Leaf7_0EBX 3 -hle Leaf7_0EBX 4 -avx2 Leaf7_0EBX 5 -bmi2 Leaf7_0EBX 8 -rtm Leaf7_0EBX 11 -avx512f Leaf7_0EBX 16 -avx512dq Leaf7_0EBX 17 -rdseed Leaf7_0EBX 18 -avx512ifma Leaf7_0EBX 21 -avx512pf Leaf7_0EBX 26 -avx512er Leaf7_0EBX 27 -avx512cd Leaf7_0EBX 28 -sha Leaf7_0EBX 29 -avx512bw Leaf7_0EBX 30 -avx512vl Leaf7_0EBX 31 -avx512vbmi Leaf7_0ECX 1 -avx512vbmi2 Leaf7_0ECX 6 -gfni Leaf7_0ECX 8 -vaes Leaf7_0ECX 9 -avx512vnni Leaf7_0ECX 11 -avx512bitalg Leaf7_0ECX 12 -avx512vpopcntdq Leaf7_0ECX 14 -avx5124nniw Leaf7_0EDX 2 -avx5124fmaps Leaf7_0EDX 3 |