summaryrefslogtreecommitdiffstats
path: root/src/corelib/global/qsimd_x86.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/global/qsimd_x86.cpp')
-rw-r--r--src/corelib/global/qsimd_x86.cpp291
1 files changed, 178 insertions, 113 deletions
diff --git a/src/corelib/global/qsimd_x86.cpp b/src/corelib/global/qsimd_x86.cpp
index be17f44c09..9a3bd80b39 100644
--- a/src/corelib/global/qsimd_x86.cpp
+++ b/src/corelib/global/qsimd_x86.cpp
@@ -1,45 +1,9 @@
-/****************************************************************************
-**
-** Copyright (C) 2018 Intel Corporation.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the QtCore module of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:LGPL$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU Lesser General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 3 as published by the Free Software
-** Foundation and appearing in the file LICENSE.LGPL3 included in the
-** packaging of this file. Please review the following information to
-** ensure the GNU Lesser General Public License version 3 requirements
-** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 2.0 or (at your option) the GNU General
-** Public license version 3 or any later version approved by the KDE Free
-** Qt Foundation. The licenses are as published by the Free Software
-** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-2.0.html and
-** https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
-
+// Copyright (C) 2022 Intel Corporation.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
// This is a generated file. DO NOT EDIT.
-// Please see util/x86simdgen/generate.pl
-#include "qsimd_p.h"
+// Please see util/x86simdgen/README.md
+
+#include "qsimd_x86_p.h"
static const char features_string[] =
" sse2\0"
@@ -55,101 +19,202 @@ static const char features_string[] =
" f16c\0"
" rdrnd\0"
" bmi\0"
- " hle\0"
" avx2\0"
" bmi2\0"
- " rtm\0"
" avx512f\0"
" avx512dq\0"
" rdseed\0"
" avx512ifma\0"
- " avx512pf\0"
- " avx512er\0"
" avx512cd\0"
" sha\0"
" avx512bw\0"
" avx512vl\0"
" avx512vbmi\0"
+ " waitpkg\0"
" avx512vbmi2\0"
+ " shstk\0"
" gfni\0"
" vaes\0"
- " avx512vnni\0"
" avx512bitalg\0"
" avx512vpopcntdq\0"
- " avx5124nniw\0"
- " avx5124fmaps\0"
+ " hybrid\0"
+ " ibt\0"
+ " avx512fp16\0"
+ " raoint\0"
+ " cmpccxadd\0"
+ " avxifma\0"
+ " lam\0"
"\0";
-static const quint16 features_indices[] = {
- 306, 0, 6, 12, 19, 24, 32, 40,
- 47, 55, 60, 65, 71, 78, 83, 88,
- 94, 100, 105, 114, 124, 132, 144, 154,
- 164, 174, 179, 189, 199, 211, 224, 230,
- 236, 248, 262, 279, 292
+static const uint16_t features_indices[] = {
+ 0, 6, 12, 19, 24, 32, 40, 47,
+ 55, 60, 65, 71, 78, 83, 89, 95,
+ 104, 114, 122, 134, 144, 149, 159, 169,
+ 181, 190, 203, 210, 216, 222, 236, 253,
+ 261, 266, 278, 286, 297, 306,
};
enum X86CpuidLeaves {
- Leaf1ECX,
- Leaf1EDX,
- Leaf7_0EBX,
- Leaf7_0ECX,
- Leaf7_0EDX,
+ Leaf01EDX,
+ Leaf01ECX,
+ Leaf07_00EBX,
+ Leaf07_00ECX,
+ Leaf07_00EDX,
+ Leaf07_01EAX,
+ Leaf07_01EDX,
+ Leaf13_01EAX,
+ Leaf80000001hECX,
+ Leaf80000008hEBX,
X86CpuidMaxLeaf
};
-static const quint8 x86_locators[] = {
- Leaf1EDX*32 + 26, // sse2
- Leaf1ECX*32 + 0, // sse3
- Leaf1ECX*32 + 9, // ssse3
- Leaf1ECX*32 + 12, // fma
- Leaf1ECX*32 + 19, // sse4.1
- Leaf1ECX*32 + 20, // sse4.2
- Leaf1ECX*32 + 22, // movbe
- Leaf1ECX*32 + 23, // popcnt
- Leaf1ECX*32 + 25, // aes
- Leaf1ECX*32 + 28, // avx
- Leaf1ECX*32 + 29, // f16c
- Leaf1ECX*32 + 30, // rdrnd
- Leaf7_0EBX*32 + 3, // bmi
- Leaf7_0EBX*32 + 4, // hle
- Leaf7_0EBX*32 + 5, // avx2
- Leaf7_0EBX*32 + 8, // bmi2
- Leaf7_0EBX*32 + 11, // rtm
- Leaf7_0EBX*32 + 16, // avx512f
- Leaf7_0EBX*32 + 17, // avx512dq
- Leaf7_0EBX*32 + 18, // rdseed
- Leaf7_0EBX*32 + 21, // avx512ifma
- Leaf7_0EBX*32 + 26, // avx512pf
- Leaf7_0EBX*32 + 27, // avx512er
- Leaf7_0EBX*32 + 28, // avx512cd
- Leaf7_0EBX*32 + 29, // sha
- Leaf7_0EBX*32 + 30, // avx512bw
- Leaf7_0EBX*32 + 31, // avx512vl
- Leaf7_0ECX*32 + 1, // avx512vbmi
- Leaf7_0ECX*32 + 6, // avx512vbmi2
- Leaf7_0ECX*32 + 8, // gfni
- Leaf7_0ECX*32 + 9, // vaes
- Leaf7_0ECX*32 + 11, // avx512vnni
- Leaf7_0ECX*32 + 12, // avx512bitalg
- Leaf7_0ECX*32 + 14, // avx512vpopcntdq
- Leaf7_0EDX*32 + 2, // avx5124nniw
- Leaf7_0EDX*32 + 3 // avx5124fmaps
+static const uint16_t x86_locators[] = {
+ Leaf01EDX*32 + 26, // sse2
+ Leaf01ECX*32 + 0, // sse3
+ Leaf01ECX*32 + 9, // ssse3
+ Leaf01ECX*32 + 12, // fma
+ Leaf01ECX*32 + 19, // sse4.1
+ Leaf01ECX*32 + 20, // sse4.2
+ Leaf01ECX*32 + 22, // movbe
+ Leaf01ECX*32 + 23, // popcnt
+ Leaf01ECX*32 + 25, // aes
+ Leaf01ECX*32 + 28, // avx
+ Leaf01ECX*32 + 29, // f16c
+ Leaf01ECX*32 + 30, // rdrnd
+ Leaf07_00EBX*32 + 3, // bmi
+ Leaf07_00EBX*32 + 5, // avx2
+ Leaf07_00EBX*32 + 8, // bmi2
+ Leaf07_00EBX*32 + 16, // avx512f
+ Leaf07_00EBX*32 + 17, // avx512dq
+ Leaf07_00EBX*32 + 18, // rdseed
+ Leaf07_00EBX*32 + 21, // avx512ifma
+ Leaf07_00EBX*32 + 28, // avx512cd
+ Leaf07_00EBX*32 + 29, // sha
+ Leaf07_00EBX*32 + 30, // avx512bw
+ Leaf07_00EBX*32 + 31, // avx512vl
+ Leaf07_00ECX*32 + 1, // avx512vbmi
+ Leaf07_00ECX*32 + 5, // waitpkg
+ Leaf07_00ECX*32 + 6, // avx512vbmi2
+ Leaf07_00ECX*32 + 7, // shstk
+ Leaf07_00ECX*32 + 8, // gfni
+ Leaf07_00ECX*32 + 9, // vaes
+ Leaf07_00ECX*32 + 12, // avx512bitalg
+ Leaf07_00ECX*32 + 14, // avx512vpopcntdq
+ Leaf07_00EDX*32 + 15, // hybrid
+ Leaf07_00EDX*32 + 20, // ibt
+ Leaf07_00EDX*32 + 23, // avx512fp16
+ Leaf07_01EAX*32 + 3, // raoint
+ Leaf07_01EAX*32 + 6, // cmpccxadd
+ Leaf07_01EAX*32 + 23, // avxifma
+ Leaf07_01EAX*32 + 26, // lam
+};
+
+struct X86Architecture
+{
+ uint64_t features;
+ char name[17 + 1];
+};
+
+static const struct X86Architecture x86_architectures[] = {
+ { cpu_core2, "Core2" },
+ { cpu_westmere, "Westmere" },
+ { cpu_sandybridge, "Sandy Bridge" },
+ { cpu_silvermont, "Silvermont" },
+ { cpu_ivybridge, "Ivy Bridge" },
+ { cpu_goldmont, "Goldmont" },
+ { cpu_haswell, "Haswell" },
+ { cpu_broadwell, "Broadwell" },
+ { cpu_tremont, "Tremont" },
+ { cpu_skylake, "Skylake" },
+ { cpu_skylake_avx512, "Skylake (Avx512)" },
+ { cpu_cascadelake, "Cascade Lake" },
+ { cpu_cooperlake, "Cooper Lake" },
+ { cpu_cannonlake, "Cannon Lake" },
+ { cpu_gracemont, "Gracemont" },
+ { cpu_icelake_client, "Ice Lake (Client)" },
+ { cpu_icelake_server, "Ice Lake (Server)" },
+ { cpu_crestmont, "Crestmont" },
+ { cpu_tigerlake, "Tiger Lake" },
+ { cpu_clearwaterforest, "Clearwater Forest" },
+ { cpu_grandridge, "Grand Ridge" },
+ { cpu_raptorcove, "Raptor Cove" },
+ { cpu_redwoodcove, "Redwood Cove" },
+ { cpu_emeraldrapids, "Emerald Rapids" },
+ { cpu_graniterapids, "Granite Rapids" },
+};
+
+enum XSaveBits {
+ XSave_X87 = 0x0001, // X87 and MMX state
+ XSave_SseState = 0x0002, // SSE: 128 bits of XMM registers
+ XSave_Ymm_Hi128 = 0x0004, // AVX: high 128 bits in YMM registers
+ XSave_Bndregs = 0x0008, // Memory Protection Extensions
+ XSave_Bndcsr = 0x0010, // Memory Protection Extensions
+ XSave_OpMask = 0x0020, // AVX512: k0 through k7
+ XSave_Zmm_Hi256 = 0x0040, // AVX512: high 256 bits of ZMM0-15
+ XSave_Hi16_Zmm = 0x0080, // AVX512: all 512 bits of ZMM16-31
+ XSave_PTState = 0x0100, // Processor Trace
+ XSave_PKRUState = 0x0200, // Protection Key
+ XSave_CetUState = 0x0800, // CET: user mode
+ XSave_CetSState = 0x1000, // CET: supervisor mode
+ XSave_HdcState = 0x2000, // Hardware Duty Cycle
+ XSave_UintrState = 0x4000, // User Interrupts
+ XSave_HwpState = 0x10000, // Hardware P-State
+ XSave_Xtilecfg = 0x20000, // AMX: XTILECFG register
+ XSave_Xtiledata = 0x40000, // AMX: data in the tiles
+ XSave_AvxState = XSave_SseState | XSave_Ymm_Hi128,
+ XSave_MPXState = XSave_Bndregs | XSave_Bndcsr,
+ XSave_Avx512State = XSave_AvxState | XSave_OpMask | XSave_Zmm_Hi256 | XSave_Hi16_Zmm,
+ XSave_CetState = XSave_CetUState | XSave_CetSState,
+ XSave_AmxState = XSave_Xtilecfg | XSave_Xtiledata,
+};
+
+// List of features requiring XSave_AvxState
+static const uint64_t XSaveReq_AvxState = 0
+ | cpu_feature_fma
+ | cpu_feature_avx
+ | cpu_feature_f16c
+ | cpu_feature_avx2
+ | cpu_feature_avx512f
+ | cpu_feature_avx512dq
+ | cpu_feature_avx512ifma
+ | cpu_feature_avx512cd
+ | cpu_feature_avx512bw
+ | cpu_feature_avx512vl
+ | cpu_feature_avx512vbmi
+ | cpu_feature_avx512vbmi2
+ | cpu_feature_vaes
+ | cpu_feature_avx512bitalg
+ | cpu_feature_avx512vpopcntdq
+ | cpu_feature_avx512fp16
+ | cpu_feature_avxifma;
+
+// List of features requiring XSave_Avx512State
+static const uint64_t XSaveReq_Avx512State = 0
+ | cpu_feature_avx512f
+ | cpu_feature_avx512dq
+ | cpu_feature_avx512ifma
+ | cpu_feature_avx512cd
+ | cpu_feature_avx512bw
+ | cpu_feature_avx512vl
+ | cpu_feature_avx512vbmi
+ | cpu_feature_avx512vbmi2
+ | cpu_feature_avx512bitalg
+ | cpu_feature_avx512vpopcntdq
+ | cpu_feature_avx512fp16;
+
+// List of features requiring XSave_CetState
+static const uint64_t XSaveReq_CetState = 0
+ | cpu_feature_shstk;
+
+struct XSaveRequirementMapping
+{
+ uint64_t cpu_features;
+ uint64_t xsave_state;
+};
+
+static const struct XSaveRequirementMapping xsave_requirements[] = {
+ { XSaveReq_AvxState, XSave_AvxState },
+ { XSaveReq_Avx512State, XSave_Avx512State },
+ { XSaveReq_CetState, XSave_CetState },
};
-// List of AVX512 features (see detectProcessorFeatures())
-static const quint64 AllAVX512 = 0
- | CpuFeatureAVX512F
- | CpuFeatureAVX512DQ
- | CpuFeatureAVX512IFMA
- | CpuFeatureAVX512PF
- | CpuFeatureAVX512ER
- | CpuFeatureAVX512CD
- | CpuFeatureAVX512BW
- | CpuFeatureAVX512VL
- | CpuFeatureAVX512VBMI
- | CpuFeatureAVX512VBMI2
- | CpuFeatureAVX512VNNI
- | CpuFeatureAVX512BITALG
- | CpuFeatureAVX512VPOPCNTDQ
- | CpuFeatureAVX5124NNIW
- | CpuFeatureAVX5124FMAPS;