3DCore: rip away the configure option and static choice of SIMD

Instead, simply use whatever is available from the compiler. This also does away with the separation between Matrix4x4_SSE and Matrix4x4_AVX2. The two classes store the data the same way and are source-compatible; they just operate differently. This also allows for an AVX2-enabled Qt3DRenderer to link to and run with a non-AVX Qt3DCore.

Change-Id: I76216ced393445a4ae2dfffd172a512266b2414d
Reviewed-by: Paul Lemire <paul.lemire@kdab.com>
author: Thiago Macieira <thiago.macieira@intel.com> 2022-11-23 12:37:00 -0800
committer: Thiago Macieira <thiago.macieira@intel.com> 2023-01-19 19:18:52 -0800
commit: 00cb8afafb2867b348aa529c0702252d12d1b4d6 (patch)
tree: e173cbc730f713f44aaeaf399383617493549329 /src
parent: 15e4fc5daeeec5eeb0c1ccb0ce6f76b5b284e2c7 (diff)
16 files changed, 78 insertions, 258 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index aaf3b81ba..359cd521f 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -9,6 +9,7 @@
 
 qt_internal_add_module(3DCore
     SOURCES
+        aligned_malloc_p.h
         aspect/qcoreaspect.cpp aspect/qcoreaspect.h aspect/qcoreaspect_p.h
         aspect/qcoresettings.cpp aspect/qcoresettings.h aspect/qcoresettings_p.h
         aspect/coresettings.cpp aspect/coresettings_p.h
@@ -53,6 +54,7 @@ qt_internal_add_module(3DCore
         qurlhelper.cpp qurlhelper_p.h
         resources/qhandle_p.h
         resources/qloadgltf_p.h
+        resources/qresourcemanager.cpp resources/qresourcemanager_p.h
         services/nullservices_p.h
         services/qabstractframeadvanceservice.cpp services/qabstractframeadvanceservice_p.h
         services/qabstractframeadvanceservice_p_p.h
@@ -110,77 +112,17 @@ qt_internal_extend_target(3DCore CONDITION gcov
         "-ftest-coverage"
 )
 
-function(qt3d_add_simd_part target)
-    qt_parse_all_arguments(arg "qt3d_add_simd_part" "" ""
-       "NAME;SIMD;${__default_private_args};COMPILE_FLAGS" ${ARGN})
-    if ("x${arg_SIMD}" STREQUAL x)
-        message(FATAL_ERROR "qt3d_add_simd_part needs a SIMD type to be set.")
-    endif()
-
-    set(condition "QT_FEATURE_${arg_SIMD}")
-    if("${arg_SIMD}" STREQUAL arch_haswell)
-        set(condition "TEST_subarch_avx2 AND TEST_subarch_bmi AND TEST_subarch_bmi2 AND TEST_subarch_f16c AND TEST_subarch_fma AND TEST_subarch_lzcnt AND TEST_subarch_popcnt")
-    endif()
-
-    qt_evaluate_config_expression(result ${condition})
-    if(${result})
-        if(QT_CMAKE_DEBUG_EXTEND_TARGET)
-            message("qt3d_add_simd_part(${target} SIMD ${arg_SIMD} ...): Evaluated")
-        endif()
-        string(TOUPPER "QT_CFLAGS_${arg_SIMD}" simd_flags)
-
-        foreach(source IN LISTS arg_SOURCES)
-            set_property(SOURCE "${source}" APPEND
-                PROPERTY COMPILE_OPTIONS
-                ${${simd_flags}}
-                ${arg_COMPILE_FLAGS}
-            )
-        endforeach()
-        set_source_files_properties(${arg_SOURCES} PROPERTIES SKIP_PRECOMPILE_HEADERS TRUE)
-        target_sources(${target} PRIVATE ${arg_SOURCES})
-        target_compile_options("${target}Private" INTERFACE ${${simd_flags}} ${arg_COMPILE_FLAGS})
-        target_compile_definitions("${target}Private" INTERFACE ${simd_flags})
-    else()
-        if(QT_CMAKE_DEBUG_EXTEND_TARGET)
-            message("qt3d_add_simd_part(${target} SIMD ${arg_SIMD} ...): Skipped")
-        endif()
-    endif()
-endfunction()
-
-if(QT_FEATURE_qt3d_simd_sse2)
-    qt3d_add_simd_part(3DCore SIMD sse2
-        SOURCES
-            transforms/matrix4x4_sse.cpp transforms/matrix4x4_sse_p.h
-    )
-endif()
-
-if(QT_FEATURE_qt3d_simd_sse2 AND NOT QT_FEATURE_qt3d_simd_avx2)
-    qt3d_add_simd_part(3DCore SIMD sse2
-        SOURCES
-            transforms/vector3d_sse.cpp transforms/vector3d_sse_p.h
-            transforms/vector4d_sse.cpp transforms/vector4d_sse_p.h
-            aligned_malloc_p.h
-            resources/qresourcemanager.cpp resources/qresourcemanager_p.h
-    )
-endif()
-
-if(QT_FEATURE_qt3d_simd_avx2)
-    qt3d_add_simd_part(3DCore SIMD arch_haswell
-        SOURCES
-            transforms/matrix4x4_avx2.cpp transforms/matrix4x4_avx2_p.h
-            transforms/vector3d_sse.cpp transforms/vector3d_sse_p.h
-            transforms/vector4d_sse.cpp transforms/vector4d_sse_p.h
-            aligned_malloc_p.h
-            resources/qresourcemanager.cpp resources/qresourcemanager_p.h
-    )
-endif()
-
-qt_internal_extend_target(3DCore
-    CONDITION
-        NOT QT_FEATURE_qt3d_simd_sse2 AND NOT QT_FEATURE_qt3d_simd_avx2
+qt_internal_extend_target(3DCore # x86 only: the sources listed below are written with SSE2/AVX2 intrinsics
+    CONDITION ( TEST_architecture_arch STREQUAL i386 ) OR
+        ( TEST_architecture_arch STREQUAL x86_64 ) OR
+        ( CMAKE_OSX_ARCHITECTURES MATCHES "x86_64" ) OR
+        ( CMAKE_OSX_ARCHITECTURES MATCHES "x86_64h" ) OR
+        ( CMAKE_OSX_ARCHITECTURES MATCHES "i386" )
     SOURCES
-        aligned_malloc_p.h
-        resources/qresourcemanager.cpp resources/qresourcemanager_p.h
+        transforms/matrix4x4_avx2_p.h
+        transforms/matrix4x4_sse.cpp transforms/matrix4x4_sse_p.h
+        transforms/vector3d_sse.cpp transforms/vector3d_sse_p.h
+        transforms/vector4d_sse.cpp transforms/vector4d_sse_p.h
 )
 
 qt_internal_add_docs(3DCore
diff --git a/src/core/aligned_malloc_p.h b/src/core/aligned_malloc_p.h
index 750eea730..fa2b5c38f 100644
--- a/src/core/aligned_malloc_p.h
+++ b/src/core/aligned_malloc_p.h
@@ -18,15 +18,15 @@
 #include <QtCore/private/qsimd_p.h>
 #include <Qt3DCore/private/qt3dcore-config_p.h>
 
-#if QT_CONFIG(qt3d_simd_avx2) && defined(__AVX2__) && defined(QT_COMPILER_SUPPORTS_AVX2)
+#if defined(__AVX2__)
 # define QT3D_ALIGNED_MALLOC(s) _mm_malloc(s, 32)
-#elif QT_CONFIG(qt3d_simd_sse2) && defined(__SSE2__) && defined(QT_COMPILER_SUPPORTS_SSE2)
+#elif defined(__SSE2__)
 # define QT3D_ALIGNED_MALLOC(s) _mm_malloc(s, 16)
 #else
     #define QT3D_ALIGNED_MALLOC(s) malloc(s)
 #endif
 
-#if (QT_CONFIG(qt3d_simd_avx2) && defined(__AVX2__) && defined(QT_COMPILER_SUPPORTS_AVX2)) || (QT_CONFIG(qt3d_simd_sse2) && defined(__SSE2__) && defined(QT_COMPILER_SUPPORTS_SSE2))
+#if defined(__SSE2__)
 # define QT3D_ALIGNED_FREE(ptr) _mm_free(ptr)
 #else
 # define QT3D_ALIGNED_FREE(ptr) free(ptr)
diff --git a/src/core/configure.cmake b/src/core/configure.cmake
index 845ed63ac..7a0543619 100644
--- a/src/core/configure.cmake
+++ b/src/core/configure.cmake
@@ -68,26 +68,9 @@ qt_feature("qt3d-animation" PUBLIC
     PURPOSE "Use the 3D Animation Aspect library"
     CONDITION QT_FEATURE_qt3d_render
 )
-qt_feature("qt3d-simd-sse2" PRIVATE
-    LABEL "Use SSE2 instructions"
-    PURPOSE "Use SSE2 SIMD instructions to accelerate matrix operations"
-    AUTODETECT QT_FEATURE_sse2
-    ENABLE INPUT_qt3d_simd STREQUAL 'sse2' OR INPUT_qt3d_simd STREQUAL 'avx2'
-    DISABLE INPUT_qt3d_simd STREQUAL 'no' OR ( TEST_architecture_arch STREQUAL i386 )
-)
-qt_feature("qt3d-simd-avx2" PRIVATE
-    LABEL "Use AVX2 instructions"
-    PURPOSE "Use AVX2 SIMD instructions to accelerate matrix operations"
-    AUTODETECT TEST_arch_${TEST_architecture_arch}_subarch_avx2
-    CONDITION QT_FEATURE_avx2
-    ENABLE INPUT_qt3d_simd STREQUAL 'avx2'
-    DISABLE INPUT_qt3d_simd STREQUAL 'sse2' OR INPUT_qt3d_simd STREQUAL 'no' OR ( TEST_architecture_arch STREQUAL i386 )
-)
 qt_configure_add_summary_section(NAME "Qt 3D")
 qt_configure_add_summary_entry(ARGS "qt3d-assimp")
 qt_configure_add_summary_entry(ARGS "qt3d-system-assimp")
-qt_configure_add_summary_entry(ARGS "qt3d-simd-sse2")
-qt_configure_add_summary_entry(ARGS "qt3d-simd-avx2")
 qt_configure_add_summary_section(NAME "Aspects")
 qt_configure_add_summary_entry(ARGS "qt3d-render")
 qt_configure_add_summary_entry(ARGS "qt3d-input")
diff --git a/src/core/resources/qresourcemanager.cpp b/src/core/resources/qresourcemanager.cpp
index 4dc113ef9..410b223fb 100644
--- a/src/core/resources/qresourcemanager.cpp
+++ b/src/core/resources/qresourcemanager.cpp
@@ -57,9 +57,9 @@ namespace Qt3DCore {
 
 void *AlignedAllocator::allocate(uint size)
 {
-#if QT_CONFIG(qt3d_simd_avx2) && defined(__AVX2__) && defined(QT_COMPILER_SUPPORTS_AVX2)
+#if defined(__AVX2__)
     return _mm_malloc(size, 32);
-#elif QT_CONFIG(qt3d_simd_sse2) && defined(__SSE2__) && defined(QT_COMPILER_SUPPORTS_SSE2)
+#elif defined(__SSE2__)
     return _mm_malloc(size, 16);
 #else
     return malloc(size);
@@ -68,9 +68,7 @@ void *AlignedAllocator::allocate(uint size)
 
 void AlignedAllocator::release(void *p)
 {
-#if QT_CONFIG(qt3d_simd_avx2) && defined(__AVX2__) && defined(QT_COMPILER_SUPPORTS_AVX2)
-    _mm_free(p);
-#elif QT_CONFIG(qt3d_simd_sse2) && defined(__SSE2__) && defined(QT_COMPILER_SUPPORTS_SSE2)
+#if defined(__SSE2__)
     _mm_free(p);
 #else
     free(p);
diff --git a/src/core/transforms/matrix4x4_avx2.cpp b/src/core/transforms/matrix4x4_avx2.cpp
deleted file mode 100644
index a14b2bd8c..000000000
--- a/src/core/transforms/matrix4x4_avx2.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright (C) 2016 Paul Lemire <paul.lemire350@gmail.com>
-// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
-
-#include "matrix4x4_avx2_p.h"
-
-#ifdef QT_COMPILER_SUPPORTS_AVX2
-
-QT_BEGIN_NAMESPACE
-
-namespace Qt3DCore {
-
-QDebug operator<<(QDebug dbg, const Matrix4x4_AVX2 &m)
-{
-    dbg.nospace() << "Matrix4x4_AVX2(" << Qt::endl
-                  << qSetFieldWidth(10)
-                  << m.m11() << m.m12() << m.m13() << m.m14() << Qt::endl
-                  << m.m21() << m.m22() << m.m23() << m.m24() << Qt::endl
-                  << m.m31() << m.m32() << m.m33() << m.m34() << Qt::endl
-                  << m.m41() << m.m42() << m.m43() << m.m44() << Qt::endl
-                  << qSetFieldWidth(0) << ')';
-    return dbg;
-}
-
-} // Qt3DCore
-
-QT_END_NAMESPACE
-
-#endif
diff --git a/src/core/transforms/matrix4x4_avx2_p.h b/src/core/transforms/matrix4x4_avx2_p.h
index 7e8c20f9e..ea7330623 100644
--- a/src/core/transforms/matrix4x4_avx2_p.h
+++ b/src/core/transforms/matrix4x4_avx2_p.h
@@ -20,7 +20,7 @@
 #include <private/qsimd_p.h>
 #include <QMatrix4x4>
 
-#ifdef QT_COMPILER_SUPPORTS_AVX2
+#ifdef __AVX2__
 
 // Some GCC versions don't have _mm256_set_m128 available
 // Work around that
@@ -33,22 +33,22 @@ QT_BEGIN_NAMESPACE
 
 namespace Qt3DCore {
 
-class Matrix4x4_AVX2
+class Matrix4x4_SSE
 {
 public:
 
-    Q_ALWAYS_INLINE Matrix4x4_AVX2() { setToIdentity(); }
-    explicit Q_ALWAYS_INLINE Matrix4x4_AVX2(Qt::Initialization) {}
+    Q_ALWAYS_INLINE Matrix4x4_SSE() { setToIdentity(); }
+    explicit Q_ALWAYS_INLINE Matrix4x4_SSE(Qt::Initialization) {}
 
     // Assumes data is 32 bytes aligned (and in column major order)
-    explicit Q_ALWAYS_INLINE Matrix4x4_AVX2(float *data)
+    explicit Q_ALWAYS_INLINE Matrix4x4_SSE(float *data)
     {
         m_col12 = _mm256_load_ps(data);
         m_col34 = _mm256_load_ps(data + 8);
     }
 
     // QMatrix4x4::constData returns in column major order
-    explicit Q_ALWAYS_INLINE Matrix4x4_AVX2(const QMatrix4x4 &mat)
+    explicit Q_ALWAYS_INLINE Matrix4x4_SSE(const QMatrix4x4 &mat)
     {
         // data may not be properly aligned, using unaligned loads
         const float *data = mat.constData();
@@ -57,7 +57,7 @@ public:
     }
 
     // In (row major) but we store in column major order
-    explicit Q_ALWAYS_INLINE Matrix4x4_AVX2(float m11, float m12, float m13, float m14,
+    explicit Q_ALWAYS_INLINE Matrix4x4_SSE(float m11, float m12, float m13, float m14,
                                             float m21, float m22, float m23, float m24,
                                             float m31, float m32, float m33, float m34,
                                             float m41, float m42, float m43, float m44)
@@ -96,7 +96,7 @@ public:
         // Using a static identity matrix and assigning it is 27 instructions
     }
 
-    Q_ALWAYS_INLINE Matrix4x4_AVX2 operator*(const Matrix4x4_AVX2 &other) const
+    Q_ALWAYS_INLINE Matrix4x4_SSE operator*(const Matrix4x4_SSE &other) const
     {
         // Shuffling: (Latency 1)
         // (8 bits -> first two pairs used to select from the first vector, second pairs from second vector)
@@ -182,58 +182,58 @@ public:
         tmp2 = _mm256_add_ps(_mm256_mul_ps(_mm256_shuffle_ps(otherCol34, otherCol34, 0xaa), _mm256_broadcast_ps(&col3)), tmp2);
         tmp2 = _mm256_add_ps(_mm256_mul_ps(_mm256_shuffle_ps(otherCol34, otherCol34, 0xff), _mm256_broadcast_ps(&col4)), tmp2);
 
-        Matrix4x4_AVX2 c(Qt::Uninitialized);
+        Matrix4x4_SSE c(Qt::Uninitialized);
         c.m_col12 = tmp;
         c.m_col34 = tmp2;
         return c;
     }
 
-    Q_ALWAYS_INLINE Matrix4x4_AVX2 operator-(const Matrix4x4_AVX2 &other) const
+    Q_ALWAYS_INLINE Matrix4x4_SSE operator-(const Matrix4x4_SSE &other) const
     {
-        Matrix4x4_AVX2 c(Qt::Uninitialized);
+        Matrix4x4_SSE c(Qt::Uninitialized);
 
         c.m_col12 = _mm256_sub_ps(m_col12, other.m_col12);
         c.m_col34 = _mm256_sub_ps(m_col34, other.m_col34);
         return c;
     }
 
-    Q_ALWAYS_INLINE Matrix4x4_AVX2 operator+(const Matrix4x4_AVX2 &other) const
+    Q_ALWAYS_INLINE Matrix4x4_SSE operator+(const Matrix4x4_SSE &other) const
     {
-        Matrix4x4_AVX2 c(Qt::Uninitialized);
+        Matrix4x4_SSE c(Qt::Uninitialized);
 
         c.m_col12 = _mm256_add_ps(m_col12, other.m_col12);
         c.m_col34 = _mm256_add_ps(m_col34, other.m_col34);
         return c;
     }
 
-    Q_ALWAYS_INLINE Matrix4x4_AVX2 &operator*=(const Matrix4x4_AVX2 &other)
+    Q_ALWAYS_INLINE Matrix4x4_SSE &operator*=(const Matrix4x4_SSE &other)
     {
         *this = *this * other;
         return *this;
     }
 
-    Q_ALWAYS_INLINE Matrix4x4_AVX2 &operator-=(const Matrix4x4_AVX2 &other)
+    Q_ALWAYS_INLINE Matrix4x4_SSE &operator-=(const Matrix4x4_SSE &other)
     {
         *this = *this - other;
         return *this;
     }
 
-    Q_ALWAYS_INLINE Matrix4x4_AVX2 &operator+=(const Matrix4x4_AVX2 &other)
+    Q_ALWAYS_INLINE Matrix4x4_SSE &operator+=(const Matrix4x4_SSE &other)
     {
         *this = *this + other;
         return *this;
     }
 
-    Q_ALWAYS_INLINE Matrix4x4_AVX2 transposed() const
+    Q_ALWAYS_INLINE Matrix4x4_SSE transposed() const
     {
-        Matrix4x4_AVX2 c(Qt::Uninitialized);
+        Matrix4x4_SSE c(Qt::Uninitialized);
         const __m128 col1 = _mm256_extractf128_ps(m_col12, 0);
         const __m128 col2 = _mm256_extractf128_ps(m_col12, 1);
         const __m128 col3 = _mm256_extractf128_ps(m_col34, 0);
         const __m128 col4 = _mm256_extractf128_ps(m_col34, 1);
 
         // ~117 instructions
-        // Matrix4x4_AVX2 c = *this;
+        // Matrix4x4_SSE c = *this;
         // _MM_TRANSPOSE4_PS(c.m_col1, c.m_col2, c.m_col3, c.m_col4);
 
         // ~131 instructions - AVX2
@@ -279,14 +279,14 @@ public:
         return c;
     }
 
-    Q_ALWAYS_INLINE Matrix4x4_AVX2 inverted() const
+    Q_ALWAYS_INLINE Matrix4x4_SSE inverted() const
     {
         // TO DO: Optimize
         const QMatrix4x4 mat = toQMatrix4x4();
-        return Matrix4x4_AVX2(mat.inverted());
+        return Matrix4x4_SSE(mat.inverted());
     }
 
-    Q_ALWAYS_INLINE bool operator==(const Matrix4x4_AVX2 &other) const
+    Q_ALWAYS_INLINE bool operator==(const Matrix4x4_SSE &other) const
     {
         // cmp returns (-1, -1, -1, -1, -1, -1, -1, -1) if the two m256 are equals
         // movemask takes the most significant bits (8x 1 in this case) which equals 0xff
@@ -295,7 +295,7 @@ public:
 
     }
 
-    Q_ALWAYS_INLINE bool operator!=(const Matrix4x4_AVX2 &other) const
+    Q_ALWAYS_INLINE bool operator!=(const Matrix4x4_SSE &other) const
     {
         return !(*this == other);
     }
@@ -450,13 +450,13 @@ public:
                         Vector3D_SSE::dotProduct(row3, vector));
     }
 
-    friend Vector4D operator*(const Vector4D &vector, const Matrix4x4_AVX2 &matrix);
-    friend Vector4D operator*(const Matrix4x4_AVX2 &matrix, const Vector4D &vector);
+    friend Vector4D operator*(const Vector4D &vector, const Matrix4x4_SSE &matrix);
+    friend Vector4D operator*(const Matrix4x4_SSE &matrix, const Vector4D &vector);
 
-    friend Vector3D operator*(const Vector3D &vector, const Matrix4x4_AVX2 &matrix);
-    friend Vector3D operator*(const Matrix4x4_AVX2 &matrix, const Vector3D &vector);
+    friend Vector3D operator*(const Vector3D &vector, const Matrix4x4_SSE &matrix);
+    friend Vector3D operator*(const Matrix4x4_SSE &matrix, const Vector3D &vector);
 
-    friend Q_3DCORE_PRIVATE_EXPORT QDebug operator<<(QDebug dbg, const Matrix4x4_AVX2 &m);
+    friend Q_3DCORE_PRIVATE_EXPORT QDebug operator<<(QDebug dbg, const Matrix4x4_SSE &m);
 
 private:
     // column major order
@@ -476,7 +476,7 @@ private:
     __m256 m_col34;
 };
 
-Q_ALWAYS_INLINE Vector4D operator*(const Vector4D &vector, const Matrix4x4_AVX2 &matrix)
+Q_ALWAYS_INLINE Vector4D operator*(const Vector4D &vector, const Matrix4x4_SSE &matrix)
 {
     const __m256 vecMultiplier = _mm256_broadcast_ps(&vector.m_xyzw);
     // a1 a2 a3 a4 b1 b2 b3 b4, c1 c2 c3 c4 d1 d2 d3 d4
@@ -495,13 +495,13 @@ Q_ALWAYS_INLINE Vector4D operator*(const Vector4D &vector, const Matrix4x4_AVX2
     return v;
 }
 
-Q_ALWAYS_INLINE Vector4D operator*(const Matrix4x4_AVX2 &matrix, const Vector4D &vector)
+Q_ALWAYS_INLINE Vector4D operator*(const Matrix4x4_SSE &matrix, const Vector4D &vector)
 {
-    const Matrix4x4_AVX2 transposed = matrix.transposed();
+    const Matrix4x4_SSE transposed = matrix.transposed();
     return vector * transposed;
 }
 
-Q_ALWAYS_INLINE Vector3D operator*(const Vector3D &vector, const Matrix4x4_AVX2 &matrix)
+Q_ALWAYS_INLINE Vector3D operator*(const Vector3D &vector, const Matrix4x4_SSE &matrix)
 {
     const __m128 vec4 = _mm_set_ps(1.0f, vector.z(), vector.y(), vector.x());
     const __m256 vecMultiplier = _mm256_broadcast_ps(&vec4);
@@ -524,20 +524,20 @@ Q_ALWAYS_INLINE Vector3D operator*(const Vector3D &vector, const Matrix4x4_AVX2
     return v;
 }
 
-Q_ALWAYS_INLINE Vector3D operator*(const Matrix4x4_AVX2 &matrix, const Vector3D &vector)
+Q_ALWAYS_INLINE Vector3D operator*(const Matrix4x4_SSE &matrix, const Vector3D &vector)
 {
-    const Matrix4x4_AVX2 transposed = matrix.transposed();
+    const Matrix4x4_SSE transposed = matrix.transposed();
     return vector * transposed;
 }
 
 } // Qt3DCore
 
-Q_DECLARE_TYPEINFO(Qt3DCore::Matrix4x4_AVX2, Q_PRIMITIVE_TYPE);
+Q_DECLARE_TYPEINFO(Qt3DCore::Matrix4x4_SSE, Q_PRIMITIVE_TYPE);
 
 QT_END_NAMESPACE
 
-Q_DECLARE_METATYPE(Qt3DCore::Matrix4x4_AVX2)
+Q_DECLARE_METATYPE(Qt3DCore::Matrix4x4_SSE)
 
-#endif // QT_COMPILER_SUPPORTS_AVX
+#endif // __AVX2__
 
 #endif // QT3DCORE_MATRIX4X4_AVX2_P_H
diff --git a/src/core/transforms/matrix4x4_p.h b/src/core/transforms/matrix4x4_p.h
index ac3d02aad..3591597fb 100644
--- a/src/core/transforms/matrix4x4_p.h
+++ b/src/core/transforms/matrix4x4_p.h
@@ -16,20 +16,9 @@
 //
 
 #include <private/qsimd_p.h>
-#include <Qt3DCore/private/qt3dcore-config_p.h>
 
-
-// We check if sse or avx config option was enabled as it could
-// be disabled even though a given platform supports SSE2 or AVX2 instructions
-#if QT_CONFIG(qt3d_simd_avx2) && defined(__AVX2__) && defined(QT_COMPILER_SUPPORTS_AVX2)
-
-#include <Qt3DCore/private/matrix4x4_avx2_p.h>
-
-QT_BEGIN_NAMESPACE
-using Matrix4x4 = Qt3DCore::Matrix4x4_AVX2;
-QT_END_NAMESPACE
-
-#elif QT_CONFIG(qt3d_simd_sse2) && defined(__SSE2__) && defined(QT_COMPILER_SUPPORTS_SSE2)
+// Check if we can use the optimized version of QMatrix4x4
+#if defined(__SSE2__)
 
 #include <Qt3DCore/private/matrix4x4_sse_p.h>
 
diff --git a/src/core/transforms/matrix4x4_sse.cpp b/src/core/transforms/matrix4x4_sse.cpp
index 0ec0f3131..83ffeb4d2 100644
--- a/src/core/transforms/matrix4x4_sse.cpp
+++ b/src/core/transforms/matrix4x4_sse.cpp
@@ -3,7 +3,7 @@
 
 #include "matrix4x4_sse_p.h"
 
-#ifdef QT_COMPILER_SUPPORTS_SSE2
+#ifdef __SSE2__
 
 QT_BEGIN_NAMESPACE
 
@@ -25,4 +25,4 @@ QDebug operator<<(QDebug dbg, const Matrix4x4_SSE &m)
 
 QT_END_NAMESPACE
 
-#endif // QT_COMPILER_SUPPORTS_SSE2
+#endif // __SSE2__
diff --git a/src/core/transforms/matrix4x4_sse_p.h b/src/core/transforms/matrix4x4_sse_p.h
index 8762bec93..fc2381cb4 100644
--- a/src/core/transforms/matrix4x4_sse_p.h
+++ b/src/core/transforms/matrix4x4_sse_p.h
@@ -20,7 +20,9 @@
 #include <private/qsimd_p.h>
 #include <QMatrix4x4>
 
-#ifdef QT_COMPILER_SUPPORTS_SSE2
+#if defined(__AVX2__)
+#include "matrix4x4_avx2_p.h"
+#elif defined(__SSE2__)
 
 QT_BEGIN_NAMESPACE
 
@@ -464,6 +466,6 @@ QT_END_NAMESPACE
 
 Q_DECLARE_METATYPE(Qt3DCore::Matrix4x4_SSE)
 
-#endif // QT_COMPILER_SUPPORTS_SSE2
+#endif // __SSE2__
 
 #endif // QT3DCORE_MATRIX4X4_SSE_P_H
diff --git a/src/core/transforms/vector3d_p.h b/src/core/transforms/vector3d_p.h
index e204613ca..a9815a2a8 100644
--- a/src/core/transforms/vector3d_p.h
+++ b/src/core/transforms/vector3d_p.h
@@ -16,11 +16,9 @@
 //
 
 #include <private/qsimd_p.h>
-#include <Qt3DCore/private/qt3dcore-config_p.h>
 
-// We check if sse config option was enabled as it could
-// be disabled even though a given platform supports SSE2 instructions
-#if QT_CONFIG(qt3d_simd_sse2) && (defined(__AVX2__) || defined(__SSE2__)) && defined(QT_COMPILER_SUPPORTS_SSE2)
+// Check if we can use the optimized version of QVector3D
+#if defined(__SSE2__)
 
 #include <Qt3DCore/private/vector3d_sse_p.h>
 
diff --git a/src/core/transforms/vector3d_sse.cpp b/src/core/transforms/vector3d_sse.cpp
index b588fefd5..28590580f 100644
--- a/src/core/transforms/vector3d_sse.cpp
+++ b/src/core/transforms/vector3d_sse.cpp
@@ -1,19 +1,15 @@
 // Copyright (C) 2017 Klaralvdalens Datakonsult AB (KDAB).
 // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
 
+#include "vector3d_sse_p.h"
+
 #include <private/qsimd_p.h>
 
-#if defined(__AVX2__) && defined(QT_COMPILER_SUPPORTS_AVX2)
-#include "matrix4x4_avx2_p.h"
-#else
 #include "matrix4x4_sse_p.h"
-#endif
-
-#include "vector3d_sse_p.h"
 #include "vector4d_sse_p.h"
 #include <QDebug>
 
-#ifdef QT_COMPILER_SUPPORTS_SSE2
+#ifdef __SSE2__
 
 QT_BEGIN_NAMESPACE
 
@@ -30,41 +26,6 @@ Vector3D_SSE::Vector3D_SSE(const Vector4D_SSE &v)
     m_xyzw = _mm_mul_ps(v.m_xyzw, _mm_set_ps(0.0f, 1.0f, 1.0f, 1.0f));
 }
 
-#if defined(__AVX2__) && defined(QT_COMPILER_SUPPORTS_AVX2)
-
-Vector3D_SSE Vector3D_SSE::unproject(const Matrix4x4_AVX2 &modelView, const Matrix4x4_AVX2 &projection, const QRect &viewport) const
-{
-    const Matrix4x4_AVX2 inverse = (projection * modelView).inverted();
-
-    Vector4D_SSE tmp(*this, 1.0f);
-    tmp.setX((tmp.x() - float(viewport.x())) / float(viewport.width()));
-    tmp.setY((tmp.y() - float(viewport.y())) / float(viewport.height()));
-    tmp = tmp * 2.0f - Vector4D_SSE(1.0f, 1.0f, 1.0f, 1.0f);
-
-    Vector4D_SSE obj = inverse * tmp;
-    if (qFuzzyIsNull(obj.w()))
-        obj.setW(1.0f);
-    obj /= obj.w();
-    return Vector3D_SSE(obj);
-}
-
-Vector3D_SSE Vector3D_SSE::project(const Matrix4x4_AVX2 &modelView, const Matrix4x4_AVX2 &projection, const QRect &viewport) const
-{
-    Vector4D_SSE tmp(*this, 1.0f);
-    tmp = projection * modelView * tmp;
-    if (qFuzzyIsNull(tmp.w()))
-        tmp.setW(1.0f);
-    tmp /= tmp.w();
-
-    tmp = tmp * 0.5f + Vector4D_SSE(0.5f, 0.5f, 0.5f, 0.5f);
-    tmp.setX(tmp.x() * viewport.width() + viewport.x());
-    tmp.setY(tmp.y() * viewport.height() + viewport.y());
-
-    return Vector3D_SSE(tmp);
-}
-
-#else
-
 Vector3D_SSE Vector3D_SSE::unproject(const Matrix4x4_SSE &modelView, const Matrix4x4_SSE &projection, const QRect &viewport) const
 {
     const Matrix4x4_SSE inverse = (projection * modelView).inverted();
@@ -96,10 +57,8 @@ Vector3D_SSE Vector3D_SSE::project(const Matrix4x4_SSE &modelView, const Matrix4
     return Vector3D_SSE(tmp);
 }
 
-#endif
-
 } // Qt3DCore
 
 QT_END_NAMESPACE
 
-#endif // QT_COMPILER_SUPPORTS_SSE2
+#endif // __SSE2__
diff --git a/src/core/transforms/vector3d_sse_p.h b/src/core/transforms/vector3d_sse_p.h
index 98eb97dc2..e18f203ee 100644
--- a/src/core/transforms/vector3d_sse_p.h
+++ b/src/core/transforms/vector3d_sse_p.h
@@ -22,14 +22,13 @@
 #include <QDebug>
 #include <math.h>
 
-#ifdef QT_COMPILER_SUPPORTS_SSE2
+#ifdef __SSE2__
 
 QT_BEGIN_NAMESPACE
 
 namespace Qt3DCore {
 
 class Matrix4x4_SSE;
-class Matrix4x4_AVX2;
 class Vector4D_SSE;
 
 class Vector3D_SSE
@@ -142,13 +141,8 @@ public:
         return ((_mm_movemask_ps(_mm_cmpeq_ps(m_xyzw, _mm_set_ps1(0.0f))) & 0x7) == 0x7);
     }
 
-#if defined(__AVX2__) && defined(QT_COMPILER_SUPPORTS_AVX2)
-    Q_3DCORE_PRIVATE_EXPORT Vector3D_SSE unproject(const Matrix4x4_AVX2 &modelView, const Matrix4x4_AVX2 &projection, const QRect &viewport) const;
-    Q_3DCORE_PRIVATE_EXPORT Vector3D_SSE project(const Matrix4x4_AVX2 &modelView, const Matrix4x4_AVX2 &projection, const QRect &viewport) const;
-#else
     Q_3DCORE_PRIVATE_EXPORT Vector3D_SSE unproject(const Matrix4x4_SSE &modelView, const Matrix4x4_SSE &projection, const QRect &viewport) const;
     Q_3DCORE_PRIVATE_EXPORT Vector3D_SSE project(const Matrix4x4_SSE &modelView, const Matrix4x4_SSE &projection, const QRect &viewport) const;
-#endif
 
     Q_ALWAYS_INLINE float x() const { return _mm_cvtss_f32(m_xyzw); }
 
@@ -309,13 +303,6 @@ public:
     }
 
     friend class Vector4D_SSE;
-
-#if defined(__AVX2__) && defined(QT_COMPILER_SUPPORTS_AVX2)
-    friend class Matrix4x4_AVX2;
-    friend Vector3D_SSE operator*(const Vector3D_SSE &vector, const Matrix4x4_AVX2 &matrix);
-    friend Vector3D_SSE operator*(const Matrix4x4_AVX2 &matrix, const Vector3D_SSE &vector);
-#endif
-
     friend class Matrix4x4_SSE;
     friend Vector3D_SSE operator*(const Vector3D_SSE &vector, const Matrix4x4_SSE &matrix);
     friend Vector3D_SSE operator*(const Matrix4x4_SSE &matrix, const Vector3D_SSE &vector);
@@ -358,6 +345,6 @@ QT_END_NAMESPACE
 
 Q_DECLARE_METATYPE(Qt3DCore::Vector3D_SSE)
 
-#endif // QT_COMPILER_SUPPORTS_SSE2
+#endif // __SSE2__
 
 #endif // QT3DCORE_VECTOR3D_SSE_P_H
diff --git a/src/core/transforms/vector4d_p.h b/src/core/transforms/vector4d_p.h
index 242c4cdce..57391b790 100644
--- a/src/core/transforms/vector4d_p.h
+++ b/src/core/transforms/vector4d_p.h
@@ -16,11 +16,9 @@
 //
 
 #include <private/qsimd_p.h>
-#include <Qt3DCore/private/qt3dcore-config_p.h>
 
-// We check if sse config option was enabled as it could
-// be disabled even though a given platform supports SSE2 instructions
-#if QT_CONFIG(qt3d_simd_sse2) && (defined(__AVX2__) || defined(__SSE2__)) && defined(QT_COMPILER_SUPPORTS_SSE2)
+// Check if we can use the optimized version of QVector4D
+#if defined(__SSE2__)
 
 #include <Qt3DCore/private/vector4d_sse_p.h>
 
diff --git a/src/core/transforms/vector4d_sse.cpp b/src/core/transforms/vector4d_sse.cpp
index c422425c3..09f9106df 100644
--- a/src/core/transforms/vector4d_sse.cpp
+++ b/src/core/transforms/vector4d_sse.cpp
@@ -4,7 +4,7 @@
 #include "vector4d_sse_p.h"
 #include <QDebug>
 
-#ifdef QT_COMPILER_SUPPORTS_SSE2
+#ifdef __SSE2__
 
 QT_BEGIN_NAMESPACE
 
@@ -20,4 +20,4 @@ QDebug operator<<(QDebug dbg, const Vector4D_SSE &v)
 
 QT_END_NAMESPACE
 
-#endif // QT_COMPILER_SUPPORTS_SSE2
+#endif // __SSE2__
diff --git a/src/core/transforms/vector4d_sse_p.h b/src/core/transforms/vector4d_sse_p.h
index fbc7ef309..e50a08725 100644
--- a/src/core/transforms/vector4d_sse_p.h
+++ b/src/core/transforms/vector4d_sse_p.h
@@ -18,14 +18,13 @@
 #include <Qt3DCore/private/vector3d_p.h>
 #include <QtGui/qvector4d.h>
 
-#ifdef QT_COMPILER_SUPPORTS_SSE2
+#ifdef __SSE2__
 
 QT_BEGIN_NAMESPACE
 
 namespace Qt3DCore {
 
 class Matrix4x4_SSE;
-class Matrix4x4_AVX2;
 
 class Vector4D_SSE
 {
@@ -306,13 +305,6 @@ public:
     }
 
     friend class Matrix4x4_SSE;
-
-#ifdef __AVX2__
-    friend class Matrix4x4_AVX2;
-    friend Vector4D_SSE operator*(const Vector4D_SSE &vector, const Matrix4x4_AVX2 &matrix);
-    friend Vector4D_SSE operator*(const Matrix4x4_AVX2 &matrix, const Vector4D_SSE &vector);
-#endif
-
     friend class Vector3D_SSE;
     friend Vector4D_SSE operator*(const Vector4D_SSE &vector, const Matrix4x4_SSE &matrix);
     friend Vector4D_SSE operator*(const Matrix4x4_SSE  &matrix, const Vector4D_SSE &vector);
@@ -356,6 +348,6 @@ QT_END_NAMESPACE
 
 Q_DECLARE_METATYPE(Qt3DCore::Vector4D_SSE)
 
-#endif // QT_COMPILER_SUPPORTS_SSE2
+#endif // __SSE2__
 
 #endif // QT3DCORE_VECTOR4D_SSE_P_H
diff --git a/src/render/backend/uniform_p.h b/src/render/backend/uniform_p.h
index 30fbcc51f..b585ce39d 100644
--- a/src/render/backend/uniform_p.h
+++ b/src/render/backend/uniform_p.h
@@ -111,7 +111,7 @@ public:
     }
 
     // We don t want the QMatrix4x4 builder to use sizeof since QMatrix4x4 contains a type flag
-#if defined(__SSE2__) || defined(__AVX2__)
+#if defined(__SSE2__)
     UniformValue(const Matrix4x4 &mat44)
         : m_data(sizeof(Matrix4x4) / sizeof(float))
     {
author	Thiago Macieira <thiago.macieira@intel.com>	2022-11-23 12:37:00 -0800
committer	Thiago Macieira <thiago.macieira@intel.com>	2023-01-19 19:18:52 -0800
commit	00cb8afafb2867b348aa529c0702252d12d1b4d6 (patch)
tree	e173cbc730f713f44aaeaf399383617493549329 /src
parent	15e4fc5daeeec5eeb0c1ccb0ce6f76b5b284e2c7 (diff)