From 765cac3cba88cb13d029fa7c8fb12ed57de49b51 Mon Sep 17 00:00:00 2001
From: Paul Lemire
Date: Mon, 19 Aug 2019 15:38:15 +0200
Subject: Matrix_SSE/Matrix_AVX2: fix mapVector

mapVector() used to multiply the input vector component-wise with each
of the first three matrix rows and add up the products, which is not a
matrix * vector product. Build every output component as the dot
product of one row of the upper-left 3x3 block with the vector instead.
New autotests compare the result against QMatrix4x4::mapVector().

The result is constructed as Vector3D_SSE, the declared return type,
rather than through the Vector3D alias.

Change-Id: I4584d2c879a72eccbaf273d0e84b3b6f6bb55295
Task-number: QTBUG-77675
Reviewed-by: Mike Krus
---
 src/core/transforms/matrix4x4_avx2_p.h             | 14 +++----
 src/core/transforms/matrix4x4_sse_p.h              | 14 +++----
 .../core/matrix4x4_avx2/tst_matrix4x4_avx2.cpp     | 49 ++++++++++++++++++++++
 .../auto/core/matrix4x4_sse/tst_matrix4x4_sse.cpp  | 49 ++++++++++++++++++++++
 4 files changed, 110 insertions(+), 16 deletions(-)

diff --git a/src/core/transforms/matrix4x4_avx2_p.h b/src/core/transforms/matrix4x4_avx2_p.h
index 363aaa6e9..329ff4610 100644
--- a/src/core/transforms/matrix4x4_avx2_p.h
+++ b/src/core/transforms/matrix4x4_avx2_p.h
@@ -473,15 +473,13 @@ public:
 
     Vector3D_SSE mapVector(const Vector3D_SSE &vector) const
     {
-        const __m128 row1 = _mm_set_ps(0.0f, m13(), m12(), m11());
-        const __m128 row2 = _mm_set_ps(0.0f, m23(), m22(), m21());
-        const __m128 row3 = _mm_set_ps(0.0f, m33(), m32(), m31());
+        const Vector3D_SSE row1(m11(), m12(), m13());
+        const Vector3D_SSE row2(m21(), m22(), m23());
+        const Vector3D_SSE row3(m31(), m32(), m33());
 
-        const __m128 tmp = _mm_add_ps(_mm_mul_ps(vector.m_xyzw, row1), _mm_mul_ps(vector.m_xyzw, row2));
-
-        Vector3D_SSE v(Qt::Uninitialized);
-        v.m_xyzw = _mm_add_ps(tmp, _mm_mul_ps(vector.m_xyzw, row3));
-        return v;
+        return Vector3D_SSE(Vector3D_SSE::dotProduct(row1, vector),
+                            Vector3D_SSE::dotProduct(row2, vector),
+                            Vector3D_SSE::dotProduct(row3, vector));
     }
 
     friend Vector4D operator*(const Vector4D &vector, const Matrix4x4_AVX2 &matrix);
diff --git a/src/core/transforms/matrix4x4_sse_p.h b/src/core/transforms/matrix4x4_sse_p.h
index be314ca4d..8ba7f8e17 100644
--- a/src/core/transforms/matrix4x4_sse_p.h
+++ b/src/core/transforms/matrix4x4_sse_p.h
@@ -356,15 +356,13 @@ public:
 
     Q_ALWAYS_INLINE Vector3D_SSE mapVector(const Vector3D_SSE &vector) const
     {
-        const __m128 row1 = _mm_set_ps(0.0f, m13(), m12(), m11());
-        const __m128 row2 = _mm_set_ps(0.0f, m23(), m22(), m21());
-        const __m128 row3 = _mm_set_ps(0.0f, m33(), m32(), m31());
+        const Vector3D_SSE row1(m11(), m12(), m13());
+        const Vector3D_SSE row2(m21(), m22(), m23());
+        const Vector3D_SSE row3(m31(), m32(), m33());
 
-        const __m128 tmp = _mm_add_ps(_mm_mul_ps(vector.m_xyzw, row1), _mm_mul_ps(vector.m_xyzw, row2));
-
-        Vector3D_SSE v(Qt::Uninitialized);
-        v.m_xyzw = _mm_add_ps(tmp, _mm_mul_ps(vector.m_xyzw, row3));
-        return v;
+        return Vector3D_SSE(Vector3D_SSE::dotProduct(row1, vector),
+                            Vector3D_SSE::dotProduct(row2, vector),
+                            Vector3D_SSE::dotProduct(row3, vector));
     }
 
     friend Q_ALWAYS_INLINE Vector4D operator*(const Vector4D &vector, const Matrix4x4_SSE &matrix);
diff --git a/tests/auto/core/matrix4x4_avx2/tst_matrix4x4_avx2.cpp b/tests/auto/core/matrix4x4_avx2/tst_matrix4x4_avx2.cpp
index 144df6711..12eaab263 100644
--- a/tests/auto/core/matrix4x4_avx2/tst_matrix4x4_avx2.cpp
+++ b/tests/auto/core/matrix4x4_avx2/tst_matrix4x4_avx2.cpp
@@ -474,6 +474,55 @@ private Q_SLOTS:
             QCOMPARE(row.w(), 44.0f);
         }
     }
+
+    void checkVectorMapVector()
+    {
+        {
+            // GIVEN
+            QMatrix4x4 tmpMat;
+            QVector3D tmpVec3(1.0f, 0.0f, 0.0f);
+            tmpMat.rotate(90.f, 0.f, 1.f, 0.f);
+
+            Matrix4x4_AVX2 mat(tmpMat);
+            Vector3D vec3(tmpVec3);
+
+            // WHEN
+            const Vector3D resultingVec = mat.mapVector(vec3);
+
+            // THEN
+            QCOMPARE(resultingVec.toQVector3D(), tmpMat.mapVector(tmpVec3));
+        }
+        {
+            // GIVEN
+            QMatrix4x4 tmpMat;
+            QVector3D tmpVec3(0.0f, 0.0f, -1.0f);
+            tmpMat.rotate(90.f, 0.f, 1.f, 0.f);
+
+            Matrix4x4_AVX2 mat(tmpMat);
+            Vector3D vec3(tmpVec3);
+
+            // WHEN
+            const Vector3D resultingVec = mat.mapVector(vec3);
+
+            // THEN
+            QCOMPARE(resultingVec.toQVector3D(), tmpMat.mapVector(tmpVec3));
+        }
+        {
+            // GIVEN
+            QMatrix4x4 tmpMat;
+            QVector3D tmpVec3(3.0f, -3.0f, -1.0f);
+            tmpMat.rotate(90.f, 0.33f, 0.33f, 0.33f);
+
+            Matrix4x4_AVX2 mat(tmpMat);
+            Vector3D vec3(tmpVec3);
+
+            // WHEN
+            const Vector3D resultingVec = mat.mapVector(vec3);
+
+            // THEN
+            QCOMPARE(resultingVec.toQVector3D(), tmpMat.mapVector(tmpVec3));
+        }
+    }
 };
 
 QTEST_MAIN(tst_Matrix4x4_AVX2)
diff --git a/tests/auto/core/matrix4x4_sse/tst_matrix4x4_sse.cpp b/tests/auto/core/matrix4x4_sse/tst_matrix4x4_sse.cpp
index bbd6596d4..dccf90d10 100644
--- a/tests/auto/core/matrix4x4_sse/tst_matrix4x4_sse.cpp
+++ b/tests/auto/core/matrix4x4_sse/tst_matrix4x4_sse.cpp
@@ -471,6 +471,55 @@ private Q_SLOTS:
             QCOMPARE(row.w(), 44.0f);
         }
     }
+
+    void checkVectorMapVector()
+    {
+        {
+            // GIVEN
+            QMatrix4x4 tmpMat;
+            QVector3D tmpVec3(1.0f, 0.0f, 0.0f);
+            tmpMat.rotate(90.f, 0.f, 1.f, 0.f);
+
+            Matrix4x4_SSE mat(tmpMat);
+            Vector3D vec3(tmpVec3);
+
+            // WHEN
+            const Vector3D resultingVec = mat.mapVector(vec3);
+
+            // THEN
+            QCOMPARE(resultingVec.toQVector3D(), tmpMat.mapVector(tmpVec3));
+        }
+        {
+            // GIVEN
+            QMatrix4x4 tmpMat;
+            QVector3D tmpVec3(0.0f, 0.0f, -1.0f);
+            tmpMat.rotate(90.f, 0.f, 1.f, 0.f);
+
+            Matrix4x4_SSE mat(tmpMat);
+            Vector3D vec3(tmpVec3);
+
+            // WHEN
+            const Vector3D resultingVec = mat.mapVector(vec3);
+
+            // THEN
+            QCOMPARE(resultingVec.toQVector3D(), tmpMat.mapVector(tmpVec3));
+        }
+        {
+            // GIVEN
+            QMatrix4x4 tmpMat;
+            QVector3D tmpVec3(3.0f, -3.0f, -1.0f);
+            tmpMat.rotate(90.f, 0.33f, 0.33f, 0.33f);
+
+            Matrix4x4_SSE mat(tmpMat);
+            Vector3D vec3(tmpVec3);
+
+            // WHEN
+            const Vector3D resultingVec = mat.mapVector(vec3);
+
+            // THEN
+            QCOMPARE(resultingVec.toQVector3D(), tmpMat.mapVector(tmpVec3));
+        }
+    }
 };
 
 QTEST_MAIN(tst_Matrix4x4_SSE)
-- 
cgit v1.2.3