From 49a110db4c43835681bb89671f8f73c8d8c7c28c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 17 Apr 2012 05:16:56 +0000 Subject: Convert vperm2f128 and vperm2i128 intrinsics back to using llvm intrinsics. Unfortunately, these instructions have behavior that can't be modeled with shuffle vector. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@154906 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/BuiltinsX86.def | 4 ++++ lib/Headers/avx2intrin.h | 6 +----- lib/Headers/avxintrin.h | 26 +++----------------------- test/CodeGen/avx-shuffle-builtins.c | 6 +++--- test/CodeGen/avx2-builtins.c | 2 +- test/CodeGen/builtins-x86.c | 3 +++ 6 files changed, 15 insertions(+), 32 deletions(-) diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index 9eddce2427..4aea980a9d 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -412,6 +412,9 @@ BUILTIN(__builtin_ia32_cvtps2pd256, "V4dV4f", "") BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "") BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "") BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "") +BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIc", "") +BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIc", "") +BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIc", "") BUILTIN(__builtin_ia32_vinsertf128_pd256, "V4dV4dV2dIc", "") BUILTIN(__builtin_ia32_vinsertf128_ps256, "V8fV8fV4fIc", "") BUILTIN(__builtin_ia32_vinsertf128_si256, "V8iV8iV4iIc", "") @@ -561,6 +564,7 @@ BUILTIN(__builtin_ia32_pbroadcastd128, "V4iV4i", "") BUILTIN(__builtin_ia32_pbroadcastq128, "V2LLiV2LLi", "") BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "") BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8f", "") +BUILTIN(__builtin_ia32_permti256, "V4LLiV4LLiV4LLiIc", "") BUILTIN(__builtin_ia32_extract128i256, "V2LLiV4LLiIc", "") BUILTIN(__builtin_ia32_insert128i256, "V4LLiV4LLiV2LLiIc", "") BUILTIN(__builtin_ia32_maskloadd256, "V8iV8iC*V8i", "") diff --git a/lib/Headers/avx2intrin.h b/lib/Headers/avx2intrin.h index d6fef59a20..884c46d4d6 100644 --- a/lib/Headers/avx2intrin.h +++ b/lib/Headers/avx2intrin.h @@ -841,11 +841,7 @@ _mm256_permutevar8x32_ps(__m256 a, __m256 b) #define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \ __m256i __V1 = (V1); \ __m256i __V2 = (V2); \ - __builtin_shufflevector(__V1, __V2, \ - ((M) & 0x3) * 2, \ - ((M) & 0x3) * 2 + 1, \ - (((M) & 0x30) >> 4) * 2, \ - (((M) & 0x30) >> 4) * 2 + 1); }) + (__m256i)__builtin_ia32_permti256(__V1, __V2, (M)); }) #define _mm256_extracti128_si256(A, O) __extension__ ({ \ __m256i __A = (A); \ diff --git a/lib/Headers/avxintrin.h b/lib/Headers/avxintrin.h index 7a0ec3fbd6..ee7f83572f 100644 --- a/lib/Headers/avxintrin.h +++ b/lib/Headers/avxintrin.h @@ -289,37 +289,17 @@ _mm256_permutevar_ps(__m256 a, __m256i c) #define _mm256_permute2f128_pd(V1, V2, M) __extension__ ({ \ __m256d __V1 = (V1); \ __m256d __V2 = (V2); \ - (__m256d)__builtin_shufflevector((__v4df)__V1, (__v4df)__V2, \ - ((M) & 0x3) * 2, \ - ((M) & 0x3) * 2 + 1, \ - (((M) & 0x30) >> 4) * 2, \ - (((M) & 0x30) >> 4) * 2 + 1); }) + (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)__V1, (__v4df)__V2, (M)); }) #define _mm256_permute2f128_ps(V1, V2, M) __extension__ ({ \ __m256 __V1 = (V1); \ __m256 __V2 = (V2); \ - (__m256)__builtin_shufflevector((__v8sf)__V1, (__v8sf)__V2, \ - ((M) & 0x3) * 4, \ - ((M) & 0x3) * 4 + 1, \ - ((M) & 0x3) * 4 + 2, \ - ((M) & 0x3) * 4 + 3, \ - (((M) & 0x30) >> 4) * 4, \ - (((M) & 0x30) >> 4) * 4 + 1, \ - (((M) & 0x30) >> 4) * 4 + 2, \ - (((M) & 0x30) >> 4) * 4 + 3); }) + (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)__V1, (__v8sf)__V2, (M)); }) #define _mm256_permute2f128_si256(V1, V2, M) __extension__ ({ \ __m256i __V1 = (V1); \ __m256i __V2 = (V2); \ - (__m256i)__builtin_shufflevector((__v8si)__V1, (__v8si)__V2, \ - ((M) & 0x3) * 4, \ - ((M) & 0x3) * 4 + 1, \ - ((M) & 0x3) * 4 + 2, \ - ((M) & 0x3) * 4 + 3, \ - (((M) & 0x30) >> 4) * 4, \ - (((M) & 0x30) >> 4) * 4 + 1, \ - (((M) & 0x30) >> 4) * 4 + 2, \ - (((M) & 0x30) >> 4) * 4 + 3); }) + (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)__V1, (__v8si)__V2, (M)); }) /* Vector Blend */ #define _mm256_blend_pd(V1, V2, M) __extension__ ({ \ diff --git a/test/CodeGen/avx-shuffle-builtins.c b/test/CodeGen/avx-shuffle-builtins.c index 538ae50c93..d071f825aa 100644 --- a/test/CodeGen/avx-shuffle-builtins.c +++ b/test/CodeGen/avx-shuffle-builtins.c @@ -48,18 +48,18 @@ __m256 test_mm256_permute_ps(__m256 a) { __m256d test_mm256_permute2f128_pd(__m256d a, __m256d b) { // Check if the mask is correct - // CHECK: shufflevector{{.*}} + // CHECK: @llvm.x86.avx.vperm2f128.pd.256 return _mm256_permute2f128_pd(a, b, 0x31); } __m256 test_mm256_permute2f128_ps(__m256 a, __m256 b) { // Check if the mask is correct - // CHECK: shufflevector{{.*}} + // CHECK: @llvm.x86.avx.vperm2f128.ps.256 return _mm256_permute2f128_ps(a, b, 0x13); } __m256i test_mm256_permute2f128_si256(__m256i a, __m256i b) { // Check if the mask is correct - // CHECK: shufflevector{{.*}} + // CHECK: @llvm.x86.avx.vperm2f128.si.256 return _mm256_permute2f128_si256(a, b, 0x20); } diff --git a/test/CodeGen/avx2-builtins.c b/test/CodeGen/avx2-builtins.c index e56e55d205..7d166b5626 100644 --- a/test/CodeGen/avx2-builtins.c +++ b/test/CodeGen/avx2-builtins.c @@ -677,7 +677,7 @@ __m256i test_mm256_permute4x64_epi64(__m256i a) { } __m256i test_mm256_permute2x128_si256(__m256i a, __m256i b) { - // CHECK: shufflevector{{.*}} + // CHECK: @llvm.x86.avx2.vperm2i128 return _mm256_permute2x128_si256(a, b, 0x31); } diff --git a/test/CodeGen/builtins-x86.c b/test/CodeGen/builtins-x86.c index 30138d6374..acb5554db4 100644 --- a/test/CodeGen/builtins-x86.c +++ b/test/CodeGen/builtins-x86.c @@ -414,6 +414,9 @@ void f0() { tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d); tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d); tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f); + tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7); + tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7); + tmp_V8i = __builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7); tmp_V4d = __builtin_ia32_vinsertf128_pd256(tmp_V4d, tmp_V2d, 0x7); tmp_V8f = __builtin_ia32_vinsertf128_ps256(tmp_V8f, tmp_V4f, 0x7); tmp_V8i = __builtin_ia32_vinsertf128_si256(tmp_V8i, tmp_V4i, 0x7); -- cgit v1.2.3