summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHans Wennborg <hans@hanshq.net>2019-02-18 11:21:42 +0000
committerHans Wennborg <hans@hanshq.net>2019-02-18 11:21:42 +0000
commit28a9d6338253d6dbe6048c5ff686536a1eb92005 (patch)
tree83c30017054d5b28fb9a5f7cced167f0d75fbe43
parent2a7ad002c0420704d1b6736a3eec10882429827f (diff)
Merging r354034 and r354117:
------------------------------------------------------------------------ r354034 | rksimon | 2019-02-14 15:45:32 +0100 (Thu, 14 Feb 2019) | 1 line [X86][AVX] Add PR40730 test case ------------------------------------------------------------------------ ------------------------------------------------------------------------ r354117 | rksimon | 2019-02-15 12:39:21 +0100 (Fri, 15 Feb 2019) | 9 lines [X86][AVX] lowerShuffleAsLanePermuteAndPermute - fully populate the lane shuffle mask (PR40730) As detailed on PR40730, we are not correctly filling in the lane shuffle mask (D53148/rL344446) - we fill in for the correct src lane but don't add it to the correct mask element, so any reference to the correct element is likely to see an UNDEF mask index. This allows constant folding to propagate UNDEFs prior to the lane mask being (correctly) lowered to vperm2f128. This patch fixes the issue by fully populating the lane shuffle mask - this is more than is necessary (if we only filled in the required mask elements we might be able to match other shuffle instructions - broadcasts etc.), but its the most cautious approach as this needs to be cherrypicked into the 8.0.0 release branch. Differential Revision: https://reviews.llvm.org/D58237 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_80@354260 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp13
-rw-r--r--test/CodeGen/X86/pr40730.ll36
2 files changed, 47 insertions, 2 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index f4f37a894620..e1a6d22f0616 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -13884,7 +13884,6 @@ static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
int NumEltsPerLane = NumElts / NumLanes;
SmallVector<int, 4> SrcLaneMask(NumLanes, SM_SentinelUndef);
- SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
SmallVector<int, 16> PermMask(NumElts, SM_SentinelUndef);
for (int i = 0; i != NumElts; ++i) {
@@ -13899,10 +13898,20 @@ static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
return SDValue();
SrcLaneMask[DstLane] = SrcLane;
- LaneMask[i] = (SrcLane * NumEltsPerLane) + (i % NumEltsPerLane);
PermMask[i] = (DstLane * NumEltsPerLane) + (M % NumEltsPerLane);
}
+ // Make sure we set all elements of the lane mask, to avoid undef propagation.
+ SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
+ for (int DstLane = 0; DstLane != NumLanes; ++DstLane) {
+ int SrcLane = SrcLaneMask[DstLane];
+ if (0 <= SrcLane)
+ for (int j = 0; j != NumEltsPerLane; ++j) {
+ LaneMask[(DstLane * NumEltsPerLane) + j] =
+ (SrcLane * NumEltsPerLane) + j;
+ }
+ }
+
// If we're only shuffling a single lowest lane and the rest are identity
// then don't bother.
// TODO - isShuffleMaskInputInPlace could be extended to something like this.
diff --git a/test/CodeGen/X86/pr40730.ll b/test/CodeGen/X86/pr40730.ll
new file mode 100644
index 000000000000..12b372dea33b
--- /dev/null
+++ b/test/CodeGen/X86/pr40730.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+
+define <8 x i32> @shuffle_v8i32_0dcd3f14(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: shuffle_v8i32_0dcd3f14:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
+; CHECK-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
+; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,1,0]
+; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; CHECK-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,2,3]
+; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,3,2]
+; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5],ymm0[6,7]
+; CHECK-NEXT: retq
+ %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 13, i32 12, i32 13, i32 3, i32 15, i32 1, i32 4>
+ ret <8 x i32> %shuffle
+}
+
+; CHECK: .LCPI1_0:
+; CHECK-NEXT: .quad 60129542157
+; CHECK-NEXT: .quad 60129542157
+; CHECK-NEXT: .quad 68719476736
+; CHECK-NEXT: .quad 60129542157
+
+define <8 x i32> @shuffle_v8i32_0dcd3f14_constant(<8 x i32> %a0) {
+; CHECK-LABEL: shuffle_v8i32_0dcd3f14_constant:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3]
+; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,1,0]
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],mem[1,2,3],ymm0[4],mem[5],ymm0[6,7]
+; CHECK-NEXT: retq
+ %res = shufflevector <8 x i32> %a0, <8 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, <8 x i32> <i32 0, i32 13, i32 12, i32 13, i32 3, i32 15, i32 1, i32 4>
+ ret <8 x i32> %res
+}