diff options
author | Alexey Bataev <a.bataev@outlook.com> | 2024-04-03 13:10:16 -0700 |
---|---|---|
committer | Alexey Bataev <a.bataev@outlook.com> | 2024-04-03 13:10:16 -0700 |
commit | fa2bbea14df3273b3403f34cc295c56233fdbd0d (patch) | |
tree | cae18c19a634c9e70f54ae53893be9b14a388dbc | |
parent | 899855d2b11856a44e530fffe854d76be69b9008 (diff) |
Revert "[SLP]Improve minbitwidth analysis for operands of IToFP and ICmp instructions."
This reverts commit 899855d2b11856a44e530fffe854d76be69b9008 to fix the
issue reported in https://lab.llvm.org/buildbot/#/builders/165/builds/51659.
3 files changed, 16 insertions, 48 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 779c7b7f08b7..9b87e6e11e06 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1107,7 +1107,7 @@ public: MinBWs.clear(); ReductionBitWidth = 0; CastMaxMinBWSizes.reset(); - ExtraBitWidthNodes.clear(); + TruncNodes.clear(); InstrElementSize.clear(); UserIgnoreList = nullptr; PostponedGathers.clear(); @@ -3683,9 +3683,8 @@ private: /// type sizes, used in the tree. std::optional<std::pair<unsigned, unsigned>> CastMaxMinBWSizes; - /// Indices of the vectorized nodes, which supposed to be the roots of the new - /// bitwidth analysis attempt, like trunc, IToFP or ICmp. - DenseSet<unsigned> ExtraBitWidthNodes; + /// Indices of the vectorized trunc nodes. + DenseSet<unsigned> TruncNodes; }; } // end namespace slpvectorizer @@ -6613,18 +6612,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, PrevMaxBW), std::min<unsigned>(DL->getTypeSizeInBits(VL0->getType()), PrevMinBW)); - ExtraBitWidthNodes.insert(VectorizableTree.size() + 1); - } else if (ShuffleOrOp == Instruction::SIToFP || - ShuffleOrOp == Instruction::UIToFP) { - unsigned NumSignBits = - ComputeNumSignBits(VL0->getOperand(0), *DL, 0, AC, nullptr, DT); - if (auto *OpI = dyn_cast<Instruction>(VL0->getOperand(0))) { - APInt Mask = DB->getDemandedBits(OpI); - NumSignBits = std::max(NumSignBits, Mask.countl_zero()); - } - if (NumSignBits * 2 >= - DL->getTypeSizeInBits(VL0->getOperand(0)->getType())) - ExtraBitWidthNodes.insert(VectorizableTree.size() + 1); + TruncNodes.insert(VectorizableTree.size()); } TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); @@ -6672,18 +6660,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, TE->setOperand(1, Right); buildTree_rec(Left, Depth + 1, {TE, 0}); buildTree_rec(Right, Depth + 1, {TE, 1}); - if (ShuffleOrOp == Instruction::ICmp) { - unsigned NumSignBits0 = - ComputeNumSignBits(VL0->getOperand(0), *DL, 0, AC, nullptr, DT); - if (NumSignBits0 * 2 >= - DL->getTypeSizeInBits(VL0->getOperand(0)->getType())) - ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx); - unsigned NumSignBits1 = - ComputeNumSignBits(VL0->getOperand(1), *DL, 0, AC, nullptr, DT); - if (NumSignBits1 * 2 >= - DL->getTypeSizeInBits(VL0->getOperand(1)->getType())) - ExtraBitWidthNodes.insert(getOperandEntry(TE, 1)->Idx); - } return; } case Instruction::Select: @@ -14326,8 +14302,7 @@ void BoUpSLP::computeMinimumValueSizes() { bool IsStoreOrInsertElt = VectorizableTree.front()->getOpcode() == Instruction::Store || VectorizableTree.front()->getOpcode() == Instruction::InsertElement; - if ((IsStoreOrInsertElt || UserIgnoreList) && - ExtraBitWidthNodes.size() <= 1 && + if ((IsStoreOrInsertElt || UserIgnoreList) && TruncNodes.size() <= 1 && (!CastMaxMinBWSizes || CastMaxMinBWSizes->second == 0 || CastMaxMinBWSizes->first / CastMaxMinBWSizes->second <= 2)) return; @@ -14531,21 +14506,16 @@ void BoUpSLP::computeMinimumValueSizes() { IsTopRoot = false; IsProfitableToDemoteRoot = true; - if (ExtraBitWidthNodes.empty()) { + if (TruncNodes.empty()) { NodeIdx = VectorizableTree.size(); } else { unsigned NewIdx = 0; do { - NewIdx = *ExtraBitWidthNodes.begin(); - ExtraBitWidthNodes.erase(ExtraBitWidthNodes.begin()); - } while (NewIdx <= NodeIdx && !ExtraBitWidthNodes.empty()); + NewIdx = *TruncNodes.begin() + 1; + TruncNodes.erase(TruncNodes.begin()); + } while (NewIdx <= NodeIdx && !TruncNodes.empty()); NodeIdx = NewIdx; - IsTruncRoot = any_of( - VectorizableTree[NewIdx]->UserTreeIndices, [](const EdgeInfo &EI) { - return EI.EdgeIdx == 0 && - EI.UserTE->getOpcode() == Instruction::ICmp && - !EI.UserTE->isAltShuffle(); - }); + IsTruncRoot = true; } // If the maximum bit width we compute is less than the with of the roots' diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll index e1fd8a7ec88a..fc28d7ab4ee7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll @@ -19,8 +19,8 @@ define i1 @test(ptr noalias %0, i64 %1, ptr noalias %p, ptr %p1) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <2 x i24> [[TMP8]], <i24 24, i24 24> ; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP9]], <2 x i24> <i24 23, i24 23>, <2 x i24> [[TMP8]] ; CHECK-NEXT: [[TMP23:%.*]] = trunc <2 x i24> [[TMP10]] to <2 x i8> -; CHECK-NEXT: [[TMP26:%.*]] = zext <2 x i8> [[TMP23]] to <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i32> [[TMP26]], <i32 254, i32 254> +; CHECK-NEXT: [[TMP11:%.*]] = zext <2 x i8> [[TMP23]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i32> [[TMP11]], <i32 254, i32 254> ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <2 x i32> [[TMP12]], <i32 4, i32 4> ; CHECK-NEXT: [[TMP25:%.*]] = select <2 x i1> [[TMP13]], <2 x i8> <i8 2, i8 2>, <2 x i8> [[TMP23]] ; CHECK-NEXT: [[TMP14:%.*]] = zext <2 x i8> [[TMP25]] to <2 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll index 668d3c3c8c82..136ab6400773 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll @@ -10,14 +10,12 @@ define void @test() { ; CHECK-NEXT: [[TMP3:%.*]] = select i1 false, i32 0, i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i8> <i8 poison, i8 0, i8 poison, i8 poison>, i8 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1> -; CHECK-NEXT: [[TMP15:%.*]] = trunc <4 x i8> [[TMP5]] to <4 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i8> [[TMP5]] to <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i8> [[TMP8]] to <4 x i1> -; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i1> zeroinitializer, [[TMP15]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i1> [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i1> [[TMP15]], <4 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i1> [[TMP16]] to <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i8> [[TMP8]] to <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i32> zeroinitializer, [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i32> [[TMP9]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, <4 x i32> <i32 4, i32 5, i32 2, i32 7> ; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP13]]) |