summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Alexey Bataev <a.bataev@outlook.com> 2024-04-23 06:21:15 -0700
committer Alexey Bataev <a.bataev@outlook.com> 2024-04-23 06:45:43 -0700
commit b4a0fd40f1b94eac571d29ee7695b492934d9bfc (patch)
tree 35442f2bded538a74a950ff82154de48f230db4e
parent e0a763c490d8ef58dca867e0ef834978ccf8e17d (diff)
[SLP] Fix PR89635: do not try to vectorize single-gather alternate node.
There is no need to try to vectorize a graph that consists of a single gather/buildvector node with an alternate opcode; it is not profitable. In other cases, the last instruction must be used as the insertion point for the vectorized code.
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 6
-rw-r--r-- llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-sindle-bv.ll 25
2 files changed, 30 insertions, 1 deletion
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 6ac380a6ab6c..a1a28076881c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9640,6 +9640,7 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
bool IsAllowedSingleBVNode =
VectorizableTree.size() > 1 ||
(VectorizableTree.size() == 1 && VectorizableTree.front()->getOpcode() &&
+ !VectorizableTree.front()->isAltShuffle() &&
VectorizableTree.front()->getOpcode() != Instruction::PHI &&
VectorizableTree.front()->getOpcode() != Instruction::GetElementPtr &&
allSameBlock(VectorizableTree.front()->Scalars));
@@ -11032,7 +11033,10 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
isUsedOutsideBlock(V);
}) ||
(E->State == TreeEntry::NeedToGather && E->Idx == 0 &&
- all_of(E->Scalars, IsaPred<ExtractElementInst, UndefValue>)))
+ all_of(E->Scalars, [](Value *V) {
+ return isa<ExtractElementInst, UndefValue>(V) ||
+ areAllOperandsNonInsts(V);
+ })))
Res.second = FindLastInst();
else
Res.second = FindFirstInst();
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-sindle-bv.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-sindle-bv.ll
new file mode 100644
index 000000000000..89268837c9d8
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-sindle-bv.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define <2 x i32> @test(i32 %arg) {
+; CHECK-LABEL: define <2 x i32> @test(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG]], 0
+; CHECK-NEXT: [[MUL:%.*]] = mul i32 0, 1
+; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[OR]], [[MUL]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]]
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[OR]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[MUL]], i32 1
+; CHECK-NEXT: ret <2 x i32> [[TMP1]]
+;
+bb:
+ %or = or i32 %arg, 0
+ %mul = mul i32 0, 1
+ %mul1 = mul i32 %or, %mul
+ %cmp = icmp ugt i32 0, %mul1
+ %0 = insertelement <2 x i32> poison, i32 %or, i32 0
+ %1 = insertelement <2 x i32> %0, i32 %mul, i32 1
+ ret <2 x i32> %1
+}
+