diff options
author | Florian Hahn <flo@fhahn.com> | 2024-04-23 13:59:01 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-23 13:59:01 +0100 |
commit | dadf6f2c5aaf83b27dab181be91c5814be1fc466 (patch) | |
tree | 1faf6317dbcd5be11fe90c9d4188f99cb189b1d4 | |
parent | 8ab3caf4d3acef29f373e09bc6a0ac459918930e (diff) |
[VPlan] Ignore incoming values with constant false mask. (#89384)
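Ignore incoming values with constant false masks when trying to simplify
VPBlendRecipes. Such a value can never be selected, so it does not need to
match the first incoming value for the blend to be considered redundant.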
As a follow-on optimization, we should also be able to drop all incoming
values with false masks by creating a new VPBlendRecipe with those
operands dropped.
PR: https://github.com/llvm/llvm-project/pull/89384
3 files changed, 16 insertions, 12 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index ae7717eb7cc9..50b08bbb7ebf 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -51,8 +51,9 @@ template <typename Class> struct bind_ty { }; /// Match a specified integer value or vector of all elements of that -/// value. -struct specific_intval { +/// value. \p BitWidth optionally specifies the bitwidth the matched constant +/// must have. If it is 0, the matched constant can have any bitwidth. +template <unsigned BitWidth = 0> struct specific_intval { APInt Val; specific_intval(APInt V) : Val(std::move(V)) {} @@ -66,15 +67,21 @@ struct specific_intval { if (const auto *C = dyn_cast<Constant>(V)) CI = dyn_cast_or_null<ConstantInt>( C->getSplatValue(/*AllowPoison=*/false)); + if (!CI) + return false; - return CI && APInt::isSameValue(CI->getValue(), Val); + assert((BitWidth == 0 || CI->getBitWidth() == BitWidth) && + "Trying the match constant with unexpected bitwidth."); + return APInt::isSameValue(CI->getValue(), Val); } }; -inline specific_intval m_SpecificInt(uint64_t V) { - return specific_intval(APInt(64, V)); +inline specific_intval<0> m_SpecificInt(uint64_t V) { + return specific_intval<0>(APInt(64, V)); } +inline specific_intval<1> m_False() { return specific_intval<1>(APInt(64, 0)); } + /// Matching combinators template <typename LTy, typename RTy> struct match_combine_or { LTy L; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 9580dc4a27f6..d7bc128dcfe6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -884,18 +884,19 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) { /// Try to simplify recipe \p R. 
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { + using namespace llvm::VPlanPatternMatch; // Try to remove redundant blend recipes. if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) { VPValue *Inc0 = Blend->getIncomingValue(0); for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I) - if (Inc0 != Blend->getIncomingValue(I)) + if (Inc0 != Blend->getIncomingValue(I) && + !match(Blend->getMask(I), m_False())) return; Blend->replaceAllUsesWith(Inc0); Blend->eraseFromParent(); return; } - using namespace llvm::VPlanPatternMatch; VPValue *A; if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) { VPValue *Trunc = R.getVPSingleValue(); diff --git a/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll b/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll index c622925510dd..d0c74897f264 100644 --- a/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll +++ b/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll @@ -10,17 +10,13 @@ define void @test_not_first_lane_only_constant(ptr %A, ptr noalias %B) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x ptr> poison, ptr [[B]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT3]], <4 x ptr> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 ; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[A]], i16 [[TMP0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> zeroinitializer, <4 x ptr> poison, <4 x ptr> [[BROADCAST_SPLAT4]] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x 
ptr> [[PREDPHI]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 2 +; CHECK-NEXT: [[TMP13:%.*]] = load i16, ptr %B, align 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i16> poison, i16 [[TMP13]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT5]], <4 x i16> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0 |