author    Alexey Bataev <a.bataev@outlook.com>  2024-04-08 17:55:22 +0000
committer Alexey Bataev <a.bataev@outlook.com>  2024-04-08 17:55:22 +0000
commit    48da8bd547f7904ba110c87c82aff8a8c43e2d01 (patch)
tree      5c8448fe2d8cebce77e5cba9b6d9f8a959326ecf
parent    78c50bbd45de595e9992bf97aa097f7f589f8370 (diff)
Created using spr 1.3.5
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp                                               67
-rw-r--r--  llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll  23
2 files changed, 55 insertions(+), 35 deletions(-)
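This patch drops the CM_Widen_Reverse bail-out from foldTailWithEVL() and teaches the vp_load/vp_store lowering helpers to emit llvm.experimental.vp.reverse for reversed accesses, so EVL-based tail folding no longer has to give up on loops with reverse loads and stores. A minimal IR sketch of the resulting load-side pattern, assuming a <vscale x 4 x i32> access; %avl, %addr and %allones are placeholder names for the remaining trip count, the adjusted address and an all-true mask (the concrete values appear in the updated RISC-V test below):

  %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 4, i1 true)
  %vp.load = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 %addr, <vscale x 4 x i1> %allones, i32 %evl)
  ; undo the reversed element order within the first %evl lanes
  %vp.reverse = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> %vp.load, <vscale x 4 x i1> %allones, i32 %evl)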
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9e22dce38477..797a3fd1e9db 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1579,13 +1579,7 @@ public:
/// Returns true if VP intrinsics with explicit vector length support should
/// be generated in the tail folded loop.
bool foldTailWithEVL() const {
- return getTailFoldingStyle() == TailFoldingStyle::DataWithEVL &&
- // FIXME: remove this once vp_reverse is supported.
- none_of(
- WideningDecisions,
- [](const std::pair<std::pair<Instruction *, ElementCount>,
- std::pair<InstWidening, InstructionCost>>
- &Data) { return Data.second.first == CM_Widen_Reverse; });
+ return getTailFoldingStyle() == TailFoldingStyle::DataWithEVL;
}
/// Returns true if the Phi is part of an inloop reduction.
@@ -9361,10 +9355,17 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
/// Creates either vp_store or vp_scatter intrinsics calls to represent
/// predicated store/scatter.
-static Instruction *
-lowerStoreUsingVectorIntrinsics(IRBuilderBase &Builder, Value *Addr,
- Value *StoredVal, bool IsScatter, Value *Mask,
- Value *EVL, const Align &Alignment) {
+static Instruction *lowerStoreUsingVectorIntrinsics(
+ IRBuilderBase &Builder, Value *Addr, Value *StoredVal, bool IsScatter,
+ bool IsReverse, Value *Mask, Value *EVL, const Align &Alignment) {
+ if (IsReverse) {
+ auto *StoredValTy = cast<VectorType>(StoredVal->getType());
+ Value *BlockInMaskPart =
+ Builder.getAllOnesMask(StoredValTy->getElementCount());
+ StoredVal = Builder.CreateIntrinsic(
+ StoredValTy, Intrinsic::experimental_vp_reverse,
+ {StoredVal, BlockInMaskPart, EVL}, nullptr, "vp.reverse");
+ }
CallInst *Call;
if (IsScatter) {
Call = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
@@ -9384,11 +9385,9 @@ lowerStoreUsingVectorIntrinsics(IRBuilderBase &Builder, Value *Addr,
/// Creates either vp_load or vp_gather intrinsics calls to represent
/// predicated load/gather.
-static Instruction *lowerLoadUsingVectorIntrinsics(IRBuilderBase &Builder,
- VectorType *DataTy,
- Value *Addr, bool IsGather,
- Value *Mask, Value *EVL,
- const Align &Alignment) {
+static Instruction *lowerLoadUsingVectorIntrinsics(
+ IRBuilderBase &Builder, VectorType *DataTy, Value *Addr, bool IsGather,
+ bool IsReverse, Value *Mask, Value *EVL, const Align &Alignment) {
CallInst *Call;
if (IsGather) {
Call =
@@ -9402,7 +9401,14 @@ static Instruction *lowerLoadUsingVectorIntrinsics(IRBuilderBase &Builder,
}
Call->addParamAttr(
0, Attribute::getWithAlignment(Call->getContext(), Alignment));
- return Call;
+ Instruction *Res = Call;
+ if (IsReverse) {
+ Value *BlockInMaskPart = Builder.getAllOnesMask(DataTy->getElementCount());
+ Res = Builder.CreateIntrinsic(DataTy, Intrinsic::experimental_vp_reverse,
+ {Res, BlockInMaskPart, EVL}, nullptr,
+ "vp.reverse");
+ }
+ return Res;
}
void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
@@ -9430,7 +9436,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
// a null all-one mask is a null mask.
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *Mask = State.get(getMask(), Part);
- if (isReverse())
+ if (isReverse() && !State.EVL)
Mask = Builder.CreateVectorReverse(Mask, "reverse");
BlockInMaskParts[Part] = Mask;
}
@@ -9456,11 +9462,20 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
// is created only if TTI prefers predicated vectorization, thus if EVL
// is not nullptr it also implies preference for predicated
// vectorization.
- // FIXME: Support reverse store after vp_reverse is added.
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+ if (isMaskRequired && isReverse() && !getMask()->isLiveIn()) {
+ VectorType *MaskTy = cast<VectorType>(MaskPart->getType());
+ Value *BlockInMaskPart =
+ Builder.getAllOnesMask(MaskTy->getElementCount());
+ MaskPart = Builder.CreateIntrinsic(
+ MaskTy, Intrinsic::experimental_vp_reverse,
+ {MaskPart, BlockInMaskPart, EVL}, nullptr, "vp.reverse.mask");
+ BlockInMaskParts[Part] = MaskPart;
+ }
NewSI = lowerStoreUsingVectorIntrinsics(
Builder, State.get(getAddr(), Part, !CreateGatherScatter),
- StoredVal, CreateGatherScatter, MaskPart, EVL, Alignment);
+ StoredVal, CreateGatherScatter, isReverse(), MaskPart, EVL,
+ Alignment);
} else if (CreateGatherScatter) {
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
Value *VectorGep = State.get(getAddr(), Part);
@@ -9504,11 +9519,19 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
// is created only if TTI prefers predicated vectorization, thus if EVL
// is not nullptr it also implies preference for predicated
// vectorization.
- // FIXME: Support reverse loading after vp_reverse is added.
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+ if (isMaskRequired && isReverse() && !getMask()->isLiveIn()) {
+ VectorType *MaskTy = cast<VectorType>(MaskPart->getType());
+ Value *BlockInMaskPart =
+ Builder.getAllOnesMask(MaskTy->getElementCount());
+ MaskPart = Builder.CreateIntrinsic(
+ MaskTy, Intrinsic::experimental_vp_reverse,
+ {MaskPart, BlockInMaskPart, EVL}, nullptr, "vp.reverse.mask");
+ BlockInMaskParts[Part] = MaskPart;
+ }
NewLI = lowerLoadUsingVectorIntrinsics(
Builder, DataTy, State.get(getAddr(), Part, !CreateGatherScatter),
- CreateGatherScatter, MaskPart, EVL, Alignment);
+ CreateGatherScatter, isReverse(), MaskPart, EVL, Alignment);
} else if (CreateGatherScatter) {
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
Value *VectorGep = State.get(getAddr(), Part);
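When the access carries a real (non-all-ones) mask, the hunks above also reverse the mask itself with vp.reverse before handing it to the vp intrinsics. A sketch of that mask reversal for a <vscale x 4 x i1> mask, again with placeholder value names (%mask, %allones, %evl are assumptions for illustration):

  ; %mask is the per-lane block-in mask, %allones an all-true mask, %evl the explicit vector length
  %vp.reverse.mask = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> %mask, <vscale x 4 x i1> %allones, i32 %evl)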
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
index f2222e0a1f93..f839eafe9b2a 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
@@ -30,14 +30,11 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[INDEX]]
+; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
+; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
-; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[INDEX]], i64 0
-; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; IF-EVL-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
-; IF-EVL-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]]
-; IF-EVL-NEXT: [[VEC_IV:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP9]]
-; IF-EVL-NEXT: [[TMP10:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1023, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[TMP7]], -1
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
@@ -46,9 +43,8 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
; IF-EVL-NEXT: [[TMP16:%.*]] = sub i64 1, [[TMP14]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 [[TMP15]]
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[TMP16]]
-; IF-EVL-NEXT: [[REVERSE:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[TMP10]])
-; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP18]], i32 4, <vscale x 4 x i1> [[REVERSE]], <vscale x 4 x i32> poison)
-; IF-EVL-NEXT: [[REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[WIDE_MASKED_LOAD]])
+; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP8]])
+; IF-EVL-NEXT: [[TMP31:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP8]])
; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4
@@ -56,9 +52,10 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
; IF-EVL-NEXT: [[TMP23:%.*]] = sub i64 1, [[TMP21]]
; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[TMP22]]
; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 [[TMP23]]
-; IF-EVL-NEXT: [[REVERSE4:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[TMP10]])
-; IF-EVL-NEXT: [[REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[REVERSE3]])
-; IF-EVL-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[REVERSE5]], ptr [[TMP25]], i32 4, <vscale x 4 x i1> [[REVERSE4]])
+; IF-EVL-NEXT: [[TMP28:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[TMP31]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP8]])
+; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP28]], ptr align 4 [[TMP25]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP8]])
+; IF-EVL-NEXT: [[TMP29:%.*]] = zext i32 [[TMP8]] to i64
+; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP29]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
; IF-EVL-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
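On the store side the value to be stored is reversed first and then passed to llvm.vp.store, mirroring the CHECK lines above. A minimal sketch with the same placeholder names:

  ; %val is the vector to store in reverse order, %addr the adjusted address
  %val.rev = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i1> %allones, i32 %evl)
  call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %val.rev, ptr align 4 %addr, <vscale x 4 x i1> %allones, i32 %evl)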