diff options
5 files changed, 129 insertions, 165 deletions
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index a7ecf0dc1ba2..90487ae3bc2e 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -281,6 +281,10 @@ private: MachineInstr &MI, LostDebugLocObserver &LocObserver); + MachineInstrBuilder + getNeutralElementForVecReduce(unsigned Opcode, MachineIRBuilder &MIRBuilder, + LLT Ty); + public: /// Return the alignment to use for a stack temporary object with the given /// type. diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index e39fdae1ccbe..ad1003839e33 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -5159,6 +5159,42 @@ LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, return Legalized; } +MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce( + unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) { + assert(Ty.isScalar() && "Expected scalar type to make neutral element for"); + + switch (Opcode) { + default: + return MIRBuilder.buildUndef(Ty); + case TargetOpcode::G_VECREDUCE_ADD: + case TargetOpcode::G_VECREDUCE_OR: + case TargetOpcode::G_VECREDUCE_XOR: + case TargetOpcode::G_VECREDUCE_UMAX: + return MIRBuilder.buildConstant(Ty, 0); + case TargetOpcode::G_VECREDUCE_MUL: + return MIRBuilder.buildConstant(Ty, 1); + case TargetOpcode::G_VECREDUCE_AND: + case TargetOpcode::G_VECREDUCE_UMIN: + return MIRBuilder.buildConstant( + Ty, APInt::getAllOnes(Ty.getScalarSizeInBits())); + case TargetOpcode::G_VECREDUCE_SMAX: + return MIRBuilder.buildConstant( + Ty, APInt::getSignedMinValue(Ty.getSizeInBits())); + case TargetOpcode::G_VECREDUCE_SMIN: + return MIRBuilder.buildConstant( + Ty, APInt::getSignedMaxValue(Ty.getSizeInBits())); + case TargetOpcode::G_VECREDUCE_FADD: + return MIRBuilder.buildFConstant(Ty, -0.0); + case TargetOpcode::G_VECREDUCE_FMUL: + return MIRBuilder.buildFConstant(Ty, 1.0); + case TargetOpcode::G_VECREDUCE_FMINIMUM: + case TargetOpcode::G_VECREDUCE_FMAXIMUM: + assert(false && "getNeutralElementForVecReduce unimplemented for " + "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!"); + } + llvm_unreachable("switch expected to return!"); +} + LegalizerHelper::LegalizeResult LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy) { @@ -5341,6 +5377,35 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_VECREDUCE_FADD: + case TargetOpcode::G_VECREDUCE_FMUL: + case TargetOpcode::G_VECREDUCE_ADD: + case TargetOpcode::G_VECREDUCE_MUL: + case TargetOpcode::G_VECREDUCE_AND: + case TargetOpcode::G_VECREDUCE_OR: + case TargetOpcode::G_VECREDUCE_XOR: + case TargetOpcode::G_VECREDUCE_SMAX: + case TargetOpcode::G_VECREDUCE_SMIN: + case TargetOpcode::G_VECREDUCE_UMAX: + case TargetOpcode::G_VECREDUCE_UMIN: { + LLT OrigTy = MRI.getType(MI.getOperand(1).getReg()); + MachineOperand &MO = MI.getOperand(1); + auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO); + auto NeutralElement = getNeutralElementForVecReduce( + MI.getOpcode(), MIRBuilder, MoreTy.getElementType()); + for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements(); + i != e; i++) { + auto Idx = MIRBuilder.buildConstant(LLT::scalar(32), i); + NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec, + NeutralElement, Idx); + } + + Observer.changingInstr(MI); + MO.setReg(NewVec.getReg(0)); + Observer.changedInstr(MI); + return Legalized; + } + default: return UnableToLegalize; } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 933f13dd5a19..622a2b9cceb4 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1070,6 +1070,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {s16, v8s16}, {s32, v2s32}, {s32, v4s32}}) + .moreElementsIf( + [=](const LegalityQuery &Query) { + return Query.Types[1].isVector() && + Query.Types[1].getElementType() != s8 && + Query.Types[1].getNumElements() & 1; + }, + LegalizeMutations::moreElementsToNextPow2(1)) .clampMaxNumElements(1, s64, 2) .clampMaxNumElements(1, s32, 4) .clampMaxNumElements(1, s16, 8) diff --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll index 194fe5be40c2..76790d128d06 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll @@ -595,30 +595,14 @@ entry: } define i16 @sminv_v3i16(<3 x i16> %a) { -; CHECK-SD-LABEL: sminv_v3i16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: mov w8, #32767 // =0x7fff -; CHECK-SD-NEXT: mov v0.h[3], w8 -; CHECK-SD-NEXT: sminv h0, v0.4h -; CHECK-SD-NEXT: fmov w0, s0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: sminv_v3i16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: smov w8, v0.h[0] -; CHECK-GI-NEXT: umov w9, v0.h[0] -; CHECK-GI-NEXT: umov w10, v0.h[1] -; CHECK-GI-NEXT: smov w11, v0.h[2] -; CHECK-GI-NEXT: umov w13, v0.h[2] -; CHECK-GI-NEXT: fmov w12, s1 -; CHECK-GI-NEXT: cmp w8, w12, sxth -; CHECK-GI-NEXT: csel w8, w9, w10, lt -; CHECK-GI-NEXT: cmp w11, w8, sxth -; CHECK-GI-NEXT: csel w0, w8, w13, gt -; CHECK-GI-NEXT: ret +; CHECK-LABEL: sminv_v3i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov w8, #32767 // =0x7fff +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: sminv h0, v0.4h +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret entry: %arg1 = call i16 @llvm.vector.reduce.smin.v3i16(<3 x i16> %a) ret i16 %arg1 @@ -670,28 +654,13 @@ entry: } define i32 @sminv_v3i32(<3 x i32> %a) { -; CHECK-SD-LABEL: sminv_v3i32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: mov w8, #2147483647 // =0x7fffffff -; CHECK-SD-NEXT: mov v0.s[3], w8 -; CHECK-SD-NEXT: sminv s0, v0.4s -; CHECK-SD-NEXT: fmov w0, s0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: sminv_v3i32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: fmov w8, s0 -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: fmov w9, s1 -; CHECK-GI-NEXT: cmp w8, w9 -; CHECK-GI-NEXT: fmov w9, s2 -; CHECK-GI-NEXT: fcsel s0, s0, s1, lt -; CHECK-GI-NEXT: fmov w8, s0 -; CHECK-GI-NEXT: cmp w8, w9 -; CHECK-GI-NEXT: fcsel s0, s0, s2, lt -; CHECK-GI-NEXT: fmov w0, s0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: sminv_v3i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #2147483647 // =0x7fffffff +; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: sminv s0, v0.4s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret entry: %arg1 = call i32 @llvm.vector.reduce.smin.v3i32(<3 x i32> %a) ret i32 %arg1 @@ -972,17 +941,10 @@ define i16 @smaxv_v3i16(<3 x i16> %a) { ; CHECK-GI-LABEL: smaxv_v3i16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: smov w8, v0.h[0] -; CHECK-GI-NEXT: umov w9, v0.h[0] -; CHECK-GI-NEXT: umov w10, v0.h[1] -; CHECK-GI-NEXT: smov w11, v0.h[2] -; CHECK-GI-NEXT: umov w13, v0.h[2] -; CHECK-GI-NEXT: fmov w12, s1 -; CHECK-GI-NEXT: cmp w8, w12, sxth -; CHECK-GI-NEXT: csel w8, w9, w10, gt -; CHECK-GI-NEXT: cmp w11, w8, sxth -; CHECK-GI-NEXT: csel w0, w8, w13, lt +; CHECK-GI-NEXT: mov w8, #32768 // =0x8000 +; CHECK-GI-NEXT: mov v0.h[3], w8 +; CHECK-GI-NEXT: smaxv h0, v0.4h +; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret entry: %arg1 = call i16 @llvm.vector.reduce.smax.v3i16(<3 x i16> %a) @@ -1035,28 +997,13 @@ entry: } define i32 @smaxv_v3i32(<3 x i32> %a) { -; CHECK-SD-LABEL: smaxv_v3i32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: mov w8, #-2147483648 // =0x80000000 -; CHECK-SD-NEXT: mov v0.s[3], w8 -; CHECK-SD-NEXT: smaxv s0, v0.4s -; CHECK-SD-NEXT: fmov w0, s0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: smaxv_v3i32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: fmov w8, s0 -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: fmov w9, s1 -; CHECK-GI-NEXT: cmp w8, w9 -; CHECK-GI-NEXT: fmov w9, s2 -; CHECK-GI-NEXT: fcsel s0, s0, s1, gt -; CHECK-GI-NEXT: fmov w8, s0 -; CHECK-GI-NEXT: cmp w8, w9 -; CHECK-GI-NEXT: fcsel s0, s0, s2, gt -; CHECK-GI-NEXT: fmov w0, s0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: smaxv_v3i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 +; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: smaxv s0, v0.4s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret entry: %arg1 = call i32 @llvm.vector.reduce.smax.v3i32(<3 x i32> %a) ret i32 %arg1 @@ -1335,17 +1282,10 @@ define i16 @uminv_v3i16(<3 x i16> %a) { ; CHECK-GI-LABEL: uminv_v3i16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: umov w8, v0.h[0] -; CHECK-GI-NEXT: umov w9, v0.h[0] -; CHECK-GI-NEXT: umov w10, v0.h[1] -; CHECK-GI-NEXT: umov w11, v0.h[2] -; CHECK-GI-NEXT: umov w13, v0.h[2] -; CHECK-GI-NEXT: fmov w12, s1 -; CHECK-GI-NEXT: cmp w8, w12, uxth -; CHECK-GI-NEXT: csel w8, w9, w10, lo -; CHECK-GI-NEXT: cmp w11, w8, uxth -; CHECK-GI-NEXT: csel w0, w8, w13, hi +; CHECK-GI-NEXT: mov w8, #65535 // =0xffff +; CHECK-GI-NEXT: mov v0.h[3], w8 +; CHECK-GI-NEXT: uminv h0, v0.4h +; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret entry: %arg1 = call i16 @llvm.vector.reduce.umin.v3i16(<3 x i16> %a) @@ -1398,28 +1338,13 @@ entry: } define i32 @uminv_v3i32(<3 x i32> %a) { -; CHECK-SD-LABEL: uminv_v3i32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-SD-NEXT: mov v0.s[3], w8 -; CHECK-SD-NEXT: uminv s0, v0.4s -; CHECK-SD-NEXT: fmov w0, s0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: uminv_v3i32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: fmov w8, s0 -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: fmov w9, s1 -; CHECK-GI-NEXT: cmp w8, w9 -; CHECK-GI-NEXT: fmov w9, s2 -; CHECK-GI-NEXT: fcsel s0, s0, s1, lo -; CHECK-GI-NEXT: fmov w8, s0 -; CHECK-GI-NEXT: cmp w8, w9 -; CHECK-GI-NEXT: fcsel s0, s0, s2, lo -; CHECK-GI-NEXT: fmov w0, s0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: uminv_v3i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: uminv s0, v0.4s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret entry: %arg1 = call i32 @llvm.vector.reduce.umin.v3i32(<3 x i32> %a) ret i32 %arg1 @@ -1697,17 +1622,10 @@ define i16 @umaxv_v3i16(<3 x i16> %a) { ; CHECK-GI-LABEL: umaxv_v3i16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: umov w8, v0.h[0] -; CHECK-GI-NEXT: umov w9, v0.h[0] -; CHECK-GI-NEXT: umov w10, v0.h[1] -; CHECK-GI-NEXT: umov w11, v0.h[2] -; CHECK-GI-NEXT: umov w13, v0.h[2] -; CHECK-GI-NEXT: fmov w12, s1 -; CHECK-GI-NEXT: cmp w8, w12, uxth -; CHECK-GI-NEXT: csel w8, w9, w10, hi -; CHECK-GI-NEXT: cmp w11, w8, uxth -; CHECK-GI-NEXT: csel w0, w8, w13, lo +; CHECK-GI-NEXT: mov w8, #0 // =0x0 +; CHECK-GI-NEXT: mov v0.h[3], w8 +; CHECK-GI-NEXT: umaxv h0, v0.4h +; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret entry: %arg1 = call i16 @llvm.vector.reduce.umax.v3i16(<3 x i16> %a) @@ -1760,27 +1678,12 @@ entry: } define i32 @umaxv_v3i32(<3 x i32> %a) { -; CHECK-SD-LABEL: umaxv_v3i32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: mov v0.s[3], wzr -; CHECK-SD-NEXT: umaxv s0, v0.4s -; CHECK-SD-NEXT: fmov w0, s0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: umaxv_v3i32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: fmov w8, s0 -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: fmov w9, s1 -; CHECK-GI-NEXT: cmp w8, w9 -; CHECK-GI-NEXT: fmov w9, s2 -; CHECK-GI-NEXT: fcsel s0, s0, s1, hi -; CHECK-GI-NEXT: fmov w8, s0 -; CHECK-GI-NEXT: cmp w8, w9 -; CHECK-GI-NEXT: fcsel s0, s0, s2, hi -; CHECK-GI-NEXT: fmov w0, s0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: umaxv_v3i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov v0.s[3], wzr +; CHECK-NEXT: umaxv s0, v0.4s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret entry: %arg1 = call i32 @llvm.vector.reduce.umax.v3i32(<3 x i32> %a) ret i32 %arg1 diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll index 7f804fe48fd8..6d848e7b5c7c 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll @@ -200,27 +200,12 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind { } define i32 @test_v3i32(<3 x i32> %a) nounwind { -; CHECK-SD-LABEL: test_v3i32: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: mov v0.s[3], wzr -; CHECK-SD-NEXT: umaxv s0, v0.4s -; CHECK-SD-NEXT: fmov w0, s0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_v3i32: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: fmov w8, s0 -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: fmov w9, s1 -; CHECK-GI-NEXT: cmp w8, w9 -; CHECK-GI-NEXT: fmov w9, s2 -; CHECK-GI-NEXT: fcsel s0, s0, s1, hi -; CHECK-GI-NEXT: fmov w8, s0 -; CHECK-GI-NEXT: cmp w8, w9 -; CHECK-GI-NEXT: fcsel s0, s0, s2, hi -; CHECK-GI-NEXT: fmov w0, s0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_v3i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov v0.s[3], wzr +; CHECK-NEXT: umaxv s0, v0.4s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %b = call i32 @llvm.vector.reduce.umax.v3i32(<3 x i32> %a) ret i32 %b } |