summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h | 4
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 65
-rw-r--r-- llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 7
-rw-r--r-- llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll | 191
-rw-r--r-- llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll | 27
5 files changed, 129 insertions, 165 deletions
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index a7ecf0dc1ba2..90487ae3bc2e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -281,6 +281,10 @@ private:
MachineInstr &MI,
LostDebugLocObserver &LocObserver);
+ MachineInstrBuilder // Result of emitting the neutral (identity) element for a vector reduction.
+ getNeutralElementForVecReduce(unsigned Opcode, MachineIRBuilder &MIRBuilder, // Opcode: G_VECREDUCE_* opcode; MIRBuilder: emits the value.
+ LLT Ty); // Ty: type of the element to build; the definition asserts it is scalar.
+
public:
/// Return the alignment to use for a stack temporary object with the given
/// type.
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index e39fdae1ccbe..ad1003839e33 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5159,6 +5159,42 @@ LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
return Legalized;
}
+/// Emit the neutral (identity) element of a vector reduction.
+///
+/// The returned value can be placed in extra vector lanes without changing
+/// the result of the reduction, e.g. 0 for an add or all-ones for an and.
+///
+/// \param Opcode     A TargetOpcode::G_VECREDUCE_* opcode.
+/// \param MIRBuilder Builder used to emit the constant.
+/// \param Ty         Scalar type of the element to create.
+/// \returns the builder result defining the neutral element. Opcodes that are
+/// not handled explicitly get an undef value from MIRBuilder.buildUndef().
+/// G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM are not implemented and must
+/// not be passed in.
+MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
+    unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
+  assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
+
+  switch (Opcode) {
+  default:
+    return MIRBuilder.buildUndef(Ty);
+  case TargetOpcode::G_VECREDUCE_ADD:
+  case TargetOpcode::G_VECREDUCE_OR:
+  case TargetOpcode::G_VECREDUCE_XOR:
+  case TargetOpcode::G_VECREDUCE_UMAX:
+    return MIRBuilder.buildConstant(Ty, 0);
+  case TargetOpcode::G_VECREDUCE_MUL:
+    return MIRBuilder.buildConstant(Ty, 1);
+  case TargetOpcode::G_VECREDUCE_AND:
+  case TargetOpcode::G_VECREDUCE_UMIN:
+    return MIRBuilder.buildConstant(
+        Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
+  case TargetOpcode::G_VECREDUCE_SMAX:
+    // Ty is asserted to be scalar above, so the scalar width is the full
+    // width; use the same query as the unsigned cases for consistency.
+    return MIRBuilder.buildConstant(
+        Ty, APInt::getSignedMinValue(Ty.getScalarSizeInBits()));
+  case TargetOpcode::G_VECREDUCE_SMIN:
+    return MIRBuilder.buildConstant(
+        Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits()));
+  case TargetOpcode::G_VECREDUCE_FADD:
+    // -0.0 rather than +0.0: adding -0.0 preserves the sign of a zero input.
+    return MIRBuilder.buildFConstant(Ty, -0.0);
+  case TargetOpcode::G_VECREDUCE_FMUL:
+    return MIRBuilder.buildFConstant(Ty, 1.0);
+  case TargetOpcode::G_VECREDUCE_FMINIMUM:
+  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
+    // Reaching this point is unconditionally a bug, so mark it with
+    // llvm_unreachable instead of an assert without a testable predicate.
+    llvm_unreachable("getNeutralElementForVecReduce unimplemented for "
+                     "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
+  }
+  // Guard in case a future case leaves the switch without returning.
+  llvm_unreachable("switch expected to return!");
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
LLT MoreTy) {
@@ -5341,6 +5377,35 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
}
+  case TargetOpcode::G_VECREDUCE_ADD:
+  case TargetOpcode::G_VECREDUCE_MUL:
+  case TargetOpcode::G_VECREDUCE_AND:
+  case TargetOpcode::G_VECREDUCE_OR:
+  case TargetOpcode::G_VECREDUCE_XOR:
+  case TargetOpcode::G_VECREDUCE_SMAX:
+  case TargetOpcode::G_VECREDUCE_SMIN:
+  case TargetOpcode::G_VECREDUCE_UMAX:
+  case TargetOpcode::G_VECREDUCE_UMIN:
+  case TargetOpcode::G_VECREDUCE_FADD:
+  case TargetOpcode::G_VECREDUCE_FMUL: {
+    // Widen the reduced vector (operand 1) to MoreTy. The added lanes are
+    // first created as undef and then overwritten one by one with the
+    // reduction's neutral element, so they do not affect the result.
+    MachineOperand &SrcOp = MI.getOperand(1);
+    const LLT NarrowTy = MRI.getType(SrcOp.getReg());
+
+    auto Widened = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, SrcOp);
+    auto Identity = getNeutralElementForVecReduce(
+        MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
+
+    const size_t FirstPadLane = NarrowTy.getNumElements();
+    const size_t LaneCount = MoreTy.getNumElements();
+    for (size_t Lane = FirstPadLane; Lane != LaneCount; ++Lane) {
+      auto LaneIdx = MIRBuilder.buildConstant(LLT::scalar(32), Lane);
+      Widened = MIRBuilder.buildInsertVectorElement(MoreTy, Widened, Identity,
+                                                    LaneIdx);
+    }
+
+    // Rewrite the reduction in place to read the widened vector.
+    Observer.changingInstr(MI);
+    SrcOp.setReg(Widened.getReg(0));
+    Observer.changedInstr(MI);
+    return Legalized;
+  }
+
default:
return UnableToLegalize;
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 933f13dd5a19..622a2b9cceb4 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1070,6 +1070,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{s16, v8s16},
{s32, v2s32},
{s32, v4s32}})
+ .moreElementsIf( // Rule: add elements to type index 1 when the predicate below holds.
+ [=](const LegalityQuery &Query) {
+ return Query.Types[1].isVector() && // only vector operands qualify
+ Query.Types[1].getElementType() != s8 && // s8 element vectors are excluded from this rule
+ Query.Types[1].getNumElements() & 1; // low bit set => odd element count
+ },
+ LegalizeMutations::moreElementsToNextPow2(1)) // presumably rounds type index 1 up to a power-of-two element count — confirm
.clampMaxNumElements(1, s64, 2)
.clampMaxNumElements(1, s32, 4)
.clampMaxNumElements(1, s16, 8)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
index 194fe5be40c2..76790d128d06 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -595,30 +595,14 @@ entry:
}
define i16 @sminv_v3i16(<3 x i16> %a) {
-; CHECK-SD-LABEL: sminv_v3i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov w8, #32767 // =0x7fff
-; CHECK-SD-NEXT: mov v0.h[3], w8
-; CHECK-SD-NEXT: sminv h0, v0.4h
-; CHECK-SD-NEXT: fmov w0, s0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: sminv_v3i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: smov w8, v0.h[0]
-; CHECK-GI-NEXT: umov w9, v0.h[0]
-; CHECK-GI-NEXT: umov w10, v0.h[1]
-; CHECK-GI-NEXT: smov w11, v0.h[2]
-; CHECK-GI-NEXT: umov w13, v0.h[2]
-; CHECK-GI-NEXT: fmov w12, s1
-; CHECK-GI-NEXT: cmp w8, w12, sxth
-; CHECK-GI-NEXT: csel w8, w9, w10, lt
-; CHECK-GI-NEXT: cmp w11, w8, sxth
-; CHECK-GI-NEXT: csel w0, w8, w13, gt
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sminv_v3i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov w8, #32767 // =0x7fff
+; CHECK-NEXT: mov v0.h[3], w8
+; CHECK-NEXT: sminv h0, v0.4h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%arg1 = call i16 @llvm.vector.reduce.smin.v3i16(<3 x i16> %a)
ret i16 %arg1
@@ -670,28 +654,13 @@ entry:
}
define i32 @sminv_v3i32(<3 x i32> %a) {
-; CHECK-SD-LABEL: sminv_v3i32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mov w8, #2147483647 // =0x7fffffff
-; CHECK-SD-NEXT: mov v0.s[3], w8
-; CHECK-SD-NEXT: sminv s0, v0.4s
-; CHECK-SD-NEXT: fmov w0, s0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: sminv_v3i32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: cmp w8, w9
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: fcsel s0, s0, s1, lt
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: cmp w8, w9
-; CHECK-GI-NEXT: fcsel s0, s0, s2, lt
-; CHECK-GI-NEXT: fmov w0, s0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sminv_v3i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #2147483647 // =0x7fffffff
+; CHECK-NEXT: mov v0.s[3], w8
+; CHECK-NEXT: sminv s0, v0.4s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%arg1 = call i32 @llvm.vector.reduce.smin.v3i32(<3 x i32> %a)
ret i32 %arg1
@@ -972,17 +941,10 @@ define i16 @smaxv_v3i16(<3 x i16> %a) {
; CHECK-GI-LABEL: smaxv_v3i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: smov w8, v0.h[0]
-; CHECK-GI-NEXT: umov w9, v0.h[0]
-; CHECK-GI-NEXT: umov w10, v0.h[1]
-; CHECK-GI-NEXT: smov w11, v0.h[2]
-; CHECK-GI-NEXT: umov w13, v0.h[2]
-; CHECK-GI-NEXT: fmov w12, s1
-; CHECK-GI-NEXT: cmp w8, w12, sxth
-; CHECK-GI-NEXT: csel w8, w9, w10, gt
-; CHECK-GI-NEXT: cmp w11, w8, sxth
-; CHECK-GI-NEXT: csel w0, w8, w13, lt
+; CHECK-GI-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NEXT: mov v0.h[3], w8
+; CHECK-GI-NEXT: smaxv h0, v0.4h
+; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
entry:
%arg1 = call i16 @llvm.vector.reduce.smax.v3i16(<3 x i16> %a)
@@ -1035,28 +997,13 @@ entry:
}
define i32 @smaxv_v3i32(<3 x i32> %a) {
-; CHECK-SD-LABEL: smaxv_v3i32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mov w8, #-2147483648 // =0x80000000
-; CHECK-SD-NEXT: mov v0.s[3], w8
-; CHECK-SD-NEXT: smaxv s0, v0.4s
-; CHECK-SD-NEXT: fmov w0, s0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: smaxv_v3i32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: cmp w8, w9
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: fcsel s0, s0, s1, gt
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: cmp w8, w9
-; CHECK-GI-NEXT: fcsel s0, s0, s2, gt
-; CHECK-GI-NEXT: fmov w0, s0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: smaxv_v3i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000
+; CHECK-NEXT: mov v0.s[3], w8
+; CHECK-NEXT: smaxv s0, v0.4s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%arg1 = call i32 @llvm.vector.reduce.smax.v3i32(<3 x i32> %a)
ret i32 %arg1
@@ -1335,17 +1282,10 @@ define i16 @uminv_v3i16(<3 x i16> %a) {
; CHECK-GI-LABEL: uminv_v3i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: umov w8, v0.h[0]
-; CHECK-GI-NEXT: umov w9, v0.h[0]
-; CHECK-GI-NEXT: umov w10, v0.h[1]
-; CHECK-GI-NEXT: umov w11, v0.h[2]
-; CHECK-GI-NEXT: umov w13, v0.h[2]
-; CHECK-GI-NEXT: fmov w12, s1
-; CHECK-GI-NEXT: cmp w8, w12, uxth
-; CHECK-GI-NEXT: csel w8, w9, w10, lo
-; CHECK-GI-NEXT: cmp w11, w8, uxth
-; CHECK-GI-NEXT: csel w0, w8, w13, hi
+; CHECK-GI-NEXT: mov w8, #65535 // =0xffff
+; CHECK-GI-NEXT: mov v0.h[3], w8
+; CHECK-GI-NEXT: uminv h0, v0.4h
+; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
entry:
%arg1 = call i16 @llvm.vector.reduce.umin.v3i16(<3 x i16> %a)
@@ -1398,28 +1338,13 @@ entry:
}
define i32 @uminv_v3i32(<3 x i32> %a) {
-; CHECK-SD-LABEL: uminv_v3i32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-SD-NEXT: mov v0.s[3], w8
-; CHECK-SD-NEXT: uminv s0, v0.4s
-; CHECK-SD-NEXT: fmov w0, s0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: uminv_v3i32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: cmp w8, w9
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: fcsel s0, s0, s1, lo
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: cmp w8, w9
-; CHECK-GI-NEXT: fcsel s0, s0, s2, lo
-; CHECK-GI-NEXT: fmov w0, s0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: uminv_v3i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: mov v0.s[3], w8
+; CHECK-NEXT: uminv s0, v0.4s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%arg1 = call i32 @llvm.vector.reduce.umin.v3i32(<3 x i32> %a)
ret i32 %arg1
@@ -1697,17 +1622,10 @@ define i16 @umaxv_v3i16(<3 x i16> %a) {
; CHECK-GI-LABEL: umaxv_v3i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: umov w8, v0.h[0]
-; CHECK-GI-NEXT: umov w9, v0.h[0]
-; CHECK-GI-NEXT: umov w10, v0.h[1]
-; CHECK-GI-NEXT: umov w11, v0.h[2]
-; CHECK-GI-NEXT: umov w13, v0.h[2]
-; CHECK-GI-NEXT: fmov w12, s1
-; CHECK-GI-NEXT: cmp w8, w12, uxth
-; CHECK-GI-NEXT: csel w8, w9, w10, hi
-; CHECK-GI-NEXT: cmp w11, w8, uxth
-; CHECK-GI-NEXT: csel w0, w8, w13, lo
+; CHECK-GI-NEXT: mov w8, #0 // =0x0
+; CHECK-GI-NEXT: mov v0.h[3], w8
+; CHECK-GI-NEXT: umaxv h0, v0.4h
+; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
entry:
%arg1 = call i16 @llvm.vector.reduce.umax.v3i16(<3 x i16> %a)
@@ -1760,27 +1678,12 @@ entry:
}
define i32 @umaxv_v3i32(<3 x i32> %a) {
-; CHECK-SD-LABEL: umaxv_v3i32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mov v0.s[3], wzr
-; CHECK-SD-NEXT: umaxv s0, v0.4s
-; CHECK-SD-NEXT: fmov w0, s0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: umaxv_v3i32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: cmp w8, w9
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: fcsel s0, s0, s1, hi
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: cmp w8, w9
-; CHECK-GI-NEXT: fcsel s0, s0, s2, hi
-; CHECK-GI-NEXT: fmov w0, s0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: umaxv_v3i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov v0.s[3], wzr
+; CHECK-NEXT: umaxv s0, v0.4s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%arg1 = call i32 @llvm.vector.reduce.umax.v3i32(<3 x i32> %a)
ret i32 %arg1
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
index 7f804fe48fd8..6d848e7b5c7c 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
@@ -200,27 +200,12 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind {
}
define i32 @test_v3i32(<3 x i32> %a) nounwind {
-; CHECK-SD-LABEL: test_v3i32:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov v0.s[3], wzr
-; CHECK-SD-NEXT: umaxv s0, v0.4s
-; CHECK-SD-NEXT: fmov w0, s0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_v3i32:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: cmp w8, w9
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: fcsel s0, s0, s1, hi
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: cmp w8, w9
-; CHECK-GI-NEXT: fcsel s0, s0, s2, hi
-; CHECK-GI-NEXT: fmov w0, s0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_v3i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.s[3], wzr
+; CHECK-NEXT: umaxv s0, v0.4s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
%b = call i32 @llvm.vector.reduce.umax.v3i32(<3 x i32> %a)
ret i32 %b
}