summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWang Pengcheng <wangpengcheng.pp@bytedance.com>2024-03-29 15:38:39 +0800
committerGitHub <noreply@github.com>2024-03-29 15:38:39 +0800
commit610b9e23c5a3040aacc6fe85de8694f80bf5bdf5 (patch)
treebf9689459ca1a6a311a21a4b4e718c975f022dd9
parente1873d99729012191c72d532226771139b6e8519 (diff)
[SDAG] Use shifts if ISD::MUL is illegal when lowering ISD::CTPOP (#86505)
We can avoid libcalls. Fixes #86205
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp18
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp40
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir4
-rw-r--r--llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctpop.mir4
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv32.mir20
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir4
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv32.mir10
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir2
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv32.mir20
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir4
-rw-r--r--llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll496
-rw-r--r--llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll102
-rw-r--r--llvm/test/CodeGen/RISCV/rv32xtheadbb.ll96
-rw-r--r--llvm/test/CodeGen/RISCV/rv32zbb.ll447
-rw-r--r--llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll15
-rw-r--r--llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll30
-rw-r--r--llvm/test/CodeGen/RISCV/rv64xtheadbb.ll209
-rw-r--r--llvm/test/CodeGen/RISCV/rv64zbb.ll438
18 files changed, 839 insertions, 1120 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 4981d7b80b0b..c00fe1f8ff54 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -6389,12 +6389,26 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
// 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
// bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
- auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
// Shift count result from 8 high bits to low bits.
auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
- B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
+ auto IsMulSupported = [this](const LLT Ty) {
+ auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
+ return Action == Legal || Action == WidenScalar || Action == Custom;
+ };
+ if (IsMulSupported(Ty)) {
+ auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
+ B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
+ } else {
+ auto ResTmp = B8Count;
+ for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
+ auto ShiftC = B.buildConstant(Ty, Shift);
+ auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
+ ResTmp = B.buildAdd(Ty, ResTmp, Shl);
+ }
+ B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
+ }
MI.eraseFromParent();
return Legalized;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8be03b66e155..962f0d98e3be 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8710,11 +8710,21 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
}
// v = (v * 0x01010101...) >> (Len - 8)
- SDValue Mask01 =
- DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
- return DAG.getNode(ISD::SRL, dl, VT,
- DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
- DAG.getConstant(Len - 8, dl, ShVT));
+ SDValue V;
+ if (isOperationLegalOrCustomOrPromote(
+ ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
+ SDValue Mask01 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+ V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
+ } else {
+ V = Op;
+ for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
+ SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
+ V = DAG.getNode(ISD::ADD, dl, VT, V,
+ DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
+ }
+ }
+ return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
}
SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
@@ -8767,10 +8777,22 @@ SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
return Op;
// v = (v * 0x01010101...) >> (Len - 8)
- SDValue Mask01 =
- DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
- return DAG.getNode(ISD::VP_LSHR, dl, VT,
- DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL),
+ SDValue V;
+ if (isOperationLegalOrCustomOrPromote(
+ ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
+ SDValue Mask01 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+ V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
+ } else {
+ V = Op;
+ for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
+ SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
+ V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
+ DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
+ Mask, VL);
+ }
+ }
+ return DAG.getNode(ISD::VP_LSHR, dl, VT, V,
DAG.getConstant(Len - 8, dl, ShVT), Mask, VL);
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir
index d2352be81503..27f2f0bafa95 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir
@@ -37,6 +37,7 @@ body: |
; CHECK-NEXT: %ctpop:_(s32) = G_LSHR [[MUL]], [[C7]](s64)
; CHECK-NEXT: $w0 = COPY %ctpop(s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
+ ;
; CHECK-CSSC-LABEL: name: s32
; CHECK-CSSC: liveins: $w0
; CHECK-CSSC-NEXT: {{ $}}
@@ -77,11 +78,12 @@ body: |
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1085102592571150095
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C5]]
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673
- ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C6]]
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 56
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C6]]
; CHECK-NEXT: %ctpop:_(s64) = G_LSHR [[MUL]], [[C7]](s64)
; CHECK-NEXT: $x0 = COPY %ctpop(s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
+ ;
; CHECK-CSSC-LABEL: name: s64
; CHECK-CSSC: liveins: $x0
; CHECK-CSSC-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctpop.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctpop.mir
index 4c0b3c617721..f518e9ec9e58 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctpop.mir
+++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctpop.mir
@@ -29,8 +29,8 @@ body: |
; MIPS32-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135
; MIPS32-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C5]]
; MIPS32-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009
- ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]]
; MIPS32-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]]
; MIPS32-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C7]](s32)
; MIPS32-NEXT: $v0 = COPY [[LSHR3]](s32)
; MIPS32-NEXT: RetRA implicit $v0
@@ -70,8 +70,8 @@ body: |
; MIPS32-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135
; MIPS32-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C5]]
; MIPS32-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009
- ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]]
; MIPS32-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]]
; MIPS32-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C7]](s32)
; MIPS32-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
; MIPS32-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]]
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv32.mir
index a890a411544e..354fc109a463 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv32.mir
@@ -50,8 +50,8 @@ body: |
; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
; RV32I-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C13]]
; RV32I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND8]], [[C14]]
; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND8]], [[C14]]
; RV32I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C15]](s32)
; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C16]], [[LSHR6]]
@@ -129,8 +129,8 @@ body: |
; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855
; RV32I-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C15]]
; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 257
- ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C16]]
; RV32I-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C16]]
; RV32I-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; RV32I-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C18]]
; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[C17]](s32)
@@ -201,8 +201,8 @@ body: |
; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135
; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C10]]
; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009
- ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C11]]
; RV32I-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C11]]
; RV32I-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C12]](s32)
; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C13]], [[LSHR8]]
@@ -267,8 +267,8 @@ body: |
; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135
; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C11]]
; RV32I-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009
- ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C12]]
; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C12]]
; RV32I-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C13]](s32)
; RV32I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C14]], [[LSHR8]]
@@ -306,8 +306,8 @@ body: |
; RV32I-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135
; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD4]], [[C26]]
; RV32I-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009
- ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[C27]]
; RV32I-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[C27]]
; RV32I-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C28]](s32)
; RV32I-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; RV32I-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C29]], [[LSHR17]]
@@ -389,8 +389,8 @@ body: |
; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
; RV32I-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C13]]
; RV32I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND8]], [[C14]]
; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND8]], [[C14]]
; RV32I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C15]](s32)
; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C16]], [[LSHR6]]
@@ -468,8 +468,8 @@ body: |
; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855
; RV32I-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C15]]
; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 257
- ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C16]]
; RV32I-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C16]]
; RV32I-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; RV32I-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C18]]
; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[C17]](s32)
@@ -540,8 +540,8 @@ body: |
; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135
; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C10]]
; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009
- ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C11]]
; RV32I-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C11]]
; RV32I-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C12]](s32)
; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C13]], [[LSHR8]]
@@ -606,8 +606,8 @@ body: |
; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135
; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C11]]
; RV32I-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009
- ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C12]]
; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C12]]
; RV32I-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C13]](s32)
; RV32I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C14]], [[LSHR8]]
@@ -645,8 +645,8 @@ body: |
; RV32I-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135
; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD4]], [[C26]]
; RV32I-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009
- ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[C27]]
; RV32I-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[C27]]
; RV32I-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C28]](s32)
; RV32I-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; RV32I-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C29]], [[LSHR17]]
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir
index add8a565202d..38a4b9c6dae3 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir
@@ -283,8 +283,8 @@ body: |
; RV64I-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 1085102592571150095
; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C11]]
; RV64I-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673
- ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C12]]
; RV64I-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 56
+ ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C12]]
; RV64I-NEXT: [[LSHR9:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C13]](s64)
; RV64I-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
; RV64I-NEXT: [[SUB1:%[0-9]+]]:_(s64) = G_SUB [[C14]], [[LSHR9]]
@@ -583,8 +583,8 @@ body: |
; RV64I-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 1085102592571150095
; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C11]]
; RV64I-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673
- ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C12]]
; RV64I-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 56
+ ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C12]]
; RV64I-NEXT: [[LSHR9:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C13]](s64)
; RV64I-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
; RV64I-NEXT: [[SUB1:%[0-9]+]]:_(s64) = G_SUB [[C14]], [[LSHR9]]
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv32.mir
index d4eb5ebc2e29..c64669cb7341 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv32.mir
@@ -35,8 +35,8 @@ body: |
; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
; RV32I-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C7]]
; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[C8]]
; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[C8]]
; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C9]](s32)
; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
@@ -90,8 +90,8 @@ body: |
; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855
; RV32I-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C7]]
; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 257
- ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[C8]]
; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[C8]]
; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C10]]
; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[C9]](s32)
@@ -143,8 +143,8 @@ body: |
; RV32I-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135
; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C5]]
; RV32I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009
- ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]]
; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]]
; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C7]](s32)
; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
@@ -190,8 +190,8 @@ body: |
; RV32I-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135
; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C5]]
; RV32I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009
- ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]]
; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]]
; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C7]](s32)
; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; RV32I-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C8]](s32)
@@ -210,8 +210,8 @@ body: |
; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135
; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C13]]
; RV32I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009
- ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[C14]]
; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[C14]]
; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C15]](s32)
; RV32I-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[LSHR7]], [[LSHR3]]
; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir
index e2434ba9301c..196b367e5927 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir
@@ -205,8 +205,8 @@ body: |
; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1085102592571150095
; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C5]]
; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673
- ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C6]]
; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 56
+ ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C6]]
; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C7]](s64)
; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64)
; RV64I-NEXT: PseudoRET implicit $x10
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv32.mir
index 19555a702b73..372becaf08d9 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv32.mir
@@ -39,8 +39,8 @@ body: |
; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]]
; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]]
; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]]
; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C10]](s32)
; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
@@ -98,8 +98,8 @@ body: |
; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855
; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]]
; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 257
- ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]]
; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]]
; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C11]]
; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[C10]](s32)
@@ -155,8 +155,8 @@ body: |
; RV32I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135
; RV32I-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C6]]
; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009
- ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C7]]
; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C7]]
; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C8]](s32)
; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
@@ -208,8 +208,8 @@ body: |
; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135
; RV32I-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C7]]
; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009
- ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C8]]
; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C8]]
; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C9]](s32)
; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; RV32I-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR3]], [[C10]]
@@ -234,8 +234,8 @@ body: |
; RV32I-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135
; RV32I-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ADD6]], [[C17]]
; RV32I-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009
- ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C18]]
; RV32I-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C18]]
; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C19]](s32)
; RV32I-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD3]], [[LSHR7]]
; RV32I-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
@@ -304,8 +304,8 @@ body: |
; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]]
; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]]
; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]]
; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C10]](s32)
; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
@@ -363,8 +363,8 @@ body: |
; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855
; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]]
; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 257
- ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]]
; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]]
; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C11]]
; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[C10]](s32)
@@ -420,8 +420,8 @@ body: |
; RV32I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135
; RV32I-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C6]]
; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009
- ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C7]]
; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C7]]
; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C8]](s32)
; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
@@ -473,8 +473,8 @@ body: |
; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135
; RV32I-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C7]]
; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009
- ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C8]]
; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C8]]
; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C9]](s32)
; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; RV32I-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR3]], [[C10]]
@@ -499,8 +499,8 @@ body: |
; RV32I-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135
; RV32I-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ADD6]], [[C17]]
; RV32I-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009
- ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C18]]
; RV32I-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C18]]
; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C19]](s32)
; RV32I-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD3]], [[LSHR7]]
; RV32I-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir
index e030e3ce2a80..e51a2143efd0 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir
@@ -221,8 +221,8 @@ body: |
; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1085102592571150095
; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ADD2]], [[C6]]
; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673
- ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND4]], [[C7]]
; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 56
+ ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND4]], [[C7]]
; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C8]](s64)
; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64)
; RV64I-NEXT: PseudoRET implicit $x10
@@ -457,8 +457,8 @@ body: |
; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1085102592571150095
; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ADD2]], [[C6]]
; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673
- ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND4]], [[C7]]
; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 56
+ ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND4]], [[C7]]
; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C8]](s64)
; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64)
; RV64I-NEXT: PseudoRET implicit $x10
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index 455e6e54c9b3..549d531e829e 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -1160,8 +1160,6 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: beqz a0, .LBB10_2
; RV32I-NEXT: # %bb.1: # %cond.false
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
@@ -1189,12 +1187,11 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi a1, a1, 257
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB10_2:
; RV32I-NEXT: li a0, 32
@@ -1205,8 +1202,6 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
; RV64I-NEXT: sext.w a1, a0
; RV64I-NEXT: beqz a1, .LBB10_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
@@ -1232,14 +1227,13 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: addi a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB10_2:
; RV64I-NEXT: li a0, 32
@@ -1354,19 +1348,16 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
define i64 @test_ctlz_i64(i64 %a) nounwind {
; RV32I-LABEL: test_ctlz_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: srli a0, a1, 1
-; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi a4, a2, 1365
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi a3, a2, 819
+; RV32I-NEXT: lui a2, 61681
+; RV32I-NEXT: addi a2, a2, -241
+; RV32I-NEXT: bnez a1, .LBB11_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -1377,28 +1368,26 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lui a2, 349525
-; RV32I-NEXT: addi s4, a2, 1365
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: lui a1, 209715
-; RV32I-NEXT: addi s5, a1, 819
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: lui a1, 61681
-; RV32I-NEXT: addi s6, a1, -241
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi s3, a1, 257
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: srli a0, s2, 1
-; RV32I-NEXT: or a0, s2, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB11_2:
+; RV32I-NEXT: srli a0, a1, 1
+; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -1409,43 +1398,27 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: bnez s0, .LBB11_2
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: addi a0, a0, 32
-; RV32I-NEXT: j .LBB11_3
-; RV32I-NEXT: .LBB11_2:
-; RV32I-NEXT: srli a0, s1, 24
-; RV32I-NEXT: .LBB11_3:
; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_ctlz_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB11_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 2
@@ -1481,14 +1454,13 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 32
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB11_2:
; RV64I-NEXT: li a0, 64
@@ -1831,8 +1803,6 @@ define i16 @test_ctlz_i16_zero_undef(i16 %a) nounwind {
define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
; RV32I-LABEL: test_ctlz_i32_zero_undef:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
@@ -1860,18 +1830,15 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi a1, a1, 257
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_ctlz_i32_zero_undef:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
@@ -1897,14 +1864,13 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: addi a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV32M-LABEL: test_ctlz_i32_zero_undef:
@@ -2005,19 +1971,16 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind {
; RV32I-LABEL: test_ctlz_i64_zero_undef:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: srli a0, a1, 1
-; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi a4, a2, 1365
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi a3, a2, 819
+; RV32I-NEXT: lui a2, 61681
+; RV32I-NEXT: addi a2, a2, -241
+; RV32I-NEXT: bnez a1, .LBB15_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -2028,28 +1991,26 @@ define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lui a2, 349525
-; RV32I-NEXT: addi s4, a2, 1365
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: lui a1, 209715
-; RV32I-NEXT: addi s5, a1, 819
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: lui a1, 61681
-; RV32I-NEXT: addi s6, a1, -241
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi s3, a1, 257
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: srli a0, s2, 1
-; RV32I-NEXT: or a0, s2, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB15_2:
+; RV32I-NEXT: srli a0, a1, 1
+; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -2060,41 +2021,25 @@ define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: bnez s0, .LBB15_2
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: addi a0, a0, 32
-; RV32I-NEXT: j .LBB15_3
-; RV32I-NEXT: .LBB15_2:
-; RV32I-NEXT: srli a0, s1, 24
-; RV32I-NEXT: .LBB15_3:
; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_ctlz_i64_zero_undef:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 2
@@ -2130,14 +2075,13 @@ define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind {
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 32
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV32M-LABEL: test_ctlz_i64_zero_undef:
@@ -2464,8 +2408,6 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
define i32 @test_ctpop_i32(i32 %a) nounwind {
; RV32I-LABEL: test_ctpop_i32:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: lui a2, 349525
; RV32I-NEXT: addi a2, a2, 1365
@@ -2482,18 +2424,15 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi a1, a1, 257
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_ctpop_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
@@ -2508,14 +2447,13 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: addi a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV32M-LABEL: test_ctpop_i32:
@@ -2578,8 +2516,6 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
;
; RV32XTHEADBB-LABEL: test_ctpop_i32:
; RV32XTHEADBB: # %bb.0:
-; RV32XTHEADBB-NEXT: addi sp, sp, -16
-; RV32XTHEADBB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32XTHEADBB-NEXT: srli a1, a0, 1
; RV32XTHEADBB-NEXT: lui a2, 349525
; RV32XTHEADBB-NEXT: addi a2, a2, 1365
@@ -2596,18 +2532,15 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
; RV32XTHEADBB-NEXT: lui a1, 61681
; RV32XTHEADBB-NEXT: addi a1, a1, -241
; RV32XTHEADBB-NEXT: and a0, a0, a1
-; RV32XTHEADBB-NEXT: lui a1, 4112
-; RV32XTHEADBB-NEXT: addi a1, a1, 257
-; RV32XTHEADBB-NEXT: call __mulsi3
+; RV32XTHEADBB-NEXT: slli a1, a0, 8
+; RV32XTHEADBB-NEXT: add a0, a0, a1
+; RV32XTHEADBB-NEXT: slli a1, a0, 16
+; RV32XTHEADBB-NEXT: add a0, a0, a1
; RV32XTHEADBB-NEXT: srli a0, a0, 24
-; RV32XTHEADBB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32XTHEADBB-NEXT: addi sp, sp, 16
; RV32XTHEADBB-NEXT: ret
;
; RV64XTHEADBB-LABEL: test_ctpop_i32:
; RV64XTHEADBB: # %bb.0:
-; RV64XTHEADBB-NEXT: addi sp, sp, -16
-; RV64XTHEADBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64XTHEADBB-NEXT: srli a1, a0, 1
; RV64XTHEADBB-NEXT: lui a2, 349525
; RV64XTHEADBB-NEXT: addiw a2, a2, 1365
@@ -2622,14 +2555,13 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
; RV64XTHEADBB-NEXT: srli a1, a0, 4
; RV64XTHEADBB-NEXT: add a0, a0, a1
; RV64XTHEADBB-NEXT: lui a1, 61681
-; RV64XTHEADBB-NEXT: addiw a1, a1, -241
+; RV64XTHEADBB-NEXT: addi a1, a1, -241
; RV64XTHEADBB-NEXT: and a0, a0, a1
-; RV64XTHEADBB-NEXT: lui a1, 4112
-; RV64XTHEADBB-NEXT: addiw a1, a1, 257
-; RV64XTHEADBB-NEXT: call __muldi3
+; RV64XTHEADBB-NEXT: slli a1, a0, 8
+; RV64XTHEADBB-NEXT: add a0, a0, a1
+; RV64XTHEADBB-NEXT: slli a1, a0, 16
+; RV64XTHEADBB-NEXT: add a0, a0, a1
; RV64XTHEADBB-NEXT: srliw a0, a0, 24
-; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64XTHEADBB-NEXT: addi sp, sp, 16
; RV64XTHEADBB-NEXT: ret
%1 = call i32 @llvm.ctpop.i32(i32 %a)
ret i32 %1
@@ -2638,65 +2570,48 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
define i64 @test_ctpop_i64(i64 %a) nounwind {
; RV32I-LABEL: test_ctpop_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: srli a0, a1, 1
-; RV32I-NEXT: lui a2, 349525
-; RV32I-NEXT: addi s2, a2, 1365
-; RV32I-NEXT: and a0, a0, s2
-; RV32I-NEXT: sub a1, a1, a0
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi s3, a0, 819
-; RV32I-NEXT: and a0, a1, s3
+; RV32I-NEXT: srli a2, a1, 1
+; RV32I-NEXT: lui a3, 349525
+; RV32I-NEXT: addi a3, a3, 1365
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sub a1, a1, a2
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi a2, a2, 819
+; RV32I-NEXT: and a4, a1, a2
; RV32I-NEXT: srli a1, a1, 2
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: lui a1, 61681
-; RV32I-NEXT: addi s4, a1, -241
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi s1, a1, 257
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: srli s5, a0, 24
-; RV32I-NEXT: srli a0, s0, 1
-; RV32I-NEXT: and a0, a0, s2
-; RV32I-NEXT: sub s0, s0, a0
-; RV32I-NEXT: and a0, s0, s3
-; RV32I-NEXT: srli s0, s0, 2
-; RV32I-NEXT: and a1, s0, s3
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: add a1, a4, a1
+; RV32I-NEXT: srli a4, a1, 4
+; RV32I-NEXT: add a1, a1, a4
+; RV32I-NEXT: lui a4, 61681
+; RV32I-NEXT: addi a4, a4, -241
+; RV32I-NEXT: and a1, a1, a4
+; RV32I-NEXT: slli a5, a1, 8
+; RV32I-NEXT: add a1, a1, a5
+; RV32I-NEXT: slli a5, a1, 16
+; RV32I-NEXT: add a1, a1, a5
+; RV32I-NEXT: srli a1, a1, 24
+; RV32I-NEXT: srli a5, a0, 1
+; RV32I-NEXT: and a3, a5, a3
+; RV32I-NEXT: sub a0, a0, a3
+; RV32I-NEXT: and a3, a0, a2
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: add a0, a3, a0
+; RV32I-NEXT: srli a2, a0, 4
+; RV32I-NEXT: add a0, a0, a2
+; RV32I-NEXT: and a0, a0, a4
+; RV32I-NEXT: slli a2, a0, 8
+; RV32I-NEXT: add a0, a0, a2
+; RV32I-NEXT: slli a2, a0, 16
+; RV32I-NEXT: add a0, a0, a2
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: add a0, a0, s5
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_ctpop_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
@@ -2719,14 +2634,13 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 32
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV32M-LABEL: test_ctpop_i64:
@@ -2814,65 +2728,48 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
;
; RV32XTHEADBB-LABEL: test_ctpop_i64:
; RV32XTHEADBB: # %bb.0:
-; RV32XTHEADBB-NEXT: addi sp, sp, -32
-; RV32XTHEADBB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32XTHEADBB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32XTHEADBB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32XTHEADBB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32XTHEADBB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32XTHEADBB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32XTHEADBB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32XTHEADBB-NEXT: mv s0, a0
-; RV32XTHEADBB-NEXT: srli a0, a1, 1
-; RV32XTHEADBB-NEXT: lui a2, 349525
-; RV32XTHEADBB-NEXT: addi s2, a2, 1365
-; RV32XTHEADBB-NEXT: and a0, a0, s2
-; RV32XTHEADBB-NEXT: sub a1, a1, a0
-; RV32XTHEADBB-NEXT: lui a0, 209715
-; RV32XTHEADBB-NEXT: addi s3, a0, 819
-; RV32XTHEADBB-NEXT: and a0, a1, s3
+; RV32XTHEADBB-NEXT: srli a2, a1, 1
+; RV32XTHEADBB-NEXT: lui a3, 349525
+; RV32XTHEADBB-NEXT: addi a3, a3, 1365
+; RV32XTHEADBB-NEXT: and a2, a2, a3
+; RV32XTHEADBB-NEXT: sub a1, a1, a2
+; RV32XTHEADBB-NEXT: lui a2, 209715
+; RV32XTHEADBB-NEXT: addi a2, a2, 819
+; RV32XTHEADBB-NEXT: and a4, a1, a2
; RV32XTHEADBB-NEXT: srli a1, a1, 2
-; RV32XTHEADBB-NEXT: and a1, a1, s3
-; RV32XTHEADBB-NEXT: add a0, a0, a1
-; RV32XTHEADBB-NEXT: srli a1, a0, 4
-; RV32XTHEADBB-NEXT: add a0, a0, a1
-; RV32XTHEADBB-NEXT: lui a1, 61681
-; RV32XTHEADBB-NEXT: addi s4, a1, -241
-; RV32XTHEADBB-NEXT: and a0, a0, s4
-; RV32XTHEADBB-NEXT: lui a1, 4112
-; RV32XTHEADBB-NEXT: addi s1, a1, 257
-; RV32XTHEADBB-NEXT: mv a1, s1
-; RV32XTHEADBB-NEXT: call __mulsi3
-; RV32XTHEADBB-NEXT: srli s5, a0, 24
-; RV32XTHEADBB-NEXT: srli a0, s0, 1
-; RV32XTHEADBB-NEXT: and a0, a0, s2
-; RV32XTHEADBB-NEXT: sub s0, s0, a0
-; RV32XTHEADBB-NEXT: and a0, s0, s3
-; RV32XTHEADBB-NEXT: srli s0, s0, 2
-; RV32XTHEADBB-NEXT: and a1, s0, s3
-; RV32XTHEADBB-NEXT: add a0, a0, a1
-; RV32XTHEADBB-NEXT: srli a1, a0, 4
-; RV32XTHEADBB-NEXT: add a0, a0, a1
-; RV32XTHEADBB-NEXT: and a0, a0, s4
-; RV32XTHEADBB-NEXT: mv a1, s1
-; RV32XTHEADBB-NEXT: call __mulsi3
+; RV32XTHEADBB-NEXT: and a1, a1, a2
+; RV32XTHEADBB-NEXT: add a1, a4, a1
+; RV32XTHEADBB-NEXT: srli a4, a1, 4
+; RV32XTHEADBB-NEXT: add a1, a1, a4
+; RV32XTHEADBB-NEXT: lui a4, 61681
+; RV32XTHEADBB-NEXT: addi a4, a4, -241
+; RV32XTHEADBB-NEXT: and a1, a1, a4
+; RV32XTHEADBB-NEXT: slli a5, a1, 8
+; RV32XTHEADBB-NEXT: add a1, a1, a5
+; RV32XTHEADBB-NEXT: slli a5, a1, 16
+; RV32XTHEADBB-NEXT: add a1, a1, a5
+; RV32XTHEADBB-NEXT: srli a1, a1, 24
+; RV32XTHEADBB-NEXT: srli a5, a0, 1
+; RV32XTHEADBB-NEXT: and a3, a5, a3
+; RV32XTHEADBB-NEXT: sub a0, a0, a3
+; RV32XTHEADBB-NEXT: and a3, a0, a2
+; RV32XTHEADBB-NEXT: srli a0, a0, 2
+; RV32XTHEADBB-NEXT: and a0, a0, a2
+; RV32XTHEADBB-NEXT: add a0, a3, a0
+; RV32XTHEADBB-NEXT: srli a2, a0, 4
+; RV32XTHEADBB-NEXT: add a0, a0, a2
+; RV32XTHEADBB-NEXT: and a0, a0, a4
+; RV32XTHEADBB-NEXT: slli a2, a0, 8
+; RV32XTHEADBB-NEXT: add a0, a0, a2
+; RV32XTHEADBB-NEXT: slli a2, a0, 16
+; RV32XTHEADBB-NEXT: add a0, a0, a2
; RV32XTHEADBB-NEXT: srli a0, a0, 24
-; RV32XTHEADBB-NEXT: add a0, a0, s5
+; RV32XTHEADBB-NEXT: add a0, a0, a1
; RV32XTHEADBB-NEXT: li a1, 0
-; RV32XTHEADBB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32XTHEADBB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32XTHEADBB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32XTHEADBB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32XTHEADBB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32XTHEADBB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32XTHEADBB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32XTHEADBB-NEXT: addi sp, sp, 32
; RV32XTHEADBB-NEXT: ret
;
; RV64XTHEADBB-LABEL: test_ctpop_i64:
; RV64XTHEADBB: # %bb.0:
-; RV64XTHEADBB-NEXT: addi sp, sp, -16
-; RV64XTHEADBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64XTHEADBB-NEXT: srli a1, a0, 1
; RV64XTHEADBB-NEXT: lui a2, 349525
; RV64XTHEADBB-NEXT: addiw a2, a2, 1365
@@ -2895,14 +2792,13 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
; RV64XTHEADBB-NEXT: slli a2, a1, 32
; RV64XTHEADBB-NEXT: add a1, a1, a2
; RV64XTHEADBB-NEXT: and a0, a0, a1
-; RV64XTHEADBB-NEXT: lui a1, 4112
-; RV64XTHEADBB-NEXT: addiw a1, a1, 257
-; RV64XTHEADBB-NEXT: slli a2, a1, 32
-; RV64XTHEADBB-NEXT: add a1, a1, a2
-; RV64XTHEADBB-NEXT: call __muldi3
+; RV64XTHEADBB-NEXT: slli a1, a0, 8
+; RV64XTHEADBB-NEXT: add a0, a0, a1
+; RV64XTHEADBB-NEXT: slli a1, a0, 16
+; RV64XTHEADBB-NEXT: add a0, a0, a1
+; RV64XTHEADBB-NEXT: slli a1, a0, 32
+; RV64XTHEADBB-NEXT: add a0, a0, a1
; RV64XTHEADBB-NEXT: srli a0, a0, 56
-; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64XTHEADBB-NEXT: addi sp, sp, 16
; RV64XTHEADBB-NEXT: ret
%1 = call i64 @llvm.ctpop.i64(i64 %a)
ret i64 %1
diff --git a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
index adf614435b31..9ae30e646fdb 100644
--- a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
+++ b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
@@ -602,19 +602,16 @@ define signext i32 @ctlz(i64 %b) nounwind {
;
; RV32I-LABEL: ctlz:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: srli a0, a1, 1
-; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi a4, a2, 1365
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi a3, a2, 819
+; RV32I-NEXT: lui a2, 61681
+; RV32I-NEXT: addi a2, a2, -241
+; RV32I-NEXT: bnez a1, .LBB7_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -625,28 +622,26 @@ define signext i32 @ctlz(i64 %b) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lui a2, 349525
-; RV32I-NEXT: addi s4, a2, 1365
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: lui a1, 209715
-; RV32I-NEXT: addi s5, a1, 819
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: lui a1, 61681
-; RV32I-NEXT: addi s6, a1, -241
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi s3, a1, 257
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: srli a0, s2, 1
-; RV32I-NEXT: or a0, s2, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: andi a0, a0, 63
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB7_2:
+; RV32I-NEXT: srli a0, a1, 1
+; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -657,41 +652,25 @@ define signext i32 @ctlz(i64 %b) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: bnez s0, .LBB7_2
-; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: addi s1, a0, 32
-; RV32I-NEXT: j .LBB7_3
-; RV32I-NEXT: .LBB7_2:
-; RV32I-NEXT: srli s1, s1, 24
-; RV32I-NEXT: .LBB7_3: # %entry
-; RV32I-NEXT: andi a0, s1, 63
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: andi a0, a0, 63
; RV32I-NEXT: ret
;
; RV64I-LABEL: ctlz:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 2
@@ -727,15 +706,14 @@ define signext i32 @ctlz(i64 %b) nounwind {
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 32
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: slli a0, a0, 2
; RV64I-NEXT: srli a0, a0, 58
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
index 3731b9719445..b45ab135fa1c 100644
--- a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
@@ -11,8 +11,6 @@ define i32 @ctlz_i32(i32 %a) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: beqz a0, .LBB0_2
; RV32I-NEXT: # %bb.1: # %cond.false
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
@@ -40,12 +38,11 @@ define i32 @ctlz_i32(i32 %a) nounwind {
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi a1, a1, 257
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB0_2:
; RV32I-NEXT: li a0, 32
@@ -64,19 +61,16 @@ declare i64 @llvm.ctlz.i64(i64, i1)
define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-LABEL: ctlz_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: srli a0, a1, 1
-; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi a4, a2, 1365
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi a3, a2, 819
+; RV32I-NEXT: lui a2, 61681
+; RV32I-NEXT: addi a2, a2, -241
+; RV32I-NEXT: bnez a1, .LBB1_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -87,28 +81,26 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lui a2, 349525
-; RV32I-NEXT: addi s4, a2, 1365
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: lui a1, 209715
-; RV32I-NEXT: addi s5, a1, 819
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: lui a1, 61681
-; RV32I-NEXT: addi s6, a1, -241
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi s3, a1, 257
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: srli a0, s2, 1
-; RV32I-NEXT: or a0, s2, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB1_2:
+; RV32I-NEXT: srli a0, a1, 1
+; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -119,35 +111,21 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: bnez s0, .LBB1_2
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: addi a0, a0, 32
-; RV32I-NEXT: j .LBB1_3
-; RV32I-NEXT: .LBB1_2:
-; RV32I-NEXT: srli a0, s1, 24
-; RV32I-NEXT: .LBB1_3:
; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32XTHEADBB-LABEL: ctlz_i64:
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 36c107061795..7e6c3f9c87d2 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -11,8 +11,6 @@ define i32 @ctlz_i32(i32 %a) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: beqz a0, .LBB0_2
; RV32I-NEXT: # %bb.1: # %cond.false
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
@@ -40,12 +38,11 @@ define i32 @ctlz_i32(i32 %a) nounwind {
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi a1, a1, 257
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB0_2:
; RV32I-NEXT: li a0, 32
@@ -64,19 +61,16 @@ declare i64 @llvm.ctlz.i64(i64, i1)
define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-LABEL: ctlz_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: srli a0, a1, 1
-; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi a4, a2, 1365
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi a3, a2, 819
+; RV32I-NEXT: lui a2, 61681
+; RV32I-NEXT: addi a2, a2, -241
+; RV32I-NEXT: bnez a1, .LBB1_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -87,28 +81,26 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lui a2, 349525
-; RV32I-NEXT: addi s4, a2, 1365
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: lui a1, 209715
-; RV32I-NEXT: addi s5, a1, 819
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: lui a1, 61681
-; RV32I-NEXT: addi s6, a1, -241
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi s3, a1, 257
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: srli a0, s2, 1
-; RV32I-NEXT: or a0, s2, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB1_2:
+; RV32I-NEXT: srli a0, a1, 1
+; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -119,35 +111,21 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: bnez s0, .LBB1_2
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: addi a0, a0, 32
-; RV32I-NEXT: j .LBB1_3
-; RV32I-NEXT: .LBB1_2:
-; RV32I-NEXT: srli a0, s1, 24
-; RV32I-NEXT: .LBB1_3:
; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctlz_i64:
@@ -275,8 +253,6 @@ declare i32 @llvm.ctpop.i32(i32)
define i32 @ctpop_i32(i32 %a) nounwind {
; RV32I-LABEL: ctpop_i32:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: lui a2, 349525
; RV32I-NEXT: addi a2, a2, 1365
@@ -293,12 +269,11 @@ define i32 @ctpop_i32(i32 %a) nounwind {
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi a1, a1, 257
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_i32:
@@ -390,58 +365,42 @@ declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
define <2 x i32> @ctpop_v2i32(<2 x i32> %a) nounwind {
; RV32I-LABEL: ctpop_v2i32:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lui a2, 349525
-; RV32I-NEXT: addi s3, a2, 1365
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: lui a1, 209715
-; RV32I-NEXT: addi s4, a1, 819
-; RV32I-NEXT: and a1, a0, s4
+; RV32I-NEXT: srli a2, a0, 1
+; RV32I-NEXT: lui a3, 349525
+; RV32I-NEXT: addi a3, a3, 1365
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi a2, a2, 819
+; RV32I-NEXT: and a4, a0, a2
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: lui a1, 61681
-; RV32I-NEXT: addi s5, a1, -241
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi s1, a1, 257
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: srli s2, a0, 24
-; RV32I-NEXT: srli a0, s0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: sub s0, s0, a0
-; RV32I-NEXT: and a0, s0, s4
-; RV32I-NEXT: srli s0, s0, 2
-; RV32I-NEXT: and a1, s0, s4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: add a0, a4, a0
+; RV32I-NEXT: srli a4, a0, 4
+; RV32I-NEXT: add a0, a0, a4
+; RV32I-NEXT: lui a4, 61681
+; RV32I-NEXT: addi a4, a4, -241
+; RV32I-NEXT: and a0, a0, a4
+; RV32I-NEXT: slli a5, a0, 8
+; RV32I-NEXT: add a0, a0, a5
+; RV32I-NEXT: slli a5, a0, 16
+; RV32I-NEXT: add a0, a0, a5
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: srli a5, a1, 1
+; RV32I-NEXT: and a3, a5, a3
+; RV32I-NEXT: sub a1, a1, a3
+; RV32I-NEXT: and a3, a1, a2
+; RV32I-NEXT: srli a1, a1, 2
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: add a1, a3, a1
+; RV32I-NEXT: srli a2, a1, 4
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: and a1, a1, a4
+; RV32I-NEXT: slli a2, a1, 8
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: slli a2, a1, 16
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: srli a1, a1, 24
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_v2i32:
@@ -558,59 +517,44 @@ declare i64 @llvm.ctpop.i64(i64)
define i64 @ctpop_i64(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: srli a0, a1, 1
-; RV32I-NEXT: lui a2, 349525
-; RV32I-NEXT: addi s2, a2, 1365
-; RV32I-NEXT: and a0, a0, s2
-; RV32I-NEXT: sub a1, a1, a0
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi s3, a0, 819
-; RV32I-NEXT: and a0, a1, s3
+; RV32I-NEXT: srli a2, a1, 1
+; RV32I-NEXT: lui a3, 349525
+; RV32I-NEXT: addi a3, a3, 1365
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sub a1, a1, a2
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi a2, a2, 819
+; RV32I-NEXT: and a4, a1, a2
; RV32I-NEXT: srli a1, a1, 2
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: lui a1, 61681
-; RV32I-NEXT: addi s4, a1, -241
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi s1, a1, 257
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: srli s5, a0, 24
-; RV32I-NEXT: srli a0, s0, 1
-; RV32I-NEXT: and a0, a0, s2
-; RV32I-NEXT: sub s0, s0, a0
-; RV32I-NEXT: and a0, s0, s3
-; RV32I-NEXT: srli s0, s0, 2
-; RV32I-NEXT: and a1, s0, s3
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: add a1, a4, a1
+; RV32I-NEXT: srli a4, a1, 4
+; RV32I-NEXT: add a1, a1, a4
+; RV32I-NEXT: lui a4, 61681
+; RV32I-NEXT: addi a4, a4, -241
+; RV32I-NEXT: and a1, a1, a4
+; RV32I-NEXT: slli a5, a1, 8
+; RV32I-NEXT: add a1, a1, a5
+; RV32I-NEXT: slli a5, a1, 16
+; RV32I-NEXT: add a1, a1, a5
+; RV32I-NEXT: srli a1, a1, 24
+; RV32I-NEXT: srli a5, a0, 1
+; RV32I-NEXT: and a3, a5, a3
+; RV32I-NEXT: sub a0, a0, a3
+; RV32I-NEXT: and a3, a0, a2
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: add a0, a3, a0
+; RV32I-NEXT: srli a2, a0, 4
+; RV32I-NEXT: add a0, a0, a2
+; RV32I-NEXT: and a0, a0, a4
+; RV32I-NEXT: slli a2, a0, 8
+; RV32I-NEXT: add a0, a0, a2
+; RV32I-NEXT: slli a2, a0, 16
+; RV32I-NEXT: add a0, a0, a2
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: add a0, a0, s5
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_i64:
@@ -738,99 +682,82 @@ declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind {
; RV32I-LABEL: ctpop_v2i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -48
-; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a0, 4(a1)
-; RV32I-NEXT: lw s2, 8(a1)
-; RV32I-NEXT: lw s5, 12(a1)
-; RV32I-NEXT: lw s6, 0(a1)
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lui a2, 349525
-; RV32I-NEXT: addi s3, a2, 1365
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: lui a1, 209715
-; RV32I-NEXT: addi s4, a1, 819
-; RV32I-NEXT: and a1, a0, s4
-; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: lui a1, 61681
-; RV32I-NEXT: addi s7, a1, -241
-; RV32I-NEXT: and a0, a0, s7
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi s1, a1, 257
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: srli s8, a0, 24
-; RV32I-NEXT: srli a0, s6, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: sub a0, s6, a0
-; RV32I-NEXT: and a1, a0, s4
-; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s7
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: add s8, a0, s8
-; RV32I-NEXT: srli a0, s5, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: sub a0, s5, a0
-; RV32I-NEXT: and a1, a0, s4
-; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s7
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: srli s5, a0, 24
-; RV32I-NEXT: srli a0, s2, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: sub a0, s2, a0
-; RV32I-NEXT: and a1, a0, s4
-; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s7
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: add a0, a0, s5
-; RV32I-NEXT: sw zero, 12(s0)
-; RV32I-NEXT: sw zero, 4(s0)
-; RV32I-NEXT: sw a0, 8(s0)
-; RV32I-NEXT: sw s8, 0(s0)
-; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 48
+; RV32I-NEXT: lw a3, 4(a1)
+; RV32I-NEXT: lw a2, 8(a1)
+; RV32I-NEXT: lw a4, 12(a1)
+; RV32I-NEXT: lw a1, 0(a1)
+; RV32I-NEXT: srli a5, a3, 1
+; RV32I-NEXT: lui a6, 349525
+; RV32I-NEXT: addi a6, a6, 1365
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: sub a3, a3, a5
+; RV32I-NEXT: lui a5, 209715
+; RV32I-NEXT: addi a5, a5, 819
+; RV32I-NEXT: and a7, a3, a5
+; RV32I-NEXT: srli a3, a3, 2
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: add a3, a7, a3
+; RV32I-NEXT: srli a7, a3, 4
+; RV32I-NEXT: add a3, a3, a7
+; RV32I-NEXT: lui a7, 61681
+; RV32I-NEXT: addi a7, a7, -241
+; RV32I-NEXT: and a3, a3, a7
+; RV32I-NEXT: slli t0, a3, 8
+; RV32I-NEXT: add a3, a3, t0
+; RV32I-NEXT: slli t0, a3, 16
+; RV32I-NEXT: add a3, a3, t0
+; RV32I-NEXT: srli a3, a3, 24
+; RV32I-NEXT: srli t0, a1, 1
+; RV32I-NEXT: and t0, t0, a6
+; RV32I-NEXT: sub a1, a1, t0
+; RV32I-NEXT: and t0, a1, a5
+; RV32I-NEXT: srli a1, a1, 2
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: add a1, t0, a1
+; RV32I-NEXT: srli t0, a1, 4
+; RV32I-NEXT: add a1, a1, t0
+; RV32I-NEXT: and a1, a1, a7
+; RV32I-NEXT: slli t0, a1, 8
+; RV32I-NEXT: add a1, a1, t0
+; RV32I-NEXT: slli t0, a1, 16
+; RV32I-NEXT: add a1, a1, t0
+; RV32I-NEXT: srli a1, a1, 24
+; RV32I-NEXT: add a1, a1, a3
+; RV32I-NEXT: srli a3, a4, 1
+; RV32I-NEXT: and a3, a3, a6
+; RV32I-NEXT: sub a4, a4, a3
+; RV32I-NEXT: and a3, a4, a5
+; RV32I-NEXT: srli a4, a4, 2
+; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: add a3, a3, a4
+; RV32I-NEXT: srli a4, a3, 4
+; RV32I-NEXT: add a3, a3, a4
+; RV32I-NEXT: and a3, a3, a7
+; RV32I-NEXT: slli a4, a3, 8
+; RV32I-NEXT: add a3, a3, a4
+; RV32I-NEXT: slli a4, a3, 16
+; RV32I-NEXT: add a3, a3, a4
+; RV32I-NEXT: srli a3, a3, 24
+; RV32I-NEXT: srli a4, a2, 1
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sub a2, a2, a4
+; RV32I-NEXT: and a4, a2, a5
+; RV32I-NEXT: srli a2, a2, 2
+; RV32I-NEXT: and a2, a2, a5
+; RV32I-NEXT: add a2, a4, a2
+; RV32I-NEXT: srli a4, a2, 4
+; RV32I-NEXT: add a2, a2, a4
+; RV32I-NEXT: and a2, a2, a7
+; RV32I-NEXT: slli a4, a2, 8
+; RV32I-NEXT: add a2, a2, a4
+; RV32I-NEXT: slli a4, a2, 16
+; RV32I-NEXT: add a2, a2, a4
+; RV32I-NEXT: srli a2, a2, 24
+; RV32I-NEXT: add a2, a2, a3
+; RV32I-NEXT: sw zero, 12(a0)
+; RV32I-NEXT: sw zero, 4(a0)
+; RV32I-NEXT: sw a2, 8(a0)
+; RV32I-NEXT: sw a1, 0(a0)
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_v2i64:
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll
index 73bfc6480b4d..acd63f24bb8f 100644
--- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll
@@ -317,8 +317,6 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB5_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 2
@@ -354,14 +352,13 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 32
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB5_2:
; RV64I-NEXT: li a0, 64
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll
index 7feef4dad411..b0e447b71178 100644
--- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll
@@ -307,8 +307,6 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB5_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 2
@@ -344,14 +342,13 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 32
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB5_2:
; RV64I-NEXT: li a0, 64
@@ -623,8 +620,6 @@ declare i64 @llvm.ctpop.i64(i64)
define i64 @ctpop_i64(i64 %a) nounwind {
; RV64I-LABEL: ctpop_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
@@ -647,14 +642,13 @@ define i64 @ctpop_i64(i64 %a) nounwind {
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 32
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: ctpop_i64:
diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
index 1f62ea9f5681..6cdab888ffcd 100644
--- a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
@@ -11,8 +11,6 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB0_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
@@ -38,14 +36,13 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind {
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: addi a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB0_2:
; RV64I-NEXT: li a0, 32
@@ -66,8 +63,6 @@ define signext i32 @log2_i32(i32 signext %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB1_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
@@ -93,14 +88,13 @@ define signext i32 @log2_i32(i32 signext %a) nounwind {
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: addi a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: j .LBB1_3
; RV64I-NEXT: .LBB1_2:
; RV64I-NEXT: li a0, 32
@@ -125,50 +119,45 @@ define signext i32 @log2_i32(i32 signext %a) nounwind {
define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
; RV64I-LABEL: log2_ceil_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: addiw a0, a0, -1
-; RV64I-NEXT: li s0, 32
-; RV64I-NEXT: li a1, 32
-; RV64I-NEXT: beqz a0, .LBB2_2
+; RV64I-NEXT: addiw a1, a0, -1
+; RV64I-NEXT: li a0, 32
+; RV64I-NEXT: li a2, 32
+; RV64I-NEXT: beqz a1, .LBB2_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: srliw a1, a0, 1
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 2
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 4
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 8
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 16
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: not a0, a0
-; RV64I-NEXT: srli a1, a0, 1
-; RV64I-NEXT: lui a2, 349525
-; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: srliw a2, a1, 1
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 2
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 4
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 8
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 16
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: srli a2, a1, 1
+; RV64I-NEXT: lui a3, 349525
+; RV64I-NEXT: addiw a3, a3, 1365
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sub a1, a1, a2
+; RV64I-NEXT: lui a2, 209715
+; RV64I-NEXT: addiw a2, a2, 819
+; RV64I-NEXT: and a3, a1, a2
+; RV64I-NEXT: srli a1, a1, 2
; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: lui a1, 209715
-; RV64I-NEXT: addiw a1, a1, 819
-; RV64I-NEXT: and a2, a0, a1
-; RV64I-NEXT: srli a0, a0, 2
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: add a0, a2, a0
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a1, a0, 24
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: srli a2, a1, 4
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: lui a2, 61681
+; RV64I-NEXT: addi a2, a2, -241
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: slli a2, a1, 8
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: slli a2, a1, 16
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 24
; RV64I-NEXT: .LBB2_2: # %cond.end
-; RV64I-NEXT: sub a0, s0, a1
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: log2_ceil_i32:
@@ -189,48 +178,42 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
; RV64I-LABEL: findLastSet_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: srliw a0, a0, 1
-; RV64I-NEXT: or a0, s0, a0
-; RV64I-NEXT: srliw a1, a0, 2
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 4
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 8
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 16
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: not a0, a0
-; RV64I-NEXT: srli a1, a0, 1
-; RV64I-NEXT: lui a2, 349525
-; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: srliw a1, a0, 1
+; RV64I-NEXT: or a1, a0, a1
+; RV64I-NEXT: srliw a2, a1, 2
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 4
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 8
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 16
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: srli a2, a1, 1
+; RV64I-NEXT: lui a3, 349525
+; RV64I-NEXT: addiw a3, a3, 1365
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sub a1, a1, a2
+; RV64I-NEXT: lui a2, 209715
+; RV64I-NEXT: addiw a2, a2, 819
+; RV64I-NEXT: and a3, a1, a2
+; RV64I-NEXT: srli a1, a1, 2
; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: lui a1, 209715
-; RV64I-NEXT: addiw a1, a1, 819
-; RV64I-NEXT: and a2, a0, a1
-; RV64I-NEXT: srli a0, a0, 2
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: add a0, a2, a0
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: xori a0, a0, 31
-; RV64I-NEXT: snez a1, s0
-; RV64I-NEXT: addi a1, a1, -1
-; RV64I-NEXT: or a0, a1, a0
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: srli a2, a1, 4
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: lui a2, 61681
+; RV64I-NEXT: addi a2, a2, -241
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: slli a2, a1, 8
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: slli a2, a1, 16
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: srliw a1, a1, 24
+; RV64I-NEXT: xori a1, a1, 31
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: findLastSet_i32:
@@ -256,10 +239,6 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
; RV64I-NEXT: srliw a0, a0, 1
; RV64I-NEXT: beqz a0, .LBB4_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: .cfi_def_cfa_offset 16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
@@ -285,14 +264,13 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: addi a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB4_2:
; RV64I-NEXT: li a0, 32
@@ -317,8 +295,6 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB5_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 2
@@ -354,14 +330,13 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 32
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB5_2:
; RV64I-NEXT: li a0, 64
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 2269d8d04c9c..4d5ef5db8605 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -11,8 +11,6 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB0_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
@@ -38,14 +36,13 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind {
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: addi a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB0_2:
; RV64I-NEXT: li a0, 32
@@ -64,8 +61,6 @@ define signext i32 @log2_i32(i32 signext %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB1_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
@@ -91,14 +86,13 @@ define signext i32 @log2_i32(i32 signext %a) nounwind {
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: addi a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: j .LBB1_3
; RV64I-NEXT: .LBB1_2:
; RV64I-NEXT: li a0, 32
@@ -121,50 +115,45 @@ define signext i32 @log2_i32(i32 signext %a) nounwind {
define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
; RV64I-LABEL: log2_ceil_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: addiw a0, a0, -1
-; RV64I-NEXT: li s0, 32
-; RV64I-NEXT: li a1, 32
-; RV64I-NEXT: beqz a0, .LBB2_2
+; RV64I-NEXT: addiw a1, a0, -1
+; RV64I-NEXT: li a0, 32
+; RV64I-NEXT: li a2, 32
+; RV64I-NEXT: beqz a1, .LBB2_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: srliw a1, a0, 1
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 2
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 4
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 8
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 16
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: not a0, a0
-; RV64I-NEXT: srli a1, a0, 1
-; RV64I-NEXT: lui a2, 349525
-; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: srliw a2, a1, 1
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 2
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 4
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 8
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 16
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: srli a2, a1, 1
+; RV64I-NEXT: lui a3, 349525
+; RV64I-NEXT: addiw a3, a3, 1365
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sub a1, a1, a2
+; RV64I-NEXT: lui a2, 209715
+; RV64I-NEXT: addiw a2, a2, 819
+; RV64I-NEXT: and a3, a1, a2
+; RV64I-NEXT: srli a1, a1, 2
; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: lui a1, 209715
-; RV64I-NEXT: addiw a1, a1, 819
-; RV64I-NEXT: and a2, a0, a1
-; RV64I-NEXT: srli a0, a0, 2
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: add a0, a2, a0
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a1, a0, 24
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: srli a2, a1, 4
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: lui a2, 61681
+; RV64I-NEXT: addi a2, a2, -241
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: slli a2, a1, 8
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: slli a2, a1, 16
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 24
; RV64I-NEXT: .LBB2_2: # %cond.end
-; RV64I-NEXT: sub a0, s0, a1
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: log2_ceil_i32:
@@ -183,48 +172,42 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
; RV64I-LABEL: findLastSet_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: srliw a0, a0, 1
-; RV64I-NEXT: or a0, s0, a0
-; RV64I-NEXT: srliw a1, a0, 2
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 4
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 8
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 16
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: not a0, a0
-; RV64I-NEXT: srli a1, a0, 1
-; RV64I-NEXT: lui a2, 349525
-; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: srliw a1, a0, 1
+; RV64I-NEXT: or a1, a0, a1
+; RV64I-NEXT: srliw a2, a1, 2
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 4
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 8
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 16
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: srli a2, a1, 1
+; RV64I-NEXT: lui a3, 349525
+; RV64I-NEXT: addiw a3, a3, 1365
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sub a1, a1, a2
+; RV64I-NEXT: lui a2, 209715
+; RV64I-NEXT: addiw a2, a2, 819
+; RV64I-NEXT: and a3, a1, a2
+; RV64I-NEXT: srli a1, a1, 2
; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: lui a1, 209715
-; RV64I-NEXT: addiw a1, a1, 819
-; RV64I-NEXT: and a2, a0, a1
-; RV64I-NEXT: srli a0, a0, 2
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: add a0, a2, a0
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: xori a0, a0, 31
-; RV64I-NEXT: snez a1, s0
-; RV64I-NEXT: addi a1, a1, -1
-; RV64I-NEXT: or a0, a1, a0
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: srli a2, a1, 4
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: lui a2, 61681
+; RV64I-NEXT: addi a2, a2, -241
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: slli a2, a1, 8
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: slli a2, a1, 16
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: srliw a1, a1, 24
+; RV64I-NEXT: xori a1, a1, 31
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: findLastSet_i32:
@@ -248,10 +231,6 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
; RV64I-NEXT: srliw a0, a0, 1
; RV64I-NEXT: beqz a0, .LBB4_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: .cfi_def_cfa_offset 16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
@@ -277,14 +256,13 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: addi a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB4_2:
; RV64I-NEXT: li a0, 32
@@ -307,8 +285,6 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB5_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 2
@@ -344,14 +320,13 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 32
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB5_2:
; RV64I-NEXT: li a0, 64
@@ -544,8 +519,6 @@ declare i32 @llvm.ctpop.i32(i32)
define signext i32 @ctpop_i32(i32 signext %a) nounwind {
; RV64I-LABEL: ctpop_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
@@ -560,14 +533,13 @@ define signext i32 @ctpop_i32(i32 signext %a) nounwind {
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: addi a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: ctpop_i32:
@@ -657,8 +629,6 @@ define i1 @ctpop_i32_ne_one(i32 signext %a) nounwind {
define signext i32 @ctpop_i32_load(ptr %p) nounwind {
; RV64I-LABEL: ctpop_i32_load:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: lw a0, 0(a0)
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
@@ -674,14 +644,13 @@ define signext i32 @ctpop_i32_load(ptr %p) nounwind {
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: addi a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: ctpop_i32_load:
@@ -699,58 +668,42 @@ declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
define <2 x i32> @ctpop_v2i32(<2 x i32> %a) nounwind {
; RV64I-LABEL: ctpop_v2i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -64
-; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a1
-; RV64I-NEXT: srli a1, a0, 1
-; RV64I-NEXT: lui a2, 349525
-; RV64I-NEXT: addiw s3, a2, 1365
-; RV64I-NEXT: and a1, a1, s3
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: lui a1, 209715
-; RV64I-NEXT: addiw s4, a1, 819
-; RV64I-NEXT: and a1, a0, s4
+; RV64I-NEXT: srli a2, a0, 1
+; RV64I-NEXT: lui a3, 349525
+; RV64I-NEXT: addiw a3, a3, 1365
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: lui a2, 209715
+; RV64I-NEXT: addiw a2, a2, 819
+; RV64I-NEXT: and a4, a0, a2
; RV64I-NEXT: srli a0, a0, 2
-; RV64I-NEXT: and a0, a0, s4
-; RV64I-NEXT: add a0, a1, a0
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw s5, a1, -241
-; RV64I-NEXT: and a0, a0, s5
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw s1, a1, 257
-; RV64I-NEXT: mv a1, s1
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw s2, a0, 24
-; RV64I-NEXT: srli a0, s0, 1
-; RV64I-NEXT: and a0, a0, s3
-; RV64I-NEXT: sub s0, s0, a0
-; RV64I-NEXT: and a0, s0, s4
-; RV64I-NEXT: srli s0, s0, 2
-; RV64I-NEXT: and a1, s0, s4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: and a0, a0, s5
-; RV64I-NEXT: mv a1, s1
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a1, a0, 24
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 64
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: add a0, a4, a0
+; RV64I-NEXT: srli a4, a0, 4
+; RV64I-NEXT: add a0, a0, a4
+; RV64I-NEXT: lui a4, 61681
+; RV64I-NEXT: addi a4, a4, -241
+; RV64I-NEXT: and a0, a0, a4
+; RV64I-NEXT: slli a5, a0, 8
+; RV64I-NEXT: add a0, a0, a5
+; RV64I-NEXT: slli a5, a0, 16
+; RV64I-NEXT: add a0, a0, a5
+; RV64I-NEXT: srliw a0, a0, 24
+; RV64I-NEXT: srli a5, a1, 1
+; RV64I-NEXT: and a3, a5, a3
+; RV64I-NEXT: sub a1, a1, a3
+; RV64I-NEXT: and a3, a1, a2
+; RV64I-NEXT: srli a1, a1, 2
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: srli a2, a1, 4
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: and a1, a1, a4
+; RV64I-NEXT: slli a2, a1, 8
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: slli a2, a1, 16
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: srliw a1, a1, 24
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: ctpop_v2i32:
@@ -875,8 +828,6 @@ declare i64 @llvm.ctpop.i64(i64)
define i64 @ctpop_i64(i64 %a) nounwind {
; RV64I-LABEL: ctpop_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
@@ -899,14 +850,13 @@ define i64 @ctpop_i64(i64 %a) nounwind {
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 32
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: ctpop_i64:
@@ -998,66 +948,52 @@ declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind {
; RV64I-LABEL: ctpop_v2i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -64
-; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a1
-; RV64I-NEXT: srli a1, a0, 1
-; RV64I-NEXT: lui a2, 349525
-; RV64I-NEXT: addiw a2, a2, 1365
-; RV64I-NEXT: slli a3, a2, 32
-; RV64I-NEXT: add s3, a2, a3
-; RV64I-NEXT: and a1, a1, s3
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: lui a1, 209715
-; RV64I-NEXT: addiw a1, a1, 819
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add s4, a1, a2
-; RV64I-NEXT: and a1, a0, s4
+; RV64I-NEXT: srli a2, a0, 1
+; RV64I-NEXT: lui a3, 349525
+; RV64I-NEXT: addiw a3, a3, 1365
+; RV64I-NEXT: slli a4, a3, 32
+; RV64I-NEXT: add a3, a3, a4
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: lui a2, 209715
+; RV64I-NEXT: addiw a2, a2, 819
+; RV64I-NEXT: slli a4, a2, 32
+; RV64I-NEXT: add a2, a2, a4
+; RV64I-NEXT: and a4, a0, a2
; RV64I-NEXT: srli a0, a0, 2
-; RV64I-NEXT: and a0, a0, s4
-; RV64I-NEXT: add a0, a1, a0
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: add a0, a4, a0
+; RV64I-NEXT: srli a4, a0, 4
+; RV64I-NEXT: add a0, a0, a4
+; RV64I-NEXT: lui a4, 61681
+; RV64I-NEXT: addiw a4, a4, -241
+; RV64I-NEXT: slli a5, a4, 32
+; RV64I-NEXT: add a4, a4, a5
+; RV64I-NEXT: and a0, a0, a4
+; RV64I-NEXT: slli a5, a0, 8
+; RV64I-NEXT: add a0, a0, a5
+; RV64I-NEXT: slli a5, a0, 16
+; RV64I-NEXT: add a0, a0, a5
+; RV64I-NEXT: slli a5, a0, 32
+; RV64I-NEXT: add a0, a0, a5
+; RV64I-NEXT: srli a0, a0, 56
+; RV64I-NEXT: srli a5, a1, 1
+; RV64I-NEXT: and a3, a5, a3
+; RV64I-NEXT: sub a1, a1, a3
+; RV64I-NEXT: and a3, a1, a2
+; RV64I-NEXT: srli a1, a1, 2
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: srli a2, a1, 4
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: and a1, a1, a4
+; RV64I-NEXT: slli a2, a1, 8
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: slli a2, a1, 16
+; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add s5, a1, a2
-; RV64I-NEXT: and a0, a0, s5
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw s1, a1, 257
-; RV64I-NEXT: slli a1, s1, 32
-; RV64I-NEXT: add s1, s1, a1
-; RV64I-NEXT: mv a1, s1
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srli s2, a0, 56
-; RV64I-NEXT: srli a0, s0, 1
-; RV64I-NEXT: and a0, a0, s3
-; RV64I-NEXT: sub s0, s0, a0
-; RV64I-NEXT: and a0, s0, s4
-; RV64I-NEXT: srli s0, s0, 2
-; RV64I-NEXT: and a1, s0, s4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: and a0, a0, s5
-; RV64I-NEXT: mv a1, s1
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srli a1, a0, 56
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 64
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: srli a1, a1, 56
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: ctpop_v2i64: