author     Malay Sanghi <malay.sanghi@intel.com>    2024-04-16 16:36:17 +0530
committer  GitHub <noreply@github.com>              2024-04-16 13:06:17 +0200
commit     92e96c7bbacbb477265c7e5ff6c49a6de5d4ee69 (patch)
tree       e0d07279e4b32a20e03446f52643047cc8d0200a
parent     32b74ca6e41768c91eee8b8ca26235b110a65deb (diff)
[X86][GISel] Add DU chain lookups for LOAD & STORE (#87453)
For G_LOAD and G_STORE we want this information during regbankselect. Today we treat the load destination as an integer and insert converts.

Co-authored-by: Evgenii Kudriashov <evgenii.kudriashov@intel.com>
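A minimal sketch of the load-side idea, not the patch's exact code: during regbankselect, walk the def-use (DU) chain of the G_LOAD result and prefer the FP/vector bank when any non-debug user is a generic floating-point instruction. The helper name below is hypothetical; the MachineRegisterInfo iteration and the isPreISelGenericFloatingPointOpcode utility (hoisted into GlobalISel/Utils by this patch) are real.

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Hypothetical helper illustrating the DU-chain lookup for G_LOAD: if any
// non-debug user of the loaded value is a generic FP opcode, map the load
// result to the FP/vector bank instead of defaulting to GPR.
static bool loadResultPrefersFPBank(const MachineInstr &Load,
                                    const MachineRegisterInfo &MRI) {
  Register Dst = Load.getOperand(0).getReg();
  for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(Dst))
    if (isPreISelGenericFloatingPointOpcode(UseMI.getOpcode()))
      return true; // At least one FP user: treat this as a floating-point load.
  return false;    // No FP users seen: keep the default GPR mapping.
}

The patch itself routes this check through onlyUsesFP/hasFPConstraints, so copies and PHIs are looked through up to MaxFPRSearchDepth, and it handles G_STORE symmetrically by inspecting the instruction that defines the stored value.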
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/Utils.h                      |   4
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/Utils.cpp                             |  44
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp        |  37
-rw-r--r--  llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp                     |  24
-rw-r--r--  llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp             |  39
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp             |  40
-rw-r--r--  llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp                 | 139
-rw-r--r--  llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h                   |  16
-rw-r--r--  llvm/test/CodeGen/X86/GlobalISel/fconstant.ll                     |  13
-rw-r--r--  llvm/test/CodeGen/X86/GlobalISel/regbankselect-sse-intrinsics.ll | 153
-rw-r--r--  llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll             |  29
11 files changed, 367 insertions, 171 deletions
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 807cec3c177d..c4174cee5e10 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -555,5 +555,9 @@ void eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI,
/// debug users of \p MI by writing the effect of \p MI in a DIExpression.
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI);
+/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
+/// having only floating-point operands.
+bool isPreISelGenericFloatingPointOpcode(unsigned Opc);
+
} // End namespace llvm.
#endif
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index c3bc3203b636..ae43e9ccf611 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1665,3 +1665,47 @@ void llvm::salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI) {
}
}
}
+
+bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::G_FABS:
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FCANONICALIZE:
+ case TargetOpcode::G_FCEIL:
+ case TargetOpcode::G_FCONSTANT:
+ case TargetOpcode::G_FCOPYSIGN:
+ case TargetOpcode::G_FCOS:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FEXP2:
+ case TargetOpcode::G_FEXP:
+ case TargetOpcode::G_FFLOOR:
+ case TargetOpcode::G_FLOG10:
+ case TargetOpcode::G_FLOG2:
+ case TargetOpcode::G_FLOG:
+ case TargetOpcode::G_FMA:
+ case TargetOpcode::G_FMAD:
+ case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ case TargetOpcode::G_FMINIMUM:
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FNEARBYINT:
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPOW:
+ case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FREM:
+ case TargetOpcode::G_FRINT:
+ case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FSQRT:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_INTRINSIC_ROUND:
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
+ case TargetOpcode::G_INTRINSIC_TRUNC:
+ return true;
+ default:
+ return false;
+ }
+}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index d39de770eaf1..d5c4ce1888e7 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -424,43 +424,6 @@ void AArch64RegisterBankInfo::applyMappingImpl(
}
}
-/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
-/// having only floating-point operands.
-static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
- switch (Opc) {
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FMA:
- case TargetOpcode::G_FDIV:
- case TargetOpcode::G_FCONSTANT:
- case TargetOpcode::G_FPEXT:
- case TargetOpcode::G_FPTRUNC:
- case TargetOpcode::G_FCEIL:
- case TargetOpcode::G_FFLOOR:
- case TargetOpcode::G_FNEARBYINT:
- case TargetOpcode::G_FNEG:
- case TargetOpcode::G_FCOS:
- case TargetOpcode::G_FSIN:
- case TargetOpcode::G_FLOG10:
- case TargetOpcode::G_FLOG:
- case TargetOpcode::G_FLOG2:
- case TargetOpcode::G_FSQRT:
- case TargetOpcode::G_FABS:
- case TargetOpcode::G_FEXP:
- case TargetOpcode::G_FRINT:
- case TargetOpcode::G_INTRINSIC_TRUNC:
- case TargetOpcode::G_INTRINSIC_ROUND:
- case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
- case TargetOpcode::G_FMAXNUM:
- case TargetOpcode::G_FMINNUM:
- case TargetOpcode::G_FMAXIMUM:
- case TargetOpcode::G_FMINIMUM:
- return true;
- }
- return false;
-}
-
const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
const MachineInstr &MI) const {
diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
index 6af1fd8c88e5..62b58cba9f24 100644
--- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -104,26 +104,6 @@ MipsRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
}
}
-// Instructions where all register operands are floating point.
-static bool isFloatingPointOpcode(unsigned Opc) {
- switch (Opc) {
- case TargetOpcode::G_FCONSTANT:
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FDIV:
- case TargetOpcode::G_FABS:
- case TargetOpcode::G_FSQRT:
- case TargetOpcode::G_FCEIL:
- case TargetOpcode::G_FFLOOR:
- case TargetOpcode::G_FPEXT:
- case TargetOpcode::G_FPTRUNC:
- return true;
- default:
- return false;
- }
-}
-
// Instructions where use operands are floating point registers.
// Def operands are general purpose.
static bool isFloatingPointOpcodeUse(unsigned Opc) {
@@ -133,7 +113,7 @@ static bool isFloatingPointOpcodeUse(unsigned Opc) {
case TargetOpcode::G_FCMP:
return true;
default:
- return isFloatingPointOpcode(Opc);
+ return isPreISelGenericFloatingPointOpcode(Opc);
}
}
@@ -145,7 +125,7 @@ static bool isFloatingPointOpcodeDef(unsigned Opc) {
case TargetOpcode::G_UITOFP:
return true;
default:
- return isFloatingPointOpcode(Opc);
+ return isPreISelGenericFloatingPointOpcode(Opc);
}
}
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
index 6aeef145e307..125a49de7b27 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
@@ -13,6 +13,7 @@
#include "PPCRegisterBankInfo.h"
#include "PPCRegisterInfo.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
@@ -239,44 +240,6 @@ PPCRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
return getInstructionMapping(MappingID, Cost, OperandsMapping, NumOperands);
}
-/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
-/// having only floating-point operands.
-/// FIXME: this is copied from target AArch64. Needs some code refactor here to
-/// put this function in GlobalISel/Utils.cpp.
-static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
- switch (Opc) {
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FMA:
- case TargetOpcode::G_FDIV:
- case TargetOpcode::G_FCONSTANT:
- case TargetOpcode::G_FPEXT:
- case TargetOpcode::G_FPTRUNC:
- case TargetOpcode::G_FCEIL:
- case TargetOpcode::G_FFLOOR:
- case TargetOpcode::G_FNEARBYINT:
- case TargetOpcode::G_FNEG:
- case TargetOpcode::G_FCOS:
- case TargetOpcode::G_FSIN:
- case TargetOpcode::G_FLOG10:
- case TargetOpcode::G_FLOG:
- case TargetOpcode::G_FLOG2:
- case TargetOpcode::G_FSQRT:
- case TargetOpcode::G_FABS:
- case TargetOpcode::G_FEXP:
- case TargetOpcode::G_FRINT:
- case TargetOpcode::G_INTRINSIC_TRUNC:
- case TargetOpcode::G_INTRINSIC_ROUND:
- case TargetOpcode::G_FMAXNUM:
- case TargetOpcode::G_FMINNUM:
- case TargetOpcode::G_FMAXIMUM:
- case TargetOpcode::G_FMINIMUM:
- return true;
- }
- return false;
-}
-
/// \returns true if a given intrinsic \p ID only uses and defines FPRs.
static bool isFPIntrinsic(unsigned ID) {
// TODO: Add more intrinsics.
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
index 86e44343b508..cc534f29685f 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
@@ -154,46 +154,6 @@ static const RegisterBankInfo::ValueMapping *getFPValueMapping(unsigned Size) {
return &RISCV::ValueMappings[Idx];
}
-/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
-/// having only floating-point operands.
-/// FIXME: this is copied from target AArch64. Needs some code refactor here to
-/// put this function in GlobalISel/Utils.cpp.
-static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
- switch (Opc) {
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FMA:
- case TargetOpcode::G_FDIV:
- case TargetOpcode::G_FCONSTANT:
- case TargetOpcode::G_FPEXT:
- case TargetOpcode::G_FPTRUNC:
- case TargetOpcode::G_FCEIL:
- case TargetOpcode::G_FFLOOR:
- case TargetOpcode::G_FNEARBYINT:
- case TargetOpcode::G_FNEG:
- case TargetOpcode::G_FCOPYSIGN:
- case TargetOpcode::G_FCOS:
- case TargetOpcode::G_FSIN:
- case TargetOpcode::G_FLOG10:
- case TargetOpcode::G_FLOG:
- case TargetOpcode::G_FLOG2:
- case TargetOpcode::G_FSQRT:
- case TargetOpcode::G_FABS:
- case TargetOpcode::G_FEXP:
- case TargetOpcode::G_FRINT:
- case TargetOpcode::G_INTRINSIC_TRUNC:
- case TargetOpcode::G_INTRINSIC_ROUND:
- case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
- case TargetOpcode::G_FMAXNUM:
- case TargetOpcode::G_FMINNUM:
- case TargetOpcode::G_FMAXIMUM:
- case TargetOpcode::G_FMINIMUM:
- return true;
- }
- return false;
-}
-
// TODO: Make this more like AArch64?
bool RISCVRegisterBankInfo::hasFPConstraints(
const MachineInstr &MI, const MachineRegisterInfo &MRI,
diff --git a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
index e7c9e60ba95f..9e85424e76e6 100644
--- a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
@@ -13,10 +13,13 @@
#include "X86RegisterBankInfo.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/IntrinsicsX86.h"
#define GET_TARGET_REGBANK_IMPL
#include "X86GenRegisterBank.inc"
@@ -68,6 +71,98 @@ X86RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
llvm_unreachable("Unsupported register kind yet.");
}
+// \returns true if a given intrinsic only uses and defines FPRs.
+static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
+ const MachineInstr &MI) {
+ // TODO: Add more intrinsics.
+ switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
+ default:
+ return false;
+ // SSE1
+ case Intrinsic::x86_sse_rcp_ss:
+ case Intrinsic::x86_sse_rcp_ps:
+ case Intrinsic::x86_sse_rsqrt_ss:
+ case Intrinsic::x86_sse_rsqrt_ps:
+ case Intrinsic::x86_sse_min_ss:
+ case Intrinsic::x86_sse_min_ps:
+ case Intrinsic::x86_sse_max_ss:
+ case Intrinsic::x86_sse_max_ps:
+ return true;
+ }
+ return false;
+}
+
+bool X86RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ unsigned Depth) const {
+ unsigned Op = MI.getOpcode();
+ if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
+ return true;
+
+ // Do we have an explicit floating point instruction?
+ if (isPreISelGenericFloatingPointOpcode(Op))
+ return true;
+
+ // No. Check if we have a copy-like instruction. If we do, then we could
+ // still be fed by floating point instructions.
+ if (Op != TargetOpcode::COPY && !MI.isPHI() &&
+ !isPreISelGenericOptimizationHint(Op))
+ return false;
+
+ // Check if we already know the register bank.
+ auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
+ if (RB == &getRegBank(X86::PSRRegBankID))
+ return true;
+ if (RB == &getRegBank(X86::GPRRegBankID))
+ return false;
+
+ // We don't know anything.
+ //
+ // If we have a phi, we may be able to infer that it will be assigned a fp
+ // type based off of its inputs.
+ if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
+ return false;
+
+ return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
+ return Op.isReg() &&
+ onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
+ });
+}
+
+bool X86RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ unsigned Depth) const {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_FCMP:
+ case TargetOpcode::G_LROUND:
+ case TargetOpcode::G_LLROUND:
+ case TargetOpcode::G_INTRINSIC_TRUNC:
+ case TargetOpcode::G_INTRINSIC_ROUND:
+ return true;
+ default:
+ break;
+ }
+ return hasFPConstraints(MI, MRI, TRI, Depth);
+}
+
+bool X86RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ unsigned Depth) const {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_UITOFP:
+ return true;
+ default:
+ break;
+ }
+ return hasFPConstraints(MI, MRI, TRI, Depth);
+}
+
X86GenRegisterBankInfo::PartialMappingIdx
X86GenRegisterBankInfo::getPartialMappingIdx(const MachineInstr &MI,
const LLT &Ty, bool isFP) {
@@ -180,11 +275,13 @@ X86RegisterBankInfo::getSameOperandsMapping(const MachineInstr &MI,
const RegisterBankInfo::InstructionMapping &
X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned Opc = MI.getOpcode();
- // Try the default logic for non-generic instructions that are either copies
- // or already have some operands assigned to banks.
+ // Try the default logic for non-generic instructions that are either
+ // copies or already have some operands assigned to banks.
if (!isPreISelGenericOpcode(Opc) || Opc == TargetOpcode::G_PHI) {
const InstructionMapping &Mapping = getInstrMappingImpl(MI);
if (Mapping.isValid())
@@ -221,13 +318,14 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_FPEXT:
case TargetOpcode::G_FPTRUNC:
case TargetOpcode::G_FCONSTANT:
- // Instruction having only floating-point operands (all scalars in VECRReg)
+ // Instruction having only floating-point operands (all scalars in
+ // VECRReg)
getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ true, OpRegBankIdx);
break;
case TargetOpcode::G_SITOFP:
case TargetOpcode::G_FPTOSI: {
- // Some of the floating-point instructions have mixed GPR and FP operands:
- // fine-tune the computed mapping.
+ // Some of the floating-point instructions have mixed GPR and FP
+ // operands: fine-tune the computed mapping.
auto &Op0 = MI.getOperand(0);
auto &Op1 = MI.getOperand(1);
const LLT Ty0 = MRI.getType(Op0.getReg());
@@ -271,9 +369,36 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ isFPTrunc || isFPAnyExt,
OpRegBankIdx);
- } break;
+ break;
+ }
+ case TargetOpcode::G_LOAD: {
+ // Check if that load feeds fp instructions.
+ // In that case, we want the default mapping to be on FPR
+ // instead of blind map every scalar to GPR.
+ bool IsFP = any_of(MRI.use_nodbg_instructions(cast<GLoad>(MI).getDstReg()),
+ [&](const MachineInstr &UseMI) {
+ // If we have at least one direct use in a FP
+ // instruction, assume this was a floating point load
+ // in the IR. If it was not, we would have had a
+ // bitcast before reaching that instruction.
+ return onlyUsesFP(UseMI, MRI, TRI);
+ });
+ getInstrPartialMappingIdxs(MI, MRI, IsFP, OpRegBankIdx);
+ break;
+ }
+ case TargetOpcode::G_STORE: {
+ // Check if that store is fed by fp instructions.
+ Register VReg = cast<GStore>(MI).getValueReg();
+ if (!VReg)
+ break;
+ MachineInstr *DefMI = MRI.getVRegDef(VReg);
+ bool IsFP = onlyDefinesFP(*DefMI, MRI, TRI);
+ getInstrPartialMappingIdxs(MI, MRI, IsFP, OpRegBankIdx);
+ break;
+ }
default:
- // Track the bank of each register, use NotFP mapping (all scalars in GPRs)
+ // Track the bank of each register, use NotFP mapping (all scalars in
+ // GPRs)
getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ false, OpRegBankIdx);
break;
}
diff --git a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h
index 989c5956ad59..8f38e717e36b 100644
--- a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h
+++ b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h
@@ -62,6 +62,22 @@ private:
const SmallVectorImpl<PartialMappingIdx> &OpRegBankIdx,
SmallVectorImpl<const ValueMapping *> &OpdsMapping);
+ // Maximum recursion depth for hasFPConstraints.
+ const unsigned MaxFPRSearchDepth = 2;
+
+ /// \returns true if \p MI only uses and defines FPRs.
+ bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ unsigned Depth = 0) const;
+
+ /// \returns true if \p MI only uses FPRs.
+ bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
+
+ /// \returns true if \p MI only defines FPRs.
+ bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
+
public:
X86RegisterBankInfo(const TargetRegisterInfo &TRI);
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll b/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll
index a9b2037e9947..8d2ee3c50f21 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll
@@ -10,27 +10,22 @@ define void @test_float(ptr %a , float %b) {
; CHECK64_SMALL: # %bb.0: # %entry
; CHECK64_SMALL-NEXT: movss {{.*#+}} xmm1 = [5.5E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK64_SMALL-NEXT: addss %xmm0, %xmm1
-; CHECK64_SMALL-NEXT: movd %xmm1, %eax
-; CHECK64_SMALL-NEXT: movl %eax, (%rdi)
+; CHECK64_SMALL-NEXT: movss %xmm1, (%rdi)
; CHECK64_SMALL-NEXT: retq
;
; CHECK64_LARGE-LABEL: test_float:
; CHECK64_LARGE: # %bb.0: # %entry
; CHECK64_LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
; CHECK64_LARGE-NEXT: addss (%rax), %xmm0
-; CHECK64_LARGE-NEXT: movd %xmm0, %eax
-; CHECK64_LARGE-NEXT: movl %eax, (%rdi)
+; CHECK64_LARGE-NEXT: movss %xmm0, (%rdi)
; CHECK64_LARGE-NEXT: retq
;
; CHECK32-LABEL: test_float:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: movss {{.*#+}} xmm0 = [5.5E+0,0.0E+0,0.0E+0,0.0E+0]
-; CHECK32-NEXT: movd %ecx, %xmm1
-; CHECK32-NEXT: addss %xmm0, %xmm1
-; CHECK32-NEXT: movd %xmm1, %ecx
-; CHECK32-NEXT: movl %ecx, (%eax)
+; CHECK32-NEXT: addss {{[0-9]+}}(%esp), %xmm0
+; CHECK32-NEXT: movss %xmm0, (%eax)
; CHECK32-NEXT: retl
entry:
%aa = fadd float 5.500000e+00, %b
diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-sse-intrinsics.ll b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-sse-intrinsics.ll
new file mode 100644
index 000000000000..3388af605d96
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-sse-intrinsics.ll
@@ -0,0 +1,153 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=i686-- -mattr=+sse -global-isel -stop-after=regbankselect | FileCheck %s
+
+define void @test_x86_sse_max_ps(ptr %p1, ptr %p2) {
+ ; CHECK-LABEL: name: test_x86_sse_max_ps
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1)
+ ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0)
+ ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1)
+ ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD1]](p0) :: (load (<4 x s32>) from %ir.p2)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.max.ps), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
+ ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1)
+ ; CHECK-NEXT: RET 0
+ %a0 = load <4 x float>, ptr %p1, align 16
+ %a1 = load <4 x float>, ptr %p2, align 16
+ %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ store <4 x float> %res, ptr %p1
+ ret void
+}
+declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_max_ss(ptr %p1, ptr %p2) {
+ ; CHECK-LABEL: name: test_x86_sse_max_ss
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1)
+ ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0)
+ ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1)
+ ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD1]](p0) :: (load (<4 x s32>) from %ir.p2)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.max.ss), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
+ ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1)
+ ; CHECK-NEXT: RET 0
+ %a0 = load <4 x float>, ptr %p1, align 16
+ %a1 = load <4 x float>, ptr %p2, align 16
+ %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ store <4 x float> %res, ptr %p1
+ ret void
+}
+declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_min_ps(ptr %p1, ptr %p2) {
+ ; CHECK-LABEL: name: test_x86_sse_min_ps
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1)
+ ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0)
+ ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1)
+ ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD1]](p0) :: (load (<4 x s32>) from %ir.p2)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.min.ps), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
+ ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1)
+ ; CHECK-NEXT: RET 0
+ %a0 = load <4 x float>, ptr %p1, align 16
+ %a1 = load <4 x float>, ptr %p2, align 16
+ %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ store <4 x float> %res, ptr %p1
+ ret void
+}
+declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_min_ss(ptr %p1, ptr %p2) {
+ ; CHECK-LABEL: name: test_x86_sse_min_ss
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1)
+ ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0)
+ ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1)
+ ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD1]](p0) :: (load (<4 x s32>) from %ir.p2)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.min.ss), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
+ ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1)
+ ; CHECK-NEXT: RET 0
+ %a0 = load <4 x float>, ptr %p1, align 16
+ %a1 = load <4 x float>, ptr %p2, align 16
+ %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ store <4 x float> %res, ptr %p1
+ ret void
+}
+declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_rcp_ps(ptr %p1, ptr %p2) {
+ ; CHECK-LABEL: name: test_x86_sse_rcp_ps
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.rcp.ps), [[LOAD1]](<4 x s32>)
+ ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1)
+ ; CHECK-NEXT: RET 0
+ %a0 = load <4 x float>, ptr %p1, align 16
+ %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+ store <4 x float> %res, ptr %p1
+ ret void
+}
+declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_rcp_ss(ptr %p1, ptr %p2) {
+ ; CHECK-LABEL: name: test_x86_sse_rcp_ss
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.rcp.ss), [[LOAD1]](<4 x s32>)
+ ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1)
+ ; CHECK-NEXT: RET 0
+ %a0 = load <4 x float>, ptr %p1, align 16
+ %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+ store <4 x float> %res, ptr %p1
+ ret void
+}
+declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_rsqrt_ps(ptr %p1, ptr %p2) {
+ ; CHECK-LABEL: name: test_x86_sse_rsqrt_ps
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.rsqrt.ps), [[LOAD1]](<4 x s32>)
+ ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1)
+ ; CHECK-NEXT: RET 0
+ %a0 = load <4 x float>, ptr %p1, align 16
+ %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+ store <4 x float> %res, ptr %p1
+ ret void
+}
+declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_rsqrt_ss(ptr %p1, ptr %p2) {
+ ; CHECK-LABEL: name: test_x86_sse_rsqrt_ss
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.rsqrt.ss), [[LOAD1]](<4 x s32>)
+ ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1)
+ ; CHECK-NEXT: RET 0
+ %a0 = load <4 x float>, ptr %p1, align 16
+ %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+ store <4 x float> %res, ptr %p1
+ ret void
+}
+declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll
index d09db0f2474c..99d458a183a9 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll
@@ -142,7 +142,7 @@ define float @f4(float %val) {
; X86-LABEL: name: f4
; X86: bb.1 (%ir-block.0):
; X86-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0
- ; X86-NEXT: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0)
+ ; X86-NEXT: [[LOAD:%[0-9]+]]:psr(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0)
; X86-NEXT: $fp0 = COPY [[LOAD]](s32)
; X86-NEXT: RET 0, implicit $fp0
;
@@ -187,13 +187,10 @@ define void @f5(ptr %a, ptr %b) {
; X64-NEXT: {{ $}}
; X64-NEXT: [[COPY:%[0-9]+]]:gpr(p0) = COPY $rdi
; X64-NEXT: [[COPY1:%[0-9]+]]:gpr(p0) = COPY $rsi
- ; X64-NEXT: [[LOAD:%[0-9]+]]:gpr(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.a)
- ; X64-NEXT: [[LOAD1:%[0-9]+]]:gpr(s64) = G_LOAD [[COPY1]](p0) :: (load (s64) from %ir.b)
- ; X64-NEXT: [[COPY2:%[0-9]+]]:psr(s64) = COPY [[LOAD]](s64)
- ; X64-NEXT: [[COPY3:%[0-9]+]]:psr(s64) = COPY [[LOAD1]](s64)
- ; X64-NEXT: [[FADD:%[0-9]+]]:psr(s64) = G_FADD [[COPY2]], [[COPY3]]
- ; X64-NEXT: [[COPY4:%[0-9]+]]:gpr(s64) = COPY [[FADD]](s64)
- ; X64-NEXT: G_STORE [[COPY4]](s64), [[COPY]](p0) :: (store (s64) into %ir.a)
+ ; X64-NEXT: [[LOAD:%[0-9]+]]:psr(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.a)
+ ; X64-NEXT: [[LOAD1:%[0-9]+]]:psr(s64) = G_LOAD [[COPY1]](p0) :: (load (s64) from %ir.b)
+ ; X64-NEXT: [[FADD:%[0-9]+]]:psr(s64) = G_FADD [[LOAD]], [[LOAD1]]
+ ; X64-NEXT: G_STORE [[FADD]](s64), [[COPY]](p0) :: (store (s64) into %ir.a)
; X64-NEXT: RET 0
%load1 = load double, ptr %a, align 8
%load2 = load double, ptr %b, align 8
@@ -210,11 +207,9 @@ define void @f6(ptr %0, ptr %1) {
; X86-NEXT: [[FRAME_INDEX1:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0
; X86-NEXT: [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0)
; X86-NEXT: [[C:%[0-9]+]]:psr(s32) = G_FCONSTANT float 2.000000e+01
- ; X86-NEXT: [[LOAD2:%[0-9]+]]:gpr(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.0)
- ; X86-NEXT: [[COPY:%[0-9]+]]:psr(s32) = COPY [[LOAD2]](s32)
- ; X86-NEXT: [[FADD:%[0-9]+]]:psr(s32) = G_FADD [[COPY]], [[C]]
- ; X86-NEXT: [[COPY1:%[0-9]+]]:gpr(s32) = COPY [[FADD]](s32)
- ; X86-NEXT: G_STORE [[COPY1]](s32), [[LOAD1]](p0) :: (store (s32) into %ir.1)
+ ; X86-NEXT: [[LOAD2:%[0-9]+]]:psr(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.0)
+ ; X86-NEXT: [[FADD:%[0-9]+]]:psr(s32) = G_FADD [[LOAD2]], [[C]]
+ ; X86-NEXT: G_STORE [[FADD]](s32), [[LOAD1]](p0) :: (store (s32) into %ir.1)
; X86-NEXT: RET 0
;
; X64-LABEL: name: f6
@@ -224,11 +219,9 @@ define void @f6(ptr %0, ptr %1) {
; X64-NEXT: [[COPY:%[0-9]+]]:gpr(p0) = COPY $rdi
; X64-NEXT: [[COPY1:%[0-9]+]]:gpr(p0) = COPY $rsi
; X64-NEXT: [[C:%[0-9]+]]:psr(s32) = G_FCONSTANT float 2.000000e+01
- ; X64-NEXT: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.0)
- ; X64-NEXT: [[COPY2:%[0-9]+]]:psr(s32) = COPY [[LOAD]](s32)
- ; X64-NEXT: [[FADD:%[0-9]+]]:psr(s32) = G_FADD [[COPY2]], [[C]]
- ; X64-NEXT: [[COPY3:%[0-9]+]]:gpr(s32) = COPY [[FADD]](s32)
- ; X64-NEXT: G_STORE [[COPY3]](s32), [[COPY1]](p0) :: (store (s32) into %ir.1)
+ ; X64-NEXT: [[LOAD:%[0-9]+]]:psr(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.0)
+ ; X64-NEXT: [[FADD:%[0-9]+]]:psr(s32) = G_FADD [[LOAD]], [[C]]
+ ; X64-NEXT: G_STORE [[FADD]](s32), [[COPY1]](p0) :: (store (s32) into %ir.1)
; X64-NEXT: RET 0
%load1 = load float, ptr %0
%add = fadd float %load1, 20.0