diff options
author | Abhinav Garg <39309352+abhigargrepo@users.noreply.github.com> | 2024-05-03 23:17:15 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-03 19:47:15 +0200 |
commit | 76508dce4380e0cea2ecb396200a161f7dbefd0b (patch) | |
tree | 2f0dc5058b96a4c0f8181ae0f0969c727c5be355 | |
parent | fc398a112d264a1b4d52e1be4ec1f75d83c3baf0 (diff) |
[AMDGPU] Fix mode register pass for constrained FP operations (#90085)
This PR fixes the si-mode-register pass, which was inserting an extra
setreg instruction for constrained FP operations. The pass is now
skipped for strictfp functions.
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIModeRegister.cpp | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/mode-register-fpconstrain.ll | 4 |
2 files changed, 8 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp index c01b1266a553..e7f448233ca3 100644 --- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp +++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp @@ -430,6 +430,14 @@ void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB, } bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) { + // Constrained FP intrinsics are used to support non-default rounding modes. + // strictfp attribute is required to mark functions with strict FP semantics + // having constrained FP intrinsics. This pass fixes up operations that uses + // a non-default rounding mode for non-strictfp functions. But it should not + // assume or modify any default rounding modes in case of strictfp functions. + const Function &F = MF.getFunction(); + if (F.hasFnAttribute(llvm::Attribute::StrictFP)) + return Changed; BlockInfo.resize(MF.getNumBlockIDs()); const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const SIInstrInfo *TII = ST.getInstrInfo(); diff --git a/llvm/test/CodeGen/AMDGPU/mode-register-fpconstrain.ll b/llvm/test/CodeGen/AMDGPU/mode-register-fpconstrain.ll index 2403aeaa4428..8a29229c152f 100644 --- a/llvm/test/CodeGen/AMDGPU/mode-register-fpconstrain.ll +++ b/llvm/test/CodeGen/AMDGPU/mode-register-fpconstrain.ll @@ -9,8 +9,6 @@ define double @ignoreStrictfp(double noundef %a, double noundef %b) #0 { ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 1 -; GCN-NEXT: s_nop 1 -; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 0 ; GCN-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] ; GCN-NEXT: s_setpc_b64 s[30:31] tail call void @llvm.amdgcn.s.setreg(i32 2177, i32 1) @@ -24,8 +22,6 @@ define double @set_fpenv(double noundef %a, double noundef %b) #0 { ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 23), 4 ; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 0, 
5), 0 -; GCN-NEXT: s_nop 0 -; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 0 ; GCN-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] ; GCN-NEXT: s_setpc_b64 s[30:31] entry: |