diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2024-01-09 17:02:27 +0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-09 17:02:27 +0700 |
commit | f9fec402896a90f3b09cea359c330f65a0908649 (patch) | |
tree | 1a21cfbdbebd171f7ea4f50d93ee2a7adde23859 | |
parent | 25e0dc92a1df906d6e42c66a32f1fa764f1acabd (diff) |
AMDGPU: Make v32bf16 a legal type (#76679)
Depends #76678
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 5 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 32 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 4 |
3 files changed, 39 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 209debb3a105..975178b313ae 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -173,6 +173,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     addRegisterClass(MVT::v16bf16, &AMDGPU::SGPR_256RegClass);
     addRegisterClass(MVT::v32i16, &AMDGPU::SGPR_512RegClass);
     addRegisterClass(MVT::v32f16, &AMDGPU::SGPR_512RegClass);
+    addRegisterClass(MVT::v32bf16, &AMDGPU::SGPR_512RegClass);
   }
 
   addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
@@ -719,11 +720,15 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   AddPromotedToType(ISD::LOAD, MVT::v32i16, MVT::v16i32);
   setOperationAction(ISD::LOAD, MVT::v32f16, Promote);
   AddPromotedToType(ISD::LOAD, MVT::v32f16, MVT::v16i32);
+  setOperationAction(ISD::LOAD, MVT::v32bf16, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::v32bf16, MVT::v16i32);
 
   setOperationAction(ISD::STORE, MVT::v32i16, Promote);
   AddPromotedToType(ISD::STORE, MVT::v32i16, MVT::v16i32);
   setOperationAction(ISD::STORE, MVT::v32f16, Promote);
   AddPromotedToType(ISD::STORE, MVT::v32f16, MVT::v16i32);
+  setOperationAction(ISD::STORE, MVT::v32bf16, Promote);
+  AddPromotedToType(ISD::STORE, MVT::v32bf16, MVT::v16i32);
 
   setOperationAction({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND},
                      MVT::v2i32, Expand);
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 1cd8a37c3aa9..e28b3d412e48 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1801,6 +1801,38 @@ def : BitConvert <v8f64, v16f32, VReg_512>;
 def : BitConvert <v16f32, v8i64, VReg_512>;
 def : BitConvert <v16f32, v8f64, VReg_512>;
 
+
+
+def : BitConvert <v32bf16, v32i16, VReg_512>;
+def : BitConvert <v32i16, v32bf16, VReg_512>;
+def : BitConvert <v32bf16, v32i16, SReg_512>;
+def : BitConvert <v32i16, v32bf16, SReg_512>;
+
+def : BitConvert <v32bf16, v32f16, VReg_512>;
+def : BitConvert <v32f16, v32bf16, VReg_512>;
+def : BitConvert <v32bf16, v32f16, SReg_512>;
+def : BitConvert <v32f16, v32bf16, SReg_512>;
+
+def : BitConvert <v32bf16, v16i32, VReg_512>;
+def : BitConvert <v16i32, v32bf16, VReg_512>;
+def : BitConvert <v32bf16, v16i32, SReg_512>;
+def : BitConvert <v16i32, v32bf16, SReg_512>;
+
+def : BitConvert <v32bf16, v16f32, VReg_512>;
+def : BitConvert <v16f32, v32bf16, VReg_512>;
+def : BitConvert <v32bf16, v16f32, SReg_512>;
+def : BitConvert <v16f32, v32bf16, SReg_512>;
+
+def : BitConvert <v32bf16, v8f64, VReg_512>;
+def : BitConvert <v8f64, v32bf16, VReg_512>;
+def : BitConvert <v32bf16, v8f64, SReg_512>;
+def : BitConvert <v8f64, v32bf16, SReg_512>;
+
+def : BitConvert <v32bf16, v8i64, VReg_512>;
+def : BitConvert <v8i64, v32bf16, VReg_512>;
+def : BitConvert <v32bf16, v8i64, SReg_512>;
+def : BitConvert <v8i64, v32bf16, SReg_512>;
+
 // 1024-bit bitcast
 def : BitConvert <v32i32, v32f32, VReg_1024>;
 def : BitConvert <v32f32, v32i32, VReg_1024>;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 1d197dc08ac2..f42af89cf5e6 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -916,7 +916,7 @@ defm "" : SRegClass<11, [v11i32, v11f32], SGPR_352Regs, TTMP_352Regs>;
 defm "" : SRegClass<12, [v12i32, v12f32], SGPR_384Regs, TTMP_384Regs>;
 
 let GlobalPriority = true in {
-defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16], SGPR_512Regs, TTMP_512Regs>;
+defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16, v32bf16], SGPR_512Regs, TTMP_512Regs>;
 defm "" : SRegClass<32, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>;
 }
 
@@ -970,7 +970,7 @@ defm VReg_352 : VRegClass<11, [v11i32, v11f32], (add VGPR_352)>;
 defm VReg_384 : VRegClass<12, [v12i32, v12f32], (add VGPR_384)>;
 
 let GlobalPriority = true in {
-defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16], (add VGPR_512)>;
+defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16, v32bf16], (add VGPR_512)>;
 defm VReg_1024 : VRegClass<32, [v32i32, v32f32, v16i64, v16f64], (add VGPR_1024)>;
 }
 