diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir | 454 |
1 files changed, 454 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir index 4d307a444b19..a98b02d792d9 100644 --- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir +++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir @@ -2028,3 +2028,457 @@ body: | $agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec BUFFER_STORE_DWORDX2_OFFEN_exact $vgpr2_vgpr3, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec ... + +... +# 2 pass source; srcC of the second MFMA (vgpr2-vgpr5) overlaps vgpr2-vgpr3 written by the first MFMA +# GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srcc +# GCN: V_MFMA +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MFMA +name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srcc +body: | + bb.0: + + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr2_vgpr3_vgpr4_vgpr5, 1, 2, 3, implicit $mode, implicit $exec + +... + +... +# 2 pass source; srcA of the second MFMA (vgpr0-vgpr1) overlaps the vgpr0-vgpr3 result of the first MFMA +# GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srca +# GCN: V_MFMA +# GCN-NEXT: S_NOP 4 +# GCN-NEXT: V_MFMA +name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srca +body: | + bb.0: + + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr0_vgpr1, $vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11, 1, 2, 3, implicit $mode, implicit $exec + +... + +...
+# 2 pass source; srcB of the second MFMA (vgpr2-vgpr3) overlaps the vgpr0-vgpr3 result of the first MFMA +# GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srcb +# GCN: V_MFMA +# GCN-NEXT: S_NOP 4 +# GCN-NEXT: V_MFMA +name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srcb +body: | + bb.0: + + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr6_vgpr7, $vgpr2_vgpr3, $vgpr8_vgpr9_vgpr10_vgpr11, 1, 2, 3, implicit $mode, implicit $exec + +... + +... +# 4 pass source; srcC of the second MFMA (vgpr2-vgpr5) overlaps vgpr2-vgpr3 written by the first MFMA +# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcc +# GCN: V_MFMA +# GCN-NEXT: S_NOP 4 +# GCN-NEXT: V_MFMA +name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcc +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr8_vgpr9, $vgpr10_vgpr11, $vgpr2_vgpr3_vgpr4_vgpr5, 1, 2, 3, implicit $mode, implicit $exec + +... + +... +# 4 pass source; srcA of the second MFMA (vgpr2-vgpr3) overlaps the vgpr0-vgpr3 result of the first MFMA +# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srca +# GCN: V_MFMA +# GCN-NEXT: S_NOP 6 +# GCN-NEXT: V_MFMA +name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srca +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr2_vgpr3, $vgpr10_vgpr11, $vgpr6_vgpr7_vgpr8_vgpr9, 1, 2, 3, implicit $mode, implicit $exec + +... + +...
+# 4 pass source; srcB of the second MFMA (vgpr2-vgpr3) overlaps the vgpr0-vgpr3 result of the first MFMA +# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcb +# GCN: V_MFMA +# GCN-NEXT: S_NOP 6 +# GCN-NEXT: V_MFMA +name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcb +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr10_vgpr11, $vgpr2_vgpr3, $vgpr6_vgpr7_vgpr8_vgpr9, 1, 2, 3, implicit $mode, implicit $exec + +... + +... +# 2 pass source; srcC of the second MFMA (vgpr2-vgpr5) overlaps vgpr2-vgpr3 written by the first MFMA +# GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcc +# GCN: V_MFMA +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MFMA +name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcc +body: | + bb.0: + + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr6, $vgpr8, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec + +... + +... +# 2 pass source; srcA of the second MFMA (vgpr1) overlaps the vgpr0-vgpr3 result of the first MFMA +# GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srca +# GCN: V_MFMA +# GCN-NEXT: S_NOP 4 +# GCN-NEXT: V_MFMA +name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srca +body: | + bb.0: + + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr1, $vgpr8, $vgpr6_vgpr7_vgpr8_vgpr9, 0, 0, 0, implicit $mode, implicit $exec + +... + +...
+# 2 pass source; srcB of the second MFMA (vgpr1) overlaps the vgpr0-vgpr3 result of the first MFMA +# GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcb +# GCN: V_MFMA +# GCN-NEXT: S_NOP 4 +# GCN-NEXT: V_MFMA +name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcb +body: | + bb.0: + + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr8, $vgpr1, $vgpr6_vgpr7_vgpr8_vgpr9, 0, 0, 0, implicit $mode, implicit $exec + +... + +... +# 4 pass source; srcC of the second MFMA (vgpr2-vgpr5) overlaps vgpr2-vgpr3 written by the first MFMA +# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcc +# GCN: V_MFMA +# GCN-NEXT: S_NOP 4 +# GCN-NEXT: V_MFMA +name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcc +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr8, $vgpr9, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec + +... + +... +# 4 pass source; srcA of the second MFMA (vgpr1) overlaps the vgpr0-vgpr3 result of the first MFMA +# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srca +# GCN: V_MFMA +# GCN-NEXT: S_NOP 6 +# GCN-NEXT: V_MFMA +name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srca +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr1, $vgpr8, $vgpr6_vgpr7_vgpr8_vgpr9, 0, 0, 0, implicit $mode, implicit $exec + +... + +...
+# 4 pass source; srcB of the second MFMA (vgpr1) overlaps the vgpr0-vgpr3 result of the first MFMA +# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcb +# GCN: V_MFMA +# GCN-NEXT: S_NOP 6 +# GCN-NEXT: V_MFMA +name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcb +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr8, $vgpr1, $vgpr6_vgpr7_vgpr8_vgpr9, 0, 0, 0, implicit $mode, implicit $exec + +... + +... +# 8 pass source; srcC of the second MFMA (vgpr2-vgpr17) overlaps vgpr2-vgpr15 written by the first MFMA +# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_MFMA +name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc +body: | + bb.0: + renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr0_vgpr1, killed $vgpr2_vgpr3, 1065353216, 0, 0, 0, implicit $mode, implicit $exec + + $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec +... + +...
+# 8 pass source; srcA of the second MFMA (vgpr0) overlaps the vgpr0-vgpr15 result of the first MFMA +# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MFMA +name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca +body: | + bb.0: + renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr0_vgpr1, killed $vgpr2_vgpr3, 1065353216, 0, 0, 0, implicit $mode, implicit $exec + + $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr0, $vgpr33, $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, implicit $mode, implicit $exec +... + +... +# 8 pass source; srcB of the second MFMA (vgpr1) overlaps the vgpr0-vgpr15 result of the first MFMA +# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MFMA +name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb +body: | + bb.0: + renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr0_vgpr1, killed $vgpr2_vgpr3, 1065353216, 0, 0, 0, implicit $mode, implicit $exec + + $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr33, $vgpr1, $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, implicit $mode, implicit $exec +... + +...
+# 16 pass source; srcC of the second MFMA (vgpr16-vgpr31) lies inside the vgpr0-vgpr31 result of the first MFMA +# GCN-LABEL: name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_MFMA +name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec + + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X2F32_vgprcd_e64 killed $vgpr32, killed $vgpr33, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 1, 2, 3, implicit $mode, implicit $exec + +... + +...
+# 16 pass source; srcA of the second MFMA (vgpr0) overlaps the vgpr0-vgpr31 result of the first MFMA +# GCN-LABEL: name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MFMA +name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X2F32_vgprcd_e64 killed $vgpr0, killed $vgpr33, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 1, 2, 3, implicit $mode, implicit $exec + +... + +...
+# 16 pass source; srcB of the second MFMA (vgpr0) overlaps the vgpr0-vgpr31 result of the first MFMA +# GCN-LABEL: name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MFMA +name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X2F32_vgprcd_e64 killed $vgpr33, killed $vgpr0, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 1, 2, 3, implicit $mode, implicit $exec + +... + +...
+# 8 pass source; srcC of the second MFMA (vgpr0-vgpr15) overlaps vgpr2-vgpr15 of the vgpr2-vgpr17 result of the first MFMA +# GCN-LABEL: name: nonxdl_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: V_MFMA +name: nonxdl_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc +body: | + bb.0: + $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec + $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec +... + +... +# 8 pass source; srcA of the second MFMA (vgpr3) overlaps the vgpr2-vgpr17 result of the first MFMA +# GCN-LABEL: name: nonxdl_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 1 +# GCN-NEXT: V_MFMA +name: nonxdl_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca +body: | + bb.0: + $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec + $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr3, $vgpr19, $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, implicit $mode, implicit $exec +... + +...
+# 8 pass source; srcB of the second MFMA (vgpr3) overlaps the vgpr2-vgpr17 result of the first MFMA +# GCN-LABEL: name: nonxdl_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 1 +# GCN-NEXT: V_MFMA +name: nonxdl_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb +body: | + bb.0: + $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec + $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr19, $vgpr3, $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, implicit $mode, implicit $exec +... +... +# 8 pass source; srcC of the second MFMA (vgpr2-vgpr17) overlaps vgpr2-vgpr15 written by the first MFMA +# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcc +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_MFMA +name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcc +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr0_vgpr1, killed $vgpr2_vgpr3, 1065353216, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr18_vgpr19, killed $vgpr20_vgpr21, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec +... + +...
+# 8 pass source; srcA of the second MFMA (vgpr2-vgpr3) overlaps the vgpr0-vgpr15 result of the first MFMA +# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srca +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MFMA +name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srca +body: | + bb.0: + renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr0_vgpr1, killed $vgpr2_vgpr3, 1065353216, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr2_vgpr3, killed $vgpr36_vgpr37, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec +... + +... +# 8 pass source; srcB of the second MFMA (vgpr2-vgpr3) overlaps the vgpr0-vgpr15 result of the first MFMA +# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcb +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MFMA +name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcb +body: | + bb.0: + renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr0_vgpr1, killed $vgpr2_vgpr3, 1065353216, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr36_vgpr37, killed $vgpr2_vgpr3, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec +... + +...
+# 16 pass source; srcC of the second MFMA (vgpr2-vgpr33) overlaps vgpr2-vgpr31 written by the first MFMA +# GCN-LABEL: name: xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srcc +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_MFMA +name: xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srcc +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, implicit $mode, implicit $exec + +... + +...
+# 16 pass source; srcA of the second MFMA (vgpr2-vgpr3) overlaps the vgpr0-vgpr31 result of the first MFMA +# GCN-LABEL: name: xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srca +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MFMA +name: xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srca +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr2_vgpr3, $vgpr128_vgpr129, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 0, 0, 0, implicit $mode, implicit $exec + + +... + +...
+# 16 pass source; srcB of the second MFMA (vgpr2-vgpr3) overlaps the vgpr0-vgpr31 result of the first MFMA +# GCN-LABEL: name: xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srcb +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MFMA +name: xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srcb +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr128_vgpr129, $vgpr2_vgpr3, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 0, 0, 0, implicit $mode, implicit $exec + +... + +... +# 2 pass source; the agpr2-agpr5 operand of the V_SMFMAC overlaps agpr2-agpr3 written by the preceding MFMA +# GCN-LABEL: name: xdl_mfma_2pass_write_agpr_smfmac_read_overlap_srcc +# GCN: V_MFMA +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_SMFMAC_ +name: xdl_mfma_2pass_write_agpr_smfmac_read_overlap_srcc +body: | + bb.0: + + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X4F16_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $agpr0_agpr1_agpr2_agpr3, 1, 2, 3, implicit $mode, implicit $exec + $agpr2_agpr3_agpr4_agpr5 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5, implicit $mode, implicit $exec + +... + +...
+# GCN-LABEL: name: xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc +# GCN: V_MFMA +# GCN-NEXT: S_NOP 4 +# GCN-NEXT: V_SMFMAC_ +name: xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + $agpr2_agpr3_agpr4_agpr5 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5, implicit $mode, implicit $exec + +... + +... +# GCN-LABEL: name: xdl_8pass_mfma_write_agpr_smfmac_read_overlap_srcc +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_SMFMAC_ +name: xdl_8pass_mfma_write_agpr_smfmac_read_overlap_srcc +body: | + bb.0: + renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X8F16_e64 killed $vgpr0_vgpr1, killed $vgpr2_vgpr3, 1065353216, 0, 0, 0, implicit $mode, implicit $exec + $agpr2_agpr3_agpr4_agpr5 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5, implicit $mode, implicit $exec +... + +...
+# GCN-LABEL: name: xdl_16pass_mfma_write_agpr_smfmac_read_overlap_srcc +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_SMFMAC_ +name: xdl_16pass_mfma_write_agpr_smfmac_read_overlap_srcc +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X4F16_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec + $agpr2_agpr3_agpr4_agpr5 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5, implicit $mode, implicit $exec +... |