summaryrefslogtreecommitdiffstats
path: root/include/clang/Basic/BuiltinsNVPTX.def
diff options
context:
space:
mode:
authorArtem Belevich <tra@google.com>2018-04-18 21:51:48 +0000
committerArtem Belevich <tra@google.com>2018-04-18 21:51:48 +0000
commitc4d3d32435b0867c3e518acd4004249b7cd63b71 (patch)
treee6c8aaed8323b701be0dcebf0c7d3e703558b368 /include/clang/Basic/BuiltinsNVPTX.def
parent202f222f94f9ca45d6c6896ba397c4646d7f99cb (diff)
[NVPTX, CUDA] Added support for m8n32k16 and m32n8k16 variants of wmma instructions.
The new instructions were added added for sm_70+ GPUs in CUDA-9.1. Differential Revision: https://reviews.llvm.org/D45068 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@330296 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'include/clang/Basic/BuiltinsNVPTX.def')
-rw-r--r--include/clang/Basic/BuiltinsNVPTX.def60
1 files changed, 47 insertions, 13 deletions
diff --git a/include/clang/Basic/BuiltinsNVPTX.def b/include/clang/Basic/BuiltinsNVPTX.def
index ae33315d89..4e5d049930 100644
--- a/include/clang/Basic/BuiltinsNVPTX.def
+++ b/include/clang/Basic/BuiltinsNVPTX.def
@@ -18,11 +18,18 @@
# define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
#endif
+#pragma push_macro("SM_70")
+#define SM_70 "sm_70|sm_71"
#pragma push_macro("SM_60")
-#define SM_60 "sm_60|sm_61|sm_62|sm_70|sm_71"
+#define SM_60 "sm_60|sm_61|sm_62|" SM_70
+#pragma push_macro("PTX61")
+#define PTX61 "ptx61"
#pragma push_macro("PTX60")
-#define PTX60 "ptx60|ptx61"
+#define PTX60 "ptx60|" PTX61
+
+#pragma push_macro("AND")
+#define AND(a, b) a "," b
// Special Registers
@@ -698,19 +705,46 @@ BUILTIN(__nvvm_ldg_f4, "E4fE4fC*", "")
BUILTIN(__nvvm_ldg_d2, "E2dE2dC*", "")
// Builtins to support WMMA instructions on sm_70
-TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", PTX60)
-TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", PTX60)
-TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f16, "vi*iC*UiIi", "", PTX60)
-TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f32, "vf*fC*UiIi", "", PTX60)
-TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*i*UiIi", "", PTX60)
-TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*f*UiIi", "", PTX60)
-
-TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", PTX60)
-TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", PTX60)
-TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", PTX60)
-TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", PTX60)
+TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60))
+TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX60))
+TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60))
+TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60))
+TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX60))
+TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX60))
+
+TARGET_BUILTIN(__hmma_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m32n8k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m32n8k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61))
+
+TARGET_BUILTIN(__hmma_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m8n32k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m8n32k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61))
+
+TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
+TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
+TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX60))
+TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX60))
+
+TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
+
+TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
#undef BUILTIN
#undef TARGET_BUILTIN
+#pragma pop_macro("AND")
#pragma pop_macro("SM_60")
+#pragma pop_macro("SM_70")
#pragma pop_macro("PTX60")
+#pragma pop_macro("PTX61")