author     Chao Chen <chao.chen@intel.com>    2024-03-26 21:12:35 +0000
committer  Chao Chen <chao.chen@intel.com>    2024-03-26 21:12:35 +0000
commit     ba62715a93a1a864b0ef8fd79468ae2b0714269f (patch)
tree       10e6d82d8eb4e2588f73e9829df01a9bb3390be0
parent     253b96f12c377753f8f9383a20f8c1541fcce850 (diff)

add an overlapping example for createDesc. (upstream/users/chencha3/xegpu_scatter_ops_upstream)
-rw-r--r--  mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td   | 15
-rw-r--r--  mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td | 24
-rw-r--r--  mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp       |  4
3 files changed, 29 insertions, 14 deletions
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 41fe0ea77e5e..a031a75984a5 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -406,13 +406,28 @@ def XeGPU_CreateDescOp: XeGPU_Op<"create_tdesc", [Pure, ViewLikeOpInterface]> {
elements accessed for each offset, default is 1.
Example 1. It assumes subgroup size is 4, and accesses a[0], a[16], a[32], a[64]
+ ```
%a = memref.alloc() : memref<1024xf32>
%1 = xegpu.create_tdesc %a[0, 16, 32, 64]: memref<1024xf32> -> TensorDesc<4xf32>
+ ```
Example 2. It assumes subgroup size is 4, and each workitem accesses 8 elements.
It will access 32 data elements in total: a[0:7], a[16:23], a[32:39], a[64:71]
+ ```
%0 = memref.alloc() : memref<1024xf32>
%1 = xegpu.create_tdesc %0[0, 16, 32, 64] {chunk_size = 8}: memref<1024xf32> -> TensorDesc<4x8xf32>
+ ```
+
+ Example 3. It is similar to Example 2, but there are overlaps among workitems.
+ It accesses: a[0:7], a[4:11], a[8:15], a[12:19]
+ ```
+ %0 = memref.alloc() : memref<1024xf32>
+ %1 = xegpu.create_tdesc %0[0, 4, 8, 12] {chunk_size = 8}: memref<1024xf32> -> TensorDesc<4x8xf32>
+ ```
+
+
+
+
}];
let arguments = (ins XeGPU_BaseAddrType: $source,
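The arithmetic behind the new Example 3 is worth spelling out. Below is a small standalone C++ sketch (illustrative only, not part of this patch) that enumerates the per-workitem element ranges for the offsets and chunk_size used in that example; every offset starts a run of chunk_size consecutive elements, so neighbouring ranges overlap whenever the offset stride is smaller than chunk_size.

```
// Illustrative sketch, not part of this commit: enumerate the per-workitem
// element ranges for the offsets and chunk_size used in Example 3 above.
#include <cstdio>
#include <vector>

int main() {
  const std::vector<int> offsets = {0, 4, 8, 12}; // one offset per workitem
  const int chunkSize = 8;                        // elements per workitem (chunk_size)

  for (size_t i = 0; i < offsets.size(); ++i) {
    int begin = offsets[i];
    int end = begin + chunkSize - 1; // inclusive, i.e. a[begin:end]
    std::printf("workitem %zu accesses a[%d:%d]\n", i, begin, end);
  }
  // Prints a[0:7], a[4:11], a[8:15], a[12:19]; adjacent ranges share 4
  // elements because the offset stride (4) is smaller than chunk_size (8).
  return 0;
}
```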
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
index 0c62e513bee4..4cd4e5411653 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
@@ -34,10 +34,10 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
[ShapedTypeInterface], "::mlir::TensorType"> {
let summary = "TensorDesc describing regions of interested data.";
let description = [{
- TensorDesc is a type designed to describe regions of the interested data as well as some
- features that are unique to Intel hardware. Different with the builtin tensor type in MLIR,
- it essentially only contains the meta data, and doesn't hold the data by itself. It is designed
- to mainly support 2D block load/store and DPAS (matrix multiplication instruction) on Intel GPU.
+ TensorDesc is a type designed to describe regions of the interested data as well as some
+ features that are unique to Intel hardware. Different with the builtin tensor type in MLIR,
+ it essentially only contains the meta data, and doesn't hold the data by itself. It is designed
+ to mainly support 2D block load/store and DPAS (matrix multiplication instruction) on Intel GPU.
It encodes the following information:
* shape: the sizes/shape of the intereted data block, e.g., 8x16 means 8 rows
@@ -46,15 +46,15 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
is set or not.
* element_type: the data type of the data element, e.g., f16, f32.
- Similar to the builtin tensor, it also provides an optinal attribute to encoding
+ Similar to the builtin tensor, it also provides an optinal attribute to encoding
the following information via the TensorDescAttr object:
- * memory_scope (xegpu::MemoryScope): [optional] where the data is located,
+ * memory_scope (xegpu::MemoryScope): [optional] where the data is located,
global memory or shared memory. It is default to Global.
* array_length (int): [optional] The number of contiguous blocks with size as `shape`,
that will be loaded by block load at a time. It is default to 1.
- * boundary_check (bool): [optional] indicates whether the operation detects the boundary
+ * boundary_check (bool): [optional] indicates whether the operation detects the boundary
and pads with zero for out-of-boundary access. It is default to do boundary check.
-
+
Syntax:
@@ -85,8 +85,8 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
OptionalParameter<"mlir::Attribute">: $encoding);
let builders = [
- TypeBuilder<(ins
- "llvm::ArrayRef<int64_t>": $shape,
+ TypeBuilderWithInferredContext<(ins
+ "llvm::ArrayRef<int64_t>": $shape,
"mlir::Type": $elementType,
CArg<"bool", "false">: $scattered,
CArg<"int", "1">: $array_length,
@@ -127,7 +127,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
if (attr && attr.getArrayLength())
return attr.getArrayLength().getInt();
// return default value
- return 1;
+ return 1;
}
bool getBoundaryCheck() {
@@ -148,7 +148,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
}];
let hasCustomAssemblyFormat = true;
-
+
}
#endif // MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD
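The array_length attribute described in this file scales how much data a single block load moves. A tiny standalone sketch of that arithmetic follows (illustrative only, not part of the patch): the 8x16 shape comes from the description's own example, while array_length = 2 is just an assumed value for illustration.

```
// Illustrative arithmetic only: how array_length scales a block load.
// An 8x16 TensorDesc with array_length = 2 moves two contiguous 8x16
// blocks, i.e. 2 * 8 * 16 = 256 elements per load.
#include <cstdint>
#include <cstdio>

int main() {
  const int64_t shape[2] = {8, 16}; // block shape from the description's example
  const int arrayLength = 2;        // optional attribute; the default is 1
  int64_t elems = arrayLength * shape[0] * shape[1];
  std::printf("elements per block load: %lld\n", static_cast<long long>(elems));
  return 0;
}
```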
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index 858cda32013e..24719fe748fe 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -107,11 +107,11 @@ void TensorDescType::print(::mlir::AsmPrinter &printer) const {
printer << ">";
}
-TensorDescType TensorDescType::get(mlir::MLIRContext *context,
- llvm::ArrayRef<int64_t> shape,
+TensorDescType TensorDescType::get(llvm::ArrayRef<int64_t> shape,
mlir::Type elementType, bool scattered,
int array_length, MemoryScope memory_scope,
bool boundary_check) {
+ auto context = elementType.getContext();
auto attr = TensorDescAttr::get(context, memory_scope, array_length,
boundary_check, scattered);
return Base::get(context, shape, elementType, attr);
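Together with the TypeBuilderWithInferredContext switch in XeGPUTypes.td, the practical effect of this hunk is that callers of TensorDescType::get no longer pass an MLIRContext; it is recovered from elementType. A hedged call-site sketch follows; the function name, setup code, and the printed default values are assumptions layered on top of what the diff and the type description state, not something this patch adds.

```
// Hedged sketch: assumes the usual MLIR/XeGPU headers and dialect registration.
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/MLIRContext.h"
#include "llvm/Support/raw_ostream.h"

void buildScatteredDesc() {
  mlir::MLIRContext ctx;
  ctx.loadDialect<mlir::xegpu::XeGPUDialect>();
  mlir::Type f32 = mlir::Float32Type::get(&ctx);

  // Before this commit the context had to be passed explicitly:
  //   xegpu::TensorDescType::get(&ctx, {4, 8}, f32, /*scattered=*/true);
  // After it, the context is inferred from the element type.
  // Shape 4x8 matches Example 3 in XeGPUOps.td (4 offsets, chunk_size = 8).
  auto desc = mlir::xegpu::TensorDescType::get({4, 8}, f32, /*scattered=*/true);

  // Unset optional TensorDescAttr fields fall back to the documented defaults.
  llvm::outs() << "array_length   = " << desc.getArrayLength() << "\n";
  llvm::outs() << "boundary_check = "
               << (desc.getBoundaryCheck() ? "true" : "false") << "\n";
}
```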