author     Chao Chen <chao.chen@intel.com>    2024-03-26 21:12:35 +0000
committer  Chao Chen <chao.chen@intel.com>    2024-03-26 21:12:35 +0000
commit     ba62715a93a1a864b0ef8fd79468ae2b0714269f (patch)
tree       10e6d82d8eb4e2588f73e9829df01a9bb3390be0
parent     253b96f12c377753f8f9383a20f8c1541fcce850 (diff)

add an overlapping example for createDesc. (upstream/users/chencha3/xegpu_scatter_ops_upstream)
-rw-r--r--  mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td   | 15
-rw-r--r--  mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td | 24
-rw-r--r--  mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp       |  4
3 files changed, 29 insertions, 14 deletions
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 41fe0ea77e5e..a031a75984a5 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -406,13 +406,28 @@ def XeGPU_CreateDescOp: XeGPU_Op<"create_tdesc", [Pure, ViewLikeOpInterface]> {
elements accessed for each offset, default is 1.
Example 1. It assumes subgroup size is 4, and accesses a[0], a[16], a[32], a[64]
+ ```
%a = memref.alloc() : memref<1024xf32>
%1 = xegpu.create_tdesc %a[0, 16, 32, 64]: memref<1024xf32> -> TensorDesc<4xf32>
+ ```
Example 2. It assumes subgroup size is 4, and each workitem accesses 8 elements.
It will access 32 data elements in total: a[0:7], a[16:23], a[32:39], a[64:71]
+ ```
%0 = memref.alloc() : memref<1024xf32>
%1 = xegpu.create_tdesc %0[0, 16, 32, 64] {chunk_size = 8}: memref<1024xf32> -> TensorDesc<4x8xf32>
+ ```
+
+ Example 3. It is similar to Example 2, but there are overlaps among workitems.
+ It accesses: a[0:7], a[4:11], a[8:15], a[12:19]
+ ```
+ %0 = memref.alloc() : memref<1024xf32>
+ %1 = xegpu.create_tdesc %0[0, 4, 8, 12] {chunk_size = 8}: memref<1024xf32> -> TensorDesc<4x8xf32>
+ ```
+
+
+
+
}];
let arguments = (ins XeGPU_BaseAddrType: $source,
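The arithmetic behind the new Example 3 is worth spelling out. Below is a small standalone C++ sketch (illustrative only, not part of this patch) that enumerates the per-workitem element ranges for the offsets and chunk_size used in that example; every offset starts a run of chunk_size consecutive elements, so neighbouring ranges overlap whenever the offset stride is smaller than chunk_size.

```
// Illustrative sketch, not part of this commit: enumerate the per-workitem
// element ranges for the offsets and chunk_size used in Example 3 above.
#include <cstdio>
#include <vector>

int main() {
  const std::vector<int> offsets = {0, 4, 8, 12}; // one offset per workitem
  const int chunkSize = 8;                        // elements per workitem (chunk_size)

  for (size_t i = 0; i < offsets.size(); ++i) {
    int begin = offsets[i];
    int end = begin + chunkSize - 1; // inclusive, i.e. a[begin:end]
    std::printf("workitem %zu accesses a[%d:%d]\n", i, begin, end);
  }
  // Prints a[0:7], a[4:11], a[8:15], a[12:19]; adjacent ranges share 4
  // elements because the offset stride (4) is smaller than chunk_size (8).
  return 0;
}
```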
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
index 0c62e513bee4..4cd4e5411653 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
@@ -34,10 +34,10 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
[ShapedTypeInterface], "::mlir::TensorType"> {
let summary = "TensorDesc describing regions of interested data.";
let description = [{
- TensorDesc is a type designed to describe regions of the interested data as well as some
- features that are unique to Intel hardware. Different with the builtin tensor type in MLIR,
- it essentially only contains the meta data, and doesn't hold the data by itself. It is designed
- to mainly support 2D block load/store and DPAS (matrix multiplication instruction) on Intel GPU.
+ TensorDesc is a type designed to describe regions of the interested data as well as some
+ features that are unique to Intel hardware. Different with the builtin tensor type in MLIR,
+ it essentially only contains the meta data, and doesn't hold the data by itself. It is designed
+ to mainly support 2D block load/store and DPAS (matrix multiplication instruction) on Intel GPU.
It encodes the following information:
* shape: the sizes/shape of the intereted data block, e.g., 8x16 means 8 rows
@@ -46,15 +46,15 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
is set or not.
* element_type: the data type of the data element, e.g., f16, f32.
- Similar to the builtin tensor, it also provides an optinal attribute to encoding
+ Similar to the builtin tensor, it also provides an optinal attribute to encoding
the following information via the TensorDescAttr object:
- * memory_scope (xegpu::MemoryScope): [optional] where the data is located,
+ * memory_scope (xegpu::MemoryScope): [optional] where the data is located,
global memory or shared memory. It is default to Global.
* array_length (int): [optional] The number of contiguous blocks with size as `shape`,
that will be loaded by block load at a time. It is default to 1.
- * boundary_check (bool): [optional] indicates whether the operation detects the boundary
+ * boundary_check (bool): [optional] indicates whether the operation detects the boundary
and pads with zero for out-of-boundary access. It is default to do boundary check.
-
+
Syntax:
@@ -85,8 +85,8 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
OptionalParameter<"mlir::Attribute">: $encoding);
let builders = [
- TypeBuilder<(ins
- "llvm::ArrayRef<int64_t>": $shape,
+ TypeBuilderWithInferredContext<(ins
+ "llvm::ArrayRef<int64_t>": $shape,
"mlir::Type": $elementType,
CArg<"bool", "false">: $scattered,
CArg<"int", "1">: $array_length,
@@ -127,7 +127,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
if (attr && attr.getArrayLength())
return attr.getArrayLength().getInt();
// return default value
- return 1;
+ return 1;
}
bool getBoundaryCheck() {
@@ -148,7 +148,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
}];
let hasCustomAssemblyFormat = true;
-
+
}
#endif // MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD
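The array_length attribute described in this file scales how much data a single block load moves. A tiny standalone sketch of that arithmetic follows (illustrative only, not part of the patch): the 8x16 shape comes from the description's own example, while array_length = 2 is just an assumed value for illustration.

```
// Illustrative arithmetic only: how array_length scales a block load.
// An 8x16 TensorDesc with array_length = 2 moves two contiguous 8x16
// blocks, i.e. 2 * 8 * 16 = 256 elements per load.
#include <cstdint>
#include <cstdio>

int main() {
  const int64_t shape[2] = {8, 16}; // block shape from the description's example
  const int arrayLength = 2;        // optional attribute; the default is 1
  int64_t elems = arrayLength * shape[0] * shape[1];
  std::printf("elements per block load: %lld\n", static_cast<long long>(elems));
  return 0;
}
```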
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index 858cda32013e..24719fe748fe 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -107,11 +107,11 @@ void TensorDescType::print(::mlir::AsmPrinter &printer) const {
printer << ">";
}
-TensorDescType TensorDescType::get(mlir::MLIRContext *context,
- llvm::ArrayRef<int64_t> shape,
+TensorDescType TensorDescType::get(llvm::ArrayRef<int64_t> shape,
mlir::Type elementType, bool scattered,
int array_length, MemoryScope memory_scope,
bool boundary_check) {
+ auto context = elementType.getContext();
auto attr = TensorDescAttr::get(context, memory_scope, array_length,
boundary_check, scattered);
return Base::get(context, shape, elementType, attr);
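Together with the TypeBuilderWithInferredContext switch in XeGPUTypes.td, the practical effect of this hunk is that callers of TensorDescType::get no longer pass an MLIRContext; it is recovered from elementType. A hedged call-site sketch follows; the function name, setup code, and the printed default values are assumptions layered on top of what the diff and the type description state, not something this patch adds.

```
// Hedged sketch: assumes the usual MLIR/XeGPU headers and dialect registration.
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/MLIRContext.h"
#include "llvm/Support/raw_ostream.h"

void buildScatteredDesc() {
  mlir::MLIRContext ctx;
  ctx.loadDialect<mlir::xegpu::XeGPUDialect>();
  mlir::Type f32 = mlir::Float32Type::get(&ctx);

  // Before this commit the context had to be passed explicitly:
  //   xegpu::TensorDescType::get(&ctx, {4, 8}, f32, /*scattered=*/true);
  // After it, the context is inferred from the element type.
  // Shape 4x8 matches Example 3 in XeGPUOps.td (4 offsets, chunk_size = 8).
  auto desc = mlir::xegpu::TensorDescType::get({4, 8}, f32, /*scattered=*/true);

  // Unset optional TensorDescAttr fields fall back to the documented defaults.
  llvm::outs() << "array_length   = " << desc.getArrayLength() << "\n";
  llvm::outs() << "boundary_check = "
               << (desc.getBoundaryCheck() ? "true" : "false") << "\n";
}
```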