author     Jacek Caban <jacek@codeweavers.com>       2023-11-17 11:46:52 +0000
committer  Florian Hahn <flo@fhahn.com>              2023-11-17 11:46:52 +0000
commit     77c2f330a9fc2f332388e777f396451a5d4a6bc8 (patch)
tree       51c91ad2fd7d215ca0f55250070f8e24b48c907f
parent     05bd5ecad3cbd218fca59dfcd71be2e5294a08ed (diff)
parent     ec42d547eba5c0ad0bddbecc8902d35383968e78 (diff)
Created using spr 1.3.4 [skip ci]
-rw-r--r--  bolt/lib/Core/BinaryFunction.cpp | 2
-rw-r--r--  clang-tools-extra/include-cleaner/lib/Record.cpp | 18
-rw-r--r--  clang-tools-extra/include-cleaner/unittests/RecordTest.cpp | 32
-rw-r--r--  clang/docs/ReleaseNotes.rst | 8
-rw-r--r--  clang/include/clang/Sema/Overload.h | 3
-rw-r--r--  clang/lib/AST/ExprConstant.cpp | 7
-rw-r--r--  clang/lib/AST/Interp/IntegralAP.h | 7
-rw-r--r--  clang/lib/AST/Interp/InterpBuiltin.cpp | 207
-rw-r--r--  clang/lib/Driver/ToolChains/DragonFly.cpp | 8
-rw-r--r--  clang/lib/Driver/ToolChains/Gnu.cpp | 55
-rw-r--r--  clang/lib/Driver/ToolChains/OpenBSD.cpp | 14
-rw-r--r--  clang/lib/Format/QualifierAlignmentFixer.cpp | 12
-rw-r--r--  clang/lib/Format/QualifierAlignmentFixer.h | 4
-rw-r--r--  clang/lib/Sema/SemaDecl.cpp | 3
-rw-r--r--  clang/lib/Sema/SemaExprCXX.cpp | 30
-rw-r--r--  clang/lib/Sema/SemaOverload.cpp | 10
-rw-r--r--  clang/test/AST/Interp/builtin-functions.cpp | 7
-rw-r--r--  clang/test/Driver/aarch64-mcpu.c | 6
-rw-r--r--  clang/test/Driver/dynamic-linker.c | 2
-rw-r--r--  clang/test/Driver/linux-ld.c | 18
-rw-r--r--  clang/test/Driver/ohos.c | 2
-rw-r--r--  clang/test/Frontend/fixed_point_conversions.c | 14
-rw-r--r--  clang/test/Misc/cc1as-compress.s | 2
-rw-r--r--  clang/test/Misc/target-invalid-cpu-note.c | 4
-rw-r--r--  clang/test/Sema/builtin-classify-type.c | 5
-rw-r--r--  clang/test/SemaCXX/builtin-classify-type.cpp | 5
-rw-r--r--  clang/unittests/Format/QualifierFixerTest.cpp | 6
-rw-r--r--  compiler-rt/lib/orc/bitmask_enum.h | 151
-rw-r--r--  compiler-rt/lib/orc/stl_extras.h | 12
-rw-r--r--  compiler-rt/lib/orc/tests/unit/CMakeLists.txt | 1
-rw-r--r--  compiler-rt/lib/orc/tests/unit/bitmask_enum_test.cpp | 143
-rw-r--r--  flang/docs/AssumedRank.md | 688
-rw-r--r--  flang/docs/index.md | 1
-rw-r--r--  flang/lib/Lower/OpenACC.cpp | 45
-rw-r--r--  flang/test/Lower/OpenACC/HLFIR/acc-declare.f90 | 28
-rw-r--r--  libcxx/include/deque | 1
-rw-r--r--  libcxx/test/std/experimental/simd/test_utils.h | 6
-rw-r--r--  libcxx/utils/libcxx/test/params.py | 4
-rw-r--r--  libcxxabi/CMakeLists.txt | 5
-rw-r--r--  libunwind/CMakeLists.txt | 5
-rw-r--r--  lld/COFF/Writer.cpp | 70
-rw-r--r--  lld/ELF/InputFiles.cpp | 10
-rw-r--r--  lld/ELF/InputFiles.h | 7
-rw-r--r--  lld/ELF/InputSection.cpp | 10
-rw-r--r--  lld/ELF/InputSection.h | 10
-rw-r--r--  lld/test/ELF/lto/arm.ll | 59
-rw-r--r--  lldb/include/lldb/Expression/UserExpression.h | 8
-rw-r--r--  lldb/source/Breakpoint/BreakpointLocation.cpp | 1
-rw-r--r--  lldb/source/Commands/CommandObjectTarget.cpp | 8
-rw-r--r--  lldb/source/Core/IOHandlerCursesGUI.cpp | 4
-rw-r--r--  lldb/test/API/commands/target/basic/TestTargetCommand.py | 2
-rw-r--r--  llvm/cmake/modules/AddLLVM.cmake | 1
-rw-r--r--  llvm/docs/CommandGuide/llvm-exegesis.rst | 13
-rw-r--r--  llvm/docs/ReleaseNotes.rst | 2
-rw-r--r--  llvm/include/llvm-c/Orc.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/ScalarEvolution.h | 4
-rw-r--r--  llvm/include/llvm/IR/DebugInfoMetadata.h | 115
-rw-r--r--  llvm/include/llvm/TargetParser/AArch64TargetParser.h | 17
-rw-r--r--  llvm/lib/Analysis/AssumptionCache.cpp | 8
-rw-r--r--  llvm/lib/Analysis/ScalarEvolution.cpp | 38
-rw-r--r--  llvm/lib/Analysis/TrainingLogger.cpp | 5
-rw-r--r--  llvm/lib/Analysis/ValueTracking.cpp | 53
-rw-r--r--  llvm/lib/CodeGen/CalcSpillWeights.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/InlineSpiller.cpp | 9
-rw-r--r--  llvm/lib/CodeGen/SpillPlacement.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/SpillPlacement.h | 2
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp | 6
-rw-r--r--  llvm/lib/IR/DebugInfoMetadata.cpp | 33
-rw-r--r--  llvm/lib/MC/ELFObjectWriter.cpp | 2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64.td | 43
-rw-r--r--  llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 3
-rw-r--r--  llvm/lib/Target/AArch64/AArch64Subtarget.h | 3
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 4
-rw-r--r--  llvm/lib/TargetParser/Host.cpp | 3
-rw-r--r--  llvm/lib/Transforms/IPO/SampleProfileProbe.cpp | 3
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 27
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 25
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll | 235
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll | 150
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll | 7
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll | 44
-rw-r--r--  llvm/test/CodeGen/X86/pr72539.ll | 27
-rw-r--r--  llvm/test/CodeGen/X86/vec_fabs.ll | 4272
-rw-r--r--  llvm/test/MC/ELF/compress-debug-sections-zstd.s | 9
-rw-r--r--  llvm/test/Transforms/InstCombine/div.ll | 16
-rw-r--r--  llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll | 6
-rw-r--r--  llvm/test/Transforms/LoopVectorize/pr66616.ll | 95
-rw-r--r--  llvm/test/tools/llvm-exegesis/X86/latency/subprocess-preserved-registers.s | 30
-rw-r--r--  llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 2
-rw-r--r--  llvm/unittests/TargetParser/TargetParserTest.cpp | 40
-rw-r--r--  mlir/docs/Dialects/emitc.md | 8
-rw-r--r--  mlir/include/mlir/Dialect/EmitC/IR/EmitC.td | 18
-rw-r--r--  mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td | 8
-rw-r--r--  mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td | 9
-rw-r--r--  mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCompositeOps.td | 13
-rw-r--r--  mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h | 46
-rw-r--r--  mlir/include/mlir/ExecutionEngine/SparseTensor/File.h | 2
-rw-r--r--  mlir/lib/Analysis/Presburger/IntegerRelation.cpp | 1
-rw-r--r--  mlir/lib/Dialect/EmitC/IR/EmitC.cpp | 2
-rw-r--r--  mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 10
-rw-r--r--  mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp | 15
-rw-r--r--  mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp | 61
-rw-r--r--  mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h | 30
-rw-r--r--  mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp | 18
-rw-r--r--  mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h | 10
-rw-r--r--  mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp | 135
-rw-r--r--  mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp | 209
-rw-r--r--  mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp | 8
-rw-r--r--  mlir/lib/Target/Cpp/TranslateToCpp.cpp | 26
-rw-r--r--  mlir/test/Conversion/SCFToEmitC/if.mlir | 28
-rw-r--r--  mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir | 4
-rw-r--r--  mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir | 6
-rw-r--r--  mlir/test/Dialect/EmitC/attrs.mlir | 4
-rw-r--r--  mlir/test/Dialect/EmitC/invalid_ops.mlir | 24
-rw-r--r--  mlir/test/Dialect/EmitC/ops.mlir | 18
-rw-r--r--  mlir/test/Dialect/EmitC/types.mlir | 26
-rw-r--r--  mlir/test/Dialect/SPIRV/IR/composite-ops.mlir | 8
-rw-r--r--  mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir | 14
-rw-r--r--  mlir/test/Dialect/SparseTensor/GPU/gpu_matvec_lib.mlir | 14
-rw-r--r--  mlir/test/Dialect/SparseTensor/GPU/gpu_sampled_matmul_lib.mlir | 16
-rw-r--r--  mlir/test/Dialect/SparseTensor/GPU/gpu_sddmm_lib.mlir | 10
-rw-r--r--  mlir/test/Dialect/SparseTensor/codegen.mlir | 98
-rw-r--r--  mlir/test/Dialect/SparseTensor/codegen_buffer_initialization.mlir | 6
-rw-r--r--  mlir/test/Dialect/SparseTensor/dense.mlir | 20
-rw-r--r--  mlir/test/Dialect/SparseTensor/fold.mlir | 4
-rw-r--r--  mlir/test/Dialect/SparseTensor/one_trip.mlir | 8
-rw-r--r--  mlir/test/Dialect/SparseTensor/pre_rewriting.mlir | 26
-rw-r--r--  mlir/test/Dialect/SparseTensor/rejected.mlir | 2
-rw-r--r--  mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir | 14
-rw-r--r--  mlir/test/Dialect/SparseTensor/roundtrip.mlir | 16
-rw-r--r--  mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir | 60
-rw-r--r--  mlir/test/Dialect/SparseTensor/semi_ring.mlir | 10
-rw-r--r--  mlir/test/Dialect/SparseTensor/sorted_coo.mlir | 40
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_1d.mlir | 200
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_2d.mlir | 314
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_3d.mlir | 190
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_affine.mlir | 70
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_concat.mlir | 128
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_conv_2d_slice_based.mlir | 44
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_extract_slice.mlir | 2
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_foreach.mlir | 6
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir | 96
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_index.mlir | 46
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir | 88
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_kernels.mlir | 106
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_lower.mlir | 8
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir | 2
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir | 8
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_nd.mlir | 12
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_out.mlir | 196
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_pack.mlir | 2
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir | 2
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_perm.mlir | 14
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir | 10
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_reinterpret_map.mlir | 22
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_reshape.mlir | 32
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_scalars.mlir | 12
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir | 40
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir | 28
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir | 2
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_transpose.mlir | 32
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir | 16
-rw-r--r--  mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir | 16
-rw-r--r--  mlir/test/Dialect/SparseTensor/spy_sddmm.mlir | 12
-rw-r--r--  mlir/test/Dialect/SparseTensor/unused-tensor.mlir | 2
-rw-r--r--  mlir/test/Dialect/SparseTensor/vectorize_reduction.mlir | 84
-rw-r--r--  mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir | 9
-rw-r--r--  mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sddmm-lib.mlir | 3
-rw-r--r--  mlir/test/Target/Cpp/attrs.mlir | 4
-rw-r--r--  mlir/test/Target/Cpp/call.mlir | 18
-rw-r--r--  mlir/test/Target/Cpp/common-cpp.mlir | 28
-rw-r--r--  mlir/test/Target/Cpp/control_flow.mlir | 4
-rw-r--r--  mlir/test/Target/Cpp/for.mlir | 10
-rw-r--r--  mlir/test/Target/Cpp/if.mlir | 14
-rw-r--r--  mlir/test/Target/Cpp/literal_call_operand.mlir | 4
-rw-r--r--  mlir/test/Target/Cpp/types.mlir | 24
-rw-r--r--  mlir/test/Target/SPIRV/composite-op.mlir | 4
-rw-r--r--  mlir/unittests/Dialect/SparseTensor/MergerTest.cpp | 3
181 files changed, 8110 insertions, 2107 deletions
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 19c8a7bc3cfa..e81d58ef0b10 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -54,11 +54,9 @@ namespace opts {
extern cl::OptionCategory BoltCategory;
extern cl::OptionCategory BoltOptCategory;
-extern cl::OptionCategory BoltRelocCategory;
extern cl::opt<bool> EnableBAT;
extern cl::opt<bool> Instrument;
-extern cl::opt<bool> KeepNops;
extern cl::opt<bool> StrictMode;
extern cl::opt<bool> UpdateDebugSections;
extern cl::opt<unsigned> Verbosity;
diff --git a/clang-tools-extra/include-cleaner/lib/Record.cpp b/clang-tools-extra/include-cleaner/lib/Record.cpp
index 7a8e10a9c675..6e00ff93a7fe 100644
--- a/clang-tools-extra/include-cleaner/lib/Record.cpp
+++ b/clang-tools-extra/include-cleaner/lib/Record.cpp
@@ -240,20 +240,10 @@ public:
// Make sure current include is covered by the export pragma.
if ((Top.Block && HashLine > Top.SeenAtLine) ||
Top.SeenAtLine == HashLine) {
- if (IncludedHeader) {
- switch (IncludedHeader->kind()) {
- case Header::Physical:
- Out->IWYUExportBy[IncludedHeader->physical().getUniqueID()]
- .push_back(Top.Path);
- break;
- case Header::Standard:
- Out->StdIWYUExportBy[IncludedHeader->standard()].push_back(Top.Path);
- break;
- case Header::Verbatim:
- assert(false && "unexpected Verbatim header");
- break;
- }
- }
+ if (IncludedFile)
+ Out->IWYUExportBy[IncludedFile->getUniqueID()].push_back(Top.Path);
+ if (IncludedHeader && IncludedHeader->kind() == Header::Standard)
+ Out->StdIWYUExportBy[IncludedHeader->standard()].push_back(Top.Path);
// main-file #include with export pragma should never be removed.
if (Top.SeenAtFile == SM.getMainFileID() && IncludedFile)
Out->ShouldKeep.insert(IncludedFile->getUniqueID());
diff --git a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp
index 36850731d514..0f2ded5f1834 100644
--- a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp
+++ b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp
@@ -452,6 +452,8 @@ TEST_F(PragmaIncludeTest, IWYUExportForStandardHeaders) {
auto &FM = Processed.fileManager();
EXPECT_THAT(PI.getExporters(*tooling::stdlib::Header::named("<string>"), FM),
testing::UnorderedElementsAre(FileNamed("export.h")));
+ EXPECT_THAT(PI.getExporters(llvm::cantFail(FM.getFileRef("string")), FM),
+ testing::UnorderedElementsAre(FileNamed("export.h")));
}
TEST_F(PragmaIncludeTest, IWYUExportBlock) {
@@ -556,5 +558,35 @@ TEST_F(PragmaIncludeTest, ExportInUnnamedBuffer) {
PI.getExporters(llvm::cantFail(FM->getFileRef("foo.h")), *FM),
testing::ElementsAre(llvm::cantFail(FM->getFileRef("exporter.h"))));
}
+
+TEST_F(PragmaIncludeTest, OutlivesFMAndSM) {
+ Inputs.Code = R"cpp(
+ #include "public.h"
+ )cpp";
+ Inputs.ExtraFiles["public.h"] = R"cpp(
+ #include "private.h"
+ #include "private2.h" // IWYU pragma: export
+ )cpp";
+ Inputs.ExtraFiles["private.h"] = R"cpp(
+ // IWYU pragma: private, include "public.h"
+ )cpp";
+ Inputs.ExtraFiles["private2.h"] = R"cpp(
+ // IWYU pragma: private
+ )cpp";
+ build(); // Fills up PI, file/source manager used is destroyed afterwards.
+ Inputs.MakeAction = nullptr; // Don't populate PI anymore.
+
+ // Now this build gives us a new File&Source Manager.
+ TestAST Processed = build();
+ auto &FM = Processed.fileManager();
+ auto PrivateFE = FM.getFile("private.h");
+ assert(PrivateFE);
+ EXPECT_EQ(PI.getPublic(PrivateFE.get()), "\"public.h\"");
+
+ auto Private2FE = FM.getFile("private2.h");
+ assert(Private2FE);
+ EXPECT_THAT(PI.getExporters(Private2FE.get(), FM),
+ testing::ElementsAre(llvm::cantFail(FM.getFileRef("public.h"))));
+}
} // namespace
} // namespace clang::include_cleaner
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index ed1a978b5382..739831ddfb3e 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -219,6 +219,8 @@ Non-comprehensive list of changes in this release
determined at runtime.
* The ``__datasizeof`` keyword has been added. It is similar to ``sizeof``
except that it returns the size of a type ignoring tail padding.
+* ``__builtin_classify_type()`` now classifies ``_BitInt`` values as the return value ``18``,
+ to match GCC 14's behavior.
New Compiler Flags
------------------
@@ -760,6 +762,12 @@ Arm and AArch64 Support
- New AArch64 asm constraints have been added for r8-r11(Uci) and r12-r15(Ucj).
+ Support has been added for the following processors (-mcpu identifiers in parentheses):
+
+ * Arm Cortex-A520 (cortex-a520).
+ * Arm Cortex-A720 (cortex-a720).
+ * Arm Cortex-X4 (cortex-x4).
+
Android Support
^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h
index a97968dc7b20..333309a3686a 100644
--- a/clang/include/clang/Sema/Overload.h
+++ b/clang/include/clang/Sema/Overload.h
@@ -192,6 +192,9 @@ class Sema;
/// C-only conversion between pointers with incompatible types
ICK_Incompatible_Pointer_Conversion,
+ /// Fixed point type conversions according to N1169.
+ ICK_Fixed_Point_Conversion,
+
/// The number of conversion kinds
ICK_Num_Conversion_Kinds,
};
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 373972eb6cab..4fb444e3b9f7 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11520,6 +11520,9 @@ enum class GCCTypeClass {
// decay to pointer. (Prior to version 6 it was only used in C++ mode).
// GCC reserves 15 for strings, but actually uses 5 (pointer) for string
// literals.
+ // Lang = 16,
+ // OpaqueType = 17,
+ BitInt = 18
};
/// EvaluateBuiltinClassifyType - Evaluate __builtin_classify_type the same way
@@ -11652,11 +11655,13 @@ EvaluateBuiltinClassifyType(QualType T, const LangOptions &LangOpts) {
case Type::ObjCInterface:
case Type::ObjCObjectPointer:
case Type::Pipe:
- case Type::BitInt:
// GCC classifies vectors as None. We follow its lead and classify all
// other types that don't fit into the regular classification the same way.
return GCCTypeClass::None;
+ case Type::BitInt:
+ return GCCTypeClass::BitInt;
+
case Type::LValueReference:
case Type::RValueReference:
llvm_unreachable("invalid type for expression");
diff --git a/clang/lib/AST/Interp/IntegralAP.h b/clang/lib/AST/Interp/IntegralAP.h
index bd665959cf3d..9019f32e6cef 100644
--- a/clang/lib/AST/Interp/IntegralAP.h
+++ b/clang/lib/AST/Interp/IntegralAP.h
@@ -102,7 +102,12 @@ public:
template <bool InputSigned>
static IntegralAP from(IntegralAP<InputSigned> V, unsigned NumBits = 0) {
- return IntegralAP<Signed>(V.V);
+ if (NumBits == 0)
+ NumBits = V.bitWidth();
+
+ if constexpr (InputSigned)
+ return IntegralAP<Signed>(V.V.sextOrTrunc(NumBits));
+ return IntegralAP<Signed>(V.V.zextOrTrunc(NumBits));
}
template <unsigned Bits, bool InputSigned>
diff --git a/clang/lib/AST/Interp/InterpBuiltin.cpp b/clang/lib/AST/Interp/InterpBuiltin.cpp
index 8c5efe2df909..bb3e13599b01 100644
--- a/clang/lib/AST/Interp/InterpBuiltin.cpp
+++ b/clang/lib/AST/Interp/InterpBuiltin.cpp
@@ -59,13 +59,54 @@ static void pushInt(InterpState &S, int32_t Val) {
llvm_unreachable("Int isn't 16 or 32 bit?");
}
-static bool retInt(InterpState &S, CodePtr OpPC, APValue &Result) {
- PrimType IntType = getIntPrimType(S);
- if (IntType == PT_Sint32)
- return Ret<PT_Sint32>(S, OpPC, Result);
- else if (IntType == PT_Sint16)
- return Ret<PT_Sint16>(S, OpPC, Result);
- llvm_unreachable("Int isn't 16 or 32 bit?");
+static void pushAPSInt(InterpState &S, const APSInt &Val) {
+ bool Signed = Val.isSigned();
+
+ if (Signed) {
+ switch (Val.getBitWidth()) {
+ case 64:
+ S.Stk.push<Integral<64, true>>(
+ Integral<64, true>::from(Val.getSExtValue()));
+ break;
+ case 32:
+ S.Stk.push<Integral<32, true>>(
+ Integral<32, true>::from(Val.getSExtValue()));
+ break;
+ case 16:
+ S.Stk.push<Integral<16, true>>(
+ Integral<16, true>::from(Val.getSExtValue()));
+ break;
+ case 8:
+ S.Stk.push<Integral<8, true>>(
+ Integral<8, true>::from(Val.getSExtValue()));
+ break;
+ default:
+ llvm_unreachable("Invalid integer bitwidth");
+ }
+ return;
+ }
+
+ // Unsigned.
+ switch (Val.getBitWidth()) {
+ case 64:
+ S.Stk.push<Integral<64, false>>(
+ Integral<64, false>::from(Val.getZExtValue()));
+ break;
+ case 32:
+ S.Stk.push<Integral<32, false>>(
+ Integral<32, false>::from(Val.getZExtValue()));
+ break;
+ case 16:
+ S.Stk.push<Integral<16, false>>(
+ Integral<16, false>::from(Val.getZExtValue()));
+ break;
+ case 8:
+ S.Stk.push<Integral<8, false>>(
+ Integral<8, false>::from(Val.getZExtValue()));
+ break;
+ default:
+ llvm_unreachable("Invalid integer bitwidth");
+ }
}
static void pushSizeT(InterpState &S, uint64_t Val) {
@@ -87,20 +128,29 @@ static void pushSizeT(InterpState &S, uint64_t Val) {
}
}
-static bool retSizeT(InterpState &S, CodePtr OpPC, APValue &Result) {
- const TargetInfo &TI = S.getCtx().getTargetInfo();
- unsigned SizeTWidth = TI.getTypeWidth(TI.getSizeType());
-
- switch (SizeTWidth) {
- case 64:
- return Ret<PT_Uint64>(S, OpPC, Result);
- case 32:
- return Ret<PT_Uint32>(S, OpPC, Result);
- case 16:
- return Ret<PT_Uint16>(S, OpPC, Result);
+static bool retPrimValue(InterpState &S, CodePtr OpPC, APValue &Result,
+ std::optional<PrimType> &T) {
+ if (!T)
+ return RetVoid(S, OpPC, Result);
+
+#define RET_CASE(X) \
+ case X: \
+ return Ret<X>(S, OpPC, Result);
+ switch (*T) {
+ RET_CASE(PT_Float);
+ RET_CASE(PT_Bool);
+ RET_CASE(PT_Sint8);
+ RET_CASE(PT_Uint8);
+ RET_CASE(PT_Sint16);
+ RET_CASE(PT_Uint16);
+ RET_CASE(PT_Sint32);
+ RET_CASE(PT_Uint32);
+ RET_CASE(PT_Sint64);
+ RET_CASE(PT_Uint64);
+ default:
+ llvm_unreachable("Unsupported return type for builtin function");
}
-
- llvm_unreachable("size_t isn't 64 or 32 bit?");
+#undef RET_CASE
}
static bool interp__builtin_strcmp(InterpState &S, CodePtr OpPC,
@@ -457,40 +507,55 @@ static bool interp__builtin_clrsb(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_bitreverse(InterpState &S, CodePtr OpPC,
+ const InterpFrame *Frame,
+ const Function *Func,
+ const CallExpr *Call) {
+ PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType());
+ APSInt Val = peekToAPSInt(S.Stk, ArgT);
+ pushAPSInt(S, APSInt(Val.reverseBits(), /*IsUnsigned=*/true));
+ return true;
+}
+
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
const CallExpr *Call) {
InterpFrame *Frame = S.Current;
APValue Dummy;
+ QualType ReturnType = Call->getCallReturnType(S.getCtx());
+ std::optional<PrimType> ReturnT = S.getContext().classify(ReturnType);
+ // If classify failed, we assume void.
+ assert(ReturnT || ReturnType->isVoidType());
+
switch (F->getBuiltinID()) {
case Builtin::BI__builtin_is_constant_evaluated:
S.Stk.push<Boolean>(Boolean::from(S.inConstantContext()));
- return Ret<PT_Bool>(S, OpPC, Dummy);
+ break;
case Builtin::BI__builtin_assume:
- return RetVoid(S, OpPC, Dummy);
+ break;
case Builtin::BI__builtin_strcmp:
- if (interp__builtin_strcmp(S, OpPC, Frame))
- return retInt(S, OpPC, Dummy);
+ if (!interp__builtin_strcmp(S, OpPC, Frame))
+ return false;
break;
case Builtin::BI__builtin_strlen:
- if (interp__builtin_strlen(S, OpPC, Frame))
- return retSizeT(S, OpPC, Dummy);
+ if (!interp__builtin_strlen(S, OpPC, Frame))
+ return false;
break;
case Builtin::BI__builtin_nan:
case Builtin::BI__builtin_nanf:
case Builtin::BI__builtin_nanl:
case Builtin::BI__builtin_nanf16:
case Builtin::BI__builtin_nanf128:
- if (interp__builtin_nan(S, OpPC, Frame, F, /*Signaling=*/false))
- return Ret<PT_Float>(S, OpPC, Dummy);
+ if (!interp__builtin_nan(S, OpPC, Frame, F, /*Signaling=*/false))
+ return false;
break;
case Builtin::BI__builtin_nans:
case Builtin::BI__builtin_nansf:
case Builtin::BI__builtin_nansl:
case Builtin::BI__builtin_nansf16:
case Builtin::BI__builtin_nansf128:
- if (interp__builtin_nan(S, OpPC, Frame, F, /*Signaling=*/true))
- return Ret<PT_Float>(S, OpPC, Dummy);
+ if (!interp__builtin_nan(S, OpPC, Frame, F, /*Signaling=*/true))
+ return false;
break;
case Builtin::BI__builtin_huge_val:
@@ -503,15 +568,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
case Builtin::BI__builtin_infl:
case Builtin::BI__builtin_inff16:
case Builtin::BI__builtin_inff128:
- if (interp__builtin_inf(S, OpPC, Frame, F))
- return Ret<PT_Float>(S, OpPC, Dummy);
+ if (!interp__builtin_inf(S, OpPC, Frame, F))
+ return false;
break;
case Builtin::BI__builtin_copysign:
case Builtin::BI__builtin_copysignf:
case Builtin::BI__builtin_copysignl:
case Builtin::BI__builtin_copysignf128:
- if (interp__builtin_copysign(S, OpPC, Frame, F))
- return Ret<PT_Float>(S, OpPC, Dummy);
+ if (!interp__builtin_copysign(S, OpPC, Frame, F))
+ return false;
break;
case Builtin::BI__builtin_fmin:
@@ -519,8 +584,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
case Builtin::BI__builtin_fminl:
case Builtin::BI__builtin_fminf16:
case Builtin::BI__builtin_fminf128:
- if (interp__builtin_fmin(S, OpPC, Frame, F))
- return Ret<PT_Float>(S, OpPC, Dummy);
+ if (!interp__builtin_fmin(S, OpPC, Frame, F))
+ return false;
break;
case Builtin::BI__builtin_fmax:
@@ -528,60 +593,60 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
case Builtin::BI__builtin_fmaxl:
case Builtin::BI__builtin_fmaxf16:
case Builtin::BI__builtin_fmaxf128:
- if (interp__builtin_fmax(S, OpPC, Frame, F))
- return Ret<PT_Float>(S, OpPC, Dummy);
+ if (!interp__builtin_fmax(S, OpPC, Frame, F))
+ return false;
break;
case Builtin::BI__builtin_isnan:
- if (interp__builtin_isnan(S, OpPC, Frame, F))
- return retInt(S, OpPC, Dummy);
+ if (!interp__builtin_isnan(S, OpPC, Frame, F))
+ return false;
break;
case Builtin::BI__builtin_issignaling:
- if (interp__builtin_issignaling(S, OpPC, Frame, F))
- return retInt(S, OpPC, Dummy);
+ if (!interp__builtin_issignaling(S, OpPC, Frame, F))
+ return false;
break;
case Builtin::BI__builtin_isinf:
- if (interp__builtin_isinf(S, OpPC, Frame, F, /*Sign=*/false))
- return retInt(S, OpPC, Dummy);
+ if (!interp__builtin_isinf(S, OpPC, Frame, F, /*Sign=*/false))
+ return false;
break;
case Builtin::BI__builtin_isinf_sign:
- if (interp__builtin_isinf(S, OpPC, Frame, F, /*Sign=*/true))
- return retInt(S, OpPC, Dummy);
+ if (!interp__builtin_isinf(S, OpPC, Frame, F, /*Sign=*/true))
+ return false;
break;
case Builtin::BI__builtin_isfinite:
- if (interp__builtin_isfinite(S, OpPC, Frame, F))
- return retInt(S, OpPC, Dummy);
+ if (!interp__builtin_isfinite(S, OpPC, Frame, F))
+ return false;
break;
case Builtin::BI__builtin_isnormal:
- if (interp__builtin_isnormal(S, OpPC, Frame, F))
- return retInt(S, OpPC, Dummy);
+ if (!interp__builtin_isnormal(S, OpPC, Frame, F))
+ return false;
break;
case Builtin::BI__builtin_issubnormal:
- if (interp__builtin_issubnormal(S, OpPC, Frame, F))
- return retInt(S, OpPC, Dummy);
+ if (!interp__builtin_issubnormal(S, OpPC, Frame, F))
+ return false;
break;
case Builtin::BI__builtin_iszero:
- if (interp__builtin_iszero(S, OpPC, Frame, F))
- return retInt(S, OpPC, Dummy);
+ if (!interp__builtin_iszero(S, OpPC, Frame, F))
+ return false;
break;
case Builtin::BI__builtin_isfpclass:
- if (interp__builtin_isfpclass(S, OpPC, Frame, F, Call))
- return retInt(S, OpPC, Dummy);
+ if (!interp__builtin_isfpclass(S, OpPC, Frame, F, Call))
+ return false;
break;
case Builtin::BI__builtin_fpclassify:
- if (interp__builtin_fpclassify(S, OpPC, Frame, F))
- return retInt(S, OpPC, Dummy);
+ if (!interp__builtin_fpclassify(S, OpPC, Frame, F))
+ return false;
break;
case Builtin::BI__builtin_fabs:
case Builtin::BI__builtin_fabsf:
case Builtin::BI__builtin_fabsl:
case Builtin::BI__builtin_fabsf128:
- if (interp__builtin_fabs(S, OpPC, Frame, F))
- return Ret<PT_Float>(S, OpPC, Dummy);
+ if (!interp__builtin_fabs(S, OpPC, Frame, F))
+ return false;
break;
case Builtin::BI__builtin_popcount:
@@ -590,29 +655,37 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
case Builtin::BI__popcnt16: // Microsoft variants of popcount
case Builtin::BI__popcnt:
case Builtin::BI__popcnt64:
- if (interp__builtin_popcount(S, OpPC, Frame, F, Call))
- return retInt(S, OpPC, Dummy);
+ if (!interp__builtin_popcount(S, OpPC, Frame, F, Call))
+ return false;
break;
case Builtin::BI__builtin_parity:
case Builtin::BI__builtin_parityl:
case Builtin::BI__builtin_parityll:
- if (interp__builtin_parity(S, OpPC, Frame, F, Call))
- return retInt(S, OpPC, Dummy);
+ if (!interp__builtin_parity(S, OpPC, Frame, F, Call))
+ return false;
break;
case Builtin::BI__builtin_clrsb:
case Builtin::BI__builtin_clrsbl:
case Builtin::BI__builtin_clrsbll:
- if (interp__builtin_clrsb(S, OpPC, Frame, F, Call))
- return retInt(S, OpPC, Dummy);
+ if (!interp__builtin_clrsb(S, OpPC, Frame, F, Call))
+ return false;
+ break;
+
+ case Builtin::BI__builtin_bitreverse8:
+ case Builtin::BI__builtin_bitreverse16:
+ case Builtin::BI__builtin_bitreverse32:
+ case Builtin::BI__builtin_bitreverse64:
+ if (!interp__builtin_bitreverse(S, OpPC, Frame, F, Call))
+ return false;
break;
default:
return false;
}
- return false;
+ return retPrimValue(S, OpPC, Dummy, ReturnT);
}
bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E,
diff --git a/clang/lib/Driver/ToolChains/DragonFly.cpp b/clang/lib/Driver/ToolChains/DragonFly.cpp
index b13449bf778f..9942fc632e0a 100644
--- a/clang/lib/Driver/ToolChains/DragonFly.cpp
+++ b/clang/lib/Driver/ToolChains/DragonFly.cpp
@@ -57,11 +57,11 @@ void dragonfly::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const auto &ToolChain = static_cast<const DragonFly &>(getToolChain());
const Driver &D = ToolChain.getDriver();
const llvm::Triple::ArchType Arch = ToolChain.getArch();
+ const bool Static = Args.hasArg(options::OPT_static);
+ const bool Shared = Args.hasArg(options::OPT_shared);
+ const bool Profiling = Args.hasArg(options::OPT_pg);
+ const bool Pie = Args.hasArg(options::OPT_pie);
ArgStringList CmdArgs;
- bool Static = Args.hasArg(options::OPT_static);
- bool Shared = Args.hasArg(options::OPT_shared);
- bool Profiling = Args.hasArg(options::OPT_pg);
- bool Pie = Args.hasArg(options::OPT_pie);
if (!D.SysRoot.empty())
CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));
diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index 76986481686a..ba95ce9c5a28 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -292,18 +292,6 @@ static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) {
}
}
-static bool getPIE(const ArgList &Args, const ToolChain &TC) {
- if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_static) ||
- Args.hasArg(options::OPT_r) || Args.hasArg(options::OPT_static_pie))
- return false;
-
- Arg *A = Args.getLastArg(options::OPT_pie, options::OPT_no_pie,
- options::OPT_nopie);
- if (!A)
- return TC.isPIEDefault(Args);
- return A->getOption().matches(options::OPT_pie);
-}
-
static bool getStaticPIE(const ArgList &Args, const ToolChain &TC) {
bool HasStaticPIE = Args.hasArg(options::OPT_static_pie);
// -no-pie is an alias for -nopie. So, handling -nopie takes care of
@@ -386,7 +374,6 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const bool isAndroid = ToolChain.getTriple().isAndroid();
const bool IsIAMCU = ToolChain.getTriple().isOSIAMCU();
const bool IsVE = ToolChain.getTriple().isVE();
- const bool IsPIE = getPIE(Args, ToolChain);
const bool IsStaticPIE = getStaticPIE(Args, ToolChain);
const bool IsStatic = getStatic(Args);
const bool HasCRTBeginEndFiles =
@@ -406,17 +393,6 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
if (!D.SysRoot.empty())
CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));
- if (IsPIE)
- CmdArgs.push_back("-pie");
-
- if (IsStaticPIE) {
- CmdArgs.push_back("-static");
- CmdArgs.push_back("-pie");
- CmdArgs.push_back("--no-dynamic-linker");
- CmdArgs.push_back("-z");
- CmdArgs.push_back("text");
- }
-
if (Args.hasArg(options::OPT_s))
CmdArgs.push_back("-s");
@@ -451,19 +427,32 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
if (Triple.isRISCV())
CmdArgs.push_back("-X");
- if (Args.hasArg(options::OPT_shared))
+ const bool IsShared = Args.hasArg(options::OPT_shared);
+ if (IsShared)
CmdArgs.push_back("-shared");
-
- if (IsStatic) {
+ bool IsPIE = false;
+ if (IsStaticPIE) {
+ CmdArgs.push_back("-static");
+ CmdArgs.push_back("-pie");
+ CmdArgs.push_back("--no-dynamic-linker");
+ CmdArgs.push_back("-z");
+ CmdArgs.push_back("text");
+ } else if (IsStatic) {
CmdArgs.push_back("-static");
- } else if (!Args.hasArg(options::OPT_r) &&
- !Args.hasArg(options::OPT_shared) && !IsStaticPIE) {
+ } else if (!Args.hasArg(options::OPT_r)) {
if (Args.hasArg(options::OPT_rdynamic))
CmdArgs.push_back("-export-dynamic");
-
- CmdArgs.push_back("-dynamic-linker");
- CmdArgs.push_back(Args.MakeArgString(Twine(D.DyldPrefix) +
- ToolChain.getDynamicLinker(Args)));
+ if (!IsShared) {
+ Arg *A = Args.getLastArg(options::OPT_pie, options::OPT_no_pie,
+ options::OPT_nopie);
+ IsPIE = A ? A->getOption().matches(options::OPT_pie)
+ : ToolChain.isPIEDefault(Args);
+ if (IsPIE)
+ CmdArgs.push_back("-pie");
+ CmdArgs.push_back("-dynamic-linker");
+ CmdArgs.push_back(Args.MakeArgString(Twine(D.DyldPrefix) +
+ ToolChain.getDynamicLinker(Args)));
+ }
}
CmdArgs.push_back("-o");
diff --git a/clang/lib/Driver/ToolChains/OpenBSD.cpp b/clang/lib/Driver/ToolChains/OpenBSD.cpp
index 5d06cd8ab0ba..c8f02161d831 100644
--- a/clang/lib/Driver/ToolChains/OpenBSD.cpp
+++ b/clang/lib/Driver/ToolChains/OpenBSD.cpp
@@ -33,9 +33,9 @@ void openbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
const auto &ToolChain = static_cast<const OpenBSD &>(getToolChain());
const Driver &D = ToolChain.getDriver();
const llvm::Triple &Triple = ToolChain.getTriple();
+ ArgStringList CmdArgs;
claimNoWarnArgs(Args);
- ArgStringList CmdArgs;
switch (ToolChain.getArch()) {
case llvm::Triple::x86:
@@ -112,13 +112,13 @@ void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const auto &ToolChain = static_cast<const OpenBSD &>(getToolChain());
const Driver &D = ToolChain.getDriver();
const llvm::Triple::ArchType Arch = ToolChain.getArch();
- ArgStringList CmdArgs;
- bool Static = Args.hasArg(options::OPT_static);
- bool Shared = Args.hasArg(options::OPT_shared);
- bool Profiling = Args.hasArg(options::OPT_pg);
- bool Pie = Args.hasArg(options::OPT_pie);
- bool Nopie = Args.hasArg(options::OPT_nopie);
+ const bool Static = Args.hasArg(options::OPT_static);
+ const bool Shared = Args.hasArg(options::OPT_shared);
+ const bool Profiling = Args.hasArg(options::OPT_pg);
+ const bool Pie = Args.hasArg(options::OPT_pie);
+ const bool Nopie = Args.hasArg(options::OPT_nopie);
const bool Relocatable = Args.hasArg(options::OPT_r);
+ ArgStringList CmdArgs;
// Silence warning for "clang -g foo.o -o foo"
Args.ClaimAllArgs(options::OPT_g_Group);
diff --git a/clang/lib/Format/QualifierAlignmentFixer.cpp b/clang/lib/Format/QualifierAlignmentFixer.cpp
index e2fab1c1e3c2..84941746f0df 100644
--- a/clang/lib/Format/QualifierAlignmentFixer.cpp
+++ b/clang/lib/Format/QualifierAlignmentFixer.cpp
@@ -535,14 +535,21 @@ LeftRightQualifierAlignmentFixer::analyze(
SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
FormatTokenLexer &Tokens) {
tooling::Replacements Fixes;
- const AdditionalKeywords &Keywords = Tokens.getKeywords();
- const SourceManager &SourceMgr = Env.getSourceManager();
AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
+ fixQualifierAlignment(AnnotatedLines, Tokens, Fixes);
+ return {Fixes, 0};
+}
+void LeftRightQualifierAlignmentFixer::fixQualifierAlignment(
+ SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens,
+ tooling::Replacements &Fixes) {
+ const AdditionalKeywords &Keywords = Tokens.getKeywords();
+ const SourceManager &SourceMgr = Env.getSourceManager();
tok::TokenKind QualifierToken = getTokenFromQualifier(Qualifier);
assert(QualifierToken != tok::identifier && "Unrecognised Qualifier");
for (AnnotatedLine *Line : AnnotatedLines) {
+ fixQualifierAlignment(Line->Children, Tokens, Fixes);
if (!Line->Affected || Line->InPPDirective)
continue;
FormatToken *First = Line->First;
@@ -565,7 +572,6 @@ LeftRightQualifierAlignmentFixer::analyze(
}
}
}
- return {Fixes, 0};
}
void prepareLeftRightOrderingForQualifierAlignmentFixer(
diff --git a/clang/lib/Format/QualifierAlignmentFixer.h b/clang/lib/Format/QualifierAlignmentFixer.h
index a72d135179f1..e922d8005595 100644
--- a/clang/lib/Format/QualifierAlignmentFixer.h
+++ b/clang/lib/Format/QualifierAlignmentFixer.h
@@ -52,6 +52,10 @@ public:
static tok::TokenKind getTokenFromQualifier(const std::string &Qualifier);
+ void fixQualifierAlignment(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens,
+ tooling::Replacements &Fixes);
+
const FormatToken *analyzeRight(const SourceManager &SourceMgr,
const AdditionalKeywords &Keywords,
tooling::Replacements &Fixes,
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index a6cd0bb9ea2a..da8e2db36aaa 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -148,6 +148,9 @@ bool Sema::isSimpleTypeSpecifier(tok::TokenKind Kind) const {
case tok::kw___ibm128:
case tok::kw_wchar_t:
case tok::kw_bool:
+ case tok::kw__Accum:
+ case tok::kw__Fract:
+ case tok::kw__Sat:
#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
#include "clang/Basic/TransformTypeTraits.def"
case tok::kw___auto_type:
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 023411c7edc9..081b568762ae 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -4506,6 +4506,36 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
.get();
break;
+ case ICK_Fixed_Point_Conversion:
+ assert((FromType->isFixedPointType() || ToType->isFixedPointType()) &&
+ "Attempting implicit fixed point conversion without a fixed "
+ "point operand");
+ if (FromType->isFloatingType())
+ From = ImpCastExprToType(From, ToType, CK_FloatingToFixedPoint,
+ VK_PRValue,
+ /*BasePath=*/nullptr, CCK).get();
+ else if (ToType->isFloatingType())
+ From = ImpCastExprToType(From, ToType, CK_FixedPointToFloating,
+ VK_PRValue,
+ /*BasePath=*/nullptr, CCK).get();
+ else if (FromType->isIntegralType(Context))
+ From = ImpCastExprToType(From, ToType, CK_IntegralToFixedPoint,
+ VK_PRValue,
+ /*BasePath=*/nullptr, CCK).get();
+ else if (ToType->isIntegralType(Context))
+ From = ImpCastExprToType(From, ToType, CK_FixedPointToIntegral,
+ VK_PRValue,
+ /*BasePath=*/nullptr, CCK).get();
+ else if (ToType->isBooleanType())
+ From = ImpCastExprToType(From, ToType, CK_FixedPointToBoolean,
+ VK_PRValue,
+ /*BasePath=*/nullptr, CCK).get();
+ else
+ From = ImpCastExprToType(From, ToType, CK_FixedPointCast,
+ VK_PRValue,
+ /*BasePath=*/nullptr, CCK).get();
+ break;
+
case ICK_Compatible_Conversion:
From = ImpCastExprToType(From, ToType, CK_NoOp, From->getValueKind(),
/*BasePath=*/nullptr, CCK).get();
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 858654e35cbb..9800d7f1c9cf 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -158,7 +158,8 @@ ImplicitConversionRank clang::GetConversionRank(ImplicitConversionKind Kind) {
// it was omitted by the patch that added
// ICK_Zero_Queue_Conversion
ICR_C_Conversion,
- ICR_C_Conversion_Extension
+ ICR_C_Conversion_Extension,
+ ICR_Conversion,
};
static_assert(std::size(Rank) == (int)ICK_Num_Conversion_Kinds);
return Rank[(int)Kind];
@@ -197,7 +198,8 @@ static const char* GetImplicitConversionName(ImplicitConversionKind Kind) {
"OpenCL Zero Event Conversion",
"OpenCL Zero Queue Conversion",
"C specific type conversion",
- "Incompatible pointer conversion"
+ "Incompatible pointer conversion",
+ "Fixed point conversion",
};
static_assert(std::size(Name) == (int)ICK_Num_Conversion_Kinds);
return Name[Kind];
@@ -2189,6 +2191,9 @@ static bool IsStandardConversion(Sema &S, Expr* From, QualType ToType,
From->isIntegerConstantExpr(S.getASTContext())) {
SCS.Second = ICK_Compatible_Conversion;
FromType = ToType;
+ } else if (ToType->isFixedPointType() || FromType->isFixedPointType()) {
+ SCS.Second = ICK_Fixed_Point_Conversion;
+ FromType = ToType;
} else {
// No second conversion required.
SCS.Second = ICK_Identity;
@@ -5947,6 +5952,7 @@ static bool CheckConvertedConstantConversions(Sema &S,
case ICK_Zero_Event_Conversion:
case ICK_C_Only_Conversion:
case ICK_Incompatible_Pointer_Conversion:
+ case ICK_Fixed_Point_Conversion:
return false;
case ICK_Lvalue_To_Rvalue:
diff --git a/clang/test/AST/Interp/builtin-functions.cpp b/clang/test/AST/Interp/builtin-functions.cpp
index acc2dc315a27..0726dab37cb4 100644
--- a/clang/test/AST/Interp/builtin-functions.cpp
+++ b/clang/test/AST/Interp/builtin-functions.cpp
@@ -324,3 +324,10 @@ namespace clrsb {
char clrsb11[__builtin_clrsb(0xf) == BITSIZE(int) - 5 ? 1 : -1];
char clrsb12[__builtin_clrsb(~0x1f) == BITSIZE(int) - 6 ? 1 : -1];
}
+
+namespace bitreverse {
+ char bitreverse1[__builtin_bitreverse8(0x01) == 0x80 ? 1 : -1];
+ char bitreverse2[__builtin_bitreverse16(0x3C48) == 0x123C ? 1 : -1];
+ char bitreverse3[__builtin_bitreverse32(0x12345678) == 0x1E6A2C48 ? 1 : -1];
+ char bitreverse4[__builtin_bitreverse64(0x0123456789ABCDEFULL) == 0xF7B3D591E6A2C480 ? 1 : -1];
+}
diff --git a/clang/test/Driver/aarch64-mcpu.c b/clang/test/Driver/aarch64-mcpu.c
index 321d3a739b35..511482a420da 100644
--- a/clang/test/Driver/aarch64-mcpu.c
+++ b/clang/test/Driver/aarch64-mcpu.c
@@ -44,12 +44,16 @@
// CORTEXX1C: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-x1c"
// RUN: %clang --target=aarch64 -mcpu=cortex-x3 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEXX3 %s
// CORTEXX3: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-x3"
+// RUN: %clang --target=aarch64 -mcpu=cortex-x4 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-X4 %s
+// CORTEX-X4: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-x4"
// RUN: %clang --target=aarch64 -mcpu=cortex-a78 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEXA78 %s
// CORTEXA78: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a78"
// RUN: %clang --target=aarch64 -mcpu=cortex-a78c -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A78C %s
// CORTEX-A78C: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a78c"
// RUN: %clang --target=aarch64 -mcpu=cortex-a715 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A715 %s
// CORTEX-A715: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a715"
+// RUN: %clang --target=aarch64 -mcpu=cortex-a720 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A720 %s
+// CORTEX-A720: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a720"
// RUN: %clang --target=aarch64 -mcpu=neoverse-e1 -### -c %s 2>&1 | FileCheck -check-prefix=NEOVERSE-E1 %s
// NEOVERSE-E1: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "neoverse-e1"
// RUN: %clang --target=aarch64 -mcpu=neoverse-v1 -### -c %s 2>&1 | FileCheck -check-prefix=NEOVERSE-V1 %s
@@ -62,6 +66,8 @@
// NEOVERSE-N2: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "neoverse-n2"
// RUN: %clang --target=aarch64 -mcpu=neoverse-512tvb -### -c %s 2>&1 | FileCheck -check-prefix=NEOVERSE-512TVB %s
// NEOVERSE-512TVB: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "neoverse-512tvb"
+// RUN: %clang --target=aarch64 -mcpu=cortex-a520 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A520 %s
+// CORTEX-A520: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a520"
// RUN: %clang --target=aarch64 -mcpu=cortex-r82 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEXR82 %s
// CORTEXR82: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-r82"
diff --git a/clang/test/Driver/dynamic-linker.c b/clang/test/Driver/dynamic-linker.c
index 555e46aba5f0..978907e0adee 100644
--- a/clang/test/Driver/dynamic-linker.c
+++ b/clang/test/Driver/dynamic-linker.c
@@ -11,7 +11,7 @@
// RUN: %clang -target x86_64-unknown-linux-gnu -### -shared /dev/null -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHARED %s
-// RUN: %clang -target armv7-unknown-linux-gnueabi -### -shared -rdynamic /dev/null -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHARED %s
+// RUN: %clang --target=armv7-unknown-linux-gnueabi -### -Werror -shared -rdynamic /dev/null -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHARED %s
// RUN: %clang -target i386-unknown-linux-gnu -### -shared -rdynamic /dev/null -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHARED %s
// RUN: %clang -target mips64-unknown-linux-gnu -### -shared -rdynamic /dev/null -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHARED %s
// RUN: %clang -target powerpc64-unknown-linux-gnu -### -shared -rdynamic /dev/null -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHARED %s
diff --git a/clang/test/Driver/linux-ld.c b/clang/test/Driver/linux-ld.c
index 1e3666539de8..7adb078f755d 100644
--- a/clang/test/Driver/linux-ld.c
+++ b/clang/test/Driver/linux-ld.c
@@ -182,12 +182,8 @@
// RUN: --sysroot=%S/Inputs/basic_linux_tree \
// RUN: | FileCheck --check-prefix=CHECK-CLANG-LD-STATIC-PIE %s
// CHECK-CLANG-LD-STATIC-PIE: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]"
-// CHECK-CLANG-LD-STATIC-PIE: "-static"
-// CHECK-CLANG-LD-STATIC-PIE: "-pie"
-// CHECK-CLANG-LD-STATIC-PIE: "--no-dynamic-linker"
-// CHECK-CLANG-LD-STATIC-PIE: "-z"
-// CHECK-CLANG-LD-STATIC-PIE: "text"
// CHECK-CLANG-LD-STATIC-PIE: "-m" "elf_x86_64"
+// CHECK-CLANG-LD-STATIC-PIE-SAME: "-static" "-pie" "--no-dynamic-linker" "-z" "text"
// CHECK-CLANG-LD-STATIC-PIE: "{{.*}}rcrt1.o"
// CHECK-CLANG-LD-STATIC-PIE: "--start-group" "-lgcc" "-lgcc_eh" "-lc" "--end-group"
//
@@ -197,12 +193,8 @@
// RUN: --sysroot=%S/Inputs/basic_linux_tree \
// RUN: | FileCheck --check-prefix=CHECK-CLANG-LD-STATIC-PIE-PIE %s
// CHECK-CLANG-LD-STATIC-PIE-PIE: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]"
-// CHECK-CLANG-LD-STATIC-PIE-PIE: "-static"
-// CHECK-CLANG-LD-STATIC-PIE-PIE: "-pie"
-// CHECK-CLANG-LD-STATIC-PIE-PIE: "--no-dynamic-linker"
-// CHECK-CLANG-LD-STATIC-PIE-PIE: "-z"
-// CHECK-CLANG-LD-STATIC-PIE-PIE: "text"
// CHECK-CLANG-LD-STATIC-PIE-PIE: "-m" "elf_x86_64"
+// CHECK-CLANG-LD-STATIC-PIE-PIE-SAME: "-static" "-pie" "--no-dynamic-linker" "-z" "text"
// CHECK-CLANG-LD-STATIC-PIE-PIE: "{{.*}}rcrt1.o"
// CHECK-CLANG-LD-STATIC-PIE-PIE: "--start-group" "-lgcc" "-lgcc_eh" "-lc" "--end-group"
//
@@ -212,12 +204,8 @@
// RUN: --sysroot=%S/Inputs/basic_linux_tree \
// RUN: | FileCheck --check-prefix=CHECK-CLANG-LD-STATIC-PIE-STATIC %s
// CHECK-CLANG-LD-STATIC-PIE-STATIC: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]"
-// CHECK-CLANG-LD-STATIC-PIE-STATIC: "-static"
-// CHECK-CLANG-LD-STATIC-PIE-STATIC: "-pie"
-// CHECK-CLANG-LD-STATIC-PIE-STATIC: "--no-dynamic-linker"
-// CHECK-CLANG-LD-STATIC-PIE-STATIC: "-z"
-// CHECK-CLANG-LD-STATIC-PIE-STATIC: "text"
// CHECK-CLANG-LD-STATIC-PIE-STATIC: "-m" "elf_x86_64"
+// CHECK-CLANG-LD-STATIC-PIE-STATIC-SAME: "-static" "-pie" "--no-dynamic-linker" "-z" "text"
// CHECK-CLANG-LD-STATIC-PIE-STATIC: "{{.*}}rcrt1.o"
// CHECK-CLANG-LD-STATIC-PIE-STATIC: "--start-group" "-lgcc" "-lgcc_eh" "-lc" "--end-group"
//
diff --git a/clang/test/Driver/ohos.c b/clang/test/Driver/ohos.c
index 9b98c709815c..dfb7981525fd 100644
--- a/clang/test/Driver/ohos.c
+++ b/clang/test/Driver/ohos.c
@@ -17,9 +17,9 @@
// CHECK-NOT: "-fno-common"
// CHECK: {{.*}}ld.lld{{.*}}" "--sysroot=[[SYSROOT]]"
// CHECK-NOT: "--sysroot=[[SYSROOT]]"
-// CHECK: "-pie"
// CHECK-NOT: "--build-id"
// CHECK: "--hash-style=both"
+// CHECK: "-pie"
// CHECK: "-dynamic-linker" "/lib/ld-musl-arm.so.1"
// CHECK: Scrt1.o
// CHECK: crti.o
diff --git a/clang/test/Frontend/fixed_point_conversions.c b/clang/test/Frontend/fixed_point_conversions.c
index ebd1d7e521df..efa3f1b34724 100644
--- a/clang/test/Frontend/fixed_point_conversions.c
+++ b/clang/test/Frontend/fixed_point_conversions.c
@@ -1,6 +1,12 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -ffixed-point -triple x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,SIGNED
-// RUN: %clang_cc1 -ffixed-point -triple x86_64-unknown-linux-gnu -S -emit-llvm %s -o - -fpadding-on-unsigned-fixed-point | FileCheck %s --check-prefixes=CHECK,UNSIGNED
+// RUN: %clang_cc1 -x c -ffixed-point -triple x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,SIGNED
+// RUN: %clang_cc1 -x c -ffixed-point -triple x86_64-unknown-linux-gnu -S -emit-llvm %s -o - -fpadding-on-unsigned-fixed-point | FileCheck %s --check-prefixes=CHECK,UNSIGNED
+// RUN: %clang_cc1 -x c++ -ffixed-point -triple x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,SIGNED
+// RUN: %clang_cc1 -x c++ -ffixed-point -triple x86_64-unknown-linux-gnu -S -emit-llvm %s -o - -fpadding-on-unsigned-fixed-point | FileCheck %s --check-prefixes=CHECK,UNSIGNED
+
+#ifdef __cplusplus
+extern "C" {
+#endif
short _Accum sa;
_Accum a, a2;
@@ -994,3 +1000,7 @@ void float_sat5(void) {
void float_sat6(void) {
sat_uf = fl;
}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/clang/test/Misc/cc1as-compress.s b/clang/test/Misc/cc1as-compress.s
index 54563c33bd16..4c81c5ca0f7f 100644
--- a/clang/test/Misc/cc1as-compress.s
+++ b/clang/test/Misc/cc1as-compress.s
@@ -12,4 +12,4 @@
// ZSTD: 0000 02000000 00000000
.section .debug_str,"MS",@progbits,1
-.asciz "perfectly compressable data sample *****************************************"
+.asciz "perfectly compressable data sample ******************************************"
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
index 8e91eb4c62dd..25ff51e071b6 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -5,11 +5,11 @@
// RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AARCH64
// AARCH64: error: unknown target CPU 'not-a-cpu'
-// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, grace{{$}}
+// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, grace{{$}}
// RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_AARCH64
// TUNE_AARCH64: error: unknown target CPU 'not-a-cpu'
-// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, grace{{$}}
+// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, grace{{$}}
// RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86
// X86: error: unknown target CPU 'not-a-cpu'
diff --git a/clang/test/Sema/builtin-classify-type.c b/clang/test/Sema/builtin-classify-type.c
index a222ac8af0e3..9a4de34e823f 100644
--- a/clang/test/Sema/builtin-classify-type.c
+++ b/clang/test/Sema/builtin-classify-type.c
@@ -11,7 +11,8 @@ enum gcc_type_class {
function_type_class, method_type_class,
record_type_class, union_type_class,
array_type_class, string_type_class,
- lang_type_class
+ lang_type_class, opaque_type_class,
+ bitint_type_class
};
void foo(void) {
@@ -45,6 +46,7 @@ void foo(void) {
vint32_t3 vt5;
typedef _BitInt(64) vint64_t3 __attribute__((vector_size(16)));
vint64_t3 vt6;
+ _BitInt(16) bitint;
_Atomic int atomic_i;
_Atomic double atomic_d;
@@ -70,6 +72,7 @@ void foo(void) {
int a17[__builtin_classify_type(atomic_d) == real_type_class ? 1 : -1];
int a18[__builtin_classify_type(complex_i) == complex_type_class ? 1 : -1];
int a19[__builtin_classify_type(complex_d) == complex_type_class ? 1 : -1];
+ int a20[__builtin_classify_type(bitint) == bitint_type_class ? 1 : -1];
}
extern int (^p)(void);
diff --git a/clang/test/SemaCXX/builtin-classify-type.cpp b/clang/test/SemaCXX/builtin-classify-type.cpp
index ebc81425e401..ed5430960001 100644
--- a/clang/test/SemaCXX/builtin-classify-type.cpp
+++ b/clang/test/SemaCXX/builtin-classify-type.cpp
@@ -11,7 +11,8 @@ enum gcc_type_class {
function_type_class, method_type_class,
record_type_class, union_type_class,
array_type_class, string_type_class,
- lang_type_class
+ lang_type_class, opaque_type_class,
+ bitint_type_class
};
class cl {
@@ -42,6 +43,7 @@ void foo() {
_Atomic double atomic_d;
_Complex int complex_i;
_Complex double complex_d;
+ _BitInt(32) bitint;
int a1[__builtin_classify_type(f()) == void_type_class ? 1 : -1];
int a2[__builtin_classify_type(i) == integer_type_class ? 1 : -1];
@@ -65,5 +67,6 @@ void foo() {
int a20[__builtin_classify_type(atomic_d) == real_type_class ? 1 : -1];
int a21[__builtin_classify_type(complex_i) == complex_type_class ? 1 : -1];
int a22[__builtin_classify_type(complex_d) == complex_type_class ? 1 : -1];
+ int a23[__builtin_classify_type(bitint) == bitint_type_class ? 1 : -1];
}
diff --git a/clang/unittests/Format/QualifierFixerTest.cpp b/clang/unittests/Format/QualifierFixerTest.cpp
index a56323a88f4a..324366ca7f5e 100644
--- a/clang/unittests/Format/QualifierFixerTest.cpp
+++ b/clang/unittests/Format/QualifierFixerTest.cpp
@@ -357,6 +357,9 @@ TEST_F(QualifierFixerTest, RightQualifier) {
verifyFormat("void f(std::integral auto const &x);",
"void f(const std::integral auto &x);", Style);
+ verifyFormat("auto lambda = [] { int const i = 0; };",
+ "auto lambda = [] { const int i = 0; };", Style);
+
verifyFormat("Foo<Foo<int> const> P;\n#if 0\n#else\n#endif",
"Foo<const Foo<int>> P;\n#if 0\n#else\n#endif", Style);
@@ -663,6 +666,9 @@ TEST_F(QualifierFixerTest, LeftQualifier) {
verifyFormat("void f(const std::integral auto &x);",
"void f(std::integral auto const &x);", Style);
+ verifyFormat("auto lambda = [] { const int i = 0; };",
+ "auto lambda = [] { int const i = 0; };", Style);
+
verifyFormat("Foo<const Foo<int>> P;\n#if 0\n#else\n#endif",
"Foo<Foo<int> const> P;\n#if 0\n#else\n#endif", Style);
diff --git a/compiler-rt/lib/orc/bitmask_enum.h b/compiler-rt/lib/orc/bitmask_enum.h
new file mode 100644
index 000000000000..b9fb776bdf23
--- /dev/null
+++ b/compiler-rt/lib/orc/bitmask_enum.h
@@ -0,0 +1,151 @@
+//===---- bitmask_enum.h - Enable bitmask operations on enums ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of the ORC runtime support library.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ORC_RT_BITMASK_ENUM_H
+#define ORC_RT_BITMASK_ENUM_H
+
+#include "stl_extras.h"
+
+#include <cassert>
+#include <type_traits>
+
+namespace __orc_rt {
+
+/// ORC_RT_MARK_AS_BITMASK_ENUM lets you opt in an individual enum type so you
+/// can perform bitwise operations on it without putting static_cast everywhere.
+///
+/// \code
+/// enum MyEnum {
+/// E1 = 1, E2 = 2, E3 = 4, E4 = 8,
+/// ORC_RT_MARK_AS_BITMASK_ENUM(/* LargestValue = */ E4)
+/// };
+///
+/// void Foo() {
+/// MyEnum A = (E1 | E2) & E3 ^ ~E4; // Look, ma: No static_cast!
+/// }
+/// \endcode
+///
+/// Normally when you do a bitwise operation on an enum value, you get back an
+/// instance of the underlying type (e.g. int). But using this macro, bitwise
+/// ops on your enum will return you back instances of the enum. This is
+/// particularly useful for enums which represent a combination of flags.
+///
+/// The parameter to ORC_RT_MARK_AS_BITMASK_ENUM should be the largest
+/// individual value in your enum.
+///
+/// All of the enum's values must be non-negative.
+#define ORC_RT_MARK_AS_BITMASK_ENUM(LargestValue) \
+ ORC_RT_BITMASK_LARGEST_ENUMERATOR = LargestValue
+
+/// ORC_RT_DECLARE_ENUM_AS_BITMASK can be used to declare an enum type as a bit
+/// set, so that bitwise operations on such an enum do not require static_cast.
+///
+/// \code
+/// enum MyEnum { E1 = 1, E2 = 2, E3 = 4, E4 = 8 };
+/// ORC_RT_DECLARE_ENUM_AS_BITMASK(MyEnum, E4);
+///
+/// void Foo() {
+/// MyEnum A = (E1 | E2) & E3 ^ ~E4; // No static_cast
+/// }
+/// \endcode
+///
+/// The second parameter to ORC_RT_DECLARE_ENUM_AS_BITMASK specifies the largest
+/// bit value of the enum type.
+///
+/// ORC_RT_DECLARE_ENUM_AS_BITMASK should be used in the __orc_rt namespace.
+///
+/// This is a non-intrusive alternative to ORC_RT_MARK_AS_BITMASK_ENUM. It
+/// allows declaring more than one non-scoped enumeration as a bitmask type in
+/// the same scope. Otherwise it provides the same functionality as
+/// ORC_RT_MARK_AS_BITMASK_ENUM.
+#define ORC_RT_DECLARE_ENUM_AS_BITMASK(Enum, LargestValue) \
+ template <> struct is_bitmask_enum<Enum> : std::true_type {}; \
+ template <> struct largest_bitmask_enum_bit<Enum> { \
+ static constexpr std::underlying_type_t<Enum> value = LargestValue; \
+ }
+
+/// Traits class to determine whether an enum has been declared as a bitwise
+/// enum via ORC_RT_DECLARE_ENUM_AS_BITMASK.
+template <typename E, typename Enable = void>
+struct is_bitmask_enum : std::false_type {};
+
+template <typename E>
+struct is_bitmask_enum<
+ E, std::enable_if_t<sizeof(E::ORC_RT_BITMASK_LARGEST_ENUMERATOR) >= 0>>
+ : std::true_type {};
+
+template <typename E>
+inline constexpr bool is_bitmask_enum_v = is_bitmask_enum<E>::value;
+
+/// Traits class to determine the largest bit of a bitmask enum.
+template <typename E, typename Enable = void> struct largest_bitmask_enum_bit;
+
+template <typename E>
+struct largest_bitmask_enum_bit<
+ E, std::enable_if_t<sizeof(E::ORC_RT_BITMASK_LARGEST_ENUMERATOR) >= 0>> {
+ using UnderlyingTy = std::underlying_type_t<E>;
+ static constexpr UnderlyingTy value =
+ static_cast<UnderlyingTy>(E::ORC_RT_BITMASK_LARGEST_ENUMERATOR);
+};
+
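+/// Returns the mask covering every bit up to and including the highest bit of
+/// the largest enumerator (e.g. a largest value of 8 gives a mask of 15).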
+template <typename E> constexpr std::underlying_type_t<E> Mask() {
+ return bit_ceil(largest_bitmask_enum_bit<E>::value) - 1;
+}
+
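+/// Casts Val to its underlying integer type, asserting that it is
+/// non-negative and fits within Mask<E>().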
+template <typename E> constexpr std::underlying_type_t<E> Underlying(E Val) {
+ auto U = static_cast<std::underlying_type_t<E>>(Val);
+ assert(U >= 0 && "Negative enum values are not allowed");
+  assert(U <= Mask<E>() && "Enum value too large (or largest value too small)");
+ return U;
+}
+
+template <typename E, typename = std::enable_if_t<is_bitmask_enum_v<E>>>
+constexpr E operator~(E Val) {
+ return static_cast<E>(~Underlying(Val) & Mask<E>());
+}
+
+template <typename E, typename = std::enable_if_t<is_bitmask_enum_v<E>>>
+constexpr E operator|(E LHS, E RHS) {
+ return static_cast<E>(Underlying(LHS) | Underlying(RHS));
+}
+
+template <typename E, typename = std::enable_if_t<is_bitmask_enum_v<E>>>
+constexpr E operator&(E LHS, E RHS) {
+ return static_cast<E>(Underlying(LHS) & Underlying(RHS));
+}
+
+template <typename E, typename = std::enable_if_t<is_bitmask_enum_v<E>>>
+constexpr E operator^(E LHS, E RHS) {
+ return static_cast<E>(Underlying(LHS) ^ Underlying(RHS));
+}
+
+template <typename E, typename = std::enable_if_t<is_bitmask_enum_v<E>>>
+E &operator|=(E &LHS, E RHS) {
+ LHS = LHS | RHS;
+ return LHS;
+}
+
+template <typename E, typename = std::enable_if_t<is_bitmask_enum_v<E>>>
+E &operator&=(E &LHS, E RHS) {
+ LHS = LHS & RHS;
+ return LHS;
+}
+
+template <typename E, typename = std::enable_if_t<is_bitmask_enum_v<E>>>
+E &operator^=(E &LHS, E RHS) {
+ LHS = LHS ^ RHS;
+ return LHS;
+}
+
+} // end namespace __orc_rt
+
+#endif // ORC_RT_BITMASK_ENUM_H
diff --git a/compiler-rt/lib/orc/stl_extras.h b/compiler-rt/lib/orc/stl_extras.h
index 33c877b193c5..80a6cd13ac28 100644
--- a/compiler-rt/lib/orc/stl_extras.h
+++ b/compiler-rt/lib/orc/stl_extras.h
@@ -13,6 +13,7 @@
#ifndef ORC_RT_STL_EXTRAS_H
#define ORC_RT_STL_EXTRAS_H
+#include <cstdint>
#include <utility>
#include <tuple>
@@ -28,6 +29,17 @@ template <class Ty> struct identity {
const Ty &operator()(const Ty &self) const { return self; }
};
+/// Returns the smallest power of two strictly greater than Val (e.g. 5 -> 8,
+/// 8 -> 16). Note that, unlike std::bit_ceil, this rounds up even for exact
+/// powers of two, which is what Mask() in bitmask_enum.h relies on.
+constexpr uint64_t bit_ceil(uint64_t Val) noexcept {
+ Val |= (Val >> 1);
+ Val |= (Val >> 2);
+ Val |= (Val >> 4);
+ Val |= (Val >> 8);
+ Val |= (Val >> 16);
+ Val |= (Val >> 32);
+ return Val + 1;
+}
+
} // namespace __orc_rt
#endif // ORC_RT_STL_EXTRAS
diff --git a/compiler-rt/lib/orc/tests/unit/CMakeLists.txt b/compiler-rt/lib/orc/tests/unit/CMakeLists.txt
index 7792d21bfa7c..81df7e803bc5 100644
--- a/compiler-rt/lib/orc/tests/unit/CMakeLists.txt
+++ b/compiler-rt/lib/orc/tests/unit/CMakeLists.txt
@@ -1,5 +1,6 @@
set(UNITTEST_SOURCES
adt_test.cpp
+ bitmask_enum_test.cpp
c_api_test.cpp
endian_test.cpp
error_test.cpp
diff --git a/compiler-rt/lib/orc/tests/unit/bitmask_enum_test.cpp b/compiler-rt/lib/orc/tests/unit/bitmask_enum_test.cpp
new file mode 100644
index 000000000000..4c27d54fb4a9
--- /dev/null
+++ b/compiler-rt/lib/orc/tests/unit/bitmask_enum_test.cpp
@@ -0,0 +1,143 @@
+//===-- bitmask_enum_test.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of the ORC runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#include "bitmask_enum.h"
+#include "gtest/gtest.h"
+
+#include <sstream>
+#include <string>
+
+using namespace __orc_rt;
+
+namespace {
+
+enum Flags { F0 = 0, F1 = 1, F2 = 2, F3 = 4, F4 = 8 };
+
+} // namespace
+
+namespace __orc_rt {
+ORC_RT_DECLARE_ENUM_AS_BITMASK(Flags, F4);
+} // namespace __orc_rt
+
+static_assert(is_bitmask_enum<Flags>::value != 0);
+static_assert(largest_bitmask_enum_bit<Flags>::value == Flags::F4);
+
+namespace {
+
+static_assert(is_bitmask_enum<Flags>::value != 0);
+static_assert(largest_bitmask_enum_bit<Flags>::value == Flags::F4);
+
+TEST(BitmaskEnumTest, BitwiseOr) {
+ Flags f = F1 | F2;
+ EXPECT_EQ(3, f);
+
+ f = f | F3;
+ EXPECT_EQ(7, f);
+}
+
+TEST(BitmaskEnumTest, BitwiseOrEquals) {
+ Flags f = F1;
+ f |= F3;
+ EXPECT_EQ(5, f);
+
+ // |= should return a reference to the LHS.
+ f = F2;
+ (f |= F3) = F1;
+ EXPECT_EQ(F1, f);
+}
+
+TEST(BitmaskEnumTest, BitwiseAnd) {
+ Flags f = static_cast<Flags>(3) & F2;
+ EXPECT_EQ(F2, f);
+
+ f = (f | F3) & (F1 | F2 | F3);
+ EXPECT_EQ(6, f);
+}
+
+TEST(BitmaskEnumTest, BitwiseAndEquals) {
+ Flags f = F1 | F2 | F3;
+ f &= F1 | F2;
+ EXPECT_EQ(3, f);
+
+ // &= should return a reference to the LHS.
+ (f &= F1) = F3;
+ EXPECT_EQ(F3, f);
+}
+
+TEST(BitmaskEnumTest, BitwiseXor) {
+ Flags f = (F1 | F2) ^ (F2 | F3);
+ EXPECT_EQ(5, f);
+
+ f = f ^ F1;
+ EXPECT_EQ(4, f);
+}
+
+TEST(BitmaskEnumTest, BitwiseXorEquals) {
+ Flags f = (F1 | F2);
+ f ^= (F2 | F4);
+ EXPECT_EQ(9, f);
+
+ // ^= should return a reference to the LHS.
+ (f ^= F4) = F3;
+ EXPECT_EQ(F3, f);
+}
+
+TEST(BitmaskEnumTest, ConstantExpression) {
+ constexpr Flags f1 = ~F1;
+ constexpr Flags f2 = F1 | F2;
+ constexpr Flags f3 = F1 & F2;
+ constexpr Flags f4 = F1 ^ F2;
+ EXPECT_EQ(f1, ~F1);
+ EXPECT_EQ(f2, F1 | F2);
+ EXPECT_EQ(f3, F1 & F2);
+ EXPECT_EQ(f4, F1 ^ F2);
+}
+
+TEST(BitmaskEnumTest, BitwiseNot) {
+ Flags f = ~F1;
+ EXPECT_EQ(14, f); // Largest value for f is 15.
+ EXPECT_EQ(15, ~F0);
+}
+
+enum class FlagsClass {
+ F0 = 0,
+ F1 = 1,
+ F2 = 2,
+ F3 = 4,
+ ORC_RT_MARK_AS_BITMASK_ENUM(F3)
+};
+
+TEST(BitmaskEnumTest, ScopedEnum) {
+ FlagsClass f = (FlagsClass::F1 & ~FlagsClass::F0) | FlagsClass::F2;
+ f |= FlagsClass::F3;
+ EXPECT_EQ(7, static_cast<int>(f));
+}
+
+struct Container {
+ enum Flags {
+ F0 = 0,
+ F1 = 1,
+ F2 = 2,
+ F3 = 4,
+ ORC_RT_MARK_AS_BITMASK_ENUM(F3)
+ };
+
+ static Flags getFlags() {
+ Flags f = F0 | F1;
+ f |= F2;
+ return f;
+ }
+};
+
+TEST(BitmaskEnumTest, EnumInStruct) { EXPECT_EQ(3, Container::getFlags()); }
+
+} // namespace
diff --git a/flang/docs/AssumedRank.md b/flang/docs/AssumedRank.md
new file mode 100644
index 000000000000..c5d2c3e3909c
--- /dev/null
+++ b/flang/docs/AssumedRank.md
@@ -0,0 +1,688 @@
+<!--===- docs/AssumedRank.md
+
+ Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ See https://llvm.org/LICENSE.txt for license information.
+ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+-->
+# Assumed-Rank Objects
+
+An assumed-rank dummy data object is a dummy argument that takes its rank from
+its effective argument. An assumed-rank object is either such a dummy argument
+or the associated entity of a SELECT RANK construct in the `RANK DEFAULT`
+block. Its rank is not known at compile time; it can be anything from 0
+(scalar) to the maximum rank allowed in Fortran (currently 15 according to
+Fortran 2018 standard section 5.4.6 point 1).
+
+This document summarizes the contexts where assumed-rank objects can appear,
+and then describes how they are implemented and lowered to HLFIR and FIR. All
+section references are made to the Fortran 2018 standard.
+
+## Fortran Standard References
+
+Here is a list of sections and constraints from the Fortran standard involving
+assumed-ranks.
+
+- 7.3.2.2 TYPE
+ - C711
+- 7.5.6.1 FINAL statement
+ - C789
+- 8.5.7 CONTIGUOUS attribute
+ - C830
+- 8.5.8 DIMENSION attribute
+- 8.5.8.7 Assumed-rank entity
+ - C837
+ - C838
+ - C839
+- 11.1.10 SELECT RANK
+- 15.5.2.13 Restrictions on entities associated with dummy arguments
+ - 1 (3) (b) and (c)
+ - 1 (4) (b) and (c)
+- 15.5.2.4 Ordinary dummy variables - point 17
+- 18 Interoperability with C
+ - 18.3.6 point 2 (5)
+
+### Summary of the constraints
+
+Assumed-ranks can:
+- be pointers or allocatables (or have neither of those attributes).
+- be monomorphic or polymorphic (both `TYPE(*)` and `CLASS(*)`)
+- have all the attributes, except VALUE and CODIMENSION (C837). Notably, they
+ can have the CONTIGUOUS or OPTIONAL attributes (C830).
+- appear as an actual argument of an assumed-rank dummy (C838)
+- appear as the selector of SELECT RANK (C838)
+- appear as the argument of C_LOC and C_SIZEOF from ISO_C_BINDING (C838)
+- appear as the first argument of inquiry intrinsic functions (C838). These
+ inquiry functions listed in table 16.1 are detailed in the "Assumed-rank
+ features" section below.
+- appear in BIND(C) and non-BIND(C) interfaces (18.1 point 3)
+- be finalized on entry as INTENT(OUT), under conditions that prevent the
+  assumed-rank from being associated with an assumed-size.
+- be associated with any kind of scalar or array, including assumed-size.
+
+Assumed-ranks cannot:
+- be coarrays (C837)
+- have the VALUE attribute (C837)
+- be something that is not a named variable (they cannot be the result of a
+ function or a component reference)
+- appear in a designator other than the cases listed above (C838). Notably,
+  they cannot be directly addressed, used in elemental operations or
+  transformational intrinsics, used in IO, or assigned to.
+- be finalized on entry as INTENT(OUT) if it could be associated with an
+ assumed-size (C839).
+- be used in a reference to a procedure without an explicit interface
+ (15.4.2.2. point 3 (c)).
+
+With regard to aliasing, assumed-rank dummy objects follow the same rules as
+assumed shapes, with the addition of 15.5.2.13 (c), which covers the case
+where the actual argument is a scalar: a TARGET assumed-rank may alias with
+other dummies when the actual argument is a scalar, even if it has the
+CONTIGUOUS attribute, whereas it is legitimate to assume that a CONTIGUOUS
+TARGET assumed shape does not alias with other dummies.
+
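+As an illustrative sketch (the subroutine and variable names here are
+hypothetical):
+
+```Fortran
+subroutine scalar_alias(x, p)
+  real, target, contiguous :: x(..)
+  real, pointer :: p
+  ! If the actual argument for x is a scalar, p may legally point to it, so
+  ! x and p may alias here even though x is a CONTIGUOUS TARGET assumed-rank.
+end subroutine
+```
+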
+---
+
+## Assumed-Rank Representations in Flang
+
+### Representation in Semantics
+In semantics, there is no concept of an assumed-rank expression (none is
+needed in `evaluate::Expr`). Assumed-rank symbols have either
+`semantics::ObjectEntityDetails` (dummy data objects) with a
+`semantics::ArraySpec` that encodes the "assumed-rank-shape" (which can be
+tested with IsAssumedRank()), or `semantics::AssocEntityDetails` (associated
+entity in the RANK DEFAULT case).
+
+Inside a SELECT RANK, a `semantics::Symbol` is created for the associated
+entity with `semantics::AssocEntityDetails` that points to the selector and
+holds the rank outside of the RANK DEFAULT case.
+
+Assumed-rank dummies are also represented in the
+`evaluate::characteristics::TypeAndShape` (with the AssumedRank attribute) to
+represent assumed-rank in procedure characteristics.
+
+### Runtime Representation of Assumed-Ranks
+Assumed-ranks are implemented as CFI_cdesc_t (18.5.3) with the addition of an
+f18 specific addendum when required for the type. This is the usual f18
+descriptor, and no change is required to represent assumed-ranks in this data
+structure. In fact, there is no difference between the runtime descriptor
+created for an assumed shape and the runtime descriptor created when the
+corresponding entity is passed as an assumed-rank.
+
+This means that any descriptor can be passed to an assumed-rank dummy (with
+care to ensure that the POINTER/ALLOCATABLE attributes match the dummy argument
+attributes, as usual). Notably, any runtime interface that takes descriptor
+arguments of any rank already works with assumed-rank entities without any
+changes or special cases.
+
+This also implies that the runtime cannot tell that an entity is an
+assumed-rank based on its descriptor, but there seems to be no need for this
+so far ("rank based" dispatching for user defined assignments and IO is not
+possible with assumed-ranks, and finalization is possible, but there is no need
+for the runtime to distinguish between finalization of an assumed-rank and
+finalization of other entities: only the runtime rank matters).
+
+The only difference and difficulty is that the descriptor storage size of an
+assumed-rank cannot be precisely known at compile time, and this impacts the
+way descriptor copies are generated in inline code. The size can still be
+maximized using the maximum rank, which the runtime code already does when
+creating temporary descriptors in many cases. Inline code also needs care if
+it needs to access the descriptor addendum (like the type descriptor), since
+its offset will not be a compile time constant as it usually is.
+
+Note that an alternative to maximizing the allocation of assumed-rank
+temporary descriptors would be to use automatic allocation based on the rank
+of the input descriptor, but this would make stack allocation analysis more
+complex (tools will likely not have the Fortran knowledge that this allocation
+size is bounded, for instance), while the stack "over" allocation is likely
+reasonable (24 bytes per dimension). Hence the simple approach of static
+allocation sized for the maximum rank was selected.
+
+### Representation in FIR and HLFIR
+SSA values for assumed-rank entities have an MLIR type containing a
+`!fir.array<*xT>` sequence type wrapped in a `!fir.box` or `!fir.class` type
+(additionally wrapped in a `!fir.ref` type for pointers and allocatables).
+
+Examples:
+`INTEGER :: x(..)` -> `!fir.box<!fir.array<* x i32>>`
+`CLASS(*) :: x(..)` -> `!fir.class<!fir.array<* x none>>`
+`TYPE(*) :: x(..)` -> `!fir.box<!fir.array<* x none>>`
+`REAL, ALLOCATABLE :: x(..)` -> `!fir.ref<!fir.box<!fir.heap<!fir.array<* x f32>>>>`
+`TYPE(t), POINTER :: x(..)` -> `!fir.ref<!fir.box<!fir.ptr<!fir.array<* x !fir.type<t>>>>>`
+
+All these FIR types are implemented as the address of a CFI_cdesc_t in code
+generation.
+
+There is no need to allow assumed-rank "expressions" in HLFIR (hlfir.expr)
+since assumed-ranks cannot appear in expressions (except as the actual
+argument to an assumed-rank dummy): assumed-ranks are variables. Also, since
+they cannot have the VALUE attribute, there is no need to use the
+hlfir.as_expr + hlfir.associate idiom to make copies of them.
+
+FIR/HLFIR operations where assumed-ranks may appear:
+- as `hlfir.declare` and `fir.declare` operand and result.
+- as `fir.convert` operand and/or result.
+- as `fir.load` operand and result (POINTER and ALLOCATABLE dereference).
+- as a block argument (dummy argument).
+- as `fir.rebox_assumed_rank` operand/result (new operation to change some
+ fields of assumed-rank descriptors).
+- as `fir.box_rank` operand (rank inquiry).
+- as `fir.box_dim` operand (direct user inquiry about the bounds of an
+  assumed-rank in a compile time constant dimension).
+- as `fir.box_addr` operand (to get the base address in inlined code for
+ C_LOC).
+- as `fir.box_elesize` operand (to implement LEN and STORAGE_SIZE).
+- as `fir.absent` result (passing absent actual to OPTIONAL assumed-rank dummy)
+- as `fir.is_present` operand (PRESENT inquiry)
+- as `hlfir.copy_in` and `hlfir.copy_out` operand and result (copy in and
+ copy-out of assumed-rank)
+- as `fir.alloca` type and result (when creating an assumed-rank POINTER dummy
+ from a non POINTER dummy).
+- as `fir.store` operands (same case as `fir.alloca`).
+
+FIR/HLFIR operations that should not need to accept assumed-ranks, but where
+support could still be relevant:
+- `fir.box_tdesc` and `fir.box_typecode` (polymorphic assumed-rank cannot
+ appear in a SELECT TYPE directly without using a SELECT RANK). Given the
+ CFI_cdesc_t structure, no change would be needed for `fir.box_typecode` to
+ support assumed-ranks, but `fir.box_tdesc` would require change since the
+ position of the type descriptor pointer depends on the rank.
+- as `fir.allocmem` / `fir.global` result (assumed-ranks are never local/global
+ entities).
+- as `fir.embox` result (when creating a descriptor for an explicit shape, the
+  descriptor can be created with the entity rank, and then cast via
+  `fir.convert`).
+
+It is not expected for any other FIR or HLFIR operations to handle assumed-rank
+SSA values.
+
+#### Summary of the impact in FIR
+One new operation is needed, `fir.rebox_assumed_rank`, the rationale being
+that fir.rebox codegen is already quite complex and not all aspects of
+fir.rebox matter for assumed-ranks (only simple field changes are required
+with assumed-ranks). Also, this operation will be allowed to take an operand
+in memory to avoid an expensive fir.load of pointer/allocatable inputs. The
+operation will also allow creating a rank-one assumed-size descriptor from an
+input assumed-rank descriptor to cover the SELECT RANK `RANK(*)` case.
+
+It is proposed that the FIR descriptor inquiry operations (fir.box_addr,
+fir.box_rank, fir.box_dim, and fir.box_elesize at least) be allowed to take
+fir.ref<fir.box> arguments (allocatable and pointer descriptors) directly
+instead of generating a fir.load first. A conditional "read" effect will be
+added in that case. Again, the purpose is to avoid generating descriptor copies
+for the sole purpose of satisfying the SSA IR constraints. This change will
+likely benefit the non-assumed-rank case too (even though LLVM is quite good at
+removing pointless descriptor copies in these cases).
+
+It will be ensured that all the operations listed above accept assumed-rank
+operands (both the verifiers and codegen). The codegen of `fir.load`,
+`fir.alloca`, `fir.store`, `hlfir.copy_in` and `hlfir.copy_out` will need
+special handling for assumed-ranks.
+
+### Representation in LLVM IR
+
+Assumed-rank descriptor types are lowered to the LLVM type of a CFI_cdesc_t
+descriptor with no dimension array field and no addendum. That way, any inline
+code attempt to directly access the dimensions or the addendum at a constant
+offset will be invalid, which adds safety, while it remains easy to generate
+LLVM GEPs to address the leading descriptor fields (to get the base address,
+rank, type code, and attributes).
+
+`!fir.box<!fir.array<* x i32>>` -> `!llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>`
+
+## Assumed-rank Features
+
+This section lists the different Fortran features where assumed-rank objects are
+involved and describes the related implementation design.
+
+### Assumed-rank in procedure references
+Assumed-rank arguments are implemented as being the address of a CFI_cdesc_t.
+
+When passing an actual argument to an assumed-rank dummy, the following points
+need special attention and are further described below:
+- Copy-in/copy-out when required
+- Creation or forwarding of the assumed-rank dummy descriptor (including when
+  the actual is an assumed-size).
+- Finalization, deallocation, and initialization of INTENT(OUT) assumed-rank
+  dummies.
+
+OPTIONAL assumed-ranks are implemented like other non assumed-rank OPTIONAL
+objects passed by descriptor: an absent assumed-rank is represented by a null
+pointer to a CFI_cdesc_t.
+
+The passing interface for assumed-ranks described above and below is compliant
+by default with the BIND(C) case, except for the assumed-rank dummy descriptor
+lower bounds, which are only set to zero for BIND(C) interfaces because doing
+so implies creating a new descriptor in most cases.
+
+VALUE is forbidden for assumed-rank dummies, so there is nothing to be done for
+it (although since copy-in/copy-out is possible, the compiler must anyway deal
+with creating assumed-rank copies, so it would likely not be an issue to relax
+this constraint).
+
+#### Copy-in and Copy out
+Copy-in and copy-out is required when passing an actual argument that is not
+contiguous to a non-POINTER CONTIGUOUS assumed-rank dummy.
+
+When the actual argument is ranked, the copy-in/copy-out can be performed on
+the ranked actual argument where the dynamic type has been aligned with the
+dummy type if needed (passing CLASS(T) to TYPE(T)) as illustrated below.
+
+```Fortran
+module m
+type t
+ integer :: i
+end type
+contains
+subroutine foo(x)
+ class(t) :: x(:)
+ interface
+  subroutine bar(x)
+    import :: t
+    type(t), contiguous :: x(..)
+  end subroutine
+ end interface
+ ! copy-in and copy-out is required around bar
+ call bar(x)
+end
+end module
+```
+
+When the actual is also an assumed-rank, the same copy-in/copy-out need may
+arise, and `hlfir.copy_in` and `hlfir.copy_out` are also used to cover this
+case. The `hlfir.copy_in` operation is implemented using the `IsContiguous`
+runtime entry point (which can be used as-is) and the `AssignTemporary`
+runtime entry point.
+
+The difference with the ranked case is that more care is needed to create the
+output descriptor passed to `AssignTemporary`: it must be allocated to the
+maximum rank with the same type as the input descriptor, and only the
+descriptor fields prior to the array dimensions are initialized to those of an
+unallocated descriptor prior to the runtime call (`AssignTemporary` copies the
+addendum if needed).
+
+```Fortran
+subroutine foo2(x)
+ class(t) :: x(..)
+ interface
+  subroutine bar(x)
+    import :: t
+    type(t), contiguous :: x(..)
+  end subroutine
+ end interface
+ ! copy-in and copy-out is required around bar
+ call bar(x)
+end
+```
+#### Creating the descriptor for assumed-rank dummies
+
+There are four cases to distinguish:
+1. Actual does not have a descriptor (and is therefore ranked)
+2. Actual has a descriptor that can be forwarded for the dummy
+3. Actual has a ranked descriptor that cannot be forwarded for the dummy
+4. Actual has an assumed-rank descriptor that cannot be forwarded for the dummy
+
+For the first case, a descriptor will be created for the dummy with `fir.embox`
+as if it had the rank of the actual argument. This is the same logic as when
+dealing with assumed-shape or INTENT(IN) POINTER dummy arguments, except that
+an extra cast to the assumed-rank descriptor type is added (a no-op at
+runtime). Care must be taken to set the final dimension extent to -1 in the
+descriptor created for an assumed-size actual argument. Note that the
+descriptor created for an assumed-size still has the rank of the assumed-size;
+a rank-one descriptor will be created from it if needed in a RANK(*) block
+(nothing in 15.5.2.4 point 17 says that an assumed-size should be passed as a
+rank-one array).
+
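+As a hypothetical sketch of the first case:
+
+```Fortran
+subroutine pass_explicit_shape()
+ interface
+  subroutine takes_assumed_rank(x)
+    real :: x(..)
+  end subroutine
+ end interface
+ real :: a(10, 20)
+ ! a has no descriptor: one is created with fir.embox as a rank-2 descriptor,
+ ! then cast to the assumed-rank descriptor type for the call.
+ call takes_assumed_rank(a)
+end subroutine
+```
+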
+For the second case, a cast to the assumed-rank descriptor type is added if it
+is not one already, and the descriptor is forwarded.
+
+For the third case, a new ranked descriptor with the dummy attribute/lower
+bounds is created from the actual argument descriptor with `fir.rebox`, as is
+done when passing to an assumed-shape dummy, and a cast to the assumed-rank
+descriptor type is added.
+
+The last case is the same as the third one, except that the descriptor
+manipulation is more complex since the storage size of the descriptor is
+unknown. `fir.rebox` codegen is already quite complex since it deals with
+creating descriptors for descriptor-based array sections and pointer remapping.
+Both of those are meaningless in this case where the output descriptor is the
+same as the input one, except for the lower bounds, attribute, and derived type
+pointer field that may need to be changed to match the values describing the
+dummy. A simpler `fir.rebox_assumed_rank` operation is added for this use case.
+Notably, this operation can take fir.ref<fir.box> inputs to avoid creating an
+expensive and useless fir.load of POINTER/ALLOCATABLE descriptors.
+
+Fortran forces the compiler into the third and fourth cases, creating a
+descriptor temporary for the dummy, far more often than one would think and
+hope. An annex section below discusses the cases that force the compiler to
+create a new descriptor for the dummy even if the actual already has a
+descriptor. These are the same situations as with non-assumed-rank arguments,
+but when passing assumed-ranks to assumed-ranks, the cost of this extra copy
+is higher.
+
+#### Intent(out) assumed-rank finalization, deallocation, initialization
+
+The standard prevents an INTENT(OUT) assumed-rank requiring finalization from
+being associated with an assumed-size array (C839) because there would be no
+way to finalize such an entity. But INTENT(OUT) finalization is still possible
+if the actual is neither an assumed-size nor a nonpointer, nonallocatable
+assumed-rank (which might itself be associated with an assumed-size).
+
+Flang therefore needs to implement finalization, deallocation and
+initialization of INTENT(OUT) as usual. Nonpointer, nonallocatable INTENT(OUT)
+finalization is done via a call to the `Destroy` runtime API, which takes a
+descriptor and can be used directly with an assumed-rank descriptor with no
+change. The initialization is done via a call to the `Initialize` runtime API,
+which takes a descriptor and can also be used directly with an assumed-rank
+descriptor. Conditional deallocation of an INTENT(OUT) allocatable is done via an
+inline allocation status check and either an inline deallocate for intrinsic
+types, or a runtime call to `Deallocate` for the other cases. For
+assumed-ranks, the runtime call is always used regardless of the type to avoid
+inline descriptor manipulations. `Deallocate` runtime API also works with
+assumed-rank descriptors with no changes (like any runtime API taking
+descriptors of any rank).
+
+```Fortran
+subroutine foo(x)
+ class(*), allocatable :: x(..)
+ interface
+  subroutine bar(x)
+    class(*), intent(out) :: x(..)
+  end subroutine
+ end interface
+ ! x may require finalization and initialization on bar entry.
+ call bar(x)
+end
+subroutine bar(x)
+  class(*), intent(out) :: x(..)
+end subroutine
+```
+### Select Rank
+
+SELECT RANK is implemented with a rank inquiry (and a last-extent inquiry for
+`RANK(*)`), followed by a jump to the related block. For the `RANK(cst)`
+cases, the selector descriptor is cast to a descriptor with the associated
+entity rank of the block. In the `RANK DEFAULT` case, the input descriptor is
+kept with no cast, and in the `RANK(*)` case, a rank-one descriptor is created
+with the same dynamic type as the input.
+These new descriptor values are mapped to the associated entity symbol, and
+lowering proceeds as usual. This is very similar to how SELECT TYPE is
+implemented. The `RANK(*)` case is a bit odd: it detects assumed-ranks
+associated with an assumed-size array regardless of the rank, and it takes
+precedence over any rank-based matching.
+
+Note that `-1` is a magic extent number that encodes that a descriptor describes
+an entity that is an assumed-size (user specified extents of explicit shape
+arrays are always normalized to zero when negative, so `-1` is a safe value to
+identify a descriptor created for an assumed-size). It is actually well
+specified for the BIND(C) case (18.5.2 point 1) and is always used as such in
+flang descriptors.
+
+The implementation of SELECT RANK is done as follows:
+- Read the rank `r` in the descriptor.
+- If there is a `RANK(*)`, read the extent in dimension `r`. If it is `-1`,
+  jump to the `RANK(*)` block. Otherwise, continue with the steps below.
+- For each `RANK(constant)` case, compare `constant` to `r`. Stop at the first
+  match and jump to the related block. The order of the comparisons does not
+  matter (there cannot be more than one match).
+- Jump to the `RANK DEFAULT` block if any. Otherwise jump to the end of the
+  construct.
+
+The block for each case jumps to the end of the construct when done. As
+opposed to SELECT TYPE, no clean-up should be needed at the construct level
+since the select-rank selector is a named entity and cannot be a temporary
+whose lifetime is that of the construct.
+
+Except for the `RANK(*)` case, the branching logic is implemented in FIR with a
+`fir.select_case` operating on the rank.
+
+Example:
+
+```Fortran
+subroutine test(x)
+ interface
+ subroutine assumed_size(x)
+ real :: x(*)
+ end subroutine
+ subroutine scalar(x)
+ real :: x
+ end subroutine
+ subroutine rank_one(x)
+ real :: x(:)
+ end subroutine
+ subroutine many_dim_array(x)
+ real :: x(..)
+ end subroutine
+ end interface
+
+ real :: x(..)
+ select rank (y => x)
+ rank(*)
+ call assumed_size(y)
+ rank(0)
+ call scalar(y)
+ rank(1)
+ call rank_one(y)
+ rank default
+ call many_dim_array(y)
+ end select
+end subroutine
+```
+
+Pseudo FIR for the example (some fir.convert operations and SSA constant definitions are omitted for clarity):
+
+```
+func.func @_QPtest(%arg0: !fir.box<!fir.array<*xf32>>) {
+ %x:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEx"} : (!fir.box<!fir.array<*xf32>>) -> (!fir.box<!fir.array<*xf32>>, !fir.box<!fir.array<*xf32>>)
+ %r = fir.box_rank %x#1 : (!fir.box<!fir.array<*xf32>>) -> i32
+ %last_extent = fir.call @_FortranASizeDim(%x#1, %r, %sourcename, %sourceline)
+ %is_assumed_size = arith.cmpi eq %last_extent, %c-1: (i64, i64) -> i1
+ cf.cond_br %is_assumed_size, ^bb_assumed_size, ^bb_not_assumed_size
+^bb_assumed_size:
+ %r1_box = fir.rebox_assumed_rank %x#0 : (!fir.box<!fir.array<*xf32>>) -> !fir.box<!fir.array<?xf32>>
+ %addr = fir.box_addr %r1_box : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+ fir.call @_QPassumed_size(%addr) (!fir.ref<!fir.array<?xf32>>) -> ()
+ cf.br ^bb_end
+^bb_not_assumed_size:
+ fir.select_case %r : i32 [#fir.point, %c0, ^bb_scalar, #fir.point, %c1, ^bb_rank1, unit, ^bb_default]
+^bb_scalar:
+ %scalar_cast = fir.convert %x#1 : (!fir.box<!fir.array<*xf32>>) -> !fir.box<f32>
+ %x_scalar = fir.box_addr %scalar_cast: (!fir.box<f32>) -> !fir.ref<f32>
+ fir.call @_QPscalar(%x_scalar) (!fir.ref<f32>) -> ()
+ cf.br ^bb_end
+^bb_rank1:
+ %rank1_cast = fir.convert %x#1 : (!fir.box<!fir.array<*xf32>>) -> !fir.box<!fir.array<?xf32>>
+ fir.call @_QPrank_one(%rank1_cast) (!fir.box<!fir.array<?xf32>>) -> ()
+ cf.br ^bb_end
+^bb_default:
+ fir.call @_QPmany_dim_array(%x#1) (!fir.box<!fir.array<*xf32>>) -> ()
+ cf.br ^bb_end
+^bb_end:
+ return
+}
+```
+
+### Inquiry intrinsic functions
+#### ALLOCATED and ASSOCIATED
+Implemented inline with `fir.box_addr` (reading the descriptor base address
+inline). Currently, FIR descriptor inquiries happen at the "descriptor value"
+level (requiring a fir.load of the POINTER or ALLOCATABLE
+!fir.ref<!fir.box<>>); to satisfy the SSA value semantics, the fir.load
+creates a copy of the underlying descriptor storage. With assumed-ranks, this
+copy will be "expensive" and harder to optimize out given that the descriptor
+storage size is not a compile time constant. To avoid this extra cost,
+ALLOCATABLE and POINTER assumed-ranks will be cast to scalar descriptors
+before the `fir.load`.
+
+```Fortran
+real, allocatable :: x(..)
+print *, allocated(x)
+```
+
+```
+%1 = fir.convert %x : (!fir.ref<!fir.box<!fir.heap<!fir.array<* x f32>>>>) -> !fir.ref<!fir.box<!fir.heap<f32>>>
+%2 = fir.load %1 : !fir.ref<!fir.box<!fir.heap<f32>>>
+%addr = fir.box_addr %2 : (!fir.box<!fir.heap<f32>>) -> fir.ref<f32>
+# .... "addr != null" as usual
+```
+#### LEN and STORAGE_SIZE
+Implemented inline with `fir.box_elesize` with the same approach as
+ALLOCATED/ASSOCIATED when dealing with fir.box load for POINTERS and
+ALLOCATABLES.
+
+```Fortran
+character(*) :: x(..)
+print *, len(x)
+```
+
+```
+%ele_size = fir.box_elesize %x : (!fir.box<!fir.array<*x!fir.char<?>>>) -> i64
+# .... divide by character KIND byte size if needed as usual
+```
+#### PRESENT
+Implemented inline with `fir.is_present` which ends-up implemented as a check
+that the descriptor address is not null just like with OPTIONAL assumed shapes
+and OPTIONAL pointers and allocatables.
+
+```Fortran
+real, optional :: x(..)
+print *, present(x)
+```
+
+```
+%is_present = fir.is_present %x : (!fir.box<!fir.array<*xf32>>) -> i1
+```
+#### RANK
+Implemented inline with `fir.box_rank` which simply reads the descriptor rank
+field.
+
+```Fortran
+real :: x(..)
+print *, rank(x)
+```
+
+```
+%rank = fir.box_rank %x : (!fir.box<!fir.array<*xf32>>) -> i32
+```
+#### SIZE
+The runtime can be queried as is done for assumed shapes. When DIM is present
+and constant, `fir.box_dim` can also be used, with the option to add a runtime
+check that DIM <= RANK. Pointers and allocatables are dereferenced, which in
+FIR currently creates a descriptor copy that cannot be simplified by inserting
+a cast before the fir.load as for the previous inquiries (the dimension info
+must be correctly copied).
+
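+For illustration (a sketch in the style of the snippets above):
+
+```Fortran
+real :: x(..)
+print *, size(x)         ! runtime query, as for assumed shapes
+print *, size(x, dim=1)  ! constant DIM: fir.box_dim may be used
+```
+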
+#### LBOUND, SHAPE, and UBOUND
+When DIM is present, the runtime can be used as it currently is with assumed
+shapes. When DIM is absent, the result is a rank-one array whose extent is the
+rank. The runtime has an entry for UBOUND that takes a descriptor and
+allocates the result as needed, so the same logic as for assumed shapes can be
+used.
+
+There is no such entry for LBOUND/SHAPE currently; it would likely be best to
+add one rather than juggle with inline code. Dereferencing pointers and
+allocatables is handled as with SIZE.
+
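+For illustration (a sketch), the DIM-absent form returns a rank-one array with
+one element per dimension of the argument:
+
+```Fortran
+real :: x(..)
+print *, lbound(x)
+print *, shape(x)
+```
+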
+#### EXTENDS_TYPE_OF, SAME_TYPE_AS, and IS_CONTIGUOUS
+The runtime is used as it currently is with assumed shapes. Dereferencing
+pointers and allocatables is handled as with SIZE.
+
+#### C_LOC from ISO_C_BINDING
+Implemented with `fir.box_addr` as with other C_LOC cases for entities that
+have descriptors.
+
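+For instance (a hypothetical sketch):
+
+```Fortran
+subroutine get_addr(x)
+ use iso_c_binding
+ real, target, contiguous :: x(..)
+ type(c_ptr) :: p
+ ! c_loc is lowered to fir.box_addr reading the descriptor base address.
+ p = c_loc(x)
+end subroutine
+```
+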
+#### C_SIZEOF from ISO_C_BINDING
+Implemented as STORAGE_SIZE * SIZE.
+
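+Similarly (another hypothetical sketch):
+
+```Fortran
+subroutine print_bytes(x)
+ use iso_c_binding
+ real :: x(..)
+ ! element storage size in bytes multiplied by the number of elements
+ print *, c_sizeof(x)
+end subroutine
+```
+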
+#### Floating point inquiries and NEW_LINE
+BIT_SIZE, DIGITS, EPSILON, HUGE, KIND, MAXEXPONENT, MINEXPONENT, NEW_LINE,
+PRECISION, RADIX, RANGE, and TINY all accept assumed-ranks, but they are
+always constant folded by semantics based on the type, so lowering does not
+need to deal with them.
+
+#### Coarray inquiries
+Assumed-ranks cannot be coarrays (C837), but they can still technically appear
+in COSHAPE (which should obviously return zero). They cannot appear in
+LCOBOUND or UCOBOUND, which require their argument to be a coarray.
+
+## Annex 1 - Descriptor temporary for the dummy arguments
+
+When passing an actual argument that has a descriptor to a dummy that must be
+passed by descriptor, one could expect that the descriptor of the actual can
+just be forwarded to the dummy, but this is unfortunately not possible in
+quite a few cases. This is not specific to assumed-ranks, but since the cost
+of descriptor temporaries is higher for assumed-ranks, it is discussed here.
+
+Below are the reasons for which a new descriptor may be required:
+1. passing a POINTER to a non POINTER
+2. setting the descriptor CFI_cdesc_t `attribute` according to the dummy
+ POINTER/ALLOCATABLE attributes (18.3.6 point 4 for the BIND(C) case).
+3. setting the CFI_cdesc_t lower bounds to zero for a BIND(C) assumed
+ shape/rank dummy (18.5.3 point 3).
+4. setting the derived type pointer to the dummy dynamic type when passing a
+ CLASS() actual to a TYPE() dummy.
+
+Justification of 1.:
+When passing a POINTER to a non-POINTER, the target of the pointer is passed,
+and nothing prevents the association status of the actual argument from
+changing during the call (e.g. if the POINTER is another argument of the call,
+or is a module variable, it may be re-associated inside the call). Such
+association status changes of the actual should not impact the dummy, so they
+must not share the same descriptor.
+
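+As a hypothetical sketch of this scenario:
+
+```Fortran
+module m
+ real, pointer :: p(:)
+contains
+ subroutine takes_nonpointer(x)
+  real :: x(..)
+  ! p may be re-associated here while x must keep describing the original
+  ! target, so x and p must not share the same descriptor.
+  p => null()
+ end subroutine
+end module
+! elsewhere: "call takes_nonpointer(p)" passes the target of p.
+```
+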
+Justification of 2.:
+In the BIND(C) case, this is required by 18.3.6 point 4. Outside of the BIND(C)
+case, this should still be done because any runtime call where the dummy
+descriptor is forwarded may misbehave if the ALLOCATABLE/POINTER attribute
+does not match the dummy's (e.g. reallocation could be triggered instead of
+padding/trimming characters).
+
+Justification of 3:
+18.5.3 point 3.
+
+Justification of 4:
+If the descriptor derived type info pointer is not the one of the dummy
+dynamic type, many runtime calls like IO and assignment will misbehave when
+given the dummy descriptor.
+
+For points 2, 3, and 4, one could be tempted to change the descriptor fields
+before and after the call, but this is risky since it assumes nothing will
+access the actual argument descriptor during the call. Even setting aside the
+potential asynchronous behavior of the OpenMP/OpenACC/CUDA Fortran extensions,
+the actual argument descriptor may be passed inside the call as another
+argument (a POINTER or ALLOCATABLE with "different" lower bounds), and it
+could also be accessed via host or use association in general.
+
+
+## Annex 2 - Assumed-Rank Objects and IGNORE_TKR
+
+It is possible to:
+- Set IGNORE_TKR(TK) on assumed-rank dummies (but TYPE(*) is better when
+ possible).
+- Pass an assumed-rank to an IGNORE_TKR(R) dummy that is not passed
+ by descriptor (explicit shape and assumed-size). Note that copy-in and
+ copy-out will be performed for the dummy
+
+It is not possible to:
+- Set IGNORE_TKR(R) on an assumed-rank dummy.
+
+Example:
+
+```Fortran
+subroutine test(assumed_rank_actual)
+interface
+ subroutine assumed_size_dummy(x)
+ !dir$ ignore_tkr(tkr) x
+ integer :: x(*)
+ end subroutine
+ subroutine any_type_assumed_rank(x)
+ !dir$ ignore_tkr(tk) x
+ integer :: x(..)
+ end subroutine
+end interface
+ real :: assumed_rank_actual(..)
+ call assumed_size_dummy(assumed_rank_actual) !OK
+ call any_type_assumed_rank(assumed_rank_actual) !OK
+end subroutine
+```
+
+## Annex 3 - Test Plan
+
+The MPI_f08 module makes use of assumed-ranks (see
+https://www.mpi-forum.org/docs/mpi-3.1/mpi31-report.pdf). As such, compiling
+the MPI_f08 modules of MPI libraries, and some applications making use of
+MPI_f08, will be a good test for the implementation of this feature.
diff --git a/flang/docs/index.md b/flang/docs/index.md
index 5c9b8895298c..a61950714089 100644
--- a/flang/docs/index.md
+++ b/flang/docs/index.md
@@ -41,6 +41,7 @@ on how to get in touch with us and to learn more about the current status.
Aliasing
AliasingAnalysisFIR
ArrayComposition
+ AssumedRank
BijectiveInternalNameUniquing
Calls
Character
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index e470154ce8c2..8c6c22210cf0 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -163,7 +163,8 @@ static void createDeclareAllocFuncWithArg(mlir::OpBuilder &modBuilder,
builder, loc, boxAddrOp.getResult(), asFortran, bounds,
/*structured=*/false, /*implicit=*/false, clause, boxAddrOp.getType());
builder.create<mlir::acc::DeclareEnterOp>(
- loc, mlir::ValueRange(entryOp.getAccPtr()));
+ loc, mlir::acc::DeclareTokenType::get(entryOp.getContext()),
+ mlir::ValueRange(entryOp.getAccPtr()));
modBuilder.setInsertionPointAfter(registerFuncOp);
builder.restoreInsertionPoint(crtInsPt);
@@ -195,7 +196,7 @@ static void createDeclareDeallocFuncWithArg(
/*structured=*/false, /*implicit=*/false, clause,
boxAddrOp.getType());
builder.create<mlir::acc::DeclareExitOp>(
- loc, mlir::ValueRange(entryOp.getAccPtr()));
+ loc, mlir::Value{}, mlir::ValueRange(entryOp.getAccPtr()));
mlir::Value varPtr;
if constexpr (std::is_same_v<ExitOp, mlir::acc::CopyoutOp> ||
@@ -2762,7 +2763,13 @@ static void createDeclareGlobalOp(mlir::OpBuilder &modBuilder,
EntryOp entryOp = createDataEntryOp<EntryOp>(
builder, loc, addrOp.getResTy(), asFortran, bounds,
/*structured=*/false, implicit, clause, addrOp.getResTy().getType());
- builder.create<DeclareOp>(loc, mlir::ValueRange(entryOp.getAccPtr()));
+ if constexpr (std::is_same_v<DeclareOp, mlir::acc::DeclareEnterOp>)
+ builder.create<DeclareOp>(
+ loc, mlir::acc::DeclareTokenType::get(entryOp.getContext()),
+ mlir::ValueRange(entryOp.getAccPtr()));
+ else
+ builder.create<DeclareOp>(loc, mlir::Value{},
+ mlir::ValueRange(entryOp.getAccPtr()));
mlir::Value varPtr;
if constexpr (std::is_same_v<GlobalOp, mlir::acc::GlobalDestructorOp>) {
builder.create<ExitOp>(entryOp.getLoc(), entryOp.getAccPtr(), varPtr,
@@ -2812,7 +2819,8 @@ static void createDeclareAllocFunc(mlir::OpBuilder &modBuilder,
builder, loc, boxAddrOp.getResult(), asFortran, bounds,
/*structured=*/false, /*implicit=*/false, clause, boxAddrOp.getType());
builder.create<mlir::acc::DeclareEnterOp>(
- loc, mlir::ValueRange(entryOp.getAccPtr()));
+ loc, mlir::acc::DeclareTokenType::get(entryOp.getContext()),
+ mlir::ValueRange(entryOp.getAccPtr()));
modBuilder.setInsertionPointAfter(registerFuncOp);
}
@@ -2850,7 +2858,7 @@ static void createDeclareDeallocFunc(mlir::OpBuilder &modBuilder,
boxAddrOp.getType());
builder.create<mlir::acc::DeclareExitOp>(
- loc, mlir::ValueRange(entryOp.getAccPtr()));
+ loc, mlir::Value{}, mlir::ValueRange(entryOp.getAccPtr()));
mlir::Value varPtr;
if constexpr (std::is_same_v<ExitOp, mlir::acc::CopyoutOp> ||
@@ -3092,34 +3100,37 @@ genDeclareInFunction(Fortran::lower::AbstractConverter &converter,
mlir::func::FuncOp funcOp = builder.getFunction();
auto ops = funcOp.getOps<mlir::acc::DeclareEnterOp>();
+ mlir::Value declareToken;
if (ops.empty()) {
- builder.create<mlir::acc::DeclareEnterOp>(loc, dataClauseOperands);
+ declareToken = builder.create<mlir::acc::DeclareEnterOp>(
+ loc, mlir::acc::DeclareTokenType::get(builder.getContext()),
+ dataClauseOperands);
} else {
auto declareOp = *ops.begin();
auto newDeclareOp = builder.create<mlir::acc::DeclareEnterOp>(
- loc, declareOp.getDataClauseOperands());
+ loc, mlir::acc::DeclareTokenType::get(builder.getContext()),
+ declareOp.getDataClauseOperands());
newDeclareOp.getDataClauseOperandsMutable().append(dataClauseOperands);
+ declareToken = newDeclareOp.getToken();
declareOp.erase();
}
openAccCtx.attachCleanup([&builder, loc, createEntryOperands,
copyEntryOperands, copyoutEntryOperands,
- deviceResidentEntryOperands]() {
+ deviceResidentEntryOperands, declareToken]() {
llvm::SmallVector<mlir::Value> operands;
operands.append(createEntryOperands);
operands.append(deviceResidentEntryOperands);
operands.append(copyEntryOperands);
operands.append(copyoutEntryOperands);
- if (!operands.empty()) {
- mlir::func::FuncOp funcOp = builder.getFunction();
- auto ops = funcOp.getOps<mlir::acc::DeclareExitOp>();
- if (ops.empty()) {
- builder.create<mlir::acc::DeclareExitOp>(loc, operands);
- } else {
- auto declareOp = *ops.begin();
- declareOp.getDataClauseOperandsMutable().append(operands);
- }
+ mlir::func::FuncOp funcOp = builder.getFunction();
+ auto ops = funcOp.getOps<mlir::acc::DeclareExitOp>();
+ if (ops.empty()) {
+ builder.create<mlir::acc::DeclareExitOp>(loc, declareToken, operands);
+ } else {
+ auto declareOp = *ops.begin();
+ declareOp.getDataClauseOperandsMutable().append(operands);
}
genDataExitOperations<mlir::acc::CreateOp, mlir::acc::DeleteOp>(
diff --git a/flang/test/Lower/OpenACC/HLFIR/acc-declare.f90 b/flang/test/Lower/OpenACC/HLFIR/acc-declare.f90
index 489f81d297df..b0a78fbd5439 100644
--- a/flang/test/Lower/OpenACC/HLFIR/acc-declare.f90
+++ b/flang/test/Lower/OpenACC/HLFIR/acc-declare.f90
@@ -24,11 +24,11 @@ module acc_declare
! ALL: %[[BOUND:.*]] = acc.bounds lowerbound(%{{.*}} : index) upperbound(%{{.*}} : index) extent(%{{.*}} : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
! FIR: %[[COPYIN:.*]] = acc.copyin varPtr(%[[DECL]] : !fir.ref<!fir.array<100xi32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<100xi32>> {dataClause = #acc<data_clause acc_copy>, name = "a"}
! HLFIR: %[[COPYIN:.*]] = acc.copyin varPtr(%[[DECL]]#1 : !fir.ref<!fir.array<100xi32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<100xi32>> {dataClause = #acc<data_clause acc_copy>, name = "a"}
-! ALL: acc.declare_enter dataOperands(%[[COPYIN]] : !fir.ref<!fir.array<100xi32>>)
+! ALL: %[[TOKEN:.*]] = acc.declare_enter dataOperands(%[[COPYIN]] : !fir.ref<!fir.array<100xi32>>)
! ALL: %{{.*}}:2 = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%{{.*}} = %{{.*}}) -> (index, i32) {
! ALL: }
-! ALL: acc.declare_exit dataOperands(%[[COPYIN]] : !fir.ref<!fir.array<100xi32>>)
+! ALL: acc.declare_exit token(%[[TOKEN]]) dataOperands(%[[COPYIN]] : !fir.ref<!fir.array<100xi32>>)
! FIR: acc.copyout accPtr(%[[COPYIN]] : !fir.ref<!fir.array<100xi32>>) bounds(%[[BOUND]]) to varPtr(%[[DECL]] : !fir.ref<!fir.array<100xi32>>) {dataClause = #acc<data_clause acc_copy>, name = "a"}
! HLFIR: acc.copyout accPtr(%[[COPYIN]] : !fir.ref<!fir.array<100xi32>>) bounds(%[[BOUND]]) to varPtr(%[[DECL]]#1 : !fir.ref<!fir.array<100xi32>>) {dataClause = #acc<data_clause acc_copy>, name = "a"}
@@ -51,11 +51,11 @@ module acc_declare
! ALL: %[[BOUND:.*]] = acc.bounds lowerbound(%{{.*}} : index) upperbound(%{{.*}} : index) extent(%{{.*}} : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
! FIR: %[[CREATE:.*]] = acc.create varPtr(%[[DECL]] : !fir.ref<!fir.array<100xi32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<100xi32>> {name = "a"}
! HLFIR: %[[CREATE:.*]] = acc.create varPtr(%[[DECL]]#1 : !fir.ref<!fir.array<100xi32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<100xi32>> {name = "a"}
-! ALL: acc.declare_enter dataOperands(%[[CREATE]] : !fir.ref<!fir.array<100xi32>>)
+! ALL: %[[TOKEN:.*]] = acc.declare_enter dataOperands(%[[CREATE]] : !fir.ref<!fir.array<100xi32>>)
! ALL: %{{.*}}:2 = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%{{.*}} = %{{.*}}) -> (index, i32) {
! ALL: }
-! ALL: acc.declare_exit dataOperands(%[[CREATE]] : !fir.ref<!fir.array<100xi32>>)
+! ALL: acc.declare_exit token(%[[TOKEN]]) dataOperands(%[[CREATE]] : !fir.ref<!fir.array<100xi32>>)
! ALL: acc.delete accPtr(%[[CREATE]] : !fir.ref<!fir.array<100xi32>>) bounds(%[[BOUND]]) {dataClause = #acc<data_clause acc_create>, name = "a"}
! ALL: return
@@ -119,10 +119,10 @@ module acc_declare
! HLFIR: %[[ADECL:.*]]:2 = hlfir.declare %[[A]](%{{.*}}) {acc.declare = #acc.declare<dataClause = acc_copyout>, uniq_name = "_QMacc_declareFacc_declare_copyoutEa"} : (!fir.ref<!fir.array<100xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100xi32>>, !fir.ref<!fir.array<100xi32>>)
! FIR: %[[CREATE:.*]] = acc.create varPtr(%[[ADECL]] : !fir.ref<!fir.array<100xi32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<100xi32>> {dataClause = #acc<data_clause acc_copyout>, name = "a"}
! HLFIR: %[[CREATE:.*]] = acc.create varPtr(%[[ADECL]]#1 : !fir.ref<!fir.array<100xi32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<100xi32>> {dataClause = #acc<data_clause acc_copyout>, name = "a"}
-! ALL: acc.declare_enter dataOperands(%[[CREATE]] : !fir.ref<!fir.array<100xi32>>)
+! ALL: %[[TOKEN:.*]] = acc.declare_enter dataOperands(%[[CREATE]] : !fir.ref<!fir.array<100xi32>>)
! ALL: %{{.*}}:2 = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%arg{{.*}} = %{{.*}}) -> (index, i32)
-! ALL: acc.declare_exit dataOperands(%[[CREATE]] : !fir.ref<!fir.array<100xi32>>)
+! ALL: acc.declare_exit token(%[[TOKEN]]) dataOperands(%[[CREATE]] : !fir.ref<!fir.array<100xi32>>)
! FIR: acc.copyout accPtr(%[[CREATE]] : !fir.ref<!fir.array<100xi32>>) bounds(%{{.*}}) to varPtr(%[[ADECL]] : !fir.ref<!fir.array<100xi32>>) {name = "a"}
! HLFIR: acc.copyout accPtr(%[[CREATE]] : !fir.ref<!fir.array<100xi32>>) bounds(%{{.*}}) to varPtr(%[[ADECL]]#1 : !fir.ref<!fir.array<100xi32>>) {name = "a"}
! ALL: return
@@ -178,9 +178,9 @@ module acc_declare
! HLFIR: %[[DECL:.*]]:2 = hlfir.declare %[[ARG0]](%{{.*}}) {acc.declare = #acc.declare<dataClause = acc_declare_device_resident>, uniq_name = "_QMacc_declareFacc_declare_device_residentEa"} : (!fir.ref<!fir.array<100xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100xi32>>, !fir.ref<!fir.array<100xi32>>)
! FIR: %[[DEVICERES:.*]] = acc.declare_device_resident varPtr(%[[DECL]] : !fir.ref<!fir.array<100xi32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<100xi32>> {name = "a"}
! HLFIR: %[[DEVICERES:.*]] = acc.declare_device_resident varPtr(%[[DECL]]#1 : !fir.ref<!fir.array<100xi32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<100xi32>> {name = "a"}
-! ALL: acc.declare_enter dataOperands(%[[DEVICERES]] : !fir.ref<!fir.array<100xi32>>)
+! ALL: %[[TOKEN:.*]] = acc.declare_enter dataOperands(%[[DEVICERES]] : !fir.ref<!fir.array<100xi32>>)
! ALL: %{{.*}}:2 = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%arg{{.*}} = %{{.*}}) -> (index, i32)
-! ALL: acc.declare_exit dataOperands(%[[DEVICERES]] : !fir.ref<!fir.array<100xi32>>)
+! ALL: acc.declare_exit token(%[[TOKEN]]) dataOperands(%[[DEVICERES]] : !fir.ref<!fir.array<100xi32>>)
! ALL: acc.delete accPtr(%[[DEVICERES]] : !fir.ref<!fir.array<100xi32>>) bounds(%{{.*}}) {dataClause = #acc<data_clause acc_declare_device_resident>, name = "a"}
subroutine acc_declare_device_resident2()
@@ -195,8 +195,8 @@ module acc_declare
! HLFIR: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOCA]](%{{.*}}) {acc.declare = #acc.declare<dataClause = acc_declare_device_resident>, uniq_name = "_QMacc_declareFacc_declare_device_resident2Edataparam"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>)
! FIR: %[[DEVICERES:.*]] = acc.declare_device_resident varPtr(%[[DECL]] : !fir.ref<!fir.array<100xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<100xf32>> {name = "dataparam"}
! HLFIR: %[[DEVICERES:.*]] = acc.declare_device_resident varPtr(%[[DECL]]#1 : !fir.ref<!fir.array<100xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<100xf32>> {name = "dataparam"}
-! ALL: acc.declare_enter dataOperands(%[[DEVICERES]] : !fir.ref<!fir.array<100xf32>>)
-! ALL: acc.declare_exit dataOperands(%[[DEVICERES]] : !fir.ref<!fir.array<100xf32>>)
+! ALL: %[[TOKEN:.*]] = acc.declare_enter dataOperands(%[[DEVICERES]] : !fir.ref<!fir.array<100xf32>>)
+! ALL: acc.declare_exit token(%[[TOKEN]]) dataOperands(%[[DEVICERES]] : !fir.ref<!fir.array<100xf32>>)
! ALL: acc.delete accPtr(%[[DEVICERES]] : !fir.ref<!fir.array<100xf32>>) bounds(%{{.*}}) {dataClause = #acc<data_clause acc_declare_device_resident>, name = "dataparam"}
subroutine acc_declare_link2()
@@ -234,10 +234,10 @@ module acc_declare
! ALL-LABEL: func.func @_QMacc_declarePacc_declare_in_func() -> f32 {
! HLFIR: %[[DEVICE_RESIDENT:.*]] = acc.declare_device_resident varPtr(%{{.*}}#1 : !fir.ref<!fir.array<1024xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<1024xf32>> {name = "a"}
-! HLFIR: acc.declare_enter dataOperands(%[[DEVICE_RESIDENT]] : !fir.ref<!fir.array<1024xf32>>)
+! HLFIR: %[[TOKEN:.*]] = acc.declare_enter dataOperands(%[[DEVICE_RESIDENT]] : !fir.ref<!fir.array<1024xf32>>)
! HLFIR: %[[LOAD:.*]] = fir.load %{{.*}}#1 : !fir.ref<f32>
-! HLFIR: acc.declare_exit dataOperands(%[[DEVICE_RESIDENT]] : !fir.ref<!fir.array<1024xf32>>)
+! HLFIR: acc.declare_exit token(%[[TOKEN]]) dataOperands(%[[DEVICE_RESIDENT]] : !fir.ref<!fir.array<1024xf32>>)
! HLFIR: acc.delete accPtr(%[[DEVICE_RESIDENT]] : !fir.ref<!fir.array<1024xf32>>) bounds(%6) {dataClause = #acc<data_clause acc_declare_device_resident>, name = "a"}
! HLFIR: return %[[LOAD]] : f32
! ALL: }
@@ -254,10 +254,10 @@ module acc_declare
! HLFIR: %[[ALLOCA_A:.*]] = fir.alloca !fir.array<1024xf32> {bindc_name = "a", uniq_name = "_QMacc_declareFacc_declare_in_func2Ea"}
! HLFIR: %[[DECL_A:.*]]:2 = hlfir.declare %[[ALLOCA_A]](%{{.*}}) {acc.declare = #acc.declare<dataClause = acc_create>, uniq_name = "_QMacc_declareFacc_declare_in_func2Ea"} : (!fir.ref<!fir.array<1024xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<1024xf32>>, !fir.ref<!fir.array<1024xf32>>)
! HLFIR: %[[CREATE:.*]] = acc.create varPtr(%[[DECL_A]]#1 : !fir.ref<!fir.array<1024xf32>>) bounds(%7) -> !fir.ref<!fir.array<1024xf32>> {name = "a"}
-! HLFIR: acc.declare_enter dataOperands(%[[CREATE]] : !fir.ref<!fir.array<1024xf32>>)
+! HLFIR: %[[TOKEN:.*]] = acc.declare_enter dataOperands(%[[CREATE]] : !fir.ref<!fir.array<1024xf32>>)
! HLFIR: cf.br ^bb1
! HLFIR: ^bb1:
-! HLFIR: acc.declare_exit dataOperands(%[[CREATE]] : !fir.ref<!fir.array<1024xf32>>)
+! HLFIR: acc.declare_exit token(%[[TOKEN]]) dataOperands(%[[CREATE]] : !fir.ref<!fir.array<1024xf32>>)
! HLFIR: acc.delete accPtr(%[[CREATE]] : !fir.ref<!fir.array<1024xf32>>) bounds(%7) {dataClause = #acc<data_clause acc_create>, name = "a"}
! ALL: return %{{.*}} : f32
! ALL: }
diff --git a/libcxx/include/deque b/libcxx/include/deque
index d2ddce319567..1438f1e992e2 100644
--- a/libcxx/include/deque
+++ b/libcxx/include/deque
@@ -967,7 +967,6 @@ public:
// For more details, see the "Using libc++" documentation page or
// the documentation for __sanitizer_annotate_contiguous_container.
#if !defined(_LIBCPP_HAS_NO_ASAN)
- // TODO LLVM18: Remove the special-casing
_LIBCPP_HIDE_FROM_ABI void __annotate_double_ended_contiguous_container(
const void* __beg,
const void* __end,
diff --git a/libcxx/test/std/experimental/simd/test_utils.h b/libcxx/test/std/experimental/simd/test_utils.h
index a478a43a8727..65ffc736096f 100644
--- a/libcxx/test/std/experimental/simd/test_utils.h
+++ b/libcxx/test/std/experimental/simd/test_utils.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TEST_UTIL_H
-#define TEST_UTIL_H
+#ifndef LIBCXX_TEST_STD_EXPERIMENTAL_SIMD_TEST_UTILS_H
+#define LIBCXX_TEST_STD_EXPERIMENTAL_SIMD_TEST_UTILS_H
#include <algorithm>
#include <array>
@@ -78,4 +78,4 @@ void assert_simd_mask_values_equal(const ex::simd_mask<T, SimdAbi>& origin_mask,
assert(origin_mask[i] == expected_value[i]);
}
-#endif // TEST_UTIL_H
+#endif // LIBCXX_TEST_STD_EXPERIMENTAL_SIMD_TEST_UTILS_H
diff --git a/libcxx/utils/libcxx/test/params.py b/libcxx/utils/libcxx/test/params.py
index e31130572df6..3fedbf972c0c 100644
--- a/libcxx/utils/libcxx/test/params.py
+++ b/libcxx/utils/libcxx/test/params.py
@@ -114,7 +114,7 @@ DEFAULT_PARAMETERS = [
),
actions=lambda std: [
AddFeature(std),
- AddSubstitution("%{cxx_std}", re.sub("\+", "x", std)),
+ AddSubstitution("%{cxx_std}", re.sub(r"\+", "x", std)),
AddCompileFlag(lambda cfg: getStdFlag(cfg, std)),
],
),
@@ -187,7 +187,7 @@ DEFAULT_PARAMETERS = [
AddFeature("stdlib={}".format(stdlib)),
# Also add an umbrella feature 'stdlib=libc++' for all flavors of libc++, to simplify
# the test suite.
- AddFeature("stdlib=libc++") if re.match(".+-libc\+\+", stdlib) else None,
+ AddFeature("stdlib=libc++") if re.match(r".+-libc\+\+", stdlib) else None,
],
),
),
diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt
index 6fd4f02c750f..efe830bd2ad6 100644
--- a/libcxxabi/CMakeLists.txt
+++ b/libcxxabi/CMakeLists.txt
@@ -88,11 +88,6 @@ else()
set(LIBCXXABI_GENERATED_INCLUDE_DIR "${CMAKE_BINARY_DIR}/include/c++/v1")
endif()
-# TODO: Remove this after branching for LLVM 15
-if(LIBCXXABI_SYSROOT OR LIBCXXABI_TARGET_TRIPLE OR LIBCXXABI_GCC_TOOLCHAIN)
- message(WARNING "LIBCXXABI_SYSROOT, LIBCXXABI_TARGET_TRIPLE and LIBCXXABI_GCC_TOOLCHAIN are not supported anymore, please use the native CMake equivalents instead")
-endif()
-
set(LIBCXXABI_LIBCXX_LIBRARY_PATH "" CACHE PATH "The path to libc++ library.")
set(LIBCXXABI_LIBRARY_VERSION "1.0" CACHE STRING
"Version of libc++abi. This will be reflected in the name of the shared \
diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt
index 84f8ce296a74..248e888619e4 100644
--- a/libunwind/CMakeLists.txt
+++ b/libunwind/CMakeLists.txt
@@ -63,11 +63,6 @@ cmake_dependent_option(LIBUNWIND_INSTALL_SHARED_LIBRARY
"Install the shared libunwind library." ON
"LIBUNWIND_ENABLE_SHARED;LIBUNWIND_INSTALL_LIBRARY" OFF)
-# TODO: Remove this after branching for LLVM 15
-if(LIBUNWIND_SYSROOT OR LIBUNWIND_TARGET_TRIPLE OR LIBUNWIND_GCC_TOOLCHAIN)
- message(WARNING "LIBUNWIND_SYSROOT, LIBUNWIND_TARGET_TRIPLE and LIBUNWIND_GCC_TOOLCHAIN are not supported anymore, please use the native CMake equivalents instead")
-endif()
-
if(MINGW)
set(LIBUNWIND_DEFAULT_TEST_CONFIG "llvm-libunwind-mingw.cfg.in")
elseif (LIBUNWIND_ENABLE_SHARED)
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 0477c094c0ac..6dbfa8e73064 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -247,7 +247,8 @@ private:
void writeBuildId();
void writePEChecksum();
void sortSections();
- void sortExceptionTable();
+ template <typename T> void sortExceptionTable(Chunk *first, Chunk *last);
+ void sortExceptionTables();
void sortCRTSectionChunks(std::vector<Chunk *> &chunks);
void addSyntheticIdata();
void sortBySectionOrder(std::vector<Chunk *> &chunks);
@@ -751,7 +752,7 @@ void Writer::run() {
}
writeSections();
prepareLoadConfig();
- sortExceptionTable();
+ sortExceptionTables();
// Fix up the alignment in the TLS Directory's characteristic field,
// if a specific alignment value is needed
@@ -2164,41 +2165,52 @@ void Writer::writeBuildId() {
}
// Sort .pdata section contents according to PE/COFF spec 5.5.
-void Writer::sortExceptionTable() {
- if (!firstPdata)
- return;
- llvm::TimeTraceScope timeScope("Sort exception table");
+template <typename T>
+void Writer::sortExceptionTable(Chunk *first, Chunk *last) {
// We assume .pdata contains function table entries only.
auto bufAddr = [&](Chunk *c) {
OutputSection *os = ctx.getOutputSection(c);
return buffer->getBufferStart() + os->getFileOff() + c->getRVA() -
os->getRVA();
};
- uint8_t *begin = bufAddr(firstPdata);
- uint8_t *end = bufAddr(lastPdata) + lastPdata->getSize();
- if (ctx.config.machine == AMD64) {
- struct Entry { ulittle32_t begin, end, unwind; };
- if ((end - begin) % sizeof(Entry) != 0) {
- fatal("unexpected .pdata size: " + Twine(end - begin) +
- " is not a multiple of " + Twine(sizeof(Entry)));
- }
- parallelSort(
- MutableArrayRef<Entry>((Entry *)begin, (Entry *)end),
- [](const Entry &a, const Entry &b) { return a.begin < b.begin; });
- return;
+ uint8_t *begin = bufAddr(first);
+ uint8_t *end = bufAddr(last) + last->getSize();
+ if ((end - begin) % sizeof(T) != 0) {
+ fatal("unexpected .pdata size: " + Twine(end - begin) +
+ " is not a multiple of " + Twine(sizeof(T)));
}
- if (ctx.config.machine == ARMNT || ctx.config.machine == ARM64) {
- struct Entry { ulittle32_t begin, unwind; };
- if ((end - begin) % sizeof(Entry) != 0) {
- fatal("unexpected .pdata size: " + Twine(end - begin) +
- " is not a multiple of " + Twine(sizeof(Entry)));
- }
- parallelSort(
- MutableArrayRef<Entry>((Entry *)begin, (Entry *)end),
- [](const Entry &a, const Entry &b) { return a.begin < b.begin; });
- return;
+
+ parallelSort(MutableArrayRef<T>(reinterpret_cast<T *>(begin),
+ reinterpret_cast<T *>(end)),
+ [](const T &a, const T &b) { return a.begin < b.begin; });
+}
+
+// Sort .pdata section contents according to PE/COFF spec 5.5.
+void Writer::sortExceptionTables() {
+ llvm::TimeTraceScope timeScope("Sort exception table");
+
+ struct EntryX64 {
+ ulittle32_t begin, end, unwind;
+ };
+ struct EntryArm {
+ ulittle32_t begin, unwind;
+ };
+
+ switch (ctx.config.machine) {
+ case AMD64:
+ if (firstPdata)
+ sortExceptionTable<EntryX64>(firstPdata, lastPdata);
+ break;
+ case ARMNT:
+ case ARM64:
+ if (firstPdata)
+ sortExceptionTable<EntryArm>(firstPdata, lastPdata);
+ break;
+ default:
+ if (firstPdata)
+ lld::errs() << "warning: don't know how to handle .pdata.\n";
+ break;
}
- lld::errs() << "warning: don't know how to handle .pdata.\n";
}
// The CRT section contains, among other things, the array of function
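The rewrite above folds the duplicated AMD64 and ARM sort loops into a single helper templated over the entry layout. A compilable sketch of the same shape, with `ulittle32_t`/`parallelSort` swapped for host-endian `uint32_t`/`std::sort` so it stands alone:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Entry layouts mirroring the structs in sortExceptionTables().
struct EntryX64 { uint32_t begin, end, unwind; }; // AMD64
struct EntryArm { uint32_t begin, unwind; };      // ARMNT / ARM64

// Sort a raw byte range as an array of T, ordered by function start RVA.
template <typename T> bool sortExceptionTable(uint8_t *begin, uint8_t *end) {
  if ((end - begin) % sizeof(T) != 0)
    return false; // the real code calls fatal() on a malformed .pdata size
  std::sort(reinterpret_cast<T *>(begin), reinterpret_cast<T *>(end),
            [](const T &a, const T &b) { return a.begin < b.begin; });
  return true;
}

int main() {
  std::vector<EntryX64> pdata = {{0x30, 0x40, 0x100}, {0x10, 0x20, 0x110}};
  uint8_t *b = reinterpret_cast<uint8_t *>(pdata.data());
  sortExceptionTable<EntryX64>(b, b + pdata.size() * sizeof(EntryX64));
  std::printf("first entry now begins at 0x%x\n", pdata[0].begin); // 0x10
}
```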
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 4b4d7d6db93c..e83cca31105d 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -348,7 +348,7 @@ static std::string createFileLineMsg(StringRef path, unsigned line) {
template <class ELFT>
static std::string getSrcMsgAux(ObjFile<ELFT> &file, const Symbol &sym,
- InputSectionBase &sec, uint64_t offset) {
+ const InputSectionBase &sec, uint64_t offset) {
// In DWARF, functions and variables are stored to different places.
// First, look up a function for a given offset.
if (std::optional<DILineInfo> info = file.getDILineInfo(&sec, offset))
@@ -363,7 +363,7 @@ static std::string getSrcMsgAux(ObjFile<ELFT> &file, const Symbol &sym,
return std::string(file.sourceFile);
}
-std::string InputFile::getSrcMsg(const Symbol &sym, InputSectionBase &sec,
+std::string InputFile::getSrcMsg(const Symbol &sym, const InputSectionBase &sec,
uint64_t offset) {
if (kind() != ObjKind)
return "";
@@ -474,8 +474,8 @@ ObjFile<ELFT>::getVariableLoc(StringRef name) {
// Returns source line information for a given offset
// using DWARF debug info.
template <class ELFT>
-std::optional<DILineInfo> ObjFile<ELFT>::getDILineInfo(InputSectionBase *s,
- uint64_t offset) {
+std::optional<DILineInfo>
+ObjFile<ELFT>::getDILineInfo(const InputSectionBase *s, uint64_t offset) {
// Detect SectionIndex for specified section.
uint64_t sectionIndex = object::SectionedAddress::UndefSection;
ArrayRef<InputSectionBase *> sections = s->file->getSections();
@@ -1822,7 +1822,7 @@ template <class ELFT> void ObjFile<ELFT>::parseLazy() {
}
}
-bool InputFile::shouldExtractForCommon(StringRef name) {
+bool InputFile::shouldExtractForCommon(StringRef name) const {
if (isa<BitcodeFile>(this))
return isBitcodeNonCommonDef(mb, name, archiveName);
diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index 41c2ba4e307b..ab98d78fcf14 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -101,7 +101,7 @@ public:
// Check if a non-common symbol should be extracted to override a common
// definition.
- bool shouldExtractForCommon(StringRef name);
+ bool shouldExtractForCommon(StringRef name) const;
// .got2 in the current file. This is used by PPC32 -fPIC/-fPIE to compute
// offsets in PLT call stubs.
@@ -133,7 +133,7 @@ public:
// True if this is an argument for --just-symbols. Usually false.
bool justSymbols = false;
- std::string getSrcMsg(const Symbol &sym, InputSectionBase &sec,
+ std::string getSrcMsg(const Symbol &sym, const InputSectionBase &sec,
uint64_t offset);
// On PPC64 we need to keep track of which files contain small code model
@@ -255,7 +255,8 @@ public:
return getSymbol(symIndex);
}
- std::optional<llvm::DILineInfo> getDILineInfo(InputSectionBase *, uint64_t);
+ std::optional<llvm::DILineInfo> getDILineInfo(const InputSectionBase *,
+ uint64_t);
std::optional<std::pair<std::string, unsigned>>
getVariableLoc(StringRef name);
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index e4ce050a789d..e6942a928787 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -242,7 +242,8 @@ InputSection *InputSectionBase::getLinkOrderDep() const {
}
// Find a symbol that encloses a given location.
-Defined *InputSectionBase::getEnclosingSymbol(uint64_t offset, uint8_t type) {
+Defined *InputSectionBase::getEnclosingSymbol(uint64_t offset,
+ uint8_t type) const {
for (Symbol *b : file->getSymbols())
if (Defined *d = dyn_cast<Defined>(b))
if (d->section == this && d->value <= offset &&
@@ -252,7 +253,7 @@ Defined *InputSectionBase::getEnclosingSymbol(uint64_t offset, uint8_t type) {
}
// Returns an object file location string. Used to construct an error message.
-std::string InputSectionBase::getLocation(uint64_t offset) {
+std::string InputSectionBase::getLocation(uint64_t offset) const {
std::string secAndOffset =
(name + "+0x" + Twine::utohexstr(offset) + ")").str();
@@ -273,7 +274,8 @@ std::string InputSectionBase::getLocation(uint64_t offset) {
// foo.c:42 (/home/alice/possibly/very/long/path/foo.c:42)
//
// Returns an empty string if there's no way to get line info.
-std::string InputSectionBase::getSrcMsg(const Symbol &sym, uint64_t offset) {
+std::string InputSectionBase::getSrcMsg(const Symbol &sym,
+ uint64_t offset) const {
return file->getSrcMsg(sym, *this, offset);
}
@@ -286,7 +288,7 @@ std::string InputSectionBase::getSrcMsg(const Symbol &sym, uint64_t offset) {
// or
//
// path/to/foo.o:(function bar) in archive path/to/bar.a
-std::string InputSectionBase::getObjMsg(uint64_t off) {
+std::string InputSectionBase::getObjMsg(uint64_t off) const {
std::string filename = std::string(file->getName());
std::string archive;
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index 7570901b4ef9..fbaea57bd586 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -191,15 +191,15 @@ public:
// Get a symbol that encloses this offset from within the section. If type is
// not zero, return a symbol with the specified type.
- Defined *getEnclosingSymbol(uint64_t offset, uint8_t type = 0);
- Defined *getEnclosingFunction(uint64_t offset) {
+ Defined *getEnclosingSymbol(uint64_t offset, uint8_t type = 0) const;
+ Defined *getEnclosingFunction(uint64_t offset) const {
return getEnclosingSymbol(offset, llvm::ELF::STT_FUNC);
}
// Returns a source location string. Used to construct an error message.
- std::string getLocation(uint64_t offset);
- std::string getSrcMsg(const Symbol &sym, uint64_t offset);
- std::string getObjMsg(uint64_t offset);
+ std::string getLocation(uint64_t offset) const;
+ std::string getSrcMsg(const Symbol &sym, uint64_t offset) const;
+ std::string getObjMsg(uint64_t offset) const;
// Each section knows how to relocate itself. These functions apply
// relocations, assuming that Buf points to this section's copy in
diff --git a/lld/test/ELF/lto/arm.ll b/lld/test/ELF/lto/arm.ll
new file mode 100644
index 000000000000..00d08658adfe
--- /dev/null
+++ b/lld/test/ELF/lto/arm.ll
@@ -0,0 +1,59 @@
+; REQUIRES: arm
+;; Test we can infer the e_machine value EM_ARM from a bitcode file.
+
+; RUN: split-file %s %t
+
+; RUN: llvm-as %t/arm.ll -o %t/arm.o
+; RUN: ld.lld %t/arm.o -o %t/arm
+; RUN: llvm-readobj -h %t/arm | FileCheck %s --check-prefix=ARM
+; RUN: llvm-as %t/armeb.ll -o %t/armeb.o
+; RUN: not ld.lld %t/armeb.o -o %t/armeb
+
+; RUN: llvm-as %t/thumb.ll -o %t/thumb.o
+; RUN: ld.lld %t/thumb.o -o %t/thumb
+; RUN: llvm-readobj -h %t/thumb | FileCheck %s --check-prefix=THUMB
+; RUN: llvm-as %t/thumbeb.ll -o %t/thumbeb.o
+; RUN: not ld.lld %t/thumbeb.o -o %t/thumbeb
+
+; ARM: Class: 32-bit
+; ARM: DataEncoding: LittleEndian
+; ARM: Machine: EM_ARM (
+; ARMEB: Class: 32-bit
+; ARMEB: DataEncoding: BigEndian
+; ARMEB: Machine: EM_ARM (
+
+; THUMB: Class: 32-bit
+; THUMB: DataEncoding: LittleEndian
+; THUMB: Machine: EM_ARM (
+
+;--- arm.ll
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7-linux-musleabi"
+
+define void @_start() {
+ ret void
+}
+
+;--- thumb.ll
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv8m.base-none-unknown-gnueabi"
+
+define void @_start() {
+ ret void
+}
+
+;--- armeb.ll
+target datalayout = "E-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armebv7-unknown-linux-musleabi"
+
+define void @_start() {
+ ret void
+}
+
+;--- thumbeb.ll
+target datalayout = "E-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbebv8m.base-none-unknown-gnueabi"
+
+define void @_start() {
+ ret void
+}
diff --git a/lldb/include/lldb/Expression/UserExpression.h b/lldb/include/lldb/Expression/UserExpression.h
index df7a76664f6d..b6cfeec7e899 100644
--- a/lldb/include/lldb/Expression/UserExpression.h
+++ b/lldb/include/lldb/Expression/UserExpression.h
@@ -192,6 +192,14 @@ public:
/// expression. Text() should contain the definition of this function.
const char *FunctionName() override { return "$__lldb_expr"; }
+ /// Returns whether the call to Parse on this user expression is cacheable.
+ /// This function exists to provide an escape hatch for supporting languages
+ /// where parsing an expression in the exact same context is unsafe. For
+ /// example, languages where generic functions aren't monomorphized, but
+ /// implement some other mechanism to represent generic values, may be unsafe
+ /// to cache, as the concrete type substitution may be different in every
+ /// expression evaluation.
+ virtual bool IsParseCacheable() { return true; }
/// Return the language that should be used when parsing. To use the
/// default, return eLanguageTypeUnknown.
lldb::LanguageType Language() const override { return m_language; }
diff --git a/lldb/source/Breakpoint/BreakpointLocation.cpp b/lldb/source/Breakpoint/BreakpointLocation.cpp
index 27dc7458dc26..931e1ad4b2d9 100644
--- a/lldb/source/Breakpoint/BreakpointLocation.cpp
+++ b/lldb/source/Breakpoint/BreakpointLocation.cpp
@@ -250,6 +250,7 @@ bool BreakpointLocation::ConditionSaysStop(ExecutionContext &exe_ctx,
DiagnosticManager diagnostics;
if (condition_hash != m_condition_hash || !m_user_expression_sp ||
+ !m_user_expression_sp->IsParseCacheable() ||
!m_user_expression_sp->MatchesContext(exe_ctx)) {
LanguageType language = eLanguageTypeUnknown;
// See if we can figure out the language from the frame, otherwise use the
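`IsParseCacheable()` gives a language plugin a veto over reusing a previously parsed condition expression; the check in `ConditionSaysStop` above now re-parses whenever the hash changed, caching is vetoed, or the context no longer matches. A minimal stand-alone sketch of that reuse decision (hypothetical `Expr` type, not the LLDB API):

```cpp
#include <functional>
#include <memory>

// Hypothetical stand-in for UserExpression, reduced to the reuse decision.
struct Expr {
  virtual ~Expr() = default;
  virtual bool IsParseCacheable() const { return true; } // default: reuse OK
  virtual bool MatchesContext(int Ctx) const { return Ctx >= 0; }
};

// Reuse the cached expression only when it exists, permits caching, and
// still matches the execution context; otherwise parse afresh.
std::shared_ptr<Expr>
getOrReparse(std::shared_ptr<Expr> Cached, int Ctx,
             const std::function<std::shared_ptr<Expr>(int)> &Parse) {
  if (!Cached || !Cached->IsParseCacheable() || !Cached->MatchesContext(Ctx))
    return Parse(Ctx);
  return Cached;
}
```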
diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp
index 8f052d0a7b83..58785cde3ec7 100644
--- a/lldb/source/Commands/CommandObjectTarget.cpp
+++ b/lldb/source/Commands/CommandObjectTarget.cpp
@@ -436,8 +436,7 @@ protected:
error = process_sp->LoadCore();
if (error.Fail()) {
- result.AppendError(
- error.AsCString("can't find plug-in for core file"));
+ result.AppendError(error.AsCString("unknown core file format"));
return;
} else {
result.AppendMessageWithFormatv(
@@ -447,9 +446,8 @@ protected:
on_error.release();
}
} else {
- result.AppendErrorWithFormatv(
- "Unable to find process plug-in for core file '{0}'\n",
- core_file.GetPath());
+ result.AppendErrorWithFormatv("Unknown core file format '{0}'\n",
+ core_file.GetPath());
}
} else {
result.AppendMessageWithFormat(
diff --git a/lldb/source/Core/IOHandlerCursesGUI.cpp b/lldb/source/Core/IOHandlerCursesGUI.cpp
index 22b8cc3582ea..abf0b6b801f3 100644
--- a/lldb/source/Core/IOHandlerCursesGUI.cpp
+++ b/lldb/source/Core/IOHandlerCursesGUI.cpp
@@ -3178,13 +3178,13 @@ public:
m_debugger.GetListener(), llvm::StringRef(), &core_file_spec, false));
if (!process_sp) {
- SetError("Unable to find process plug-in for core file!");
+ SetError("Unknown core file format!");
return;
}
Status status = process_sp->LoadCore();
if (status.Fail()) {
- SetError("Can't find plug-in for core file!");
+ SetError("Unknown core file format!");
return;
}
}
diff --git a/lldb/test/API/commands/target/basic/TestTargetCommand.py b/lldb/test/API/commands/target/basic/TestTargetCommand.py
index 96e7fe86ac5a..cb7a5f33f664 100644
--- a/lldb/test/API/commands/target/basic/TestTargetCommand.py
+++ b/lldb/test/API/commands/target/basic/TestTargetCommand.py
@@ -447,7 +447,7 @@ class targetCommandTestCase(TestBase):
self.expect(
"target create -c '" + invalid_core_path + "'",
error=True,
- substrs=["Unable to find process plug-in for core file '"],
+ substrs=["Unknown core file format '"],
)
# Write only files don't seem to be supported on Windows.
diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
index 05e4d6c823c0..0fa52150095e 100644
--- a/llvm/cmake/modules/AddLLVM.cmake
+++ b/llvm/cmake/modules/AddLLVM.cmake
@@ -1307,6 +1307,7 @@ if(NOT LLVM_TOOLCHAIN_TOOLS)
llvm-ar
llvm-cov
llvm-cxxfilt
+ llvm-dlltool
llvm-dwp
llvm-ranlib
llvm-lib
diff --git a/llvm/docs/CommandGuide/llvm-exegesis.rst b/llvm/docs/CommandGuide/llvm-exegesis.rst
index f44f3a74ffbe..874bae82a102 100644
--- a/llvm/docs/CommandGuide/llvm-exegesis.rst
+++ b/llvm/docs/CommandGuide/llvm-exegesis.rst
@@ -37,14 +37,14 @@ SUPPORTED PLATFORMS
only), MIPS, and PowerPC (PowerPC64LE only) on Linux for benchmarking. Not all
benchmarking functionality is guaranteed to work on every platform.
:program:`llvm-exegesis` also has a separate analysis mode that is supported
-on every platform on which LLVM is.
+on every platform that LLVM runs on.
SNIPPET ANNOTATIONS
-------------------
:program:`llvm-exegesis` supports benchmarking arbitrary snippets of assembly.
However, benchmarking these snippets often requires some setup so that they
-can execute properly. :program:`llvm-exegesis` has two annotations and some
+can execute properly. :program:`llvm-exegesis` has four annotations and some
additional utilities to help with setup so that snippets can be benchmarked
properly.
@@ -69,10 +69,11 @@ properly.
specifying memory definitions that can later be mapped into the execution
process of a snippet with the `LLVM-EXEGESIS-MEM-MAP` annotation. Each
value is named using the `<value name>` argument so that it can be referenced
- later within a map annotation. The size is specified in bytes the the value
- is taken in hexadecimal. If the size of the value is less than the specified
- size, the value will be repeated until it fills the entire section of memory.
- Using this annotation requires using the subprocess execution mode.
+ later within a map annotation. The size is specified in a decimal number of
+ bytes and the value is given in hexadecimal. If the size of the value is less
+ than the specified size, the value will be repeated until it fills the entire
+ section of memory. Using this annotation requires using the subprocess execution
+ mode.
* `LLVM-EXEGESIS-MEM-MAP <value name> <address>` - This annotation allows for
mapping previously defined memory definitions into the execution context of a
process. The value name refers to a previously defined memory definition and
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index ff767d951c77..eccac6658807 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -102,6 +102,8 @@ Changes to the AMDGPU Backend
* Implemented :ref:`llvm.get.rounding <int_get_rounding>`
+* Added support for Cortex-A520, Cortex-A720 and Cortex-X4 CPUs.
+
Changes to the ARM Backend
--------------------------
diff --git a/llvm/include/llvm-c/Orc.h b/llvm/include/llvm-c/Orc.h
index a40b17b712fb..9a57c2edfd54 100644
--- a/llvm/include/llvm-c/Orc.h
+++ b/llvm/include/llvm-c/Orc.h
@@ -508,7 +508,7 @@ void LLVMOrcSymbolStringPoolClearDeadEntries(LLVMOrcSymbolStringPoolRef SSP);
* Intern a string in the ExecutionSession's SymbolStringPool and return a
* reference to it. This increments the ref-count of the pool entry, and the
* returned value should be released once the client is done with it by
- * calling LLVMOrReleaseSymbolStringPoolEntry.
+ * calling LLVMOrcReleaseSymbolStringPoolEntry.
*
* Since strings are uniqued within the SymbolStringPool
* LLVMOrcSymbolStringPoolEntryRefs can be compared by value to test string
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 234c05f48d8e..4f1237c4b1f9 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -943,6 +943,10 @@ public:
/// def-use chain linking it to a loop.
void forgetValue(Value *V);
+ /// Forget LCSSA phi node V of loop L to which a new predecessor was added,
+ /// such that it may no longer be trivial.
+ void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V);
+
/// Called when the client has changed the disposition of values in
/// this loop.
///
diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h
index 1fe054316b75..3d9ee1c33e94 100644
--- a/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -128,6 +128,8 @@ public:
/// A metadata node with a DWARF tag (i.e., a constant named \c DW_TAG_*,
/// defined in llvm/BinaryFormat/Dwarf.h). Called \a DINode because it's
/// potentially used for non-DWARF output.
+///
+/// Uses the SubclassData16 Metadata slot.
class DINode : public MDNode {
friend class LLVMContextImpl;
friend class MDNode;
@@ -227,6 +229,8 @@ public:
/// (possibly empty) null-separated \a MDString header that contains arbitrary
/// fields. The remaining operands are \a dwarf_operands(), and are pointers
/// to other metadata.
+///
+/// Uses the SubclassData32 Metadata slot.
class GenericDINode : public DINode {
friend class LLVMContextImpl;
friend class MDNode;
@@ -695,12 +699,13 @@ std::optional<StringRef> DIScope::getSource() const {
/// TODO: Remove the hardcoded name and context, since many types don't use
/// them.
/// TODO: Split up flags.
+///
+/// Uses the SubclassData32 Metadata slot.
class DIType : public DIScope {
unsigned Line;
DIFlags Flags;
uint64_t SizeInBits;
uint64_t OffsetInBits;
- uint32_t AlignInBits;
protected:
DIType(LLVMContext &C, unsigned ID, StorageType Storage, unsigned Tag,
@@ -716,7 +721,7 @@ protected:
this->Line = Line;
this->Flags = Flags;
this->SizeInBits = SizeInBits;
- this->AlignInBits = AlignInBits;
+ this->SubclassData32 = AlignInBits;
this->OffsetInBits = OffsetInBits;
}
@@ -735,7 +740,7 @@ public:
unsigned getLine() const { return Line; }
uint64_t getSizeInBits() const { return SizeInBits; }
- uint32_t getAlignInBits() const { return AlignInBits; }
+ uint32_t getAlignInBits() const { return SubclassData32; }
uint32_t getAlignInBytes() const { return getAlignInBits() / CHAR_BIT; }
uint64_t getOffsetInBits() const { return OffsetInBits; }
DIFlags getFlags() const { return Flags; }
@@ -1389,13 +1394,13 @@ public:
private:
unsigned SourceLanguage;
- bool IsOptimized;
unsigned RuntimeVersion;
- unsigned EmissionKind;
uint64_t DWOId;
+ unsigned EmissionKind;
+ unsigned NameTableKind;
+ bool IsOptimized;
bool SplitDebugInlining;
bool DebugInfoForProfiling;
- unsigned NameTableKind;
bool RangesBaseAddress;
DICompileUnit(LLVMContext &C, StorageType Storage, unsigned SourceLanguage,
@@ -1876,6 +1881,10 @@ public:
/// Debug location.
///
/// A debug location in source code, used for debug info and otherwise.
+///
+/// Uses the SubclassData1, SubclassData16 and SubclassData32
+/// Metadata slots.
class DILocation : public MDNode {
friend class LLVMContextImpl;
friend class MDNode;
@@ -2161,17 +2170,20 @@ public:
}
};
+/// Debug lexical block.
+///
+/// Uses the SubclassData32 Metadata slot.
class DILexicalBlock : public DILexicalBlockBase {
friend class LLVMContextImpl;
friend class MDNode;
- unsigned Line;
uint16_t Column;
DILexicalBlock(LLVMContext &C, StorageType Storage, unsigned Line,
unsigned Column, ArrayRef<Metadata *> Ops)
- : DILexicalBlockBase(C, DILexicalBlockKind, Storage, Ops), Line(Line),
+ : DILexicalBlockBase(C, DILexicalBlockKind, Storage, Ops),
Column(Column) {
+ SubclassData32 = Line;
assert(Column < (1u << 16) && "Expected 16-bit column");
}
~DILexicalBlock() = default;
@@ -2206,7 +2218,7 @@ public:
TempDILexicalBlock clone() const { return cloneImpl(); }
- unsigned getLine() const { return Line; }
+ unsigned getLine() const { return SubclassData32; }
unsigned getColumn() const { return Column; }
static bool classof(const Metadata *MD) {
@@ -2218,12 +2230,11 @@ class DILexicalBlockFile : public DILexicalBlockBase {
friend class LLVMContextImpl;
friend class MDNode;
- unsigned Discriminator;
-
DILexicalBlockFile(LLVMContext &C, StorageType Storage,
unsigned Discriminator, ArrayRef<Metadata *> Ops)
- : DILexicalBlockBase(C, DILexicalBlockFileKind, Storage, Ops),
- Discriminator(Discriminator) {}
+ : DILexicalBlockBase(C, DILexicalBlockFileKind, Storage, Ops) {
+ SubclassData32 = Discriminator;
+ }
~DILexicalBlockFile() = default;
static DILexicalBlockFile *getImpl(LLVMContext &Context, DILocalScope *Scope,
@@ -2255,7 +2266,7 @@ public:
(Scope, File, Discriminator))
TempDILexicalBlockFile clone() const { return cloneImpl(); }
- unsigned getDiscriminator() const { return Discriminator; }
+ unsigned getDiscriminator() const { return SubclassData32; }
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == DILexicalBlockFileKind;
@@ -2338,12 +2349,13 @@ DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const {
return std::nullopt;
}
+/// Debug namespace.
+///
+/// Uses the SubclassData1 Metadata slot.
class DINamespace : public DIScope {
friend class LLVMContextImpl;
friend class MDNode;
- unsigned ExportSymbols : 1;
-
DINamespace(LLVMContext &Context, StorageType Storage, bool ExportSymbols,
ArrayRef<Metadata *> Ops);
~DINamespace() = default;
@@ -2373,7 +2385,7 @@ public:
TempDINamespace clone() const { return cloneImpl(); }
- bool getExportSymbols() const { return ExportSymbols; }
+ bool getExportSymbols() const { return SubclassData1; }
DIScope *getScope() const { return cast_or_null<DIScope>(getRawScope()); }
StringRef getName() const { return getStringOperand(2); }
@@ -2387,11 +2399,11 @@ public:
/// Represents a module in the programming language, for example, a Clang
/// module, or a Fortran module.
+///
+/// Uses the SubclassData1 and SubclassData32 Metadata slots.
class DIModule : public DIScope {
friend class LLVMContextImpl;
friend class MDNode;
- unsigned LineNo;
- bool IsDecl;
DIModule(LLVMContext &Context, StorageType Storage, unsigned LineNo,
bool IsDecl, ArrayRef<Metadata *> Ops);
@@ -2443,8 +2455,8 @@ public:
StringRef getConfigurationMacros() const { return getStringOperand(3); }
StringRef getIncludePath() const { return getStringOperand(4); }
StringRef getAPINotesFile() const { return getStringOperand(5); }
- unsigned getLineNo() const { return LineNo; }
- bool getIsDecl() const { return IsDecl; }
+ unsigned getLineNo() const { return SubclassData32; }
+ bool getIsDecl() const { return SubclassData1; }
Metadata *getRawScope() const { return getOperand(1); }
MDString *getRawName() const { return getOperandAs<MDString>(2); }
@@ -2460,13 +2472,15 @@ public:
};
/// Base class for template parameters.
+///
+/// Uses the SubclassData1 Metadata slot.
class DITemplateParameter : public DINode {
protected:
- bool IsDefault;
-
DITemplateParameter(LLVMContext &Context, unsigned ID, StorageType Storage,
unsigned Tag, bool IsDefault, ArrayRef<Metadata *> Ops)
- : DINode(Context, ID, Storage, Tag, Ops), IsDefault(IsDefault) {}
+ : DINode(Context, ID, Storage, Tag, Ops) {
+ SubclassData1 = IsDefault;
+ }
~DITemplateParameter() = default;
public:
@@ -2475,7 +2489,7 @@ public:
MDString *getRawName() const { return getOperandAs<MDString>(0); }
Metadata *getRawType() const { return getOperand(1); }
- bool isDefault() const { return IsDefault; }
+ bool isDefault() const { return SubclassData1; }
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == DITemplateTypeParameterKind ||
@@ -2572,9 +2586,10 @@ public:
};
/// Base class for variables.
+///
+/// Uses the SubclassData32 Metadata slot.
class DIVariable : public DINode {
unsigned Line;
- uint32_t AlignInBits;
protected:
DIVariable(LLVMContext &C, unsigned ID, StorageType Storage, signed Line,
@@ -2587,7 +2602,7 @@ public:
StringRef getName() const { return getStringOperand(1); }
DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); }
DIType *getType() const { return cast_or_null<DIType>(getRawType()); }
- uint32_t getAlignInBits() const { return AlignInBits; }
+ uint32_t getAlignInBits() const { return SubclassData32; }
uint32_t getAlignInBytes() const { return getAlignInBits() / CHAR_BIT; }
/// Determines the size of the variable's type.
std::optional<uint64_t> getSizeInBits() const;
@@ -3161,9 +3176,10 @@ public:
}
};
+/// Debug common block.
+///
+/// Uses the SubclassData32 Metadata slot.
class DICommonBlock : public DIScope {
- unsigned LineNo;
-
friend class LLVMContextImpl;
friend class MDNode;
@@ -3205,7 +3221,7 @@ public:
}
StringRef getName() const { return getStringOperand(2); }
DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); }
- unsigned getLineNo() const { return LineNo; }
+ unsigned getLineNo() const { return SubclassData32; }
Metadata *getRawScope() const { return getOperand(0); }
Metadata *getRawDecl() const { return getOperand(1); }
@@ -3310,12 +3326,11 @@ public:
/// Label.
///
+/// Uses the SubclassData32 Metadata slot.
class DILabel : public DINode {
friend class LLVMContextImpl;
friend class MDNode;
- unsigned Line;
-
DILabel(LLVMContext &C, StorageType Storage, unsigned Line,
ArrayRef<Metadata *> Ops);
~DILabel() = default;
@@ -3353,7 +3368,7 @@ public:
DILocalScope *getScope() const {
return cast_or_null<DILocalScope>(getRawScope());
}
- unsigned getLine() const { return Line; }
+ unsigned getLine() const { return SubclassData32; }
StringRef getName() const { return getStringOperand(1); }
DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); }
@@ -3455,15 +3470,17 @@ public:
};
/// An imported module (C++ using directive or similar).
+///
+/// Uses the SubclassData32 Metadata slot.
class DIImportedEntity : public DINode {
friend class LLVMContextImpl;
friend class MDNode;
- unsigned Line;
-
DIImportedEntity(LLVMContext &C, StorageType Storage, unsigned Tag,
unsigned Line, ArrayRef<Metadata *> Ops)
- : DINode(C, DIImportedEntityKind, Storage, Tag, Ops), Line(Line) {}
+ : DINode(C, DIImportedEntityKind, Storage, Tag, Ops) {
+ SubclassData32 = Line;
+ }
~DIImportedEntity() = default;
static DIImportedEntity *getImpl(LLVMContext &Context, unsigned Tag,
@@ -3499,7 +3516,7 @@ public:
TempDIImportedEntity clone() const { return cloneImpl(); }
- unsigned getLine() const { return Line; }
+ unsigned getLine() const { return SubclassData32; }
DIScope *getScope() const { return cast_or_null<DIScope>(getRawScope()); }
DINode *getEntity() const { return cast_or_null<DINode>(getRawEntity()); }
StringRef getName() const { return getStringOperand(2); }
@@ -3567,6 +3584,8 @@ public:
/// \c DW_MACINFO_*, defined in llvm/BinaryFormat/Dwarf.h). Called \a
/// DIMacroNode
/// because it's potentially used for non-DWARF output.
+///
+/// Uses the SubclassData16 Metadata slot.
class DIMacroNode : public MDNode {
friend class LLVMContextImpl;
friend class MDNode;
@@ -3611,15 +3630,18 @@ public:
}
};
+/// Macro.
+///
+/// Uses the SubclassData32 Metadata slot.
class DIMacro : public DIMacroNode {
friend class LLVMContextImpl;
friend class MDNode;
- unsigned Line;
-
DIMacro(LLVMContext &C, StorageType Storage, unsigned MIType, unsigned Line,
ArrayRef<Metadata *> Ops)
- : DIMacroNode(C, DIMacroKind, Storage, MIType, Ops), Line(Line) {}
+ : DIMacroNode(C, DIMacroKind, Storage, MIType, Ops) {
+ SubclassData32 = Line;
+ }
~DIMacro() = default;
static DIMacro *getImpl(LLVMContext &Context, unsigned MIType, unsigned Line,
@@ -3649,7 +3671,7 @@ public:
TempDIMacro clone() const { return cloneImpl(); }
- unsigned getLine() const { return Line; }
+ unsigned getLine() const { return SubclassData32; }
StringRef getName() const { return getStringOperand(0); }
StringRef getValue() const { return getStringOperand(1); }
@@ -3662,15 +3684,18 @@ public:
}
};
+/// Macro file.
+///
+/// Uses the SubclassData32 Metadata slot.
class DIMacroFile : public DIMacroNode {
friend class LLVMContextImpl;
friend class MDNode;
- unsigned Line;
-
DIMacroFile(LLVMContext &C, StorageType Storage, unsigned MIType,
unsigned Line, ArrayRef<Metadata *> Ops)
- : DIMacroNode(C, DIMacroFileKind, Storage, MIType, Ops), Line(Line) {}
+ : DIMacroNode(C, DIMacroFileKind, Storage, MIType, Ops) {
+ SubclassData32 = Line;
+ }
~DIMacroFile() = default;
static DIMacroFile *getImpl(LLVMContext &Context, unsigned MIType,
@@ -3711,7 +3736,7 @@ public:
replaceOperandWith(1, Elements.get());
}
- unsigned getLine() const { return Line; }
+ unsigned getLine() const { return SubclassData32; }
DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); }
DIMacroNodeArray getElements() const {
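Every hunk in this header follows the same recipe: delete a per-subclass scalar member (`Line`, `AlignInBits`, `Discriminator`, `ExportSymbols`, ...) and park the value in a spare `SubclassData1`/`SubclassData16`/`SubclassData32` slot that the `Metadata` base class already carries, shrinking each node. A self-contained sketch of the pattern with simplified types (not the real `MDNode` layout):

```cpp
#include <cstdint>

// The Metadata-like base class reserves small "subclass data" slots.
class NodeBase {
protected:
  uint8_t Kind = 0;
  bool SubclassData1 = false;  // spare bit
  uint16_t SubclassData16 = 0; // spare half-word
  uint32_t SubclassData32 = 0; // spare word
};

// Old shape: every subclass carried its own Line member.
class LexicalBlockOld : public NodeBase {
  unsigned Line;
  uint16_t Column;
public:
  LexicalBlockOld(unsigned L, uint16_t C) : Line(L), Column(C) {}
  unsigned getLine() const { return Line; }
};

// New shape: Line lives in the base's spare 32-bit slot.
class LexicalBlockNew : public NodeBase {
  uint16_t Column;
public:
  LexicalBlockNew(unsigned L, uint16_t C) : Column(C) { SubclassData32 = L; }
  unsigned getLine() const { return SubclassData32; }
};

static_assert(sizeof(LexicalBlockNew) < sizeof(LexicalBlockOld),
              "reusing the spare slot shrinks the node on common ABIs");
```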
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index 4505f6e8c5da..c74b7b80749e 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -426,6 +426,11 @@ inline constexpr CpuInfo CpuInfos[] = {
AArch64::AEK_PAUTH, AArch64::AEK_MTE, AArch64::AEK_SSBS,
AArch64::AEK_SVE, AArch64::AEK_SVE2, AArch64::AEK_SVE2BITPERM,
AArch64::AEK_FP16FML}))},
+ {"cortex-a520", ARMV9_2A,
+ (AArch64::ExtensionBitset(
+ {AArch64::AEK_SB, AArch64::AEK_SSBS, AArch64::AEK_MTE,
+ AArch64::AEK_FP16FML, AArch64::AEK_PAUTH, AArch64::AEK_SVE2BITPERM,
+ AArch64::AEK_FLAGM, AArch64::AEK_PERFMON, AArch64::AEK_PREDRES}))},
{"cortex-a57", ARMV8A,
(AArch64::ExtensionBitset(
{AArch64::AEK_AES, AArch64::AEK_SHA2, AArch64::AEK_CRC}))},
@@ -483,6 +488,12 @@ inline constexpr CpuInfo CpuInfos[] = {
AArch64::AEK_I8MM, AArch64::AEK_PREDRES, AArch64::AEK_PERFMON,
AArch64::AEK_PROFILE, AArch64::AEK_SVE, AArch64::AEK_SVE2BITPERM,
AArch64::AEK_BF16, AArch64::AEK_FLAGM}))},
+ {"cortex-a720", ARMV9_2A,
+ (AArch64::ExtensionBitset(
+ {AArch64::AEK_SB, AArch64::AEK_SSBS, AArch64::AEK_MTE,
+ AArch64::AEK_FP16FML, AArch64::AEK_PAUTH, AArch64::AEK_SVE2BITPERM,
+ AArch64::AEK_FLAGM, AArch64::AEK_PERFMON, AArch64::AEK_PREDRES,
+ AArch64::AEK_PROFILE}))},
{"cortex-r82", ARMV8R,
(AArch64::ExtensionBitset({AArch64::AEK_LSE}))},
{"cortex-x1", ARMV8_2A,
@@ -508,6 +519,12 @@ inline constexpr CpuInfo CpuInfos[] = {
AArch64::AEK_SVE2BITPERM, AArch64::AEK_SB, AArch64::AEK_PAUTH,
AArch64::AEK_FP16, AArch64::AEK_FP16FML, AArch64::AEK_PREDRES,
AArch64::AEK_FLAGM, AArch64::AEK_SSBS}))},
+ {"cortex-x4", ARMV9_2A,
+ (AArch64::ExtensionBitset(
+ {AArch64::AEK_SB, AArch64::AEK_SSBS, AArch64::AEK_MTE,
+ AArch64::AEK_FP16FML, AArch64::AEK_PAUTH, AArch64::AEK_SVE2BITPERM,
+ AArch64::AEK_FLAGM, AArch64::AEK_PERFMON, AArch64::AEK_PREDRES,
+ AArch64::AEK_PROFILE}))},
{"neoverse-e1", ARMV8_2A,
(AArch64::ExtensionBitset(
{AArch64::AEK_AES, AArch64::AEK_SHA2, AArch64::AEK_DOTPROD,
diff --git a/llvm/lib/Analysis/AssumptionCache.cpp b/llvm/lib/Analysis/AssumptionCache.cpp
index bbcf25edf315..81b26678ae5d 100644
--- a/llvm/lib/Analysis/AssumptionCache.cpp
+++ b/llvm/lib/Analysis/AssumptionCache.cpp
@@ -94,13 +94,7 @@ findAffectedValues(CallBase *CI, TargetTransformInfo *TTI,
if (Pred == ICmpInst::ICMP_EQ) {
// For equality comparisons, we handle the case of bit inversion.
auto AddAffectedFromEq = [&AddAffected](Value *V) {
- Value *A;
- if (match(V, m_Not(m_Value(A)))) {
- AddAffected(A);
- V = A;
- }
-
- Value *B;
+ Value *A, *B;
// (A & B) or (A | B) or (A ^ B).
if (match(V, m_BitwiseLogic(m_Value(A), m_Value(B)))) {
AddAffected(A);
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 8b3fc8d12605..9670e5309b32 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -8410,6 +8410,44 @@ void ScalarEvolution::forgetValue(Value *V) {
forgetMemoizedResults(ToForget);
}
+void ScalarEvolution::forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V) {
+ if (!isSCEVable(V->getType()))
+ return;
+
+  // If SCEV looked through a trivial LCSSA phi node, we might have SCEVs
+  // directly using a SCEVUnknown/SCEVAddRec defined in the loop. After an
+  // extra predecessor is added, this is no longer valid. Find all Unknowns and
+  // AddRecs defined in the loop and invalidate any SCEVs making use of them.
+ if (const SCEV *S = getExistingSCEV(V)) {
+ struct InvalidationRootCollector {
+ Loop *L;
+ SmallVector<const SCEV *, 8> Roots;
+
+ InvalidationRootCollector(Loop *L) : L(L) {}
+
+ bool follow(const SCEV *S) {
+ if (auto *SU = dyn_cast<SCEVUnknown>(S)) {
+ if (auto *I = dyn_cast<Instruction>(SU->getValue()))
+ if (L->contains(I))
+ Roots.push_back(S);
+ } else if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
+ if (L->contains(AddRec->getLoop()))
+ Roots.push_back(S);
+ }
+ return true;
+ }
+ bool isDone() const { return false; }
+ };
+
+ InvalidationRootCollector C(L);
+ visitAll(S, C);
+ forgetMemoizedResults(C.Roots);
+ }
+
+ // Also perform the normal invalidation.
+ forgetValue(V);
+}
+
void ScalarEvolution::forgetLoopDispositions() { LoopDispositions.clear(); }
void ScalarEvolution::forgetBlockAndLoopDispositions(Value *V) {
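The collector plugs into `visitAll`, which walks the SCEV expression DAG asking `follow()` whether to recurse into each node's operands and `isDone()` whether to stop early. A simplified stand-alone analogue of that protocol (toy `Expr` tree, no visited-set deduplication):

```cpp
#include <vector>

struct Expr {
  bool InLoop = false; // stand-in for "defined inside loop L"
  std::vector<const Expr *> Ops;
};

// Ask follow() whether to recurse into operands; ask isDone() whether to
// cut the traversal short.
template <typename Visitor> void visitAll(const Expr *E, Visitor &V) {
  if (V.isDone() || !V.follow(E))
    return;
  for (const Expr *Op : E->Ops)
    visitAll(Op, V);
}

struct RootCollector {
  std::vector<const Expr *> Roots;
  bool follow(const Expr *E) {
    if (E->InLoop)
      Roots.push_back(E); // invalidation root, like the Unknown/AddRec cases
    return true;          // keep walking; we want every root
  }
  bool isDone() const { return false; } // never stop early
};
```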
diff --git a/llvm/lib/Analysis/TrainingLogger.cpp b/llvm/lib/Analysis/TrainingLogger.cpp
index e236890aa2bc..344ca92e18b5 100644
--- a/llvm/lib/Analysis/TrainingLogger.cpp
+++ b/llvm/lib/Analysis/TrainingLogger.cpp
@@ -27,11 +27,6 @@
using namespace llvm;
-// FIXME(mtrofin): remove the flag altogether
-static cl::opt<bool>
- UseSimpleLogger("tfutils-use-simplelogger", cl::init(true), cl::Hidden,
- cl::desc("Output simple (non-protobuf) log."));
-
void Logger::writeHeader(std::optional<TensorSpec> AdviceSpec) {
json::OStream JOS(*OS);
JOS.object([&]() {
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 5f5d7e07cac1..e25aa9c6863f 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -662,16 +662,6 @@ static void computeKnownBitsFromCmp(const Value *V, const ICmpInst *Cmp,
// known bits from the RHS to V.
Known.Zero |= RHSKnown.Zero & MaskKnown.One;
Known.One |= RHSKnown.One & MaskKnown.One;
- // assume(~(v & b) = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
- m_Value(A)))) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
- KnownBits MaskKnown = computeKnownBits(B, Depth + 1, QueryNoAC);
-
- // For those bits in the mask that are known to be one, we can propagate
- // inverted known bits from the RHS to V.
- Known.Zero |= RHSKnown.One & MaskKnown.One;
- Known.One |= RHSKnown.Zero & MaskKnown.One;
// assume(v | b = a)
} else if (match(Cmp,
m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A)))) {
@@ -682,16 +672,6 @@ static void computeKnownBitsFromCmp(const Value *V, const ICmpInst *Cmp,
// bits from the RHS to V.
Known.Zero |= RHSKnown.Zero & BKnown.Zero;
Known.One |= RHSKnown.One & BKnown.Zero;
- // assume(~(v | b) = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
- m_Value(A)))) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
- KnownBits BKnown = computeKnownBits(B, Depth + 1, QueryNoAC);
-
- // For those bits in B that are known to be zero, we can propagate
- // inverted known bits from the RHS to V.
- Known.Zero |= RHSKnown.One & BKnown.Zero;
- Known.One |= RHSKnown.Zero & BKnown.Zero;
// assume(v ^ b = a)
} else if (match(Cmp,
m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A)))) {
@@ -705,19 +685,6 @@ static void computeKnownBitsFromCmp(const Value *V, const ICmpInst *Cmp,
Known.One |= RHSKnown.One & BKnown.Zero;
Known.Zero |= RHSKnown.One & BKnown.One;
Known.One |= RHSKnown.Zero & BKnown.One;
- // assume(~(v ^ b) = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
- m_Value(A)))) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
- KnownBits BKnown = computeKnownBits(B, Depth + 1, QueryNoAC);
-
- // For those bits in B that are known to be zero, we can propagate
- // inverted known bits from the RHS to V. For those bits in B that are
- // known to be one, we can propagate known bits from the RHS to V.
- Known.Zero |= RHSKnown.One & BKnown.Zero;
- Known.One |= RHSKnown.Zero & BKnown.Zero;
- Known.Zero |= RHSKnown.Zero & BKnown.One;
- Known.One |= RHSKnown.One & BKnown.One;
// assume(v << c = a)
} else if (match(Cmp, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
m_Value(A))) &&
@@ -729,17 +696,6 @@ static void computeKnownBitsFromCmp(const Value *V, const ICmpInst *Cmp,
RHSKnown.Zero.lshrInPlace(C);
RHSKnown.One.lshrInPlace(C);
Known = Known.unionWith(RHSKnown);
- // assume(~(v << c) = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
- m_Value(A))) &&
- C < BitWidth) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
- // For those bits in RHS that are known, we can propagate them inverted
- // to known bits in V shifted to the right by C.
- RHSKnown.One.lshrInPlace(C);
- Known.Zero |= RHSKnown.One;
- RHSKnown.Zero.lshrInPlace(C);
- Known.One |= RHSKnown.Zero;
// assume(v >> c = a)
} else if (match(Cmp, m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)),
m_Value(A))) &&
@@ -749,15 +705,6 @@ static void computeKnownBitsFromCmp(const Value *V, const ICmpInst *Cmp,
// bits in V shifted to the right by C.
Known.Zero |= RHSKnown.Zero << C;
Known.One |= RHSKnown.One << C;
- // assume(~(v >> c) = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))),
- m_Value(A))) &&
- C < BitWidth) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
- // For those bits in RHS that are known, we can propagate them inverted
- // to known bits in V shifted to the right by C.
- Known.Zero |= RHSKnown.One << C;
- Known.One |= RHSKnown.Zero << C;
}
break;
case ICmpInst::ICMP_NE: {
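Each deleted branch was the `~(...)`-inverted twin of a pattern that remains. As a worked instance of the surviving `assume((v & b) == a)` case above, known bits of `a` transfer to `v` exactly where the mask `b` is known to be one:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // Known-bit sets for the mask b and the RHS a.
  uint8_t bKnownOne = 0x0F;  // low nibble of b is known to be 1
  uint8_t aKnownOne = 0x05;  // bits of a known to be 1
  uint8_t aKnownZero = 0xAA; // bits of a known to be 0 (disjoint from above)

  // assume((v & b) == a): where the mask is known 1, v's bit equals a's bit.
  uint8_t vKnownOne = aKnownOne & bKnownOne;   // 0x05
  uint8_t vKnownZero = aKnownZero & bKnownOne; // 0x0A

  std::printf("v known one: 0x%02x, known zero: 0x%02x\n", vKnownOne,
              vKnownZero);
}
```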
diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp
index bf114921a7d2..f3cb7fa5af61 100644
--- a/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -163,8 +163,6 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineBasicBlock *MBB = nullptr;
- MachineLoop *Loop = nullptr;
- bool IsExiting = false;
float TotalWeight = 0;
unsigned NumInstr = 0; // Number of instructions using LI
SmallPtrSet<MachineInstr *, 8> Visited;
@@ -221,6 +219,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
}
};
+ bool IsExiting = false;
std::set<CopyHint> CopyHints;
DenseMap<unsigned, float> Hint;
for (MachineRegisterInfo::reg_instr_nodbg_iterator
@@ -262,7 +261,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
// Get loop info for mi.
if (MI->getParent() != MBB) {
MBB = MI->getParent();
- Loop = Loops.getLoopFor(MBB);
+ const MachineLoop *Loop = Loops.getLoopFor(MBB);
IsExiting = Loop ? Loop->isLoopExiting(MBB) : false;
}
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index 2740265f7534..213dc1e18e7f 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -33,7 +33,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
@@ -86,7 +85,6 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate {
LiveIntervals &LIS;
LiveStacks &LSS;
MachineDominatorTree &MDT;
- MachineLoopInfo &Loops;
VirtRegMap &VRM;
MachineRegisterInfo &MRI;
const TargetInstrInfo &TII;
@@ -138,8 +136,7 @@ public:
VirtRegMap &vrm)
: MF(mf), LIS(pass.getAnalysis<LiveIntervals>()),
LSS(pass.getAnalysis<LiveStacks>()),
- MDT(pass.getAnalysis<MachineDominatorTree>()),
- Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
+ MDT(pass.getAnalysis<MachineDominatorTree>()), VRM(vrm),
MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()),
TRI(*mf.getSubtarget().getRegisterInfo()),
MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
@@ -157,7 +154,6 @@ class InlineSpiller : public Spiller {
LiveIntervals &LIS;
LiveStacks &LSS;
MachineDominatorTree &MDT;
- MachineLoopInfo &Loops;
VirtRegMap &VRM;
MachineRegisterInfo &MRI;
const TargetInstrInfo &TII;
@@ -197,8 +193,7 @@ public:
VirtRegAuxInfo &VRAI)
: MF(MF), LIS(Pass.getAnalysis<LiveIntervals>()),
LSS(Pass.getAnalysis<LiveStacks>()),
- MDT(Pass.getAnalysis<MachineDominatorTree>()),
- Loops(Pass.getAnalysis<MachineLoopInfo>()), VRM(VRM),
+ MDT(Pass.getAnalysis<MachineDominatorTree>()), VRM(VRM),
MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()),
TRI(*MF.getSubtarget().getRegisterInfo()),
MBFI(Pass.getAnalysis<MachineBlockFrequencyInfo>()),
diff --git a/llvm/lib/CodeGen/SpillPlacement.cpp b/llvm/lib/CodeGen/SpillPlacement.cpp
index 6e74e51d6782..cdb8099e354b 100644
--- a/llvm/lib/CodeGen/SpillPlacement.cpp
+++ b/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -32,7 +32,6 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -52,7 +51,6 @@ char &llvm::SpillPlacementID = SpillPlacement::ID;
INITIALIZE_PASS_BEGIN(SpillPlacement, DEBUG_TYPE,
"Spill Code Placement Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(SpillPlacement, DEBUG_TYPE,
"Spill Code Placement Analysis", true, true)
@@ -60,7 +58,6 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequiredTransitive<EdgeBundles>();
- AU.addRequiredTransitive<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -195,7 +192,6 @@ struct SpillPlacement::Node {
bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
bundles = &getAnalysis<EdgeBundles>();
- loops = &getAnalysis<MachineLoopInfo>();
assert(!nodes && "Leaking node array");
nodes = new Node[bundles->getNumBundles()];
diff --git a/llvm/lib/CodeGen/SpillPlacement.h b/llvm/lib/CodeGen/SpillPlacement.h
index 2a298c7a74aa..5fd9b085259d 100644
--- a/llvm/lib/CodeGen/SpillPlacement.h
+++ b/llvm/lib/CodeGen/SpillPlacement.h
@@ -38,13 +38,11 @@ class BitVector;
class EdgeBundles;
class MachineBlockFrequencyInfo;
class MachineFunction;
-class MachineLoopInfo;
class SpillPlacement : public MachineFunctionPass {
struct Node;
const MachineFunction *MF = nullptr;
const EdgeBundles *bundles = nullptr;
- const MachineLoopInfo *loops = nullptr;
const MachineBlockFrequencyInfo *MBFI = nullptr;
Node *nodes = nullptr;
diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
index 6d8e23961373..164adce444a1 100644
--- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
@@ -469,11 +469,11 @@ MachOPlatform::MachOPlatform(
// itself (to build the allocation actions that will call the registration
// functions). Further complicating the situation (a) the graph containing
// the registration functions is allowed to depend on other graphs (e.g. the
- // graph containing the ORC runtime RTTI support) so we need to handle with
- // an unknown set of dependencies during bootstrap, and (b) these graphs may
+ // graph containing the ORC runtime RTTI support) so we need to handle an
+ // unknown set of dependencies during bootstrap, and (b) these graphs may
// be linked concurrently if the user has installed a concurrent dispatcher.
//
- // We satisfy these constraint by implementing a bootstrap phase during which
+ // We satisfy these constraints by implementing a bootstrap phase during which
// allocation actions generated by MachOPlatform are appended to a list of
// deferred allocation actions, rather than to the graphs themselves. At the
// end of the bootstrap process the deferred actions are attached to a final
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index 943826c6ac89..927aefb8bd47 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -915,11 +915,11 @@ DICompileUnit::DICompileUnit(LLVMContext &C, StorageType Storage,
bool DebugInfoForProfiling, unsigned NameTableKind,
bool RangesBaseAddress, ArrayRef<Metadata *> Ops)
: DIScope(C, DICompileUnitKind, Storage, dwarf::DW_TAG_compile_unit, Ops),
- SourceLanguage(SourceLanguage), IsOptimized(IsOptimized),
- RuntimeVersion(RuntimeVersion), EmissionKind(EmissionKind), DWOId(DWOId),
- SplitDebugInlining(SplitDebugInlining),
+ SourceLanguage(SourceLanguage), RuntimeVersion(RuntimeVersion),
+ DWOId(DWOId), EmissionKind(EmissionKind), NameTableKind(NameTableKind),
+ IsOptimized(IsOptimized), SplitDebugInlining(SplitDebugInlining),
DebugInfoForProfiling(DebugInfoForProfiling),
- NameTableKind(NameTableKind), RangesBaseAddress(RangesBaseAddress) {
+ RangesBaseAddress(RangesBaseAddress) {
assert(Storage != Uniqued);
}
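The member reorder in this constructor (and the matching header hunk above) is a pure layout change: keeping the `unsigned`/`uint64_t` members together and packing the `bool`s at the tail removes padding holes. A compilable illustration of the effect, with placeholder field names borrowed from `DICompileUnit`:

```cpp
#include <cstdint>

struct Interleaved { // bools scattered between wider members (old order)
  unsigned SourceLanguage;
  bool IsOptimized;
  unsigned RuntimeVersion;
  uint64_t DWOId;
  unsigned EmissionKind;
  bool SplitDebugInlining;
  bool DebugInfoForProfiling;
  unsigned NameTableKind;
  bool RangesBaseAddress;
};

struct Grouped { // wide members first, bools packed at the tail (new order)
  unsigned SourceLanguage;
  unsigned RuntimeVersion;
  uint64_t DWOId;
  unsigned EmissionKind;
  unsigned NameTableKind;
  bool IsOptimized;
  bool SplitDebugInlining;
  bool DebugInfoForProfiling;
  bool RangesBaseAddress;
};

static_assert(sizeof(Grouped) < sizeof(Interleaved),
              "grouping same-width members removes padding on common ABIs");
```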
@@ -1181,8 +1181,9 @@ DILexicalBlockFile *DILexicalBlockFile::getImpl(LLVMContext &Context,
DINamespace::DINamespace(LLVMContext &Context, StorageType Storage,
bool ExportSymbols, ArrayRef<Metadata *> Ops)
- : DIScope(Context, DINamespaceKind, Storage, dwarf::DW_TAG_namespace, Ops),
- ExportSymbols(ExportSymbols) {}
+ : DIScope(Context, DINamespaceKind, Storage, dwarf::DW_TAG_namespace, Ops) {
+ SubclassData1 = ExportSymbols;
+}
DINamespace *DINamespace::getImpl(LLVMContext &Context, Metadata *Scope,
MDString *Name, bool ExportSymbols,
StorageType Storage, bool ShouldCreate) {
@@ -1196,8 +1197,9 @@ DINamespace *DINamespace::getImpl(LLVMContext &Context, Metadata *Scope,
DICommonBlock::DICommonBlock(LLVMContext &Context, StorageType Storage,
unsigned LineNo, ArrayRef<Metadata *> Ops)
: DIScope(Context, DICommonBlockKind, Storage, dwarf::DW_TAG_common_block,
- Ops),
- LineNo(LineNo) {}
+ Ops) {
+ SubclassData32 = LineNo;
+}
DICommonBlock *DICommonBlock::getImpl(LLVMContext &Context, Metadata *Scope,
Metadata *Decl, MDString *Name,
Metadata *File, unsigned LineNo,
@@ -1211,8 +1213,10 @@ DICommonBlock *DICommonBlock::getImpl(LLVMContext &Context, Metadata *Scope,
DIModule::DIModule(LLVMContext &Context, StorageType Storage, unsigned LineNo,
bool IsDecl, ArrayRef<Metadata *> Ops)
- : DIScope(Context, DIModuleKind, Storage, dwarf::DW_TAG_module, Ops),
- LineNo(LineNo), IsDecl(IsDecl) {}
+ : DIScope(Context, DIModuleKind, Storage, dwarf::DW_TAG_module, Ops) {
+ SubclassData1 = IsDecl;
+ SubclassData32 = LineNo;
+}
DIModule *DIModule::getImpl(LLVMContext &Context, Metadata *File,
Metadata *Scope, MDString *Name,
MDString *ConfigurationMacros,
@@ -1301,8 +1305,9 @@ DILocalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
DIVariable::DIVariable(LLVMContext &C, unsigned ID, StorageType Storage,
signed Line, ArrayRef<Metadata *> Ops,
uint32_t AlignInBits)
- : DINode(C, ID, Storage, dwarf::DW_TAG_variable, Ops), Line(Line),
- AlignInBits(AlignInBits) {}
+ : DINode(C, ID, Storage, dwarf::DW_TAG_variable, Ops), Line(Line) {
+ SubclassData32 = AlignInBits;
+}
std::optional<uint64_t> DIVariable::getSizeInBits() const {
// This is used by the Verifier so be mindful of broken types.
const Metadata *RawType = getRawType();
@@ -1328,7 +1333,9 @@ std::optional<uint64_t> DIVariable::getSizeInBits() const {
DILabel::DILabel(LLVMContext &C, StorageType Storage, unsigned Line,
ArrayRef<Metadata *> Ops)
- : DINode(C, DILabelKind, Storage, dwarf::DW_TAG_label, Ops), Line(Line) {}
+ : DINode(C, DILabelKind, Storage, dwarf::DW_TAG_label, Ops) {
+ SubclassData32 = Line;
+}
DILabel *DILabel::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
Metadata *File, unsigned Line, StorageType Storage,
bool ShouldCreate) {
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index 8490fefe7ff5..e4d18d8a7dd5 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -843,7 +843,7 @@ bool ELFWriter::maybeWriteCompression(
uint32_t ChType, uint64_t Size,
SmallVectorImpl<uint8_t> &CompressedContents, Align Alignment) {
uint64_t HdrSize =
- is64Bit() ? sizeof(ELF::Elf32_Chdr) : sizeof(ELF::Elf64_Chdr);
+ is64Bit() ? sizeof(ELF::Elf64_Chdr) : sizeof(ELF::Elf32_Chdr);
if (Size <= HdrSize + CompressedContents.size())
return false;
// Platform specific header is followed by compressed data.
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index dd2ae3e70114..f1f3d56335e3 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -852,6 +852,12 @@ def TuneA510 : SubtargetFeature<"a510", "ARMProcFamily", "CortexA510",
FeaturePostRAScheduler
]>;
+def TuneA520 : SubtargetFeature<"a520", "ARMProcFamily", "CortexA520",
+ "Cortex-A520 ARM processors", [
+ FeatureFuseAES,
+ FeatureFuseAdrpAdd,
+ FeaturePostRAScheduler]>;
+
def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
"Cortex-A57 ARM processors", [
FeatureFuseAES,
@@ -957,6 +963,17 @@ def TuneA715 : SubtargetFeature<"a715", "ARMProcFamily", "CortexA715",
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
+def TuneA720 : SubtargetFeature<"a720", "ARMProcFamily", "CortexA720",
+ "Cortex-A720 ARM processors", [
+ FeatureFuseAES,
+ FeaturePostRAScheduler,
+ FeatureCmpBccFusion,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
+ FeatureFuseAdrpAdd,
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
+
def TuneR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily",
"CortexR82",
"Cortex-R82 ARM processors", [
@@ -994,6 +1011,16 @@ def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3",
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
+def TuneX4 : SubtargetFeature<"cortex-x4", "ARMProcFamily", "CortexX4",
+ "Cortex-X4 ARM processors", [
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
+ FeatureFuseAdrpAdd,
+ FeatureFuseAES,
+ FeaturePostRAScheduler,
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
+
def TuneA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX",
"Fujitsu A64FX processors", [
FeaturePostRAScheduler,
@@ -1329,6 +1356,9 @@ def ProcessorFeatures {
FeatureMatMulInt8, FeatureBF16, FeatureAM,
FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
FeatureFP16FML];
+ list<SubtargetFeature> A520 = [HasV9_2aOps, FeaturePerfMon, FeatureAM,
+ FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
+ FeatureFP16FML];
list<SubtargetFeature> A65 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
FeatureNEON, FeatureFullFP16, FeatureDotProd,
FeatureRCPC, FeatureSSBS, FeatureRAS,
@@ -1355,6 +1385,9 @@ def ProcessorFeatures {
FeatureFP16FML, FeatureSVE, FeatureTRBE,
FeatureSVE2BitPerm, FeatureBF16, FeatureETE,
FeaturePerfMon, FeatureMatMulInt8, FeatureSPE];
+ list<SubtargetFeature> A720 = [HasV9_2aOps, FeatureMTE, FeatureFP16FML,
+ FeatureTRBE, FeatureSVE2BitPerm, FeatureETE,
+ FeaturePerfMon, FeatureSPE, FeatureSPE_EEF];
list<SubtargetFeature> R82 = [HasV8_0rOps, FeaturePerfMon, FeatureFullFP16,
FeatureFP16FML, FeatureSSBS, FeaturePredRes,
FeatureSB];
@@ -1376,6 +1409,10 @@ def ProcessorFeatures {
FeatureSPE, FeatureBF16, FeatureMatMulInt8,
FeatureMTE, FeatureSVE2BitPerm, FeatureFullFP16,
FeatureFP16FML];
+ list<SubtargetFeature> X4 = [HasV9_2aOps,
+ FeaturePerfMon, FeatureETE, FeatureTRBE,
+ FeatureSPE, FeatureMTE, FeatureSVE2BitPerm,
+ FeatureFP16FML, FeatureSPE_EEF];
list<SubtargetFeature> A64FX = [HasV8_2aOps, FeatureFPARMv8, FeatureNEON,
FeatureSHA2, FeaturePerfMon, FeatureFullFP16,
FeatureSVE, FeatureComplxNum];
@@ -1480,6 +1517,8 @@ def : ProcessorModel<"cortex-a55", CortexA55Model, ProcessorFeatures.A55,
[TuneA55]>;
def : ProcessorModel<"cortex-a510", CortexA510Model, ProcessorFeatures.A510,
[TuneA510]>;
+def : ProcessorModel<"cortex-a520", CortexA510Model, ProcessorFeatures.A520,
+ [TuneA520]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, ProcessorFeatures.A53,
[TuneA57]>;
def : ProcessorModel<"cortex-a65", CortexA53Model, ProcessorFeatures.A65,
@@ -1506,6 +1545,8 @@ def : ProcessorModel<"cortex-a710", NeoverseN2Model, ProcessorFeatures.A710,
[TuneA710]>;
def : ProcessorModel<"cortex-a715", NeoverseN2Model, ProcessorFeatures.A715,
[TuneA715]>;
+def : ProcessorModel<"cortex-a720", NeoverseN2Model, ProcessorFeatures.A720,
+ [TuneA720]>;
def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82,
[TuneR82]>;
def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1,
@@ -1516,6 +1557,8 @@ def : ProcessorModel<"cortex-x2", NeoverseN2Model, ProcessorFeatures.X2,
[TuneX2]>;
def : ProcessorModel<"cortex-x3", NeoverseN2Model, ProcessorFeatures.X3,
[TuneX3]>;
+def : ProcessorModel<"cortex-x4", NeoverseN2Model, ProcessorFeatures.X4,
+ [TuneX4]>;
def : ProcessorModel<"neoverse-e1", CortexA53Model,
ProcessorFeatures.NeoverseE1, [TuneNeoverseE1]>;
def : ProcessorModel<"neoverse-n1", NeoverseN1Model,
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index ff14fcf5dfcc..beae9b52c531 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -149,6 +149,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
MaxBytesForLoopAlignment = 16;
break;
case CortexA510:
+ case CortexA520:
PrefFunctionAlignment = Align(16);
VScaleForTuning = 1;
PrefLoopAlignment = Align(16);
@@ -156,8 +157,10 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
break;
case CortexA710:
case CortexA715:
+ case CortexA720:
case CortexX2:
case CortexX3:
+ case CortexX4:
PrefFunctionAlignment = Align(16);
VScaleForTuning = 1;
PrefLoopAlignment = Align(32);
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index b2ee2e76d0e8..ae2bea62056e 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -56,6 +56,7 @@ public:
CortexA53,
CortexA55,
CortexA510,
+ CortexA520,
CortexA57,
CortexA65,
CortexA72,
@@ -67,11 +68,13 @@ public:
CortexA78C,
CortexA710,
CortexA715,
+ CortexA720,
CortexR82,
CortexX1,
CortexX1C,
CortexX2,
CortexX3,
+ CortexX4,
ExynosM3,
Falkor,
Kryo,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c9381218eee7..cf667077e882 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1396,6 +1396,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMINIMUM, VT, Custom);
}
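+ // fabs only clears the sign bit, so with a Custom action v8f16 fabs can be
+ // lowered to a single vector AND with a sign-mask constant (see the
+ // vec_fabs.ll updates later in this patch) instead of being scalarized
+ // through half<->float conversions.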
+ setOperationAction(ISD::FABS, MVT::v8f16, Custom);
+
// (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
// even though v8i16 is a legal type.
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
@@ -4662,6 +4664,8 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();
unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
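+ // Bail out if the constant element width does not evenly divide the node
+ // width; the truncating division above would otherwise drop trailing bits
+ // and the per-element slicing below would misread the constant.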
+ if ((SizeInBits % SrcEltSizeInBits) != 0)
+ return false;
APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index aba2ebf31758..ae47929fce6a 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -208,6 +208,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
.Case("0xd03", "cortex-a53")
.Case("0xd05", "cortex-a55")
.Case("0xd46", "cortex-a510")
+ .Case("0xd80", "cortex-a520")
.Case("0xd07", "cortex-a57")
.Case("0xd08", "cortex-a72")
.Case("0xd09", "cortex-a73")
@@ -217,10 +218,12 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
.Case("0xd41", "cortex-a78")
.Case("0xd47", "cortex-a710")
.Case("0xd4d", "cortex-a715")
+ .Case("0xd81", "cortex-a720")
.Case("0xd44", "cortex-x1")
.Case("0xd4c", "cortex-x1c")
.Case("0xd48", "cortex-x2")
.Case("0xd4e", "cortex-x3")
+ .Case("0xd82", "cortex-x4")
.Case("0xd0c", "neoverse-n1")
.Case("0xd49", "neoverse-n2")
.Case("0xd40", "neoverse-v1")
diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index b786685d0943..8f0b12d0cfed 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -237,7 +237,8 @@ void SampleProfileProber::computeProbeIdForCallsites() {
if (LastProbeId >= 0xFFFF) {
std::string Msg = "Pseudo instrumentation incomplete for " +
std::string(F->getName()) + " because it's too large";
- Ctx.diagnose(DiagnosticInfoSampleProfile(M->getName().data(), Msg));
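+ // Report this with explicit warning severity rather than the default
+ // (error) severity: running out of probe IDs for one oversized function
+ // should not fail the whole compilation.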
+ Ctx.diagnose(
+ DiagnosticInfoSampleProfile(M->getName().data(), Msg, DS_Warning));
return;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index d6fac88a39d0..40156726c703 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1433,7 +1433,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
// sdiv Op0, (sext i1 X) --> -Op0 (because if X is 0, the op is undefined)
if (match(Op1, m_AllOnes()) ||
(match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)))
- return BinaryOperator::CreateNeg(Op0);
+ return BinaryOperator::CreateNSWNeg(Op0);
// X / INT_MIN --> X == INT_MIN
if (match(Op1, m_SignMask()))
@@ -1456,7 +1456,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
Constant *NegPow2C = ConstantExpr::getNeg(cast<Constant>(Op1));
Constant *C = ConstantExpr::getExactLogBase2(NegPow2C);
Value *Ashr = Builder.CreateAShr(Op0, C, I.getName() + ".neg", true);
- return BinaryOperator::CreateNeg(Ashr);
+ return BinaryOperator::CreateNSWNeg(Ashr);
}
}
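
A minimal IR sketch of the strengthened fold (hypothetical %x, not from this
patch): sdiv overflow is immediate UB, so when the divisor is -1 the dividend
cannot be INT_MIN, and the replacement negation may carry the nsw flag:

    define i32 @sdiv_by_minus_one(i32 %x) {
      %r = sdiv i32 %x, -1   ; now folds to: %r = sub nsw i32 0, %x
      ret i32 %r
    }

The negated-power-of-two case is analogous: the exact ashr result is strictly
greater than INT_MIN (the shift amount is at least 1, since a -1 divisor is
matched earlier), so its negation cannot wrap either.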
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 80c044b6bee8..c82b9260ee34 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -803,7 +803,9 @@ public:
// do globals-gc.
UseCtorComdat(UseGlobalsGC && ClWithComdat && !this->CompileKernel),
DestructorKind(DestructorKind),
- ConstructorKind(ConstructorKind) {
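+ // Use the ClConstructorKind command-line value only when the flag was
+ // explicitly set; otherwise honor the ConstructorKind parameter.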
+ ConstructorKind(ClConstructorKind.getNumOccurrences() > 0
+ ? ClConstructorKind
+ : ConstructorKind) {
C = &(M.getContext());
int LongSize = M.getDataLayout().getPointerSizeInBits();
IntptrTy = Type::getIntNTy(*C, LongSize);
@@ -1151,7 +1153,7 @@ AddressSanitizerPass::AddressSanitizerPass(
AsanCtorKind ConstructorKind)
: Options(Options), UseGlobalGC(UseGlobalGC),
UseOdrIndicator(UseOdrIndicator), DestructorKind(DestructorKind),
- ConstructorKind(ClConstructorKind) {}
+ ConstructorKind(ConstructorKind) {}
PreservedAnalyses AddressSanitizerPass::run(Module &M,
ModuleAnalysisManager &MAM) {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0a10f0d64717..64093a0db5f8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3599,7 +3599,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
OrigLoop->getExitBlocks(ExitBlocks);
for (BasicBlock *Exit : ExitBlocks)
for (PHINode &PN : Exit->phis())
- PSE.getSE()->forgetValue(&PN);
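+ // The vector loop's middle block becomes a new predecessor of the exit
+ // block; invalidate only SCEV info depending on these LCSSA phis rather
+ // than forgetting each value wholesale.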
+ PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
VPBasicBlock *LatchVPBB = Plan.getVectorLoopRegion()->getExitingBasicBlock();
Loop *VectorLoop = LI->getLoopFor(State.CFG.VPBB2IRBB[LatchVPBB]);
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5b9c20671d24..fca9649193e5 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8564,7 +8564,7 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
// profitable for the vectorization, we can skip it if the cost threshold is
// the default. The cost of vectorized PHI nodes is almost always 0 + the cost of
// gathers/buildvectors.
- constexpr unsigned Limit = 4;
+ constexpr int Limit = 4;
if (!ForReduction && !SLPCostThreshold.getNumOccurrences() &&
!VectorizableTree.empty() &&
all_of(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
@@ -10261,7 +10261,7 @@ public:
/// Checks if the specified entry \p E needs to be delayed because of its
/// dependency nodes.
Value *needToDelay(const TreeEntry *E,
- ArrayRef<SmallVector<const TreeEntry *>> Deps) {
+ ArrayRef<SmallVector<const TreeEntry *>> Deps) const {
// No need to delay emission if all deps are ready.
if (all_of(Deps, [](ArrayRef<const TreeEntry *> TEs) {
return all_of(
@@ -10276,6 +10276,16 @@ public:
VecTy, PoisonValue::get(PointerType::getUnqual(VecTy->getContext())),
MaybeAlign());
}
+ /// Adds 2 input vectors (in the form of tree entries) and the mask for their
+ /// shuffling.
+ void add(const TreeEntry &E1, const TreeEntry &E2, ArrayRef<int> Mask) {
+ add(E1.VectorizedValue, E2.VectorizedValue, Mask);
+ }
+ /// Adds a single input vector (in the form of a tree entry) and the mask for its
+ /// shuffling.
+ void add(const TreeEntry &E1, ArrayRef<int> Mask) {
+ add(E1.VectorizedValue, Mask);
+ }
/// Adds 2 input vectors and the mask for their shuffling.
void add(Value *V1, Value *V2, ArrayRef<int> Mask) {
assert(V1 && V2 && !Mask.empty() && "Expected non-empty input vectors.");
@@ -10690,7 +10700,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
Mask[I] = FrontTE->findLaneForValue(V);
}
}
- ShuffleBuilder.add(FrontTE->VectorizedValue, Mask);
+ ShuffleBuilder.add(*FrontTE, Mask);
Res = ShuffleBuilder.finalize(E->getCommonMask());
return Res;
}
@@ -10868,17 +10878,14 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
VecMask.assign(VecMask.size(), PoisonMaskElem);
copy(SubMask, std::next(VecMask.begin(), I * SliceSize));
if (TEs.size() == 1) {
- IsUsedInExpr &= FindReusedSplat(
- VecMask,
- cast<FixedVectorType>(TEs.front()->VectorizedValue->getType())
- ->getNumElements());
- ShuffleBuilder.add(TEs.front()->VectorizedValue, VecMask);
+ IsUsedInExpr &=
+ FindReusedSplat(VecMask, TEs.front()->getVectorFactor());
+ ShuffleBuilder.add(*TEs.front(), VecMask);
IsNonPoisoned &=
isGuaranteedNotToBePoison(TEs.front()->VectorizedValue);
} else {
IsUsedInExpr = false;
- ShuffleBuilder.add(TEs.front()->VectorizedValue,
- TEs.back()->VectorizedValue, VecMask);
+ ShuffleBuilder.add(*TEs.front(), *TEs.back(), VecMask);
IsNonPoisoned &=
isGuaranteedNotToBePoison(TEs.front()->VectorizedValue) &&
isGuaranteedNotToBePoison(TEs.back()->VectorizedValue);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index c55864de9c17..0eaaa037ad57 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -816,15 +816,28 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
break;
}
case Instruction::Trunc: {
- VPRecipeBase *Zext = R.getOperand(0)->getDefiningRecipe();
- if (!Zext || getOpcodeForRecipe(*Zext) != Instruction::ZExt)
+ VPRecipeBase *Ext = R.getOperand(0)->getDefiningRecipe();
+ if (!Ext)
break;
- VPValue *A = Zext->getOperand(0);
+ unsigned ExtOpcode = getOpcodeForRecipe(*Ext);
+ if (ExtOpcode != Instruction::ZExt && ExtOpcode != Instruction::SExt)
+ break;
+ VPValue *A = Ext->getOperand(0);
VPValue *Trunc = R.getVPSingleValue();
- Type *TruncToTy = TypeInfo.inferScalarType(Trunc);
- if (TruncToTy && TruncToTy == TypeInfo.inferScalarType(A))
+ Type *TruncTy = TypeInfo.inferScalarType(Trunc);
+ Type *ATy = TypeInfo.inferScalarType(A);
+ if (TruncTy == ATy) {
Trunc->replaceAllUsesWith(A);
-
+ } else if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
+ auto *VPC =
+ new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
+ VPC->insertBefore(&R);
+ Trunc->replaceAllUsesWith(VPC);
+ } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
+ auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
+ VPC->insertBefore(&R);
+ Trunc->replaceAllUsesWith(VPC);
+ }
#ifndef NDEBUG
// Verify that the cached type info for both A and its users is still
// accurate by comparing it to freshly computed types.
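
At the IR level the new recipe rewrite mirrors the standard cast folds (a
sketch with hypothetical values; zext shown, sext is analogous):

    %e = zext i8 %a to i64
    %t = trunc i64 %e to i32    ; equivalent to: %t = zext i8 %a to i32

and when the pre-extension type is wider than the truncation result:

    %e = zext i48 %a to i64
    %t = trunc i64 %e to i32    ; equivalent to: %t = trunc i48 %a to i32

When the two types match, the ext/trunc pair cancels and A is used directly.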
diff --git a/llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll b/llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll
new file mode 100644
index 000000000000..3fc22818a240
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll
@@ -0,0 +1,235 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+define void @v4xi8_concat_vector_insert_idx0(ptr %a, ptr %b, i8 %x) {
+; CHECK-LABEL: v4xi8_concat_vector_insert_idx0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 2
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a2
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+ %v1 = load <2 x i8>, ptr %a
+ %v2 = load <2 x i8>, ptr %b
+ %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %ins = insertelement <4 x i8> %concat, i8 %x, i32 0
+ store <4 x i8> %ins, ptr %a
+ ret void
+}
+
+define void @v4xi8_concat_vector_insert_idx1(ptr %a, ptr %b, i8 %x) {
+; CHECK-LABEL: v4xi8_concat_vector_insert_idx1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 2
+; CHECK-NEXT: vmv.s.x v9, a2
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 1
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+ %v1 = load <2 x i8>, ptr %a
+ %v2 = load <2 x i8>, ptr %b
+ %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %ins = insertelement <4 x i8> %concat, i8 %x, i32 1
+ store <4 x i8> %ins, ptr %a
+ ret void
+}
+
+define void @v4xi8_concat_vector_insert_idx2(ptr %a, ptr %b, i8 %x) {
+; CHECK-LABEL: v4xi8_concat_vector_insert_idx2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 2
+; CHECK-NEXT: vmv.s.x v9, a2
+; CHECK-NEXT: vsetivli zero, 3, e8, mf4, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 2
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+ %v1 = load <2 x i8>, ptr %a
+ %v2 = load <2 x i8>, ptr %b
+ %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %ins = insertelement <4 x i8> %concat, i8 %x, i32 2
+ store <4 x i8> %ins, ptr %a
+ ret void
+}
+
+define void @v4xi8_concat_vector_insert_idx3(ptr %a, ptr %b, i8 %x) {
+; CHECK-LABEL: v4xi8_concat_vector_insert_idx3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 2
+; CHECK-NEXT: vmv.s.x v9, a2
+; CHECK-NEXT: vslideup.vi v8, v9, 3
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+ %v1 = load <2 x i8>, ptr %a
+ %v2 = load <2 x i8>, ptr %b
+ %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %ins = insertelement <4 x i8> %concat, i8 %x, i32 3
+ store <4 x i8> %ins, ptr %a
+ ret void
+}
+
+define void @v4xi64_concat_vector_insert_idx0(ptr %a, ptr %b, i64 %x) {
+; RV32-LABEL: v4xi64_concat_vector_insert_idx0:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vle64.v v10, (a1)
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vslideup.vi v8, v10, 2
+; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vse64.v v8, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: v4xi64_concat_vector_insert_idx0:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vle64.v v10, (a1)
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslideup.vi v8, v10, 2
+; RV64-NEXT: vsetvli zero, zero, e64, m2, tu, ma
+; RV64-NEXT: vmv.s.x v8, a2
+; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: ret
+ %v1 = load <2 x i64>, ptr %a
+ %v2 = load <2 x i64>, ptr %b
+ %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %ins = insertelement <4 x i64> %concat, i64 %x, i32 0
+ store <4 x i64> %ins, ptr %a
+ ret void
+}
+
+define void @v4xi64_concat_vector_insert_idx1(ptr %a, ptr %b, i64 %x) {
+; RV32-LABEL: v4xi64_concat_vector_insert_idx1:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vle64.v v10, (a1)
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vslideup.vi v8, v10, 2
+; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v8, a2
+; RV32-NEXT: vslide1down.vx v10, v10, a3
+; RV32-NEXT: vsetivli zero, 2, e64, m1, tu, ma
+; RV32-NEXT: vslideup.vi v8, v10, 1
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vse64.v v8, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: v4xi64_concat_vector_insert_idx1:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vle64.v v10, (a1)
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslideup.vi v8, v10, 2
+; RV64-NEXT: vmv.s.x v10, a2
+; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma
+; RV64-NEXT: vslideup.vi v8, v10, 1
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: ret
+ %v1 = load <2 x i64>, ptr %a
+ %v2 = load <2 x i64>, ptr %b
+ %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %ins = insertelement <4 x i64> %concat, i64 %x, i32 1
+ store <4 x i64> %ins, ptr %a
+ ret void
+}
+
+define void @v4xi64_concat_vector_insert_idx2(ptr %a, ptr %b, i64 %x) {
+; RV32-LABEL: v4xi64_concat_vector_insert_idx2:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vle64.v v10, (a1)
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vslideup.vi v8, v10, 2
+; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v8, a2
+; RV32-NEXT: vslide1down.vx v10, v10, a3
+; RV32-NEXT: vsetivli zero, 3, e64, m2, tu, ma
+; RV32-NEXT: vslideup.vi v8, v10, 2
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vse64.v v8, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: v4xi64_concat_vector_insert_idx2:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vle64.v v10, (a1)
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslideup.vi v8, v10, 2
+; RV64-NEXT: vmv.s.x v10, a2
+; RV64-NEXT: vsetivli zero, 3, e64, m2, tu, ma
+; RV64-NEXT: vslideup.vi v8, v10, 2
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: ret
+ %v1 = load <2 x i64>, ptr %a
+ %v2 = load <2 x i64>, ptr %b
+ %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %ins = insertelement <4 x i64> %concat, i64 %x, i32 2
+ store <4 x i64> %ins, ptr %a
+ ret void
+}
+
+define void @v4xi64_concat_vector_insert_idx3(ptr %a, ptr %b, i64 %x) {
+; RV32-LABEL: v4xi64_concat_vector_insert_idx3:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vle64.v v10, (a1)
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vslideup.vi v8, v10, 2
+; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v8, a2
+; RV32-NEXT: vslide1down.vx v10, v10, a3
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vslideup.vi v8, v10, 3
+; RV32-NEXT: vse64.v v8, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: v4xi64_concat_vector_insert_idx3:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vle64.v v10, (a1)
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslideup.vi v8, v10, 2
+; RV64-NEXT: vmv.s.x v10, a2
+; RV64-NEXT: vslideup.vi v8, v10, 3
+; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: ret
+ %v1 = load <2 x i64>, ptr %a
+ %v2 = load <2 x i64>, ptr %b
+ %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %ins = insertelement <4 x i64> %concat, i64 %x, i32 3
+ store <4 x i64> %ins, ptr %a
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
index 717dfb1bfd00..2ffca983ac10 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
@@ -446,6 +446,25 @@ define <4 x i32> @add_general_splat(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
; This test previously failed with an assertion failure because constant shift
; amounts are type legalized early.
define void @buggy(i32 %0) #0 {
+; RV32-LABEL: buggy:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v8, a0
+; RV32-NEXT: vadd.vv v8, v8, v8
+; RV32-NEXT: vor.vi v8, v8, 1
+; RV32-NEXT: vrgather.vi v9, v8, 0
+; RV32-NEXT: vse32.v v9, (zero)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: buggy:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vmv.v.x v8, a0
+; RV64-NEXT: vor.vi v8, v8, 1
+; RV64-NEXT: vrgather.vi v9, v8, 0
+; RV64-NEXT: vse32.v v9, (zero)
+; RV64-NEXT: ret
entry:
%mul.us.us.i.3 = shl i32 %0, 1
%1 = insertelement <4 x i32> zeroinitializer, i32 %mul.us.us.i.3, i64 0
@@ -454,3 +473,134 @@ entry:
store <4 x i32> %3, ptr null, align 16
ret void
}
+
+
+define <8 x i32> @add_constant_rhs_8xi32_vector_in(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: add_constant_rhs_8xi32_vector_in:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, a0, 23
+; CHECK-NEXT: addi a1, a1, 25
+; CHECK-NEXT: addi a2, a2, 1
+; CHECK-NEXT: addi a3, a3, 2047
+; CHECK-NEXT: addi a3, a3, 308
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: vmv.s.x v10, a1
+; CHECK-NEXT: vslideup.vi v8, v10, 1
+; CHECK-NEXT: vmv.s.x v10, a2
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 2
+; CHECK-NEXT: vmv.s.x v10, a3
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 3
+; CHECK-NEXT: ret
+ %e0 = add i32 %a, 23
+ %e1 = add i32 %b, 25
+ %e2 = add i32 %c, 1
+ %e3 = add i32 %d, 2355
+ %v0 = insertelement <8 x i32> %vin, i32 %e0, i32 0
+ %v1 = insertelement <8 x i32> %v0, i32 %e1, i32 1
+ %v2 = insertelement <8 x i32> %v1, i32 %e2, i32 2
+ %v3 = insertelement <8 x i32> %v2, i32 %e3, i32 3
+ ret <8 x i32> %v3
+}
+
+define <8 x i32> @add_constant_rhs_8xi32_vector_in2(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: add_constant_rhs_8xi32_vector_in2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, a0, 23
+; CHECK-NEXT: addi a1, a1, 25
+; CHECK-NEXT: addi a2, a2, 1
+; CHECK-NEXT: addi a3, a3, 2047
+; CHECK-NEXT: addi a3, a3, 308
+; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; CHECK-NEXT: vmv.s.x v10, a0
+; CHECK-NEXT: vslideup.vi v8, v10, 4
+; CHECK-NEXT: vmv.s.x v10, a1
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 5
+; CHECK-NEXT: vmv.s.x v10, a2
+; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 6
+; CHECK-NEXT: vmv.s.x v10, a3
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 7
+; CHECK-NEXT: ret
+ %e0 = add i32 %a, 23
+ %e1 = add i32 %b, 25
+ %e2 = add i32 %c, 1
+ %e3 = add i32 %d, 2355
+ %v0 = insertelement <8 x i32> %vin, i32 %e0, i32 4
+ %v1 = insertelement <8 x i32> %v0, i32 %e1, i32 5
+ %v2 = insertelement <8 x i32> %v1, i32 %e2, i32 6
+ %v3 = insertelement <8 x i32> %v2, i32 %e3, i32 7
+ ret <8 x i32> %v3
+}
+
+define <8 x i32> @add_constant_rhs_8xi32_vector_in3(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: add_constant_rhs_8xi32_vector_in3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, a0, 23
+; CHECK-NEXT: addi a1, a1, 25
+; CHECK-NEXT: addi a2, a2, 1
+; CHECK-NEXT: addi a3, a3, 2047
+; CHECK-NEXT: addi a3, a3, 308
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: vmv.s.x v10, a1
+; CHECK-NEXT: vslideup.vi v8, v10, 2
+; CHECK-NEXT: vmv.s.x v10, a2
+; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 4
+; CHECK-NEXT: vmv.s.x v10, a3
+; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 6
+; CHECK-NEXT: ret
+ %e0 = add i32 %a, 23
+ %e1 = add i32 %b, 25
+ %e2 = add i32 %c, 1
+ %e3 = add i32 %d, 2355
+ %v0 = insertelement <8 x i32> %vin, i32 %e0, i32 0
+ %v1 = insertelement <8 x i32> %v0, i32 %e1, i32 2
+ %v2 = insertelement <8 x i32> %v1, i32 %e2, i32 4
+ %v3 = insertelement <8 x i32> %v2, i32 %e3, i32 6
+ ret <8 x i32> %v3
+}
+
+define <8 x i32> @add_constant_rhs_8xi32_partial(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: add_constant_rhs_8xi32_partial:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a4, %hi(.LCPI19_0)
+; CHECK-NEXT: addi a4, a4, %lo(.LCPI19_0)
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vle32.v v10, (a4)
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: addi a0, a0, 23
+; CHECK-NEXT: addi a1, a1, 25
+; CHECK-NEXT: addi a2, a2, 1
+; CHECK-NEXT: addi a3, a3, 2047
+; CHECK-NEXT: addi a3, a3, 308
+; CHECK-NEXT: vmv.s.x v10, a0
+; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 4
+; CHECK-NEXT: vmv.s.x v10, a1
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 5
+; CHECK-NEXT: vmv.s.x v10, a2
+; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 6
+; CHECK-NEXT: vmv.s.x v10, a3
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 7
+; CHECK-NEXT: ret
+ %vadd = add <8 x i32> %vin, <i32 1, i32 2, i32 3, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
+ %e0 = add i32 %a, 23
+ %e1 = add i32 %b, 25
+ %e2 = add i32 %c, 1
+ %e3 = add i32 %d, 2355
+ %v0 = insertelement <8 x i32> %vadd, i32 %e0, i32 4
+ %v1 = insertelement <8 x i32> %v0, i32 %e1, i32 5
+ %v2 = insertelement <8 x i32> %v1, i32 %e2, i32 6
+ %v3 = insertelement <8 x i32> %v2, i32 %e3, i32 7
+ ret <8 x i32> %v3
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index d1ea56a1ff93..2d8bae709224 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1080,6 +1080,13 @@ define <32 x double> @buildvec_v32f64(double %e0, double %e1, double %e2, double
; FIXME: These constants have enough sign bits that we could use vmv.v.x/i and
; vsext, but we don't support this for FP yet.
define <2 x float> @signbits() {
+; CHECK-LABEL: signbits:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a0, %hi(.LCPI24_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI24_0)
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: ret
entry:
ret <2 x float> <float 0x36A0000000000000, float 0.000000e+00>
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index 85939377c6db..d843750a2077 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -804,3 +804,47 @@ define <16 x i8> @buildvec_not_vid_v16i8() {
; CHECK-NEXT: ret
ret <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 3, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0>
}
+
+define <8 x i32> @prefix_overwrite(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: prefix_overwrite:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: vmv.s.x v10, a1
+; CHECK-NEXT: vslideup.vi v8, v10, 1
+; CHECK-NEXT: vmv.s.x v10, a2
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 2
+; CHECK-NEXT: vmv.s.x v10, a3
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 3
+; CHECK-NEXT: ret
+ %v0 = insertelement <8 x i32> %vin, i32 %a, i32 0
+ %v1 = insertelement <8 x i32> %v0, i32 %b, i32 1
+ %v2 = insertelement <8 x i32> %v1, i32 %c, i32 2
+ %v3 = insertelement <8 x i32> %v2, i32 %d, i32 3
+ ret <8 x i32> %v3
+}
+
+define <8 x i32> @suffix_overwrite(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: suffix_overwrite:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; CHECK-NEXT: vmv.s.x v10, a0
+; CHECK-NEXT: vslideup.vi v8, v10, 4
+; CHECK-NEXT: vmv.s.x v10, a1
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 5
+; CHECK-NEXT: vmv.s.x v10, a2
+; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 6
+; CHECK-NEXT: vmv.s.x v10, a3
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 7
+; CHECK-NEXT: ret
+ %v0 = insertelement <8 x i32> %vin, i32 %a, i32 4
+ %v1 = insertelement <8 x i32> %v0, i32 %b, i32 5
+ %v2 = insertelement <8 x i32> %v1, i32 %c, i32 6
+ %v3 = insertelement <8 x i32> %v2, i32 %d, i32 7
+ ret <8 x i32> %v3
+}
diff --git a/llvm/test/CodeGen/X86/pr72539.ll b/llvm/test/CodeGen/X86/pr72539.ll
new file mode 100644
index 000000000000..fb4c98dca8ce
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr72539.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX
+
+define void @PR72539(<8 x i32> %insertelement) {
+; SSE-LABEL: PR72539:
+; SSE: # %bb.0:
+; SSE-NEXT: xorb $7, 0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: PR72539:
+; AVX: # %bb.0:
+; AVX-NEXT: movzbl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
+; AVX-NEXT: xorb %al, 0
+; AVX-NEXT: retq
+ %load671 = load i8, ptr addrspace(1) null, align 1
+ %shufflevector = shufflevector <8 x i32> %insertelement, <8 x i32> zeroinitializer, <8 x i32> zeroinitializer
+ %xor68 = xor <8 x i32> %shufflevector, <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %call69 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %xor68)
+ %trunc70 = trunc i32 %call69 to i8
+ %xor71 = xor i8 %load671, %trunc70
+ store i8 %xor71, ptr addrspace(1) null, align 1
+ ret void
+}
+declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>)
diff --git a/llvm/test/CodeGen/X86/vec_fabs.ll b/llvm/test/CodeGen/X86/vec_fabs.ll
index 8876d2f9b199..f691cb76bc68 100644
--- a/llvm/test/CodeGen/X86/vec_fabs.ll
+++ b/llvm/test/CodeGen/X86/vec_fabs.ll
@@ -1,12 +1,18 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX2
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX512VL
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX512VLDQ
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX512,X86-AVX512VL
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X86,X86-AVX512,X86-AVX512FP16
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX512,X86-AVX512VLDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX512VL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX512VLDQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512FP16
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512VLDQ
+
+;
+; 128-bit Vectors
+;
define <2 x double> @fabs_v2f64(<2 x double> %p) {
; X86-AVX-LABEL: fabs_v2f64:
@@ -19,6 +25,11 @@ define <2 x double> @fabs_v2f64(<2 x double> %p) {
; X86-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86-AVX512VL-NEXT: retl
;
+; X86-AVX512FP16-LABEL: fabs_v2f64:
+; X86-AVX512FP16: # %bb.0:
+; X86-AVX512FP16-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
+; X86-AVX512FP16-NEXT: retl
+;
; X86-AVX512VLDQ-LABEL: fabs_v2f64:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
@@ -34,6 +45,11 @@ define <2 x double> @fabs_v2f64(<2 x double> %p) {
; X64-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; X64-AVX512VL-NEXT: retq
;
+; X64-AVX512FP16-LABEL: fabs_v2f64:
+; X64-AVX512FP16: # %bb.0:
+; X64-AVX512FP16-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; X64-AVX512FP16-NEXT: retq
+;
; X64-AVX512VLDQ-LABEL: fabs_v2f64:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
@@ -60,6 +76,11 @@ define <4 x float> @fabs_v4f32(<4 x float> %p) {
; X86-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; X86-AVX512VL-NEXT: retl
;
+; X86-AVX512FP16-LABEL: fabs_v4f32:
+; X86-AVX512FP16: # %bb.0:
+; X86-AVX512FP16-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
+; X86-AVX512FP16-NEXT: retl
+;
; X86-AVX512VLDQ-LABEL: fabs_v4f32:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
@@ -81,6 +102,11 @@ define <4 x float> @fabs_v4f32(<4 x float> %p) {
; X64-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512VL-NEXT: retq
;
+; X64-AVX512FP16-LABEL: fabs_v4f32:
+; X64-AVX512FP16: # %bb.0:
+; X64-AVX512FP16-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; X64-AVX512FP16-NEXT: retq
+;
; X64-AVX512VLDQ-LABEL: fabs_v4f32:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
@@ -90,6 +116,55 @@ define <4 x float> @fabs_v4f32(<4 x float> %p) {
}
declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
+define <8 x half> @fabs_v8f16(ptr %p) {
+; X86-AVX1-LABEL: fabs_v8f16:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX1-NEXT: vmovaps (%eax), %xmm0
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: fabs_v8f16:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX2-NEXT: vpbroadcastw {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
+; X86-AVX2-NEXT: vpand (%eax), %xmm0, %xmm0
+; X86-AVX2-NEXT: retl
+;
+; X86-AVX512-LABEL: fabs_v8f16:
+; X86-AVX512: # %bb.0:
+; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512-NEXT: vpbroadcastw {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
+; X86-AVX512-NEXT: vpand (%eax), %xmm0, %xmm0
+; X86-AVX512-NEXT: retl
+;
+; X64-AVX1-LABEL: fabs_v8f16:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: fabs_v8f16:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vpbroadcastw {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
+; X64-AVX2-NEXT: vpand (%rdi), %xmm0, %xmm0
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512-LABEL: fabs_v8f16:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vpbroadcastw {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
+; X64-AVX512-NEXT: vpand (%rdi), %xmm0, %xmm0
+; X64-AVX512-NEXT: retq
+ %v = load <8 x half>, ptr %p, align 16
+ %nnv = call <8 x half> @llvm.fabs.v8f16(<8 x half> %v)
+ ret <8 x half> %nnv
+}
+declare <8 x half> @llvm.fabs.v8f16(<8 x half> %p)
+
+;
+; 256-bit Vectors
+;
+
define <4 x double> @fabs_v4f64(<4 x double> %p) {
; X86-AVX1-LABEL: fabs_v4f64:
; X86-AVX1: # %bb.0:
@@ -107,6 +182,11 @@ define <4 x double> @fabs_v4f64(<4 x double> %p) {
; X86-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm0, %ymm0
; X86-AVX512VL-NEXT: retl
;
+; X86-AVX512FP16-LABEL: fabs_v4f64:
+; X86-AVX512FP16: # %bb.0:
+; X86-AVX512FP16-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm0, %ymm0
+; X86-AVX512FP16-NEXT: retl
+;
; X86-AVX512VLDQ-LABEL: fabs_v4f64:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm0, %ymm0
@@ -128,6 +208,11 @@ define <4 x double> @fabs_v4f64(<4 x double> %p) {
; X64-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; X64-AVX512VL-NEXT: retq
;
+; X64-AVX512FP16-LABEL: fabs_v4f64:
+; X64-AVX512FP16: # %bb.0:
+; X64-AVX512FP16-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
+; X64-AVX512FP16-NEXT: retq
+;
; X64-AVX512VLDQ-LABEL: fabs_v4f64:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
@@ -154,6 +239,11 @@ define <8 x float> @fabs_v8f32(<8 x float> %p) {
; X86-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
; X86-AVX512VL-NEXT: retl
;
+; X86-AVX512FP16-LABEL: fabs_v8f32:
+; X86-AVX512FP16: # %bb.0:
+; X86-AVX512FP16-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
+; X86-AVX512FP16-NEXT: retl
+;
; X86-AVX512VLDQ-LABEL: fabs_v8f32:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
@@ -175,6 +265,11 @@ define <8 x float> @fabs_v8f32(<8 x float> %p) {
; X64-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512VL-NEXT: retq
;
+; X64-AVX512FP16-LABEL: fabs_v8f32:
+; X64-AVX512FP16: # %bb.0:
+; X64-AVX512FP16-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
+; X64-AVX512FP16-NEXT: retq
+;
; X64-AVX512VLDQ-LABEL: fabs_v8f32:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
@@ -184,6 +279,1388 @@ define <8 x float> @fabs_v8f32(<8 x float> %p) {
}
declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
+define <16 x half> @fabs_v16f16(ptr %p) {
+; X86-AVX1-LABEL: fabs_v16f16:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: pushl %esi
+; X86-AVX1-NEXT: .cfi_def_cfa_offset 8
+; X86-AVX1-NEXT: subl $308, %esp # imm = 0x134
+; X86-AVX1-NEXT: .cfi_def_cfa_offset 316
+; X86-AVX1-NEXT: .cfi_offset %esi, -8
+; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-AVX1-NEXT: vmovdqa (%esi), %xmm0
+; X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovaps 16(%esi), %xmm1
+; X86-AVX1-NEXT: vmovups %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vbroadcastss 4(%esi), %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vbroadcastss 8(%esi), %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vbroadcastss 12(%esi), %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vbroadcastss 20(%esi), %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vbroadcastss 24(%esi), %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vbroadcastss 28(%esi), %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm1, (%esp)
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+; X86-AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3]
+; X86-AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X86-AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+; X86-AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X86-AVX1-NEXT: vpunpckldq {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; X86-AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
+; X86-AVX1-NEXT: vinsertf128 $1, {{[-0-9]+}}(%e{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; X86-AVX1-NEXT: addl $308, %esp # imm = 0x134
+; X86-AVX1-NEXT: .cfi_def_cfa_offset 8
+; X86-AVX1-NEXT: popl %esi
+; X86-AVX1-NEXT: .cfi_def_cfa_offset 4
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: fabs_v16f16:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: pushl %esi
+; X86-AVX2-NEXT: .cfi_def_cfa_offset 8
+; X86-AVX2-NEXT: subl $372, %esp # imm = 0x174
+; X86-AVX2-NEXT: .cfi_def_cfa_offset 380
+; X86-AVX2-NEXT: .cfi_offset %esi, -8
+; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-AVX2-NEXT: vmovdqa (%esi), %xmm0
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovaps 16(%esi), %xmm1
+; X86-AVX2-NEXT: vmovups %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
+; X86-AVX2-NEXT: vmovups %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovd %xmm0, (%esp)
+; X86-AVX2-NEXT: vpinsrw $0, 4(%esi), %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovd %xmm0, (%esp)
+; X86-AVX2-NEXT: vpinsrw $0, 20(%esi), %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovd %xmm0, (%esp)
+; X86-AVX2-NEXT: vpinsrw $0, 8(%esi), %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovd %xmm0, (%esp)
+; X86-AVX2-NEXT: vpinsrw $0, 24(%esi), %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovd %xmm0, (%esp)
+; X86-AVX2-NEXT: vpinsrw $0, 12(%esi), %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovd %xmm0, (%esp)
+; X86-AVX2-NEXT: vpinsrw $0, 28(%esi), %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vpand {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm1, (%esp)
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+; X86-AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3]
+; X86-AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; X86-AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
+; X86-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+; X86-AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vzeroupper
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X86-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%e{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%e{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
+; X86-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
+; X86-AVX2-NEXT: vpunpcklqdq {{[-0-9]+}}(%e{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
+; X86-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
+; X86-AVX2-NEXT: addl $372, %esp # imm = 0x174
+; X86-AVX2-NEXT: .cfi_def_cfa_offset 8
+; X86-AVX2-NEXT: popl %esi
+; X86-AVX2-NEXT: .cfi_def_cfa_offset 4
+; X86-AVX2-NEXT: retl
+;
+; X86-AVX512VL-LABEL: fabs_v16f16:
+; X86-AVX512VL: # %bb.0:
+; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512VL-NEXT: movzwl 28(%eax), %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm0
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm1
+; X86-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN]
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm3
+; X86-AVX512VL-NEXT: vmovdqa (%eax), %xmm1
+; X86-AVX512VL-NEXT: vmovdqa 16(%eax), %xmm2
+; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm4 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm4, %ecx
+; X86-AVX512VL-NEXT: movzwl %cx, %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm4
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X86-AVX512VL-NEXT: vmovd %xmm4, %ecx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm4
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
+; X86-AVX512VL-NEXT: movzwl 12(%eax), %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm4
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X86-AVX512VL-NEXT: vmovd %xmm4, %ecx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm4
+; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm5 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm5, %ecx
+; X86-AVX512VL-NEXT: movzwl %cx, %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm5
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vmovd %xmm5, %ecx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
+; X86-AVX512VL-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
+; X86-AVX512VL-NEXT: movzwl 24(%eax), %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm4
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X86-AVX512VL-NEXT: vmovd %xmm4, %ecx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm4
+; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm5 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm5, %ecx
+; X86-AVX512VL-NEXT: movzwl %cx, %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm5
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vmovd %xmm5, %ecx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
+; X86-AVX512VL-NEXT: movzwl 8(%eax), %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm5
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vmovd %xmm5, %ecx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5
+; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm6 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm6, %ecx
+; X86-AVX512VL-NEXT: movzwl %cx, %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm6
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X86-AVX512VL-NEXT: vmovd %xmm6, %ecx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm6
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; X86-AVX512VL-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
+; X86-AVX512VL-NEXT: vpunpckldq {{.*#+}} ymm3 = ymm4[0],ymm3[0],ymm4[1],ymm3[1],ymm4[4],ymm3[4],ymm4[5],ymm3[5]
+; X86-AVX512VL-NEXT: movzwl 20(%eax), %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm4
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X86-AVX512VL-NEXT: vmovd %xmm4, %ecx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm4
+; X86-AVX512VL-NEXT: vpsrlq $48, %xmm2, %xmm5
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm5, %ecx
+; X86-AVX512VL-NEXT: movzwl %cx, %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm5
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vmovd %xmm5, %ecx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
+; X86-AVX512VL-NEXT: movzwl 4(%eax), %eax
+; X86-AVX512VL-NEXT: vmovd %eax, %xmm5
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vmovd %xmm5, %eax
+; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X86-AVX512VL-NEXT: vpsrlq $48, %xmm1, %xmm6
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm6, %eax
+; X86-AVX512VL-NEXT: movzwl %ax, %eax
+; X86-AVX512VL-NEXT: vmovd %eax, %xmm6
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X86-AVX512VL-NEXT: vmovd %xmm6, %eax
+; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; X86-AVX512VL-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm2, %eax
+; X86-AVX512VL-NEXT: movzwl %ax, %eax
+; X86-AVX512VL-NEXT: vmovd %eax, %xmm5
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vmovd %xmm5, %eax
+; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X86-AVX512VL-NEXT: vpsrld $16, %xmm2, %xmm2
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm2, %eax
+; X86-AVX512VL-NEXT: movzwl %ax, %eax
+; X86-AVX512VL-NEXT: vmovd %eax, %xmm2
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm2, %xmm2
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm2
+; X86-AVX512VL-NEXT: vmovd %xmm2, %eax
+; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm2
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm1, %eax
+; X86-AVX512VL-NEXT: movzwl %ax, %eax
+; X86-AVX512VL-NEXT: vmovd %eax, %xmm5
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vmovd %xmm5, %eax
+; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X86-AVX512VL-NEXT: vpsrld $16, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm1, %eax
+; X86-AVX512VL-NEXT: movzwl %ax, %eax
+; X86-AVX512VL-NEXT: vmovd %eax, %xmm1
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm0
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; X86-AVX512VL-NEXT: vmovd %xmm0, %eax
+; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3]
+; X86-AVX512VL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX512VL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm4[0],ymm0[1],ymm4[1],ymm0[4],ymm4[4],ymm0[5],ymm4[5]
+; X86-AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
+; X86-AVX512VL-NEXT: retl
+;
+; X86-AVX512FP16-LABEL: fabs_v16f16:
+; X86-AVX512FP16: # %bb.0:
+; X86-AVX512FP16-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512FP16-NEXT: vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
+; X86-AVX512FP16-NEXT: vpand (%eax), %ymm0, %ymm0
+; X86-AVX512FP16-NEXT: retl
+;
+; X86-AVX512VLDQ-LABEL: fabs_v16f16:
+; X86-AVX512VLDQ: # %bb.0:
+; X86-AVX512VLDQ-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512VLDQ-NEXT: movzwl 28(%eax), %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm0
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm0, %xmm1
+; X86-AVX512VLDQ-NEXT: vpbroadcastd {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN]
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm3
+; X86-AVX512VLDQ-NEXT: vmovdqa (%eax), %xmm1
+; X86-AVX512VLDQ-NEXT: vmovdqa 16(%eax), %xmm2
+; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm4 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm4, %ecx
+; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm4
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X86-AVX512VLDQ-NEXT: vmovd %xmm4, %ecx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm4
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
+; X86-AVX512VLDQ-NEXT: movzwl 12(%eax), %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm4
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X86-AVX512VLDQ-NEXT: vmovd %xmm4, %ecx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm4
+; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm5 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm5, %ecx
+; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm5
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vmovd %xmm5, %ecx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
+; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
+; X86-AVX512VLDQ-NEXT: movzwl 24(%eax), %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm4
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X86-AVX512VLDQ-NEXT: vmovd %xmm4, %ecx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm4
+; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm5 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm5, %ecx
+; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm5
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vmovd %xmm5, %ecx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
+; X86-AVX512VLDQ-NEXT: movzwl 8(%eax), %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm5
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vmovd %xmm5, %ecx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5
+; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm6 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm6, %ecx
+; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm6
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X86-AVX512VLDQ-NEXT: vmovd %xmm6, %ecx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm6
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
+; X86-AVX512VLDQ-NEXT: vpunpckldq {{.*#+}} ymm3 = ymm4[0],ymm3[0],ymm4[1],ymm3[1],ymm4[4],ymm3[4],ymm4[5],ymm3[5]
+; X86-AVX512VLDQ-NEXT: movzwl 20(%eax), %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm4
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X86-AVX512VLDQ-NEXT: vmovd %xmm4, %ecx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm4
+; X86-AVX512VLDQ-NEXT: vpsrlq $48, %xmm2, %xmm5
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm5, %ecx
+; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm5
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vmovd %xmm5, %ecx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
+; X86-AVX512VLDQ-NEXT: movzwl 4(%eax), %eax
+; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm5
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vmovd %xmm5, %eax
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X86-AVX512VLDQ-NEXT: vpsrlq $48, %xmm1, %xmm6
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm6, %eax
+; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm6
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X86-AVX512VLDQ-NEXT: vmovd %xmm6, %eax
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm2, %eax
+; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm5
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vmovd %xmm5, %eax
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X86-AVX512VLDQ-NEXT: vpsrld $16, %xmm2, %xmm2
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm2, %eax
+; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm2
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm2, %xmm2
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm2, %xmm2
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm2, %xmm2
+; X86-AVX512VLDQ-NEXT: vmovd %xmm2, %eax
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm2
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %eax
+; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm5
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vmovd %xmm5, %eax
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X86-AVX512VLDQ-NEXT: vpsrld $16, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %eax
+; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm0
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; X86-AVX512VLDQ-NEXT: vmovd %xmm0, %eax
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3]
+; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX512VLDQ-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm4[0],ymm0[1],ymm4[1],ymm0[4],ymm4[4],ymm0[5],ymm4[5]
+; X86-AVX512VLDQ-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
+; X86-AVX512VLDQ-NEXT: retl
+;
+; X64-AVX1-LABEL: fabs_v16f16:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: pushq %rbx
+; X64-AVX1-NEXT: .cfi_def_cfa_offset 16
+; X64-AVX1-NEXT: subq $80, %rsp
+; X64-AVX1-NEXT: .cfi_def_cfa_offset 96
+; X64-AVX1-NEXT: .cfi_offset %rbx, -16
+; X64-AVX1-NEXT: movq %rdi, %rbx
+; X64-AVX1-NEXT: vbroadcastss 28(%rdi), %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vmovaps (%rbx), %xmm0
+; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa 16(%rbx), %xmm0
+; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vbroadcastss 24(%rbx), %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vbroadcastss 20(%rbx), %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; X64-AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
+; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vbroadcastss 12(%rbx), %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vbroadcastss 8(%rbx), %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vbroadcastss 4(%rbx), %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; X64-AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
+; X64-AVX1-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; X64-AVX1-NEXT: addq $80, %rsp
+; X64-AVX1-NEXT: .cfi_def_cfa_offset 16
+; X64-AVX1-NEXT: popq %rbx
+; X64-AVX1-NEXT: .cfi_def_cfa_offset 8
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: fabs_v16f16:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: pushq %rbx
+; X64-AVX2-NEXT: .cfi_def_cfa_offset 16
+; X64-AVX2-NEXT: subq $128, %rsp
+; X64-AVX2-NEXT: .cfi_def_cfa_offset 144
+; X64-AVX2-NEXT: .cfi_offset %rbx, -16
+; X64-AVX2-NEXT: movq %rdi, %rbx
+; X64-AVX2-NEXT: vpinsrw $0, 28(%rdi), %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
+; X64-AVX2-NEXT: vmovdqa %xmm1, (%rsp) # 16-byte Spill
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovaps (%rbx), %xmm0
+; X64-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa 16(%rbx), %xmm0
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vpinsrw $0, 12(%rbx), %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; X64-AVX2-NEXT: vpinsrw $0, 24(%rbx), %xmm0, %xmm0
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vpinsrw $0, 8(%rbx), %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
+; X64-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
+; X64-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; X64-AVX2-NEXT: vpinsrw $0, 20(%rbx), %xmm0, %xmm0
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vpinsrw $0, 4(%rbx), %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; X64-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vandps (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vandps (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
+; X64-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
+; X64-AVX2-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
+; X64-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
+; X64-AVX2-NEXT: addq $128, %rsp
+; X64-AVX2-NEXT: .cfi_def_cfa_offset 16
+; X64-AVX2-NEXT: popq %rbx
+; X64-AVX2-NEXT: .cfi_def_cfa_offset 8
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512VL-LABEL: fabs_v16f16:
+; X64-AVX512VL: # %bb.0:
+; X64-AVX512VL-NEXT: movzwl 28(%rdi), %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm0
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm1
+; X64-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN]
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X64-AVX512VL-NEXT: vmovd %xmm1, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm3
+; X64-AVX512VL-NEXT: vmovdqa (%rdi), %xmm1
+; X64-AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm2
+; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm4 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm4, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm4
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vmovd %xmm4, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
+; X64-AVX512VL-NEXT: movzwl 12(%rdi), %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm4
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vmovd %xmm4, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4
+; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm5 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm5, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm5
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X64-AVX512VL-NEXT: vmovd %xmm5, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
+; X64-AVX512VL-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
+; X64-AVX512VL-NEXT: movzwl 24(%rdi), %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm4
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vmovd %xmm4, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4
+; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm5 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm5, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm5
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X64-AVX512VL-NEXT: vmovd %xmm5, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
+; X64-AVX512VL-NEXT: movzwl 8(%rdi), %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm5
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X64-AVX512VL-NEXT: vmovd %xmm5, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm6 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm6, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm6
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X64-AVX512VL-NEXT: vmovd %xmm6, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; X64-AVX512VL-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
+; X64-AVX512VL-NEXT: vpunpckldq {{.*#+}} ymm3 = ymm4[0],ymm3[0],ymm4[1],ymm3[1],ymm4[4],ymm3[4],ymm4[5],ymm3[5]
+; X64-AVX512VL-NEXT: movzwl 20(%rdi), %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm4
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vmovd %xmm4, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4
+; X64-AVX512VL-NEXT: vpsrlq $48, %xmm2, %xmm5
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm5, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm5
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X64-AVX512VL-NEXT: vmovd %xmm5, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
+; X64-AVX512VL-NEXT: movzwl 4(%rdi), %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm5
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X64-AVX512VL-NEXT: vmovd %xmm5, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X64-AVX512VL-NEXT: vpsrlq $48, %xmm1, %xmm6
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm6, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm6
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X64-AVX512VL-NEXT: vmovd %xmm6, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; X64-AVX512VL-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm2, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm5
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X64-AVX512VL-NEXT: vmovd %xmm5, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X64-AVX512VL-NEXT: vpsrld $16, %xmm2, %xmm2
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm2, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm2
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm2, %xmm2
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm2
+; X64-AVX512VL-NEXT: vmovd %xmm2, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm2
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm1, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm5
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X64-AVX512VL-NEXT: vmovd %xmm5, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X64-AVX512VL-NEXT: vpsrld $16, %xmm1, %xmm1
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm1, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm1
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm0
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; X64-AVX512VL-NEXT: vmovd %xmm0, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3]
+; X64-AVX512VL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; X64-AVX512VL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm4[0],ymm0[1],ymm4[1],ymm0[4],ymm4[4],ymm0[5],ymm4[5]
+; X64-AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
+; X64-AVX512VL-NEXT: retq
+;
+; X64-AVX512FP16-LABEL: fabs_v16f16:
+; X64-AVX512FP16: # %bb.0:
+; X64-AVX512FP16-NEXT: vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
+; X64-AVX512FP16-NEXT: vpand (%rdi), %ymm0, %ymm0
+; X64-AVX512FP16-NEXT: retq
+;
+; X64-AVX512VLDQ-LABEL: fabs_v16f16:
+; X64-AVX512VLDQ: # %bb.0:
+; X64-AVX512VLDQ-NEXT: movzwl 28(%rdi), %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm0
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm0, %xmm1
+; X64-AVX512VLDQ-NEXT: vpbroadcastd {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN]
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X64-AVX512VLDQ-NEXT: vmovd %xmm1, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm3
+; X64-AVX512VLDQ-NEXT: vmovdqa (%rdi), %xmm1
+; X64-AVX512VLDQ-NEXT: vmovdqa 16(%rdi), %xmm2
+; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm4 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm4, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm4
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vmovd %xmm4, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
+; X64-AVX512VLDQ-NEXT: movzwl 12(%rdi), %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm4
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vmovd %xmm4, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4
+; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm5 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm5, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm5
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X64-AVX512VLDQ-NEXT: vmovd %xmm5, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
+; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
+; X64-AVX512VLDQ-NEXT: movzwl 24(%rdi), %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm4
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vmovd %xmm4, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4
+; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm5 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm5, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm5
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X64-AVX512VLDQ-NEXT: vmovd %xmm5, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
+; X64-AVX512VLDQ-NEXT: movzwl 8(%rdi), %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm5
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X64-AVX512VLDQ-NEXT: vmovd %xmm5, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm6 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm6, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm6
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X64-AVX512VLDQ-NEXT: vmovd %xmm6, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
+; X64-AVX512VLDQ-NEXT: vpunpckldq {{.*#+}} ymm3 = ymm4[0],ymm3[0],ymm4[1],ymm3[1],ymm4[4],ymm3[4],ymm4[5],ymm3[5]
+; X64-AVX512VLDQ-NEXT: movzwl 20(%rdi), %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm4
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vmovd %xmm4, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4
+; X64-AVX512VLDQ-NEXT: vpsrlq $48, %xmm2, %xmm5
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm5, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm5
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X64-AVX512VLDQ-NEXT: vmovd %xmm5, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
+; X64-AVX512VLDQ-NEXT: movzwl 4(%rdi), %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm5
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X64-AVX512VLDQ-NEXT: vmovd %xmm5, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X64-AVX512VLDQ-NEXT: vpsrlq $48, %xmm1, %xmm6
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm6, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm6
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X64-AVX512VLDQ-NEXT: vmovd %xmm6, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm2, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm5
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X64-AVX512VLDQ-NEXT: vmovd %xmm5, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X64-AVX512VLDQ-NEXT: vpsrld $16, %xmm2, %xmm2
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm2, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm2
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm2, %xmm2
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm2, %xmm2
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm2, %xmm2
+; X64-AVX512VLDQ-NEXT: vmovd %xmm2, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm2
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm5
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X64-AVX512VLDQ-NEXT: vmovd %xmm5, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X64-AVX512VLDQ-NEXT: vpsrld $16, %xmm1, %xmm1
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm1
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm0
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; X64-AVX512VLDQ-NEXT: vmovd %xmm0, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3]
+; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; X64-AVX512VLDQ-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm4[0],ymm0[1],ymm4[1],ymm0[4],ymm4[4],ymm0[5],ymm4[5]
+; X64-AVX512VLDQ-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
+; X64-AVX512VLDQ-NEXT: retq
+ %v = load <16 x half>, ptr %p, align 32
+ %nnv = call <16 x half> @llvm.fabs.v16f16(<16 x half> %v)
+ ret <16 x half> %nnv
+}
+declare <16 x half> @llvm.fabs.v16f16(<16 x half> %p)
+
+;
+; 512-bit Vectors
+;
+
define <8 x double> @fabs_v8f64(<8 x double> %p) {
; X86-AVX-LABEL: fabs_v8f64:
; X86-AVX: # %bb.0:
@@ -197,6 +1674,11 @@ define <8 x double> @fabs_v8f64(<8 x double> %p) {
; X86-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm0, %zmm0
; X86-AVX512VL-NEXT: retl
;
+; X86-AVX512FP16-LABEL: fabs_v8f64:
+; X86-AVX512FP16: # %bb.0:
+; X86-AVX512FP16-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm0, %zmm0
+; X86-AVX512FP16-NEXT: retl
+;
; X86-AVX512VLDQ-LABEL: fabs_v8f64:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm0, %zmm0
@@ -214,6 +1696,11 @@ define <8 x double> @fabs_v8f64(<8 x double> %p) {
; X64-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; X64-AVX512VL-NEXT: retq
;
+; X64-AVX512FP16-LABEL: fabs_v8f64:
+; X64-AVX512FP16: # %bb.0:
+; X64-AVX512FP16-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; X64-AVX512FP16-NEXT: retq
+;
; X64-AVX512VLDQ-LABEL: fabs_v8f64:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
@@ -236,6 +1723,11 @@ define <16 x float> @fabs_v16f32(<16 x float> %p) {
; X86-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; X86-AVX512VL-NEXT: retl
;
+; X86-AVX512FP16-LABEL: fabs_v16f32:
+; X86-AVX512FP16: # %bb.0:
+; X86-AVX512FP16-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
+; X86-AVX512FP16-NEXT: retl
+;
; X86-AVX512VLDQ-LABEL: fabs_v16f32:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
@@ -253,6 +1745,11 @@ define <16 x float> @fabs_v16f32(<16 x float> %p) {
; X64-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; X64-AVX512VL-NEXT: retq
;
+; X64-AVX512FP16-LABEL: fabs_v16f32:
+; X64-AVX512FP16: # %bb.0:
+; X64-AVX512FP16-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
+; X64-AVX512FP16-NEXT: retq
+;
; X64-AVX512VLDQ-LABEL: fabs_v16f32:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
@@ -262,6 +1759,2711 @@ define <16 x float> @fabs_v16f32(<16 x float> %p) {
}
declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
+define <32 x half> @fabs_v32f16(ptr %p) {
+; X86-AVX1-LABEL: fabs_v32f16:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: pushl %esi
+; X86-AVX1-NEXT: .cfi_def_cfa_offset 8
+; X86-AVX1-NEXT: subl $644, %esp # imm = 0x284
+; X86-AVX1-NEXT: .cfi_def_cfa_offset 652
+; X86-AVX1-NEXT: .cfi_offset %esi, -8
+; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-AVX1-NEXT: vmovdqa 32(%esi), %xmm0
+; X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vbroadcastss 36(%esi), %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vbroadcastss 40(%esi), %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vbroadcastss 44(%esi), %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqa 48(%esi), %xmm0
+; X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vbroadcastss 52(%esi), %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vbroadcastss 56(%esi), %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vbroadcastss 60(%esi), %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqa (%esi), %xmm1
+; X86-AVX1-NEXT: vmovdqu %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovaps 16(%esi), %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vpsrld $16, %xmm1, %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vbroadcastss 4(%esi), %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vbroadcastss 8(%esi), %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vbroadcastss 12(%esi), %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vbroadcastss 20(%esi), %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vbroadcastss 24(%esi), %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vbroadcastss 28(%esi), %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __extendhfsf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX1-NEXT: vmovss %xmm1, (%esp)
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+; X86-AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3]
+; X86-AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X86-AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3]
+; X86-AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3]
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm3 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm3, %xmm3 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm3 = xmm3[0],mem[0],xmm3[1],mem[1],xmm3[2],mem[2],xmm3[3],mem[3]
+; X86-AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; X86-AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX1-NEXT: vmovups %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+; X86-AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3]
+; X86-AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X86-AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+; X86-AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT: vzeroupper
+; X86-AVX1-NEXT: calll __truncsfhf2
+; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X86-AVX1-NEXT: vpunpckldq {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; X86-AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
+; X86-AVX1-NEXT: vinsertf128 $1, {{[-0-9]+}}(%e{{[sb]}}p), %ymm0, %ymm1 # 16-byte Folded Reload
+; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 # 32-byte Reload
+; X86-AVX1-NEXT: addl $644, %esp # imm = 0x284
+; X86-AVX1-NEXT: .cfi_def_cfa_offset 8
+; X86-AVX1-NEXT: popl %esi
+; X86-AVX1-NEXT: .cfi_def_cfa_offset 4
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: fabs_v32f16:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: pushl %esi
+; X86-AVX2-NEXT: .cfi_def_cfa_offset 8
+; X86-AVX2-NEXT: subl $708, %esp # imm = 0x2C4
+; X86-AVX2-NEXT: .cfi_def_cfa_offset 716
+; X86-AVX2-NEXT: .cfi_offset %esi, -8
+; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-AVX2-NEXT: vmovdqa 32(%esi), %xmm0
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqa 48(%esi), %xmm0
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
+; X86-AVX2-NEXT: vmovups %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovd %xmm0, (%esp)
+; X86-AVX2-NEXT: vpinsrw $0, 36(%esi), %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovd %xmm0, (%esp)
+; X86-AVX2-NEXT: vpinsrw $0, 52(%esi), %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovd %xmm0, (%esp)
+; X86-AVX2-NEXT: vpinsrw $0, 40(%esi), %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovd %xmm0, (%esp)
+; X86-AVX2-NEXT: vpinsrw $0, 56(%esi), %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovd %xmm0, (%esp)
+; X86-AVX2-NEXT: vpinsrw $0, 44(%esi), %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovd %xmm0, (%esp)
+; X86-AVX2-NEXT: vpinsrw $0, 60(%esi), %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqa (%esi), %xmm1
+; X86-AVX2-NEXT: vmovdqu %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovaps 16(%esi), %xmm0
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vpsrld $16, %xmm1, %xmm0
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovd %xmm0, (%esp)
+; X86-AVX2-NEXT: vpinsrw $0, 4(%esi), %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovd %xmm0, (%esp)
+; X86-AVX2-NEXT: vpinsrw $0, 20(%esi), %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovd %xmm0, (%esp)
+; X86-AVX2-NEXT: vpinsrw $0, 8(%esi), %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovd %xmm0, (%esp)
+; X86-AVX2-NEXT: vpinsrw $0, 24(%esi), %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovd %xmm0, (%esp)
+; X86-AVX2-NEXT: vpinsrw $0, 12(%esi), %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovd %xmm0, (%esp)
+; X86-AVX2-NEXT: vpinsrw $0, 28(%esi), %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __extendhfsf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vpand {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX2-NEXT: vmovss %xmm1, (%esp)
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+; X86-AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3]
+; X86-AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; X86-AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3]
+; X86-AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3]
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm3 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm3, %xmm3 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm3 = xmm3[0],mem[0],xmm3[1],mem[1],xmm3[2],mem[2],xmm3[3],mem[3]
+; X86-AVX2-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
+; X86-AVX2-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[4],ymm1[4],ymm2[5],ymm1[5]
+; X86-AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+; X86-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+; X86-AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3]
+; X86-AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; X86-AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
+; X86-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+; X86-AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX2-NEXT: vzeroupper
+; X86-AVX2-NEXT: calll __truncsfhf2
+; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; X86-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X86-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%e{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; X86-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%e{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
+; X86-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
+; X86-AVX2-NEXT: vpunpcklqdq {{[-0-9]+}}(%e{{[sb]}}p), %ymm0, %ymm1 # 32-byte Folded Reload
+; X86-AVX2-NEXT: # ymm1 = ymm0[0],mem[0],ymm0[2],mem[2]
+; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 # 32-byte Reload
+; X86-AVX2-NEXT: addl $708, %esp # imm = 0x2C4
+; X86-AVX2-NEXT: .cfi_def_cfa_offset 8
+; X86-AVX2-NEXT: popl %esi
+; X86-AVX2-NEXT: .cfi_def_cfa_offset 4
+; X86-AVX2-NEXT: retl
+;
+; X86-AVX512VL-LABEL: fabs_v32f16:
+; X86-AVX512VL: # %bb.0:
+; X86-AVX512VL-NEXT: subl $128, %esp
+; X86-AVX512VL-NEXT: .cfi_def_cfa_offset 132
+; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512VL-NEXT: movzwl 60(%eax), %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm0
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm1
+; X86-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN]
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1
+; X86-AVX512VL-NEXT: vmovdqa 48(%eax), %xmm3
+; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm2 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VL-NEXT: vmovdqa %xmm3, %xmm4
+; X86-AVX512VL-NEXT: vmovdqu %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm2, %ecx
+; X86-AVX512VL-NEXT: movzwl %cx, %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm2
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm2, %xmm2
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm2
+; X86-AVX512VL-NEXT: vmovd %xmm2, %ecx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm2
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; X86-AVX512VL-NEXT: movzwl 44(%eax), %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm2
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm2, %xmm2
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm2
+; X86-AVX512VL-NEXT: vmovd %xmm2, %ecx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm2
+; X86-AVX512VL-NEXT: vmovdqa 32(%eax), %xmm3
+; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm5 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm5, %ecx
+; X86-AVX512VL-NEXT: movzwl %cx, %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm5
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X86-AVX512VL-NEXT: vmovd %xmm5, %ecx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3]
+; X86-AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm5
+; X86-AVX512VL-NEXT: movzwl 28(%eax), %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm1
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm6
+; X86-AVX512VL-NEXT: vmovdqa (%eax), %xmm1
+; X86-AVX512VL-NEXT: vmovdqu %xmm1, (%esp) # 16-byte Spill
+; X86-AVX512VL-NEXT: vmovdqa 16(%eax), %xmm2
+; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm7 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VL-NEXT: vmovdqu %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm7, %ecx
+; X86-AVX512VL-NEXT: movzwl %cx, %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm7
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X86-AVX512VL-NEXT: vmovd %xmm7, %ecx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm7
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; X86-AVX512VL-NEXT: movzwl 12(%eax), %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm7
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X86-AVX512VL-NEXT: vmovd %xmm7, %ecx
+; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm7 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm7, %edx
+; X86-AVX512VL-NEXT: movzwl %dx, %edx
+; X86-AVX512VL-NEXT: vmovd %edx, %xmm7
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X86-AVX512VL-NEXT: vmovd %xmm7, %edx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm7
+; X86-AVX512VL-NEXT: vpinsrw $0, %edx, %xmm0, %xmm1
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm7[0],xmm1[0],xmm7[1],xmm1[1],xmm7[2],xmm1[2],xmm7[3],xmm1[3]
+; X86-AVX512VL-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
+; X86-AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1
+; X86-AVX512VL-NEXT: vmovdqu64 %zmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 64-byte Spill
+; X86-AVX512VL-NEXT: movzwl 56(%eax), %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm1
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1
+; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm6 = xmm4[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm6, %ecx
+; X86-AVX512VL-NEXT: movzwl %cx, %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm6
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X86-AVX512VL-NEXT: vmovd %xmm6, %ecx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm6
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3]
+; X86-AVX512VL-NEXT: movzwl 40(%eax), %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm1
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm1 = xmm3[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VL-NEXT: vmovdqa %xmm3, %xmm4
+; X86-AVX512VL-NEXT: vmovdqu %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm1, %edx
+; X86-AVX512VL-NEXT: movzwl %dx, %edx
+; X86-AVX512VL-NEXT: vmovd %edx, %xmm1
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vmovd %xmm1, %edx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1
+; X86-AVX512VL-NEXT: vpinsrw $0, %edx, %xmm0, %xmm7
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3]
+; X86-AVX512VL-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm6
+; X86-AVX512VL-NEXT: movzwl 24(%eax), %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm1
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm1 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm1, %edx
+; X86-AVX512VL-NEXT: movzwl %dx, %edx
+; X86-AVX512VL-NEXT: vmovd %edx, %xmm1
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vmovd %xmm1, %edx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1
+; X86-AVX512VL-NEXT: vpinsrw $0, %edx, %xmm0, %xmm7
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3]
+; X86-AVX512VL-NEXT: movzwl 8(%eax), %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm1
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VL-NEXT: vmovdqu (%esp), %xmm2 # 16-byte Reload
+; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm1 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm1, %edx
+; X86-AVX512VL-NEXT: movzwl %dx, %edx
+; X86-AVX512VL-NEXT: vmovd %edx, %xmm1
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vmovd %xmm1, %edx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1
+; X86-AVX512VL-NEXT: vpinsrw $0, %edx, %xmm0, %xmm5
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3]
+; X86-AVX512VL-NEXT: vinserti128 $1, %xmm7, %ymm1, %ymm1
+; X86-AVX512VL-NEXT: vinserti64x4 $1, %ymm6, %zmm1, %zmm1
+; X86-AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%e{{[sb]}}p), %zmm1, %zmm1 # 64-byte Folded Reload
+; X86-AVX512VL-NEXT: # zmm1 = zmm1[0],mem[0],zmm1[1],mem[1],zmm1[4],mem[4],zmm1[5],mem[5],zmm1[8],mem[8],zmm1[9],mem[9],zmm1[12],mem[12],zmm1[13],mem[13]
+; X86-AVX512VL-NEXT: vmovdqu64 %zmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 64-byte Spill
+; X86-AVX512VL-NEXT: movzwl 52(%eax), %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm1
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1
+; X86-AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm3 # 16-byte Reload
+; X86-AVX512VL-NEXT: vpsrlq $48, %xmm3, %xmm6
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm6, %ecx
+; X86-AVX512VL-NEXT: movzwl %cx, %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm6
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X86-AVX512VL-NEXT: vmovd %xmm6, %ecx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm6
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3]
+; X86-AVX512VL-NEXT: movzwl 36(%eax), %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm1
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VL-NEXT: vpsrlq $48, %xmm4, %xmm1
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm1, %edx
+; X86-AVX512VL-NEXT: movzwl %dx, %edx
+; X86-AVX512VL-NEXT: vmovd %edx, %xmm1
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vmovd %xmm1, %edx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1
+; X86-AVX512VL-NEXT: vpinsrw $0, %edx, %xmm0, %xmm7
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3]
+; X86-AVX512VL-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm6
+; X86-AVX512VL-NEXT: movzwl 20(%eax), %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm1
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VL-NEXT: vpsrlq $48, {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm1, %edx
+; X86-AVX512VL-NEXT: movzwl %dx, %edx
+; X86-AVX512VL-NEXT: vmovd %edx, %xmm1
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vmovd %xmm1, %edx
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1
+; X86-AVX512VL-NEXT: vpinsrw $0, %edx, %xmm0, %xmm7
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3]
+; X86-AVX512VL-NEXT: movzwl 4(%eax), %eax
+; X86-AVX512VL-NEXT: vmovd %eax, %xmm1
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vmovd %xmm1, %eax
+; X86-AVX512VL-NEXT: vpsrlq $48, %xmm2, %xmm1
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm1, %ecx
+; X86-AVX512VL-NEXT: movzwl %cx, %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm1
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm1
+; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3]
+; X86-AVX512VL-NEXT: vinserti128 $1, %xmm7, %ymm1, %ymm1
+; X86-AVX512VL-NEXT: vinserti64x4 $1, %ymm6, %zmm1, %zmm6
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm3, %eax
+; X86-AVX512VL-NEXT: movzwl %ax, %eax
+; X86-AVX512VL-NEXT: vmovd %eax, %xmm1
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vmovd %xmm1, %eax
+; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm1
+; X86-AVX512VL-NEXT: vpsrld $16, %xmm3, %xmm4
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm4, %eax
+; X86-AVX512VL-NEXT: movzwl %ax, %eax
+; X86-AVX512VL-NEXT: vmovd %eax, %xmm4
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X86-AVX512VL-NEXT: vmovd %xmm4, %eax
+; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
+; X86-AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm2, %eax
+; X86-AVX512VL-NEXT: movzwl %ax, %eax
+; X86-AVX512VL-NEXT: vmovd %eax, %xmm1
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vmovd %xmm1, %eax
+; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm1
+; X86-AVX512VL-NEXT: vpsrld $16, %xmm2, %xmm3
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm3, %eax
+; X86-AVX512VL-NEXT: movzwl %ax, %eax
+; X86-AVX512VL-NEXT: vmovd %eax, %xmm3
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm3, %xmm3
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm3, %xmm3
+; X86-AVX512VL-NEXT: vmovd %xmm3, %eax
+; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm3
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X86-AVX512VL-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm3
+; X86-AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm2, %eax
+; X86-AVX512VL-NEXT: movzwl %ax, %eax
+; X86-AVX512VL-NEXT: vmovd %eax, %xmm1
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vmovd %xmm1, %eax
+; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm1
+; X86-AVX512VL-NEXT: vpsrld $16, %xmm2, %xmm2
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm2, %eax
+; X86-AVX512VL-NEXT: movzwl %ax, %eax
+; X86-AVX512VL-NEXT: vmovd %eax, %xmm2
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm2, %xmm2
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm2
+; X86-AVX512VL-NEXT: vmovd %xmm2, %eax
+; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm2
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; X86-AVX512VL-NEXT: vmovdqu (%esp), %xmm4 # 16-byte Reload
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm4, %eax
+; X86-AVX512VL-NEXT: movzwl %ax, %eax
+; X86-AVX512VL-NEXT: vmovd %eax, %xmm1
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vmovd %xmm1, %eax
+; X86-AVX512VL-NEXT: vpsrld $16, %xmm4, %xmm1
+; X86-AVX512VL-NEXT: vpextrw $0, %xmm1, %ecx
+; X86-AVX512VL-NEXT: movzwl %cx, %ecx
+; X86-AVX512VL-NEXT: vmovd %ecx, %xmm1
+; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm0
+; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm1
+; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; X86-AVX512VL-NEXT: vmovd %xmm0, %eax
+; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X86-AVX512VL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
+; X86-AVX512VL-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm6[0],zmm0[1],zmm6[1],zmm0[4],zmm6[4],zmm0[5],zmm6[5],zmm0[8],zmm6[8],zmm0[9],zmm6[9],zmm0[12],zmm6[12],zmm0[13],zmm6[13]
+; X86-AVX512VL-NEXT: vpunpcklqdq {{[-0-9]+}}(%e{{[sb]}}p), %zmm0, %zmm0 # 64-byte Folded Reload
+; X86-AVX512VL-NEXT: # zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
+; X86-AVX512VL-NEXT: addl $128, %esp
+; X86-AVX512VL-NEXT: .cfi_def_cfa_offset 4
+; X86-AVX512VL-NEXT: retl
+;
+; X86-AVX512FP16-LABEL: fabs_v32f16:
+; X86-AVX512FP16: # %bb.0:
+; X86-AVX512FP16-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512FP16-NEXT: vpbroadcastw {{.*#+}} zmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
+; X86-AVX512FP16-NEXT: vpandq (%eax), %zmm0, %zmm0
+; X86-AVX512FP16-NEXT: retl
+;
+; X86-AVX512VLDQ-LABEL: fabs_v32f16:
+; X86-AVX512VLDQ: # %bb.0:
+; X86-AVX512VLDQ-NEXT: subl $128, %esp
+; X86-AVX512VLDQ-NEXT: .cfi_def_cfa_offset 132
+; X86-AVX512VLDQ-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512VLDQ-NEXT: movzwl 60(%eax), %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm0
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm0, %xmm1
+; X86-AVX512VLDQ-NEXT: vpbroadcastd {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN]
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovdqa 48(%eax), %xmm3
+; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm2 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VLDQ-NEXT: vmovdqa %xmm3, %xmm4
+; X86-AVX512VLDQ-NEXT: vmovdqu %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm2, %ecx
+; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm2
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm2, %xmm2
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm2, %xmm2
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm2, %xmm2
+; X86-AVX512VLDQ-NEXT: vmovd %xmm2, %ecx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm2
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; X86-AVX512VLDQ-NEXT: movzwl 44(%eax), %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm2
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm2, %xmm2
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm2, %xmm2
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm2, %xmm2
+; X86-AVX512VLDQ-NEXT: vmovd %xmm2, %ecx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm2
+; X86-AVX512VLDQ-NEXT: vmovdqa 32(%eax), %xmm3
+; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm5 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm5, %ecx
+; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm5
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5
+; X86-AVX512VLDQ-NEXT: vmovd %xmm5, %ecx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3]
+; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm5
+; X86-AVX512VLDQ-NEXT: movzwl 28(%eax), %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm6
+; X86-AVX512VLDQ-NEXT: vmovdqa (%eax), %xmm1
+; X86-AVX512VLDQ-NEXT: vmovdqu %xmm1, (%esp) # 16-byte Spill
+; X86-AVX512VLDQ-NEXT: vmovdqa 16(%eax), %xmm2
+; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm7 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VLDQ-NEXT: vmovdqu %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm7, %ecx
+; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm7
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X86-AVX512VLDQ-NEXT: vmovd %xmm7, %ecx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm7
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; X86-AVX512VLDQ-NEXT: movzwl 12(%eax), %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm7
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X86-AVX512VLDQ-NEXT: vmovd %xmm7, %ecx
+; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm7 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm7, %edx
+; X86-AVX512VLDQ-NEXT: movzwl %dx, %edx
+; X86-AVX512VLDQ-NEXT: vmovd %edx, %xmm7
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X86-AVX512VLDQ-NEXT: vmovd %xmm7, %edx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm7
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %edx, %xmm0, %xmm1
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm7[0],xmm1[0],xmm7[1],xmm1[1],xmm7[2],xmm1[2],xmm7[3],xmm1[3]
+; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
+; X86-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1
+; X86-AVX512VLDQ-NEXT: vmovdqu64 %zmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 64-byte Spill
+; X86-AVX512VLDQ-NEXT: movzwl 56(%eax), %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1
+; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm6 = xmm4[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm6, %ecx
+; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm6
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X86-AVX512VLDQ-NEXT: vmovd %xmm6, %ecx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm6
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3]
+; X86-AVX512VLDQ-NEXT: movzwl 40(%eax), %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm1 = xmm3[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VLDQ-NEXT: vmovdqa %xmm3, %xmm4
+; X86-AVX512VLDQ-NEXT: vmovdqu %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %edx
+; X86-AVX512VLDQ-NEXT: movzwl %dx, %edx
+; X86-AVX512VLDQ-NEXT: vmovd %edx, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %edx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %edx, %xmm0, %xmm7
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3]
+; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm6
+; X86-AVX512VLDQ-NEXT: movzwl 24(%eax), %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm1 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %edx
+; X86-AVX512VLDQ-NEXT: movzwl %dx, %edx
+; X86-AVX512VLDQ-NEXT: vmovd %edx, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %edx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %edx, %xmm0, %xmm7
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3]
+; X86-AVX512VLDQ-NEXT: movzwl 8(%eax), %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VLDQ-NEXT: vmovdqu (%esp), %xmm2 # 16-byte Reload
+; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm1 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %edx
+; X86-AVX512VLDQ-NEXT: movzwl %dx, %edx
+; X86-AVX512VLDQ-NEXT: vmovd %edx, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %edx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %edx, %xmm0, %xmm5
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3]
+; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm7, %ymm1, %ymm1
+; X86-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm6, %zmm1, %zmm1
+; X86-AVX512VLDQ-NEXT: vpunpckldq {{[-0-9]+}}(%e{{[sb]}}p), %zmm1, %zmm1 # 64-byte Folded Reload
+; X86-AVX512VLDQ-NEXT: # zmm1 = zmm1[0],mem[0],zmm1[1],mem[1],zmm1[4],mem[4],zmm1[5],mem[5],zmm1[8],mem[8],zmm1[9],mem[9],zmm1[12],mem[12],zmm1[13],mem[13]
+; X86-AVX512VLDQ-NEXT: vmovdqu64 %zmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 64-byte Spill
+; X86-AVX512VLDQ-NEXT: movzwl 52(%eax), %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm3 # 16-byte Reload
+; X86-AVX512VLDQ-NEXT: vpsrlq $48, %xmm3, %xmm6
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm6, %ecx
+; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm6
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X86-AVX512VLDQ-NEXT: vmovd %xmm6, %ecx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm6
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3]
+; X86-AVX512VLDQ-NEXT: movzwl 36(%eax), %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VLDQ-NEXT: vpsrlq $48, %xmm4, %xmm1
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %edx
+; X86-AVX512VLDQ-NEXT: movzwl %dx, %edx
+; X86-AVX512VLDQ-NEXT: vmovd %edx, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %edx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %edx, %xmm0, %xmm7
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3]
+; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm6
+; X86-AVX512VLDQ-NEXT: movzwl 20(%eax), %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VLDQ-NEXT: vpsrlq $48, {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %edx
+; X86-AVX512VLDQ-NEXT: movzwl %dx, %edx
+; X86-AVX512VLDQ-NEXT: vmovd %edx, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %edx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %edx, %xmm0, %xmm7
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3]
+; X86-AVX512VLDQ-NEXT: movzwl 4(%eax), %eax
+; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %eax
+; X86-AVX512VLDQ-NEXT: vpsrlq $48, %xmm2, %xmm1
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %ecx
+; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm1
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3]
+; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm7, %ymm1, %ymm1
+; X86-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm6, %zmm1, %zmm6
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm3, %eax
+; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %eax
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm1
+; X86-AVX512VLDQ-NEXT: vpsrld $16, %xmm3, %xmm4
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm4, %eax
+; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm4
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X86-AVX512VLDQ-NEXT: vmovd %xmm4, %eax
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
+; X86-AVX512VLDQ-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm2, %eax
+; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %eax
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm1
+; X86-AVX512VLDQ-NEXT: vpsrld $16, %xmm2, %xmm3
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm3, %eax
+; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm3
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm3, %xmm3
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm3, %xmm3
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm3, %xmm3
+; X86-AVX512VLDQ-NEXT: vmovd %xmm3, %eax
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm3
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm3
+; X86-AVX512VLDQ-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm2, %eax
+; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %eax
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm1
+; X86-AVX512VLDQ-NEXT: vpsrld $16, %xmm2, %xmm2
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm2, %eax
+; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm2
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm2, %xmm2
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm2, %xmm2
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm2, %xmm2
+; X86-AVX512VLDQ-NEXT: vmovd %xmm2, %eax
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm2
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; X86-AVX512VLDQ-NEXT: vmovdqu (%esp), %xmm4 # 16-byte Reload
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm4, %eax
+; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %eax
+; X86-AVX512VLDQ-NEXT: vpsrld $16, %xmm4, %xmm1
+; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %ecx
+; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx
+; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm0
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm1
+; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; X86-AVX512VLDQ-NEXT: vmovd %xmm0, %eax
+; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
+; X86-AVX512VLDQ-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm6[0],zmm0[1],zmm6[1],zmm0[4],zmm6[4],zmm0[5],zmm6[5],zmm0[8],zmm6[8],zmm0[9],zmm6[9],zmm0[12],zmm6[12],zmm0[13],zmm6[13]
+; X86-AVX512VLDQ-NEXT: vpunpcklqdq {{[-0-9]+}}(%e{{[sb]}}p), %zmm0, %zmm0 # 64-byte Folded Reload
+; X86-AVX512VLDQ-NEXT: # zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
+; X86-AVX512VLDQ-NEXT: addl $128, %esp
+; X86-AVX512VLDQ-NEXT: .cfi_def_cfa_offset 4
+; X86-AVX512VLDQ-NEXT: retl
+;
+; X64-AVX1-LABEL: fabs_v32f16:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: pushq %rbx
+; X64-AVX1-NEXT: .cfi_def_cfa_offset 16
+; X64-AVX1-NEXT: subq $128, %rsp
+; X64-AVX1-NEXT: .cfi_def_cfa_offset 144
+; X64-AVX1-NEXT: .cfi_offset %rbx, -16
+; X64-AVX1-NEXT: movq %rdi, %rbx
+; X64-AVX1-NEXT: vbroadcastss 28(%rdi), %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-NEXT: vmovaps (%rbx), %xmm0
+; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa 16(%rbx), %xmm1
+; X64-AVX1-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vmovaps 32(%rbx), %xmm0
+; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vmovaps 48(%rbx), %xmm0
+; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-NEXT: vbroadcastss 24(%rbx), %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; X64-AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-NEXT: vbroadcastss 20(%rbx), %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; X64-AVX1-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
+; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vbroadcastss 12(%rbx), %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-NEXT: vbroadcastss 8(%rbx), %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; X64-AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-NEXT: vbroadcastss 4(%rbx), %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; X64-AVX1-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
+; X64-AVX1-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; X64-AVX1-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; X64-AVX1-NEXT: vbroadcastss 60(%rbx), %xmm0
+; X64-AVX1-NEXT: vzeroupper
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vbroadcastss 56(%rbx), %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vbroadcastss 52(%rbx), %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; X64-AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
+; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vbroadcastss 44(%rbx), %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vbroadcastss 40(%rbx), %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vbroadcastss 36(%rbx), %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __extendhfsf2@PLT
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: callq __truncsfhf2@PLT
+; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX1-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; X64-AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
+; X64-AVX1-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm1 # 16-byte Folded Reload
+; X64-AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; X64-AVX1-NEXT: addq $128, %rsp
+; X64-AVX1-NEXT: .cfi_def_cfa_offset 16
+; X64-AVX1-NEXT: popq %rbx
+; X64-AVX1-NEXT: .cfi_def_cfa_offset 8
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: fabs_v32f16:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: pushq %rbx
+; X64-AVX2-NEXT: .cfi_def_cfa_offset 16
+; X64-AVX2-NEXT: subq $192, %rsp
+; X64-AVX2-NEXT: .cfi_def_cfa_offset 208
+; X64-AVX2-NEXT: .cfi_offset %rbx, -16
+; X64-AVX2-NEXT: movq %rdi, %rbx
+; X64-AVX2-NEXT: vpinsrw $0, 28(%rdi), %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
+; X64-AVX2-NEXT: vmovdqa %xmm1, (%rsp) # 16-byte Spill
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovaps (%rbx), %xmm0
+; X64-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa 16(%rbx), %xmm1
+; X64-AVX2-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovaps 32(%rbx), %xmm0
+; X64-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovaps 48(%rbx), %xmm0
+; X64-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vpinsrw $0, 12(%rbx), %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; X64-AVX2-NEXT: vpinsrw $0, 24(%rbx), %xmm0, %xmm0
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vpinsrw $0, 8(%rbx), %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
+; X64-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
+; X64-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; X64-AVX2-NEXT: vpinsrw $0, 20(%rbx), %xmm0, %xmm0
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vpinsrw $0, 4(%rbx), %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; X64-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vandps (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vandps (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
+; X64-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
+; X64-AVX2-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
+; X64-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
+; X64-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; X64-AVX2-NEXT: vpinsrw $0, 60(%rbx), %xmm0, %xmm0
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vpinsrw $0, 44(%rbx), %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; X64-AVX2-NEXT: vpinsrw $0, 56(%rbx), %xmm0, %xmm0
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vpinsrw $0, 40(%rbx), %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
+; X64-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
+; X64-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; X64-AVX2-NEXT: vpinsrw $0, 52(%rbx), %xmm0, %xmm0
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vpinsrw $0, 36(%rbx), %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; X64-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vandps (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vandps (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
+; X64-AVX2-NEXT: callq __extendhfsf2@PLT
+; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: callq __truncsfhf2@PLT
+; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; X64-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
+; X64-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
+; X64-AVX2-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm1 # 32-byte Folded Reload
+; X64-AVX2-NEXT: # ymm1 = ymm0[0],mem[0],ymm0[2],mem[2]
+; X64-AVX2-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; X64-AVX2-NEXT: addq $192, %rsp
+; X64-AVX2-NEXT: .cfi_def_cfa_offset 16
+; X64-AVX2-NEXT: popq %rbx
+; X64-AVX2-NEXT: .cfi_def_cfa_offset 8
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512VL-LABEL: fabs_v32f16:
+; X64-AVX512VL: # %bb.0:
+; X64-AVX512VL-NEXT: movzwl 60(%rdi), %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm0
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm1
+; X64-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN]
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X64-AVX512VL-NEXT: vmovd %xmm1, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X64-AVX512VL-NEXT: vmovdqa (%rdi), %xmm1
+; X64-AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm2
+; X64-AVX512VL-NEXT: vmovdqa 32(%rdi), %xmm3
+; X64-AVX512VL-NEXT: vmovdqa 48(%rdi), %xmm4
+; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm6 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm6, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm6
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X64-AVX512VL-NEXT: vmovd %xmm6, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; X64-AVX512VL-NEXT: movzwl 44(%rdi), %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm6
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X64-AVX512VL-NEXT: vmovd %xmm6, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6
+; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm7 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm7, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; X64-AVX512VL-NEXT: vinserti128 $1, %xmm5, %ymm6, %ymm5
+; X64-AVX512VL-NEXT: movzwl 28(%rdi), %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm6
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X64-AVX512VL-NEXT: vmovd %xmm6, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6
+; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm7 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm7, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; X64-AVX512VL-NEXT: movzwl 12(%rdi), %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm8 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm8, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm8
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm8, %xmm8
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm8, %xmm8
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm8, %xmm8
+; X64-AVX512VL-NEXT: vmovd %xmm8, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3]
+; X64-AVX512VL-NEXT: vinserti128 $1, %xmm6, %ymm7, %ymm6
+; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm6, %zmm5
+; X64-AVX512VL-NEXT: movzwl 56(%rdi), %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm6
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X64-AVX512VL-NEXT: vmovd %xmm6, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6
+; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm7 = xmm4[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm7, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; X64-AVX512VL-NEXT: movzwl 40(%rdi), %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm8 = xmm3[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm8, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm8
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm8, %xmm8
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm8, %xmm8
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm8, %xmm8
+; X64-AVX512VL-NEXT: vmovd %xmm8, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3]
+; X64-AVX512VL-NEXT: vinserti128 $1, %xmm6, %ymm7, %ymm6
+; X64-AVX512VL-NEXT: movzwl 24(%rdi), %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm8 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm8, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm8
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm8, %xmm8
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm8, %xmm8
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm8, %xmm8
+; X64-AVX512VL-NEXT: vmovd %xmm8, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3]
+; X64-AVX512VL-NEXT: movzwl 8(%rdi), %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm8
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm8, %xmm8
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm8, %xmm8
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm8, %xmm8
+; X64-AVX512VL-NEXT: vmovd %xmm8, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8
+; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm9 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm9, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm9
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm9, %xmm9
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm9, %xmm9
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm9, %xmm9
+; X64-AVX512VL-NEXT: vmovd %xmm9, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm9
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm8 = xmm8[0],xmm9[0],xmm8[1],xmm9[1],xmm8[2],xmm9[2],xmm8[3],xmm9[3]
+; X64-AVX512VL-NEXT: vinserti128 $1, %xmm7, %ymm8, %ymm7
+; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm6, %zmm7, %zmm6
+; X64-AVX512VL-NEXT: vpunpckldq {{.*#+}} zmm5 = zmm6[0],zmm5[0],zmm6[1],zmm5[1],zmm6[4],zmm5[4],zmm6[5],zmm5[5],zmm6[8],zmm5[8],zmm6[9],zmm5[9],zmm6[12],zmm5[12],zmm6[13],zmm5[13]
+; X64-AVX512VL-NEXT: movzwl 52(%rdi), %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm6
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X64-AVX512VL-NEXT: vmovd %xmm6, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6
+; X64-AVX512VL-NEXT: vpsrlq $48, %xmm4, %xmm7
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm7, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; X64-AVX512VL-NEXT: movzwl 36(%rdi), %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VL-NEXT: vpsrlq $48, %xmm3, %xmm8
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm8, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm8
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm8, %xmm8
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm8, %xmm8
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm8, %xmm8
+; X64-AVX512VL-NEXT: vmovd %xmm8, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3]
+; X64-AVX512VL-NEXT: vinserti128 $1, %xmm6, %ymm7, %ymm6
+; X64-AVX512VL-NEXT: movzwl 20(%rdi), %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VL-NEXT: vpsrlq $48, %xmm2, %xmm8
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm8, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm8
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm8, %xmm8
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm8, %xmm8
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm8, %xmm8
+; X64-AVX512VL-NEXT: vmovd %xmm8, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3]
+; X64-AVX512VL-NEXT: movzwl 4(%rdi), %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm8
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm8, %xmm8
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm8, %xmm8
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm8, %xmm8
+; X64-AVX512VL-NEXT: vmovd %xmm8, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8
+; X64-AVX512VL-NEXT: vpsrlq $48, %xmm1, %xmm9
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm9, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm9
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm9, %xmm9
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm9, %xmm9
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm9, %xmm9
+; X64-AVX512VL-NEXT: vmovd %xmm9, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm9
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm8 = xmm8[0],xmm9[0],xmm8[1],xmm9[1],xmm8[2],xmm9[2],xmm8[3],xmm9[3]
+; X64-AVX512VL-NEXT: vinserti128 $1, %xmm7, %ymm8, %ymm7
+; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm6, %zmm7, %zmm6
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm4, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VL-NEXT: vpsrld $16, %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm4, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm4
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vmovd %xmm4, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3]
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm3, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VL-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VL-NEXT: vpsrld $16, %xmm3, %xmm3
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm3, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm3
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm3, %xmm3
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm3, %xmm3
+; X64-AVX512VL-NEXT: vmovd %xmm3, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm3
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3]
+; X64-AVX512VL-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm2, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm4
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vmovd %xmm4, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4
+; X64-AVX512VL-NEXT: vpsrld $16, %xmm2, %xmm2
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm2, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm2
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm2, %xmm2
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm2
+; X64-AVX512VL-NEXT: vmovd %xmm2, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm2
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm1, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm4
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X64-AVX512VL-NEXT: vmovd %xmm4, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4
+; X64-AVX512VL-NEXT: vpsrld $16, %xmm1, %xmm1
+; X64-AVX512VL-NEXT: vpextrw $0, %xmm1, %eax
+; X64-AVX512VL-NEXT: movzwl %ax, %eax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm1
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
+; X64-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm0
+; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; X64-AVX512VL-NEXT: vmovd %xmm0, %eax
+; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
+; X64-AVX512VL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
+; X64-AVX512VL-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm6[0],zmm0[1],zmm6[1],zmm0[4],zmm6[4],zmm0[5],zmm6[5],zmm0[8],zmm6[8],zmm0[9],zmm6[9],zmm0[12],zmm6[12],zmm0[13],zmm6[13]
+; X64-AVX512VL-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm5[0],zmm0[2],zmm5[2],zmm0[4],zmm5[4],zmm0[6],zmm5[6]
+; X64-AVX512VL-NEXT: retq
+;
+; X64-AVX512FP16-LABEL: fabs_v32f16:
+; X64-AVX512FP16: # %bb.0:
+; X64-AVX512FP16-NEXT: vpbroadcastw {{.*#+}} zmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
+; X64-AVX512FP16-NEXT: vpandq (%rdi), %zmm0, %zmm0
+; X64-AVX512FP16-NEXT: retq
+;
+; X64-AVX512VLDQ-LABEL: fabs_v32f16:
+; X64-AVX512VLDQ: # %bb.0:
+; X64-AVX512VLDQ-NEXT: movzwl 60(%rdi), %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm0
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm0, %xmm1
+; X64-AVX512VLDQ-NEXT: vpbroadcastd {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN]
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; X64-AVX512VLDQ-NEXT: vmovd %xmm1, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
+; X64-AVX512VLDQ-NEXT: vmovdqa (%rdi), %xmm1
+; X64-AVX512VLDQ-NEXT: vmovdqa 16(%rdi), %xmm2
+; X64-AVX512VLDQ-NEXT: vmovdqa 32(%rdi), %xmm3
+; X64-AVX512VLDQ-NEXT: vmovdqa 48(%rdi), %xmm4
+; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm6 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm6, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm6
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X64-AVX512VLDQ-NEXT: vmovd %xmm6, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; X64-AVX512VLDQ-NEXT: movzwl 44(%rdi), %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm6
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X64-AVX512VLDQ-NEXT: vmovd %xmm6, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6
+; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm7 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm7, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm5, %ymm6, %ymm5
+; X64-AVX512VLDQ-NEXT: movzwl 28(%rdi), %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm6
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X64-AVX512VLDQ-NEXT: vmovd %xmm6, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6
+; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm7 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm7, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; X64-AVX512VLDQ-NEXT: movzwl 12(%rdi), %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm8 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm8, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm8
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm8, %xmm8
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm8, %xmm8
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm8, %xmm8
+; X64-AVX512VLDQ-NEXT: vmovd %xmm8, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3]
+; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm6, %ymm7, %ymm6
+; X64-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm5, %zmm6, %zmm5
+; X64-AVX512VLDQ-NEXT: movzwl 56(%rdi), %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm6
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X64-AVX512VLDQ-NEXT: vmovd %xmm6, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6
+; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm7 = xmm4[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm7, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; X64-AVX512VLDQ-NEXT: movzwl 40(%rdi), %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm8 = xmm3[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm8, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm8
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm8, %xmm8
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm8, %xmm8
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm8, %xmm8
+; X64-AVX512VLDQ-NEXT: vmovd %xmm8, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3]
+; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm6, %ymm7, %ymm6
+; X64-AVX512VLDQ-NEXT: movzwl 24(%rdi), %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm8 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm8, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm8
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm8, %xmm8
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm8, %xmm8
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm8, %xmm8
+; X64-AVX512VLDQ-NEXT: vmovd %xmm8, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3]
+; X64-AVX512VLDQ-NEXT: movzwl 8(%rdi), %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm8
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm8, %xmm8
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm8, %xmm8
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm8, %xmm8
+; X64-AVX512VLDQ-NEXT: vmovd %xmm8, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8
+; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm9 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm9, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm9
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm9, %xmm9
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm9, %xmm9
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm9, %xmm9
+; X64-AVX512VLDQ-NEXT: vmovd %xmm9, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm9
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm8 = xmm8[0],xmm9[0],xmm8[1],xmm9[1],xmm8[2],xmm9[2],xmm8[3],xmm9[3]
+; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm7, %ymm8, %ymm7
+; X64-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm6, %zmm7, %zmm6
+; X64-AVX512VLDQ-NEXT: vpunpckldq {{.*#+}} zmm5 = zmm6[0],zmm5[0],zmm6[1],zmm5[1],zmm6[4],zmm5[4],zmm6[5],zmm5[5],zmm6[8],zmm5[8],zmm6[9],zmm5[9],zmm6[12],zmm5[12],zmm6[13],zmm5[13]
+; X64-AVX512VLDQ-NEXT: movzwl 52(%rdi), %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm6
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6
+; X64-AVX512VLDQ-NEXT: vmovd %xmm6, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6
+; X64-AVX512VLDQ-NEXT: vpsrlq $48, %xmm4, %xmm7
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm7, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; X64-AVX512VLDQ-NEXT: movzwl 36(%rdi), %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VLDQ-NEXT: vpsrlq $48, %xmm3, %xmm8
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm8, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm8
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm8, %xmm8
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm8, %xmm8
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm8, %xmm8
+; X64-AVX512VLDQ-NEXT: vmovd %xmm8, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3]
+; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm6, %ymm7, %ymm6
+; X64-AVX512VLDQ-NEXT: movzwl 20(%rdi), %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VLDQ-NEXT: vpsrlq $48, %xmm2, %xmm8
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm8, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm8
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm8, %xmm8
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm8, %xmm8
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm8, %xmm8
+; X64-AVX512VLDQ-NEXT: vmovd %xmm8, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3]
+; X64-AVX512VLDQ-NEXT: movzwl 4(%rdi), %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm8
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm8, %xmm8
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm8, %xmm8
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm8, %xmm8
+; X64-AVX512VLDQ-NEXT: vmovd %xmm8, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8
+; X64-AVX512VLDQ-NEXT: vpsrlq $48, %xmm1, %xmm9
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm9, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm9
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm9, %xmm9
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm9, %xmm9
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm9, %xmm9
+; X64-AVX512VLDQ-NEXT: vmovd %xmm9, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm9
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm8 = xmm8[0],xmm9[0],xmm8[1],xmm9[1],xmm8[2],xmm9[2],xmm8[3],xmm9[3]
+; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm7, %ymm8, %ymm7
+; X64-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm6, %zmm7, %zmm6
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm4, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VLDQ-NEXT: vpsrld $16, %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm4, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm4
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vmovd %xmm4, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3]
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm3, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7
+; X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7
+; X64-AVX512VLDQ-NEXT: vpsrld $16, %xmm3, %xmm3
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm3, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm3
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm3, %xmm3
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm3, %xmm3
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm3, %xmm3
+; X64-AVX512VLDQ-NEXT: vmovd %xmm3, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm3
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3]
+; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm2, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm4
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vmovd %xmm4, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4
+; X64-AVX512VLDQ-NEXT: vpsrld $16, %xmm2, %xmm2
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm2, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm2
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm2, %xmm2
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm2, %xmm2
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm2, %xmm2
+; X64-AVX512VLDQ-NEXT: vmovd %xmm2, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm2
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm4
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; X64-AVX512VLDQ-NEXT: vmovd %xmm4, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4
+; X64-AVX512VLDQ-NEXT: vpsrld $16, %xmm1, %xmm1
+; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %eax
+; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax
+; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm1
+; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1
+; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm0
+; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; X64-AVX512VLDQ-NEXT: vmovd %xmm0, %eax
+; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
+; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; X64-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
+; X64-AVX512VLDQ-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm6[0],zmm0[1],zmm6[1],zmm0[4],zmm6[4],zmm0[5],zmm6[5],zmm0[8],zmm6[8],zmm0[9],zmm6[9],zmm0[12],zmm6[12],zmm0[13],zmm6[13]
+; X64-AVX512VLDQ-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm5[0],zmm0[2],zmm5[2],zmm0[4],zmm5[4],zmm0[6],zmm5[6]
+; X64-AVX512VLDQ-NEXT: retq
+ %v = load <32 x half>, ptr %p, align 64
+ %nnv = call <32 x half> @llvm.fabs.v32f16(<32 x half> %v)
+ ret <32 x half> %nnv
+}
+declare <32 x half> @llvm.fabs.v32f16(<32 x half> %p)
+
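The contrast between the AVX512FP16 lowering above (one broadcast plus one
vpandq for all 32 lanes) and the scalarized libcall fallbacks is the point of
the test: fabs needs no arithmetic, only a bitwise AND that clears the sign
bit of each lane. A minimal sketch of that idiom for a single IEEE-754 half,
assuming only the standard bit layout (the "[NaN,NaN,...]" constants llc
prints are simply this mask broadcast to every lane):

    #include <cstdint>

    // fabs for one binary16 value: clear bit 15, keep exponent and mantissa.
    static inline uint16_t fabs_half_bits(uint16_t bits) {
      return bits & 0x7FFFu; // sign bit off, everything else untouched
    }
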
; PR20354: when generating code for a vector fabs op,
; make sure that we're only turning off the sign bit of each float value.
; No constant pool loads or vector ops are needed for the fabs of a
@@ -340,29 +4542,17 @@ define void @PR70947(ptr %src, ptr %dst) {
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
-; X86-AVX512VL-LABEL: PR70947:
-; X86-AVX512VL: # %bb.0:
-; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-AVX512VL-NEXT: vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN]
-; X86-AVX512VL-NEXT: vandps (%ecx), %ymm0, %ymm1
-; X86-AVX512VL-NEXT: vandps 32(%ecx), %xmm0, %xmm0
-; X86-AVX512VL-NEXT: vmovups %ymm1, (%eax)
-; X86-AVX512VL-NEXT: vmovups %xmm0, 16(%eax)
-; X86-AVX512VL-NEXT: vzeroupper
-; X86-AVX512VL-NEXT: retl
-;
-; X86-AVX512VLDQ-LABEL: PR70947:
-; X86-AVX512VLDQ: # %bb.0:
-; X86-AVX512VLDQ-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-AVX512VLDQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-AVX512VLDQ-NEXT: vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN]
-; X86-AVX512VLDQ-NEXT: vandps (%ecx), %ymm0, %ymm1
-; X86-AVX512VLDQ-NEXT: vandps 32(%ecx), %xmm0, %xmm0
-; X86-AVX512VLDQ-NEXT: vmovups %ymm1, (%eax)
-; X86-AVX512VLDQ-NEXT: vmovups %xmm0, 16(%eax)
-; X86-AVX512VLDQ-NEXT: vzeroupper
-; X86-AVX512VLDQ-NEXT: retl
+; X86-AVX512-LABEL: PR70947:
+; X86-AVX512: # %bb.0:
+; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX512-NEXT: vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN]
+; X86-AVX512-NEXT: vandps (%ecx), %ymm0, %ymm1
+; X86-AVX512-NEXT: vandps 32(%ecx), %xmm0, %xmm0
+; X86-AVX512-NEXT: vmovups %ymm1, (%eax)
+; X86-AVX512-NEXT: vmovups %xmm0, 16(%eax)
+; X86-AVX512-NEXT: vzeroupper
+; X86-AVX512-NEXT: retl
;
; X64-AVX1-LABEL: PR70947:
; X64-AVX1: # %bb.0:
@@ -385,25 +4575,15 @@ define void @PR70947(ptr %src, ptr %dst) {
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
-; X64-AVX512VL-LABEL: PR70947:
-; X64-AVX512VL: # %bb.0:
-; X64-AVX512VL-NEXT: vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN]
-; X64-AVX512VL-NEXT: vandps (%rdi), %ymm0, %ymm1
-; X64-AVX512VL-NEXT: vandps 32(%rdi), %xmm0, %xmm0
-; X64-AVX512VL-NEXT: vmovups %ymm1, (%rsi)
-; X64-AVX512VL-NEXT: vmovups %xmm0, 16(%rsi)
-; X64-AVX512VL-NEXT: vzeroupper
-; X64-AVX512VL-NEXT: retq
-;
-; X64-AVX512VLDQ-LABEL: PR70947:
-; X64-AVX512VLDQ: # %bb.0:
-; X64-AVX512VLDQ-NEXT: vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN]
-; X64-AVX512VLDQ-NEXT: vandps (%rdi), %ymm0, %ymm1
-; X64-AVX512VLDQ-NEXT: vandps 32(%rdi), %xmm0, %xmm0
-; X64-AVX512VLDQ-NEXT: vmovups %ymm1, (%rsi)
-; X64-AVX512VLDQ-NEXT: vmovups %xmm0, 16(%rsi)
-; X64-AVX512VLDQ-NEXT: vzeroupper
-; X64-AVX512VLDQ-NEXT: retq
+; X64-AVX512-LABEL: PR70947:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN]
+; X64-AVX512-NEXT: vandps (%rdi), %ymm0, %ymm1
+; X64-AVX512-NEXT: vandps 32(%rdi), %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovups %ymm1, (%rsi)
+; X64-AVX512-NEXT: vmovups %xmm0, 16(%rsi)
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
%src4 = getelementptr inbounds double, ptr %src, i64 4
%dst4 = getelementptr inbounds i32, ptr %dst, i64 4
%ld0 = load <4 x double>, ptr %src, align 8
diff --git a/llvm/test/MC/ELF/compress-debug-sections-zstd.s b/llvm/test/MC/ELF/compress-debug-sections-zstd.s
index 01ae21a2b8ab..a05c50157281 100644
--- a/llvm/test/MC/ELF/compress-debug-sections-zstd.s
+++ b/llvm/test/MC/ELF/compress-debug-sections-zstd.s
@@ -10,7 +10,8 @@
# SEC: .debug_line PROGBITS [[#%x,]] [[#%x,]] [[#%x,]] 00 C 0 0 8
# SEC: .debug_abbrev PROGBITS [[#%x,]] [[#%x,]] [[#%x,]] 00 0 0 1
# SEC: .debug_info PROGBITS [[#%x,]] [[#%x,]] [[#%x,]] 00 0 0 1
-# SEC: .debug_str PROGBITS [[#%x,]] [[#%x,]] [[#%x,]] 01 MSC 0 0 8
+## .debug_str is uncompressed because sizeof(Elf64_Chdr)+len(compressed) >= len(uncompressed).
+# SEC: .debug_str PROGBITS [[#%x,]] [[#%x,]] [[#%x,]] 01 MS 0 0 1
# SEC: .debug_frame PROGBITS [[#%x,]] [[#%x,]] [[#%x,]] 00 C 0 0 8
# CHECK: Contents of section .debug_line
@@ -23,6 +24,12 @@
# RUN: llvm-objcopy --decompress-debug-sections %t %t.decom
# RUN: cmp %t.uncom %t.decom
+## .debug_str is compressed because sizeof(Elf32_Chdr)+len(compressed) < len(uncompressed).
+# RUN: llvm-mc -filetype=obj -triple=i386 --defsym I386=1 -compress-debug-sections=zstd --defsym LONG=1 %s -o %t1
+# RUN: llvm-readelf -S %t1 | FileCheck %s --check-prefix=SEC1
+
+# SEC1: .debug_str PROGBITS [[#%x,]] [[#%x,]] [[#%x,]] 01 MSC 0 0 4
+
## Don't compress a section not named .debug_*.
.section .nonalloc,"",@progbits
.rept 50
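The two configurations above differ only in compression-header overhead:
sizeof(Elf64_Chdr) is 24 bytes (ch_type, ch_reserved, ch_size, ch_addralign)
while sizeof(Elf32_Chdr) is 12, so the same payload can cross the threshold on
one target but not the other. A sketch with hypothetical sizes (a 100-byte
.debug_str that zstd shrinks to 80 bytes):

    #include <cstddef>

    constexpr std::size_t Elf64ChdrSize = 24; // 4+4+8+8
    constexpr std::size_t Elf32ChdrSize = 12; // 4+4+4
    static_assert(Elf64ChdrSize + 80 >= 100, "ELF64: header overhead eats the win; stay uncompressed");
    static_assert(Elf32ChdrSize + 80 < 100, "ELF32: still a net saving; compress");
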
diff --git a/llvm/test/Transforms/InstCombine/div.ll b/llvm/test/Transforms/InstCombine/div.ll
index 844b35723b13..cd17d10d0e1d 100644
--- a/llvm/test/Transforms/InstCombine/div.ll
+++ b/llvm/test/Transforms/InstCombine/div.ll
@@ -22,7 +22,7 @@ define i32 @test2(i32 %A) {
define i32 @sdiv_by_minus1(i32 %A) {
; CHECK-LABEL: @sdiv_by_minus1(
-; CHECK-NEXT: [[B:%.*]] = sub i32 0, [[A:%.*]]
+; CHECK-NEXT: [[B:%.*]] = sub nsw i32 0, [[A:%.*]]
; CHECK-NEXT: ret i32 [[B]]
;
%B = sdiv i32 %A, -1
@@ -31,7 +31,7 @@ define i32 @sdiv_by_minus1(i32 %A) {
define <2 x i64> @sdiv_by_minus1_vec(<2 x i64> %x) {
; CHECK-LABEL: @sdiv_by_minus1_vec(
-; CHECK-NEXT: [[DIV:%.*]] = sub <2 x i64> zeroinitializer, [[X:%.*]]
+; CHECK-NEXT: [[DIV:%.*]] = sub nsw <2 x i64> zeroinitializer, [[X:%.*]]
; CHECK-NEXT: ret <2 x i64> [[DIV]]
;
%div = sdiv <2 x i64> %x, <i64 -1, i64 -1>
@@ -48,7 +48,7 @@ define <2 x i64> @sdiv_by_minus1_vec_poison_elt(<2 x i64> %x) {
define i32 @sdiv_by_sext_minus1(i1 %x, i32 %y) {
; CHECK-LABEL: @sdiv_by_sext_minus1(
-; CHECK-NEXT: [[DIV:%.*]] = sub i32 0, [[Y:%.*]]
+; CHECK-NEXT: [[DIV:%.*]] = sub nsw i32 0, [[Y:%.*]]
; CHECK-NEXT: ret i32 [[DIV]]
;
%sext = sext i1 %x to i32
@@ -58,7 +58,7 @@ define i32 @sdiv_by_sext_minus1(i1 %x, i32 %y) {
define <2 x i32> @sdiv_by_sext_minus1_vec(<2 x i1> %x, <2 x i32> %y) {
; CHECK-LABEL: @sdiv_by_sext_minus1_vec(
-; CHECK-NEXT: [[DIV:%.*]] = sub <2 x i32> zeroinitializer, [[Y:%.*]]
+; CHECK-NEXT: [[DIV:%.*]] = sub nsw <2 x i32> zeroinitializer, [[Y:%.*]]
; CHECK-NEXT: ret <2 x i32> [[DIV]]
;
%sext = sext <2 x i1> %x to <2 x i32>
@@ -1308,8 +1308,8 @@ define i32 @udiv_select_of_constants_divisor(i1 %b, i32 %x) {
define i1 @sdiv_one_icmpeq_one(i32 %x) {
; CHECK-LABEL: @sdiv_one_icmpeq_one(
; CHECK-NEXT: [[X_FR:%.*]] = freeze i32 [[X:%.*]]
-; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[X_FR]], 1
-; CHECK-NEXT: ret i1 [[B]]
+; CHECK-NEXT: [[B1:%.*]] = icmp eq i32 [[X_FR]], 1
+; CHECK-NEXT: ret i1 [[B1]]
;
%A = sdiv i32 1, %x
%B = icmp eq i32 %A, 1
@@ -1319,8 +1319,8 @@ define i1 @sdiv_one_icmpeq_one(i32 %x) {
define i1 @sdiv_one_icmpeq_negone(i32 %x) {
; CHECK-LABEL: @sdiv_one_icmpeq_negone(
; CHECK-NEXT: [[X_FR:%.*]] = freeze i32 [[X:%.*]]
-; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[X_FR]], -1
-; CHECK-NEXT: ret i1 [[B]]
+; CHECK-NEXT: [[B1:%.*]] = icmp eq i32 [[X_FR]], -1
+; CHECK-NEXT: ret i1 [[B1]]
;
%A = sdiv i32 1, %x
%B = icmp eq i32 %A, -1
diff --git a/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll b/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll
index 81f22bb3c664..3451c5e626f3 100644
--- a/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll
+++ b/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll
@@ -38,7 +38,7 @@ define <2 x i8> @t2_vec_splat(<2 x i8> %x) {
define <2 x i8> @t3_vec(<2 x i8> %x) {
; CHECK-LABEL: @t3_vec(
; CHECK-NEXT: [[DIV_NEG:%.*]] = ashr exact <2 x i8> [[X:%.*]], <i8 5, i8 4>
-; CHECK-NEXT: [[DIV:%.*]] = sub <2 x i8> zeroinitializer, [[DIV_NEG]]
+; CHECK-NEXT: [[DIV:%.*]] = sub nsw <2 x i8> zeroinitializer, [[DIV_NEG]]
; CHECK-NEXT: ret <2 x i8> [[DIV]]
;
%div = sdiv exact <2 x i8> %x, <i8 -32, i8 -16>
@@ -76,8 +76,8 @@ define i8 @prove_exact_with_high_mask(i8 %x, i8 %y) {
define i8 @prove_exact_with_high_mask_limit(i8 %x, i8 %y) {
; CHECK-LABEL: @prove_exact_with_high_mask_limit(
-; CHECK-NEXT: [[A:%.*]] = ashr i8 [[X:%.*]], 5
-; CHECK-NEXT: [[D:%.*]] = sub nsw i8 0, [[A]]
+; CHECK-NEXT: [[D_NEG:%.*]] = ashr i8 [[X:%.*]], 5
+; CHECK-NEXT: [[D:%.*]] = sub nsw i8 0, [[D_NEG]]
; CHECK-NEXT: ret i8 [[D]]
;
%a = and i8 %x, -32
diff --git a/llvm/test/Transforms/LoopVectorize/pr66616.ll b/llvm/test/Transforms/LoopVectorize/pr66616.ll
new file mode 100644
index 000000000000..2fb7f88e5341
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/pr66616.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -passes="print<scalar-evolution>,loop-vectorize" --verify-scev -S < %s -force-vector-width=4 2>/dev/null | FileCheck %s
+
+; Make sure users of SCEVUnknowns from the scalar loop are invalidated.
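+;
+; A minimal sketch, assuming a simplified view of the behavior this test pins
+; down: when the vectorizer rewrites loop.1, SCEV expressions cached for values
+; such as %add3 become stale, and ScalarEvolution must be told to forget the
+; value (which also drops expressions built on its SCEVUnknown), or
+; --verify-scev fails. The helper name below is hypothetical; forgetValue is
+; the real API.
+;
+;   #include "llvm/Analysis/ScalarEvolution.h"
+;   #include "llvm/IR/Instruction.h"
+;   using namespace llvm;
+;
+;   // Drop the cached SCEVUnknown for I and anything derived from it.
+;   static void forgetRewrittenValue(ScalarEvolution &SE, Instruction *I) {
+;     SE.forgetValue(I);
+;   }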
+
+define void @pr66616(ptr %ptr) {
+; CHECK-LABEL: define void @pr66616(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
+; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+; CHECK-NEXT: br i1 true, label [[PREHEADER:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP_1:%.*]]
+; CHECK: loop.1:
+; CHECK-NEXT: [[IV_1:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[LOOP_1]] ]
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[LOAD]], 1
+; CHECK-NEXT: [[INC]] = add i8 [[IV_1]], 1
+; CHECK-NEXT: [[COND1:%.*]] = icmp eq i8 [[INC]], 0
+; CHECK-NEXT: br i1 [[COND1]], label [[PREHEADER]], label [[LOOP_1]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: preheader:
+; CHECK-NEXT: [[ADD3_LCSSA:%.*]] = phi i32 [ [[ADD3]], [[LOOP_1]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = sub i32 0, [[ADD3_LCSSA]]
+; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP6]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH2:%.*]], label [[VECTOR_PH3:%.*]]
+; CHECK: vector.ph3:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP6]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP6]], [[N_MOD_VF]]
+; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
+; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[ADD3_LCSSA]], [[DOTCAST]]
+; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[N_VEC]]
+; CHECK-NEXT: br label [[VECTOR_BODY7:%.*]]
+; CHECK: vector.body7:
+; CHECK-NEXT: [[INDEX8:%.*]] = phi i64 [ 0, [[VECTOR_PH3]] ], [ [[INDEX_NEXT9:%.*]], [[VECTOR_BODY7]] ]
+; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX8]], 4
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY7]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: middle.block1:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP6]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH2]]
+; CHECK: scalar.ph2:
+; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK1]] ], [ [[ADD3_LCSSA]], [[PREHEADER]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK1]] ], [ [[PTR]], [[PREHEADER]] ]
+; CHECK-NEXT: br label [[LOOP_2:%.*]]
+; CHECK: loop.2:
+; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[IV_2_I:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH2]] ]
+; CHECK-NEXT: [[IV_3:%.*]] = phi ptr [ [[IV_3_I:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH2]] ]
+; CHECK-NEXT: [[IV_2_I]] = add i32 [[IV_2]], 1
+; CHECK-NEXT: [[IV_3_I]] = getelementptr i8, ptr [[IV_3]], i64 1
+; CHECK-NEXT: [[COND2:%.*]] = icmp eq i32 [[IV_2]], 0
+; CHECK-NEXT: br i1 [[COND2]], label [[EXIT]], label [[LOOP_2]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.1
+
+loop.1:
+ %iv.1 = phi i8 [ 0, %entry ], [ %inc, %loop.1 ]
+ %load = load i32, ptr %ptr, align 4
+ %add3 = add i32 %load, 1
+ %inc = add i8 %iv.1, 1
+ %cond1 = icmp eq i8 %inc, 0
+ br i1 %cond1, label %preheader, label %loop.1
+
+preheader:
+ br label %loop.2
+
+loop.2:
+ %iv.2 = phi i32 [ %iv.2.i, %loop.2 ], [ %add3, %preheader ]
+ %iv.3 = phi ptr [ %iv.3.i, %loop.2 ], [ %ptr, %preheader ]
+ %iv.2.i = add i32 %iv.2, 1
+ %iv.3.i = getelementptr i8, ptr %iv.3, i64 1
+ %cond2 = icmp eq i32 %iv.2, 0
+ br i1 %cond2, label %exit, label %loop.2
+
+exit:
+ ret void
+}
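
For context, the `--verify-scev` run above guards against stale cached SCEVs after the first loop is vectorized. A minimal C++ sketch of the invalidation idiom (the helper name is illustrative; `ScalarEvolution::forgetValue` is the real entry point):

```cpp
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Value.h"

// Illustrative helper: after a transform rewrites a value from the scalar
// loop, cached SCEVs reaching it must be dropped, or --verify-scev reports
// the stale entries this test would otherwise trip over.
static void invalidateRewrittenValue(llvm::ScalarEvolution &SE,
                                     llvm::Value *V) {
  // forgetValue erases the cached SCEV for V and for all transitive users,
  // so later queries recompute against the updated IR.
  SE.forgetValue(V);
}
```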
diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/subprocess-preserved-registers.s b/llvm/test/tools/llvm-exegesis/X86/latency/subprocess-preserved-registers.s
new file mode 100644
index 000000000000..320d1d7db38f
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/X86/latency/subprocess-preserved-registers.s
@@ -0,0 +1,30 @@
+# REQUIRES: exegesis-can-measure-latency, x86_64-linux
+
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -snippets-file=%s -execution-mode=subprocess | FileCheck %s
+
+# See comment in ./subprocess-abnormal-exit-code.s on the transient
+# PTRACE_ATTACH failure.
+# ALLOW_RETRIES: 2
+
+# Check that the values of the registers preserved in subprocess mode while
+# making the ioctl system call are actually preserved correctly.
+
+# LLVM-EXEGESIS-DEFREG RAX 11
+# LLVM-EXEGESIS-DEFREG RDI 13
+# LLVM-EXEGESIS-DEFREG RSI 17
+# LLVM-EXEGESIS-DEFREG R13 0
+# LLVM-EXEGESIS-DEFREG R12 127
+
+cmpq $0x11, %rax
+cmovneq %r12, %r13
+cmpq $0x13, %rdi
+cmovneq %r12, %r13
+cmpq $0x17, %rsi
+cmovneq %r12, %r13
+
+movq $60, %rax
+movq %r13, %rdi
+syscall
+
+# CHECK-NOT: error: 'Child benchmarking process exited with non-zero exit code: Child process returned with unknown exit code'
+
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index ac99e98cc851..bf8fd9ec7066 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -1205,7 +1205,7 @@ ExegesisX86Target::configurePerfCounter(long Request, bool SaveRegisters) const
if(SaveRegisters) {
// Restore RAX, RDI, and RSI, in reverse order.
generateRegisterStackPop(X86::RSI, ConfigurePerfCounterCode);
- generateRegisterStackPop(X86::RIP, ConfigurePerfCounterCode);
+ generateRegisterStackPop(X86::RDI, ConfigurePerfCounterCode);
generateRegisterStackPop(X86::RAX, ConfigurePerfCounterCode);
}
return ConfigurePerfCounterCode;
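
For context on the one-register fix above: the restore sequence must pop in the reverse of the save order, so the middle pop targets RDI; the old RIP operand was a mistyped register. A minimal model of the discipline (names are illustrative, not the exegesis helpers):

```cpp
#include <algorithm>
#include <string>
#include <vector>

// Registers pushed before the ioctl sequence must be popped in reverse.
std::vector<std::string> restoreOrder(std::vector<std::string> saveOrder) {
  std::reverse(saveOrder.begin(), saveOrder.end());
  return saveOrder; // {"RAX","RDI","RSI"} saved -> {"RSI","RDI","RAX"}
}
```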
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index 68cb4823048e..d41e25492256 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1097,6 +1097,18 @@ INSTANTIATE_TEST_SUITE_P(
AArch64::AEK_SB})),
"9-A"),
ARMCPUTestParams<AArch64::ExtensionBitset>(
+ "cortex-a520", "armv9.2-a", "crypto-neon-fp-armv8",
+ (AArch64::ExtensionBitset(
+ {AArch64::AEK_BF16, AArch64::AEK_I8MM, AArch64::AEK_SVE,
+ AArch64::AEK_SVE2, AArch64::AEK_FP16, AArch64::AEK_DOTPROD,
+ AArch64::AEK_LSE, AArch64::AEK_RDM, AArch64::AEK_SIMD,
+ AArch64::AEK_RCPC, AArch64::AEK_RAS, AArch64::AEK_CRC,
+ AArch64::AEK_FP, AArch64::AEK_SB, AArch64::AEK_SSBS,
+ AArch64::AEK_MTE, AArch64::AEK_FP16FML, AArch64::AEK_PAUTH,
+ AArch64::AEK_SVE2BITPERM, AArch64::AEK_FLAGM,
+ AArch64::AEK_PERFMON, AArch64::AEK_PREDRES})),
+ "9.2-A"),
+ ARMCPUTestParams<AArch64::ExtensionBitset>(
"cortex-a57", "armv8-a", "crypto-neon-fp-armv8",
(AArch64::ExtensionBitset(
{AArch64::AEK_CRC, AArch64::AEK_AES, AArch64::AEK_SHA2,
@@ -1209,6 +1221,19 @@ INSTANTIATE_TEST_SUITE_P(
AArch64::AEK_FLAGM})),
"9-A"),
ARMCPUTestParams<AArch64::ExtensionBitset>(
+ "cortex-a720", "armv9.2-a", "crypto-neon-fp-armv8",
+ (AArch64::ExtensionBitset(
+ {AArch64::AEK_BF16, AArch64::AEK_I8MM, AArch64::AEK_SVE,
+ AArch64::AEK_SVE2, AArch64::AEK_FP16, AArch64::AEK_DOTPROD,
+ AArch64::AEK_LSE, AArch64::AEK_RDM, AArch64::AEK_SIMD,
+ AArch64::AEK_RCPC, AArch64::AEK_RAS, AArch64::AEK_CRC,
+ AArch64::AEK_FP, AArch64::AEK_SB, AArch64::AEK_SSBS,
+ AArch64::AEK_MTE, AArch64::AEK_FP16FML, AArch64::AEK_PAUTH,
+ AArch64::AEK_SVE2BITPERM, AArch64::AEK_FLAGM,
+ AArch64::AEK_PERFMON, AArch64::AEK_PREDRES,
+ AArch64::AEK_PROFILE})),
+ "9.2-A"),
+ ARMCPUTestParams<AArch64::ExtensionBitset>(
"neoverse-v1", "armv8.4-a", "crypto-neon-fp-armv8",
(AArch64::ExtensionBitset(
{AArch64::AEK_RAS, AArch64::AEK_SVE, AArch64::AEK_SSBS,
@@ -1288,6 +1313,19 @@ INSTANTIATE_TEST_SUITE_P(
AArch64::AEK_SSBS})),
"9-A"),
ARMCPUTestParams<AArch64::ExtensionBitset>(
+ "cortex-x4", "armv9.2-a", "crypto-neon-fp-armv8",
+ (AArch64::ExtensionBitset(
+ {AArch64::AEK_BF16, AArch64::AEK_I8MM, AArch64::AEK_SVE,
+ AArch64::AEK_SVE2, AArch64::AEK_FP16, AArch64::AEK_DOTPROD,
+ AArch64::AEK_LSE, AArch64::AEK_RDM, AArch64::AEK_SIMD,
+ AArch64::AEK_RCPC, AArch64::AEK_RAS, AArch64::AEK_CRC,
+ AArch64::AEK_FP, AArch64::AEK_SB, AArch64::AEK_SSBS,
+ AArch64::AEK_MTE, AArch64::AEK_FP16FML, AArch64::AEK_PAUTH,
+ AArch64::AEK_SVE2BITPERM, AArch64::AEK_FLAGM,
+ AArch64::AEK_PERFMON, AArch64::AEK_PREDRES,
+ AArch64::AEK_PROFILE})),
+ "9.2-A"),
+ ARMCPUTestParams<AArch64::ExtensionBitset>(
"cyclone", "armv8-a", "crypto-neon-fp-armv8",
(AArch64::ExtensionBitset(
{AArch64::AEK_NONE, AArch64::AEK_AES, AArch64::AEK_SHA2,
@@ -1568,7 +1606,7 @@ INSTANTIATE_TEST_SUITE_P(
"8.2-A")));
// Note: number of CPUs includes aliases.
-static constexpr unsigned NumAArch64CPUArchs = 62;
+static constexpr unsigned NumAArch64CPUArchs = 65;
TEST(TargetParserTest, testAArch64CPUArchList) {
SmallVector<StringRef, NumAArch64CPUArchs> List;
diff --git a/mlir/docs/Dialects/emitc.md b/mlir/docs/Dialects/emitc.md
index 03b85611ee3c..809a04660336 100644
--- a/mlir/docs/Dialects/emitc.md
+++ b/mlir/docs/Dialects/emitc.md
@@ -3,12 +3,12 @@ ops. Those can be translated to C/C++ via the Cpp emitter.
The following convention is followed:
-* If template arguments are passed to an `emitc.call` operation, C++ is
+* If template arguments are passed to an `emitc.call_opaque` operation, C++ is
generated.
* If tensors are used, C++ is generated.
-* If multiple return values are used within in a functions or an `emitc.call`
- operation, C++11 is required.
-* If floating-point type template arguments are passed to an `emitc.call`
+* If multiple return values are used within a function or an
+  `emitc.call_opaque` operation, C++11 is required.
+* If floating-point type template arguments are passed to an `emitc.call_opaque`
operation, C++20 is required.
* Else the generated code is compatible with C99.
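
A hedged illustration of the first convention above: template arguments have no C rendering, so the emitter must target C++. The snippet below sketches the shape of the output, not verbatim emitter text:

```cpp
#include <cstdint>

template <typename T> T foo() { return T{}; }

// Roughly the shape the Cpp emitter produces for
//   %0 = emitc.call_opaque "foo" () {template_args = [i32]} : () -> i32
// The template syntax is precisely what rules out plain C99 output.
int32_t v0 = foo<int32_t>();
```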
diff --git a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td
index 2edeb6f8a9cf..e09c63295515 100644
--- a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td
+++ b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td
@@ -92,11 +92,12 @@ def EmitC_ApplyOp : EmitC_Op<"apply", []> {
let hasVerifier = 1;
}
-def EmitC_CallOp : EmitC_Op<"call", []> {
- let summary = "Call operation";
+def EmitC_CallOpaqueOp : EmitC_Op<"call_opaque", []> {
+ let summary = "Opaque call operation";
let description = [{
- The `call` operation represents a C++ function call. The call allows
- specifying order of operands and attributes in the call as follows:
+ The `call_opaque` operation represents a C++ function call. The callee
+ can be an arbitrary non-empty string. The call allows specifying the
+ order of operands and attributes in the call as follows:
- integer value of index type refers to an operand;
- attribute which will get lowered to constant value in call;
@@ -105,10 +106,10 @@ def EmitC_CallOp : EmitC_Op<"call", []> {
```mlir
// Custom form defining a call to `foo()`.
- %0 = emitc.call "foo" () : () -> i32
+ %0 = emitc.call_opaque "foo" () : () -> i32
// Generic form of the same operation.
- %0 = "emitc.call"() {callee = "foo"} : () -> i32
+ %0 = "emitc.call_opaque"() {callee = "foo"} : () -> i32
```
}];
let arguments = (ins
@@ -454,7 +455,8 @@ def EmitC_VariableOp : EmitC_Op<"variable", []> {
%1 = "emitc.variable"() {value = 0 : i32} : () -> i32
%2 = emitc.apply "&"(%0) : (i32) -> !emitc.ptr<i32>
%3 = emitc.apply "&"(%1) : (i32) -> !emitc.ptr<i32>
- emitc.call "write"(%2, %3) : (!emitc.ptr<i32>, !emitc.ptr<i32>) -> ()
+ emitc.call_opaque "write"(%2, %3)
+ : (!emitc.ptr<i32>, !emitc.ptr<i32>) -> ()
```
}];
@@ -477,7 +479,7 @@ def EmitC_AssignOp : EmitC_Op<"assign", []> {
```mlir
// Integer variable
%0 = "emitc.variable"(){value = 42 : i32} : () -> i32
- %1 = emitc.call "foo"() : () -> (i32)
+ %1 = emitc.call_opaque "foo"() : () -> (i32)
// Assign emitted as `... = ...;`
"emitc.assign"(%0, %1) : (i32, i32) -> ()
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index d0b52a0b4024..391e77e0c408 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -1430,6 +1430,7 @@ def OpenACC_DeclareEnterOp : OpenACC_Op<"declare_enter", []> {
}];
let arguments = (ins Variadic<OpenACC_PointerLikeTypeInterface>:$dataClauseOperands);
+ let results = (outs OpenACC_DeclareTokenType:$token);
let assemblyFormat = [{
oilist(
@@ -1441,7 +1442,7 @@ def OpenACC_DeclareEnterOp : OpenACC_Op<"declare_enter", []> {
let hasVerifier = 1;
}
-def OpenACC_DeclareExitOp : OpenACC_Op<"declare_exit", []> {
+def OpenACC_DeclareExitOp : OpenACC_Op<"declare_exit", [AttrSizedOperandSegments]> {
let summary = "declare directive - exit from implicit data region";
let description = [{
@@ -1458,10 +1459,13 @@ def OpenACC_DeclareExitOp : OpenACC_Op<"declare_exit", []> {
```
}];
- let arguments = (ins Variadic<OpenACC_PointerLikeTypeInterface>:$dataClauseOperands);
+ let arguments = (ins
+ Optional<OpenACC_DeclareTokenType>:$token,
+ Variadic<OpenACC_PointerLikeTypeInterface>:$dataClauseOperands);
let assemblyFormat = [{
oilist(
+ `token` `(` $token `)` |
`dataOperands` `(` $dataClauseOperands `:` type($dataClauseOperands) `)`
)
attr-dict-with-keyword
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td
index 4a930ad94c3f..92ea71a7e841 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td
@@ -24,4 +24,13 @@ def OpenACC_DataBoundsType : OpenACC_Type<"DataBounds", "data_bounds_ty"> {
let summary = "Type for representing acc data clause bounds information";
}
+def OpenACC_DeclareTokenType : OpenACC_Type<"DeclareToken", "declare_token"> {
+ let summary = "declare token type";
+ let description = [{
+ `acc.declare_token` is a type returned by a `declare_enter` operation and
+ can be passed to a `declare_exit` operation to represent an implicit
+ data region.
+ }];
+}
+
#endif // OPENACC_OPS_TYPES
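
A hypothetical builder-side pairing of the new token plumbing: `declare_enter` yields a token that `declare_exit` consumes to delimit the implicit data region. The `build()`/`getToken()` shapes below are assumed from the usual TableGen-generated signatures, not verified against the generated headers:

```cpp
#include "mlir/Dialect/OpenACC/OpenACC.h"
#include "mlir/IR/Builders.h"

// Hypothetical lowering helper pairing declare_enter with declare_exit.
mlir::Value lowerDeclareRegion(mlir::OpBuilder &builder, mlir::Location loc,
                               mlir::ValueRange dataClauseOperands) {
  auto tokenType = mlir::acc::DeclareTokenType::get(builder.getContext());
  auto enter = builder.create<mlir::acc::DeclareEnterOp>(loc, tokenType,
                                                         dataClauseOperands);
  // ... ops forming the implicit data region ...
  builder.create<mlir::acc::DeclareExitOp>(loc, enter.getToken(),
                                           dataClauseOperands);
  return enter.getToken();
}
```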
diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCompositeOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCompositeOps.td
index 33078b74cc3c..74fbea6771b8 100644
--- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCompositeOps.td
+++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCompositeOps.td
@@ -308,8 +308,8 @@ def SPIRV_VectorShuffleOp : SPIRV_Op<"VectorShuffle", [
Vector 1 and Vector 2 must both have vector types, with the same
Component Type as Result Type. They do not have to have the same number
of components as Result Type or with each other. They are logically
- concatenated, forming a single vector with Vector 1’s components
- appearing before Vector 2’s. The components of this logical vector are
+ concatenated, forming a single vector with Vector 1's components
+ appearing before Vector 2's. The components of this logical vector are
logically numbered with a single consecutive set of numbers from 0 to N
- 1, where N is the total number of components.
@@ -331,9 +331,8 @@ def SPIRV_VectorShuffleOp : SPIRV_Op<"VectorShuffle", [
#### Example:
```mlir
- %0 = spirv.VectorShuffle [1: i32, 3: i32, 5: i32]
- %vector1: vector<4xf32>, %vector2: vector<2xf32>
- -> vector<3xf32>
+ %0 = spirv.VectorShuffle [1: i32, 3: i32, 5: i32] %vector1, %vector2 :
+ vector<4xf32>, vector<2xf32> -> vector<3xf32>
```
}];
@@ -348,8 +347,8 @@ def SPIRV_VectorShuffleOp : SPIRV_Op<"VectorShuffle", [
);
let assemblyFormat = [{
- attr-dict $components $vector1 `:` type($vector1) `,`
- $vector2 `:` type($vector2) `->` type($result)
+ attr-dict $components $vector1 `,` $vector2 `:`
+ type($vector1) `,` type($vector2) `->` type($result)
}];
}
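
The shuffle semantics in the description reduce to concatenate-then-index; a minimal self-contained model (the SPIR-V 0xFFFFFFFF "undefined component" case is omitted):

```cpp
#include <vector>

// Concatenate Vector 1's components before Vector 2's, number them from 0,
// then select each result component by index.
std::vector<float> vectorShuffle(const std::vector<float> &v1,
                                 const std::vector<float> &v2,
                                 const std::vector<unsigned> &components) {
  std::vector<float> concat(v1);
  concat.insert(concat.end(), v2.begin(), v2.end());
  std::vector<float> result;
  result.reserve(components.size());
  for (unsigned c : components)
    result.push_back(concat[c]); // e.g. [1,3,5] over 4+2 floats -> 3 floats
  return result;
}
```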
diff --git a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h
index cde6b2d13e58..0e995d1bf59f 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h
@@ -223,29 +223,16 @@ struct LatPoint final {
/// independently from the basic algorithm if bottlenecks are identified.
class Merger {
public:
- /// Constructs a merger for the given number of tensors, native loops, and
- /// filter loops. The user supplies the number of tensors involved in the
- /// kernel, with the last tensor in this set denoting the output tensor.
- /// The merger adds an additional synthetic tensor at the end of this set
- /// to represent all invariant expressions in the kernel.
- ///
- /// In addition to natives loops (which are specified by the GenericOp),
- /// extra filter loops are needed in order to handle affine expressions on
- /// sparse levels. E.g., (d0, d1, d2) => (d0 + d1, d2), a naive
- /// implementation of the filter loop could be generated as
- ///
- /// for (const auto c0 : coordinates[0]) {
- /// if (c0 == d0 + d1) {
- /// generated_code;
- /// }
- /// }
- ///
- /// to filter out coordinates that are not equal to the affine expression.
+ /// Constructs a merger for the given number of tensors and loops. The user
+ /// supplies the number of tensors involved in the kernel, with the last
+ /// tensor in this set denoting the output tensor. The merger adds an
+ /// additional synthetic tensor at the end of this set to represent all
+ /// invariant expressions in the kernel.
///
/// The maxLvlRank specifies the max level rank of all inputs/output tensors.
/// It is used to pre-allocate sufficient memory for internal storage.
- Merger(unsigned numInputOutputTensors, unsigned numNativeLoops,
- unsigned numFilterLoops, unsigned maxLvlRank);
+ Merger(unsigned numInputOutputTensors, unsigned numLoops,
+ unsigned maxLvlRank);
//
// Constructing valid tensor and loop identifiers.
@@ -366,19 +353,6 @@ public:
/// Gets the total number of loops (native loops + filter loops).
constexpr unsigned getNumLoops() const { return numLoops; }
- /// Gets the number of native loops.
- constexpr unsigned getNumNativeLoops() const { return numNativeLoops; }
-
- /// Gets the number of filter loops.
- constexpr unsigned getNumFilterLoops() const {
- return numLoops - numNativeLoops;
- }
-
- /// Gets the identifier of the first filter-loop.
- constexpr LoopId getStartingFilterLoopId() const {
- return getNumNativeLoops();
- }
-
/// Returns true if `b` is the `i`th loop of the output tensor.
constexpr bool isOutTensor(TensorLoopId b, LoopId i) const {
return b == makeTensorLoopId(outTensor, i);
@@ -391,11 +365,6 @@ public:
/// tensor expressions).
constexpr TensorId getSynTensorID() const { return syntheticTensor; }
- constexpr bool isFilterLoop(LoopId i) const {
- assert(isValidLoopId(i));
- return i >= numNativeLoops;
- }
-
/// Returns true if the expression is `(kTensor t)`.
bool expIsTensor(ExprId e, TensorId t) const {
const auto &expr = exp(e);
@@ -657,7 +626,6 @@ private:
const TensorId outTensor;
const TensorId syntheticTensor;
const unsigned numTensors;
- const unsigned numNativeLoops;
const unsigned numLoops;
bool hasSparseOut;
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
index 85bbfe06a86a..6b4a174596ff 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
@@ -1,4 +1,4 @@
-//===- File.h - Reading sparse tensors from files --------------*- C++ -*-===//
+//===- File.h - Reading sparse tensors from files ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp
index 6b26e0915820..3724df5abccc 100644
--- a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp
+++ b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp
@@ -1351,7 +1351,6 @@ void IntegerRelation::simplify() {
changed |= removeDuplicateConstraints();
}
// Current set is not empty.
- return;
}
/// Removes local variables using equalities. Each equality is checked if it
diff --git a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp
index d06381b7ddad..e8ea4da0b089 100644
--- a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp
+++ b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp
@@ -131,7 +131,7 @@ bool CastOp::areCastCompatible(TypeRange inputs, TypeRange outputs) {
// CallOp
//===----------------------------------------------------------------------===//
-LogicalResult emitc::CallOp::verify() {
+LogicalResult emitc::CallOpaqueOp::verify() {
// Callee must not be empty.
if (getCallee().empty())
return emitOpError("callee must not be empty");
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 6e5df705fee0..08e83cad4822 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -1087,9 +1087,10 @@ LogicalResult AtomicCaptureOp::verifyRegions() { return verifyRegionsCommon(); }
//===----------------------------------------------------------------------===//
template <typename Op>
-static LogicalResult checkDeclareOperands(Op &op,
- const mlir::ValueRange &operands) {
- if (operands.empty())
+static LogicalResult
+checkDeclareOperands(Op &op, const mlir::ValueRange &operands,
+ bool requireAtLeastOneOperand = true) {
+ if (operands.empty() && requireAtLeastOneOperand)
return emitError(
op->getLoc(),
"at least one operand must appear on the declare operation");
@@ -1151,6 +1152,9 @@ LogicalResult acc::DeclareEnterOp::verify() {
//===----------------------------------------------------------------------===//
LogicalResult acc::DeclareExitOp::verify() {
+ if (getToken())
+ return checkDeclareOperands(*this, this->getDataClauseOperands(),
+ /*requireAtLeastOneOperand=*/false);
return checkDeclareOperands(*this, this->getDataClauseOperands());
}
diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
index 53a0c2428842..28445ce2a8bc 100644
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -1797,7 +1797,22 @@ Operation *SparseTensorDialect::materializeConstant(OpBuilder &builder,
return nullptr;
}
+namespace {
+struct SparseTensorAsmDialectInterface : public OpAsmDialectInterface {
+ using OpAsmDialectInterface::OpAsmDialectInterface;
+
+ AliasResult getAlias(Attribute attr, raw_ostream &os) const override {
+ if (attr.isa<SparseTensorEncodingAttr>()) {
+ os << "sparse";
+ return AliasResult::OverridableAlias;
+ }
+ return AliasResult::NoAlias;
+ }
+};
+} // namespace
+
void SparseTensorDialect::initialize() {
+ addInterface<SparseTensorAsmDialectInterface>();
addAttributes<
#define GET_ATTRDEF_LIST
#include "mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.cpp.inc"
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp
index 3a02d5634586..cc05f1d06e30 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp
@@ -28,21 +28,13 @@ static bool isMaterializing(Value val) {
val.getDefiningOp<bufferization::AllocTensorOp>();
}
-/// Makes target array's elements sorted according to the `order` array.
-static void sortArrayBasedOnOrder(std::vector<LoopCoeffPair> &target,
- ArrayRef<LoopId> order) {
+/// Sorts the dependent loops so that they appear in the same sequence in
+/// which the loops will be generated.
+static void sortDependentLoops(std::vector<LoopCoeffPair> &target) {
std::sort(target.begin(), target.end(),
- [&order](const LoopCoeffPair &l, const LoopCoeffPair &r) {
+ [](const LoopCoeffPair &l, const LoopCoeffPair &r) {
assert(std::addressof(l) == std::addressof(r) || l != r);
- int idxL = -1, idxR = -1;
- for (int i = 0, e = order.size(); i < e; i++) {
- if (order[i] == l.first)
- idxL = i;
- if (order[i] == r.first)
- idxR = i;
- }
- assert(idxL >= 0 && idxR >= 0);
- return idxL < idxR;
+ return l.first < r.first;
});
}
//===----------------------------------------------------------------------===//
@@ -50,18 +42,12 @@ static void sortArrayBasedOnOrder(std::vector<LoopCoeffPair> &target,
//===----------------------------------------------------------------------===//
CodegenEnv::CodegenEnv(linalg::GenericOp linop, SparsificationOptions opts,
- unsigned numTensors, unsigned numLoops,
- unsigned numFilterLoops, unsigned maxRank)
+ unsigned numTensors, unsigned numLoops, unsigned maxRank)
: linalgOp(linop), sparseOptions(opts),
- latticeMerger(numTensors, numLoops, numFilterLoops, maxRank),
- loopEmitter(), topSort(), sparseOut(nullptr), outerParNest(-1u),
- insChain(), expValues(), expFilled(), expAdded(), expCount(), redVal(),
- redExp(detail::kInvalidId), redCustom(detail::kInvalidId),
- redValidLexInsert() {
- // TODO: remove topSort, loops should be already sorted by previous pass.
- for (unsigned l = 0; l < latticeMerger.getNumLoops(); l++)
- topSort.push_back(l);
-}
+ latticeMerger(numTensors, numLoops, maxRank), loopEmitter(),
+ sparseOut(nullptr), outerParNest(-1u), insChain(), expValues(),
+ expFilled(), expAdded(), expCount(), redVal(), redExp(detail::kInvalidId),
+ redCustom(detail::kInvalidId), redValidLexInsert() {}
LogicalResult CodegenEnv::initTensorExp() {
// Builds the tensor expression for the Linalg operation in SSA form.
@@ -97,7 +83,7 @@ void CodegenEnv::startEmit() {
(void)enc;
assert(!enc || lvlRank == enc.getLvlRank());
for (Level lvl = 0; lvl < lvlRank; lvl++)
- sortArrayBasedOnOrder(latticeMerger.getDependentLoops(tid, lvl), topSort);
+ sortDependentLoops(latticeMerger.getDependentLoops(tid, lvl));
}
loopEmitter.initialize(
@@ -105,7 +91,7 @@ void CodegenEnv::startEmit() {
StringAttr::get(linalgOp.getContext(),
linalg::GenericOp::getOperationName()),
/*hasOutput=*/true,
- /*isSparseOut=*/sparseOut != nullptr, topSort,
+ /*isSparseOut=*/sparseOut != nullptr, /*numLoops=*/getLoopNum(),
// TODO: compute the map and pass it to loop emitter directly instead of
// passing in a callback.
/*dependentLvlGetter=*/
@@ -190,8 +176,7 @@ bool CodegenEnv::isAdmissibleTensorExp(ExprId exp) {
// needed.
outerParNest = 0;
const auto iteratorTypes = linalgOp.getIteratorTypesArray();
- assert(topSortSize() == latticeMerger.getNumLoops());
- for (const LoopId i : topSort) {
+ for (unsigned i = 0, e = getLoopNum(); i < e; i++) {
if (linalg::isReductionIterator(iteratorTypes[i]))
break; // terminate at first reduction
outerParNest++;
@@ -208,26 +193,8 @@ bool CodegenEnv::isAdmissibleTensorExp(ExprId exp) {
// Code generation environment topological sort methods
//===----------------------------------------------------------------------===//
-ArrayRef<LoopId> CodegenEnv::getTopSortSlice(LoopOrd n, LoopOrd m) const {
- return ArrayRef<LoopId>(topSort).slice(n, m);
-}
-
-ArrayRef<LoopId> CodegenEnv::getLoopStackUpTo(LoopOrd n) const {
- return ArrayRef<LoopId>(topSort).take_front(n);
-}
-
-ArrayRef<LoopId> CodegenEnv::getCurrentLoopStack() const {
- return getLoopStackUpTo(loopEmitter.getCurrentDepth());
-}
-
Value CodegenEnv::getLoopVar(LoopId i) const {
- // TODO: this class should store the inverse of `topSort` so that
- // it can do this conversion directly, instead of searching through
- // `topSort` every time. (Or else, `LoopEmitter` should handle this.)
- for (LoopOrd n = 0, numLoops = topSortSize(); n < numLoops; n++)
- if (topSort[n] == i)
- return loopEmitter.getLoopIV(n);
- llvm_unreachable("invalid loop identifier");
+ return loopEmitter.getLoopIV(i);
}
//===----------------------------------------------------------------------===//
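
A standalone restatement of the simplification above: once filter loops are gone, generation order is just ascending LoopId, so the order-array lookup collapses to a plain comparison. The pair layout is an assumption:

```cpp
#include <algorithm>
#include <utility>
#include <vector>

// Assumed layout: (LoopId, coefficient).
using LoopCoeffPair = std::pair<unsigned, unsigned>;

// Ascending LoopId now equals generation order, so no order array is needed.
void sortDependentLoops(std::vector<LoopCoeffPair> &target) {
  std::sort(target.begin(), target.end(),
            [](const LoopCoeffPair &l, const LoopCoeffPair &r) {
              return l.first < r.first;
            });
}
```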
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h
index c0fc505d153a..963cdd1dcdeb 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h
@@ -38,8 +38,7 @@ public:
/// passed around during sparsification for bookkeeping
/// together with some consistency asserts.
CodegenEnv(linalg::GenericOp linop, SparsificationOptions opts,
- unsigned numTensors, unsigned numLoops, unsigned numFilterLoops,
- unsigned maxRank);
+ unsigned numTensors, unsigned numLoops, unsigned maxRank);
//
// General methods.
@@ -83,6 +82,8 @@ public:
}
DimLevelType dlt(TensorLoopId b) const { return latticeMerger.getLvlType(b); }
+ unsigned getLoopNum() const { return latticeMerger.getNumLoops(); }
+
//
// LoopEmitter delegates.
//
@@ -107,6 +108,8 @@ public:
return loopEmitter.unpackTensorLevelRange(std::forward<ContainerTy>(c));
}
+ unsigned getLoopDepth() const { return loopEmitter.getCurrentDepth(); }
+
//
// Code generation environment verify functions.
//
@@ -115,25 +118,6 @@ public:
/// It also sets the sparseOut if the output tensor is sparse.
bool isAdmissibleTensorExp(ExprId e);
- /// Whether the iteration graph is sorted in admissible topoOrder.
- /// Sets outerParNest on success with sparse output
- bool isAdmissibleTopoOrder();
-
- //
- // Topological delegate and sort methods.
- //
-
- LoopOrd topSortSize() const { return topSort.size(); }
- LoopId topSortAt(LoopOrd n) const { return topSort.at(n); }
- void topSortPushBack(LoopId i) { topSort.push_back(i); }
- void topSortClear(size_t capacity = 0) {
- topSort.clear();
- topSort.reserve(capacity);
- }
-
- ArrayRef<LoopId> getTopSortSlice(LoopOrd n, LoopOrd m) const;
- ArrayRef<LoopId> getLoopStackUpTo(LoopOrd n) const;
- ArrayRef<LoopId> getCurrentLoopStack() const;
/// Returns the induction-variable for the loop identified by the given
/// `LoopId`. This method handles application of the topological sort
/// in order to convert the `LoopId` into the corresponding `LoopOrd`.
@@ -191,10 +175,6 @@ private:
// Loop emitter helper class.
LoopEmitter loopEmitter;
- // Topological sort. This serves as a mapping from `LoopOrd` to `LoopId`
- // (cf., `getLoopVar` and `topSortAt`).
- std::vector<LoopId> topSort;
-
// Sparse tensor as output. Implemented either through direct injective
// insertion in lexicographic index order or through access pattern
// expansion in the innermost loop nest (`expValues` through `expCount`).
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
index 81ce525a62d9..ba798f09c4d5 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
@@ -276,13 +276,13 @@ Value LoopEmitter::genSparseCrd(OpBuilder &builder, Location loc, TensorId tid,
}
LoopEmitter::LoopEmitter(ValueRange tensors, StringAttr loopTag, bool hasOutput,
- bool isSparseOut, ArrayRef<LoopId> topSort,
+ bool isSparseOut, unsigned numLoops,
DependentLvlGetter dimGetter) {
- initialize(tensors, loopTag, hasOutput, isSparseOut, topSort, dimGetter);
+ initialize(tensors, loopTag, hasOutput, isSparseOut, numLoops, dimGetter);
}
void LoopEmitter::initialize(ValueRange ts, StringAttr loopTag, bool hasOutput,
- bool isSparseOut, ArrayRef<LoopId> topSort,
+ bool isSparseOut, unsigned numLoops,
DependentLvlGetter dimGetter) {
// First initialize the top-level type of the fields.
this->loopTag = loopTag;
@@ -308,10 +308,8 @@ void LoopEmitter::initialize(ValueRange ts, StringAttr loopTag, bool hasOutput,
this->sliceOffsets.assign(numTensors, std::vector<Value>());
this->sliceStrides.assign(numTensors, std::vector<Value>());
- const LoopOrd numLoops = topSort.size();
// These zeros will be overwritten below, but we need to initialize
// them to something since we'll need random-access assignment.
- this->loopIdToOrd.assign(numLoops, 0);
this->loopStack.reserve(numLoops);
this->loopSeqStack.reserve(numLoops);
@@ -387,13 +385,6 @@ void LoopEmitter::initialize(ValueRange ts, StringAttr loopTag, bool hasOutput,
}
}
}
-
- // Construct the inverse of the `topSort` from the sparsifier.
- // This is needed to map `AffineDimExpr`s back to the `LoopOrd`
- // used in loop emitter.
- // FIXME: This map should be maintained outside loop emitter.
- for (LoopOrd n = 0; n < numLoops; n++)
- loopIdToOrd[topSort[n]] = n;
}
void LoopEmitter::initializeLoopEmit(
@@ -611,8 +602,7 @@ Value LoopEmitter::genAffine(OpBuilder &builder, Location loc, AffineExpr a) {
// However, elsewhere we have been led to expect that `loopIdToOrd`
// should be indexed by `LoopId`...
const auto loopId = cast<AffineDimExpr>(a).getPosition();
- assert(loopId < loopIdToOrd.size());
- return loopStack[loopIdToOrd[loopId]].iv;
+ return loopStack[loopId].iv;
}
case AffineExprKind::Add: {
auto binOp = cast<AffineBinaryOpExpr>(a);
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h
index c6518decbdee..320b39765dea 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h
@@ -113,12 +113,11 @@ public:
/// to `LoopId`.
void initialize(ValueRange tensors, StringAttr loopTag = nullptr,
bool hasOutput = false, bool isSparseOut = false,
- ArrayRef<LoopId> topSort = {},
- DependentLvlGetter getter = nullptr);
+ unsigned numLoops = 0, DependentLvlGetter getter = nullptr);
explicit LoopEmitter(ValueRange tensors, StringAttr loopTag = nullptr,
bool hasOutput = false, bool isSparseOut = false,
- ArrayRef<LoopId> topSort = {},
+ unsigned numLoops = 0,
DependentLvlGetter getter = nullptr);
/// Starts a loop emitting session by generating all the buffers needed
@@ -751,11 +750,6 @@ private:
// TODO: maybe we should have a LoopSeqInfo
std::vector<std::pair<Value, std::vector<std::tuple<TensorId, Level, bool>>>>
loopSeqStack;
-
- /// Maps `LoopId` (used by `AffineDimExpr`) to `LoopOrd` (in the `loopStack`).
- /// TODO: We should probably use a callback function here to make it more
- /// general.
- std::vector<LoopOrd> loopIdToOrd;
};
} // namespace sparse_tensor
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
index cfc8eb19918b..9f41db73a509 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
@@ -159,41 +159,46 @@ static Value createAllocation(OpBuilder &builder, Location loc,
return buffer;
}
+/// Creates the dim sizes array, filling in from dynamic sizes.
+static void createDimSizes(OpBuilder &builder, Location loc,
+ SparseTensorType stt, ValueRange dynSizes,
+ /*out*/ SmallVectorImpl<Value> &dimSizesValues) {
+ const Dimension dimRank = stt.getDimRank();
+ dimSizesValues.clear();
+ dimSizesValues.reserve(dimRank);
+ unsigned i = 0;
+ for (const Size sz : stt.getDimShape())
+ dimSizesValues.push_back(ShapedType::isDynamic(sz)
+ ? dynSizes[i++]
+ : constantIndex(builder, loc, sz));
+}
+
/// Creates allocation for each field in sparse tensor type. Note that
/// for all dynamic memrefs in the sparse tensor storage layout, the
/// memory size is really the capacity of the "vector", while the actual
/// size resides in the sizes array.
static void createAllocFields(OpBuilder &builder, Location loc,
- SparseTensorType stt, ValueRange dynSizes,
- bool enableInit, SmallVectorImpl<Value> &fields,
- Value sizeHint) {
- // Build original sizes.
- assert((dynSizes.size() == static_cast<size_t>(stt.getNumDynamicDims())) &&
- "Got wrong number of dynamic sizes");
- const Dimension dimRank = stt.getDimRank();
- SmallVector<Value> dimSizes;
- dimSizes.reserve(dimRank);
- unsigned i = 0; // cumulative index into `dynSizes`.
- for (const Size sh : stt.getDimShape())
- dimSizes.push_back(ShapedType::isDynamic(sh)
- ? dynSizes[i++]
- : constantIndex(builder, loc, sh));
-
+ SparseTensorType stt, bool enableInit,
+ Value sizeHint,
+ SmallVectorImpl<Value> &lvlSizesValues,
+ /*out*/ SmallVectorImpl<Value> &fields) {
+ Level lvlRank = stt.getLvlRank();
// Set up some heuristic sizes. We try to set the initial
// size based on available information. Otherwise we just
// initialize a few elements to start the reallocation chain.
// TODO: refine this
Value posHeuristic, crdHeuristic, valHeuristic;
if (stt.isAllDense()) {
- valHeuristic = dimSizes[0];
- for (const Value sz : ArrayRef<Value>{dimSizes}.drop_front())
- valHeuristic = builder.create<arith::MulIOp>(loc, valHeuristic, sz);
+ valHeuristic = lvlSizesValues[0];
+ for (Level lvl = 1; lvl < lvlRank; lvl++)
+ valHeuristic =
+ builder.create<arith::MulIOp>(loc, valHeuristic, lvlSizesValues[lvl]);
} else if (sizeHint) {
if (getCOOStart(stt.getEncoding()) == 0) {
posHeuristic = constantIndex(builder, loc, 2);
crdHeuristic = builder.create<arith::MulIOp>(
- loc, constantIndex(builder, loc, dimRank), sizeHint); // AOS
- } else if (dimRank == 2 && stt.isDenseLvl(0) && stt.isCompressedLvl(1)) {
+ loc, constantIndex(builder, loc, lvlRank), sizeHint); // AOS
+ } else if (lvlRank == 2 && stt.isDenseLvl(0) && stt.isCompressedLvl(1)) {
posHeuristic = builder.create<arith::AddIOp>(
loc, sizeHint, constantIndex(builder, loc, 1));
crdHeuristic = sizeHint;
@@ -205,7 +210,6 @@ static void createAllocFields(OpBuilder &builder, Location loc,
posHeuristic = crdHeuristic = valHeuristic =
constantIndex(builder, loc, 16);
}
-
// Initializes all fields. An initial storage specifier and allocated
// positions/coordinates/values memrefs (with heuristic capacity).
foreachFieldAndTypeInSparseTensor(
@@ -237,16 +241,13 @@ static void createAllocFields(OpBuilder &builder, Location loc,
// Returns true to continue the iteration.
return true;
});
-
// Initialize the storage scheme to an empty tensor. Sets the lvlSizes
// and gives all position fields an initial zero entry, so that it is
// easier to maintain the "linear + 1" length property.
MutSparseTensorDescriptor desc(stt, fields);
Value posZero = constantZero(builder, loc, stt.getPosType());
for (Level lvl = 0, lvlRank = stt.getLvlRank(); lvl < lvlRank; lvl++) {
- // FIXME: `toOrigDim` is deprecated.
- desc.setLvlSize(builder, loc, lvl,
- dimSizes[toOrigDim(stt.getEncoding(), lvl)]);
+ desc.setLvlSize(builder, loc, lvl, lvlSizesValues[lvl]);
const auto dlt = stt.getLvlType(lvl);
if (isCompressedDLT(dlt) || isLooseCompressedDLT(dlt))
createPushback(builder, loc, desc, SparseTensorFieldKind::PosMemRef, lvl,
@@ -371,8 +372,8 @@ static void genEndInsert(OpBuilder &builder, Location loc,
SparseTensorDescriptor desc) {
const SparseTensorType stt(desc.getRankedTensorType());
const Level lvlRank = stt.getLvlRank();
- for (Level l = 0; l < lvlRank; l++) {
- const auto dlt = stt.getLvlType(l);
+ for (Level lvl = 0; lvl < lvlRank; lvl++) {
+ const auto dlt = stt.getLvlType(lvl);
if (isCompressedDLT(dlt)) {
// Compressed dimensions need a position cleanup for all entries
// that were not visited during the insertion pass.
@@ -380,10 +381,10 @@ static void genEndInsert(OpBuilder &builder, Location loc,
// TODO: avoid cleanup and keep compressed scheme consistent at all
// times?
//
- if (l > 0) {
+ if (lvl > 0) {
Type posType = stt.getPosType();
- Value posMemRef = desc.getPosMemRef(l);
- Value hi = desc.getPosMemSize(builder, loc, l);
+ Value posMemRef = desc.getPosMemRef(lvl);
+ Value hi = desc.getPosMemSize(builder, loc, lvl);
Value zero = constantIndex(builder, loc, 0);
Value one = constantIndex(builder, loc, 1);
// Vector of only one, but needed by createFor's prototype.
@@ -729,7 +730,6 @@ public:
};
/// Sparse codegen rule for the alloc operator.
-/// TODO(springerm): remove when bufferization.alloc_tensor is gone
class SparseTensorAllocConverter
: public OpConversionPattern<bufferization::AllocTensorOp> {
public:
@@ -746,6 +746,8 @@ public:
if (!resType.hasEncoding())
return failure();
Location loc = op.getLoc();
+
+ // Deal with copy.
if (op.getCopy()) {
auto desc = getDescriptorFromTensorTuple(adaptor.getCopy());
SmallVector<Value> fields;
@@ -766,17 +768,22 @@ public:
return success();
}
+ // Construct the dim/lvl sizes and the (unused) dim2lvl/lvl2dim buffers.
+ SmallVector<Value> dimSizesValues;
+ SmallVector<Value> lvlSizesValues;
+ Value dimSizesBuffer;
+ Value dim2lvlBuffer;
+ Value lvl2dimBuffer;
+ createDimSizes(rewriter, loc, resType, adaptor.getDynamicSizes(),
+ dimSizesValues);
+ genMapBuffers(rewriter, loc, resType, dimSizesValues, dimSizesBuffer,
+ lvlSizesValues, dim2lvlBuffer, lvl2dimBuffer);
+
// Construct allocation for each field.
Value sizeHint = op.getSizeHint();
- ValueRange dynSizes = adaptor.getDynamicSizes();
- const size_t found = dynSizes.size();
- const int64_t expected = resType.getNumDynamicDims();
- if (found != static_cast<size_t>(expected))
- return rewriter.notifyMatchFailure(op,
- "Got wrong number of dynamic sizes");
SmallVector<Value> fields;
- createAllocFields(rewriter, loc, resType, dynSizes,
- enableBufferInitialization, fields, sizeHint);
+ createAllocFields(rewriter, loc, resType, enableBufferInitialization,
+ sizeHint, lvlSizesValues, fields);
// Replace operation with resulting memrefs.
rewriter.replaceOp(op, genTuple(rewriter, loc, resType, fields));
@@ -788,7 +795,6 @@ private:
};
/// Sparse codegen rule for the empty tensor operator.
-/// TODO(springerm): remove when bufferization.alloc_tensor is gone
class SparseTensorEmptyConverter : public OpConversionPattern<tensor::EmptyOp> {
public:
using OpConversionPattern::OpConversionPattern;
@@ -803,19 +809,24 @@ public:
const auto resType = getSparseTensorType(op);
if (!resType.hasEncoding())
return failure();
+ Location loc = op.getLoc();
+
+ // Construct the dim/lvl sizes and the (unused) dim2lvl/lvl2dim buffers.
+ SmallVector<Value> dimSizesValues;
+ SmallVector<Value> lvlSizesValues;
+ Value dimSizesBuffer;
+ Value dim2lvlBuffer;
+ Value lvl2dimBuffer;
+ createDimSizes(rewriter, loc, resType, adaptor.getDynamicSizes(),
+ dimSizesValues);
+ genMapBuffers(rewriter, loc, resType, dimSizesValues, dimSizesBuffer,
+ lvlSizesValues, dim2lvlBuffer, lvl2dimBuffer);
// Construct allocation for each field.
- Location loc = op.getLoc();
Value sizeHint; // none
- const ValueRange dynSizes = adaptor.getDynamicSizes();
- const size_t found = dynSizes.size();
- const int64_t expected = resType.getNumDynamicDims();
- if (found != static_cast<size_t>(expected))
- return rewriter.notifyMatchFailure(op,
- "Got wrong number of dynamic sizes");
SmallVector<Value> fields;
- createAllocFields(rewriter, loc, resType, dynSizes,
- enableBufferInitialization, fields, sizeHint);
+ createAllocFields(rewriter, loc, resType, enableBufferInitialization,
+ sizeHint, lvlSizesValues, fields);
// Replace operation with resulting memrefs.
rewriter.replaceOp(op, genTuple(rewriter, loc, resType, fields));
@@ -1461,10 +1472,10 @@ struct SparseNewConverter : public OpConversionPattern<NewOp> {
// if (! %isSorted) sparse_tensor.sort_coo(%nse, %coordinates, %values)
// update storage specifier
// @delSparseTensorReader(%reader)
- SmallVector<Value> dimShapesValues;
+ SmallVector<Value> dimSizesValues;
Value dimSizesBuffer;
Value reader = genReader(rewriter, loc, dstTp, adaptor.getOperands()[0],
- dimShapesValues, dimSizesBuffer);
+ dimSizesValues, dimSizesBuffer);
// Get the number of stored entries.
const Type indexTp = rewriter.getIndexType();
@@ -1472,25 +1483,19 @@ struct SparseNewConverter : public OpConversionPattern<NewOp> {
{indexTp}, {reader}, EmitCInterface::Off)
.getResult(0);
- // Construct allocation for each field.
- SmallVector<Value> dynSizes;
- if (dstTp.hasDynamicDimShape()) {
- for (const auto &d : llvm::enumerate(dstTp.getDimShape()))
- if (ShapedType::isDynamic(d.value()))
- dynSizes.push_back(rewriter.create<memref::LoadOp>(
- loc, dimSizesBuffer, constantIndex(rewriter, loc, d.index())));
- }
- SmallVector<Value> fields;
- createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false,
- fields, nse);
-
- // Now construct the lvl sizes and the dim2lvl/lvl2dim buffers.
+ // Construct the lvl sizes and the dim2lvl/lvl2dim buffers.
SmallVector<Value> lvlSizesValues;
Value dim2lvlBuffer;
Value lvl2dimBuffer;
- genMapBuffers(rewriter, loc, dstTp, dimShapesValues, dimSizesBuffer,
+ genMapBuffers(rewriter, loc, dstTp, dimSizesValues, dimSizesBuffer,
lvlSizesValues, dim2lvlBuffer, lvl2dimBuffer);
+ // Construct allocation for each field.
+ Value sizeHint = nse;
+ SmallVector<Value> fields;
+ createAllocFields(rewriter, loc, dstTp, /*enableInit=*/false, sizeHint,
+ lvlSizesValues, fields);
+
// Read the COO tensor data.
MutSparseTensorDescriptor desc(dstTp, fields);
Value xs = desc.getAOSMemRef();
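
A standalone model of the new `createDimSizes` behavior used throughout this file, with -1 standing in for `ShapedType::kDynamic`; the assert plays the role of the removed "Got wrong number of dynamic sizes" check:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Static extents come straight from the shape; dynamic extents are consumed
// from dynSizes in declaration order.
std::vector<int64_t> buildDimSizes(const std::vector<int64_t> &shape,
                                   const std::vector<int64_t> &dynSizes) {
  std::vector<int64_t> out;
  out.reserve(shape.size());
  unsigned i = 0;
  for (int64_t sz : shape)
    out.push_back(sz < 0 ? dynSizes[i++] : sz);
  assert(i == dynSizes.size() && "wrong number of dynamic sizes");
  return out;
}
```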
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
index cd6f689a04ac..07a6e6df3ca1 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
@@ -49,8 +49,8 @@ using namespace mlir::sparse_tensor;
// (assuming that's the actual meaning behind the "idx"-vs-"ldx" convention).
/// Determines if affine expression is invariant.
-static bool isInvariantAffine(AffineExpr a, ArrayRef<LoopId> loopStack,
- LoopId ldx, bool &isAtLoop) {
+static bool isInvariantAffine(AffineExpr a, unsigned loopDepth, LoopId ldx,
+ bool &isAtLoop) {
switch (a.getKind()) {
case AffineExprKind::DimId: {
const LoopId i = cast<AffineDimExpr>(a).getPosition();
@@ -59,19 +59,14 @@ static bool isInvariantAffine(AffineExpr a, ArrayRef<LoopId> loopStack,
// Must be invariant if we are at the given loop.
return true;
}
- bool isInvariant = false;
- for (LoopId l : loopStack) {
- isInvariant = (l == i);
- if (isInvariant)
- break;
- }
- return isInvariant;
+ // The DimExpr is invariant if the loop has already been generated.
+ return i < loopDepth;
}
case AffineExprKind::Add:
case AffineExprKind::Mul: {
auto binOp = cast<AffineBinaryOpExpr>(a);
- return isInvariantAffine(binOp.getLHS(), loopStack, ldx, isAtLoop) &&
- isInvariantAffine(binOp.getRHS(), loopStack, ldx, isAtLoop);
+ return isInvariantAffine(binOp.getLHS(), loopDepth, ldx, isAtLoop) &&
+ isInvariantAffine(binOp.getRHS(), loopDepth, ldx, isAtLoop);
}
default: {
assert(isa<AffineConstantExpr>(a));
@@ -80,21 +75,11 @@ static bool isInvariantAffine(AffineExpr a, ArrayRef<LoopId> loopStack,
}
}
-/// Determines if affine expression is invariant.
-static bool isInvariantAffine(CodegenEnv &env, AffineExpr a, LoopId ldx,
- bool &isAtLoop) {
- return isInvariantAffine(a, env.getCurrentLoopStack(), ldx, isAtLoop);
-}
-
/// Helper method to inspect affine expressions. Rejects cases where the
/// same index is used more than once. Also rejects compound affine
/// expressions in sparse dimensions.
-/// filterIdx stores the current filter loop idx should be used for the next
-/// compound affine sparse level, and it will be incremented by one when
-/// used.
static bool findAffine(Merger &merger, TensorId tid, Level lvl, AffineExpr a,
- DimLevelType dlt, LoopId &filterLdx,
- bool setLvlFormat = true) {
+ DimLevelType dlt, bool setLvlFormat = true) {
switch (a.getKind()) {
case AffineExprKind::DimId: {
const LoopId idx = merger.makeLoopId(cast<AffineDimExpr>(a).getPosition());
@@ -108,22 +93,14 @@ static bool findAffine(Merger &merger, TensorId tid, Level lvl, AffineExpr a,
case AffineExprKind::Add:
case AffineExprKind::Mul:
case AffineExprKind::Constant: {
- if (!isDenseDLT(dlt) && setLvlFormat) {
- assert(isUndefDLT(merger.getLvlType(tid, filterLdx)));
- // Use a filter loop for sparse affine expression.
- merger.setLevelAndType(tid, filterLdx, lvl, dlt);
- ++filterLdx;
- }
-
+ assert(isDenseDLT(dlt));
if (auto binOp = dyn_cast<AffineBinaryOpExpr>(a)) {
// We do not set dim level format for affine expression like d0 + d1 on
// either loop index at d0 or d1.
// We continue the recursion merely to check whether current affine is
// admissible or not.
- return findAffine(merger, tid, lvl, binOp.getLHS(), dlt, filterLdx,
- false) &&
- findAffine(merger, tid, lvl, binOp.getRHS(), dlt, filterLdx,
- false);
+ return findAffine(merger, tid, lvl, binOp.getLHS(), dlt, false) &&
+ findAffine(merger, tid, lvl, binOp.getRHS(), dlt, false);
}
// Falls through when it is a constant Affine
return true;
@@ -236,32 +213,13 @@ static unsigned getNumNonTrivialIdxExpOnSparseLvls(AffineMap map,
return 0;
const SparseTensorType stt(rtp);
- // FIXME: There's some dim/lvl confusion here. The previous version of
- // the code asserted that there are `lvlRank`-many expressions, but then
- // the `exprs[d]` expression assumes there are in fact `dimRank`-many
- // expressions. Even though `ArrayRef::operator[]` will check for OOB,
- // the mismatch between the assertion and the usage belies that this code
- // cannot support non-permutations.
- //
- // Elsewhere in this file the maps returned by
- // `linalg::GenericOp::getMatchingIndexingMap` are inconsistent about
- // whether they're expected to have `lvlRank`-many or `dimRank`-many
- // expressions (cf., `genSubscript` vs `findSparseAnnotations`);
- // so those are no help in determining which is actually intended.
- //
- // For now we work around this problem by asserting the two ranks agree.
- const Dimension dimRank = stt.getDimRank();
const Level lvlRank = stt.getLvlRank();
- assert(dimRank == lvlRank && "Non-permutations not currently supported");
const auto exprs = map.getResults();
- assert(static_cast<Dimension>(exprs.size()) == dimRank &&
+ assert(static_cast<Dimension>(exprs.size()) == lvlRank &&
"AffineMap does not have dimension-rank many results");
- (void)dimRank;
unsigned num = 0;
for (Level l = 0; l < lvlRank; l++) {
- // FIXME: `toOrigDim` is deprecated.
- const Dimension d = toOrigDim(stt.getEncoding(), l);
- if (!isa<AffineDimExpr>(exprs[d]) && !stt.isDenseLvl(l))
+ if (!isa<AffineDimExpr>(exprs[l]) && !stt.isDenseLvl(l))
num++;
}
return num;
@@ -292,15 +250,10 @@ static bool hasNonTrivialAffineOnSparseOut(linalg::GenericOp op) {
/// no annotations are found or inadmissible constructs occur.
/// We currently support two different ways to handle non-trivial index
/// expression on sparse tensors, and they accept different affine expressions.
-/// When using filter-loop-based approach, it accept (almost) arbitrary affine
-/// index expression on sparse tensor but it is much less efficient, and will be
-/// gradually removed from the codebase.
/// When using the dependent index reduction-based approach, it currently only
/// supports affine addition index expression.
static bool findSparseAnnotations(CodegenEnv &env, bool idxReducBased) {
bool annotated = false;
- // `filterLdx` may be mutated by `findAffine`.
- LoopId filterLdx = env.merger().getStartingFilterLoopId();
for (OpOperand &t : env.op()->getOpOperands()) {
const TensorId tid = env.makeTensorId(t.getOperandNumber());
const auto map = env.op().getMatchingIndexingMap(&t);
@@ -321,19 +274,17 @@ static bool findSparseAnnotations(CodegenEnv &env, bool idxReducBased) {
// If the current tensor being inspected requires an affine index, it needs
// to be sliced.
for (Level l = 0; l < lvlRank; l++) {
- // FIXME: `toOrigDim` is deprecated.
- const AffineExpr a = map.getResult(toOrigDim(enc, l));
+ const AffineExpr a = map.getResult(l);
const DimLevelType dlt = enc.getLvlType(l);
if (idxReducBased && needIdxReduc) {
if (!findDepIdxSet(env.merger(), tid, l, a, dlt))
return false; // inadmissible affine expression
} else {
- if (!findAffine(env.merger(), tid, l, a, dlt, filterLdx))
+ if (!findAffine(env.merger(), tid, l, a, dlt))
return false; // inadmissible affine expression
}
}
}
- assert(filterLdx == env.merger().getNumLoops());
return annotated;
}
@@ -351,17 +302,6 @@ static void genBuffers(CodegenEnv &env, OpBuilder &builder) {
llvm::cast<linalg::LinalgOp>(op.getOperation())
.createLoopRanges(builder, loc);
- assert(loopRange.size() == env.merger().getStartingFilterLoopId());
- SmallVector<Range, 4> sortedRange;
- for (unsigned i = 0, e = env.topSortSize(); i < e; i++) {
- LoopId ldx = env.topSortAt(i);
- // FIXME: Gets rid of filter loops since we have a better algorithm to deal
- // with affine index expression.
- if (ldx < env.merger().getStartingFilterLoopId()) {
- sortedRange.push_back(loopRange[ldx]);
- }
- }
-
env.emitter().initializeLoopEmit(
builder, loc,
/// Generates buffer for the output tensor.
@@ -396,15 +336,9 @@ static void genBuffers(CodegenEnv &env, OpBuilder &builder) {
}
return init;
},
- [&sortedRange, &env](OpBuilder &b, Location loc, Level l) {
- assert(l < env.topSortSize());
- // FIXME: Remove filter loop since we have a better algorithm to
- // deal with affine index expression.
- if (l >= env.merger().getStartingFilterLoopId())
- return Value();
-
- return mlir::getValueOrCreateConstantIndexOp(b, loc,
- sortedRange[l].size);
+ [&loopRange](OpBuilder &b, Location loc, Level l) {
+ assert(l < loopRange.size());
+ return mlir::getValueOrCreateConstantIndexOp(b, loc, loopRange[l].size);
});
}
@@ -417,10 +351,7 @@ static Value genIndex(CodegenEnv &env, OpOperand *t) {
const auto stt = getSparseTensorType(t->get());
const Level lvlRank = stt.getLvlRank();
assert(static_cast<Level>(map.getNumResults()) == lvlRank);
- // FIXME: `toOrigDim` is deprecated.
- // FIXME: above we asserted that there are `lvlRank` many results,
- // but this is assuming there are in fact `dimRank` many results instead.
- const AffineExpr a = map.getResult(toOrigDim(stt.getEncoding(), lvlRank - 1));
+ const AffineExpr a = map.getResult(lvlRank - 1);
assert(a.getKind() == AffineExprKind::DimId);
const LoopId idx = env.makeLoopId(cast<AffineDimExpr>(a).getPosition());
return env.getLoopVar(idx);
@@ -750,19 +681,8 @@ static void genInvariants(CodegenEnv &env, OpBuilder &builder, ExprId exp,
const Level lvlRank = stt.getLvlRank();
assert(static_cast<Level>(map.getNumResults()) == lvlRank);
for (Level l = 0; l < lvlRank; l++) {
- // FIXME: `toOrigDim` is deprecated.
- // FIXME: above we asserted that there are `lvlRank` many results,
- // but this is assuming there are in fact `dimRank` many results instead.
- const AffineExpr a = map.getResult(toOrigDim(stt.getEncoding(), l));
- const auto sldx =
- env.merger().getLoopId(env.makeTensorId(t.getOperandNumber()), l);
- if (sldx && env.merger().isFilterLoop(*sldx)) {
- if (!env.getLoopVar(*sldx))
- // The filter loops has not been constructed.
- return;
- if (*sldx == ldx)
- isAtLoop = true;
- } else if (!isInvariantAffine(env, a, ldx, isAtLoop))
+ const AffineExpr a = map.getResult(l);
+ if (!isInvariantAffine(a, env.getLoopDepth(), ldx, isAtLoop))
return; // still in play
}
// All exhausted at this level (isAtLoop denotes exactly at this LoopId).
@@ -890,29 +810,6 @@ static bool shouldTryParallize(CodegenEnv &env, LoopId ldx, bool isOuter,
return isParallelFor(env, isOuter, isSparse);
}
-/// Generates a "filter loop" on the given tid level to locate a coordinate that
-/// is of the same value as evaluated by the affine expression in its matching
-/// indexing map.
-static Operation *genFilterLoop(CodegenEnv &env, OpBuilder &builder, LoopId ldx,
- TensorLevel tidLvl) {
- linalg::GenericOp op = env.op();
- Location loc = op.getLoc();
- Operation *loop = *env.genLoopBoundary([&](MutableArrayRef<Value> reduc) {
- assert(env.merger().isFilterLoop(ldx));
- const auto [tid, lvl] = env.unpackTensorLevel(tidLvl);
- // tids/lvls must only have one value because filter loops only
- // corresponding to the one and only sparse tensor level.
- OpOperand *t = &op->getOpOperand(tid);
- auto enc = getSparseTensorEncoding(t->get().getType());
- // Retrieves the affine expression for the filter loop.
- // FIXME: `toOrigDim` is deprecated.
- AffineExpr a = op.getMatchingIndexingMap(t).getResult(toOrigDim(enc, lvl));
- return env.emitter().enterFilterLoopOverTensorAtLvl(builder, loc, tid, lvl,
- a, reduc);
- });
- return loop;
-}
-
/// Emit a loop to coiterate over the list of tensor levels. The generated loop
/// can either be a for loop or while loop depending on whether there is at most
/// one sparse level in the list.
@@ -934,14 +831,8 @@ static Operation *genCoIteration(CodegenEnv &env, OpBuilder &builder,
/// singleton iteration or co-iteration over the given conjunction.
static Operation *genLoop(CodegenEnv &env, OpBuilder &builder, LoopOrd at,
bool needsUniv, ArrayRef<TensorLevel> tidLvls) {
- const LoopId ldx = env.topSortAt(at);
- if (env.merger().isFilterLoop(ldx)) {
- assert(tidLvls.size() == 1);
- return genFilterLoop(env, builder, ldx, tidLvls.front());
- }
-
- bool tryParallel = shouldTryParallize(env, ldx, at == 0, tidLvls);
- return genCoIteration(env, builder, ldx, tidLvls, tryParallel, needsUniv);
+ bool tryParallel = shouldTryParallize(env, at, at == 0, tidLvls);
+ return genCoIteration(env, builder, at, tidLvls, tryParallel, needsUniv);
}
/// Generates the induction structure for a while-loop.
@@ -1066,12 +957,12 @@ static void endIf(CodegenEnv &env, OpBuilder &builder, scf::IfOp ifOp,
/// Starts a loop sequence at given level. Returns true if
/// the universal loop index must be maintained at this level.
static bool startLoopSeq(CodegenEnv &env, OpBuilder &builder, ExprId exp,
- LoopOrd at, LoopId idx, LoopId ldx, LatSetId lts) {
+ LoopOrd idx, LoopId ldx, LatSetId lts) {
assert(!env.getLoopVar(idx));
// Emit invariants at this loop sequence level.
genInvariants(env, builder, exp, ldx, /*atStart=*/true);
// Emit access pattern expansion for sparse tensor output.
- genExpand(env, builder, at, /*atStart=*/true);
+ genExpand(env, builder, idx, /*atStart=*/true);
// Emit further initialization at this loop sequence level.
const LatPointId l0 = env.set(lts)[0];
bool needsUniv = false;
@@ -1125,10 +1016,8 @@ static void genConstantDenseAddressFromLevel(CodegenEnv &env,
const TensorId tid = env.makeTensorId(input->getOperandNumber());
const Level lvlRank = enc.getLvlRank();
assert(lvlExprs.size() == static_cast<size_t>(lvlRank));
- // FIXME: there is dim/lvl confusion here
for (Level l = startLvl; l < lvlRank; l++) {
- // FIXME: `toOrigDim` is deprecated.
- AffineExpr lvlExpr = lvlExprs[toOrigDim(enc, l)];
+ AffineExpr lvlExpr = lvlExprs[l];
if (enc.isDenseLvl(l) && isa<AffineConstantExpr>(lvlExpr))
env.emitter().genDenseAffineAddress(
builder, loc, env.makeTensorLevel(tid, l), lvlExpr);
@@ -1216,8 +1105,7 @@ static bool translateBitsToTidLvlPairs(
const Level lvlRank = stt.getLvlRank();
assert(affines.size() == static_cast<size_t>(lvlRank));
for (Level l = 0; l < lvlRank; l++) {
- // FIXME: `toOrigDim` is deprecated.
- AffineExpr exp = affines[toOrigDim(stt.getEncoding(), l)];
+ AffineExpr exp = affines[l];
// Skip simple affine expressions and non-dense levels (which
// have their own filter loop).
if (isa<AffineDimExpr>(exp) || !stt.isDenseLvl(l))
@@ -1226,7 +1114,8 @@ static bool translateBitsToTidLvlPairs(
// Constant affine expressions are handled in genLoop.
if (!isa<AffineConstantExpr>(exp)) {
bool isAtLoop = false;
- if (isInvariantAffine(env, exp, ldx, isAtLoop) && isAtLoop) {
+ if (isInvariantAffine(exp, env.getLoopDepth(), ldx, isAtLoop) &&
+ isAtLoop) {
- if (isInvariantAffine(env, exp, ldx, isAtLoop) && isAtLoop) {
+ if (isInvariantAffine(exp, env.getLoopDepth(), ldx, isAtLoop) &&
+ isAtLoop) {
// If the compound affine expression is invariant and we are right
// at the level, we need to generate the address according to the
// affine expression. This is also the best place we can do it
@@ -1273,8 +1162,8 @@ static std::pair<Operation *, bool> startLoop(CodegenEnv &env,
// becomes invariant and the address shall now be generated at the current
// level.
SmallVector<std::pair<TensorLevel, AffineExpr>> affineTidLvls;
- bool isSingleCond = translateBitsToTidLvlPairs(env, li, env.topSortAt(at),
- tidLvls, affineTidLvls);
+ bool isSingleCond =
+ translateBitsToTidLvlPairs(env, li, at, tidLvls, affineTidLvls);
// Emit the for/while-loop control.
Operation *loop = genLoop(env, builder, at, needsUniv, tidLvls);
@@ -1324,13 +1213,13 @@ static bool endLoop(CodegenEnv &env, RewriterBase &rewriter, Operation *loop,
/// Ends a loop sequence at given level.
static void endLoopSeq(CodegenEnv &env, OpBuilder &builder, unsigned exp,
- unsigned at, unsigned idx, unsigned ldx) {
+ unsigned idx, unsigned ldx) {
assert(!env.getLoopVar(idx));
env.emitter().exitCurrentLoopSeq(builder, env.op().getLoc());
// Unmark bookkeeping of invariants and loop index.
genInvariants(env, builder, exp, ldx, /*atStart=*/false);
// Finalize access pattern expansion for sparse tensor output.
- genExpand(env, builder, at, /*atStart=*/false);
+ genExpand(env, builder, idx, /*atStart=*/false);
}
/// Recursively generates code while computing iteration lattices in order
@@ -1339,22 +1228,19 @@ static void endLoopSeq(CodegenEnv &env, OpBuilder &builder, unsigned exp,
static void genStmt(CodegenEnv &env, RewriterBase &rewriter, ExprId exp,
LoopOrd at) {
// At each leaf, assign remaining tensor (sub)expression to output tensor.
- if (at == env.topSortSize()) {
- const LoopId ldx = env.topSortAt(at - 1);
- Value rhs = genExp(env, rewriter, exp, ldx);
+ if (at == env.getLoopNum()) {
+ Value rhs = genExp(env, rewriter, exp, at - 1);
genTensorStore(env, rewriter, exp, rhs);
return;
}
// Construct iteration lattices for current loop index, with L0 at top.
- const LoopId idx = env.topSortAt(at);
- const LoopId ldx = at == 0 ? ::mlir::sparse_tensor::detail::kInvalidId
- : env.topSortAt(at - 1);
+ const LoopId ldx = at == 0 ? sparse_tensor::detail::kInvalidId : at - 1;
const LatSetId lts =
- env.merger().optimizeSet(env.merger().buildLattices(exp, idx));
+ env.merger().optimizeSet(env.merger().buildLattices(exp, at));
// Start a loop sequence.
- bool needsUniv = startLoopSeq(env, rewriter, exp, at, idx, ldx, lts);
+ bool needsUniv = startLoopSeq(env, rewriter, exp, at, ldx, lts);
// Emit a loop for every lattice point L0 >= Li in this loop sequence.
//
@@ -1382,7 +1268,7 @@ static void genStmt(CodegenEnv &env, RewriterBase &rewriter, ExprId exp,
if (li == lj || env.merger().latGT(li, lj)) {
// Recurse into body of each branch.
if (!isSingleCond) {
- scf::IfOp ifOp = genIf(env, rewriter, idx, lj);
+ scf::IfOp ifOp = genIf(env, rewriter, at, lj);
genStmt(env, rewriter, ej, at + 1);
endIf(env, rewriter, ifOp, redInput, cntInput, insInput, validIns);
} else {
@@ -1392,11 +1278,11 @@ static void genStmt(CodegenEnv &env, RewriterBase &rewriter, ExprId exp,
}
// End a loop.
- needsUniv = endLoop(env, rewriter, loop, idx, li, needsUniv, isSingleCond);
+ needsUniv = endLoop(env, rewriter, loop, at, li, needsUniv, isSingleCond);
}
// End a loop sequence.
- endLoopSeq(env, rewriter, exp, at, idx, ldx);
+ endLoopSeq(env, rewriter, exp, at, ldx);
}
/// Converts the result computed by the sparse kernel into the required form.
@@ -1450,14 +1336,13 @@ public:
op, "Loops not yet scheduled, try run --sparse-reinterpret-map "
"before sparsification.");
}
+ // Must have been demapped as well if the generic op is sorted.
+ assert(!hasAnyNonIdentityOperandsOrResults(op));
// Sets up a code generation environment.
const unsigned numTensors = op->getNumOperands();
const unsigned numLoops = op.getNumLoops();
- const unsigned numFilterLoops = getNumNonTrivialIdxExpOnSparseLvls(op);
- // TODO: we should probably always use slice-based codegen whenever
- // possible, we can even intermix slice-based and filter-loop based codegen.
- bool idxReducBased = numFilterLoops != 0;
+ bool needIdxRed = getNumNonTrivialIdxExpOnSparseLvls(op) != 0;
// If we have an indexing map like (d0) -> (0, d0), there might be more
// levels than loops because of the constant index, which means we cannot
// use numLoops as the upper bound for the ranks of all tensors.
@@ -1471,14 +1356,10 @@ public:
}
}
- // A slice based algorithm for affine indices does not need filter loops.
- CodegenEnv env(op, options, numTensors, numLoops,
- /*numFilterLoops=*/idxReducBased ? 0 : numFilterLoops,
- maxLvlRank);
-
+ CodegenEnv env(op, options, numTensors, numLoops, maxLvlRank);
// Detects sparse annotations and translates the per-level sparsity
// information for all tensors to loop indices in the kernel.
- if (!findSparseAnnotations(env, idxReducBased))
+ if (!findSparseAnnotations(env, needIdxRed))
return failure();
// Only standard reduction operations (add, sub, or, xor) that can be
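
For context, the "(d0) -> (0, d0)" situation mentioned in the comment above can be pictured with a minimal hand-written kernel; this sketch is illustrative only (the function, shapes, and the #SparseMat alias are hypothetical, not part of this patch):

  #SparseMat = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed) }>

  // The sparse input has two levels but the kernel has a single loop (d0),
  // because the first indexing-map result is the constant 0. This is why
  // maxLvlRank is computed from the operands rather than from numLoops.
  func.func @row0(%a: tensor<4x8xf64, #SparseMat>, %x: tensor<8xf64>) -> tensor<8xf64> {
    %0 = linalg.generic {
           indexing_maps = [affine_map<(d0) -> (0, d0)>,
                            affine_map<(d0) -> (d0)>],
           iterator_types = ["parallel"]}
           ins(%a : tensor<4x8xf64, #SparseMat>)
           outs(%x : tensor<8xf64>) {
    ^bb0(%in: f64, %out: f64):
      linalg.yield %in : f64
    } -> tensor<8xf64>
    return %0 : tensor<8xf64>
  }
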
diff --git a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp
index 033b61fc872a..12fc51b4c57d 100644
--- a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp
@@ -220,12 +220,12 @@ TensorExp::TensorExp(TensorExp::Kind k, unsigned x, ExprId y, Value v,
llvm_unreachable("unexpected kind");
}
-Merger::Merger(unsigned numInputOutputTensors, unsigned numNativeLoops,
- unsigned numFilterLoops, unsigned maxLvlRank)
+Merger::Merger(unsigned numInputOutputTensors, unsigned numLoops,
+ unsigned maxLvlRank)
: outTensor(numInputOutputTensors - 1),
syntheticTensor(numInputOutputTensors),
- numTensors(numInputOutputTensors + 1), numNativeLoops(numNativeLoops),
- numLoops(numNativeLoops + numFilterLoops), hasSparseOut(false),
+ numTensors(numInputOutputTensors + 1), numLoops(numLoops),
+ hasSparseOut(false),
lvlTypes(numTensors,
std::vector<DimLevelType>(numLoops, DimLevelType::Undef)),
loopToLvl(numTensors,
diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp
index 291624c54803..1b4ec9eae936 100644
--- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp
+++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp
@@ -412,13 +412,14 @@ static LogicalResult printOperation(CppEmitter &emitter, func::CallOp callOp) {
return success();
}
-static LogicalResult printOperation(CppEmitter &emitter, emitc::CallOp callOp) {
+static LogicalResult printOperation(CppEmitter &emitter,
+ emitc::CallOpaqueOp callOpaqueOp) {
raw_ostream &os = emitter.ostream();
- Operation &op = *callOp.getOperation();
+ Operation &op = *callOpaqueOp.getOperation();
if (failed(emitter.emitAssignPrefix(op)))
return failure();
- os << callOp.getCallee();
+ os << callOpaqueOp.getCallee();
auto emitArgs = [&](Attribute attr) -> LogicalResult {
if (auto t = dyn_cast<IntegerAttr>(attr)) {
@@ -441,10 +442,10 @@ static LogicalResult printOperation(CppEmitter &emitter, emitc::CallOp callOp) {
return success();
};
- if (callOp.getTemplateArgs()) {
+ if (callOpaqueOp.getTemplateArgs()) {
os << "<";
- if (failed(
- interleaveCommaWithError(*callOp.getTemplateArgs(), os, emitArgs)))
+ if (failed(interleaveCommaWithError(*callOpaqueOp.getTemplateArgs(), os,
+ emitArgs)))
return failure();
os << ">";
}
@@ -452,8 +453,8 @@ static LogicalResult printOperation(CppEmitter &emitter, emitc::CallOp callOp) {
os << "(";
LogicalResult emittedArgs =
- callOp.getArgs()
- ? interleaveCommaWithError(*callOp.getArgs(), os, emitArgs)
+ callOpaqueOp.getArgs()
+ ? interleaveCommaWithError(*callOpaqueOp.getArgs(), os, emitArgs)
: emitter.emitOperands(op);
if (failed(emittedArgs))
return failure();
@@ -949,10 +950,11 @@ LogicalResult CppEmitter::emitOperation(Operation &op, bool trailingSemicolon) {
.Case<cf::BranchOp, cf::CondBranchOp>(
[&](auto op) { return printOperation(*this, op); })
// EmitC ops.
- .Case<emitc::AddOp, emitc::ApplyOp, emitc::AssignOp, emitc::CallOp,
- emitc::CastOp, emitc::CmpOp, emitc::ConstantOp, emitc::DivOp,
- emitc::ForOp, emitc::IfOp, emitc::IncludeOp, emitc::MulOp,
- emitc::RemOp, emitc::SubOp, emitc::VariableOp>(
+ .Case<emitc::AddOp, emitc::ApplyOp, emitc::AssignOp,
+ emitc::CallOpaqueOp, emitc::CastOp, emitc::CmpOp,
+ emitc::ConstantOp, emitc::DivOp, emitc::ForOp, emitc::IfOp,
+ emitc::IncludeOp, emitc::MulOp, emitc::RemOp, emitc::SubOp,
+ emitc::VariableOp>(
[&](auto op) { return printOperation(*this, op); })
// Func ops.
.Case<func::CallOp, func::ConstantOp, func::FuncOp, func::ReturnOp>(
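
The mechanical rename above can be summarized with one hedged example (the callee name `compute` is hypothetical): the op now parses and prints as `emitc.call_opaque`, while its arguments, template arguments, and semantics are unchanged.

  %0 = emitc.call_opaque "compute"() {template_args = [i32]} : () -> i32
  // TranslateToCpp is expected to print roughly: int32_t v1 = compute<int32_t>();
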
diff --git a/mlir/test/Conversion/SCFToEmitC/if.mlir b/mlir/test/Conversion/SCFToEmitC/if.mlir
index e34fd6a5efa4..afc9abc761eb 100644
--- a/mlir/test/Conversion/SCFToEmitC/if.mlir
+++ b/mlir/test/Conversion/SCFToEmitC/if.mlir
@@ -2,7 +2,7 @@
func.func @test_if(%arg0: i1, %arg1: f32) {
scf.if %arg0 {
- %0 = emitc.call "func_const"(%arg1) : (f32) -> i32
+ %0 = emitc.call_opaque "func_const"(%arg1) : (f32) -> i32
}
return
}
@@ -10,7 +10,7 @@ func.func @test_if(%arg0: i1, %arg1: f32) {
// CHECK-SAME: %[[VAL_0:.*]]: i1,
// CHECK-SAME: %[[VAL_1:.*]]: f32) {
// CHECK-NEXT: emitc.if %[[VAL_0]] {
-// CHECK-NEXT: %[[VAL_2:.*]] = emitc.call "func_const"(%[[VAL_1]]) : (f32) -> i32
+// CHECK-NEXT: %[[VAL_2:.*]] = emitc.call_opaque "func_const"(%[[VAL_1]]) : (f32) -> i32
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT: }
@@ -18,9 +18,9 @@ func.func @test_if(%arg0: i1, %arg1: f32) {
func.func @test_if_else(%arg0: i1, %arg1: f32) {
scf.if %arg0 {
- %0 = emitc.call "func_true"(%arg1) : (f32) -> i32
+ %0 = emitc.call_opaque "func_true"(%arg1) : (f32) -> i32
} else {
- %0 = emitc.call "func_false"(%arg1) : (f32) -> i32
+ %0 = emitc.call_opaque "func_false"(%arg1) : (f32) -> i32
}
return
}
@@ -28,9 +28,9 @@ func.func @test_if_else(%arg0: i1, %arg1: f32) {
// CHECK-SAME: %[[VAL_0:.*]]: i1,
// CHECK-SAME: %[[VAL_1:.*]]: f32) {
// CHECK-NEXT: emitc.if %[[VAL_0]] {
-// CHECK-NEXT: %[[VAL_2:.*]] = emitc.call "func_true"(%[[VAL_1]]) : (f32) -> i32
+// CHECK-NEXT: %[[VAL_2:.*]] = emitc.call_opaque "func_true"(%[[VAL_1]]) : (f32) -> i32
// CHECK-NEXT: } else {
-// CHECK-NEXT: %[[VAL_3:.*]] = emitc.call "func_false"(%[[VAL_1]]) : (f32) -> i32
+// CHECK-NEXT: %[[VAL_3:.*]] = emitc.call_opaque "func_false"(%[[VAL_1]]) : (f32) -> i32
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT: }
@@ -39,12 +39,12 @@ func.func @test_if_else(%arg0: i1, %arg1: f32) {
func.func @test_if_yield(%arg0: i1, %arg1: f32) {
%0 = arith.constant 0 : i8
%x, %y = scf.if %arg0 -> (i32, f64) {
- %1 = emitc.call "func_true_1"(%arg1) : (f32) -> i32
- %2 = emitc.call "func_true_2"(%arg1) : (f32) -> f64
+ %1 = emitc.call_opaque "func_true_1"(%arg1) : (f32) -> i32
+ %2 = emitc.call_opaque "func_true_2"(%arg1) : (f32) -> f64
scf.yield %1, %2 : i32, f64
} else {
- %1 = emitc.call "func_false_1"(%arg1) : (f32) -> i32
- %2 = emitc.call "func_false_2"(%arg1) : (f32) -> f64
+ %1 = emitc.call_opaque "func_false_1"(%arg1) : (f32) -> i32
+ %2 = emitc.call_opaque "func_false_2"(%arg1) : (f32) -> f64
scf.yield %1, %2 : i32, f64
}
return
@@ -56,13 +56,13 @@ func.func @test_if_yield(%arg0: i1, %arg1: f32) {
// CHECK-NEXT: %[[VAL_3:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> i32
// CHECK-NEXT: %[[VAL_4:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> f64
// CHECK-NEXT: emitc.if %[[VAL_0]] {
-// CHECK-NEXT: %[[VAL_5:.*]] = emitc.call "func_true_1"(%[[VAL_1]]) : (f32) -> i32
-// CHECK-NEXT: %[[VAL_6:.*]] = emitc.call "func_true_2"(%[[VAL_1]]) : (f32) -> f64
+// CHECK-NEXT: %[[VAL_5:.*]] = emitc.call_opaque "func_true_1"(%[[VAL_1]]) : (f32) -> i32
+// CHECK-NEXT: %[[VAL_6:.*]] = emitc.call_opaque "func_true_2"(%[[VAL_1]]) : (f32) -> f64
// CHECK-NEXT: emitc.assign %[[VAL_5]] : i32 to %[[VAL_3]] : i32
// CHECK-NEXT: emitc.assign %[[VAL_6]] : f64 to %[[VAL_4]] : f64
// CHECK-NEXT: } else {
-// CHECK-NEXT: %[[VAL_7:.*]] = emitc.call "func_false_1"(%[[VAL_1]]) : (f32) -> i32
-// CHECK-NEXT: %[[VAL_8:.*]] = emitc.call "func_false_2"(%[[VAL_1]]) : (f32) -> f64
+// CHECK-NEXT: %[[VAL_7:.*]] = emitc.call_opaque "func_false_1"(%[[VAL_1]]) : (f32) -> i32
+// CHECK-NEXT: %[[VAL_8:.*]] = emitc.call_opaque "func_false_2"(%[[VAL_1]]) : (f32) -> f64
// CHECK-NEXT: emitc.assign %[[VAL_7]] : i32 to %[[VAL_3]] : i32
// CHECK-NEXT: emitc.assign %[[VAL_8]] : f64 to %[[VAL_4]] : f64
// CHECK-NEXT: }
diff --git a/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir
index 13bde6e6fc56..9fe1e532dfc7 100644
--- a/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir
+++ b/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir
@@ -65,7 +65,7 @@ spirv.func @select_vector(%arg0: vector<2xi1>, %arg1: vector<2xi32>) "None" {
spirv.func @vector_shuffle_same_size(%vector1: vector<2xf32>, %vector2: vector<2xf32>) -> vector<3xf32> "None" {
// CHECK: %[[res:.*]] = llvm.shufflevector {{.*}} [0, 2, -1] : vector<2xf32>
// CHECK-NEXT: return %[[res]] : vector<3xf32>
- %0 = spirv.VectorShuffle [0: i32, 2: i32, 0xffffffff: i32] %vector1: vector<2xf32>, %vector2: vector<2xf32> -> vector<3xf32>
+ %0 = spirv.VectorShuffle [0: i32, 2: i32, 0xffffffff: i32] %vector1, %vector2 : vector<2xf32>, vector<2xf32> -> vector<3xf32>
spirv.ReturnValue %0: vector<3xf32>
}
@@ -80,7 +80,7 @@ spirv.func @vector_shuffle_different_size(%vector1: vector<3xf32>, %vector2: vec
// CHECK-NEXT: %[[EXT1:.*]] = llvm.extractelement {{.*}}[%[[C1_1]] : i32] : vector<2xf32>
// CHECK-NEXT: %[[RES:.*]] = llvm.insertelement %[[EXT1]], %[[INSERT0]][%[[C1_0]] : i32] : vector<3xf32>
// CHECK-NEXT: llvm.return %[[RES]] : vector<3xf32>
- %0 = spirv.VectorShuffle [0: i32, 4: i32, 0xffffffff: i32] %vector1: vector<3xf32>, %vector2: vector<2xf32> -> vector<3xf32>
+ %0 = spirv.VectorShuffle [0: i32, 4: i32, 0xffffffff: i32] %vector1, %vector2 : vector<3xf32>, vector<2xf32> -> vector<3xf32>
spirv.ReturnValue %0: vector<3xf32>
}
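
The VectorShuffle updates in this and the following test files track an assembly-format change only: the per-operand inline types move into a single trailing type list. A minimal before/after sketch (values hypothetical):

  // Before: %0 = spirv.VectorShuffle [0: i32, 2: i32] %v1: vector<2xf32>, %v2: vector<2xf32> -> vector<2xf32>
  // After:
  %0 = spirv.VectorShuffle [0: i32, 2: i32] %v1, %v2 : vector<2xf32>, vector<2xf32> -> vector<2xf32>
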
diff --git a/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir b/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir
index eba763eab9c2..6265a057a1b8 100644
--- a/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir
+++ b/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir
@@ -266,7 +266,7 @@ func.func @extract_element_0d_vector(%arg0 : f32) -> f32 {
// CHECK-LABEL: @extract_strided_slice
// CHECK-SAME: %[[ARG:.+]]: vector<4xf32>
-// CHECK: spirv.VectorShuffle [1 : i32, 2 : i32] %[[ARG]] : vector<4xf32>, %[[ARG]] : vector<4xf32> -> vector<2xf32>
+// CHECK: spirv.VectorShuffle [1 : i32, 2 : i32] %[[ARG]], %[[ARG]] : vector<4xf32>, vector<4xf32> -> vector<2xf32>
// CHECK: spirv.CompositeExtract %[[ARG]][1 : i32] : vector<4xf32>
func.func @extract_strided_slice(%arg0: vector<4xf32>) -> (vector<2xf32>, vector<1xf32>) {
%0 = vector.extract_strided_slice %arg0 {offsets = [1], sizes = [2], strides = [1]} : vector<4xf32> to vector<2xf32>
@@ -339,7 +339,7 @@ func.func @insert_element_0d_vector(%scalar: f32, %vector : vector<f32>) -> vect
// CHECK-LABEL: @insert_strided_slice
// CHECK-SAME: %[[PART:.+]]: vector<2xf32>, %[[ALL:.+]]: vector<4xf32>
-// CHECK: spirv.VectorShuffle [0 : i32, 4 : i32, 5 : i32, 3 : i32] %[[ALL]] : vector<4xf32>, %[[PART]] : vector<2xf32> -> vector<4xf32>
+// CHECK: spirv.VectorShuffle [0 : i32, 4 : i32, 5 : i32, 3 : i32] %[[ALL]], %[[PART]] : vector<4xf32>, vector<2xf32> -> vector<4xf32>
func.func @insert_strided_slice(%arg0: vector<2xf32>, %arg1: vector<4xf32>) -> vector<4xf32> {
%0 = vector.insert_strided_slice %arg0, %arg1 {offsets = [1], strides = [1]} : vector<2xf32> into vector<4xf32>
return %0 : vector<4xf32>
@@ -425,7 +425,7 @@ func.func @shuffle_index_vector(%v0 : vector<1xindex>, %v1: vector<1xindex>) ->
// CHECK-LABEL: func @shuffle
// CHECK-SAME: %[[V0:.+]]: vector<3xf32>, %[[V1:.+]]: vector<3xf32>
-// CHECK: spirv.VectorShuffle [3 : i32, 2 : i32, 5 : i32, 1 : i32] %[[V0]] : vector<3xf32>, %[[V1]] : vector<3xf32> -> vector<4xf32>
+// CHECK: spirv.VectorShuffle [3 : i32, 2 : i32, 5 : i32, 1 : i32] %[[V0]], %[[V1]] : vector<3xf32>, vector<3xf32> -> vector<4xf32>
func.func @shuffle(%v0 : vector<3xf32>, %v1: vector<3xf32>) -> vector<4xf32> {
%shuffle = vector.shuffle %v0, %v1 [3, 2, 5, 1] : vector<3xf32>, vector<3xf32>
return %shuffle : vector<4xf32>
diff --git a/mlir/test/Dialect/EmitC/attrs.mlir b/mlir/test/Dialect/EmitC/attrs.mlir
index 8bf196297176..11251b88ff0c 100644
--- a/mlir/test/Dialect/EmitC/attrs.mlir
+++ b/mlir/test/Dialect/EmitC/attrs.mlir
@@ -5,8 +5,8 @@
// CHECK-LABEL: func @opaque_attrs() {
func.func @opaque_attrs() {
// CHECK-NEXT: #emitc.opaque<"attr">
- emitc.call "f"() {args = [#emitc.opaque<"attr">]} : () -> ()
+ emitc.call_opaque "f"() {args = [#emitc.opaque<"attr">]} : () -> ()
// CHECK-NEXT: #emitc.opaque<"\22quoted_attr\22">
- emitc.call "f"() {args = [#emitc.opaque<"\"quoted_attr\"">]} : () -> ()
+ emitc.call_opaque "f"() {args = [#emitc.opaque<"\"quoted_attr\"">]} : () -> ()
return
}
diff --git a/mlir/test/Dialect/EmitC/invalid_ops.mlir b/mlir/test/Dialect/EmitC/invalid_ops.mlir
index 53d88adf4305..49efb962dfa2 100644
--- a/mlir/test/Dialect/EmitC/invalid_ops.mlir
+++ b/mlir/test/Dialect/EmitC/invalid_ops.mlir
@@ -25,48 +25,48 @@ func.func @empty_constant() {
// -----
func.func @index_args_out_of_range_1() {
- // expected-error @+1 {{'emitc.call' op index argument is out of range}}
- emitc.call "test" () {args = [0 : index]} : () -> ()
+ // expected-error @+1 {{'emitc.call_opaque' op index argument is out of range}}
+ emitc.call_opaque "test" () {args = [0 : index]} : () -> ()
return
}
// -----
func.func @index_args_out_of_range_2(%arg : i32) {
- // expected-error @+1 {{'emitc.call' op index argument is out of range}}
- emitc.call "test" (%arg, %arg) {args = [2 : index]} : (i32, i32) -> ()
+ // expected-error @+1 {{'emitc.call_opaque' op index argument is out of range}}
+ emitc.call_opaque "test" (%arg, %arg) {args = [2 : index]} : (i32, i32) -> ()
return
}
// -----
func.func @empty_callee() {
- // expected-error @+1 {{'emitc.call' op callee must not be empty}}
- emitc.call "" () : () -> ()
+ // expected-error @+1 {{'emitc.call_opaque' op callee must not be empty}}
+ emitc.call_opaque "" () : () -> ()
return
}
// -----
func.func @nonetype_arg(%arg : i32) {
- // expected-error @+1 {{'emitc.call' op array argument has no type}}
- emitc.call "nonetype_arg"(%arg) {args = [0 : index, [0, 1, 2]]} : (i32) -> i32
+ // expected-error @+1 {{'emitc.call_opaque' op array argument has no type}}
+ emitc.call_opaque "nonetype_arg"(%arg) {args = [0 : index, [0, 1, 2]]} : (i32) -> i32
return
}
// -----
func.func @array_template_arg(%arg : i32) {
- // expected-error @+1 {{'emitc.call' op template argument has invalid type}}
- emitc.call "nonetype_template_arg"(%arg) {template_args = [[0, 1, 2]]} : (i32) -> i32
+ // expected-error @+1 {{'emitc.call_opaque' op template argument has invalid type}}
+ emitc.call_opaque "nonetype_template_arg"(%arg) {template_args = [[0, 1, 2]]} : (i32) -> i32
return
}
// -----
func.func @dense_template_argument(%arg : i32) {
- // expected-error @+1 {{'emitc.call' op template argument has invalid type}}
- emitc.call "dense_template_argument"(%arg) {template_args = [dense<[1.0, 1.0]> : tensor<2xf32>]} : (i32) -> i32
+ // expected-error @+1 {{'emitc.call_opaque' op template argument has invalid type}}
+ emitc.call_opaque "dense_template_argument"(%arg) {template_args = [dense<[1.0, 1.0]> : tensor<2xf32>]} : (i32) -> i32
return
}
diff --git a/mlir/test/Dialect/EmitC/ops.mlir b/mlir/test/Dialect/EmitC/ops.mlir
index 6c8398680980..b3a24c26b96c 100644
--- a/mlir/test/Dialect/EmitC/ops.mlir
+++ b/mlir/test/Dialect/EmitC/ops.mlir
@@ -8,8 +8,8 @@ emitc.include "test.h"
// CHECK-LABEL: func @f(%{{.*}}: i32, %{{.*}}: !emitc.opaque<"int32_t">) {
func.func @f(%arg0: i32, %f: !emitc.opaque<"int32_t">) {
- %1 = "emitc.call"() {callee = "blah"} : () -> i64
- emitc.call "foo" (%1) {args = [
+ %1 = "emitc.call_opaque"() {callee = "blah"} : () -> i64
+ emitc.call_opaque "foo" (%1) {args = [
0 : index, dense<[0, 1]> : tensor<2xi32>, 0 : index
]} : (i64) -> ()
return
@@ -100,14 +100,14 @@ func.func @cmp(%arg0 : i32, %arg1 : f32, %arg2 : i64, %arg3 : f64, %arg4 : !emit
func.func @test_if(%arg0: i1, %arg1: f32) {
emitc.if %arg0 {
- %0 = emitc.call "func_const"(%arg1) : (f32) -> i32
+ %0 = emitc.call_opaque "func_const"(%arg1) : (f32) -> i32
}
return
}
func.func @test_if_explicit_yield(%arg0: i1, %arg1: f32) {
emitc.if %arg0 {
- %0 = emitc.call "func_const"(%arg1) : (f32) -> i32
+ %0 = emitc.call_opaque "func_const"(%arg1) : (f32) -> i32
emitc.yield
}
return
@@ -115,9 +115,9 @@ func.func @test_if_explicit_yield(%arg0: i1, %arg1: f32) {
func.func @test_if_else(%arg0: i1, %arg1: f32) {
emitc.if %arg0 {
- %0 = emitc.call "func_true"(%arg1) : (f32) -> i32
+ %0 = emitc.call_opaque "func_true"(%arg1) : (f32) -> i32
} else {
- %0 = emitc.call "func_false"(%arg1) : (f32) -> i32
+ %0 = emitc.call_opaque "func_false"(%arg1) : (f32) -> i32
}
return
}
@@ -130,14 +130,14 @@ func.func @test_assign(%arg1: f32) {
func.func @test_for(%arg0 : index, %arg1 : index, %arg2 : index) {
emitc.for %i0 = %arg0 to %arg1 step %arg2 {
- %0 = emitc.call "func_const"(%i0) : (index) -> i32
+ %0 = emitc.call_opaque "func_const"(%i0) : (index) -> i32
}
return
}
func.func @test_for_explicit_yield(%arg0 : index, %arg1 : index, %arg2 : index) {
emitc.for %i0 = %arg0 to %arg1 step %arg2 {
- %0 = emitc.call "func_const"(%i0) : (index) -> i32
+ %0 = emitc.call_opaque "func_const"(%i0) : (index) -> i32
emitc.yield
}
return
@@ -145,7 +145,7 @@ func.func @test_for_explicit_yield(%arg0 : index, %arg1 : index, %arg2 : index)
func.func @test_for_not_index_induction(%arg0 : i16, %arg1 : i16, %arg2 : i16) {
emitc.for %i0 = %arg0 to %arg1 step %arg2 : i16 {
- %0 = emitc.call "func_const"(%i0) : (i16) -> i32
+ %0 = emitc.call_opaque "func_const"(%i0) : (i16) -> i32
}
return
}
diff --git a/mlir/test/Dialect/EmitC/types.mlir b/mlir/test/Dialect/EmitC/types.mlir
index 3d68629a96e1..26d6f43a5824 100644
--- a/mlir/test/Dialect/EmitC/types.mlir
+++ b/mlir/test/Dialect/EmitC/types.mlir
@@ -5,17 +5,17 @@
// CHECK-LABEL: func @opaque_types() {
func.func @opaque_types() {
// CHECK-NEXT: !emitc.opaque<"int">
- emitc.call "f"() {template_args = [!emitc<opaque<"int">>]} : () -> ()
+ emitc.call_opaque "f"() {template_args = [!emitc<opaque<"int">>]} : () -> ()
// CHECK-NEXT: !emitc.opaque<"byte">
- emitc.call "f"() {template_args = [!emitc<opaque<"byte">>]} : () -> ()
+ emitc.call_opaque "f"() {template_args = [!emitc<opaque<"byte">>]} : () -> ()
// CHECK-NEXT: !emitc.opaque<"unsigned">
- emitc.call "f"() {template_args = [!emitc<opaque<"unsigned">>]} : () -> ()
+ emitc.call_opaque "f"() {template_args = [!emitc<opaque<"unsigned">>]} : () -> ()
// CHECK-NEXT: !emitc.opaque<"status_t">
- emitc.call "f"() {template_args = [!emitc<opaque<"status_t">>]} : () -> ()
+ emitc.call_opaque "f"() {template_args = [!emitc<opaque<"status_t">>]} : () -> ()
// CHECK-NEXT: !emitc.opaque<"std::vector<std::string>">
- emitc.call "f"() {template_args = [!emitc.opaque<"std::vector<std::string>">]} : () -> ()
+ emitc.call_opaque "f"() {template_args = [!emitc.opaque<"std::vector<std::string>">]} : () -> ()
// CHECK-NEXT: !emitc.opaque<"SmallVector<int*, 4>">
- emitc.call "f"() {template_args = [!emitc.opaque<"SmallVector<int*, 4>">]} : () -> ()
+ emitc.call_opaque "f"() {template_args = [!emitc.opaque<"SmallVector<int*, 4>">]} : () -> ()
return
}
@@ -23,19 +23,19 @@ func.func @opaque_types() {
// CHECK-LABEL: func @pointer_types() {
func.func @pointer_types() {
// CHECK-NEXT: !emitc.ptr<i32>
- emitc.call "f"() {template_args = [!emitc.ptr<i32>]} : () -> ()
+ emitc.call_opaque "f"() {template_args = [!emitc.ptr<i32>]} : () -> ()
// CHECK-NEXT: !emitc.ptr<i64>
- emitc.call "f"() {template_args = [!emitc.ptr<i64>]} : () -> ()
+ emitc.call_opaque "f"() {template_args = [!emitc.ptr<i64>]} : () -> ()
// CHECK-NEXT: !emitc.ptr<f32>
- emitc.call "f"() {template_args = [!emitc.ptr<f32>]} : () -> ()
+ emitc.call_opaque "f"() {template_args = [!emitc.ptr<f32>]} : () -> ()
// CHECK-NEXT: !emitc.ptr<f64>
- emitc.call "f"() {template_args = [!emitc.ptr<f64>]} : () -> ()
+ emitc.call_opaque "f"() {template_args = [!emitc.ptr<f64>]} : () -> ()
// CHECK-NEXT: !emitc.ptr<i32>
- %0 = emitc.call "f"() : () -> (!emitc.ptr<i32>)
+ %0 = emitc.call_opaque "f"() : () -> (!emitc.ptr<i32>)
// CHECK-NEXT: (!emitc.ptr<i32>) -> !emitc.ptr<!emitc.ptr<i32>>
- %1 = emitc.call "f"(%0) : (!emitc.ptr<i32>) -> (!emitc.ptr<!emitc.ptr<i32>>)
+ %1 = emitc.call_opaque "f"(%0) : (!emitc.ptr<i32>) -> (!emitc.ptr<!emitc.ptr<i32>>)
// CHECK-NEXT: !emitc.ptr<!emitc.opaque<"int">>
- emitc.call "f"() {template_args = [!emitc.ptr<!emitc.opaque<"int">>]} : () -> ()
+ emitc.call_opaque "f"() {template_args = [!emitc.ptr<!emitc.opaque<"int">>]} : () -> ()
return
}
diff --git a/mlir/test/Dialect/SPIRV/IR/composite-ops.mlir b/mlir/test/Dialect/SPIRV/IR/composite-ops.mlir
index 2891513961d5..b10677f0f5f9 100644
--- a/mlir/test/Dialect/SPIRV/IR/composite-ops.mlir
+++ b/mlir/test/Dialect/SPIRV/IR/composite-ops.mlir
@@ -338,8 +338,8 @@ func.func @vector_dynamic_insert(%val: f32, %vec: vector<4xf32>, %id : i32) -> v
//===----------------------------------------------------------------------===//
func.func @vector_shuffle(%vector1: vector<4xf32>, %vector2: vector<2xf32>) -> vector<3xf32> {
- // CHECK: %{{.+}} = spirv.VectorShuffle [1 : i32, 3 : i32, -1 : i32] %{{.+}} : vector<4xf32>, %arg1 : vector<2xf32> -> vector<3xf32>
- %0 = spirv.VectorShuffle [1: i32, 3: i32, 0xffffffff: i32] %vector1: vector<4xf32>, %vector2: vector<2xf32> -> vector<3xf32>
+ // CHECK: %{{.+}} = spirv.VectorShuffle [1 : i32, 3 : i32, -1 : i32] %{{.+}}, %arg1 : vector<4xf32>, vector<2xf32> -> vector<3xf32>
+ %0 = spirv.VectorShuffle [1: i32, 3: i32, 0xffffffff: i32] %vector1, %vector2 : vector<4xf32>, vector<2xf32> -> vector<3xf32>
return %0: vector<3xf32>
}
@@ -347,7 +347,7 @@ func.func @vector_shuffle(%vector1: vector<4xf32>, %vector2: vector<2xf32>) -> v
func.func @vector_shuffle_extra_selector(%vector1: vector<4xf32>, %vector2: vector<2xf32>) -> vector<3xf32> {
// expected-error @+1 {{result type element count (3) mismatch with the number of component selectors (4)}}
- %0 = spirv.VectorShuffle [1: i32, 3: i32, 5: i32, 2: i32] %vector1: vector<4xf32>, %vector2: vector<2xf32> -> vector<3xf32>
+ %0 = spirv.VectorShuffle [1: i32, 3: i32, 5: i32, 2: i32] %vector1, %vector2 : vector<4xf32>, vector<2xf32> -> vector<3xf32>
return %0: vector<3xf32>
}
@@ -355,6 +355,6 @@ func.func @vector_shuffle_extra_selector(%vector1: vector<4xf32>, %vector2: vect
func.func @vector_shuffle_extra_selector(%vector1: vector<4xf32>, %vector2: vector<2xf32>) -> vector<3xf32> {
// expected-error @+1 {{component selector 7 out of range: expected to be in [0, 6) or 0xffffffff}}
- %0 = spirv.VectorShuffle [1: i32, 7: i32, 5: i32] %vector1: vector<4xf32>, %vector2: vector<2xf32> -> vector<3xf32>
+ %0 = spirv.VectorShuffle [1: i32, 7: i32, 5: i32] %vector1, %vector2 : vector<4xf32>, vector<2xf32> -> vector<3xf32>
return %0: vector<3xf32>
}
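
The sparse-tensor test updates below replace the spelled-out `#sparse_tensor.encoding<{...}>` patterns with `#sparse{{[0-9]*}}`, matching the shorter attribute alias the asm printer now uses for encodings. Roughly, the printed IR takes this shape (alias name and encoding are illustrative):

  #sparse = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed) }>
  func.func private @kernel(%arg0: tensor<?x?xf64, #sparse>)
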
diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir
index 34189d329cc4..699508acf13c 100644
--- a/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir
+++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir
@@ -6,18 +6,18 @@
// Compute matrix matrix C = AB
//
// CHECK-LABEL: func.func @matmul(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<?x?xf64>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<?x?xf64>) -> tensor<?x?xf64> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.number_of_entries %[[VAL_0]] : tensor<?x?xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-DAG: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor<?x?xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-DAG: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor<?x?xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.number_of_entries %[[VAL_0]] : tensor<?x?xf64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor<?x?xf64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor<?x?xf64, #sparse{{[0-9]*}}>
// CHECK-DAG: %[[VAL_8:.*]] = tensor.dim %[[VAL_1]], %[[VAL_4]] : tensor<?x?xf64>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<?x?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<?x?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<?x?xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<?x?xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: %[[VAL_12:.*]] = gpu.wait async
// CHECK: %[[VAL_13:.*]] = memref.dim %[[VAL_9]], %[[VAL_3]] : memref<?xindex>
// CHECK: %[[VAL_14:.*]], %[[VAL_15:.*]] = gpu.alloc async {{\[}}%[[VAL_12]]] (%[[VAL_13]]) : memref<?xindex>
diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec_lib.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec_lib.mlir
index bd0bf6927b0d..14a0e9c30b20 100644
--- a/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec_lib.mlir
+++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec_lib.mlir
@@ -7,17 +7,17 @@
module {
// CHECK-LABEL: func.func @matvec(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<?xf64>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<?xf64>) -> tensor<?xf64> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.number_of_entries %[[VAL_0]] : tensor<?x?xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-DAG: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor<?x?xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-DAG: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor<?x?xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<?x?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex, strided<[?], offset: ?>>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<?x?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex, strided<[?], offset: ?>>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.number_of_entries %[[VAL_0]] : tensor<?x?xf64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor<?x?xf64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor<?x?xf64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<?x?xf64, #sparse{{[0-9]*}}> to memref<?xindex, strided<[?], offset: ?>>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<?x?xf64, #sparse{{[0-9]*}}> to memref<?xindex, strided<[?], offset: ?>>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: %[[VAL_11:.*]] = gpu.wait async
// CHECK: %[[VAL_12:.*]] = memref.dim %[[VAL_8]], %[[VAL_3]] : memref<?xindex, strided<[?], offset: ?>>
// CHECK: %[[VAL_13:.*]], %[[VAL_14:.*]] = gpu.alloc async {{\[}}%[[VAL_11]]] (%[[VAL_12]]) : memref<?xindex>
diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_sampled_matmul_lib.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_sampled_matmul_lib.mlir
index ce7af53bb346..97f36d49927b 100644
--- a/mlir/test/Dialect/SparseTensor/GPU/gpu_sampled_matmul_lib.mlir
+++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_sampled_matmul_lib.mlir
@@ -22,12 +22,12 @@
#CSR = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed) }>
// CHECK-LABEL: func.func @sparse_sampled_dd(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<8x8xf64>,
-// CHECK-SAME: %[[VAL_2:.*]]: tensor<8x8xf64>) -> tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> {
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<8x8xf64>) -> tensor<8x8xf64, #sparse{{[0-9]*}}> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
-// CHECK: %[[VAL_5:.*]] = sparse_tensor.number_of_entries %[[VAL_0]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_5:.*]] = sparse_tensor.number_of_entries %[[VAL_0]] : tensor<8x8xf64, #sparse{{[0-9]*}}>
// CHECK: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_1]] : memref<8x8xf64>
// CHECK: %[[VAL_7:.*]] = gpu.wait async
// CHECK: %[[VAL_8:.*]], %[[VAL_9:.*]] = gpu.alloc async {{\[}}%[[VAL_7]]] () : memref<8x8xf64>
@@ -36,9 +36,9 @@
// CHECK: %[[VAL_12:.*]] = gpu.wait async
// CHECK: %[[VAL_13:.*]], %[[VAL_14:.*]] = gpu.alloc async {{\[}}%[[VAL_12]]] () : memref<8x8xf64>
// CHECK: %[[VAL_15:.*]] = gpu.memcpy async {{\[}}%[[VAL_14]]] %[[VAL_13]], %[[VAL_11]] : memref<8x8xf64>, memref<8x8xf64>
-// CHECK: %[[VAL_16:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_17:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_18:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK: %[[VAL_16:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_17:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_18:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: %[[VAL_19:.*]] = gpu.wait async
// CHECK: %[[VAL_20:.*]] = memref.dim %[[VAL_16]], %[[VAL_4]] : memref<?xindex>
// CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = gpu.alloc async {{\[}}%[[VAL_19]]] (%[[VAL_20]]) : memref<?xindex>
@@ -70,8 +70,8 @@
// CHECK: %[[VAL_57:.*]] = gpu.memcpy async {{\[}}%[[VAL_56]]] %[[VAL_18]], %[[VAL_31]] : memref<?xf64>, memref<?xf64>
// CHECK: %[[VAL_58:.*]] = gpu.dealloc async {{\[}}%[[VAL_57]]] %[[VAL_31]] : memref<?xf64>
// CHECK: gpu.wait {{\[}}%[[VAL_58]]]
-// CHECK: %[[VAL_59:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: return %[[VAL_59]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_59:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<8x8xf64, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_59]] : tensor<8x8xf64, #sparse{{[0-9]*}}>
// CHECK: }
//
// A kernel that computes a direct sampled matrix matrix multiplication
diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_sddmm_lib.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_sddmm_lib.mlir
index dd79a9017f7f..93f49002a47d 100644
--- a/mlir/test/Dialect/SparseTensor/GPU/gpu_sddmm_lib.mlir
+++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_sddmm_lib.mlir
@@ -19,14 +19,14 @@
}
// CHECK-LABEL: func.func @SDDMM_block(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<?x?xf32>,
-// CHECK-SAME: %[[VAL_2:.*]]: tensor<?x?xf32>) -> tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> {
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<?x?xf32>) -> tensor<?x?xf32, #sparse{{[0-9]*}}> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 4 : index
-// CHECK: %[[VAL_7:.*]] = sparse_tensor.number_of_entries %[[VAL_0]] : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.number_of_entries %[[VAL_0]] : tensor<?x?xf32, #sparse{{[0-9]*}}>
// CHECK: %[[VAL_8:.*]] = tensor.dim %[[VAL_1]], %[[VAL_3]] : tensor<?x?xf32>
// CHECK: %[[VAL_9:.*]] = tensor.dim %[[VAL_1]], %[[VAL_4]] : tensor<?x?xf32>
// CHECK: %[[VAL_10:.*]] = tensor.dim %[[VAL_2]], %[[VAL_4]] : tensor<?x?xf32>
@@ -79,8 +79,8 @@
// CHECK: %[[VAL_66:.*]] = gpu.memcpy async {{\[}}%[[VAL_65]]] %[[VAL_27]], %[[VAL_40]] : memref<?xf32>, memref<?xf32>
// CHECK: %[[VAL_67:.*]] = gpu.dealloc async {{\[}}%[[VAL_66]]] %[[VAL_40]] : memref<?xf32>
// CHECK: gpu.wait {{\[}}%[[VAL_67]]]
-// CHECK: %[[VAL_68:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: return %[[VAL_68]] : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_68:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<?x?xf32, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_68]] : tensor<?x?xf32, #sparse{{[0-9]*}}>
// CHECK: }
func.func @SDDMM_block(%args: tensor<?x?xf32, #BSR>,
%arga: tensor<?x?xf32>,
diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir
index 5876eef6f193..e63595bed53e 100644
--- a/mlir/test/Dialect/SparseTensor/codegen.mlir
+++ b/mlir/test/Dialect/SparseTensor/codegen.mlir
@@ -252,7 +252,7 @@ func.func @sparse_values_coo(%arg0: tensor<?x?x?xf64, #ccoo>) -> memref<?xf64> {
}
-// CHECK-LABEL: func.func @sparse_indices_coo(
+// CHECK-LABEL: func.func @sparse_indices_coo(
// CHECK-SAME: %[[A0:.*0]]: memref<?xindex>,
// CHECK-SAME: %[[A1:.*1]]: memref<?xindex>,
// CHECK-SAME: %[[A2:.*2]]: memref<?xindex>,
@@ -270,7 +270,7 @@ func.func @sparse_indices_coo(%arg0: tensor<?x?x?xf64, #ccoo>) -> memref<?xindex
return %0 : memref<?xindex, strided<[?], offset: ?>>
}
-// CHECK-LABEL: func.func @sparse_indices_buffer_coo(
+// CHECK-LABEL: func.func @sparse_indices_buffer_coo(
// CHECK-SAME: %[[A0:.*0]]: memref<?xindex>,
// CHECK-SAME: %[[A1:.*1]]: memref<?xindex>,
// CHECK-SAME: %[[A2:.*2]]: memref<?xindex>,
@@ -664,7 +664,7 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) ->
}
// CHECK-LABEL: func.func @sparse_new_coo(
-// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref<?xindex>, memref<?xindex>, memref<?xf32>, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref<?xindex>, memref<?xindex>, memref<?xf32>, !sparse_tensor.storage_specifier<#sparse{{[0-9]*}}>) {
// CHECK-DAG: %[[VAL_1:.*]] = arith.constant false
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
@@ -676,26 +676,26 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) ->
// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<2xindex>
// CHECK: %[[VAL_8:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_7]], %[[VAL_2]]) : (!llvm.ptr, memref<?xindex>, i32) -> !llvm.ptr
// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_8]]) : (!llvm.ptr) -> memref<?xindex>
-// CHECK: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index
-// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref<?xindex>
-// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref<?xindex>
-// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index
-// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<2xindex>
-// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref<?xindex>
-// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_13]]) : memref<?xindex>
-// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_10]]) : memref<?xf32>
-// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.init
-// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] lvl_sz at 0 with %[[VAL_11]]
-// CHECK: %[[VAL_20:.*]] = sparse_tensor.storage_specifier.get %[[VAL_19]] pos_mem_sz at 0
-// CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = sparse_tensor.push_back %[[VAL_20]], %[[VAL_15]], %[[VAL_4]]
-// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_19]] pos_mem_sz at 0 with %[[VAL_22]]
-// CHECK: %[[VAL_24:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] lvl_sz at 1 with %[[VAL_12]]
-// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.push_back %[[VAL_22]], %[[VAL_21]], %[[VAL_4]], %[[VAL_3]]
-// CHECK: %[[VAL_27:.*]] = sparse_tensor.storage_specifier.set %[[VAL_24]] pos_mem_sz at 0 with %[[VAL_26]]
-// CHECK: %[[VAL_28:.*]] = memref.alloca() : memref<2xindex>
-// CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref<?xindex>
-// CHECK: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex>
-// CHECK: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index
+// CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref<?xindex>
+// CHECK-DAG: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index
+// CHECK-DAG: %[[VAL_14:.*]] = memref.alloc() : memref<2xindex>
+// CHECK-DAG: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref<?xindex>
+// CHECK-DAG: %[[VAL_16:.*]] = memref.alloc(%[[VAL_13]]) : memref<?xindex>
+// CHECK-DAG: %[[VAL_17:.*]] = memref.alloc(%[[VAL_10]]) : memref<?xf32>
+// CHECK-DAG: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.init
+// CHECK-DAG: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] lvl_sz at 0 with %[[VAL_11]]
+// CHECK-DAG: %[[VAL_20:.*]] = sparse_tensor.storage_specifier.get %[[VAL_19]] pos_mem_sz at 0
+// CHECK-DAG: %[[VAL_21:.*]], %[[VAL_22:.*]] = sparse_tensor.push_back %[[VAL_20]], %[[VAL_15]], %[[VAL_4]]
+// CHECK-DAG: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_19]] pos_mem_sz at 0 with %[[VAL_22]]
+// CHECK-DAG: %[[VAL_24:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] lvl_sz at 1 with %[[VAL_12]]
+// CHECK-DAG: %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.push_back %[[VAL_22]], %[[VAL_21]], %[[VAL_4]], %[[VAL_3]]
+// CHECK-DAG: %[[VAL_27:.*]] = sparse_tensor.storage_specifier.set %[[VAL_24]] pos_mem_sz at 0 with %[[VAL_26]]
+// CHECK-DAG: %[[VAL_28:.*]] = memref.alloca() : memref<2xindex>
+// CHECK-DAG: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref<?xindex>
+// CHECK-DAG: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex>
+// CHECK-DAG: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex>
// CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref<?xindex>, memref<?xindex>, memref<?xindex>, memref<?xf32>) -> i1
// CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1
// CHECK: scf.if %[[VAL_31]] {
@@ -712,7 +712,7 @@ func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor<?x?xf32, #Coo> {
}
// CHECK-LABEL: func.func @sparse_new_coo_permute_no(
-// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref<?xindex>, memref<?xindex>, memref<?xf32>, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref<?xindex>, memref<?xindex>, memref<?xf32>, !sparse_tensor.storage_specifier<#sparse{{[0-9]*}}>) {
// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
@@ -722,31 +722,31 @@ func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor<?x?xf32, #Coo> {
// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<2xindex>
// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<2xindex>
// CHECK: %[[VAL_7:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_6]], %[[VAL_1]]) : (!llvm.ptr, memref<?xindex>, i32) -> !llvm.ptr
-// CHECK: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref<?xindex>
-// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index
-// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref<?xindex>
-// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref<?xindex>
-// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index
-// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<2xindex>
-// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<2xindex> to memref<?xindex>
-// CHECK: %[[VAL_15:.*]] = memref.alloc(%[[VAL_12]]) : memref<?xindex>
-// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_9]]) : memref<?xf32>
-// CHECK: %[[VAL_17:.*]] = sparse_tensor.storage_specifier.init
-// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.set %[[VAL_17]] lvl_sz at 0 with %[[VAL_11]]
-// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.get %[[VAL_18]] pos_mem_sz at 0
-// CHECK: %[[VAL_20:.*]], %[[VAL_21:.*]] = sparse_tensor.push_back %[[VAL_19]], %[[VAL_14]], %[[VAL_3]]
-// CHECK: %[[VAL_22:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] pos_mem_sz at 0 with %[[VAL_21]]
-// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_22]] lvl_sz at 1 with %[[VAL_10]]
-// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.push_back %[[VAL_21]], %[[VAL_20]], %[[VAL_3]], %[[VAL_2]]
-// CHECK: %[[VAL_26:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] pos_mem_sz at 0 with %[[VAL_25]]
-// CHECK: %[[VAL_27:.*]] = memref.alloca() : memref<2xindex>
-// CHECK: %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<2xindex> to memref<?xindex>
-// CHECK: memref.store %[[VAL_2]], %[[VAL_27]]{{\[}}%[[VAL_3]]] : memref<2xindex>
-// CHECK: memref.store %[[VAL_3]], %[[VAL_27]]{{\[}}%[[VAL_2]]] : memref<2xindex>
-// CHECK: %[[VAL_29:.*]] = memref.alloca() : memref<2xindex>
-// CHECK: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref<?xindex>
-// CHECK: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex>
-// CHECK: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index
+// CHECK-DAG: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index
+// CHECK-DAG: %[[VAL_13:.*]] = memref.alloc() : memref<2xindex>
+// CHECK-DAG: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<2xindex> to memref<?xindex>
+// CHECK-DAG: %[[VAL_15:.*]] = memref.alloc(%[[VAL_12]]) : memref<?xindex>
+// CHECK-DAG: %[[VAL_16:.*]] = memref.alloc(%[[VAL_9]]) : memref<?xf32>
+// CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.storage_specifier.init
+// CHECK-DAG: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.set %[[VAL_17]] lvl_sz at 0 with %[[VAL_11]]
+// CHECK-DAG: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.get %[[VAL_18]] pos_mem_sz at 0
+// CHECK-DAG: %[[VAL_20:.*]], %[[VAL_21:.*]] = sparse_tensor.push_back %[[VAL_19]], %[[VAL_14]], %[[VAL_3]]
+// CHECK-DAG: %[[VAL_22:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] pos_mem_sz at 0 with %[[VAL_21]]
+// CHECK-DAG: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_22]] lvl_sz at 1 with %[[VAL_10]]
+// CHECK-DAG: %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.push_back %[[VAL_21]], %[[VAL_20]], %[[VAL_3]], %[[VAL_2]]
+// CHECK-DAG: %[[VAL_26:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] pos_mem_sz at 0 with %[[VAL_25]]
+// CHECK-DAG: %[[VAL_27:.*]] = memref.alloca() : memref<2xindex>
+// CHECK-DAG: %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<2xindex> to memref<?xindex>
+// CHECK-DAG: memref.store %[[VAL_2]], %[[VAL_27]]{{\[}}%[[VAL_3]]] : memref<2xindex>
+// CHECK-DAG: memref.store %[[VAL_3]], %[[VAL_27]]{{\[}}%[[VAL_2]]] : memref<2xindex>
+// CHECK-DAG: %[[VAL_29:.*]] = memref.alloca() : memref<2xindex>
+// CHECK-DAG: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref<?xindex>
+// CHECK-DAG: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex>
+// CHECK-DAG: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex>
// CHECK: %[[VAL_31:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_7]], %[[VAL_28]], %[[VAL_30]], %[[VAL_15]], %[[VAL_16]]) : (!llvm.ptr, memref<?xindex>, memref<?xindex>, memref<?xindex>, memref<?xf32>) -> i1
// CHECK: memref.store %[[VAL_9]], %[[VAL_24]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_26]] crd_mem_sz at 0 with %[[VAL_12]]
diff --git a/mlir/test/Dialect/SparseTensor/codegen_buffer_initialization.mlir b/mlir/test/Dialect/SparseTensor/codegen_buffer_initialization.mlir
index 14a8f0e43d35..e6d9700ffe4f 100644
--- a/mlir/test/Dialect/SparseTensor/codegen_buffer_initialization.mlir
+++ b/mlir/test/Dialect/SparseTensor/codegen_buffer_initialization.mlir
@@ -4,9 +4,9 @@
// CHECK-LABEL: func.func @empty_sparse_vector(
// CHECK-SAME: %[[VAL_0:.*]]: index) -> (memref<?xindex>, memref<?xindex>, memref<?xf64>, !sparse_tensor.storage_specifier
-// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f64
-// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f64
+// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_4:.*]] = memref.alloc() : memref<16xindex>
// CHECK: %[[VAL_5:.*]] = memref.cast %[[VAL_4]] : memref<16xindex> to memref<?xindex>
// CHECK: linalg.fill ins(%[[VAL_3]] : index) outs(%[[VAL_4]] : memref<16xindex>)
diff --git a/mlir/test/Dialect/SparseTensor/dense.mlir b/mlir/test/Dialect/SparseTensor/dense.mlir
index 52db814572a1..2d8dcfea9adc 100644
--- a/mlir/test/Dialect/SparseTensor/dense.mlir
+++ b/mlir/test/Dialect/SparseTensor/dense.mlir
@@ -32,14 +32,14 @@
// a non-annotated dense matrix as output.
//
// CHECK-LABEL: func @dense1(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1.000000e+00 : f32
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
// CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
@@ -73,14 +73,14 @@ func.func @dense1(%arga: tensor<32x16xf32, #DenseMatrix>,
//
// CHECK-LABEL: func @dense2(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>) -> tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> {
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>) -> tensor<32x16xf32, #sparse{{[0-9]*}}> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1.000000e+00 : f32
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32x16xf32>
-// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK: %[[VAL_11:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index
@@ -90,8 +90,8 @@ func.func @dense1(%arga: tensor<32x16xf32, #DenseMatrix>,
// CHECK: memref.store %[[VAL_14]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK: }
// CHECK: }
-// CHECK: %[[VAL_15:.*]] = sparse_tensor.load %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>
-// CHECK: return %[[VAL_15]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>
+// CHECK: %[[VAL_15:.*]] = sparse_tensor.load %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_15]] : tensor<32x16xf32, #sparse{{[0-9]*}}>
// CHECK: }
func.func @dense2(%arga: tensor<32x16xf32>,
%argx: tensor<32x16xf32, #DenseMatrix>)
@@ -116,14 +116,14 @@ func.func @dense2(%arga: tensor<32x16xf32>,
//
// CHECK-LABEL: func @dense3(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>) -> tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> {
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>) -> tensor<32x16xf32, #sparse{{[0-9]*}}> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32x16x8xf32>
-// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}}>> to memref<?xf32>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK: %[[VAL_11:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index
@@ -137,8 +137,8 @@ func.func @dense2(%arga: tensor<32x16xf32>,
// CHECK: memref.store %[[VAL_19:.*]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK: }
// CHECK: }
-// CHECK: %[[VAL_20:.*]] = sparse_tensor.load %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>
-// CHECK: return %[[VAL_20]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>
+// CHECK: %[[VAL_20:.*]] = sparse_tensor.load %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_20]] : tensor<32x16xf32, #sparse{{[0-9]*}}>
// CHECK: }
func.func @dense3(%arga: tensor<32x16x8xf32>,
%argx: tensor<32x16xf32, #DenseMatrix>)
diff --git a/mlir/test/Dialect/SparseTensor/fold.mlir b/mlir/test/Dialect/SparseTensor/fold.mlir
index 73851c086aa2..0153d444215e 100644
--- a/mlir/test/Dialect/SparseTensor/fold.mlir
+++ b/mlir/test/Dialect/SparseTensor/fold.mlir
@@ -21,7 +21,7 @@ func.func @sparse_dce_convert(%arg0: tensor<64xf32>) {
}
// CHECK-LABEL: func @sparse_dce_getters(
-// CHECK-SAME: %[[A:.*]]: tensor<64xf32, #sparse_tensor.encoding<{{{.*}}}>>)
+// CHECK-SAME: %[[A:.*]]: tensor<64xf32, #sparse{{[0-9]*}}>)
// CHECK-NOT: sparse_tensor.positions
// CHECK-NOT: sparse_tensor.coordinates
// CHECK-NOT: sparse_tensor.values
@@ -68,7 +68,7 @@ func.func @sparse_get_specifier_dce_fold(%arg0: !sparse_tensor.storage_specifier
#COO = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton)}>
// CHECK-LABEL: func @sparse_reorder_coo(
-// CHECK-SAME: %[[A:.*]]: tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-SAME: %[[A:.*]]: tensor<?x?xf32, #sparse{{[0-9]*}}>
// CHECK-NOT: %[[R:.*]] = sparse_tensor.reorder_coo
// CHECK: return %[[A]]
func.func @sparse_reorder_coo(%arg0 : tensor<?x?xf32, #COO>) -> tensor<?x?xf32, #COO> {
diff --git a/mlir/test/Dialect/SparseTensor/one_trip.mlir b/mlir/test/Dialect/SparseTensor/one_trip.mlir
index b8ab17735749..9d2a125dbef1 100644
--- a/mlir/test/Dialect/SparseTensor/one_trip.mlir
+++ b/mlir/test/Dialect/SparseTensor/one_trip.mlir
@@ -13,15 +13,15 @@
}
// CHECK-LABEL: func.func @sparse_scale(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x1xf32, #sparse_tensor.encoding<{{{.*}}}>>)
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x1xf32, #sparse{{[0-9]*}}>)
// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2.000000e+00 : f32
-// CHECK: %[[VAL_3:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<1x1xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK: %[[VAL_3:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<1x1xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK: %[[VAL_4:.*]] = memref.load %[[VAL_3]]{{\[}}%[[VAL_1]]] : memref<?xf32>
// CHECK: %[[VAL_5:.*]] = arith.mulf %[[VAL_4]], %[[VAL_2]] : f32
// CHECK: memref.store %[[VAL_5]], %[[VAL_3]]{{\[}}%[[VAL_1]]] : memref<?xf32>
-// CHECK: %[[VAL_6:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<1x1xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: return %[[VAL_6]] : tensor<1x1xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_6:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<1x1xf32, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_6]] : tensor<1x1xf32, #sparse{{[0-9]*}}>
func.func @sparse_scale(%argx: tensor<1x1xf32, #Dense>) -> tensor<1x1xf32, #Dense> {
%c = arith.constant 2.0 : f32
%0 = linalg.generic #trait_scale
diff --git a/mlir/test/Dialect/SparseTensor/pre_rewriting.mlir b/mlir/test/Dialect/SparseTensor/pre_rewriting.mlir
index c2cab30edec9..5fdbb46e3d8b 100644
--- a/mlir/test/Dialect/SparseTensor/pre_rewriting.mlir
+++ b/mlir/test/Dialect/SparseTensor/pre_rewriting.mlir
@@ -27,8 +27,8 @@
}
// CHECK-LABEL: func @sparse_nop_cast(
-// CHECK-SAME: %[[A:.*]]: tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>>)
-// CHECK: return %[[A]] : tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-SAME: %[[A:.*]]: tensor<?xf32, #sparse{{[0-9]*}}>)
+// CHECK: return %[[A]] : tensor<?xf32, #sparse{{[0-9]*}}>
func.func @sparse_nop_cast(%a : tensor<?xf32, #SparseVector>) -> tensor<?xf32, #SparseVector> {
%0 = tensor.cast %a : tensor<?xf32, #SparseVector> to tensor<?xf32, #SparseVector>
%1 = tensor.cast %0 : tensor<?xf32, #SparseVector> to tensor<?xf32, #SparseVector>
@@ -38,18 +38,18 @@ func.func @sparse_nop_cast(%a : tensor<?xf32, #SparseVector>) -> tensor<?xf32, #
// CHECK-LABEL: func @sparse_repair_cast(
// CHECK-SAME: %[[A:.*]]: tensor<?xf32>)
-// CHECK: %[[C:.*]] = sparse_tensor.convert %[[A]] : tensor<?xf32> to tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>
-// CHECK: return %[[C]] : tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[C:.*]] = sparse_tensor.convert %[[A]] : tensor<?xf32> to tensor<?xf32, #sparse{{[0-9]*}}>
+// CHECK: return %[[C]] : tensor<?xf32, #sparse{{[0-9]*}}>
func.func @sparse_repair_cast(%a : tensor<?xf32>) -> tensor<?xf32, #SparseVector> {
%0 = tensor.cast %a : tensor<?xf32> to tensor<?xf32, #SparseVector>
return %0 : tensor<?xf32, #SparseVector>
}
// CHECK-LABEL: func @sparse_fuse_slice(
-// CHECK-SAME: %[[A:.*]]: tensor<2x3xi64, #sparse_tensor.encoding<{{{.*}}}>>)
-// CHECK: %[[E:.*]] = tensor.extract_slice %[[A]][1, 0] [1, 3] [1, 1] : tensor<2x3xi64, #sparse_tensor.encoding<{{{.*}}}>> to tensor<1x3xi64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[C:.*]] = sparse_tensor.convert %[[E]] : tensor<1x3xi64, #sparse_tensor.encoding<{{{.*}}}>> to tensor<1x3xi64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: return %[[C]] : tensor<1x3xi64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-SAME: %[[A:.*]]: tensor<2x3xi64, #sparse{{[0-9]*}}>)
+// CHECK: %[[E:.*]] = tensor.extract_slice %[[A]][1, 0] [1, 3] [1, 1] : tensor<2x3xi64, #sparse{{[0-9]*}}> to tensor<1x3xi64, #sparse{{[0-9]*}}>
+// CHECK: %[[C:.*]] = sparse_tensor.convert %[[E]] : tensor<1x3xi64, #sparse{{[0-9]*}}> to tensor<1x3xi64, #sparse{{[0-9]*}}>
+// CHECK: return %[[C]] : tensor<1x3xi64, #sparse{{[0-9]*}}>
func.func @sparse_fuse_slice(%a : tensor<2x3xi64, #SortedCOO>) -> tensor<1x3xi64, #SortedCOO> {
%extracted_slice = tensor.extract_slice %a[1, 0] [1, 3] [1, 1] : tensor<2x3xi64, #SortedCOO> to tensor<1x3xi64>
%cast = tensor.cast %extracted_slice : tensor<1x3xi64> to tensor<1x3xi64, #Slice>
@@ -59,10 +59,10 @@ func.func @sparse_fuse_slice(%a : tensor<2x3xi64, #SortedCOO>) -> tensor<1x3xi64
// CHECK-LABEL: func.func @sparse_select(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<4x4xi1>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<4x4xf64, #sparse_tensor.encoding<{{.*}}>>,
-// CHECK-SAME: %[[VAL_2:.*]]: tensor<4x4xf64, #sparse_tensor.encoding<{{.*}}>>) -> tensor<4x4xf64, #sparse_tensor.encoding<{{.*}}>> {
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<4x4xf64, #sparse{{[0-9]*}}>,
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<4x4xf64, #sparse{{[0-9]*}}>) -> tensor<4x4xf64, #sparse{{[0-9]*}}> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f64
-// CHECK-DAG: %[[VAL_4:.*]] = tensor.empty() : tensor<4x4xf64, #sparse_tensor.encoding<{{.*}}>>
+// CHECK-DAG: %[[VAL_4:.*]] = tensor.empty() : tensor<4x4xf64, #sparse{{[0-9]*}}>
// CHECK-NEXT: %[[VAL_5:.*]] = linalg.generic {indexing_maps = [#map, #map, #map, #map], iterator_types = ["parallel", "parallel"]}
// CHECK-SAME: ins(%[[VAL_0]], %[[VAL_1]], %[[VAL_2]]
// CHECK-NEXT: ^bb0(%[[VAL_6:.*]]: i1, %[[VAL_7:.*]]: f64, %[[VAL_8:.*]]: f64, %[[VAL_9:.*]]: f64):
@@ -83,8 +83,8 @@ func.func @sparse_fuse_slice(%a : tensor<2x3xi64, #SortedCOO>) -> tensor<1x3xi64
// CHECK-NEXT: sparse_tensor.yield %[[VAL_17]] : f64
// CHECK-NEXT: }
// CHECK-NEXT: linalg.yield %[[VAL_10]] : f64
-// CHECK-NEXT: } -> tensor<4x4xf64, #sparse_tensor.encoding<{{.*}}>>
-// CHECK-NEXT: return %[[VAL_18:.*]] : tensor<4x4xf64, #sparse_tensor.encoding<{{.*}}>>
+// CHECK-NEXT: } -> tensor<4x4xf64, #sparse{{[0-9]*}}>
+// CHECK-NEXT: return %[[VAL_18:.*]] : tensor<4x4xf64, #sparse{{[0-9]*}}>
// CHECK-NEXT: }
func.func @sparse_select(%cond: tensor<4x4xi1>,
%arga: tensor<4x4xf64, #DCSR>,
diff --git a/mlir/test/Dialect/SparseTensor/rejected.mlir b/mlir/test/Dialect/SparseTensor/rejected.mlir
index c68c576766b8..c3cbde9703bb 100644
--- a/mlir/test/Dialect/SparseTensor/rejected.mlir
+++ b/mlir/test/Dialect/SparseTensor/rejected.mlir
@@ -15,7 +15,7 @@
// CHECK-LABEL: func.func @sparse_reduction_subi(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<i32>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<?xi32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<i32> {
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<?xi32, #sparse{{[0-9]*}}>) -> tensor<i32> {
// CHECK: %[[VAL_2:.*]] = linalg.generic
// CHECK: ^bb0(%[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: i32):
// CHECK: %[[VAL_5:.*]] = arith.subi %[[VAL_3]], %[[VAL_4]] : i32
diff --git a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir
index 579b0f20f5f6..11fc50282253 100644
--- a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir
+++ b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir
@@ -14,8 +14,8 @@
}>
// CHECK-LABEL: func.func @sparse_new(
-// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> {
-// CHECK: %[[COO:.*]] = sparse_tensor.new %[[A]] : !llvm.ptr to tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> tensor<?x?xf32, #sparse{{[0-9]*}}> {
+// CHECK: %[[COO:.*]] = sparse_tensor.new %[[A]] : !llvm.ptr to tensor<?x?xf32, #sparse{{[0-9]*}}>
// CHECK: %[[R:.*]] = sparse_tensor.convert %[[COO]]
// CHECK: bufferization.dealloc_tensor %[[COO]]
// CHECK: return %[[R]]
@@ -25,8 +25,8 @@ func.func @sparse_new(%arg0: !llvm.ptr) -> tensor<?x?xf32, #CSR> {
}
// CHECK-LABEL: func.func @sparse_new_csc(
-// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> {
-// CHECK: %[[COO:.*]] = sparse_tensor.new %[[A]] : !llvm.ptr to tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> tensor<?x?xf32, #sparse{{[0-9]*}}> {
+// CHECK: %[[COO:.*]] = sparse_tensor.new %[[A]] : !llvm.ptr to tensor<?x?xf32, #sparse{{[0-9]*}}>
// CHECK: %[[R:.*]] = sparse_tensor.convert %[[COO]]
// CHECK: bufferization.dealloc_tensor %[[COO]]
// CHECK: return %[[R]]
@@ -36,8 +36,8 @@ func.func @sparse_new_csc(%arg0: !llvm.ptr) -> tensor<?x?xf32, #CSC> {
}
// CHECK-LABEL: func.func @sparse_new_coo(
-// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> {
-// CHECK: %[[COO:.*]] = sparse_tensor.new %[[A]] : !llvm.ptr to tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> tensor<?x?xf32, #sparse{{[0-9]*}}> {
+// CHECK: %[[COO:.*]] = sparse_tensor.new %[[A]] : !llvm.ptr to tensor<?x?xf32, #sparse{{[0-9]*}}>
// CHECK: return %[[COO]]
func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor<?x?xf32, #COO> {
%0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor<?x?xf32, #COO>
@@ -45,7 +45,7 @@ func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor<?x?xf32, #COO> {
}
// CHECK-LABEL: func.func @sparse_out(
-// CHECK-SAME: %[[A:.*]]: tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[A:.*]]: tensor<10x20xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[B:.*]]: !llvm.ptr) {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
index 5b733ecbe787..476fa1b08a39 100644
--- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir
+++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
@@ -307,7 +307,7 @@ func.func @sparse_load_ins(%arg0: tensor<16x32xf64, #DenseMatrix>) -> tensor<16x
#SparseVector = #sparse_tensor.encoding<{map = (d0) -> (d0 : compressed)}>
// CHECK-LABEL: func @sparse_insert(
-// CHECK-SAME: %[[A:.*]]: tensor<128xf64, #sparse_tensor.encoding<{{.*}}>>,
+// CHECK-SAME: %[[A:.*]]: tensor<128xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[B:.*]]: index,
// CHECK-SAME: %[[C:.*]]: f64)
// CHECK: %[[T:.*]] = sparse_tensor.insert %[[C]] into %[[A]][%[[B]]] : tensor<128xf64, #{{.*}}>
@@ -362,7 +362,7 @@ func.func @sparse_push_back_n(%arg0: index, %arg1: memref<?xf64>, %arg2: f64, %a
#SparseMatrix = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : compressed, d1 : compressed)}>
// CHECK-LABEL: func @sparse_expansion(
-// CHECK-SAME: %[[A:.*]]: tensor<8x8xf64, #sparse_tensor.encoding<{{.*}}>>)
+// CHECK-SAME: %[[A:.*]]: tensor<8x8xf64, #sparse{{[0-9]*}}>)
// CHECK: %{{.*}}, %{{.*}}, %{{.*}}, %[[T:.*]] = sparse_tensor.expand %[[A]]
// CHECK: return %[[T]] : index
func.func @sparse_expansion(%tensor: tensor<8x8xf64, #SparseMatrix>) -> index {
@@ -380,10 +380,10 @@ func.func @sparse_expansion(%tensor: tensor<8x8xf64, #SparseMatrix>) -> index {
// CHECK-SAME: %[[A1:.*1]]: memref<?xi1>,
// CHECK-SAME: %[[A2:.*2]]: memref<?xindex>,
// CHECK-SAME: %[[A3:.*3]]: index
-// CHECK-SAME: %[[A4:.*4]]: tensor<8x8xf64, #sparse_tensor.encoding<{{.*}}>>,
+// CHECK-SAME: %[[A4:.*4]]: tensor<8x8xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[A5:.*5]]: index)
// CHECK: %[[T:.*]] = sparse_tensor.compress %[[A0]], %[[A1]], %[[A2]], %[[A3]] into %[[A4]][%[[A5]]
-// CHECK: return %[[T]] : tensor<8x8xf64, #sparse_tensor.encoding<{{.*}}>>
+// CHECK: return %[[T]] : tensor<8x8xf64, #sparse{{[0-9]*}}>
func.func @sparse_compression(%values: memref<?xf64>,
%filled: memref<?xi1>,
%added: memref<?xindex>,
@@ -400,9 +400,9 @@ func.func @sparse_compression(%values: memref<?xf64>,
#SparseMatrix = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : compressed, d1 : compressed)}>
// CHECK-LABEL: func @sparse_out(
-// CHECK-SAME: %[[A:.*]]: tensor<?x?xf64, #sparse_tensor.encoding<{{.*}}>>,
+// CHECK-SAME: %[[A:.*]]: tensor<?x?xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[B:.*]]: !llvm.ptr)
-// CHECK: sparse_tensor.out %[[A]], %[[B]] : tensor<?x?xf64, #sparse_tensor.encoding<{{.*}}>>, !llvm.ptr
+// CHECK: sparse_tensor.out %[[A]], %[[B]] : tensor<?x?xf64, #sparse{{[0-9]*}}>, !llvm.ptr
// CHECK: return
func.func @sparse_out(%arg0: tensor<?x?xf64, #SparseMatrix>, %arg1: !llvm.ptr) {
sparse_tensor.out %arg0, %arg1 : tensor<?x?xf64, #SparseMatrix>, !llvm.ptr
@@ -590,7 +590,7 @@ func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>) -> () {
#DCSR = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : compressed, d1 : compressed)}>
// CHECK-LABEL: func @sparse_tensor_foreach(
-// CHECK-SAME: %[[A0:.*]]: tensor<2x4xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[A0:.*]]: tensor<2x4xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[A1:.*]]: f32
// CHECK-NEXT: %[[RET:.*]] = sparse_tensor.foreach in %[[A0]] init(%[[A1]])
// CHECK-NEXT: ^bb0(%[[TMP_1:.*]]: index, %[[TMP_2:.*]]: index, %[[TMP_v:.*]]: f64, %[[TMP_r:.*]]: f32)
@@ -640,7 +640,7 @@ func.func @sparse_sort_coo_stable(%arg0: index, %arg1: memref<?xi64>, %arg2: mem
#OrderedCOO = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton)}>
// CHECK-LABEL: func @sparse_reorder_coo(
-// CHECK-SAME: %[[A:.*]]: tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-SAME: %[[A:.*]]: tensor<?x?xf32, #sparse{{[0-9]*}}>
// CHECK: %[[R:.*]] = sparse_tensor.reorder_coo quick_sort %[[A]]
// CHECK: return %[[R]]
func.func @sparse_reorder_coo(%arg0 : tensor<?x?xf32, #UnorderedCOO>) -> tensor<?x?xf32, #OrderedCOO> {
diff --git a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir
index db844312d02b..20702bb98502 100644
--- a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir
+++ b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir
@@ -1,8 +1,11 @@
-// RUN: mlir-opt %s -split-input-file | mlir-opt | FileCheck %s
+// RUN: mlir-opt %s -split-input-file | mlir-opt -split-input-file | FileCheck %s
+#SV = #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>
+
+// CHECK: #[[$SV:.*]] = #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>
// CHECK-LABEL: func private @sparse_1d_tensor(
-// CHECK-SAME: tensor<32xf64, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>>)
-func.func private @sparse_1d_tensor(tensor<32xf64, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>>)
+// CHECK-SAME: tensor<32xf64, #[[$SV]]>)
+func.func private @sparse_1d_tensor(tensor<32xf64, #SV>)
// -----
@@ -12,8 +15,9 @@ func.func private @sparse_1d_tensor(tensor<32xf64, #sparse_tensor.encoding<{ map
crdWidth = 64
}>
+// CHECK: #[[$CSR:.*]] = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed), posWidth = 64, crdWidth = 64 }>
// CHECK-LABEL: func private @sparse_csr(
-// CHECK-SAME: tensor<?x?xf32, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed), posWidth = 64, crdWidth = 64 }>>)
+// CHECK-SAME: tensor<?x?xf32, #[[$CSR]]>)
func.func private @sparse_csr(tensor<?x?xf32, #CSR>)
// -----
@@ -22,8 +26,9 @@ func.func private @sparse_csr(tensor<?x?xf32, #CSR>)
map = {l0, l1} (d0 = l0, d1 = l1) -> (l0 = d0 : dense, l1 = d1 : compressed)
}>
+// CHECK: #[[$CSR_EXPLICIT:.*]] = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed) }>
// CHECK-LABEL: func private @CSR_explicit(
-// CHECK-SAME: tensor<?x?xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed) }>>
+// CHECK-SAME: tensor<?x?xf64, #[[$CSR_EXPLICIT]]>
func.func private @CSR_explicit(%arg0: tensor<?x?xf64, #CSR_explicit>) {
return
}
@@ -36,8 +41,9 @@ func.func private @CSR_explicit(%arg0: tensor<?x?xf64, #CSR_explicit>) {
crdWidth = 0
}>
+// CHECK-DAG: #[[$CSC:.*]] = #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed) }>
// CHECK-LABEL: func private @sparse_csc(
-// CHECK-SAME: tensor<?x?xf32, #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed) }>>)
+// CHECK-SAME: tensor<?x?xf32, #[[$CSC]]>)
func.func private @sparse_csc(tensor<?x?xf32, #CSC>)
// -----
@@ -48,8 +54,9 @@ func.func private @sparse_csc(tensor<?x?xf32, #CSC>)
crdWidth = 64
}>
+// CHECK-DAG: #[[$DCSC:.*]] = #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : compressed, d0 : compressed), crdWidth = 64 }>
// CHECK-LABEL: func private @sparse_dcsc(
-// CHECK-SAME: tensor<?x?xf32, #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : compressed, d0 : compressed), crdWidth = 64 }>>)
+// CHECK-SAME: tensor<?x?xf32, #[[$DCSC]]>)
func.func private @sparse_dcsc(tensor<?x?xf32, #DCSC>)
// -----
@@ -58,8 +65,9 @@ func.func private @sparse_dcsc(tensor<?x?xf32, #DCSC>)
map = (d0, d1) -> (d0 : compressed(nonunique, nonordered), d1 : singleton(nonordered))
}>
+// CHECK-DAG: #[[$COO:.*]] = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed(nonunique, nonordered), d1 : singleton(nonordered)) }>
// CHECK-LABEL: func private @sparse_coo(
-// CHECK-SAME: tensor<?x?xf32, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed(nonunique, nonordered), d1 : singleton(nonordered)) }>>)
+// CHECK-SAME: tensor<?x?xf32, #[[$COO]]>)
func.func private @sparse_coo(tensor<?x?xf32, #COO>)
// -----
@@ -68,8 +76,9 @@ func.func private @sparse_coo(tensor<?x?xf32, #COO>)
map = (d0, d1, d2) -> (d0 : dense, d1 : loose_compressed(nonunique), d2 : singleton)
}>
+// CHECK-DAG: #[[$BCOO:.*]] = #sparse_tensor.encoding<{ map = (d0, d1, d2) -> (d0 : dense, d1 : loose_compressed(nonunique), d2 : singleton) }>
// CHECK-LABEL: func private @sparse_bcoo(
-// CHECK-SAME: tensor<?x?x?xf32, #sparse_tensor.encoding<{ map = (d0, d1, d2) -> (d0 : dense, d1 : loose_compressed(nonunique), d2 : singleton) }>>)
+// CHECK-SAME: tensor<?x?x?xf32, #[[$BCOO]]>)
func.func private @sparse_bcoo(tensor<?x?x?xf32, #BCOO>)
// -----
@@ -78,8 +87,9 @@ func.func private @sparse_bcoo(tensor<?x?x?xf32, #BCOO>)
map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton)
}>
+// CHECK-DAG: #[[$SortedCOO:.*]] = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton) }>
// CHECK-LABEL: func private @sparse_sorted_coo(
-// CHECK-SAME: tensor<10x10xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton) }>>)
+// CHECK-SAME: tensor<10x10xf64, #[[$SortedCOO]]>)
func.func private @sparse_sorted_coo(tensor<10x10xf64, #SortedCOO>)
// -----
@@ -93,8 +103,9 @@ func.func private @sparse_sorted_coo(tensor<10x10xf64, #SortedCOO>)
)
}>
+// CHECK-DAG: #[[$BSR:.*]] = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 floordiv 2 : dense, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>
// CHECK-LABEL: func private @sparse_bsr(
-// CHECK-SAME: tensor<10x60xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 floordiv 2 : dense, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>>
+// CHECK-SAME: tensor<10x60xf64, #[[$BSR]]>
func.func private @sparse_bsr(tensor<10x60xf64, #BSR>)
@@ -104,8 +115,9 @@ func.func private @sparse_bsr(tensor<10x60xf64, #BSR>)
map = [s0](d0, d1) -> (d0 * (s0 * 4) : dense, d0 : dense, d1 : compressed)
}>
+// CHECK-DAG: #[[$ELL:.*]] = #sparse_tensor.encoding<{ map = [s0](d0, d1) -> (d0 * (s0 * 4) : dense, d0 : dense, d1 : compressed) }>
// CHECK-LABEL: func private @sparse_ell(
-// CHECK-SAME: tensor<?x?xf64, #sparse_tensor.encoding<{ map = [s0](d0, d1) -> (d0 * (s0 * 4) : dense, d0 : dense, d1 : compressed) }>>
+// CHECK-SAME: tensor<?x?xf64, #[[$ELL]]>
func.func private @sparse_ell(tensor<?x?xf64, #ELL>)
// -----
@@ -114,8 +126,9 @@ func.func private @sparse_ell(tensor<?x?xf64, #ELL>)
map = (d0 : #sparse_tensor<slice(1, 4, 1)>, d1 : #sparse_tensor<slice(1, 4, 2)>) -> (d0 : dense, d1 : compressed)
}>
+// CHECK-DAG: #[[$CSR_SLICE:.*]] = #sparse_tensor.encoding<{ map = (d0 : #sparse_tensor<slice(1, 4, 1)>, d1 : #sparse_tensor<slice(1, 4, 2)>) -> (d0 : dense, d1 : compressed) }>
// CHECK-LABEL: func private @sparse_slice(
-// CHECK-SAME: tensor<?x?xf64, #sparse_tensor.encoding<{ map = (d0 : #sparse_tensor<slice(1, 4, 1)>, d1 : #sparse_tensor<slice(1, 4, 2)>) -> (d0 : dense, d1 : compressed) }>>
+// CHECK-SAME: tensor<?x?xf64, #[[$CSR_SLICE]]>
func.func private @sparse_slice(tensor<?x?xf64, #CSR_SLICE>)
// -----
@@ -124,8 +137,9 @@ func.func private @sparse_slice(tensor<?x?xf64, #CSR_SLICE>)
map = (d0 : #sparse_tensor<slice(1, ?, 1)>, d1 : #sparse_tensor<slice(?, 4, 2)>) -> (d0 : dense, d1 : compressed)
}>
+// CHECK-DAG: #[[$CSR_SLICE:.*]] = #sparse_tensor.encoding<{ map = (d0 : #sparse_tensor<slice(1, ?, 1)>, d1 : #sparse_tensor<slice(?, 4, 2)>) -> (d0 : dense, d1 : compressed) }>
// CHECK-LABEL: func private @sparse_slice(
-// CHECK-SAME: tensor<?x?xf64, #sparse_tensor.encoding<{ map = (d0 : #sparse_tensor<slice(1, ?, 1)>, d1 : #sparse_tensor<slice(?, 4, 2)>) -> (d0 : dense, d1 : compressed) }>>
+// CHECK-SAME: tensor<?x?xf64, #[[$CSR_SLICE]]>
func.func private @sparse_slice(tensor<?x?xf64, #CSR_SLICE>)
// -----
@@ -139,8 +153,9 @@ func.func private @sparse_slice(tensor<?x?xf64, #CSR_SLICE>)
)
}>
+// CHECK-DAG: #[[$BSR:.*]] = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 floordiv 2 : dense, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>
// CHECK-LABEL: func private @BSR(
-// CHECK-SAME: tensor<?x?xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 floordiv 2 : dense, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>>
+// CHECK-SAME: tensor<?x?xf64, #[[$BSR]]>
func.func private @BSR(%arg0: tensor<?x?xf64, #BSR>) {
return
}
@@ -158,8 +173,9 @@ func.func private @BSR(%arg0: tensor<?x?xf64, #BSR>) {
)
}>
+// CHECK-DAG: #[[$BCSR:.*]] = #sparse_tensor.encoding<{ map = (d0, d1, d2) -> (d0 floordiv 2 : dense, d1 floordiv 3 : dense, d2 floordiv 4 : compressed, d0 mod 2 : dense, d1 mod 3 : dense, d2 mod 4 : dense) }>
// CHECK-LABEL: func private @BCSR(
-// CHECK-SAME: tensor<?x?x?xf64, #sparse_tensor.encoding<{ map = (d0, d1, d2) -> (d0 floordiv 2 : dense, d1 floordiv 3 : dense, d2 floordiv 4 : compressed, d0 mod 2 : dense, d1 mod 3 : dense, d2 mod 4 : dense) }>>
+// CHECK-SAME: tensor<?x?x?xf64, #[[$BCSR]]>
func.func private @BCSR(%arg0: tensor<?x?x?xf64, #BCSR>) {
return
}
@@ -178,8 +194,9 @@ func.func private @BCSR(%arg0: tensor<?x?x?xf64, #BCSR>) {
)
}>
+// CHECK-DAG: #[[$BSR_explicit:.*]] = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 floordiv 2 : dense, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>
// CHECK-LABEL: func private @BSR_explicit(
-// CHECK-SAME: tensor<?x?xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 floordiv 2 : dense, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>>
+// CHECK-SAME: tensor<?x?xf64, #[[$BSR_explicit]]>
func.func private @BSR_explicit(%arg0: tensor<?x?xf64, #BSR_explicit>) {
return
}
@@ -195,8 +212,9 @@ func.func private @BSR_explicit(%arg0: tensor<?x?xf64, #BSR_explicit>) {
crdWidth = 8 // we would even like just 2-bits
}>
+// CHECK-DAG: #[[$NV_24:.*]] = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 floordiv 4 : dense, d1 mod 4 : block2_4), crdWidth = 8 }>
// CHECK-LABEL: func private @NV_24(
-// CHECK-SAME: tensor<?x?xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 floordiv 4 : dense, d1 mod 4 : block2_4), crdWidth = 8 }>>
+// CHECK-SAME: tensor<?x?xf64, #[[$NV_24]]>
func.func private @NV_24(%arg0: tensor<?x?xf64, #NV_24>) {
return
}
@@ -212,8 +230,9 @@ func.func private @NV_24(%arg0: tensor<?x?xf64, #NV_24>) {
)
}>
+// CHECK-DAG: #[[$NV_24:.*]] = #sparse_tensor.encoding<{ map = (d0, d1, d2) -> (d0 : dense, d1 : dense, d2 floordiv 4 : dense, d2 mod 4 : block2_4) }>
// CHECK-LABEL: func private @NV_24(
-// CHECK-SAME: tensor<?x?x?xf64, #sparse_tensor.encoding<{ map = (d0, d1, d2) -> (d0 : dense, d1 : dense, d2 floordiv 4 : dense, d2 mod 4 : block2_4) }>>
+// CHECK-SAME: tensor<?x?x?xf64, #[[$NV_24]]>
func.func private @NV_24(%arg0: tensor<?x?x?xf64, #NV_24>) {
return
}
@@ -229,8 +248,9 @@ func.func private @NV_24(%arg0: tensor<?x?x?xf64, #NV_24>) {
)
}>
+// CHECK-DAG: #[[$NV_24:.*]] = #sparse_tensor.encoding<{ map = (d0, d1, d2) -> (d0 : dense, d2 floordiv 4 : dense, d1 : dense, d2 mod 4 : block2_4) }>
// CHECK-LABEL: func private @NV_24(
-// CHECK-SAME: tensor<?x?x?xf64, #sparse_tensor.encoding<{ map = (d0, d1, d2) -> (d0 : dense, d2 floordiv 4 : dense, d1 : dense, d2 mod 4 : block2_4) }>>
+// CHECK-SAME: tensor<?x?x?xf64, #[[$NV_24]]>
func.func private @NV_24(%arg0: tensor<?x?x?xf64, #NV_24>) {
return
}
diff --git a/mlir/test/Dialect/SparseTensor/semi_ring.mlir b/mlir/test/Dialect/SparseTensor/semi_ring.mlir
index 5a936bfa1e59..814acd673622 100644
--- a/mlir/test/Dialect/SparseTensor/semi_ring.mlir
+++ b/mlir/test/Dialect/SparseTensor/semi_ring.mlir
@@ -19,13 +19,13 @@ module {
  // operation, and the values can be changed "in-place".
//
// CHECK-LABEL: func.func @add_only_where_nonzero(
- // CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> {
+ // CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse{{[0-9]*}}>) -> tensor<8x8xf64, #sparse{{[0-9]*}}> {
// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2.000000e+00 : f64
- // CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
- // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+ // CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+ // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: scf.for %[[VAL_7:.*]] = %[[VAL_2]] to %[[VAL_1]] step %[[VAL_3]] {
// CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_7]]] : memref<?xindex>
// CHECK: %[[VAL_9:.*]] = arith.addi %[[VAL_7]], %[[VAL_3]] : index
@@ -36,8 +36,8 @@ module {
// CHECK: memref.store %[[VAL_13]], %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref<?xf64>
// CHECK: } {"Emitted from" = "linalg.generic"}
// CHECK: } {"Emitted from" = "linalg.generic"}
- // CHECK: %[[VAL_14:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>
- // CHECK: return %[[VAL_14]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>
+ // CHECK: %[[VAL_14:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<8x8xf64, #sparse{{[0-9]*}}>
+ // CHECK: return %[[VAL_14]] : tensor<8x8xf64, #sparse{{[0-9]*}}>
// CHECK: }
func.func @add_only_where_nonzero(%argA: tensor<8x8xf64, #SM>) -> tensor<8x8xf64, #SM> {
%c = arith.constant 2.0 : f64
diff --git a/mlir/test/Dialect/SparseTensor/sorted_coo.mlir b/mlir/test/Dialect/SparseTensor/sorted_coo.mlir
index 1a8af35358bf..91e7920b3a90 100644
--- a/mlir/test/Dialect/SparseTensor/sorted_coo.mlir
+++ b/mlir/test/Dialect/SparseTensor/sorted_coo.mlir
@@ -37,14 +37,14 @@
//
// CHECK-LABEL: func.func @sparse_scale(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> {
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?xf32, #sparse{{[0-9]*}}>) -> tensor<?x?xf32, #sparse{{[0-9]*}}> {
// CHECK-DAG: %[[VAL_1:.*]] = arith.constant false
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2.000000e+00 : f32
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex, strided<[?], offset: ?>>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex, strided<[?], offset: ?>>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_8:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = scf.while (%[[VAL_11:.*]] = %[[VAL_8]]) : (index) -> index {
@@ -75,8 +75,8 @@
// CHECK: } {"Emitted from" = "linalg.generic"}
// CHECK: scf.yield %[[VAL_28:.*]] : index
// CHECK: } attributes {"Emitted from" = "linalg.generic"}
-// CHECK: %[[VAL_29:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: return %[[VAL_29]] : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_29:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<?x?xf32, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_29]] : tensor<?x?xf32, #sparse{{[0-9]*}}>
// CHECK: }
func.func @sparse_scale(%argx: tensor<?x?xf32, #SortedCOO>) -> tensor<?x?xf32, #SortedCOO> {
%c = arith.constant 2.0 : f32
@@ -90,16 +90,16 @@ func.func @sparse_scale(%argx: tensor<?x?xf32, #SortedCOO>) -> tensor<?x?xf32, #
}
// CHECK-LABEL: func.func @matvec(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x64xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<64xf64>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant false
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex, strided<[?], offset: ?>>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex, strided<[?], offset: ?>>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref<?xindex, strided<[?], offset: ?>>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref<?xindex, strided<[?], offset: ?>>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64>
// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref<?xindex>
@@ -155,20 +155,20 @@ func.func @matvec(%arga: tensor<32x64xf64, #SortedCOO>,
}
// CHECK-LABEL: func.func @mateltmul(
-// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x64xf64, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<32x64xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*2]]: tensor<32x64xf64>) -> tensor<32x64xf64> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant false
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f64
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex, strided<[?], offset: ?>>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex, strided<[?], offset: ?>>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex, strided<[?], offset: ?>>
-// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex, strided<[?], offset: ?>>
-// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref<?xindex, strided<[?], offset: ?>>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref<?xindex, strided<[?], offset: ?>>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref<?xf64>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref<?xindex, strided<[?], offset: ?>>
+// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref<?xindex, strided<[?], offset: ?>>
+// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x64xf64>
// CHECK: linalg.fill ins(%[[VAL_4]] : f64) outs(%[[VAL_15]] : memref<32x64xf64>)
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref<?xindex>
diff --git a/mlir/test/Dialect/SparseTensor/sparse_1d.mlir b/mlir/test/Dialect/SparseTensor/sparse_1d.mlir
index d29758e4fa97..8f96f3379487 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_1d.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_1d.mlir
@@ -14,13 +14,13 @@
}
// CHECK-LABEL: func @add_d(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: f32,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_8]] : memref<32xf32>)
// CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
@@ -43,14 +43,14 @@ func.func @add_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>)
}
// CHECK-LABEL: func @add_d_init(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: f32) -> tensor<32xf32> {
// CHECK: %[[VAL_2:.*]] = arith.constant 32 : index
// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_INITTENSOR:.*]] = tensor.empty() : tensor<32xf32>
-// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_INITTENSOR]] : memref<32xf32>
// CHECK: linalg.fill ins(%[[VAL_3]] : f32) outs(%[[VAL_7]] : memref<32xf32>)
// CHECK: scf.for %[[VAL_8:.*]] = %[[VAL_4]] to %[[VAL_2]] step %[[VAL_5]] {
@@ -74,13 +74,13 @@ func.func @add_d_init(%arga: tensor<32xf32, #DV>, %argb: f32) -> tensor<32xf32>
}
// CHECK-LABEL: func @mul_d(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: f32,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_8]] : memref<32xf32>)
// CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
@@ -103,16 +103,16 @@ func.func @mul_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>)
}
// CHECK-LABEL: func @add_s(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: f32,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]]
@@ -158,13 +158,13 @@ func.func @add_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>)
}
// CHECK-LABEL: func @repeated_add_s(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]]
// CHECK-DAG: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_10:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@@ -197,14 +197,14 @@ func.func @repeated_add_s(%arga: tensor<32xf32, #SV>, %argx: tensor<32xf32>) ->
}
// CHECK-LABEL: func @mul_s(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: f32,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>)
// CHECK-DAG: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@@ -240,13 +240,13 @@ func.func @mul_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>)
}
// CHECK-LABEL: func @add_dd(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>)
@@ -271,13 +271,13 @@ func.func @add_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tens
}
// CHECK-LABEL: func @mul_dd(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>)
@@ -303,16 +303,16 @@ func.func @mul_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tens
// CHECK-LABEL: func @add_ds(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32xf32>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
// CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -362,14 +362,14 @@ func.func @add_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tens
// CHECK-LABEL: func @mul_ds(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_5:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32xf32>
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32xf32>)
// CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@@ -396,16 +396,16 @@ func.func @mul_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tens
}
// CHECK-LABEL: func @add_sd(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
@@ -455,14 +455,14 @@ func.func @add_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tens
}
// CHECK-LABEL: func @mul_sd(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32xf32>)
@@ -490,16 +490,16 @@ func.func @mul_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tens
}
// CHECK-LABEL: func @add_ss(
-// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<32xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*2]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xf32>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
// CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@@ -572,16 +572,16 @@ func.func @add_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx:
}
// CHECK-LABEL: func @mul_ss(
-// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<32xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*2]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xf32>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
// CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@@ -632,17 +632,17 @@ func.func @mul_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx:
}
// CHECK-LABEL: func @two_way_inv(
-// CHECK-SAME: %[[VAL_0:.*0]]: tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*0]]: tensor<16xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*2]]: f32,
// CHECK-SAME: %[[VAL_3:.*3]]: tensor<16xf32>) -> tensor<16xf32> {
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]]
// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<16xf32>)
// CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -724,17 +724,17 @@ func.func @two_way_inv(%arga: tensor<16xf32, #SV>, %argb: tensor<16xf32, #SV>, %
}
// CHECK-LABEL: func @two_way_inv_alt(
-// CHECK-SAME: %[[VAL_0:.*0]]: tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*0]]: tensor<16xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*2]]: f32,
// CHECK-SAME: %[[VAL_3:.*3]]: tensor<16xf32>) -> tensor<16xf32> {
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]]
// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<16xf32>)
// CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -824,12 +824,12 @@ func.func @two_way_inv_alt(%arga: tensor<16xf32, #SV>,
}
// CHECK-LABEL: func @sum_reduction(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<?xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<f32>) -> tensor<f32> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_1]] : memref<f32>
// CHECK-DAG: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@@ -865,16 +865,16 @@ func.func @sum_reduction(%arga: tensor<?xf32, #SV>, %argx: tensor<f32>) -> tenso
}
// CHECK-LABEL: func @sum_reduction_ss(
-// CHECK-SAME: %[[VAL_0:.*0]]: tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*0]]: tensor<16xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*2]]: tensor<f32>) -> tensor<f32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<f32>
// CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_11]][] : memref<f32>
// CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@@ -970,17 +970,17 @@ func.func @sum_reduction_ss(%arga: tensor<16xf32, #SV>,
}
// CHECK-LABEL: func @sum_reduction_inv(
-// CHECK-SAME: %[[VAL_0:.*0]]: tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<f32>, %[[VAL_2:.*2]]: tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*0]]: tensor<16xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<f32>, %[[VAL_2:.*2]]: tensor<16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_3:.*3]]: tensor<f32>) -> tensor<f32> {
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<f32>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]] : memref<f32>
// CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_13]][] : memref<f32>
// CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_9]][] : memref<f32>
@@ -1084,19 +1084,19 @@ func.func @sum_reduction_inv(%arga: tensor<16xf32, #SV>,
// CHECK-LABEL: func @four_tensors_op(
// CHECK-SAME: %[[VAL_0:.*0]]: tensor<?xf64>,
-// CHECK-SAME: %[[VAL_1:.*1]]: tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_2:.*2]]: tensor<?xf64>, %[[VAL_3:.*3]]: tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*1]]: tensor<?xf64, #sparse{{[0-9]*}}>, %[[VAL_2:.*2]]: tensor<?xf64>, %[[VAL_3:.*3]]: tensor<?xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_4:.*]]: tensor<?xf64>) -> tensor<?xf64> {
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref<?xf64>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<?xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<?xf64>
-// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_3]] {level = 0 : index} : tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_3]] {level = 0 : index} : tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_3]] : tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_3]] {level = 0 : index} : tensor<?xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_3]] {level = 0 : index} : tensor<?xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_3]] : tensor<?xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK-DAG: %[[VAL_16:.*]] = tensor.dim %[[VAL_0]], %[[VAL_5]] : tensor<?xf64>
// CHECK-DAG: %[[VAL_18:.*]] = bufferization.to_memref %[[VAL_4]]
// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[VAL_18]] : memref<?xf64>)
@@ -1259,19 +1259,19 @@ func.func @four_tensors_op(%arga: tensor<?xf64>,
}
// CHECK-LABEL: func @red3s(
-// CHECK-SAME: %[[VAL_0:.*0]]: tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_2:.*2]]: tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*0]]: tensor<?xf64, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<?xf64, #sparse{{[0-9]*}}>, %[[VAL_2:.*2]]: tensor<?xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_3:.*3]]: tensor<f64>) -> tensor<f64> {
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 0 : index} : tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 0 : index} : tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<?xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<?xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?xf64, #sparse{{[0-9]*}}> to memref<?xf64>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<?xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xf64, #sparse{{[0-9]*}}> to memref<?xf64>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 0 : index} : tensor<?xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 0 : index} : tensor<?xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<?xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_3]] : memref<f64>
// CHECK-DAG: %[[VAL_17:.*]] = memref.load %[[VAL_15]][] : memref<f64>
// CHECK-DAG: %[[VAL_18:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
diff --git a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir
index 22681daa9414..57ae18391daf 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir
@@ -17,14 +17,14 @@
}
// CHECK-LABEL: func @add_dd(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32x16xf32>)
@@ -53,7 +53,7 @@ func.func @add_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %arg
}
// CHECK-LABEL: func.func @cmp_dd(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
@@ -61,7 +61,7 @@ func.func @add_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %arg
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant false
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1>
// CHECK: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_10]] : memref<32x16xi1>)
@@ -90,14 +90,14 @@ func.func @cmp_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %arg
}
// CHECK-LABEL: func @mul_dd(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32x16xf32>)
@@ -126,7 +126,7 @@ func.func @mul_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %arg
}
// CHECK-LABEL: func @add_ds(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
@@ -134,9 +134,9 @@ func.func @mul_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %arg
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<32x16xf32>)
@@ -189,7 +189,7 @@ func.func @add_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %arg
}
// CHECK-LABEL: func.func @cmp_ds(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
@@ -199,9 +199,9 @@ func.func @add_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %arg
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_8:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1>
// CHECK: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_14]] : memref<32x16xi1>)
@@ -256,15 +256,15 @@ func.func @cmp_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %arg
}
// CHECK-LABEL: func @mul_ds(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32x16xf32>)
@@ -295,7 +295,7 @@ func.func @mul_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %arg
}
// CHECK-LABEL: func @add_sd(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
@@ -303,9 +303,9 @@ func.func @mul_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %arg
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<32x16xf32>)
@@ -363,7 +363,7 @@ func.func @add_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %arg
}
// CHECK-LABEL: func.func @cmp_sd(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 16 : index
@@ -373,9 +373,9 @@ func.func @add_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %arg
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_8:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
// CHECK: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1>
// CHECK: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_14]] : memref<32x16xi1>)
@@ -435,15 +435,15 @@ func.func @cmp_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %arg
}
// CHECK-LABEL: func @mul_sd(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 16 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32x16xf32>)
@@ -475,7 +475,7 @@ func.func @mul_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %arg
}
// CHECK-LABEL: func @add_ss(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
@@ -483,11 +483,11 @@ func.func @mul_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %arg
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_15]] : memref<32x16xf32>)
@@ -569,7 +569,7 @@ func.func @add_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %arg
}
// CHECK-LABEL: func.func @cmp_ss(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
@@ -579,11 +579,11 @@ func.func @add_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %arg
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_8:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
// CHECK: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1>
// CHECK: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_16]] : memref<32x16xi1>)
@@ -669,16 +669,16 @@ func.func @cmp_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %arg
}
// CHECK-LABEL: func @mul_ss(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32x16xf32>)
@@ -712,20 +712,20 @@ func.func @mul_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %arg
}
// CHECK-LABEL: func @add_ss_ss(
-// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<32x16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*2]]: tensor<32x16xf32>) -> tensor<32x16xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_16]] : memref<32x16xf32>)
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@@ -875,22 +875,22 @@ func.func @add_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #T
}
// CHECK-LABEL: func.func @cmp_ss_ss(
-// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<32x16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant false
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1>
// CHECK: linalg.fill ins(%[[VAL_3]] : i1) outs(%[[VAL_17]] : memref<32x16xi1>)
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -1051,19 +1051,19 @@ func.func @cmp_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #T
map = (d0, d1) -> (d0 : dense, d1 : loose_compressed)
}>
// CHECK-LABEL: func.func @sub_ss_batched(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<2x3xf64, #{{.*}}>>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<2x3xf64, #{{.*}}>>) -> tensor<2x3xf64, #{{.*}}>> {
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<2x3xf64, #sparse{{[0-9]*}}>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<2x3xf64, #sparse{{[0-9]*}}>) -> tensor<2x3xf64, #sparse{{[0-9]*}}> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_5:.*]] = tensor.empty() : tensor<2x3xf64, #{{.*}}>>
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<2x3xf64, #{{.*}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<2x3xf64, #{{.*}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<2x3xf64, #{{.*}}>> to memref<?xf64>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<2x3xf64, #{{.*}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<2x3xf64, #{{.*}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<2x3xf64, #{{.*}}>> to memref<?xf64>
-// CHECK: %[[VAL_12:.*]] = scf.for %[[VAL_13:.*]] = %[[VAL_3]] to %[[VAL_2]] step %[[VAL_4]] iter_args(%[[VAL_14:.*]] = %[[VAL_5]]) -> (tensor<2x3xf64, #{{.*}}>>) {
+// CHECK-DAG: %[[VAL_5:.*]] = tensor.empty() : tensor<2x3xf64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<2x3xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<2x3xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<2x3xf64, #sparse{{[0-9]*}}> to memref<?xf64>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<2x3xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<2x3xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<2x3xf64, #sparse{{[0-9]*}}> to memref<?xf64>
+// CHECK: %[[VAL_12:.*]] = scf.for %[[VAL_13:.*]] = %[[VAL_3]] to %[[VAL_2]] step %[[VAL_4]] iter_args(%[[VAL_14:.*]] = %[[VAL_5]]) -> (tensor<2x3xf64, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_15:.*]] = arith.muli %[[VAL_13]], %[[VAL_2]] : index
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_15]]] : memref<?xindex>
// CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_4]] : index
@@ -1076,9 +1076,9 @@ func.func @cmp_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #T
// CHECK: %[[VAL_27:.*]] = arith.cmpi ult, %[[VAL_24]], %[[VAL_18]] : index
// CHECK: %[[VAL_28:.*]] = arith.cmpi ult, %[[VAL_25]], %[[VAL_22]] : index
// CHECK: %[[VAL_29:.*]] = arith.andi %[[VAL_27]], %[[VAL_28]] : i1
-// CHECK: scf.condition(%[[VAL_29]]) %[[VAL_24]], %[[VAL_25]], %[[VAL_26]] : index, index, tensor<2x3xf64, #{{.*}}>>
+// CHECK: scf.condition(%[[VAL_29]]) %[[VAL_24]], %[[VAL_25]], %[[VAL_26]] : index, index, tensor<2x3xf64, #sparse{{[0-9]*}}>
// CHECK: } do {
-// CHECK: ^bb0(%[[VAL_30:.*]]: index, %[[VAL_31:.*]]: index, %[[VAL_32:.*]]: tensor<2x3xf64, #{{.*}}>>):
+// CHECK: ^bb0(%[[VAL_30:.*]]: index, %[[VAL_31:.*]]: index, %[[VAL_32:.*]]: tensor<2x3xf64, #sparse{{[0-9]*}}>):
// CHECK: %[[VAL_33:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_30]]] : memref<?xindex>
// CHECK: %[[VAL_34:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_31]]] : memref<?xindex>
// CHECK: %[[VAL_35:.*]] = arith.cmpi ult, %[[VAL_34]], %[[VAL_33]] : index
@@ -1086,31 +1086,31 @@ func.func @cmp_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #T
// CHECK: %[[VAL_37:.*]] = arith.cmpi eq, %[[VAL_33]], %[[VAL_36]] : index
// CHECK: %[[VAL_38:.*]] = arith.cmpi eq, %[[VAL_34]], %[[VAL_36]] : index
// CHECK: %[[VAL_39:.*]] = arith.andi %[[VAL_37]], %[[VAL_38]] : i1
-// CHECK: %[[VAL_40:.*]] = scf.if %[[VAL_39]] -> (tensor<2x3xf64, #{{.*}}>>) {
+// CHECK: %[[VAL_40:.*]] = scf.if %[[VAL_39]] -> (tensor<2x3xf64, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_41:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_30]]] : memref<?xf64>
// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_31]]] : memref<?xf64>
// CHECK: %[[VAL_43:.*]] = arith.subf %[[VAL_41]], %[[VAL_42]] : f64
-// CHECK: %[[VAL_44:.*]] = sparse_tensor.insert %[[VAL_43]] into %[[VAL_32]]{{\[}}%[[VAL_13]], %[[VAL_36]]] : tensor<2x3xf64, #{{.*}}>>
-// CHECK: scf.yield %[[VAL_44]] : tensor<2x3xf64, #{{.*}}>>
+// CHECK: %[[VAL_44:.*]] = sparse_tensor.insert %[[VAL_43]] into %[[VAL_32]]{{\[}}%[[VAL_13]], %[[VAL_36]]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
+// CHECK: scf.yield %[[VAL_44]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
// CHECK: } else {
// CHECK: %[[VAL_45:.*]] = arith.cmpi eq, %[[VAL_33]], %[[VAL_36]] : index
-// CHECK: %[[VAL_46:.*]] = scf.if %[[VAL_45]] -> (tensor<2x3xf64, #{{.*}}>>) {
+// CHECK: %[[VAL_46:.*]] = scf.if %[[VAL_45]] -> (tensor<2x3xf64, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_47:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_30]]] : memref<?xf64>
-// CHECK: %[[VAL_48:.*]] = sparse_tensor.insert %[[VAL_47]] into %[[VAL_32]]{{\[}}%[[VAL_13]], %[[VAL_36]]] : tensor<2x3xf64, #{{.*}}>>
-// CHECK: scf.yield %[[VAL_48]] : tensor<2x3xf64, #{{.*}}>>
+// CHECK: %[[VAL_48:.*]] = sparse_tensor.insert %[[VAL_47]] into %[[VAL_32]]{{\[}}%[[VAL_13]], %[[VAL_36]]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
+// CHECK: scf.yield %[[VAL_48]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
// CHECK: } else {
// CHECK: %[[VAL_49:.*]] = arith.cmpi eq, %[[VAL_34]], %[[VAL_36]] : index
-// CHECK: %[[VAL_50:.*]] = scf.if %[[VAL_49]] -> (tensor<2x3xf64, #{{.*}}>>) {
+// CHECK: %[[VAL_50:.*]] = scf.if %[[VAL_49]] -> (tensor<2x3xf64, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_51:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_31]]] : memref<?xf64>
// CHECK: %[[VAL_52:.*]] = arith.negf %[[VAL_51]] : f64
-// CHECK: %[[VAL_53:.*]] = sparse_tensor.insert %[[VAL_52]] into %[[VAL_32]]{{\[}}%[[VAL_13]], %[[VAL_36]]] : tensor<2x3xf64, #{{.*}}>>
-// CHECK: scf.yield %[[VAL_53]] : tensor<2x3xf64, #{{.*}}>>
+// CHECK: %[[VAL_53:.*]] = sparse_tensor.insert %[[VAL_52]] into %[[VAL_32]]{{\[}}%[[VAL_13]], %[[VAL_36]]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
+// CHECK: scf.yield %[[VAL_53]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
// CHECK: } else {
-// CHECK: scf.yield %[[VAL_32]] : tensor<2x3xf64, #{{.*}}>>
+// CHECK: scf.yield %[[VAL_32]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
// CHECK: }
-// CHECK: scf.yield %[[VAL_54:.*]] : tensor<2x3xf64, #{{.*}}>>
+// CHECK: scf.yield %[[VAL_54:.*]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
// CHECK: }
-// CHECK: scf.yield %[[VAL_55:.*]] : tensor<2x3xf64, #{{.*}}>>
+// CHECK: scf.yield %[[VAL_55:.*]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
// CHECK: }
// CHECK: %[[VAL_56:.*]] = arith.cmpi eq, %[[VAL_33]], %[[VAL_36]] : index
// CHECK: %[[VAL_57:.*]] = arith.addi %[[VAL_30]], %[[VAL_4]] : index
@@ -1118,25 +1118,25 @@ func.func @cmp_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #T
// CHECK: %[[VAL_59:.*]] = arith.cmpi eq, %[[VAL_34]], %[[VAL_36]] : index
// CHECK: %[[VAL_60:.*]] = arith.addi %[[VAL_31]], %[[VAL_4]] : index
// CHECK: %[[VAL_61:.*]] = arith.select %[[VAL_59]], %[[VAL_60]], %[[VAL_31]] : index
-// CHECK: scf.yield %[[VAL_58]], %[[VAL_61]], %[[VAL_62:.*]] : index, index, tensor<2x3xf64, #{{.*}}>>
+// CHECK: scf.yield %[[VAL_58]], %[[VAL_61]], %[[VAL_62:.*]] : index, index, tensor<2x3xf64, #sparse{{[0-9]*}}>
// CHECK: } attributes
// CHECK: %[[VAL_63:.*]] = scf.for %[[VAL_64:.*]] = %[[VAL_65:.*]]#0 to %[[VAL_18]] step %[[VAL_4]] iter_args(%[[VAL_66:.*]] = %[[VAL_65]]#2)
// CHECK: %[[VAL_67:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_64]]] : memref<?xindex>
// CHECK: %[[VAL_68:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_64]]] : memref<?xf64>
-// CHECK: %[[VAL_69:.*]] = sparse_tensor.insert %[[VAL_68]] into %[[VAL_66]]{{\[}}%[[VAL_13]], %[[VAL_67]]] : tensor<2x3xf64, #{{.*}}>>
-// CHECK: scf.yield %[[VAL_69]] : tensor<2x3xf64, #{{.*}}>>
+// CHECK: %[[VAL_69:.*]] = sparse_tensor.insert %[[VAL_68]] into %[[VAL_66]]{{\[}}%[[VAL_13]], %[[VAL_67]]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
+// CHECK: scf.yield %[[VAL_69]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
// CHECK: }
// CHECK: %[[VAL_70:.*]] = scf.for %[[VAL_71:.*]] = %[[VAL_72:.*]]#1 to %[[VAL_22]] step %[[VAL_4]] iter_args(%[[VAL_73:.*]] = %[[VAL_74:.*]])
// CHECK: %[[VAL_75:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_71]]] : memref<?xindex>
// CHECK: %[[VAL_76:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_71]]] : memref<?xf64>
// CHECK: %[[VAL_77:.*]] = arith.negf %[[VAL_76]] : f64
-// CHECK: %[[VAL_78:.*]] = sparse_tensor.insert %[[VAL_77]] into %[[VAL_73]]{{\[}}%[[VAL_13]], %[[VAL_75]]] : tensor<2x3xf64, #{{.*}}>>
-// CHECK: scf.yield %[[VAL_78]] : tensor<2x3xf64, #{{.*}}>>
+// CHECK: %[[VAL_78:.*]] = sparse_tensor.insert %[[VAL_77]] into %[[VAL_73]]{{\[}}%[[VAL_13]], %[[VAL_75]]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
+// CHECK: scf.yield %[[VAL_78]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
// CHECK: }
-// CHECK: scf.yield %[[VAL_79:.*]] : tensor<2x3xf64, #{{.*}}>>
+// CHECK: scf.yield %[[VAL_79:.*]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
// CHECK: }
-// CHECK: %[[VAL_80:.*]] = sparse_tensor.load %[[VAL_81:.*]] hasInserts : tensor<2x3xf64, #{{.*}}>>
-// CHECK: return %[[VAL_80]] : tensor<2x3xf64, #{{.*}}>>
+// CHECK: %[[VAL_80:.*]] = sparse_tensor.load %[[VAL_81:.*]] hasInserts : tensor<2x3xf64, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_80]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
// CHECK: }
func.func @sub_ss_batched(%0: tensor<2x3xf64, #BatchedVector>, %1: tensor<2x3xf64, #BatchedVector>)
-> tensor<2x3xf64, #BatchedVector> {
@@ -1152,20 +1152,20 @@ func.func @sub_ss_batched(%0: tensor<2x3xf64, #BatchedVector>, %1: tensor<2x3xf6
}
// CHECK-LABEL: func @mul_ss_ss(
-// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<32x16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*2]]: tensor<32x16xf32>) -> tensor<32x16xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_16]] : memref<32x16xf32>)
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@@ -1247,19 +1247,19 @@ func.func @mul_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #T
}
// CHECK-LABEL: func @add_sd_ds(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_15]] : memref<32x16xf32>)
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_5]]] : memref<?xindex>
@@ -1352,17 +1352,17 @@ func.func @add_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #T
}
// CHECK-LABEL: func @mul_sd_ds(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 16 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<32x16xf32>)
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -1407,15 +1407,15 @@ func.func @mul_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #T
}
// CHECK-LABEL: func @matvec(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<16x32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<16x32xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<16xf32>) -> tensor<16xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 16 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<16x32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<16x32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16x32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<16x32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<16x32xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16x32xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<16xf32>
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
@@ -1458,13 +1458,13 @@ func.func @matvec(%argA: tensor<16x32xf32, #Tds>, %argb: tensor<32xf32>, %argx:
}
// CHECK-LABEL: func @sum_reduction(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<f32>) -> tensor<f32> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 10 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<f32>
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref<f32>
// CHECK: %[[VAL_10:.*]] = scf.for %[[VAL_11:.*]] = %[[VAL_4]] to %[[VAL_2]] step %[[VAL_3]] iter_args(%[[VAL_12:.*]] = %[[VAL_9]]) -> (f32) {
@@ -1503,15 +1503,15 @@ func.func @sum_reduction(%arga: tensor<10x20xf32, #Tds>, %argx: tensor<f32>) ->
}
// CHECK-LABEL: func @scale(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<?x?xf64>) -> tensor<?x?xf64> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2.000000e+00 : f64
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<?x?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<?x?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_3]] : tensor<?x?xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<?x?xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<?x?xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf64, #sparse{{[0-9]*}}> to memref<?xf64>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_3]] : tensor<?x?xf64, #sparse{{[0-9]*}}>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<?x?xf64>
// CHECK: linalg.fill ins(%{{.*}} : f64) outs(%[[VAL_11]] : memref<?x?xf64>)
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_3]] to %[[VAL_8]] step %[[VAL_4]] {
@@ -1552,17 +1552,17 @@ func.func @scale(%arga: tensor<?x?xf64, #Tds>, %argx: tensor<?x?xf64>) -> tensor
}
// CHECK-LABEL: func.func @sampled_dense_dense(
-// CHECK-SAME: %[[VAL_0:.*0]]: tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*0]]: tensor<?x?xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*1]]: tensor<?x?xf32>,
// CHECK-SAME: %[[VAL_2:.*2]]: tensor<?x?xf32>,
// CHECK-SAME: %[[VAL_3:.*3]]: tensor<?x?xf32>) -> tensor<?x?xf32> {
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_11:.*]] = tensor.dim %[[VAL_1]], %[[VAL_4]] : tensor<?x?xf32>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<?x?xf32>
// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<?x?xf32>
@@ -1621,24 +1621,24 @@ func.func @sampled_dense_dense(%args: tensor<?x?xf32, #Tss>,
}
// CHECK-LABEL: func @sum_kernel_with_inv(
-// CHECK-SAME: %[[VAL_0:.*0]]: tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_2:.*2]]: tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*0]]: tensor<?x?xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<?x?xf32, #sparse{{[0-9]*}}>, %[[VAL_2:.*2]]: tensor<?x?xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_3:.*3]]: tensor<?xf32>,
// CHECK-SAME: %[[VAL_4:.*4]]: tensor<f32>,
// CHECK-SAME: %[[VAL_5:.*5]]: tensor<?xf32>) -> tensor<?xf32> {
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_8:.*]] = arith.constant true
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
-// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
-// CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_18:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_19:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xf32>
+// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xf32>
+// CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_18:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_19:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_20:.*]] = bufferization.to_memref %[[VAL_3]] : memref<?xf32>
// CHECK-DAG: %[[VAL_21:.*]] = bufferization.to_memref %[[VAL_4]] : memref<f32>
// CHECK-DAG: %[[VAL_22:.*]] = sparse_tensor.lvl %[[VAL_2]], %[[VAL_6]] : tensor<?x?xf32,
diff --git a/mlir/test/Dialect/SparseTensor/sparse_3d.mlir b/mlir/test/Dialect/SparseTensor/sparse_3d.mlir
index f2daa77a0c24..4911c78bcff3 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_3d.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_3d.mlir
@@ -23,7 +23,7 @@
}
// CHECK-LABEL: func @add_ddd(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
@@ -32,7 +32,7 @@
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_11]] : memref<32x16x8xf32>)
@@ -65,7 +65,7 @@ func.func @add_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32>
}
// CHECK-LABEL: func @mul_ddd(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
@@ -74,7 +74,7 @@ func.func @add_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32>
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_11]] : memref<32x16x8xf32>)
@@ -107,7 +107,7 @@ func.func @mul_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32>
}
// CHECK-LABEL: func @add_dds(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
@@ -117,9 +117,9 @@ func.func @mul_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32>
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_8:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_15]] : memref<32x16x8xf32>)
@@ -176,7 +176,7 @@ func.func @add_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32>
}
// CHECK-LABEL: func @mul_dds(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
@@ -184,9 +184,9 @@ func.func @add_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32>
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 16 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : memref<32x16x8xf32>)
@@ -221,7 +221,7 @@ func.func @mul_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32>
}
// CHECK-LABEL: func @add_dsd(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
@@ -231,9 +231,9 @@ func.func @mul_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32>
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>)
@@ -294,7 +294,7 @@ func.func @add_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32>
}
// CHECK-LABEL: func @mul_dsd(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
@@ -302,9 +302,9 @@ func.func @add_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32>
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_12]] : memref<32x16x8xf32>)
@@ -339,7 +339,7 @@ func.func @mul_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32>
}
// CHECK-LABEL: func @add_dss(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
@@ -349,11 +349,11 @@ func.func @mul_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32>
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
// CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_17]] : memref<32x16x8xf32>)
@@ -438,18 +438,18 @@ func.func @add_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32>
}
// CHECK-LABEL: func @mul_dss(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>)
@@ -486,7 +486,7 @@ func.func @mul_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32>
}
// CHECK-LABEL: func @add_sdd(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
@@ -496,9 +496,9 @@ func.func @mul_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32>
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>)
@@ -564,7 +564,7 @@ func.func @add_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32>
}
// CHECK-LABEL: func @mul_sdd(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
@@ -572,9 +572,9 @@ func.func @add_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32>
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_12]] : memref<32x16x8xf32>)
@@ -610,7 +610,7 @@ func.func @mul_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32>
}
// CHECK-LABEL: func @add_sds(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
@@ -620,11 +620,11 @@ func.func @mul_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32>
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
// CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_17]] : memref<32x16x8xf32>)
@@ -714,18 +714,18 @@ func.func @add_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32>
}
// CHECK-LABEL: func @mul_sds(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>)
@@ -763,7 +763,7 @@ func.func @mul_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32>
}
// CHECK-LABEL: func @add_ssd(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
@@ -773,11 +773,11 @@ func.func @mul_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32>
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
// CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_16]] : memref<32x16x8xf32>)
@@ -871,18 +871,18 @@ func.func @add_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32>
}
// CHECK-LABEL: func @mul_ssd(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : memref<32x16x8xf32>)
@@ -920,7 +920,7 @@ func.func @mul_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32>
}
// CHECK-LABEL: func @add_sss(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
@@ -930,13 +930,13 @@ func.func @mul_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32>
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
// CHECK-DAG: %[[VAL_19:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_19]] : memref<32x16x8xf32>)
@@ -1054,19 +1054,19 @@ func.func @add_sss(%arga: tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32>
}
// CHECK-LABEL: func @mul_sss(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_15]] : memref<32x16x8xf32>)
@@ -1118,18 +1118,18 @@ func.func @mul_sss(%arga: tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32>
// CHECK-LABEL: func @kernel_3d(
// CHECK-SAME: %[[VAL_0:.*0]]: tensor<?x?xf32>,
-// CHECK-SAME: %[[VAL_1:.*1]]: tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*1]]: tensor<?x?x?xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*2]]: tensor<?x?xf32>,
// CHECK-SAME: %[[VAL_3:.*3]]: tensor<?x?xf32>) -> tensor<?x?xf32> {
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 2 : index} : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 2 : index} : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.lvl %[[VAL_1]], %[[VAL_6]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 2 : index} : tensor<?x?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 2 : index} : tensor<?x?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?x?x?xf32, #sparse{{[0-9]*}}> to memref<?xf32>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.lvl %[[VAL_1]], %[[VAL_6]] : tensor<?x?x?xf32, #sparse{{[0-9]*}}>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<?x?xf32>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_3]] : memref<?x?xf32>
-// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.lvl %[[VAL_1]], %[[VAL_5]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.lvl %[[VAL_1]], %[[VAL_5]] : tensor<?x?x?xf32, #sparse{{[0-9]*}}>
// CHECK-DAG: %[[VAL_14:.*]] = tensor.dim %[[VAL_2]], %[[VAL_6]] : tensor<?x?xf32>
// CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_0]] : memref<?x?xf32>
// CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_5]] to %[[VAL_13]] step %[[VAL_6]] {
@@ -1183,14 +1183,14 @@ func.func @kernel_3d(%arga: tensor<?x?xf32>,
}
// CHECK-LABEL: func @sum_reduction(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20x30xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<f32>) -> tensor<f32> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<10x20x30xf32, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<10x20x30xf32, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<10x20x30xf32, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20x30xf32, #sparse{{[0-9]*}}>
// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<f32>
// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_10]][] : memref<f32>
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<?xindex>
@@ -1239,7 +1239,7 @@ func.func @sum_reduction(%arga: tensor<10x20x30xf32, #Tsss>, %argx: tensor<f32>)
// CHECK-LABEL: func @sum_reduction_inv(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?x?xf32>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<?xf32, #sparse{{[0-9]*}}>
// CHECK-SAME: %[[VAL_2:.*]]: tensor<f32>) -> tensor<f32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index
@@ -1248,7 +1248,7 @@ func.func @sum_reduction(%arga: tensor<10x20x30xf32, #Tsss>, %argx: tensor<f32>)
// CHECK-DAG: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor<?x?x?xf32>
// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref<?x?x?xf32>
// CHECK-DAG: %[[VAL_9:.*]] = tensor.dim %[[VAL_0]], %[[VAL_5]] : tensor<?x?x?xf32>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xf32, #sparse{{[0-9]*}}>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<f32>
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_12]][] : memref<f32>
// CHECK: %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_9]] step %[[VAL_3]] iter_args(%[[VAL_16:.*]] = %[[VAL_13]]) -> (f32) {
@@ -1294,7 +1294,7 @@ func.func @sum_reduction_inv(%arga: tensor<?x?x?xf32>,
}
// CHECK-LABEL: func @invariants(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<10xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<10xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<20xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<30xf32>,
// CHECK-SAME: %[[VAL_3:.*]]: tensor<10x20x30xf32>) -> tensor<10x20x30xf32> {
@@ -1304,7 +1304,7 @@ func.func @sum_reduction_inv(%arga: tensor<?x?x?xf32>,
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 30 : index
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<20xf32>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<30xf32>
// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]] : memref<10x20x30xf32>
diff --git a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir
index ca55d2eb0177..aa75e20460f5 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir
@@ -16,15 +16,15 @@
}
// CHECK-LABEL: func @mul_inv_dense1d(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<4xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}>
// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<4xf32>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf32>
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref<4xf32>
@@ -56,16 +56,16 @@ func.func @mul_inv_dense1d(%arga: tensor<32xf32, #SpVec>,
}
// CHECK-LABEL: func.func @mul_inv_enc_dense1d(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> {
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<4xf32, #sparse{{[0-9]*}}>) -> tensor<32xf32, #sparse{{[0-9]*}}> {
// CHECK: %[[VAL_2:.*]] = arith.constant 32 : index
// CHECK: %[[VAL_3:.*]] = arith.constant 3 : index
// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_6:.*]] = tensor.empty() : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
-// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
-// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_6]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK: %[[VAL_6:.*]] = tensor.empty() : tensor<32xf32, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xf32>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<4xf32, #sparse{{[0-9]*}}> to memref<?xf32>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_6]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref<?xf32>
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_4]] to %[[VAL_2]] step %[[VAL_5]] {
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_11]]] : memref<?xf32>
@@ -74,8 +74,8 @@ func.func @mul_inv_dense1d(%arga: tensor<32xf32, #SpVec>,
// CHECK: %[[VAL_15:.*]] = arith.addf %[[VAL_12]], %[[VAL_14]] : f32
// CHECK: memref.store %[[VAL_15]], %[[VAL_9]]{{\[}}%[[VAL_11]]] : memref<?xf32>
// CHECK: }
-// CHECK: %[[VAL_16:.*]] = sparse_tensor.load %[[VAL_6]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: return %[[VAL_16]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_16:.*]] = sparse_tensor.load %[[VAL_6]] : tensor<32xf32, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_16]] : tensor<32xf32, #sparse{{[0-9]*}}>
// CHECK: }
func.func @mul_inv_enc_dense1d(%arga: tensor<32xf32, #EncDenseVec>,
%argb: tensor<4xf32, #EncDenseVec>) -> tensor<32xf32, #EncDenseVec> {
@@ -102,16 +102,16 @@ func.func @mul_inv_enc_dense1d(%arga: tensor<32xf32, #EncDenseVec>,
}
// CHECK-LABEL: func @and_affine_dense1d(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<34xi32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xi32>) -> tensor<32xi32> {
// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0 : i32
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi32, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi32, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi32, #sparse{{[0-9]*}}>
// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<34xi32>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi32>
// CHECK: linalg.fill ins(%[[ZERO]] : i32) outs(%[[VAL_11]] : memref<32xi32>)
@@ -152,7 +152,7 @@ func.func @and_affine_dense1d(%arga: tensor<32xi32, #SpVec>,
}
// CHECK-LABEL: func @mul_affine_dense2d(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<34x19xf64>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf64>) -> tensor<32x16xf64> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
@@ -160,9 +160,9 @@ func.func @and_affine_dense1d(%arga: tensor<32xi32, #SpVec>,
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 3 : index
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf64, #sparse{{[0-9]*}}>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<34x19xf64>
// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf64>
// CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_3]] {
@@ -209,20 +209,20 @@ func.func @mul_affine_dense2d(%arga: tensor<32x16xf64, #CSR>,
}
// CHECK-LABEL: func.func @mul_affine_dense_dim_2d(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<34x16xf64, #sparse_tensor.encoding
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x19xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<34x16xf64, #sparse{{[0-9]*}}>
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x19xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf64>) -> tensor<32x16xf64> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 19 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 3 : index
-// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<34x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<34x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<34x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
-// CHECK: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<34x16xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<34x16xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<34x16xf64, #sparse{{[0-9]*}}> to memref<?xf64>
+// CHECK: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x19xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x19xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x19xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf64>
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_5]]] : memref<?xindex>
@@ -274,19 +274,19 @@ func.func @mul_affine_dense_dim_2d(%arga: tensor<34x16xf64, #CSR>,
// CHECK-LABEL: func.func @mul_const_affine_dense_dim_2d(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<34x16xf64,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x19xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x19xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf64>) -> tensor<32x16xf64> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 19 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 3 : index
-// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<34x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<34x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<34x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
-// CHECK: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<34x16xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<34x16xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<34x16xf64, #sparse{{[0-9]*}}> to memref<?xf64>
+// CHECK: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x19xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x19xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x19xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf64>
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_6]]] : memref<?xindex>
diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir
index 86dc9a117507..a323fa85d2fd 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir
@@ -7,19 +7,19 @@
#DCSR = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : compressed, d1 : compressed)}>
// CHECK-LABEL: @concat_sparse_sparse(
-// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<2x4xf64, #sparse_tensor
-// CHECK-SAME: %[[TMP_arg1:.*]]: tensor<3x4xf64, #sparse_tensor
-// CHECK-SAME: %[[TMP_arg2:.*]]: tensor<4x4xf64, #sparse_tensor
+// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<2x4xf64, #sparse>
+// CHECK-SAME: %[[TMP_arg1:.*]]: tensor<3x4xf64, #sparse>
+// CHECK-SAME: %[[TMP_arg2:.*]]: tensor<4x4xf64, #sparse>
// CHECK: %[[TMP_c0:.*]] = arith.constant 0 : index
// CHECK: %[[TMP_c1:.*]] = arith.constant 1 : index
// CHECK: %[[TMP_c5:.*]] = arith.constant 5 : index
// CHECK: %[[TMP_c2:.*]] = arith.constant 2 : index
-// CHECK: %[[TMP_0:.*]] = bufferization.alloc_tensor() : tensor<9x4xf64, #sparse_tensor
-// CHECK: %[[TMP_1:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor
-// CHECK: %[[TMP_2:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor
-// CHECK: %[[TMP_3:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor
-// CHECK: %[[TMP_4:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor
-// CHECK: %[[TMP_5:.*]] = sparse_tensor.values %[[TMP_arg0]] : tensor<2x4xf64, #sparse_tensor
+// CHECK: %[[TMP_0:.*]] = bufferization.alloc_tensor() : tensor<9x4xf64, #sparse>
+// CHECK: %[[TMP_1:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse>
+// CHECK: %[[TMP_2:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse>
+// CHECK: %[[TMP_3:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse>
+// CHECK: %[[TMP_4:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse>
+// CHECK: %[[TMP_5:.*]] = sparse_tensor.values %[[TMP_arg0]] : tensor<2x4xf64, #sparse>
// CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_1]][%[[TMP_c0]]] : memref<?xindex>
// CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_1]][%[[TMP_c1]]] : memref<?xindex>
// CHECK: %[[RET_1:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] iter_args(%[[A0:.*]] = %[[TMP_0]])
@@ -30,16 +30,16 @@
// CHECK: %[[RET_4:.*]] = scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] iter_args(%[[A1:.*]] = %[[A0]])
// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_4]][%[[TMP_arg4]]] : memref<?xindex>
// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_5]][%[[TMP_arg4]]] : memref<?xf64>
-// CHECK: %[[NEW_1:.*]] = tensor.insert %[[TMP_28]] into %[[A1]][%[[TMP_23]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor
+// CHECK: %[[NEW_1:.*]] = tensor.insert %[[TMP_28]] into %[[A1]][%[[TMP_23]], %[[TMP_27]]] : tensor<9x4xf64, #sparse>
// CHECK: scf.yield %[[NEW_1]]
// CHECK: }
// CHECK: scf.yield %[[RET_4]]
// CHECK: }
-// CHECK: %[[TMP_8:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor
-// CHECK: %[[TMP_9:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor
-// CHECK: %[[TMP_10:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor
-// CHECK: %[[TMP_11:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor
-// CHECK: %[[TMP_12:.*]] = sparse_tensor.values %[[TMP_arg1]] : tensor<3x4xf64, #sparse_tensor
+// CHECK: %[[TMP_8:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse>
+// CHECK: %[[TMP_9:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse>
+// CHECK: %[[TMP_10:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse>
+// CHECK: %[[TMP_11:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse>
+// CHECK: %[[TMP_12:.*]] = sparse_tensor.values %[[TMP_arg1]] : tensor<3x4xf64, #sparse>
// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref<?xindex>
// CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref<?xindex>
// CHECK: %[[RET_2:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]] iter_args(%[[A2:.*]] = %[[RET_1]])
@@ -51,16 +51,16 @@
// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_11]][%[[TMP_arg4]]] : memref<?xindex>
// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_12]][%[[TMP_arg4]]] : memref<?xf64>
// CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index
-// CHECK: %[[NEW_2:.*]] = tensor.insert %[[TMP_28]] into %[[A3]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor
+// CHECK: %[[NEW_2:.*]] = tensor.insert %[[TMP_28]] into %[[A3]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse>
// CHECK: scf.yield %[[NEW_2]]
// CHECK: }
// CHECK: scf.yield %[[RET_5]]
// CHECK: }
-// CHECK: %[[TMP_15:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor
-// CHECK: %[[TMP_16:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor
-// CHECK: %[[TMP_17:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor
-// CHECK: %[[TMP_18:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor
-// CHECK: %[[TMP_19:.*]] = sparse_tensor.values %[[TMP_arg2]] : tensor<4x4xf64, #sparse_tensor
+// CHECK: %[[TMP_15:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse>
+// CHECK: %[[TMP_16:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse>
+// CHECK: %[[TMP_17:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse>
+// CHECK: %[[TMP_18:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse>
+// CHECK: %[[TMP_19:.*]] = sparse_tensor.values %[[TMP_arg2]] : tensor<4x4xf64, #sparse>
// CHECK: %[[TMP_20:.*]] = memref.load %[[TMP_15]][%[[TMP_c0]]] : memref<?xindex>
// CHECK: %[[TMP_21:.*]] = memref.load %[[TMP_15]][%[[TMP_c1]]] : memref<?xindex>
// CHECK: %[[RET_3:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]] iter_args(%[[A4:.*]] = %[[RET_2]])
@@ -72,13 +72,13 @@
// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_18]][%[[TMP_arg4]]] : memref<?xindex>
// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_19]][%[[TMP_arg4]]] : memref<?xf64>
// CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c5]] : index
-// CHECK: %[[NEW_3:.*]] = tensor.insert %[[TMP_28]] into %[[A5]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor
+// CHECK: %[[NEW_3:.*]] = tensor.insert %[[TMP_28]] into %[[A5]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse>
// CHECK: scf.yield %[[NEW_3]]
// CHECK: }
// CHECK: scf.yield %[[RET_6]]
// CHECK: }
// CHECK: %[[TMP_23:.*]] = sparse_tensor.load %[[RET_3]] hasInserts
-// CHECK: return %[[TMP_23]] : tensor<9x4xf64, #sparse_tensor
+// CHECK: return %[[TMP_23]] : tensor<9x4xf64, #sparse>
func.func @concat_sparse_sparse(%arg0: tensor<2x4xf64, #DCSR>,
%arg1: tensor<3x4xf64, #DCSR>,
%arg2: tensor<4x4xf64, #DCSR>)
@@ -91,21 +91,21 @@ func.func @concat_sparse_sparse(%arg0: tensor<2x4xf64, #DCSR>,
}
// CHECK-LABEL: @concat_sparse_sparse_dynamic(
-// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<2x4xf64, #sparse_tensor
-// CHECK-SAME: %[[TMP_arg1:.*]]: tensor<3x4xf64, #sparse_tensor
-// CHECK-SAME: %[[TMP_arg2:.*]]: tensor<4x4xf64, #sparse_tensor
+// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<2x4xf64, #sparse>
+// CHECK-SAME: %[[TMP_arg1:.*]]: tensor<3x4xf64, #sparse>
+// CHECK-SAME: %[[TMP_arg2:.*]]: tensor<4x4xf64, #sparse>
// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[TMP_c5:.*]] = arith.constant 5 : index
// CHECK-DAG: %[[TMP_c2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[TMP_c9:.*]] = arith.constant 9 : index
// CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index
-// CHECK: %[[TMP_0:.*]] = bufferization.alloc_tensor(%[[TMP_c9]], %[[TMP_c4]]) : tensor<?x?xf64, #sparse_tensor
-// CHECK: %[[TMP_1:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor
-// CHECK: %[[TMP_2:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor
-// CHECK: %[[TMP_3:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor
-// CHECK: %[[TMP_4:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor
-// CHECK: %[[TMP_5:.*]] = sparse_tensor.values %[[TMP_arg0]] : tensor<2x4xf64, #sparse_tensor
+// CHECK: %[[TMP_0:.*]] = bufferization.alloc_tensor(%[[TMP_c9]], %[[TMP_c4]]) : tensor<?x?xf64, #sparse>
+// CHECK: %[[TMP_1:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse>
+// CHECK: %[[TMP_2:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse>
+// CHECK: %[[TMP_3:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse>
+// CHECK: %[[TMP_4:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse>
+// CHECK: %[[TMP_5:.*]] = sparse_tensor.values %[[TMP_arg0]] : tensor<2x4xf64, #sparse>
// CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_1]][%[[TMP_c0]]] : memref<?xindex>
// CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_1]][%[[TMP_c1]]] : memref<?xindex>
// CHECK: %[[RET_1:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] iter_args(%[[A0:.*]] = %[[TMP_0]])
@@ -116,16 +116,16 @@ func.func @concat_sparse_sparse(%arg0: tensor<2x4xf64, #DCSR>,
// CHECK: %[[RET_4:.*]] = scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] iter_args(%[[A1:.*]] = %[[A0]])
// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_4]][%[[TMP_arg4]]] : memref<?xindex>
// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_5]][%[[TMP_arg4]]] : memref<?xf64>
-// CHECK: %[[NEW_1:.*]] = tensor.insert %[[TMP_28]] into %[[A1]][%[[TMP_23]], %[[TMP_27]]] : tensor<?x?xf64, #sparse_tensor
+// CHECK: %[[NEW_1:.*]] = tensor.insert %[[TMP_28]] into %[[A1]][%[[TMP_23]], %[[TMP_27]]] : tensor<?x?xf64, #sparse>
// CHECK: scf.yield %[[NEW_1]]
// CHECK: }
// CHECK: scf.yield %[[RET_4]]
// CHECK: }
-// CHECK: %[[TMP_8:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor
-// CHECK: %[[TMP_9:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor
-// CHECK: %[[TMP_10:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor
-// CHECK: %[[TMP_11:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor
-// CHECK: %[[TMP_12:.*]] = sparse_tensor.values %[[TMP_arg1]] : tensor<3x4xf64, #sparse_tensor
+// CHECK: %[[TMP_8:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse>
+// CHECK: %[[TMP_9:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse>
+// CHECK: %[[TMP_10:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse>
+// CHECK: %[[TMP_11:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse>
+// CHECK: %[[TMP_12:.*]] = sparse_tensor.values %[[TMP_arg1]] : tensor<3x4xf64, #sparse>
// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref<?xindex>
// CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref<?xindex>
// CHECK: %[[RET_2:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]] iter_args(%[[A2:.*]] = %[[RET_1]])
@@ -137,16 +137,16 @@ func.func @concat_sparse_sparse(%arg0: tensor<2x4xf64, #DCSR>,
// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_11]][%[[TMP_arg4]]] : memref<?xindex>
// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_12]][%[[TMP_arg4]]] : memref<?xf64>
// CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index
-// CHECK: %[[NEW_2:.*]] = tensor.insert %[[TMP_28]] into %[[A3]][%[[TMP_29]], %[[TMP_27]]] : tensor<?x?xf64, #sparse_tensor
+// CHECK: %[[NEW_2:.*]] = tensor.insert %[[TMP_28]] into %[[A3]][%[[TMP_29]], %[[TMP_27]]] : tensor<?x?xf64, #sparse>
// CHECK: scf.yield %[[NEW_2]]
// CHECK: }
// CHECK: scf.yield %[[RET_5]]
// CHECK: }
-// CHECK: %[[TMP_15:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor
-// CHECK: %[[TMP_16:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor
-// CHECK: %[[TMP_17:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor
-// CHECK: %[[TMP_18:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor
-// CHECK: %[[TMP_19:.*]] = sparse_tensor.values %[[TMP_arg2]] : tensor<4x4xf64, #sparse_tensor
+// CHECK: %[[TMP_15:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse>
+// CHECK: %[[TMP_16:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse>
+// CHECK: %[[TMP_17:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse>
+// CHECK: %[[TMP_18:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse>
+// CHECK: %[[TMP_19:.*]] = sparse_tensor.values %[[TMP_arg2]] : tensor<4x4xf64, #sparse>
// CHECK: %[[TMP_20:.*]] = memref.load %[[TMP_15]][%[[TMP_c0]]] : memref<?xindex>
// CHECK: %[[TMP_21:.*]] = memref.load %[[TMP_15]][%[[TMP_c1]]] : memref<?xindex>
// CHECK: %[[RET_3:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]] iter_args(%[[A4:.*]] = %[[RET_2]])
@@ -158,13 +158,13 @@ func.func @concat_sparse_sparse(%arg0: tensor<2x4xf64, #DCSR>,
// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_18]][%[[TMP_arg4]]] : memref<?xindex>
// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_19]][%[[TMP_arg4]]] : memref<?xf64>
// CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c5]] : index
-// CHECK: %[[NEW_3:.*]] = tensor.insert %[[TMP_28]] into %[[A5]][%[[TMP_29]], %[[TMP_27]]] : tensor<?x?xf64, #sparse_tensor
+// CHECK: %[[NEW_3:.*]] = tensor.insert %[[TMP_28]] into %[[A5]][%[[TMP_29]], %[[TMP_27]]] : tensor<?x?xf64, #sparse>
// CHECK: scf.yield %[[NEW_3]]
// CHECK: }
// CHECK: scf.yield %[[RET_6]]
// CHECK: }
// CHECK: %[[TMP_23:.*]] = sparse_tensor.load %[[RET_3]] hasInserts
-// CHECK: return %[[TMP_23]] : tensor<?x?xf64, #sparse_tensor
+// CHECK: return %[[TMP_23]] : tensor<?x?xf64, #sparse>
func.func @concat_sparse_sparse_dynamic(%arg0: tensor<2x4xf64, #DCSR>,
%arg1: tensor<3x4xf64, #DCSR>,
%arg2: tensor<4x4xf64, #DCSR>)
@@ -177,9 +177,9 @@ func.func @concat_sparse_sparse_dynamic(%arg0: tensor<2x4xf64, #DCSR>,
}
// CHECK-LABEL: func.func @concat_sparse_sparse_dense(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<2x4xf64, #sparse_tensor
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<3x4xf64, #sparse_tensor
-// CHECK-SAME: %[[VAL_2:.*]]: tensor<4x4xf64, #sparse_tensor
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<2x4xf64, #sparse>
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<3x4xf64, #sparse>
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<4x4xf64, #sparse>
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 4 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 9 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 5 : index
@@ -189,11 +189,11 @@ func.func @concat_sparse_sparse_dynamic(%arg0: tensor<2x4xf64, #DCSR>,
// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 2 : index
// CHECK: %[[VAL_10:.*]] = bufferization.alloc_tensor(%[[VAL_4]], %[[VAL_3]]) : tensor<?x?xf64>
// CHECK: %[[VAL_11:.*]] = linalg.fill ins(%[[VAL_6]] : f64) outs(%[[VAL_10]] : tensor<?x?xf64>) -> tensor<?x?xf64>
-// CHECK: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor
-// CHECK: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor
-// CHECK: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor
-// CHECK: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor
-// CHECK: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<2x4xf64, #sparse_tensor
+// CHECK: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<2x4xf64, #sparse>
+// CHECK: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<2x4xf64, #sparse>
+// CHECK: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<2x4xf64, #sparse>
+// CHECK: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<2x4xf64, #sparse>
+// CHECK: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<2x4xf64, #sparse>
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_7]]] : memref<?xindex>
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_8]]] : memref<?xindex>
// CHECK: %[[VAL_19:.*]] = scf.for %[[VAL_20:.*]] = %[[VAL_17]] to %[[VAL_18]] step %[[VAL_8]] iter_args(%[[VAL_21:.*]] = %[[VAL_11]]) -> (tensor<?x?xf64>) {
@@ -209,11 +209,11 @@ func.func @concat_sparse_sparse_dynamic(%arg0: tensor<2x4xf64, #DCSR>,
// CHECK: }
// CHECK: scf.yield %[[VAL_26]] : tensor<?x?xf64>
// CHECK: }
-// CHECK: %[[VAL_32:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor
-// CHECK: %[[VAL_33:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor
-// CHECK: %[[VAL_34:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor
-// CHECK: %[[VAL_35:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor
-// CHECK: %[[VAL_36:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x4xf64, #sparse_tensor
+// CHECK: %[[VAL_32:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<3x4xf64, #sparse>
+// CHECK: %[[VAL_33:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<3x4xf64, #sparse>
+// CHECK: %[[VAL_34:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<3x4xf64, #sparse>
+// CHECK: %[[VAL_35:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<3x4xf64, #sparse>
+// CHECK: %[[VAL_36:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x4xf64, #sparse>
// CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_32]]{{\[}}%[[VAL_7]]] : memref<?xindex>
// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_32]]{{\[}}%[[VAL_8]]] : memref<?xindex>
// CHECK: %[[VAL_39:.*]] = scf.for %[[VAL_40:.*]] = %[[VAL_37]] to %[[VAL_38]] step %[[VAL_8]] iter_args(%[[VAL_41:.*]] = %[[VAL_19]]) -> (tensor<?x?xf64>) {
@@ -230,11 +230,11 @@ func.func @concat_sparse_sparse_dynamic(%arg0: tensor<2x4xf64, #DCSR>,
// CHECK: }
// CHECK: scf.yield %[[VAL_46]] : tensor<?x?xf64>
// CHECK: }
-// CHECK: %[[VAL_53:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor
-// CHECK: %[[VAL_54:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor
-// CHECK: %[[VAL_55:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor
-// CHECK: %[[VAL_56:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor
-// CHECK: %[[VAL_57:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<4x4xf64, #sparse_tensor
+// CHECK: %[[VAL_53:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 0 : index} : tensor<4x4xf64, #sparse>
+// CHECK: %[[VAL_54:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 0 : index} : tensor<4x4xf64, #sparse>
+// CHECK: %[[VAL_55:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor<4x4xf64, #sparse>
+// CHECK: %[[VAL_56:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor<4x4xf64, #sparse>
+// CHECK: %[[VAL_57:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<4x4xf64, #sparse>
// CHECK: %[[VAL_58:.*]] = memref.load %[[VAL_53]]{{\[}}%[[VAL_7]]] : memref<?xindex>
// CHECK: %[[VAL_59:.*]] = memref.load %[[VAL_53]]{{\[}}%[[VAL_8]]] : memref<?xindex>
// CHECK: %[[VAL_60:.*]] = scf.for %[[VAL_61:.*]] = %[[VAL_58]] to %[[VAL_59]] step %[[VAL_8]] iter_args(%[[VAL_62:.*]] = %[[VAL_39]]) -> (tensor<?x?xf64>) {
diff --git a/mlir/test/Dialect/SparseTensor/sparse_conv_2d_slice_based.mlir b/mlir/test/Dialect/SparseTensor/sparse_conv_2d_slice_based.mlir
index de0582e5c688..0cd57ac64b50 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_conv_2d_slice_based.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_conv_2d_slice_based.mlir
@@ -7,8 +7,8 @@
#DCSR = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed, d1 : compressed) }>
// CHECK-LABEL: func.func @conv2d_all_sparse_CSR(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xi32, #sparse_tensor.encoding<{{.*}}>>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<3x3xi32>) -> tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>> {
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xi32, #sparse{{[0-9]*}}>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<3x3xi32>) -> tensor<6x6xi32, #sparse{{[0-9]*}}> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant -2 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 8 : index
@@ -19,12 +19,12 @@
// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 4 : index
// CHECK-DAG: %[[VAL_10:.*]] = arith.constant 0 : i32
// CHECK-DAG: %[[VAL_11:.*]] = arith.constant false
-// CHECK-DAG: %[[VAL_12:.*]] = tensor.empty() : tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>
-// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xi32, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8x8xi32, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xi32, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xi32, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xi32, #sparse_tensor.encoding<{{.*}}>> to memref<?xi32>
+// CHECK-DAG: %[[VAL_12:.*]] = tensor.empty() : tensor<6x6xi32, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8x8xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xi32, #sparse{{[0-9]*}}> to memref<?xi32>
// CHECK-DAG: %[[VAL_18:.*]] = memref.alloca() : memref<11xindex>
// CHECK-DAG: %[[VAL_19:.*]] = memref.alloca() : memref<5xindex>
// CHECK-DAG: %[[VAL_20:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_7]]] : memref<?xindex>
@@ -38,10 +38,10 @@
// CHECK: %[[VAL_24:.*]] = arith.andi %[[VAL_21]], %[[VAL_23]] : i1
// CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_22]], %[[VAL_3]] : index
// CHECK: %[[VAL_26:.*]] = arith.select %[[VAL_24]], %[[VAL_25]], %[[VAL_6]] : index
-// CHECK: %[[VAL_27:.*]]:3 = scf.while (%[[VAL_28:.*]] = %[[VAL_21]], %[[VAL_29:.*]] = %[[VAL_22]], %[[VAL_30:.*]] = %[[VAL_26]], %[[VAL_31:.*]] = %[[VAL_12]]) : (i1, index, index, tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>) -> (index, index, tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>) {
-// CHECK: scf.condition(%[[VAL_28]]) %[[VAL_29]], %[[VAL_30]], %[[VAL_31]] : index, index, tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>
+// CHECK: %[[VAL_27:.*]]:3 = scf.while (%[[VAL_28:.*]] = %[[VAL_21]], %[[VAL_29:.*]] = %[[VAL_22]], %[[VAL_30:.*]] = %[[VAL_26]], %[[VAL_31:.*]] = %[[VAL_12]]) : (i1, index, index, tensor<6x6xi32, #sparse{{[0-9]*}}>) -> (index, index, tensor<6x6xi32, #sparse{{[0-9]*}}>) {
+// CHECK: scf.condition(%[[VAL_28]]) %[[VAL_29]], %[[VAL_30]], %[[VAL_31]] : index, index, tensor<6x6xi32, #sparse{{[0-9]*}}>
// CHECK: } do {
-// CHECK: ^bb0(%[[VAL_32:.*]]: index, %[[VAL_33:.*]]: index, %[[VAL_34:.*]]: tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>):
+// CHECK: ^bb0(%[[VAL_32:.*]]: index, %[[VAL_33:.*]]: index, %[[VAL_34:.*]]: tensor<6x6xi32, #sparse{{[0-9]*}}>):
// CHECK: %[[VAL_35:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_6]]] : memref<5xindex>
// CHECK: %[[VAL_36:.*]]:4 = scf.for %[[VAL_37:.*]] = %[[VAL_8]] to %[[VAL_35]] step %[[VAL_5]] iter_args(%[[VAL_38:.*]] = %[[VAL_11]], %[[VAL_39:.*]] = %[[VAL_4]], %[[VAL_40:.*]] = %[[VAL_8]], %[[VAL_41:.*]] = %[[VAL_6]]) -> (i1, index, index, index) {
// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_37]]] : memref<5xindex>
@@ -90,10 +90,10 @@
// CHECK: %[[VAL_79:.*]] = arith.andi %[[VAL_77]]#0, %[[VAL_78]] : i1
// CHECK: %[[VAL_80:.*]] = arith.addi %[[VAL_77]]#1, %[[VAL_3]] : index
// CHECK: %[[VAL_81:.*]] = arith.select %[[VAL_79]], %[[VAL_80]], %[[VAL_6]] : index
-// CHECK: %[[VAL_82:.*]]:3 = scf.while (%[[VAL_83:.*]] = %[[VAL_77]]#0, %[[VAL_84:.*]] = %[[VAL_77]]#1, %[[VAL_85:.*]] = %[[VAL_81]], %[[VAL_86:.*]] = %[[VAL_34]]) : (i1, index, index, tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>) -> (index, index, tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>) {
-// CHECK: scf.condition(%[[VAL_83]]) %[[VAL_84]], %[[VAL_85]], %[[VAL_86]] : index, index, tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>
+// CHECK: %[[VAL_82:.*]]:3 = scf.while (%[[VAL_83:.*]] = %[[VAL_77]]#0, %[[VAL_84:.*]] = %[[VAL_77]]#1, %[[VAL_85:.*]] = %[[VAL_81]], %[[VAL_86:.*]] = %[[VAL_34]]) : (i1, index, index, tensor<6x6xi32, #sparse{{[0-9]*}}>) -> (index, index, tensor<6x6xi32, #sparse{{[0-9]*}}>) {
+// CHECK: scf.condition(%[[VAL_83]]) %[[VAL_84]], %[[VAL_85]], %[[VAL_86]] : index, index, tensor<6x6xi32, #sparse{{[0-9]*}}>
// CHECK: } do {
-// CHECK: ^bb0(%[[VAL_87:.*]]: index, %[[VAL_88:.*]]: index, %[[VAL_89:.*]]: tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>):
+// CHECK: ^bb0(%[[VAL_87:.*]]: index, %[[VAL_88:.*]]: index, %[[VAL_89:.*]]: tensor<6x6xi32, #sparse{{[0-9]*}}>):
// CHECK: %[[VAL_90:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_7]]] : memref<5xindex>
// CHECK: %[[VAL_91:.*]] = arith.addi %[[VAL_90]], %[[VAL_8]] : index
// CHECK: %[[VAL_92:.*]] = arith.addi %[[VAL_90]], %[[VAL_5]] : index
@@ -145,11 +145,11 @@
// CHECK: memref.store %[[VAL_112]], %[[VAL_18]]{{\[}}%[[VAL_7]]] : memref<11xindex>
// CHECK: scf.yield %[[VAL_133]], %[[VAL_136:.*]]#1, %[[VAL_2]] : index, i32, i1
// CHECK: }
-// CHECK: %[[VAL_137:.*]] = scf.if %[[VAL_138:.*]]#2 -> (tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>) {
-// CHECK: %[[VAL_139:.*]] = sparse_tensor.insert %[[VAL_138]]#1 into %[[VAL_89]]{{\[}}%[[VAL_33]], %[[VAL_88]]] : tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>
-// CHECK: scf.yield %[[VAL_139]] : tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>
+// CHECK: %[[VAL_137:.*]] = scf.if %[[VAL_138:.*]]#2 -> (tensor<6x6xi32, #sparse{{[0-9]*}}>) {
+// CHECK: %[[VAL_139:.*]] = sparse_tensor.insert %[[VAL_138]]#1 into %[[VAL_89]]{{\[}}%[[VAL_33]], %[[VAL_88]]] : tensor<6x6xi32, #sparse{{[0-9]*}}>
+// CHECK: scf.yield %[[VAL_139]] : tensor<6x6xi32, #sparse{{[0-9]*}}>
// CHECK: } else {
-// CHECK: scf.yield %[[VAL_89]] : tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>
+// CHECK: scf.yield %[[VAL_89]] : tensor<6x6xi32, #sparse{{[0-9]*}}>
// CHECK: }
// CHECK: memref.store %[[VAL_6]], %[[VAL_19]]{{\[}}%[[VAL_7]]] : memref<5xindex>
// CHECK: memref.store %[[VAL_6]], %[[VAL_18]]{{\[}}%[[VAL_7]]] : memref<11xindex>
@@ -202,7 +202,7 @@
// CHECK: %[[VAL_175:.*]] = arith.addi %[[VAL_174]], %[[VAL_5]] : index
// CHECK: %[[VAL_176:.*]] = arith.cmpi ule, %[[VAL_175]], %[[VAL_4]] : index
// CHECK: %[[VAL_177:.*]] = arith.andi %[[VAL_173]]#1, %[[VAL_176]] : i1
-// CHECK: scf.yield %[[VAL_177]], %[[VAL_173]]#0, %[[VAL_174]], %[[VAL_178:.*]] : i1, index, index, tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>
+// CHECK: scf.yield %[[VAL_177]], %[[VAL_173]]#0, %[[VAL_174]], %[[VAL_178:.*]] : i1, index, index, tensor<6x6xi32, #sparse{{[0-9]*}}>
// CHECK: }
// CHECK: memref.store %[[VAL_6]], %[[VAL_19]]{{\[}}%[[VAL_7]]] : memref<5xindex>
// CHECK: %[[VAL_179:.*]] = arith.cmpi ugt, %[[VAL_32]], %[[VAL_33]] : index
@@ -254,10 +254,10 @@
// CHECK: %[[VAL_214:.*]] = arith.addi %[[VAL_213]], %[[VAL_5]] : index
// CHECK: %[[VAL_215:.*]] = arith.cmpi ule, %[[VAL_214]], %[[VAL_4]] : index
// CHECK: %[[VAL_216:.*]] = arith.andi %[[VAL_212]]#1, %[[VAL_215]] : i1
-// CHECK: scf.yield %[[VAL_216]], %[[VAL_212]]#0, %[[VAL_213]], %[[VAL_217:.*]]#2 : i1, index, index, tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>
+// CHECK: scf.yield %[[VAL_216]], %[[VAL_212]]#0, %[[VAL_213]], %[[VAL_217:.*]]#2 : i1, index, index, tensor<6x6xi32, #sparse{{[0-9]*}}>
// CHECK: }
-// CHECK: %[[VAL_218:.*]] = sparse_tensor.load %[[VAL_219:.*]]#2 hasInserts : tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>
-// CHECK: return %[[VAL_218]] : tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>
+// CHECK: %[[VAL_218:.*]] = sparse_tensor.load %[[VAL_219:.*]]#2 hasInserts : tensor<6x6xi32, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_218]] : tensor<6x6xi32, #sparse{{[0-9]*}}>
// CHECK: }
func.func @conv2d_all_sparse_CSR(%arg0: tensor<8x8xi32, #DCSR>,
%arg1: tensor<3x3xi32>) -> tensor<6x6xi32, #DCSR> {
diff --git a/mlir/test/Dialect/SparseTensor/sparse_extract_slice.mlir b/mlir/test/Dialect/SparseTensor/sparse_extract_slice.mlir
index ae53f4d854e2..c501a09baa6e 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_extract_slice.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_extract_slice.mlir
@@ -12,7 +12,7 @@
// CHECK-SAME: %[[VAL_0:.*0]]: memref<?xindex>,
// CHECK-SAME: %[[VAL_1:.*1]]: memref<?xindex>,
// CHECK-SAME: %[[VAL_2:.*2]]: memref<?xf64>,
-// CHECK-SAME: %[[VAL_3:.*3]]: !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>)
+// CHECK-SAME: %[[VAL_3:.*3]]: !sparse_tensor.storage_specifier<#sparse{{[0-9]*}}>)
// CHECK: %[[VAL_4:.*]] = sparse_tensor.storage_specifier.init with %[[VAL_3]]
// CHECK: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_6:.*]] = arith.constant 4 : index
diff --git a/mlir/test/Dialect/SparseTensor/sparse_foreach.mlir b/mlir/test/Dialect/SparseTensor/sparse_foreach.mlir
index 5983289c752e..eb611156722a 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_foreach.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_foreach.mlir
@@ -143,13 +143,13 @@ func.func @foreach_print_slice(%A: tensor<4x4xf64, #CSR_SLICE>) {
}>
// CHECK-LABEL: func.func @foreach_bcoo(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<4x4x4xf64, #{{.*}}>>) {
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<4x4x4xf64, #sparse{{[0-9]*}}>) {
// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 4 : index
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<4x4x4xf64, #{{.*}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<4x4x4xf64, #{{.*}}>> to memref<?xf64>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<4x4x4xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<4x4x4xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: scf.for %[[VAL_7:.*]] = %[[VAL_2]] to %[[VAL_1]] step %[[VAL_3]] {
// CHECK: %[[VAL_8:.*]] = arith.muli %[[VAL_7]], %[[VAL_4]] : index
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_8]]] : memref<?xindex>
diff --git a/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir b/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir
index f25a56469d7e..8c09523174bd 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir
@@ -31,13 +31,13 @@
}
// CHECK-LABEL: func @abs(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64>) -> tensor<32xf64> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64>
// CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@@ -63,13 +63,13 @@ func.func @abs(%arga: tensor<32xf64, #SV>,
}
// CHECK-LABEL: func @ceil(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64>) -> tensor<32xf64> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64>
// CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@@ -95,13 +95,13 @@ func.func @ceil(%arga: tensor<32xf64, #SV>,
}
// CHECK-LABEL: func @floor(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64>) -> tensor<32xf64> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64>
// CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@@ -127,13 +127,13 @@ func.func @floor(%arga: tensor<32xf64, #SV>,
}
// CHECK-LABEL: func @neg(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64>) -> tensor<32xf64> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64>
// CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@@ -159,16 +159,16 @@ func.func @neg(%arga: tensor<32xf64, #SV>,
}
// CHECK-LABEL: func @add(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}>
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64>
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64>
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -219,16 +219,16 @@ func.func @add(%arga: tensor<32xf64, #SV>,
}
// CHECK-LABEL: func @sub(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64>
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64>
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -281,14 +281,14 @@ func.func @sub(%arga: tensor<32xf64, #SV>,
}
// CHECK-LABEL: func @mul(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}>
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64>
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64>
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@@ -317,14 +317,14 @@ func.func @mul(%arga: tensor<32xf64, #SV>,
}
// CHECK-LABEL: func @divbyc(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64>) -> tensor<32xf64> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2.000000e+00 : f64
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}>
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64>
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -351,13 +351,13 @@ func.func @divbyc(%arga: tensor<32xf64, #SV>,
}
// CHECK-LABEL: func.func @zero_preserving_math(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> {
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse{{[0-9]*}}>) -> tensor<32xf64, #sparse{{[0-9]*}}> {
// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_3:.*]] = tensor.empty() : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK: %[[VAL_3:.*]] = tensor.empty() : tensor<32xf64, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: %[[VAL_7:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_1]]] : memref<?xindex>
// CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK: %[[T:.*]] = scf.for %[[VAL_9:.*]] = %[[VAL_7]] to %[[VAL_8]] step %[[VAL_2]] {{.*}} {
@@ -371,11 +371,11 @@ func.func @divbyc(%arga: tensor<32xf64, #SV>,
// CHECK: %[[VAL_17:.*]] = math.log1p %[[VAL_16]] : f64
// CHECK: %[[VAL_18:.*]] = math.sin %[[VAL_17]] : f64
// CHECK: %[[VAL_19:.*]] = math.tanh %[[VAL_18]] : f64
-// CHECK: %[[Y:.*]] = sparse_tensor.insert %[[VAL_19]] into %{{.*}}[%[[VAL_10]]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[Y:.*]] = sparse_tensor.insert %[[VAL_19]] into %{{.*}}[%[[VAL_10]]] : tensor<32xf64, #sparse{{[0-9]*}}>
// CHECK: scf.yield %[[Y]]
// CHECK: }
-// CHECK: %[[VAL_20:.*]] = sparse_tensor.load %[[T]] hasInserts : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: return %[[VAL_20]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_20:.*]] = sparse_tensor.load %[[T]] hasInserts : tensor<32xf64, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_20]] : tensor<32xf64, #sparse{{[0-9]*}}>
// CHECK: }
func.func @zero_preserving_math(%arga: tensor<32xf64, #SV>) -> tensor<32xf64, #SV> {
%c32 = arith.constant 32 : index
@@ -398,25 +398,25 @@ func.func @zero_preserving_math(%arga: tensor<32xf64, #SV>) -> tensor<32xf64, #S
}
// CHECK-LABEL: func.func @complex_divbyc(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xcomplex<f64>, #sparse_tensor.encoding<{{{.*}}}>> {
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xcomplex<f64>, #sparse{{.*}}> {
// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_3:.*]] = complex.constant [0.000000e+00, 1.000000e+00] : complex<f64>
-// CHECK: %[[VAL_4:.*]] = tensor.empty() : tensor<32xcomplex<f64>, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xcomplex<f64>, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xcomplex<f64>, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xcomplex<f64>, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xcomplex<f64>>
+// CHECK: %[[VAL_4:.*]] = tensor.empty() : tensor<32xcomplex<f64>, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xcomplex<f64>, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xcomplex<f64>, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xcomplex<f64>, #sparse{{[0-9]*}}> to memref<?xcomplex<f64>>
// CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_1]]] : memref<?xindex>
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK: %[[T:.*]] = scf.for %[[VAL_10:.*]] = %[[VAL_8]] to %[[VAL_9]] step %[[VAL_2]] {{.*}} {
// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref<?xindex>
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_10]]] : memref<?xcomplex<f64>>
// CHECK: %[[VAL_13:.*]] = complex.div %[[VAL_12]], %[[VAL_3]] : complex<f64>
-// CHECK: %[[Y:.*]] = sparse_tensor.insert %[[VAL_13]] into %{{.*}}[%[[VAL_11]]] : tensor<32xcomplex<f64>, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[Y:.*]] = sparse_tensor.insert %[[VAL_13]] into %{{.*}}[%[[VAL_11]]] : tensor<32xcomplex<f64>, #sparse{{[0-9]*}}>
// CHECK: scf.yield %[[Y]]
// CHECK: }
-// CHECK: %[[VAL_14:.*]] = sparse_tensor.load %[[T]] hasInserts : tensor<32xcomplex<f64>, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: return %[[VAL_14]] : tensor<32xcomplex<f64>, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_14:.*]] = sparse_tensor.load %[[T]] hasInserts : tensor<32xcomplex<f64>, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_14]] : tensor<32xcomplex<f64>, #sparse{{[0-9]*}}>
// CHECK: }
func.func @complex_divbyc(%arg0: tensor<32xcomplex<f64>, #SV>) -> tensor<32xcomplex<f64>, #SV> {
%c = complex.constant [0.0, 1.0] : complex<f64>
diff --git a/mlir/test/Dialect/SparseTensor/sparse_index.mlir b/mlir/test/Dialect/SparseTensor/sparse_index.mlir
index e49c89e01790..b09bd0a74009 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_index.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_index.mlir
@@ -18,17 +18,17 @@
}
// CHECK-LABEL: func.func @dense_index(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?xi64, #sparse_tensor.encoding
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?xi64, #sparse{{[0-9]*}}>
// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_3:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_1]] : tensor<?x?xi64, #sparse_tensor.encoding
-// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_1]] : tensor<?x?xi64, #sparse_tensor.encoding
-// CHECK-DAG: %[[VAL_5:.*]] = tensor.empty(%[[VAL_3]], %[[VAL_4]]) : tensor<?x?xi64, #sparse_tensor.encoding
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xi64, #sparse_tensor.encoding
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_1]] : tensor<?x?xi64, #sparse_tensor.encoding
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_2]] : tensor<?x?xi64, #sparse_tensor.encoding
-// CHECK-DAG: %[[VAL_24:.*]] = sparse_tensor.lvl %[[VAL_5]], %[[VAL_2]] : tensor<?x?xi64, #sparse_tensor.encoding
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_5]] : tensor<?x?xi64, #sparse_tensor.encoding
+// CHECK-DAG: %[[VAL_3:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_1]] : tensor<?x?xi64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_1]] : tensor<?x?xi64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_5:.*]] = tensor.empty(%[[VAL_3]], %[[VAL_4]]) : tensor<?x?xi64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xi64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_1]] : tensor<?x?xi64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_2]] : tensor<?x?xi64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_24:.*]] = sparse_tensor.lvl %[[VAL_5]], %[[VAL_2]] : tensor<?x?xi64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_5]] : tensor<?x?xi64, #sparse{{[0-9]*}}>
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_1]] to %[[VAL_7]] step %[[VAL_2]] {
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_1]] to %[[VAL_8]] step %[[VAL_2]] {
// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_8]], %[[VAL_10]] : index
@@ -43,8 +43,8 @@
// CHECK: memref.store %[[VAL_20]], %[[VAL_9]]{{\[}}%[[VAL_15]]] : memref<?xi64>
// CHECK: }
// CHECK: }
-// CHECK: %[[VAL_21:.*]] = sparse_tensor.load %[[VAL_5]] : tensor<?x?xi64, #sparse_tensor.encoding
-// CHECK: return %[[VAL_21]] : tensor<?x?xi64, #sparse_tensor.encoding
+// CHECK: %[[VAL_21:.*]] = sparse_tensor.load %[[VAL_5]] : tensor<?x?xi64, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_21]] : tensor<?x?xi64, #sparse{{[0-9]*}}>
// CHECK: }
func.func @dense_index(%arga: tensor<?x?xi64, #DenseMatrix>)
-> tensor<?x?xi64, #DenseMatrix> {
@@ -70,17 +70,17 @@ func.func @dense_index(%arga: tensor<?x?xi64, #DenseMatrix>)
// CHECK-LABEL: func.func @sparse_index(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?xi64, #sparse_tensor.encoding
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?xi64, #sparse{{[0-9]*}}>
// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_3:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_1]] : tensor<?x?xi64, #sparse_tensor.encoding
-// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_1]] : tensor<?x?xi64, #sparse_tensor.encoding
-// CHECK-DAG: %[[VAL_5:.*]] = tensor.empty(%[[VAL_3]], %[[VAL_4]]) : tensor<?x?xi64, #sparse_tensor.encoding
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<?x?xi64, #sparse_tensor.encoding
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<?x?xi64, #sparse_tensor.encoding
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<?x?xi64, #sparse_tensor.encoding
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<?x?xi64, #sparse_tensor.encoding
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xi64, #sparse_tensor.encoding
+// CHECK-DAG: %[[VAL_3:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_1]] : tensor<?x?xi64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_1]] : tensor<?x?xi64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_5:.*]] = tensor.empty(%[[VAL_3]], %[[VAL_4]]) : tensor<?x?xi64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<?x?xi64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<?x?xi64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<?x?xi64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<?x?xi64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xi64, #sparse{{[0-9]*}}>
// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_1]]] : memref<?xindex>
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK: %[[T:.*]] = scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_2]] {{.*}} {
@@ -95,13 +95,13 @@ func.func @dense_index(%arga: tensor<?x?xi64, #DenseMatrix>)
// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_18]]] : memref<?xi64>
// CHECK: %[[VAL_23:.*]] = arith.muli %[[VAL_21]], %[[VAL_22]] : i64
// CHECK: %[[VAL_24:.*]] = arith.muli %[[VAL_20]], %[[VAL_23]] : i64
-// CHECK: %[[Y:.*]] = sparse_tensor.insert %[[VAL_24]] into %{{.*}}[%[[VAL_14]], %[[VAL_19]]] : tensor<?x?xi64, #sparse_tensor.encoding
+// CHECK: %[[Y:.*]] = sparse_tensor.insert %[[VAL_24]] into %{{.*}}[%[[VAL_14]], %[[VAL_19]]] : tensor<?x?xi64, #sparse{{[0-9]*}}>
// CHECK: scf.yield %[[Y]]
// CHECK: }
// CHECK: scf.yield %[[L]]
// CHECK: }
-// CHECK: %[[VAL_25:.*]] = sparse_tensor.load %[[T]] hasInserts : tensor<?x?xi64, #sparse_tensor.encoding
-// CHECK: return %[[VAL_25]] : tensor<?x?xi64, #sparse_tensor.encoding
+// CHECK: %[[VAL_25:.*]] = sparse_tensor.load %[[T]] hasInserts : tensor<?x?xi64, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_25]] : tensor<?x?xi64, #sparse{{[0-9]*}}>
// CHECK: }
func.func @sparse_index(%arga: tensor<?x?xi64, #SparseMatrix>)
-> tensor<?x?xi64, #SparseMatrix> {
diff --git a/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir b/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir
index da7d45bd63e1..868b11c91fe3 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir
@@ -23,16 +23,16 @@
}
// CHECK-LABEL: func @add(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xi64>) -> tensor<32xi64> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}>
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64>
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -83,7 +83,7 @@ func.func @add(%arga: tensor<32xi64, #SV>,
}
// CHECK-LABEL: func @sub(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xi64>) -> tensor<32xi64> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
@@ -91,9 +91,9 @@ func.func @add(%arga: tensor<32xi64, #SV>,
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0 : i64
-// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}>
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64>
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -146,14 +146,14 @@ func.func @sub(%arga: tensor<32xi64, #SV>,
}
// CHECK-LABEL: func @mul(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xi64>) -> tensor<32xi64> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}>
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64>
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@@ -182,14 +182,14 @@ func.func @mul(%arga: tensor<32xi64, #SV>,
}
// CHECK-LABEL: func @divsbyc(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>) -> tensor<32xi64> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i64
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}>
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -216,14 +216,14 @@ func.func @divsbyc(%arga: tensor<32xi64, #SV>,
}
// CHECK-LABEL: func @divubyc(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>) -> tensor<32xi64> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i64
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}>
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -250,14 +250,14 @@ func.func @divubyc(%arga: tensor<32xi64, #SV>,
}
// CHECK-LABEL: func @and(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xi64>) -> tensor<32xi64> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi64>
+// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> to memref<?xi64>
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64>
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@@ -286,16 +286,16 @@ func.func @and(%arga: tensor<32xi64, #SV>,
}
// CHECK-LABEL: func @or(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xi64>) -> tensor<32xi64> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi64>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> to memref<?xi64>
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64>
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -346,16 +346,16 @@ func.func @or(%arga: tensor<32xi64, #SV>,
}
// CHECK-LABEL: func @xor(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xi64>) -> tensor<32xi64> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi64>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> to memref<?xi64>
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64>
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -406,14 +406,14 @@ func.func @xor(%arga: tensor<32xi64, #SV>,
}
// CHECK-LABEL: func @ashrbyc(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>) -> tensor<32xi64> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i64
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi64>
+// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> to memref<?xi64>
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -440,14 +440,14 @@ func.func @ashrbyc(%arga: tensor<32xi64, #SV>,
}
// CHECK-LABEL: func @lsrbyc(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>) -> tensor<32xi64> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i64
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi64>
+// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> to memref<?xi64>
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -474,14 +474,14 @@ func.func @lsrbyc(%arga: tensor<32xi64, #SV>,
}
// CHECK-LABEL: func @lslbyc(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>) -> tensor<32xi64> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i64
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi64>
+// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> to memref<?xi64>
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
diff --git a/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir b/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir
index 276a864327da..912d8bf5145e 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir
@@ -7,17 +7,17 @@
#DCSR = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed, d1 : compressed) }>
// CHECK-LABEL: func.func @matmul1(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<20x30xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<10x30xf32>) -> tensor<10x30xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 30 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<20x30xf32>
// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<10x30xf32>
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -53,7 +53,7 @@ func.func @matmul1(%a: tensor<10x20xf32, #DCSR>,
// CHECK-LABEL: func.func @matmul_sparse_rhs(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20xf32>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<20x30xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<20x30xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<10x30xf32>) -> tensor<10x30xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 10 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
@@ -102,40 +102,40 @@ func.func @matmul_sparse_rhs(%a: tensor<10x20xf32>,
// Computes C = A x B with all matrices sparse (SpMSpM) in DCSR.
//
// CHECK-LABEL: func.func @matmul2(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<4x8xf64, #sparse_tensor.encoding<{{{.*}}}>>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<8x4xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>> {
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<4x8xf64, #sparse{{[0-9]*}}>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<8x4xf64, #sparse{{[0-9]*}}>) -> tensor<4x4xf64, #sparse{{[0-9]*}}> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant false
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant true
-// CHECK-DAG: %[[VAL_6:.*]] = tensor.empty() : tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<4x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<4x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<4x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<4x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<4x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<8x4xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<8x4xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<8x4xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<8x4xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<8x4xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK-DAG: %[[VAL_6:.*]] = tensor.empty() : tensor<4x4xf64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<4x8xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<4x8xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<4x8xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<4x8xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<4x8xf64, #sparse{{[0-9]*}}> to memref<?xf64>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<8x4xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<8x4xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<8x4xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<8x4xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<8x4xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_3]]] : memref<?xindex>
-// CHECK: %[[VAL_19:.*]] = scf.for %[[VAL_20:.*]] = %[[VAL_17]] to %[[VAL_18]] step %[[VAL_3]] iter_args(%[[VAL_21:.*]] = %[[VAL_6]]) -> (tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_19:.*]] = scf.for %[[VAL_20:.*]] = %[[VAL_17]] to %[[VAL_18]] step %[[VAL_3]] iter_args(%[[VAL_21:.*]] = %[[VAL_6]]) -> (tensor<4x4xf64, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_20]]] : memref<?xindex>
-// CHECK: %[[VAL_23:.*]], %[[VAL_24:.*]], %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.expand %[[VAL_6]] : tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>, memref<?xi1>, memref<?xindex>
+// CHECK: %[[VAL_23:.*]], %[[VAL_24:.*]], %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.expand %[[VAL_6]] : tensor<4x4xf64, #sparse{{[0-9]*}}> to memref<?xf64>, memref<?xi1>, memref<?xindex>
// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_20]]] : memref<?xindex>
// CHECK: %[[VAL_28:.*]] = arith.addi %[[VAL_20]], %[[VAL_3]] : index
// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_28]]] : memref<?xindex>
// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK: %[[VAL_31:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_3]]] : memref<?xindex>
-// CHECK: %[[VAL_32:.*]]:4 = scf.while (%[[VAL_33:.*]] = %[[VAL_27]], %[[VAL_34:.*]] = %[[VAL_30]], %[[VAL_35:.*]] = %[[VAL_26]], %[[VAL_36:.*]] = %[[VAL_21]]) : (index, index, index, tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> (index, index, index, tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_32:.*]]:4 = scf.while (%[[VAL_33:.*]] = %[[VAL_27]], %[[VAL_34:.*]] = %[[VAL_30]], %[[VAL_35:.*]] = %[[VAL_26]], %[[VAL_36:.*]] = %[[VAL_21]]) : (index, index, index, tensor<4x4xf64, #sparse{{[0-9]*}}>) -> (index, index, index, tensor<4x4xf64, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_37:.*]] = arith.cmpi ult, %[[VAL_33]], %[[VAL_29]] : index
// CHECK: %[[VAL_38:.*]] = arith.cmpi ult, %[[VAL_34]], %[[VAL_31]] : index
// CHECK: %[[VAL_39:.*]] = arith.andi %[[VAL_37]], %[[VAL_38]] : i1
-// CHECK: scf.condition(%[[VAL_39]]) %[[VAL_33]], %[[VAL_34]], %[[VAL_35]], %[[VAL_36]] : index, index, index, tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.condition(%[[VAL_39]]) %[[VAL_33]], %[[VAL_34]], %[[VAL_35]], %[[VAL_36]] : index, index, index, tensor<4x4xf64, #sparse{{[0-9]*}}>
// CHECK: } do {
-// CHECK: ^bb0(%[[VAL_40:.*]]: index, %[[VAL_41:.*]]: index, %[[VAL_42:.*]]: index, %[[VAL_43:.*]]: tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>>):
+// CHECK: ^bb0(%[[VAL_40:.*]]: index, %[[VAL_41:.*]]: index, %[[VAL_42:.*]]: index, %[[VAL_43:.*]]: tensor<4x4xf64, #sparse{{[0-9]*}}>):
// CHECK: %[[VAL_44:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_40]]] : memref<?xindex>
// CHECK: %[[VAL_45:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_41]]] : memref<?xindex>
// CHECK: %[[VAL_46:.*]] = arith.cmpi ult, %[[VAL_45]], %[[VAL_44]] : index
@@ -143,7 +143,7 @@ func.func @matmul_sparse_rhs(%a: tensor<10x20xf32>,
// CHECK: %[[VAL_48:.*]] = arith.cmpi eq, %[[VAL_44]], %[[VAL_47]] : index
// CHECK: %[[VAL_49:.*]] = arith.cmpi eq, %[[VAL_45]], %[[VAL_47]] : index
// CHECK: %[[VAL_50:.*]] = arith.andi %[[VAL_48]], %[[VAL_49]] : i1
-// CHECK: %[[VAL_51:.*]]:2 = scf.if %[[VAL_50]] -> (index, tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_51:.*]]:2 = scf.if %[[VAL_50]] -> (index, tensor<4x4xf64, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_52:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_40]]] : memref<?xf64>
// CHECK: %[[VAL_53:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_41]]] : memref<?xindex>
// CHECK: %[[VAL_54:.*]] = arith.addi %[[VAL_41]], %[[VAL_3]] : index
@@ -167,9 +167,9 @@ func.func @matmul_sparse_rhs(%a: tensor<10x20xf32>,
// CHECK: memref.store %[[VAL_63]], %[[VAL_23]]{{\[}}%[[VAL_59]]] : memref<?xf64>
// CHECK: scf.yield %[[VAL_68:.*]] : index
// CHECK: }
-// CHECK: scf.yield %[[VAL_69:.*]], %[[VAL_43]] : index, tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.yield %[[VAL_69:.*]], %[[VAL_43]] : index, tensor<4x4xf64, #sparse{{[0-9]*}}>
// CHECK: } else {
-// CHECK: scf.yield %[[VAL_42]], %[[VAL_43]] : index, tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.yield %[[VAL_42]], %[[VAL_43]] : index, tensor<4x4xf64, #sparse{{[0-9]*}}>
// CHECK: }
// CHECK: %[[VAL_70:.*]] = arith.cmpi eq, %[[VAL_44]], %[[VAL_47]] : index
// CHECK: %[[VAL_71:.*]] = arith.addi %[[VAL_40]], %[[VAL_3]] : index
@@ -177,13 +177,13 @@ func.func @matmul_sparse_rhs(%a: tensor<10x20xf32>,
// CHECK: %[[VAL_73:.*]] = arith.cmpi eq, %[[VAL_45]], %[[VAL_47]] : index
// CHECK: %[[VAL_74:.*]] = arith.addi %[[VAL_41]], %[[VAL_3]] : index
// CHECK: %[[VAL_75:.*]] = arith.select %[[VAL_73]], %[[VAL_74]], %[[VAL_41]] : index
-// CHECK: scf.yield %[[VAL_72]], %[[VAL_75]], %[[VAL_76:.*]]#0, %[[VAL_76]]#1 : index, index, index, tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.yield %[[VAL_72]], %[[VAL_75]], %[[VAL_76:.*]]#0, %[[VAL_76]]#1 : index, index, index, tensor<4x4xf64, #sparse{{[0-9]*}}>
// CHECK: }
-// CHECK: %[[VAL_77:.*]] = sparse_tensor.compress %[[VAL_23]], %[[VAL_24]], %[[VAL_25]], %[[VAL_78:.*]]#2 into %[[VAL_78]]#3{{\[}}%[[VAL_22]]] : memref<?xf64>, memref<?xi1>, memref<?xindex>, tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: scf.yield %[[VAL_77]] : tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_77:.*]] = sparse_tensor.compress %[[VAL_23]], %[[VAL_24]], %[[VAL_25]], %[[VAL_78:.*]]#2 into %[[VAL_78]]#3{{\[}}%[[VAL_22]]] : memref<?xf64>, memref<?xi1>, memref<?xindex>, tensor<4x4xf64, #sparse{{[0-9]*}}>
+// CHECK: scf.yield %[[VAL_77]] : tensor<4x4xf64, #sparse{{[0-9]*}}>
// CHECK: }
-// CHECK: %[[VAL_79:.*]] = sparse_tensor.load %[[VAL_80:.*]] hasInserts : tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: return %[[VAL_79]] : tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_79:.*]] = sparse_tensor.load %[[VAL_80:.*]] hasInserts : tensor<4x4xf64, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_79]] : tensor<4x4xf64, #sparse{{[0-9]*}}>
// CHECK: }
func.func @matmul2(%A: tensor<4x8xf64, #DCSR>,
%B: tensor<8x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> {
@@ -198,18 +198,18 @@ func.func @matmul2(%A: tensor<4x8xf64, #DCSR>,
// CHECK-LABEL: func.func @conv2d(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xi32>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<3x3xi32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<6x6xi32>) -> tensor<6x6xi32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 6 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref<8x8xi32>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi32>
-// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<6x6xi32>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<3x3xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<3x3xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<3x3xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<3x3xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x3xi32, #sparse{{[0-9]*}}> to memref<?xi32>
+// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<6x6xi32>
// CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
// CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_13]], %[[VAL_14]]] : memref<6x6xi32>
@@ -249,18 +249,18 @@ func.func @conv2d(%input: tensor<8x8xi32>,
// CHECK-LABEL: func.func @quantized_matmul(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<5x3xi8>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<3x6xi8, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<5x6xi64>) -> tensor<5x6xi64> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 5 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 2 : i64
// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<5x3xi8>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi8>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<3x6xi8, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<3x6xi8, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<3x6xi8, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<3x6xi8, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x6xi8, #sparse{{[0-9]*}}> to memref<?xi8>
// CHECK: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<5x6xi64>
// CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -299,16 +299,16 @@ func.func @quantized_matmul(%input1: tensor<5x3xi8>,
}
// CHECK-LABEL: func.func @sparse_dot(
-// CHECK-SAME: %[[VAL_0:.*0]]: tensor<1024xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<1024xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*0]]: tensor<1024xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<1024xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*2]]: tensor<f32>) -> tensor<f32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<1024xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<1024xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<1024xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<1024xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<1024xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<1024xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<1024xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<1024xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<1024xf32, #sparse{{[0-9]*}}> to memref<?xf32>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<1024xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<1024xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<1024xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<f32>
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_11]][] : memref<f32>
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
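Note on the pattern change throughout these CHECK lines: the MLIR printer now emits the sparse encoding once as a "#sparse" alias and refers to it by name at each use, numbering the aliases #sparse1, #sparse2, and so on when a module carries several distinct encodings, which is what #sparse{{[0-9]*}} matches. A minimal sketch of the two renderings, with an illustrative map that is not taken from these tests:

  // old inline form, matched by #sparse_tensor.encoding<{{{.*}}}>
  tensor<1024xf32, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>>

  // new alias form, matched by #sparse{{[0-9]*}}
  #sparse = #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>
  func.func @use(%a: tensor<1024xf32, #sparse>) { return }
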
diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir
index 788dd9c26e6c..6112856fbf29 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir
@@ -21,15 +21,15 @@
}
// CHECK-HIR-LABEL: func @matvec(
-// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor<32x64xf64, #sparse{{[0-9]*}}>
// CHECK-HIR-SAME: %[[VAL_1:.*]]: tensor<64xf64>,
// CHECK-HIR-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> {
// CHECK-HIR-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-HIR-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-HIR-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-HIR-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-HIR-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-HIR-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-HIR-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}>
+// CHECK-HIR-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}>
+// CHECK-HIR-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse{{[0-9]*}}>
// CHECK-HIR-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64>
// CHECK-HIR-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64>
// CHECK-HIR: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir
index da08f53bd089..401da152a8bd 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir
@@ -23,7 +23,7 @@
}
// CHECK-HIR-LABEL: func @matvec(
-// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor<32x64xf64, #sparse{{[0-9]*}}>,
// CHECK-HIR-SAME: %[[VAL_1:.*]]: tensor<64xf64>,
// CHECK-HIR-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> {
// CHECK-HIR-DAG: %[[VAL_3:.*]] = arith.constant 64 : index
diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir
index eb6920304ed8..d769876d8ee8 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir
@@ -21,15 +21,15 @@
}
// CHECK-HIR-LABEL: func @matvec(
-// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor<32x64xf64, #sparse{{[0-9]*}}>,
// CHECK-HIR-SAME: %[[VAL_1:.*]]: tensor<64xf64>,
// CHECK-HIR-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> {
// CHECK-HIR-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-HIR-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-HIR-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-HIR: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-HIR: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-HIR: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-HIR: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}>
+// CHECK-HIR: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}>
+// CHECK-HIR: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse{{[0-9]*}}>
// CHECK-HIR: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64>
// CHECK-HIR: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64>
// CHECK-HIR: scf.for %[[VAL_11:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
diff --git a/mlir/test/Dialect/SparseTensor/sparse_nd.mlir b/mlir/test/Dialect/SparseTensor/sparse_nd.mlir
index 27716a82c164..50fec5b05f92 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_nd.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_nd.mlir
@@ -24,7 +24,7 @@
// CHECK-LABEL: func @mul(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20x30x40x50x60x70x80xf32>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<80x70x60x50x40x30x20x10xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<10x20x30x40x50x60x70x80xf32>) -> tensor<10x20x30x40x50x60x70x80xf32> {
// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 10 : index
@@ -36,11 +36,11 @@
// CHECK-DAG: %[[VAL_11:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_12:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_0]] : memref<10x20x30x40x50x60x70x80xf32>
-// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 3 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 3 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 4 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 4 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_18:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 3 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 3 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 4 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 4 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_18:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<80x70x60x50x40x30x20x10xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_20:.*]] = bufferization.to_memref %[[VAL_2]] : memref<10x20x30x40x50x60x70x80xf32>
// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_20]] : memref<10x20x30x40x50x60x70x80xf32>
// CHECK: scf.for %[[VAL_21:.*]] = %[[VAL_11]] to %[[VAL_10]] step %[[VAL_12]] {
diff --git a/mlir/test/Dialect/SparseTensor/sparse_out.mlir b/mlir/test/Dialect/SparseTensor/sparse_out.mlir
index 8bf0625425ea..b1795ff2e1a2 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_out.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_out.mlir
@@ -21,13 +21,13 @@
}
// CHECK-LABEL: func.func @sparse_simply_dynamic1(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> {
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>) -> tensor<32x16xf32, #sparse{{[0-9]*}}> {
// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 2.000000e+00 : f32
-// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK: %[[VAL_7:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_1]]] : memref<?xindex>
// CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_7]] to %[[VAL_8]] step %[[VAL_2]] {
@@ -40,8 +40,8 @@
// CHECK: memref.store %[[VAL_15]], %[[VAL_6]]{{\[}}%[[VAL_13]]] : memref<?xf32>
// CHECK: }
// CHECK: }
-// CHECK: %[[VAL_16:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: return %[[VAL_16]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_16:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_16]] : tensor<32x16xf32, #sparse{{[0-9]*}}>
// CHECK: }
func.func @sparse_simply_dynamic1(%argx: tensor<32x16xf32, #DCSR>) -> tensor<32x16xf32, #DCSR> {
%c = arith.constant 2.0 : f32
@@ -55,12 +55,12 @@ func.func @sparse_simply_dynamic1(%argx: tensor<32x16xf32, #DCSR>) -> tensor<32x
}
// CHECK-LABEL: func.func @sparse_simply_dynamic2(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> {
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>) -> tensor<32x16xf32, #sparse{{[0-9]*}}> {
// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_3:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_3:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK: %[[VAL_6:.*]] = memref.load %[[VAL_3]]{{\[}}%[[VAL_1]]] : memref<?xindex>
// CHECK: %[[VAL_7:.*]] = memref.load %[[VAL_3]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_8:.*]] = %[[VAL_6]] to %[[VAL_7]] step %[[VAL_2]] {
@@ -74,8 +74,8 @@ func.func @sparse_simply_dynamic1(%argx: tensor<32x16xf32, #DCSR>) -> tensor<32x
// CHECK: memref.store %[[VAL_15]], %[[VAL_5]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK: }
// CHECK: }
-// CHECK: %[[VAL_16:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: return %[[VAL_16]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_16:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_16]] : tensor<32x16xf32, #sparse{{[0-9]*}}>
// CHECK: }
func.func @sparse_simply_dynamic2(%argx: tensor<32x16xf32, #DCSR>) -> tensor<32x16xf32, #DCSR> {
%0 = linalg.generic #trait_scale_inpl
@@ -97,30 +97,30 @@ func.func @sparse_simply_dynamic2(%argx: tensor<32x16xf32, #DCSR>) -> tensor<32x
}
// CHECK-LABEL: func.func @sparse_truly_dynamic(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> {
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20xf32, #sparse{{[0-9]*}}>) -> tensor<10x20xf32, #sparse{{[0-9]*}}> {
// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 10 : index
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2.000000e+00 : f32
-// CHECK-DAG: %[[VAL_5:.*]] = tensor.empty() : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
-// CHECK: %[[VAL_9:.*]] = scf.for %[[VAL_10:.*]] = %[[VAL_2]] to %[[VAL_1]] step %[[VAL_3]] iter_args(%[[VAL_11:.*]] = %[[VAL_5]]) -> (tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK-DAG: %[[VAL_5:.*]] = tensor.empty() : tensor<10x20xf32, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref<?xf32>
+// CHECK: %[[VAL_9:.*]] = scf.for %[[VAL_10:.*]] = %[[VAL_2]] to %[[VAL_1]] step %[[VAL_3]] iter_args(%[[VAL_11:.*]] = %[[VAL_5]]) -> (tensor<10x20xf32, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref<?xindex>
// CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_10]], %[[VAL_3]] : index
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_13]]] : memref<?xindex>
-// CHECK: %[[VAL_15:.*]] = scf.for %[[VAL_16:.*]] = %[[VAL_12]] to %[[VAL_14]] step %[[VAL_3]] iter_args(%[[VAL_17:.*]] = %[[VAL_11]]) -> (tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_15:.*]] = scf.for %[[VAL_16:.*]] = %[[VAL_12]] to %[[VAL_14]] step %[[VAL_3]] iter_args(%[[VAL_17:.*]] = %[[VAL_11]]) -> (tensor<10x20xf32, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_16]]] : memref<?xindex>
// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_16]]] : memref<?xf32>
// CHECK: %[[VAL_20:.*]] = arith.mulf %[[VAL_19]], %[[VAL_4]] : f32
-// CHECK: %[[VAL_21:.*]] = sparse_tensor.insert %[[VAL_20]] into %[[VAL_17]]{{\[}}%[[VAL_10]], %[[VAL_18]]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: scf.yield %[[VAL_21]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_21:.*]] = sparse_tensor.insert %[[VAL_20]] into %[[VAL_17]]{{\[}}%[[VAL_10]], %[[VAL_18]]] : tensor<10x20xf32, #sparse{{[0-9]*}}>
+// CHECK: scf.yield %[[VAL_21]] : tensor<10x20xf32, #sparse{{[0-9]*}}>
// CHECK: }
-// CHECK: scf.yield %[[VAL_22:.*]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.yield %[[VAL_22:.*]] : tensor<10x20xf32, #sparse{{[0-9]*}}>
// CHECK: }
-// CHECK: %[[VAL_23:.*]] = sparse_tensor.load %[[VAL_24:.*]] hasInserts : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: return %[[VAL_23]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_23:.*]] = sparse_tensor.load %[[VAL_24:.*]] hasInserts : tensor<10x20xf32, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_23]] : tensor<10x20xf32, #sparse{{[0-9]*}}>
// CHECK: }
func.func @sparse_truly_dynamic(%arga: tensor<10x20xf32, #CSR>) -> tensor<10x20xf32, #DCSR> {
%s = arith.constant 2.0 : f32
@@ -146,41 +146,41 @@ func.func @sparse_truly_dynamic(%arga: tensor<10x20xf32, #CSR>) -> tensor<10x20x
}
// CHECK-LABEL: func.func @sumred(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<?x?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>> {
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?x?xi32, #sparse{{[0-9]*}}>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<?x?x?xi32, #sparse{{[0-9]*}}>) -> tensor<?x?xi32, #sparse{{[0-9]*}}> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : i32
// CHECK-DAG: %[[VAL_FALSE:.*]] = arith.constant false
// CHECK-DAG: %[[VAL_TRUE:.*]] = arith.constant true
-// CHECK: %[[VAL_5:.*]] = tensor.dim %[[VAL_0]], %[[VAL_2]] : tensor<?x?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor<?x?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_7:.*]] = tensor.empty(%[[VAL_5]], %[[VAL_6]]) : tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<?x?x?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<?x?x?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<?x?x?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<?x?x?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<?x?x?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<?x?x?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?x?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi32>
-// CHECK: %[[VAL_15:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?x?x?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_16:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<?x?x?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_17:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<?x?x?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_18:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<?x?x?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_19:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 2 : index} : tensor<?x?x?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_20:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 2 : index} : tensor<?x?x?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_21:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?x?x?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi32>
+// CHECK: %[[VAL_5:.*]] = tensor.dim %[[VAL_0]], %[[VAL_2]] : tensor<?x?x?xi32, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor<?x?x?xi32, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_7:.*]] = tensor.empty(%[[VAL_5]], %[[VAL_6]]) : tensor<?x?xi32, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<?x?x?xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<?x?x?xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<?x?x?xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<?x?x?xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<?x?x?xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<?x?x?xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?x?xi32, #sparse{{[0-9]*}}> to memref<?xi32>
+// CHECK: %[[VAL_15:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?x?x?xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_16:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<?x?x?xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_17:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<?x?x?xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_18:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<?x?x?xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_19:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 2 : index} : tensor<?x?x?xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_20:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 2 : index} : tensor<?x?x?xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_21:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?x?x?xi32, #sparse{{[0-9]*}}> to memref<?xi32>
// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_3]]] : memref<?xindex>
-// CHECK: %[[VAL_26:.*]]:3 = scf.while (%[[VAL_27:.*]] = %[[VAL_22]], %[[VAL_28:.*]] = %[[VAL_24]], %[[VAL_29:.*]] = %[[VAL_7]]) : (index, index, tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>) -> (index, index, tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_26:.*]]:3 = scf.while (%[[VAL_27:.*]] = %[[VAL_22]], %[[VAL_28:.*]] = %[[VAL_24]], %[[VAL_29:.*]] = %[[VAL_7]]) : (index, index, tensor<?x?xi32, #sparse{{[0-9]*}}>) -> (index, index, tensor<?x?xi32, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_30:.*]] = arith.cmpi ult, %[[VAL_27]], %[[VAL_23]] : index
// CHECK: %[[VAL_31:.*]] = arith.cmpi ult, %[[VAL_28]], %[[VAL_25]] : index
// CHECK: %[[VAL_32:.*]] = arith.andi %[[VAL_30]], %[[VAL_31]] : i1
-// CHECK: scf.condition(%[[VAL_32]]) %[[VAL_27]], %[[VAL_28]], %[[VAL_29]] : index, index, tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.condition(%[[VAL_32]]) %[[VAL_27]], %[[VAL_28]], %[[VAL_29]] : index, index, tensor<?x?xi32, #sparse{{[0-9]*}}>
// CHECK: } do {
-// CHECK: ^bb0(%[[VAL_33:.*]]: index, %[[VAL_34:.*]]: index, %[[VAL_35:.*]]: tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>):
+// CHECK: ^bb0(%[[VAL_33:.*]]: index, %[[VAL_34:.*]]: index, %[[VAL_35:.*]]: tensor<?x?xi32, #sparse{{[0-9]*}}>):
// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_33]]] : memref<?xindex>
// CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_34]]] : memref<?xindex>
// CHECK: %[[VAL_38:.*]] = arith.cmpi ult, %[[VAL_37]], %[[VAL_36]] : index
@@ -188,20 +188,20 @@ func.func @sparse_truly_dynamic(%arga: tensor<10x20xf32, #CSR>) -> tensor<10x20x
// CHECK: %[[VAL_40:.*]] = arith.cmpi eq, %[[VAL_36]], %[[VAL_39]] : index
// CHECK: %[[VAL_41:.*]] = arith.cmpi eq, %[[VAL_37]], %[[VAL_39]] : index
// CHECK: %[[VAL_42:.*]] = arith.andi %[[VAL_40]], %[[VAL_41]] : i1
-// CHECK: %[[VAL_43:.*]] = scf.if %[[VAL_42]] -> (tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_43:.*]] = scf.if %[[VAL_42]] -> (tensor<?x?xi32, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_44:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_33]]] : memref<?xindex>
// CHECK: %[[VAL_45:.*]] = arith.addi %[[VAL_33]], %[[VAL_3]] : index
// CHECK: %[[VAL_46:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_45]]] : memref<?xindex>
// CHECK: %[[VAL_47:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_34]]] : memref<?xindex>
// CHECK: %[[VAL_48:.*]] = arith.addi %[[VAL_34]], %[[VAL_3]] : index
// CHECK: %[[VAL_49:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_48]]] : memref<?xindex>
-// CHECK: %[[VAL_50:.*]]:3 = scf.while (%[[VAL_51:.*]] = %[[VAL_44]], %[[VAL_52:.*]] = %[[VAL_47]], %[[VAL_53:.*]] = %[[VAL_35]]) : (index, index, tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>) -> (index, index, tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_50:.*]]:3 = scf.while (%[[VAL_51:.*]] = %[[VAL_44]], %[[VAL_52:.*]] = %[[VAL_47]], %[[VAL_53:.*]] = %[[VAL_35]]) : (index, index, tensor<?x?xi32, #sparse{{[0-9]*}}>) -> (index, index, tensor<?x?xi32, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_54:.*]] = arith.cmpi ult, %[[VAL_51]], %[[VAL_46]] : index
// CHECK: %[[VAL_55:.*]] = arith.cmpi ult, %[[VAL_52]], %[[VAL_49]] : index
// CHECK: %[[VAL_56:.*]] = arith.andi %[[VAL_54]], %[[VAL_55]] : i1
-// CHECK: scf.condition(%[[VAL_56]]) %[[VAL_51]], %[[VAL_52]], %[[VAL_53]] : index, index, tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.condition(%[[VAL_56]]) %[[VAL_51]], %[[VAL_52]], %[[VAL_53]] : index, index, tensor<?x?xi32, #sparse{{[0-9]*}}>
// CHECK: } do {
-// CHECK: ^bb0(%[[VAL_57:.*]]: index, %[[VAL_58:.*]]: index, %[[VAL_59:.*]]: tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>):
+// CHECK: ^bb0(%[[VAL_57:.*]]: index, %[[VAL_58:.*]]: index, %[[VAL_59:.*]]: tensor<?x?xi32, #sparse{{[0-9]*}}>):
// CHECK: %[[VAL_60:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_57]]] : memref<?xindex>
// CHECK: %[[VAL_61:.*]] = memref.load %[[VAL_18]]{{\[}}%[[VAL_58]]] : memref<?xindex>
// CHECK: %[[VAL_62:.*]] = arith.cmpi ult, %[[VAL_61]], %[[VAL_60]] : index
@@ -209,20 +209,20 @@ func.func @sparse_truly_dynamic(%arga: tensor<10x20xf32, #CSR>) -> tensor<10x20x
// CHECK: %[[VAL_64:.*]] = arith.cmpi eq, %[[VAL_60]], %[[VAL_63]] : index
// CHECK: %[[VAL_65:.*]] = arith.cmpi eq, %[[VAL_61]], %[[VAL_63]] : index
// CHECK: %[[VAL_66:.*]] = arith.andi %[[VAL_64]], %[[VAL_65]] : i1
-// CHECK: %[[VAL_67:.*]] = scf.if %[[VAL_66]] -> (tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_67:.*]] = scf.if %[[VAL_66]] -> (tensor<?x?xi32, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_68:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_57]]] : memref<?xindex>
// CHECK: %[[VAL_69:.*]] = arith.addi %[[VAL_57]], %[[VAL_3]] : index
// CHECK: %[[VAL_70:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_69]]] : memref<?xindex>
// CHECK: %[[VAL_71:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_58]]] : memref<?xindex>
// CHECK: %[[VAL_72:.*]] = arith.addi %[[VAL_58]], %[[VAL_3]] : index
// CHECK: %[[VAL_73:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_72]]] : memref<?xindex>
-// CHECK: %[[VAL_74:.*]]:5 = scf.while (%[[VAL_75:.*]] = %[[VAL_68]], %[[VAL_76:.*]] = %[[VAL_71]], %[[VAL_77:.*]] = %[[VAL_4]], %[[VAL_200:.*]] = %[[VAL_FALSE]], %[[VAL_78:.*]] = %[[VAL_59]]) : (index, index, i32, i1, tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>) -> (index, index, i32, i1, tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_74:.*]]:5 = scf.while (%[[VAL_75:.*]] = %[[VAL_68]], %[[VAL_76:.*]] = %[[VAL_71]], %[[VAL_77:.*]] = %[[VAL_4]], %[[VAL_200:.*]] = %[[VAL_FALSE]], %[[VAL_78:.*]] = %[[VAL_59]]) : (index, index, i32, i1, tensor<?x?xi32, #sparse{{[0-9]*}}>) -> (index, index, i32, i1, tensor<?x?xi32, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_79:.*]] = arith.cmpi ult, %[[VAL_75]], %[[VAL_70]] : index
// CHECK: %[[VAL_80:.*]] = arith.cmpi ult, %[[VAL_76]], %[[VAL_73]] : index
// CHECK: %[[VAL_81:.*]] = arith.andi %[[VAL_79]], %[[VAL_80]] : i1
-// CHECK: scf.condition(%[[VAL_81]]) %[[VAL_75]], %[[VAL_76]], %[[VAL_77]], %[[VAL_200]], %[[VAL_78]] : index, index, i32, i1, tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.condition(%[[VAL_81]]) %[[VAL_75]], %[[VAL_76]], %[[VAL_77]], %[[VAL_200]], %[[VAL_78]] : index, index, i32, i1, tensor<?x?xi32, #sparse{{[0-9]*}}>
// CHECK: } do {
-// CHECK: ^bb0(%[[VAL_82:.*]]: index, %[[VAL_83:.*]]: index, %[[VAL_84:.*]]: i32, %[[VAL_201:.*]]: i1, %[[VAL_85:.*]]: tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>):
+// CHECK: ^bb0(%[[VAL_82:.*]]: index, %[[VAL_83:.*]]: index, %[[VAL_84:.*]]: i32, %[[VAL_201:.*]]: i1, %[[VAL_85:.*]]: tensor<?x?xi32, #sparse{{[0-9]*}}>):
// CHECK: %[[VAL_86:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_82]]] : memref<?xindex>
// CHECK: %[[VAL_87:.*]] = memref.load %[[VAL_20]]{{\[}}%[[VAL_83]]] : memref<?xindex>
// CHECK: %[[VAL_88:.*]] = arith.cmpi ult, %[[VAL_87]], %[[VAL_86]] : index
@@ -230,14 +230,14 @@ func.func @sparse_truly_dynamic(%arga: tensor<10x20xf32, #CSR>) -> tensor<10x20x
// CHECK: %[[VAL_90:.*]] = arith.cmpi eq, %[[VAL_86]], %[[VAL_89]] : index
// CHECK: %[[VAL_91:.*]] = arith.cmpi eq, %[[VAL_87]], %[[VAL_89]] : index
// CHECK: %[[VAL_92:.*]] = arith.andi %[[VAL_90]], %[[VAL_91]] : i1
-// CHECK: %[[VAL_93:.*]]:3 = scf.if %[[VAL_92]] -> (i32, i1, tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_93:.*]]:3 = scf.if %[[VAL_92]] -> (i32, i1, tensor<?x?xi32, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_94:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_82]]] : memref<?xi32>
// CHECK: %[[VAL_95:.*]] = memref.load %[[VAL_21]]{{\[}}%[[VAL_83]]] : memref<?xi32>
// CHECK: %[[VAL_96:.*]] = arith.muli %[[VAL_94]], %[[VAL_95]] : i32
// CHECK: %[[VAL_97:.*]] = arith.addi %[[VAL_84]], %[[VAL_96]] : i32
-// CHECK: scf.yield %[[VAL_97]], %[[VAL_TRUE]], %[[VAL_85]] : i32, i1, tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.yield %[[VAL_97]], %[[VAL_TRUE]], %[[VAL_85]] : i32, i1, tensor<?x?xi32, #sparse{{[0-9]*}}>
// CHECK: } else {
-// CHECK: scf.yield %[[VAL_84]], %[[VAL_201]], %[[VAL_85]] : i32, i1, tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.yield %[[VAL_84]], %[[VAL_201]], %[[VAL_85]] : i32, i1, tensor<?x?xi32, #sparse{{[0-9]*}}>
// CHECK: }
// CHECK: %[[VAL_98:.*]] = arith.cmpi eq, %[[VAL_86]], %[[VAL_89]] : index
// CHECK: %[[VAL_99:.*]] = arith.addi %[[VAL_82]], %[[VAL_3]] : index
@@ -245,17 +245,17 @@ func.func @sparse_truly_dynamic(%arga: tensor<10x20xf32, #CSR>) -> tensor<10x20x
// CHECK: %[[VAL_101:.*]] = arith.cmpi eq, %[[VAL_87]], %[[VAL_89]] : index
// CHECK: %[[VAL_102:.*]] = arith.addi %[[VAL_83]], %[[VAL_3]] : index
// CHECK: %[[VAL_103:.*]] = arith.select %[[VAL_101]], %[[VAL_102]], %[[VAL_83]] : index
-// CHECK: scf.yield %[[VAL_100]], %[[VAL_103]], %[[VAL_104:.*]]#0, %[[VAL_104]]#1, %[[VAL_104]]#2 : index, index, i32, i1, tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.yield %[[VAL_100]], %[[VAL_103]], %[[VAL_104:.*]]#0, %[[VAL_104]]#1, %[[VAL_104]]#2 : index, index, i32, i1, tensor<?x?xi32, #sparse{{[0-9]*}}>
// CHECK: }
-// CHECK: %[[VAL_202:.*]] = scf.if %[[VAL_74]]#3 -> (tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>) {
-// CHECK: %[[VAL_105:.*]] = sparse_tensor.insert %[[VAL_74]]#2 into %[[VAL_74]]#4{{\[}}%[[VAL_39]], %[[VAL_63]]] : tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: scf.yield %[[VAL_105]] : tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_202:.*]] = scf.if %[[VAL_74]]#3 -> (tensor<?x?xi32, #sparse{{[0-9]*}}>) {
+// CHECK: %[[VAL_105:.*]] = sparse_tensor.insert %[[VAL_74]]#2 into %[[VAL_74]]#4{{\[}}%[[VAL_39]], %[[VAL_63]]] : tensor<?x?xi32, #sparse{{[0-9]*}}>
+// CHECK: scf.yield %[[VAL_105]] : tensor<?x?xi32, #sparse{{[0-9]*}}>
// CHECK: } else {
-// CHECK: scf.yield %[[VAL_74]]#4 : tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.yield %[[VAL_74]]#4 : tensor<?x?xi32, #sparse{{[0-9]*}}>
// CHECK: }
-// CHECK: scf.yield %[[VAL_202]] : tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.yield %[[VAL_202]] : tensor<?x?xi32, #sparse{{[0-9]*}}>
// CHECK: } else {
-// CHECK: scf.yield %[[VAL_59]] : tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.yield %[[VAL_59]] : tensor<?x?xi32, #sparse{{[0-9]*}}>
// CHECK: }
// CHECK: %[[VAL_107:.*]] = arith.cmpi eq, %[[VAL_60]], %[[VAL_63]] : index
// CHECK: %[[VAL_108:.*]] = arith.addi %[[VAL_57]], %[[VAL_3]] : index
@@ -263,11 +263,11 @@ func.func @sparse_truly_dynamic(%arga: tensor<10x20xf32, #CSR>) -> tensor<10x20x
// CHECK: %[[VAL_110:.*]] = arith.cmpi eq, %[[VAL_61]], %[[VAL_63]] : index
// CHECK: %[[VAL_111:.*]] = arith.addi %[[VAL_58]], %[[VAL_3]] : index
// CHECK: %[[VAL_112:.*]] = arith.select %[[VAL_110]], %[[VAL_111]], %[[VAL_58]] : index
-// CHECK: scf.yield %[[VAL_109]], %[[VAL_112]], %[[VAL_113:.*]] : index, index, tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.yield %[[VAL_109]], %[[VAL_112]], %[[VAL_113:.*]] : index, index, tensor<?x?xi32, #sparse{{[0-9]*}}>
// CHECK: }
-// CHECK: scf.yield %[[VAL_114:.*]]#2 : tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.yield %[[VAL_114:.*]]#2 : tensor<?x?xi32, #sparse{{[0-9]*}}>
// CHECK: } else {
-// CHECK: scf.yield %[[VAL_35]] : tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.yield %[[VAL_35]] : tensor<?x?xi32, #sparse{{[0-9]*}}>
// CHECK: }
// CHECK: %[[VAL_115:.*]] = arith.cmpi eq, %[[VAL_36]], %[[VAL_39]] : index
// CHECK: %[[VAL_116:.*]] = arith.addi %[[VAL_33]], %[[VAL_3]] : index
@@ -275,10 +275,10 @@ func.func @sparse_truly_dynamic(%arga: tensor<10x20xf32, #CSR>) -> tensor<10x20x
// CHECK: %[[VAL_118:.*]] = arith.cmpi eq, %[[VAL_37]], %[[VAL_39]] : index
// CHECK: %[[VAL_119:.*]] = arith.addi %[[VAL_34]], %[[VAL_3]] : index
// CHECK: %[[VAL_120:.*]] = arith.select %[[VAL_118]], %[[VAL_119]], %[[VAL_34]] : index
-// CHECK: scf.yield %[[VAL_117]], %[[VAL_120]], %[[VAL_121:.*]] : index, index, tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.yield %[[VAL_117]], %[[VAL_120]], %[[VAL_121:.*]] : index, index, tensor<?x?xi32, #sparse{{[0-9]*}}>
// CHECK: }
-// CHECK: %[[VAL_122:.*]] = sparse_tensor.load %[[VAL_123:.*]]#2 hasInserts : tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: return %[[VAL_122]] : tensor<?x?xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_122:.*]] = sparse_tensor.load %[[VAL_123:.*]]#2 hasInserts : tensor<?x?xi32, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_122]] : tensor<?x?xi32, #sparse{{[0-9]*}}>
// CHECK: }
func.func @sumred(%arga: tensor<?x?x?xi32, #SparseTensor>,
%argb: tensor<?x?x?xi32, #SparseTensor>) -> tensor<?x?xi32, #DCSR> {
@@ -310,42 +310,42 @@ func.func @sumred(%arga: tensor<?x?x?xi32, #SparseTensor>,
}
// CHECK-LABEL: func.func @matmat(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> {
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?xf32, #sparse{{[0-9]*}}>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<?x?xf32, #sparse{{[0-9]*}}>) -> tensor<?x?xf32, #sparse{{[0-9]*}}> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant false
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant true
-// CHECK: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_2]] : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_7:.*]] = tensor.dim %[[VAL_1]], %[[VAL_3]] : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_8:.*]] = tensor.empty(%[[VAL_6]], %[[VAL_7]]) : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
-// CHECK: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_16:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_17:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_18:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_2]] : tensor<?x?xf32, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_7:.*]] = tensor.dim %[[VAL_1]], %[[VAL_3]] : tensor<?x?xf32, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_8:.*]] = tensor.empty(%[[VAL_6]], %[[VAL_7]]) : tensor<?x?xf32, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xf32>
+// CHECK: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_16:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_17:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_18:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref<?xindex>
-// CHECK: %[[VAL_21:.*]] = scf.for %[[VAL_22:.*]] = %[[VAL_19]] to %[[VAL_20]] step %[[VAL_3]] iter_args(%[[VAL_23:.*]] = %[[VAL_8]]) -> (tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_21:.*]] = scf.for %[[VAL_22:.*]] = %[[VAL_19]] to %[[VAL_20]] step %[[VAL_3]] iter_args(%[[VAL_23:.*]] = %[[VAL_8]]) -> (tensor<?x?xf32, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_22]]] : memref<?xindex>
-// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]], %[[VAL_27:.*]], %[[VAL_28:.*]] = sparse_tensor.expand %[[VAL_8]] : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>, memref<?xi1>, memref<?xindex>
+// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]], %[[VAL_27:.*]], %[[VAL_28:.*]] = sparse_tensor.expand %[[VAL_8]] : tensor<?x?xf32, #sparse{{[0-9]*}}> to memref<?xf32>, memref<?xi1>, memref<?xindex>
// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_22]]] : memref<?xindex>
// CHECK: %[[VAL_30:.*]] = arith.addi %[[VAL_22]], %[[VAL_3]] : index
// CHECK: %[[VAL_31:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_30]]] : memref<?xindex>
// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK: %[[VAL_33:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_3]]] : memref<?xindex>
-// CHECK: %[[VAL_34:.*]]:4 = scf.while (%[[VAL_35:.*]] = %[[VAL_29]], %[[VAL_36:.*]] = %[[VAL_32]], %[[VAL_37:.*]] = %[[VAL_28]], %[[VAL_38:.*]] = %[[VAL_23]]) : (index, index, index, tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>) -> (index, index, index, tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_34:.*]]:4 = scf.while (%[[VAL_35:.*]] = %[[VAL_29]], %[[VAL_36:.*]] = %[[VAL_32]], %[[VAL_37:.*]] = %[[VAL_28]], %[[VAL_38:.*]] = %[[VAL_23]]) : (index, index, index, tensor<?x?xf32, #sparse{{[0-9]*}}>) -> (index, index, index, tensor<?x?xf32, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_39:.*]] = arith.cmpi ult, %[[VAL_35]], %[[VAL_31]] : index
// CHECK: %[[VAL_40:.*]] = arith.cmpi ult, %[[VAL_36]], %[[VAL_33]] : index
// CHECK: %[[VAL_41:.*]] = arith.andi %[[VAL_39]], %[[VAL_40]] : i1
-// CHECK: scf.condition(%[[VAL_41]]) %[[VAL_35]], %[[VAL_36]], %[[VAL_37]], %[[VAL_38]] : index, index, index, tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.condition(%[[VAL_41]]) %[[VAL_35]], %[[VAL_36]], %[[VAL_37]], %[[VAL_38]] : index, index, index, tensor<?x?xf32, #sparse{{[0-9]*}}>
// CHECK: } do {
-// CHECK: ^bb0(%[[VAL_42:.*]]: index, %[[VAL_43:.*]]: index, %[[VAL_44:.*]]: index, %[[VAL_45:.*]]: tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>):
+// CHECK: ^bb0(%[[VAL_42:.*]]: index, %[[VAL_43:.*]]: index, %[[VAL_44:.*]]: index, %[[VAL_45:.*]]: tensor<?x?xf32, #sparse{{[0-9]*}}>):
// CHECK: %[[VAL_46:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_42]]] : memref<?xindex>
// CHECK: %[[VAL_47:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_43]]] : memref<?xindex>
// CHECK: %[[VAL_48:.*]] = arith.cmpi ult, %[[VAL_47]], %[[VAL_46]] : index
@@ -353,7 +353,7 @@ func.func @sumred(%arga: tensor<?x?x?xi32, #SparseTensor>,
// CHECK: %[[VAL_50:.*]] = arith.cmpi eq, %[[VAL_46]], %[[VAL_49]] : index
// CHECK: %[[VAL_51:.*]] = arith.cmpi eq, %[[VAL_47]], %[[VAL_49]] : index
// CHECK: %[[VAL_52:.*]] = arith.andi %[[VAL_50]], %[[VAL_51]] : i1
-// CHECK: %[[VAL_53:.*]]:2 = scf.if %[[VAL_52]] -> (index, tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_53:.*]]:2 = scf.if %[[VAL_52]] -> (index, tensor<?x?xf32, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_54:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_42]]] : memref<?xf32>
// CHECK: %[[VAL_55:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_43]]] : memref<?xindex>
// CHECK: %[[VAL_56:.*]] = arith.addi %[[VAL_43]], %[[VAL_3]] : index
@@ -377,9 +377,9 @@ func.func @sumred(%arga: tensor<?x?x?xi32, #SparseTensor>,
// CHECK: memref.store %[[VAL_65]], %[[VAL_25]]{{\[}}%[[VAL_61]]] : memref<?xf32>
// CHECK: scf.yield %[[VAL_70:.*]] : index
// CHECK: }
-// CHECK: scf.yield %[[VAL_71:.*]], %[[VAL_45]] : index, tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.yield %[[VAL_71:.*]], %[[VAL_45]] : index, tensor<?x?xf32, #sparse{{[0-9]*}}>
// CHECK: } else {
-// CHECK: scf.yield %[[VAL_44]], %[[VAL_45]] : index, tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.yield %[[VAL_44]], %[[VAL_45]] : index, tensor<?x?xf32, #sparse{{[0-9]*}}>
// CHECK: }
// CHECK: %[[VAL_72:.*]] = arith.cmpi eq, %[[VAL_46]], %[[VAL_49]] : index
// CHECK: %[[VAL_73:.*]] = arith.addi %[[VAL_42]], %[[VAL_3]] : index
@@ -387,13 +387,13 @@ func.func @sumred(%arga: tensor<?x?x?xi32, #SparseTensor>,
// CHECK: %[[VAL_75:.*]] = arith.cmpi eq, %[[VAL_47]], %[[VAL_49]] : index
// CHECK: %[[VAL_76:.*]] = arith.addi %[[VAL_43]], %[[VAL_3]] : index
// CHECK: %[[VAL_77:.*]] = arith.select %[[VAL_75]], %[[VAL_76]], %[[VAL_43]] : index
-// CHECK: scf.yield %[[VAL_74]], %[[VAL_77]], %[[VAL_78:.*]]#0, %[[VAL_78]]#1 : index, index, index, tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.yield %[[VAL_74]], %[[VAL_77]], %[[VAL_78:.*]]#0, %[[VAL_78]]#1 : index, index, index, tensor<?x?xf32, #sparse{{[0-9]*}}>
// CHECK: }
-// CHECK: %[[VAL_79:.*]] = sparse_tensor.compress %[[VAL_25]], %[[VAL_26]], %[[VAL_27]], %[[VAL_80:.*]]#2 into %[[VAL_80]]#3{{\[}}%[[VAL_24]]] : memref<?xf32>, memref<?xi1>, memref<?xindex>, tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: scf.yield %[[VAL_79]] : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_79:.*]] = sparse_tensor.compress %[[VAL_25]], %[[VAL_26]], %[[VAL_27]], %[[VAL_80:.*]]#2 into %[[VAL_80]]#3{{\[}}%[[VAL_24]]] : memref<?xf32>, memref<?xi1>, memref<?xindex>, tensor<?x?xf32, #sparse{{[0-9]*}}>
+// CHECK: scf.yield %[[VAL_79]] : tensor<?x?xf32, #sparse{{[0-9]*}}>
// CHECK: }
-// CHECK: %[[VAL_81:.*]] = sparse_tensor.load %[[VAL_82:.*]] hasInserts : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: return %[[VAL_81]] : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_81:.*]] = sparse_tensor.load %[[VAL_82:.*]] hasInserts : tensor<?x?xf32, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_81]] : tensor<?x?xf32, #sparse{{[0-9]*}}>
// CHECK: }
func.func @matmat(%arga: tensor<?x?xf32, #DCSR>,
%argb: tensor<?x?xf32, #DCSR>) -> tensor<?x?xf32, #DCSR> {
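The optional digit suffix in #sparse{{[0-9]*}} matters for tests such as @sumred above, which mixes distinct encodings (a 3-d input encoding plus the 2-d #DCSR output): the printer then emits one numbered alias per encoding. An illustrative rendering, with assumed maps that are not copied from the test:

  #sparse = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed, d1 : compressed) }>
  #sparse1 = #sparse_tensor.encoding<{ map = (d0, d1, d2) -> (d0 : compressed, d1 : compressed, d2 : compressed) }>
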
diff --git a/mlir/test/Dialect/SparseTensor/sparse_pack.mlir b/mlir/test/Dialect/SparseTensor/sparse_pack.mlir
index 80cfa3c635f3..7cb699092f88 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_pack.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_pack.mlir
@@ -40,7 +40,7 @@ func.func @sparse_pack(%values: tensor<6xf64>, %pos:tensor<2xindex>, %coordinate
// CHECK-SAME: %[[VAL_0:.*]]: memref<?xindex>,
// CHECK-SAME: %[[VAL_1:.*]]: memref<?xi32>,
// CHECK-SAME: %[[VAL_2:.*]]: memref<?xf64>,
-// CHECK-SAME: %[[VAL_3:.*]]: !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_3:.*]]: !sparse_tensor.storage_specifier<#sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_4:.*]]: tensor<6xf64>,
// CHECK-SAME: %[[VAL_5:.*]]: tensor<2xindex>,
// CHECK-SAME: %[[VAL_6:.*]]: tensor<6x2xi32>) -> (tensor<6xf64>, tensor<2xindex>, tensor<6x2xi32>) {
diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir
index 8118eeceb6a6..7a35e0ff0c3a 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir
@@ -15,7 +15,7 @@
doc = "x(i) += A(i,j) * b(j)"
}
// CHECK-LABEL: func.func @matvec(
-// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<16x32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<16x32xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[TMP_arg1:.*]]: tensor<32xf32>,
// CHECK-SAME: %[[TMP_arg2:.*]]: tensor<16xf32>) -> tensor<16xf32> {
// CHECK-DAG: %[[TMP_c16:.*]] = arith.constant 16 : index
diff --git a/mlir/test/Dialect/SparseTensor/sparse_perm.mlir b/mlir/test/Dialect/SparseTensor/sparse_perm.mlir
index 83cbfa771f1d..e1e474ebee5f 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_perm.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_perm.mlir
@@ -14,7 +14,7 @@
}
// CHECK-LABEL: func @sparse_static_dims(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20x30xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<20x30x10xf32>) -> tensor<20x30x10xf32> {
// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 20 : index
@@ -23,7 +23,7 @@
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK: %[[DEMAP:.*]] = sparse_tensor.reinterpret_map %[[VAL_0]]
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[DEMAP]] : tensor<30x10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[DEMAP]] : tensor<30x10x20xf32, #sparse{{[0-9]*}}>
// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<20x30x10xf32>
// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_9]] : memref<20x30x10xf32>)
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
@@ -53,17 +53,17 @@ func.func @sparse_static_dims(%arga: tensor<10x20x30xf32, #X>,
}
// CHECK-LABEL: func @sparse_dynamic_dims(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?x?xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
// CHECK: %[[DEMAP:.*]] = sparse_tensor.reinterpret_map %[[VAL_0]]
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[DEMAP]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_2]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_3]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_4]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[DEMAP]] : tensor<?x?x?xf32, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_2]] : tensor<?x?x?xf32, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_3]] : tensor<?x?x?xf32, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_4]] : tensor<?x?x?xf32, #sparse{{[0-9]*}}>
// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<?x?x?xf32>
// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_10]] : memref<?x?x?xf32>)
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_7]] step %[[VAL_4]] {
diff --git a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir
index 1078e5c282df..3ec2c89af420 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir
@@ -16,16 +16,16 @@
}
// CHECK-HIR-LABEL: func @sparse_dynamic_dims(
-// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor<?x?x?xf32, #sparse{{[0-9]*}}>,
// CHECK-HIR-SAME: %[[VAL_1:.*]]: tensor<f32>) -> tensor<f32> {
// CHECK-HIR-DAG: %[[VAL_2:.*]] = arith.constant 1 : index
// CHECK-HIR-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-HIR-DAG: %[[VAL_4:.*]] = arith.constant 2 : index
// CHECK-HIR: %[[DEMAP:.*]] = sparse_tensor.reinterpret_map %[[VAL_0]]
-// CHECK-HIR-DAG: %[[VAL_5:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_3]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-HIR-DAG: %[[VAL_6:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_2]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-HIR-DAG: %[[VAL_7:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_4]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-HIR-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[DEMAP]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-HIR-DAG: %[[VAL_5:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_3]] : tensor<?x?x?xf32, #sparse{{[0-9]*}}>
+// CHECK-HIR-DAG: %[[VAL_6:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_2]] : tensor<?x?x?xf32, #sparse{{[0-9]*}}>
+// CHECK-HIR-DAG: %[[VAL_7:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_4]] : tensor<?x?x?xf32, #sparse{{[0-9]*}}>
+// CHECK-HIR-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[DEMAP]] : tensor<?x?x?xf32, #sparse{{[0-9]*}}>
// CHECK-HIR-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<f32>
// CHECK-HIR: %[[VAL_11:.*]] = tensor.extract %[[VAL_1]][] : tensor<f32>
// CHECK-HIR: %[[VAL_12:.*]] = scf.for %[[VAL_13:.*]] = %[[VAL_3]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_14:.*]] = %[[VAL_11]]) -> (f32) {
diff --git a/mlir/test/Dialect/SparseTensor/sparse_reinterpret_map.mlir b/mlir/test/Dialect/SparseTensor/sparse_reinterpret_map.mlir
index c4931c62c626..c6a3a366af9f 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_reinterpret_map.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_reinterpret_map.mlir
@@ -25,11 +25,11 @@
// CHECK-LABEL: func @mul(
// CHECK-SAME: %[[A0:.*0]]: tensor<32x32xf32>,
// CHECK-SAME: %[[A1:.*1]]: tensor<32x32xf32>,
-// CHECK-SAME: %[[A2:.*2]]: tensor<32x32xf32, #sparse_tensor.encoding<{{{.*}}}>>)
+// CHECK-SAME: %[[A2:.*2]]: tensor<32x32xf32, #sparse{{[0-9]*}}>)
// CHECK: %[[T0:.*]] = sparse_tensor.reinterpret_map %[[A2]]
// CHECK: %[[T1:.*]] = linalg.generic {doc = {{.*}} indexing_maps = [#[[$map0]], #[[$map1]], #[[$map2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
// CHECK: %[[T2:.*]] = sparse_tensor.reinterpret_map %[[T1]]
-// CHECK: return %[[T2]] : tensor<32x32xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: return %[[T2]] : tensor<32x32xf32, #sparse{{[0-9]*}}>
func.func @mul(%arg0: tensor<32x32xf32>,
%arg1: tensor<32x32xf32>,
%arg2: tensor<32x32xf32, #BSR>) -> tensor<32x32xf32, #BSR> {
@@ -56,17 +56,17 @@ func.func @mul(%arg0: tensor<32x32xf32>,
}>
// CHECK-LABEL: func.func @sparse_foreach_reinterpret_map(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<2x4xf64
-// CHECK: %[[VAL_1:.*]] = bufferization.alloc_tensor() : tensor<1x2x2x2xf64
-// CHECK: %[[VAL_2:.*]] = sparse_tensor.reinterpret_map %[[VAL_0]] : tensor<2x4xf64
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<2x4xf64, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_1:.*]] = bufferization.alloc_tensor() : tensor<1x2x2x2xf64, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_2:.*]] = sparse_tensor.reinterpret_map %[[VAL_0]] : tensor<2x4xf64, #sparse{{[0-9]*}}>
// CHECK: %[[VAL_4:.*]] = sparse_tensor.foreach in %[[VAL_2]] init(%[[VAL_1]])
-// CHECK: ^bb0(%[[VAL_5:.*]]: index, %[[VAL_6:.*]]: index, %[[VAL_7:.*]]: index, %[[VAL_8:.*]]: index, %[[VAL_9:.*]]: f64, %[[VAL_10:.*]]: tensor<1x2x2x2xf64
-// CHECK: %[[VAL_11:.*]] = sparse_tensor.insert %[[VAL_9]] into %[[VAL_10]]{{\[}}%[[VAL_5]], %[[VAL_6]], %[[VAL_7]], %[[VAL_8]]] : tensor<1x2x2x2xf64
-// CHECK: sparse_tensor.yield %[[VAL_11]] : tensor<1x2x2x2xf64
+// CHECK: ^bb0(%[[VAL_5:.*]]: index, %[[VAL_6:.*]]: index, %[[VAL_7:.*]]: index, %[[VAL_8:.*]]: index, %[[VAL_9:.*]]: f64, %[[VAL_10:.*]]: tensor<1x2x2x2xf64, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_11:.*]] = sparse_tensor.insert %[[VAL_9]] into %[[VAL_10]]{{\[}}%[[VAL_5]], %[[VAL_6]], %[[VAL_7]], %[[VAL_8]]] : tensor<1x2x2x2xf64, #sparse{{[0-9]*}}>
+// CHECK: sparse_tensor.yield %[[VAL_11]] : tensor<1x2x2x2xf64, #sparse{{[0-9]*}}>
// CHECK: }
-// CHECK: %[[VAL_12:.*]] = sparse_tensor.reinterpret_map %[[VAL_4]] : tensor<1x2x2x2xf64
-// CHECK: %[[VAL_13:.*]] = sparse_tensor.load %[[VAL_12]] hasInserts : tensor<2x4xf64
-// CHECK: return %[[VAL_13]] : tensor<2x4xf64
+// CHECK: %[[VAL_12:.*]] = sparse_tensor.reinterpret_map %[[VAL_4]] : tensor<1x2x2x2xf64, #sparse{{[0-9]*}}>
+// CHECK: %[[VAL_13:.*]] = sparse_tensor.load %[[VAL_12]] hasInserts : tensor<2x4xf64, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_13]] : tensor<2x4xf64, #sparse{{[0-9]*}}>
// CHECK: }
func.func @sparse_foreach_reinterpret_map(%6 : tensor<2x4xf64, #BSR>) -> tensor<2x4xf64, #BSR> {
%7 = bufferization.alloc_tensor() : tensor<2x4xf64, #BSR>
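Context for the @sparse_foreach_reinterpret_map hunk above: sparse_tensor.reinterpret_map exposes a block-sparse tensor under its level view, so a 2x4 tensor with 2x2 blocks reads as 1x2x2x2, which the updated CHECK lines now spell out together with the #sparse alias. An illustrative #BSR encoding, assumed rather than copied from the test:

  #BSR = #sparse_tensor.encoding<{
    map = (i, j) -> (i floordiv 2 : compressed, j floordiv 2 : compressed,
                     i mod 2 : dense, j mod 2 : dense)
  }>
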
diff --git a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir
index d3d6d8c91fa4..eea77c6e5a6c 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir
@@ -11,9 +11,9 @@
// roundtrip:
//
// CHECK-ROUND-LABEL: func.func @sparse_expand(
-// CHECK-ROUND-SAME: %[[A:.*]]: tensor<100xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<10x10xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-ROUND: %[[E:.*]] = tensor.expand_shape %[[A]] {{\[\[}}0, 1]] : tensor<100xf64, #sparse_tensor.encoding<{{{.*}}}>> into tensor<10x10xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-ROUND: return %[[E]] : tensor<10x10xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-ROUND-SAME: %[[A:.*]]: tensor<100xf64, #sparse{{[0-9]*}}>) -> tensor<10x10xf64, #sparse{{[0-9]*}}>
+// CHECK-ROUND: %[[E:.*]] = tensor.expand_shape %[[A]] {{\[\[}}0, 1]] : tensor<100xf64, #sparse{{[0-9]*}}> into tensor<10x10xf64, #sparse{{[0-9]*}}>
+// CHECK-ROUND: return %[[E]] : tensor<10x10xf64, #sparse{{[0-9]*}}>
//
// CHECK-LABEL: func.func @sparse_expand(
// CHECK-SAME: %[[S:.*0]]:
@@ -36,7 +36,7 @@
// CHECK: }
// CHECK: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts
// CHECK-NOT: sparse_tensor.convert
-// CHECK: return %[[NT1]] : tensor<10x10xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: return %[[NT1]] : tensor<10x10xf64, #sparse{{[0-9]*}}>
//
func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10xf64, #SparseMatrix> {
%0 = tensor.expand_shape %arg0 [[0, 1]] :
@@ -48,9 +48,9 @@ func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10x
// roundtrip:
//
// CHECK-ROUND-LABEL: func.func @sparse_collapse(
-// CHECK-ROUND-SAME: %[[A:.*]]: tensor<10x10xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<100xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-ROUND: %[[C:.*]] = tensor.collapse_shape %[[A]] {{\[\[}}0, 1]] : tensor<10x10xf64, #sparse_tensor.encoding<{{{.*}}}>> into tensor<100xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-ROUND: return %[[C]] : tensor<100xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-ROUND-SAME: %[[A:.*]]: tensor<10x10xf64, #sparse{{[0-9]*}}>) -> tensor<100xf64, #sparse{{[0-9]*}}>
+// CHECK-ROUND: %[[C:.*]] = tensor.collapse_shape %[[A]] {{\[\[}}0, 1]] : tensor<10x10xf64, #sparse{{[0-9]*}}> into tensor<100xf64, #sparse{{[0-9]*}}>
+// CHECK-ROUND: return %[[C]] : tensor<100xf64, #sparse{{[0-9]*}}>
//
// CHECK-LABEL: func.func @sparse_collapse(
// CHECK-SAME: %[[S:.*0]]:
@@ -82,7 +82,7 @@ func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10x
// CHECK: }
// CHECK: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts
// CHECK-NOT: sparse_tensor.convert
-// CHECK: return %[[NT1]] : tensor<100xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: return %[[NT1]] : tensor<100xf64, #sparse{{[0-9]*}}>
//
func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<100xf64, #SparseVector> {
%0 = tensor.collapse_shape %arg0 [[0, 1]] :
@@ -94,9 +94,9 @@ func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<10
// roundtrip:
//
// CHECK-ROUND-LABEL: func.func @dynamic_sparse_expand(
-// CHECK-ROUND-SAME: %[[A:.*]]: tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<?x10xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-ROUND: %[[E:.*]] = tensor.expand_shape %[[A]] {{\[\[}}0, 1]] : tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>> into tensor<?x10xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-ROUND: return %[[E]] : tensor<?x10xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-ROUND-SAME: %[[A:.*]]: tensor<?xf64, #sparse{{[0-9]*}}>) -> tensor<?x10xf64, #sparse{{[0-9]*}}>
+// CHECK-ROUND: %[[E:.*]] = tensor.expand_shape %[[A]] {{\[\[}}0, 1]] : tensor<?xf64, #sparse{{[0-9]*}}> into tensor<?x10xf64, #sparse{{[0-9]*}}>
+// CHECK-ROUND: return %[[E]] : tensor<?x10xf64, #sparse{{[0-9]*}}>
//
// CHECK-LABEL: func.func @dynamic_sparse_expand(
// CHECK-SAME: %[[S:.*0]]:
@@ -125,7 +125,7 @@ func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<10
// CHECK: }
// CHECK: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts
// CHECK-NOT: sparse_tensor.convert
-// CHECK: return %[[NT1]] : tensor<?x10xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: return %[[NT1]] : tensor<?x10xf64, #sparse{{[0-9]*}}>
//
func.func @dynamic_sparse_expand(%arg0: tensor<?xf64, #SparseVector>) -> tensor<?x10xf64, #SparseMatrix> {
%0 = tensor.expand_shape %arg0 [[0, 1]] :
@@ -137,9 +137,9 @@ func.func @dynamic_sparse_expand(%arg0: tensor<?xf64, #SparseVector>) -> tensor<
// roundtrip:
//
// CHECK-ROUND-LABEL: func.func @dynamic_sparse_collapse(
-// CHECK-ROUND-SAME: %[[A:.*]]: tensor<10x?xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-ROUND: %[[C:.*]] = tensor.collapse_shape %[[A]] {{\[\[}}0, 1]] : tensor<10x?xf64, #sparse_tensor.encoding<{{{.*}}}>> into tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-ROUND: return %[[C]] : tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-ROUND-SAME: %[[A:.*]]: tensor<10x?xf64, #sparse{{[0-9]*}}>) -> tensor<?xf64, #sparse{{[0-9]*}}>
+// CHECK-ROUND: %[[C:.*]] = tensor.collapse_shape %[[A]] {{\[\[}}0, 1]] : tensor<10x?xf64, #sparse{{[0-9]*}}> into tensor<?xf64, #sparse{{[0-9]*}}>
+// CHECK-ROUND: return %[[C]] : tensor<?xf64, #sparse{{[0-9]*}}>
//
// CHECK-LABEL: func.func @dynamic_sparse_collapse(
// CHECK-SAME: %[[S:.*0]]:
@@ -176,7 +176,7 @@ func.func @dynamic_sparse_expand(%arg0: tensor<?xf64, #SparseVector>) -> tensor<
// CHECK: }
// CHECK: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts
// CHECK-NOT: sparse_tensor.convert
-// CHECK: return %[[NT1]] : tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: return %[[NT1]] : tensor<?xf64, #sparse{{[0-9]*}}>
//
func.func @dynamic_sparse_collapse(%arg0: tensor<10x?xf64, #SparseMatrix>) -> tensor<?xf64, #SparseVector> {
%0 = tensor.collapse_shape %arg0 [[0, 1]] :
diff --git a/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir b/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir
index 4ca577b9f729..666882edcbab 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir
@@ -19,7 +19,7 @@
}
// CHECK-LABEL: func @mul(
-// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>,
+// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x16xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*1]]: tensor<f32>,
// CHECK-SAME: %[[VAL_2:.*2]]: f32,
// CHECK-SAME: %[[VAL_3:.*3]]: f32,
@@ -28,11 +28,11 @@
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_8:.*]] = arith.addf %[[VAL_2]], %[[VAL_3]] : f32
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_1]] : memref<f32>
// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_4]] : memref<32x16xf32>
// CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_14]][] : memref<f32>
diff --git a/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir b/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir
index 1d95fe8d0569..6fc2db481c6f 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir
@@ -55,7 +55,7 @@ func.func @fold_yield_direct_zero() -> tensor<32xf64> {
}
// CHECK-LABEL: func.func @sampled_dd_unfused(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<8x8xf64>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<8x8xf64>) -> tensor<8x8xf64> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 8 : index
@@ -66,11 +66,11 @@ func.func @fold_yield_direct_zero() -> tensor<32xf64> {
// CHECK-DAG: %[[VAL_8:.*]] = bufferization.alloc_tensor() copy(%[[VAL_6]]) {memory_space = 0 : i64} : tensor<8x8xf64>
// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<8x8xf64>
// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<8x8xf64>
-// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse_tensor.encoding<{{.*}}>> to memref<?xf64>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_8]] : memref<8x8xf64>
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_5]]] : memref<?xindex>
@@ -121,9 +121,9 @@ func.func @sampled_dd_unfused(%args: tensor<8x8xf64, #SM>,
}
// CHECK-LABEL: func.func @sparse_sampled_dd_unfused(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<8x8xf64>,
-// CHECK-SAME: %[[VAL_2:.*]]: tensor<8x8xf64>) -> tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> {
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<8x8xf64>) -> tensor<8x8xf64, #sparse{{[0-9]*}}> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
@@ -131,19 +131,19 @@ func.func @sampled_dd_unfused(%args: tensor<8x8xf64, #SM>,
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_8:.*]] = arith.constant dense<0.000000e+00> : tensor<8x8xf64>
// CHECK-DAG: %[[VAL_9:.*]] = bufferization.alloc_tensor() copy(%[[VAL_8]]) : tensor<8x8xf64>
-// CHECK-DAG: %[[VAL_10:.*]] = tensor.empty() : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-DAG: %[[VAL_10:.*]] = tensor.empty() : tensor<8x8xf64, #sparse{{[0-9]*}}>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<8x8xf64>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<8x8xf64>
-// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_5]]] : memref<?xindex>
-// CHECK: %[[VAL_20:.*]] = scf.for %[[VAL_21:.*]] = %[[VAL_18]] to %[[VAL_19]] step %[[VAL_5]] iter_args(%[[VAL_22:.*]] = %[[VAL_10]]) -> (tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_20:.*]] = scf.for %[[VAL_21:.*]] = %[[VAL_18]] to %[[VAL_19]] step %[[VAL_5]] iter_args(%[[VAL_22:.*]] = %[[VAL_10]]) -> (tensor<8x8xf64, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_21]]] : memref<?xindex>
-// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]], %[[VAL_26:.*]], %[[VAL_27:.*]] = sparse_tensor.expand %[[VAL_10]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>, memref<?xi1>, memref<?xindex>
+// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]], %[[VAL_26:.*]], %[[VAL_27:.*]] = sparse_tensor.expand %[[VAL_10]] : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xf64>, memref<?xi1>, memref<?xindex>
// CHECK: %[[VAL_28:.*]] = scf.for %[[VAL_29:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] iter_args(%[[VAL_30:.*]] = %[[VAL_27]]) -> (index) {
// CHECK: %[[VAL_31:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_23]], %[[VAL_29]]] : memref<8x8xf64>
// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_21]]] : memref<?xindex>
@@ -172,11 +172,11 @@ func.func @sampled_dd_unfused(%args: tensor<8x8xf64, #SM>,
// CHECK: }
// CHECK: scf.yield %[[VAL_35]] : index
// CHECK: }
-// CHECK: %[[VAL_49:.*]] = sparse_tensor.compress %[[VAL_24]], %[[VAL_25]], %[[VAL_26]], %[[VAL_28]] into %[[VAL_22]]{{\[}}%[[VAL_23]]] : memref<?xf64>, memref<?xi1>, memref<?xindex>, tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: scf.yield %[[VAL_49]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_49:.*]] = sparse_tensor.compress %[[VAL_24]], %[[VAL_25]], %[[VAL_26]], %[[VAL_28]] into %[[VAL_22]]{{\[}}%[[VAL_23]]] : memref<?xf64>, memref<?xi1>, memref<?xindex>, tensor<8x8xf64, #sparse{{[0-9]*}}>
+// CHECK: scf.yield %[[VAL_49]] : tensor<8x8xf64, #sparse{{[0-9]*}}>
// CHECK: }
-// CHECK: %[[VAL_50:.*]] = sparse_tensor.load %[[VAL_20]] hasInserts : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: return %[[VAL_50]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_50:.*]] = sparse_tensor.load %[[VAL_20]] hasInserts : tensor<8x8xf64, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_50]] : tensor<8x8xf64, #sparse{{[0-9]*}}>
// CHECK: }
func.func @sparse_sampled_dd_unfused(%args: tensor<8x8xf64, #SM>,
%arga: tensor<8x8xf64>,
diff --git a/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir b/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir
index 79bedcf5a49e..5fa332f8f181 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir
@@ -21,27 +21,27 @@
}
// CHECK-LABEL: func.func @sparse_sampled_dd_unfused(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<8x8xf64>,
-// CHECK-SAME: %[[VAL_2:.*]]: tensor<8x8xf64>) -> tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> {
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<8x8xf64>) -> tensor<8x8xf64, #sparse{{[0-9]*}}> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant false
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant true
-// CHECK: %[[VAL_8:.*]] = tensor.empty() : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_8:.*]] = tensor.empty() : tensor<8x8xf64, #sparse{{[0-9]*}}>
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<8x8xf64>
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<8x8xf64>
-// CHECK: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_5]]] : memref<?xindex>
-// CHECK: %[[VAL_18:.*]] = scf.for %[[VAL_19:.*]] = %[[VAL_16]] to %[[VAL_17]] step %[[VAL_5]] iter_args(%[[VAL_20:.*]] = %[[VAL_8]]) -> (tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_18:.*]] = scf.for %[[VAL_19:.*]] = %[[VAL_16]] to %[[VAL_17]] step %[[VAL_5]] iter_args(%[[VAL_20:.*]] = %[[VAL_8]]) -> (tensor<8x8xf64, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_19]]] : memref<?xindex>
-// CHECK: %[[VAL_22:.*]], %[[VAL_23:.*]], %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.expand %[[VAL_8]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>, memref<?xi1>, memref<?xindex>
+// CHECK: %[[VAL_22:.*]], %[[VAL_23:.*]], %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.expand %[[VAL_8]] : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xf64>, memref<?xi1>, memref<?xindex>
// CHECK: %[[VAL_26:.*]] = scf.for %[[VAL_27:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] iter_args(%[[VAL_28:.*]] = %[[VAL_25]]) -> (index) {
// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_21]], %[[VAL_27]]] : memref<8x8xf64>
// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_19]]] : memref<?xindex>
@@ -70,11 +70,11 @@
// CHECK: } {"Emitted from" = "linalg.generic"}
// CHECK: scf.yield %[[VAL_48:.*]] : index
// CHECK: } {"Emitted from" = "linalg.generic"}
-// CHECK: %[[VAL_49:.*]] = sparse_tensor.compress %[[VAL_22]], %[[VAL_23]], %[[VAL_24]], %[[VAL_50:.*]] into %[[VAL_20]]{{\[}}%[[VAL_21]]] : memref<?xf64>, memref<?xi1>, memref<?xindex>, tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: scf.yield %[[VAL_49]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_49:.*]] = sparse_tensor.compress %[[VAL_22]], %[[VAL_23]], %[[VAL_24]], %[[VAL_50:.*]] into %[[VAL_20]]{{\[}}%[[VAL_21]]] : memref<?xf64>, memref<?xi1>, memref<?xindex>, tensor<8x8xf64, #sparse{{[0-9]*}}>
+// CHECK: scf.yield %[[VAL_49]] : tensor<8x8xf64, #sparse{{[0-9]*}}>
// CHECK: } {"Emitted from" = "linalg.generic"}
-// CHECK: %[[VAL_51:.*]] = sparse_tensor.load %[[VAL_52:.*]] hasInserts : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: return %[[VAL_51]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_51:.*]] = sparse_tensor.load %[[VAL_52:.*]] hasInserts : tensor<8x8xf64, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_51]] : tensor<8x8xf64, #sparse{{[0-9]*}}>
// CHECK: }
func.func @sparse_sampled_dd_unfused(%args: tensor<8x8xf64, #SM>,
%arga: tensor<8x8xf64>,
diff --git a/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir
index 339d65ce5716..47d24ebf24fc 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir
@@ -35,7 +35,7 @@
// CHECK: scf.yield %[[RET_1]]
// CHECK: }
// CHECK: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts
-// CHECK: return %[[NT1]] : tensor<10x10xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: return %[[NT1]] : tensor<10x10xf64, #sparse{{[0-9]*}}>
//
func.func @sparse_reshape(%arg0: tensor<4x25xf64, #SparseMatrix>) -> tensor<10x10xf64, #SparseMatrix> {
%shape = arith.constant dense <[ 10, 10 ]> : tensor<2xi32>
diff --git a/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir b/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir
index fd41fedf91c2..80a989d789df 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir
@@ -16,34 +16,34 @@
// TODO: improve auto-conversion followed by yield
// CHECK-LABEL: func.func @sparse_transpose_auto(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<3x4xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>> {
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<3x4xf64, #sparse{{[0-9]*}}>) -> tensor<4x3xf64, #sparse{{[0-9]*}}> {
// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_3:.*]] = tensor.empty() : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.convert %[[VAL_0]] : tensor<3x4xf64, #sparse_tensor.encoding<{{{.*}}}>> to tensor<3x4xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: %[[DEMAP:.*]] = sparse_tensor.reinterpret_map %[[VAL_4]] : tensor<3x4xf64, #sparse_tensor.encoding<{{{.*}}}>> to tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[DEMAP]] {level = 0 : index} : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[DEMAP]] {level = 0 : index} : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[DEMAP]] {level = 1 : index} : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[DEMAP]] {level = 1 : index} : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[DEMAP]] : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK-DAG: %[[VAL_3:.*]] = tensor.empty() : tensor<4x3xf64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.convert %[[VAL_0]] : tensor<3x4xf64, #sparse{{[0-9]*}}> to tensor<3x4xf64, #sparse{{[0-9]*}}>
+// CHECK: %[[DEMAP:.*]] = sparse_tensor.reinterpret_map %[[VAL_4]] : tensor<3x4xf64, #sparse{{[0-9]*}}> to tensor<4x3xf64, #sparse{{[0-9]*}}>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[DEMAP]] {level = 0 : index} : tensor<4x3xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[DEMAP]] {level = 0 : index} : tensor<4x3xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[DEMAP]] {level = 1 : index} : tensor<4x3xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[DEMAP]] {level = 1 : index} : tensor<4x3xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[DEMAP]] : tensor<4x3xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_1]]] : memref<?xindex>
// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<?xindex>
-// CHECK: %[[VAL_12:.*]] = scf.for %[[VAL_13:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_2]] iter_args(%[[VAL_14:.*]] = %[[VAL_3]]) -> (tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_12:.*]] = scf.for %[[VAL_13:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_2]] iter_args(%[[VAL_14:.*]] = %[[VAL_3]]) -> (tensor<4x3xf64, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_13]]] : memref<?xindex>
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref<?xindex>
// CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_13]], %[[VAL_2]] : index
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_17]]] : memref<?xindex>
-// CHECK: %[[VAL_19:.*]] = scf.for %[[VAL_20:.*]] = %[[VAL_16]] to %[[VAL_18]] step %[[VAL_2]] iter_args(%[[VAL_21:.*]] = %[[VAL_14]]) -> (tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_19:.*]] = scf.for %[[VAL_20:.*]] = %[[VAL_16]] to %[[VAL_18]] step %[[VAL_2]] iter_args(%[[VAL_21:.*]] = %[[VAL_14]]) -> (tensor<4x3xf64, #sparse{{[0-9]*}}>) {
// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_20]]] : memref<?xindex>
// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_20]]] : memref<?xf64>
-// CHECK: %[[VAL_24:.*]] = sparse_tensor.insert %[[VAL_23]] into %[[VAL_21]]{{\[}}%[[VAL_15]], %[[VAL_22]]] : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: scf.yield %[[VAL_24]] : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_24:.*]] = sparse_tensor.insert %[[VAL_23]] into %[[VAL_21]]{{\[}}%[[VAL_15]], %[[VAL_22]]] : tensor<4x3xf64, #sparse{{[0-9]*}}>
+// CHECK: scf.yield %[[VAL_24]] : tensor<4x3xf64, #sparse{{[0-9]*}}>
// CHECK: }
-// CHECK: scf.yield %[[VAL_25:.*]] : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.yield %[[VAL_25:.*]] : tensor<4x3xf64, #sparse{{[0-9]*}}>
// CHECK: }
-// CHECK: %[[VAL_26:.*]] = sparse_tensor.load %[[VAL_27:.*]] hasInserts : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: return %[[VAL_26]] : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_26:.*]] = sparse_tensor.load %[[VAL_27:.*]] hasInserts : tensor<4x3xf64, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_26]] : tensor<4x3xf64, #sparse{{[0-9]*}}>
// CHECK: }
func.func @sparse_transpose_auto(%arga: tensor<3x4xf64, #DCSR>)
-> tensor<4x3xf64, #DCSR> {
diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir
index 269a4926c9ec..e3508f11cb75 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir
@@ -18,19 +18,19 @@
//
// CHECK-LABEL: func.func @sparse_matrix_sum(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<f64>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>>,
-// CHECK-SAME: %[[VAL_2:.*]]: tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<f64> {
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<64x32xf64, #sparse{{[0-9]*}}>,
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<64x32xf64, #sparse{{[0-9]*}}>) -> tensor<f64> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant dense<0.000000e+00> : vector<8xf64>
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 64 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
-// CHECK: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<64x32xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<64x32xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<64x32xf64, #sparse{{[0-9]*}}> to memref<?xf64>
+// CHECK: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor<64x32xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor<64x32xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<64x32xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_0]] : memref<f64>
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_14]][] : memref<f64>
// CHECK: %[[VAL_16:.*]] = scf.for %[[VAL_17:.*]] = %[[VAL_6]] to %[[VAL_5]] step %[[VAL_7]] iter_args(%[[VAL_18:.*]] = %[[VAL_15]]) -> (f64) {
diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir
index 9bc24fd02b82..c9d432924c0d 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir
@@ -17,7 +17,7 @@
}
// CHECK-LABEL: func.func @sparse_index_1d_conj(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<8xi64> {
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<8xi64, #sparse{{[0-9]*}}>) -> tensor<8xi64> {
// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant dense<0> : vector<8xi64>
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant dense<0> : vector<8xindex>
@@ -25,9 +25,9 @@
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_7:.*]] = tensor.empty() : tensor<8xi64>
-// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi64>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse{{[0-9]*}}> to memref<?xi64>
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_7]] : memref<8xi64>
// CHECK: linalg.fill ins(%[[VAL_4]] : i64) outs(%[[VAL_11]] : memref<8xi64>)
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_5]]] : memref<?xindex>
@@ -59,7 +59,7 @@ func.func @sparse_index_1d_conj(%arga: tensor<8xi64, #SparseVector>) -> tensor<8
}
// CHECK-LABEL: func.func @sparse_index_1d_disj(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<8xi64> {
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<8xi64, #sparse{{[0-9]*}}>) -> tensor<8xi64> {
// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7]> : vector<8xindex>
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : i64
@@ -67,9 +67,9 @@ func.func @sparse_index_1d_conj(%arga: tensor<8xi64, #SparseVector>) -> tensor<8
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant true
// CHECK-DAG: %[[VAL_7:.*]] = tensor.empty() : tensor<8xi64>
-// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi64>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse{{[0-9]*}}> to memref<?xi64>
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_7]] : memref<8xi64>
// CHECK: linalg.fill ins(%[[VAL_3]] : i64) outs(%[[VAL_11]] : memref<8xi64>)
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
diff --git a/mlir/test/Dialect/SparseTensor/spy_sddmm.mlir b/mlir/test/Dialect/SparseTensor/spy_sddmm.mlir
index a4211991f260..287b62ef44c6 100644
--- a/mlir/test/Dialect/SparseTensor/spy_sddmm.mlir
+++ b/mlir/test/Dialect/SparseTensor/spy_sddmm.mlir
@@ -20,15 +20,15 @@
// CHECK-LABEL: func.func @sparse_sampled_dd(
// CHECK-SAME: %[[VAL_0:.*0]]: tensor<8x8xf64>,
// CHECK-SAME: %[[VAL_1:.*1]]: tensor<8x8xf64>,
-// CHECK-SAME: %[[VAL_2:.*2]]: tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> {
+// CHECK-SAME: %[[VAL_2:.*2]]: tensor<8x8xf64, #sparse{{[0-9]*}}>) -> tensor<8x8xf64, #sparse{{[0-9]*}}> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref<8x8xf64>
// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<8x8xf64>
-// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref<?xf64>
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_11]]] : memref<?xindex>
@@ -45,8 +45,8 @@
// CHECK: } {"Emitted from" = "linalg.generic"}
// CHECK: } {"Emitted from" = "linalg.generic"}
// CHECK: } {"Emitted from" = "linalg.generic"}
-// CHECK: %[[VAL_23:.*]] = sparse_tensor.load %[[VAL_2]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>
-// CHECK: return %[[VAL_23]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_23:.*]] = sparse_tensor.load %[[VAL_2]] : tensor<8x8xf64, #sparse{{[0-9]*}}>
+// CHECK: return %[[VAL_23]] : tensor<8x8xf64, #sparse{{[0-9]*}}>
// CHECK: }
func.func @sparse_sampled_dd(%argA: tensor<8x8xf64>,
%argB: tensor<8x8xf64>,
diff --git a/mlir/test/Dialect/SparseTensor/unused-tensor.mlir b/mlir/test/Dialect/SparseTensor/unused-tensor.mlir
index 330bb9fa5748..f85acb9c6969 100644
--- a/mlir/test/Dialect/SparseTensor/unused-tensor.mlir
+++ b/mlir/test/Dialect/SparseTensor/unused-tensor.mlir
@@ -21,7 +21,7 @@
// CHECK-LABEL: func.func @b_ununsed(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<2x4xf64>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<8x4xf64, #sparse_tensor.encoding<{{.*}}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<8x4xf64, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<2x4xf64>) -> tensor<2x4xf64> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index
diff --git a/mlir/test/Dialect/SparseTensor/vectorize_reduction.mlir b/mlir/test/Dialect/SparseTensor/vectorize_reduction.mlir
index ef3c0f88d422..d2b51322e0be 100644
--- a/mlir/test/Dialect/SparseTensor/vectorize_reduction.mlir
+++ b/mlir/test/Dialect/SparseTensor/vectorize_reduction.mlir
@@ -9,13 +9,13 @@
// CHECK-ON-LABEL: func.func @sparse_reduction_ori(
// CHECK-ON-SAME: %[[VAL_0:.*]]: tensor<i13>,
-// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor<?xi13, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<i13> {
+// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor<?xi13, #sparse{{[0-9]*}}>) -> tensor<i13> {
// CHECK-ON-DAG: %[[VAL_2:.*]] = arith.constant 8 : index
// CHECK-ON-DAG: %[[VAL_3:.*]] = arith.constant dense<0> : vector<8xi13>
// CHECK-ON-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-ON-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xi13, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xi13, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi13>
+// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xi13, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xi13, #sparse{{[0-9]*}}> to memref<?xi13>
// CHECK-ON: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref<i13>
// CHECK-ON: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref<i13>
// CHECK-ON: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -37,11 +37,11 @@
//
// CHECK-OFF-LABEL: func.func @sparse_reduction_ori(
// CHECK-OFF-SAME: %[[VAL_0:.*]]: tensor<i13>,
-// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor<?xi13, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<i13> {
+// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor<?xi13, #sparse{{[0-9]*}}>) -> tensor<i13> {
// CHECK-OFF-DAG: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-OFF-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
-// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xi13, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xi13, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi13>
+// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xi13, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xi13, #sparse{{[0-9]*}}> to memref<?xi13>
// CHECK-OFF: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref<i13>
// CHECK-OFF: %[[VAL_7:.*]] = memref.load %[[VAL_6]][] : memref<i13>
// CHECK-OFF: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
@@ -86,13 +86,13 @@ func.func @sparse_reduction_ori(%argx: tensor<i13>,
// CHECK-ON-LABEL: func.func @sparse_reduction_ori_accumulator_on_rhs(
// CHECK-ON-SAME: %[[VAL_0:.*]]: tensor<i13>,
-// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor<?xi13, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<i13> {
+// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor<?xi13, #sparse{{[0-9]*}}>) -> tensor<i13> {
// CHECK-ON-DAG: %[[VAL_2:.*]] = arith.constant 8 : index
// CHECK-ON-DAG: %[[VAL_3:.*]] = arith.constant dense<0> : vector<8xi13>
// CHECK-ON-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-ON-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xi13, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xi13, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi13>
+// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xi13, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xi13, #sparse{{[0-9]*}}> to memref<?xi13>
// CHECK-ON: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref<i13>
// CHECK-ON: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref<i13>
// CHECK-ON: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -114,11 +114,11 @@ func.func @sparse_reduction_ori(%argx: tensor<i13>,
//
// CHECK-OFF-LABEL: func.func @sparse_reduction_ori_accumulator_on_rhs(
// CHECK-OFF-SAME: %[[VAL_0:.*]]: tensor<i13>,
-// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor<?xi13, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<i13> {
+// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor<?xi13, #sparse{{[0-9]*}}>) -> tensor<i13> {
// CHECK-OFF-DAG: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-OFF-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
-// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xi13, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xi13, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi13>
+// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xi13, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xi13, #sparse{{[0-9]*}}> to memref<?xi13>
// CHECK-OFF: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref<i13>
// CHECK-OFF: %[[VAL_7:.*]] = memref.load %[[VAL_6]][] : memref<i13>
// CHECK-OFF: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
@@ -161,13 +161,13 @@ func.func @sparse_reduction_ori_accumulator_on_rhs(%argx: tensor<i13>,
//
// CHECK-ON-LABEL: func.func @sparse_reduction_subi(
// CHECK-ON-SAME: %[[VAL_0:.*]]: tensor<i32>,
-// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor<?xi32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<i32> {
+// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor<?xi32, #sparse{{[0-9]*}}>) -> tensor<i32> {
// CHECK-ON-DAG: %[[VAL_2:.*]] = arith.constant 8 : index
// CHECK-ON-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-ON-DAG: %[[VAL_4:.*]] = arith.constant dense<0> : vector<8xi32>
// CHECK-ON-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi32>
+// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xi32, #sparse{{[0-9]*}}> to memref<?xi32>
// CHECK-ON: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref<i32>
// CHECK-ON: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref<i32>
// CHECK-ON: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@@ -189,11 +189,11 @@ func.func @sparse_reduction_ori_accumulator_on_rhs(%argx: tensor<i13>,
//
// CHECK-OFF-LABEL: func.func @sparse_reduction_subi(
// CHECK-OFF-SAME: %[[VAL_0:.*]]: tensor<i32>,
-// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor<?xi32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<i32> {
+// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor<?xi32, #sparse{{[0-9]*}}>) -> tensor<i32> {
// CHECK-OFF-DAG: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-OFF-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
-// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi32>
+// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xi32, #sparse{{[0-9]*}}> to memref<?xi32>
// CHECK-OFF: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref<i32>
// CHECK-OFF: %[[VAL_7:.*]] = memref.load %[[VAL_6]][] : memref<i32>
// CHECK-OFF: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
@@ -236,13 +236,13 @@ func.func @sparse_reduction_subi(%argx: tensor<i32>,
// CHECK-ON-LABEL: func.func @sparse_reduction_xor(
// CHECK-ON-SAME: %[[VAL_0:.*]]: tensor<i32>,
-// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor<?xi32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<i32> {
+// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor<?xi32, #sparse{{[0-9]*}}>) -> tensor<i32> {
// CHECK-ON-DAG: %[[VAL_2:.*]] = arith.constant 8 : index
// CHECK-ON-DAG: %[[VAL_3:.*]] = arith.constant dense<0> : vector<8xi32>
// CHECK-ON-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-ON-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi32>
+// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xi32, #sparse{{[0-9]*}}> to memref<?xi32>
// CHECK-ON: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref<i32>
// CHECK-ON: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref<i32>
// CHECK-ON: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -264,11 +264,11 @@ func.func @sparse_reduction_subi(%argx: tensor<i32>,
//
// CHECK-OFF-LABEL: func.func @sparse_reduction_xor(
// CHECK-OFF-SAME: %[[VAL_0:.*]]: tensor<i32>,
-// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor<?xi32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<i32> {
+// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor<?xi32, #sparse{{[0-9]*}}>) -> tensor<i32> {
// CHECK-OFF-DAG: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-OFF-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
-// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi32>
+// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xi32, #sparse{{[0-9]*}}> to memref<?xi32>
// CHECK-OFF: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref<i32>
// CHECK-OFF: %[[VAL_7:.*]] = memref.load %[[VAL_6]][] : memref<i32>
// CHECK-OFF: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
@@ -312,13 +312,13 @@ func.func @sparse_reduction_xor(%argx: tensor<i32>,
// CHECK-ON-LABEL: func.func @sparse_reduction_addi(
// CHECK-ON-SAME: %[[VAL_0:.*]]: tensor<i32>,
-// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor<?xi32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<i32> {
+// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor<?xi32, #sparse{{[0-9]*}}>) -> tensor<i32> {
// CHECK-ON-DAG: %[[VAL_2:.*]] = arith.constant 8 : index
// CHECK-ON-DAG: %[[VAL_3:.*]] = arith.constant dense<0> : vector<8xi32>
// CHECK-ON-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-ON-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi32>
+// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xi32, #sparse{{[0-9]*}}> to memref<?xi32>
// CHECK-ON: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref<i32>
// CHECK-ON: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref<i32>
// CHECK-ON: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -340,11 +340,11 @@ func.func @sparse_reduction_xor(%argx: tensor<i32>,
//
// CHECK-OFF-LABEL: func.func @sparse_reduction_addi(
// CHECK-OFF-SAME: %[[VAL_0:.*]]: tensor<i32>,
-// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor<?xi32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<i32> {
+// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor<?xi32, #sparse{{[0-9]*}}>) -> tensor<i32> {
// CHECK-OFF-DAG: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-OFF-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
-// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi32>
+// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xi32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xi32, #sparse{{[0-9]*}}> to memref<?xi32>
// CHECK-OFF: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref<i32>
// CHECK-OFF: %[[VAL_7:.*]] = memref.load %[[VAL_6]][] : memref<i32>
// CHECK-OFF: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
@@ -388,13 +388,13 @@ func.func @sparse_reduction_addi(%argx: tensor<i32>,
// CHECK-ON-LABEL: func.func @sparse_reduction_subf(
// CHECK-ON-SAME: %[[VAL_0:.*]]: tensor<f32>,
-// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<f32> {
+// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor<?xf32, #sparse{{[0-9]*}}>) -> tensor<f32> {
// CHECK-ON-DAG: %[[VAL_2:.*]] = arith.constant 8 : index
// CHECK-ON-DAG: %[[VAL_3:.*]] = arith.constant dense<0.000000e+00> : vector<8xf32>
// CHECK-ON-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-ON-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-ON: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref<f32>
// CHECK-ON: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref<f32>
// CHECK-ON: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -416,11 +416,11 @@ func.func @sparse_reduction_addi(%argx: tensor<i32>,
//
// CHECK-OFF-LABEL: func.func @sparse_reduction_subf(
// CHECK-OFF-SAME: %[[VAL_0:.*]]: tensor<f32>,
-// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<f32> {
+// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor<?xf32, #sparse{{[0-9]*}}>) -> tensor<f32> {
// CHECK-OFF-DAG: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-OFF-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
-// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-OFF: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref<f32>
// CHECK-OFF: %[[VAL_7:.*]] = memref.load %[[VAL_6]][] : memref<f32>
// CHECK-OFF: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
@@ -464,13 +464,13 @@ func.func @sparse_reduction_subf(%argx: tensor<f32>,
// CHECK-ON-LABEL: func.func @sparse_reduction_addf(
// CHECK-ON-SAME: %[[VAL_0:.*]]: tensor<f32>,
-// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<f32> {
+// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor<?xf32, #sparse{{[0-9]*}}>) -> tensor<f32> {
// CHECK-ON-DAG: %[[VAL_2:.*]] = arith.constant 8 : index
// CHECK-ON-DAG: %[[VAL_3:.*]] = arith.constant dense<0.000000e+00> : vector<8xf32>
// CHECK-ON-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-ON-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-ON: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref<f32>
// CHECK-ON: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref<f32>
// CHECK-ON: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -492,11 +492,11 @@ func.func @sparse_reduction_subf(%argx: tensor<f32>,
//
// CHECK-OFF-LABEL: func.func @sparse_reduction_addf(
// CHECK-OFF-SAME: %[[VAL_0:.*]]: tensor<f32>,
-// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<f32> {
+// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor<?xf32, #sparse{{[0-9]*}}>) -> tensor<f32> {
// CHECK-OFF-DAG: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-OFF-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
-// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
-// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<?xf32, #sparse{{[0-9]*}}> to memref<?xindex>
+// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xf32, #sparse{{[0-9]*}}> to memref<?xf32>
// CHECK-OFF: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref<f32>
// CHECK-OFF: %[[VAL_7:.*]] = memref.load %[[VAL_6]][] : memref<f32>
// CHECK-OFF: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir
index d92165e98cea..6468c4b45d24 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir
@@ -3,7 +3,7 @@
//
// Set-up that's shared across all tests in this directory. In principle, this
// config could be moved to lit.local.cfg. However, there are downstream users that
-// do not use these LIT config files. Hence why this is kept inline.
+//  do not use these LIT config files. Hence why this is kept inline.
//
// DEFINE: %{sparsifier_opts} = enable-runtime-library=true
// DEFINE: %{sparsifier_opts_sve} = enable-arm-sve=true %{sparsifier_opts}
@@ -20,10 +20,13 @@
// REDEFINE: %{env} = TENSOR0="%mlir_src_dir/test/Integration/data/block.mtx"
// RUN: %{compile} | env %{env} %{run} | FileCheck %s
//
-// TODO: enable!
// Do the same run, but now with direct IR generation.
// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false
-// R_UN: %{compile} | env %{env} %{run} | FileCheck %s
+// RUN: %{compile} | env %{env} %{run} | FileCheck %s
+//
+// Do the same run, but now with direct IR generation and vectorization.
+// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false enable-buffer-initialization=true vl=2 reassociate-fp-reductions=true enable-index-optimizations=true
+// RUN: %{compile} | env %{env} %{run} | FileCheck %s
!Filename = !llvm.ptr
diff --git a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sddmm-lib.mlir b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sddmm-lib.mlir
index fb0da0c0e649..db5c154e5e43 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sddmm-lib.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sddmm-lib.mlir
@@ -16,8 +16,7 @@
//
// without RT lib:
//
-// TODO: make this work
-// R_U_N: %{compile} enable-runtime-library=false" | %{run}
+// RUN: %{compile} enable-runtime-library=false" | %{run}
!Filename = !llvm.ptr
diff --git a/mlir/test/Target/Cpp/attrs.mlir b/mlir/test/Target/Cpp/attrs.mlir
index 4e5bdfda2957..0a42570ecf2a 100644
--- a/mlir/test/Target/Cpp/attrs.mlir
+++ b/mlir/test/Target/Cpp/attrs.mlir
@@ -3,8 +3,8 @@
// CHECK-LABEL: void opaque_attrs() {
func.func @opaque_attrs() {
// CHECK-NEXT: f(OPAQUE_ENUM_VALUE);
- emitc.call "f"() {args = [#emitc.opaque<"OPAQUE_ENUM_VALUE">]} : () -> ()
+ emitc.call_opaque "f"() {args = [#emitc.opaque<"OPAQUE_ENUM_VALUE">]} : () -> ()
// CHECK-NEXT: f("some string");
- emitc.call "f"() {args = [#emitc.opaque<"\"some string\"">]} : () -> ()
+ emitc.call_opaque "f"() {args = [#emitc.opaque<"\"some string\"">]} : () -> ()
return
}
diff --git a/mlir/test/Target/Cpp/call.mlir b/mlir/test/Target/Cpp/call.mlir
index 3bd4b4be0478..2bcdc8720518 100644
--- a/mlir/test/Target/Cpp/call.mlir
+++ b/mlir/test/Target/Cpp/call.mlir
@@ -1,34 +1,34 @@
// RUN: mlir-translate -mlir-to-cpp %s | FileCheck %s -check-prefix=CPP-DEFAULT
// RUN: mlir-translate -mlir-to-cpp -declare-variables-at-top %s | FileCheck %s -check-prefix=CPP-DECLTOP
-func.func @emitc_call() {
- %0 = emitc.call "func_a" () : () -> i32
- %1 = emitc.call "func_b" () : () -> i32
+func.func @emitc_call_opaque() {
+ %0 = emitc.call_opaque "func_a" () : () -> i32
+ %1 = emitc.call_opaque "func_b" () : () -> i32
return
}
-// CPP-DEFAULT: void emitc_call() {
+// CPP-DEFAULT: void emitc_call_opaque() {
// CPP-DEFAULT-NEXT: int32_t [[V0:[^ ]*]] = func_a();
// CPP-DEFAULT-NEXT: int32_t [[V1:[^ ]*]] = func_b();
-// CPP-DECLTOP: void emitc_call() {
+// CPP-DECLTOP: void emitc_call_opaque() {
// CPP-DECLTOP-NEXT: int32_t [[V0:[^ ]*]];
// CPP-DECLTOP-NEXT: int32_t [[V1:[^ ]*]];
// CPP-DECLTOP-NEXT: [[V0:]] = func_a();
// CPP-DECLTOP-NEXT: [[V1:]] = func_b();
-func.func @emitc_call_two_results() {
+func.func @emitc_call_opaque_two_results() {
%0 = arith.constant 0 : index
- %1:2 = emitc.call "two_results" () : () -> (i32, i32)
+ %1:2 = emitc.call_opaque "two_results" () : () -> (i32, i32)
return
}
-// CPP-DEFAULT: void emitc_call_two_results() {
+// CPP-DEFAULT: void emitc_call_opaque_two_results() {
// CPP-DEFAULT-NEXT: size_t [[V1:[^ ]*]] = 0;
// CPP-DEFAULT-NEXT: int32_t [[V2:[^ ]*]];
// CPP-DEFAULT-NEXT: int32_t [[V3:[^ ]*]];
// CPP-DEFAULT-NEXT: std::tie([[V2]], [[V3]]) = two_results();
-// CPP-DECLTOP: void emitc_call_two_results() {
+// CPP-DECLTOP: void emitc_call_opaque_two_results() {
// CPP-DECLTOP-NEXT: size_t [[V1:[^ ]*]];
// CPP-DECLTOP-NEXT: int32_t [[V2:[^ ]*]];
// CPP-DECLTOP-NEXT: int32_t [[V3:[^ ]*]];
diff --git a/mlir/test/Target/Cpp/common-cpp.mlir b/mlir/test/Target/Cpp/common-cpp.mlir
index 252f5e214840..b537e7098deb 100644
--- a/mlir/test/Target/Cpp/common-cpp.mlir
+++ b/mlir/test/Target/Cpp/common-cpp.mlir
@@ -8,13 +8,13 @@ emitc.include <"myheader.h">
// CHECK: void test_foo_print() {
func.func @test_foo_print() {
// CHECK: [[V1:[^ ]*]] = foo::constant({0, 1});
- %0 = emitc.call "foo::constant"() {args = [dense<[0, 1]> : tensor<2xi32>]} : () -> (i32)
+ %0 = emitc.call_opaque "foo::constant"() {args = [dense<[0, 1]> : tensor<2xi32>]} : () -> (i32)
// CHECK: [[V2:[^ ]*]] = foo::op_and_attr({0, 1}, [[V1]]);
- %1 = emitc.call "foo::op_and_attr"(%0) {args = [dense<[0, 1]> : tensor<2xi32>, 0 : index]} : (i32) -> (i32)
+ %1 = emitc.call_opaque "foo::op_and_attr"(%0) {args = [dense<[0, 1]> : tensor<2xi32>, 0 : index]} : (i32) -> (i32)
// CHECK: [[V3:[^ ]*]] = foo::op_and_attr([[V2]], {0, 1});
- %2 = emitc.call "foo::op_and_attr"(%1) {args = [0 : index, dense<[0, 1]> : tensor<2xi32>]} : (i32) -> (i32)
+ %2 = emitc.call_opaque "foo::op_and_attr"(%1) {args = [0 : index, dense<[0, 1]> : tensor<2xi32>]} : (i32) -> (i32)
// CHECK: foo::print([[V3]]);
- emitc.call "foo::print"(%2): (i32) -> ()
+ emitc.call_opaque "foo::print"(%2): (i32) -> ()
return
}
@@ -27,7 +27,7 @@ func.func @test_single_return(%arg0 : i32) -> i32 {
// CHECK: std::tuple<int32_t, int32_t> test_multiple_return()
func.func @test_multiple_return() -> (i32, i32) {
// CHECK: std::tie([[V3:.*]], [[V4:.*]]) = foo::blah();
- %0:2 = emitc.call "foo::blah"() : () -> (i32, i32)
+ %0:2 = emitc.call_opaque "foo::blah"() : () -> (i32, i32)
// CHECK: [[V5:[^ ]*]] = test_single_return([[V3]]);
%1 = call @test_single_return(%0#0) : (i32) -> i32
// CHECK: return std::make_tuple([[V5]], [[V4]]);
@@ -37,48 +37,48 @@ func.func @test_multiple_return() -> (i32, i32) {
// CHECK: test_float
func.func @test_float() {
// CHECK: foo::constant({(float)0.0e+00, (float)1.000000000e+00})
- %0 = emitc.call "foo::constant"() {args = [dense<[0.000000e+00, 1.000000e+00]> : tensor<2xf32>]} : () -> f32
+ %0 = emitc.call_opaque "foo::constant"() {args = [dense<[0.000000e+00, 1.000000e+00]> : tensor<2xf32>]} : () -> f32
return
}
// CHECK: test_uint
func.func @test_uint() {
// CHECK: uint32_t
- %0 = emitc.call "foo::constant"() {args = [dense<[0, 1]> : tensor<2xui32>]} : () -> ui32
+ %0 = emitc.call_opaque "foo::constant"() {args = [dense<[0, 1]> : tensor<2xui32>]} : () -> ui32
// CHECK: uint64_t
- %1 = emitc.call "foo::constant"() {args = [dense<[0, 1]> : tensor<2xui64>]} : () -> ui64
+ %1 = emitc.call_opaque "foo::constant"() {args = [dense<[0, 1]> : tensor<2xui64>]} : () -> ui64
return
}
// CHECK: int64_t test_plus_int(int64_t [[V1]])
func.func @test_plus_int(%arg0 : i64) -> i64 {
// CHECK: mhlo::add([[V1]], [[V1]])
- %0 = emitc.call "mhlo::add"(%arg0, %arg0) {args = [0 : index, 1 : index]} : (i64, i64) -> i64
+ %0 = emitc.call_opaque "mhlo::add"(%arg0, %arg0) {args = [0 : index, 1 : index]} : (i64, i64) -> i64
return %0 : i64
}
// CHECK: Tensor<float, 2> mixed_types(Tensor<double, 2> [[V1]])
func.func @mixed_types(%arg0: tensor<2xf64>) -> tensor<2xf32> {
// CHECK: foo::mixed_types([[V1]]);
- %0 = emitc.call "foo::mixed_types"(%arg0) {args = [0 : index]} : (tensor<2xf64>) -> tensor<2xf32>
+ %0 = emitc.call_opaque "foo::mixed_types"(%arg0) {args = [0 : index]} : (tensor<2xf64>) -> tensor<2xf32>
return %0 : tensor<2xf32>
}
// CHECK: Tensor<uint64_t> mhlo_convert(Tensor<uint32_t> [[V1]])
func.func @mhlo_convert(%arg0: tensor<ui32>) -> tensor<ui64> {
// CHECK: mhlo::convert([[V1]]);
- %0 = emitc.call "mhlo::convert"(%arg0) {args = [0 : index]} : (tensor<ui32>) -> tensor<ui64>
+ %0 = emitc.call_opaque "mhlo::convert"(%arg0) {args = [0 : index]} : (tensor<ui32>) -> tensor<ui64>
return %0 : tensor<ui64>
}
// CHECK: status_t opaque_types(bool [[V1:[^ ]*]], char [[V2:[^ ]*]]) {
func.func @opaque_types(%arg0: !emitc.opaque<"bool">, %arg1: !emitc.opaque<"char">) -> !emitc.opaque<"status_t"> {
// CHECK: int [[V3:[^ ]*]] = a([[V1]], [[V2]]);
- %0 = emitc.call "a"(%arg0, %arg1) : (!emitc.opaque<"bool">, !emitc.opaque<"char">) -> (!emitc.opaque<"int">)
+ %0 = emitc.call_opaque "a"(%arg0, %arg1) : (!emitc.opaque<"bool">, !emitc.opaque<"char">) -> (!emitc.opaque<"int">)
// CHECK: char [[V4:[^ ]*]] = b([[V3]]);
- %1 = emitc.call "b"(%0): (!emitc.opaque<"int">) -> (!emitc.opaque<"char">)
+ %1 = emitc.call_opaque "b"(%0): (!emitc.opaque<"int">) -> (!emitc.opaque<"char">)
// CHECK: status_t [[V5:[^ ]*]] = c([[V3]], [[V4]]);
- %2 = emitc.call "c"(%0, %1): (!emitc.opaque<"int">, !emitc.opaque<"char">) -> (!emitc.opaque<"status_t">)
+ %2 = emitc.call_opaque "c"(%0, %1): (!emitc.opaque<"int">, !emitc.opaque<"char">) -> (!emitc.opaque<"status_t">)
return %2 : !emitc.opaque<"status_t">
}
diff --git a/mlir/test/Target/Cpp/control_flow.mlir b/mlir/test/Target/Cpp/control_flow.mlir
index 474fa958a11c..436543f7ace9 100644
--- a/mlir/test/Target/Cpp/control_flow.mlir
+++ b/mlir/test/Target/Cpp/control_flow.mlir
@@ -8,12 +8,12 @@ func.func @simple(i64, i1) -> i64 {
^bb1:
cf.br ^bb3(%a: i64)
^bb2:
- %b = emitc.call "add"(%a, %a) : (i64, i64) -> i64
+ %b = emitc.call_opaque "add"(%a, %a) : (i64, i64) -> i64
cf.br ^bb3(%b: i64)
^bb3(%c: i64):
cf.br ^bb4(%c, %a : i64, i64)
^bb4(%d : i64, %e : i64):
- %0 = emitc.call "add"(%d, %e) : (i64, i64) -> i64
+ %0 = emitc.call_opaque "add"(%d, %e) : (i64, i64) -> i64
return %0 : i64
}
// CPP-DECLTOP: int64_t simple(int64_t [[A:[^ ]*]], bool [[COND:[^ ]*]]) {
diff --git a/mlir/test/Target/Cpp/for.mlir b/mlir/test/Target/Cpp/for.mlir
index c02c8b1ac33e..90504b1347bb 100644
--- a/mlir/test/Target/Cpp/for.mlir
+++ b/mlir/test/Target/Cpp/for.mlir
@@ -3,7 +3,7 @@
func.func @test_for(%arg0 : index, %arg1 : index, %arg2 : index) {
emitc.for %i0 = %arg0 to %arg1 step %arg2 {
- %0 = emitc.call "f"() : () -> i32
+ %0 = emitc.call_opaque "f"() : () -> i32
}
return
}
@@ -35,8 +35,8 @@ func.func @test_for_yield() {
emitc.assign %s0 : i32 to %2 : i32
emitc.assign %p0 : f32 to %3 : f32
emitc.for %iter = %start to %stop step %step {
- %sn = emitc.call "add"(%2, %iter) : (i32, index) -> i32
- %pn = emitc.call "mul"(%3, %iter) : (f32, index) -> f32
+ %sn = emitc.call_opaque "add"(%2, %iter) : (i32, index) -> i32
+ %pn = emitc.call_opaque "mul"(%3, %iter) : (f32, index) -> f32
emitc.assign %sn : i32 to %2 : i32
emitc.assign %pn : f32 to %3 : f32
emitc.yield
@@ -116,8 +116,8 @@ func.func @test_for_yield_2() {
emitc.assign %s0 : i32 to %2 : i32
emitc.assign %p0 : f32 to %3 : f32
emitc.for %iter = %start to %stop step %step {
- %sn = emitc.call "add"(%2, %iter) : (i32, index) -> i32
- %pn = emitc.call "mul"(%3, %iter) : (f32, index) -> f32
+ %sn = emitc.call_opaque "add"(%2, %iter) : (i32, index) -> i32
+ %pn = emitc.call_opaque "mul"(%3, %iter) : (f32, index) -> f32
emitc.assign %sn : i32 to %2 : i32
emitc.assign %pn : f32 to %3 : f32
emitc.yield
diff --git a/mlir/test/Target/Cpp/if.mlir b/mlir/test/Target/Cpp/if.mlir
index beff2182777b..743f8ad39688 100644
--- a/mlir/test/Target/Cpp/if.mlir
+++ b/mlir/test/Target/Cpp/if.mlir
@@ -3,7 +3,7 @@
func.func @test_if(%arg0: i1, %arg1: f32) {
emitc.if %arg0 {
- %0 = emitc.call "func_const"(%arg1) : (f32) -> i32
+ %0 = emitc.call_opaque "func_const"(%arg1) : (f32) -> i32
}
return
}
@@ -23,9 +23,9 @@ func.func @test_if(%arg0: i1, %arg1: f32) {
func.func @test_if_else(%arg0: i1, %arg1: f32) {
emitc.if %arg0 {
- %0 = emitc.call "func_true"(%arg1) : (f32) -> i32
+ %0 = emitc.call_opaque "func_true"(%arg1) : (f32) -> i32
} else {
- %0 = emitc.call "func_false"(%arg1) : (f32) -> i32
+ %0 = emitc.call_opaque "func_false"(%arg1) : (f32) -> i32
}
return
}
@@ -53,13 +53,13 @@ func.func @test_if_yield(%arg0: i1, %arg1: f32) {
%x = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> i32
%y = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> f64
emitc.if %arg0 {
- %1 = emitc.call "func_true_1"(%arg1) : (f32) -> i32
- %2 = emitc.call "func_true_2"(%arg1) : (f32) -> f64
+ %1 = emitc.call_opaque "func_true_1"(%arg1) : (f32) -> i32
+ %2 = emitc.call_opaque "func_true_2"(%arg1) : (f32) -> f64
emitc.assign %1 : i32 to %x : i32
emitc.assign %2 : f64 to %y : f64
} else {
- %1 = emitc.call "func_false_1"(%arg1) : (f32) -> i32
- %2 = emitc.call "func_false_2"(%arg1) : (f32) -> f64
+ %1 = emitc.call_opaque "func_false_1"(%arg1) : (f32) -> i32
+ %2 = emitc.call_opaque "func_false_2"(%arg1) : (f32) -> f64
emitc.assign %1 : i32 to %x : i32
emitc.assign %2 : f64 to %y : f64
}
diff --git a/mlir/test/Target/Cpp/literal_call_operand.mlir b/mlir/test/Target/Cpp/literal_call_operand.mlir
index 428b66bb2519..00adb441cedf 100644
--- a/mlir/test/Target/Cpp/literal_call_operand.mlir
+++ b/mlir/test/Target/Cpp/literal_call_operand.mlir
@@ -3,7 +3,7 @@
func.func @emitc_call_operand() {
%p0 = emitc.literal "M_PI" : f32
- %1 = emitc.call "foo"(%p0) : (f32) -> f32
+ %1 = emitc.call_opaque "foo"(%p0) : (f32) -> f32
return
}
// CPP-DEFAULT: void emitc_call_operand() {
@@ -15,7 +15,7 @@ func.func @emitc_call_operand() {
func.func @emitc_call_operand_arg() {
%p0 = emitc.literal "M_PI" : f32
- %1 = emitc.call "bar"(%p0) {args = [42 : i32, 0 : index]} : (f32) -> f32
+ %1 = emitc.call_opaque "bar"(%p0) {args = [42 : i32, 0 : index]} : (f32) -> f32
return
}
// CPP-DEFAULT: void emitc_call_operand_arg() {
diff --git a/mlir/test/Target/Cpp/types.mlir b/mlir/test/Target/Cpp/types.mlir
index 8b4cecf75612..0585b27eb6c2 100644
--- a/mlir/test/Target/Cpp/types.mlir
+++ b/mlir/test/Target/Cpp/types.mlir
@@ -3,15 +3,15 @@
// CHECK-LABEL: void opaque_types() {
func.func @opaque_types() {
// CHECK-NEXT: f<int>();
- emitc.call "f"() {template_args = [!emitc<opaque<"int">>]} : () -> ()
+ emitc.call_opaque "f"() {template_args = [!emitc<opaque<"int">>]} : () -> ()
// CHECK-NEXT: f<byte>();
- emitc.call "f"() {template_args = [!emitc<opaque<"byte">>]} : () -> ()
+ emitc.call_opaque "f"() {template_args = [!emitc<opaque<"byte">>]} : () -> ()
// CHECK-NEXT: f<unsigned>();
- emitc.call "f"() {template_args = [!emitc<opaque<"unsigned">>]} : () -> ()
+ emitc.call_opaque "f"() {template_args = [!emitc<opaque<"unsigned">>]} : () -> ()
// CHECK-NEXT: f<status_t>();
- emitc.call "f"() {template_args = [!emitc<opaque<"status_t">>]} : () -> ()
+ emitc.call_opaque "f"() {template_args = [!emitc<opaque<"status_t">>]} : () -> ()
// CHECK-NEXT: f<std::vector<std::string>>();
- emitc.call "f"() {template_args = [!emitc.opaque<"std::vector<std::string>">]} : () -> ()
+ emitc.call_opaque "f"() {template_args = [!emitc.opaque<"std::vector<std::string>">]} : () -> ()
return
}
@@ -19,19 +19,19 @@ func.func @opaque_types() {
// CHECK-LABEL: void ptr_types() {
func.func @ptr_types() {
// CHECK-NEXT: f<int32_t*>();
- emitc.call "f"() {template_args = [!emitc.ptr<i32>]} : () -> ()
+ emitc.call_opaque "f"() {template_args = [!emitc.ptr<i32>]} : () -> ()
// CHECK-NEXT: f<int64_t*>();
- emitc.call "f"() {template_args = [!emitc.ptr<i64>]} : () -> ()
+ emitc.call_opaque "f"() {template_args = [!emitc.ptr<i64>]} : () -> ()
// CHECK-NEXT: f<float*>();
- emitc.call "f"() {template_args = [!emitc.ptr<f32>]} : () -> ()
+ emitc.call_opaque "f"() {template_args = [!emitc.ptr<f32>]} : () -> ()
// CHECK-NEXT: f<double*>();
- emitc.call "f"() {template_args = [!emitc.ptr<f64>]} : () -> ()
+ emitc.call_opaque "f"() {template_args = [!emitc.ptr<f64>]} : () -> ()
// CHECK-NEXT: int32_t* [[V0:[^ ]*]] = f();
- %0 = emitc.call "f"() : () -> (!emitc.ptr<i32>)
+ %0 = emitc.call_opaque "f"() : () -> (!emitc.ptr<i32>)
// CHECK-NEXT: int32_t** [[V1:[^ ]*]] = f([[V0:[^ ]*]]);
- %1 = emitc.call "f"(%0) : (!emitc.ptr<i32>) -> (!emitc.ptr<!emitc.ptr<i32>>)
+ %1 = emitc.call_opaque "f"(%0) : (!emitc.ptr<i32>) -> (!emitc.ptr<!emitc.ptr<i32>>)
// CHECK-NEXT: f<int*>();
- emitc.call "f"() {template_args = [!emitc.ptr<!emitc.opaque<"int">>]} : () -> ()
+ emitc.call_opaque "f"() {template_args = [!emitc.ptr<!emitc.opaque<"int">>]} : () -> ()
return
}
diff --git a/mlir/test/Target/SPIRV/composite-op.mlir b/mlir/test/Target/SPIRV/composite-op.mlir
index 5a31216867c0..5f302fd0d38f 100644
--- a/mlir/test/Target/SPIRV/composite-op.mlir
+++ b/mlir/test/Target/SPIRV/composite-op.mlir
@@ -22,8 +22,8 @@ spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader], []> {
spirv.ReturnValue %0: vector<4xf32>
}
spirv.func @vector_shuffle(%vector1: vector<4xf32>, %vector2: vector<2xf32>) -> vector<3xf32> "None" {
- // CHECK: %{{.+}} = spirv.VectorShuffle [1 : i32, 3 : i32, -1 : i32] %{{.+}} : vector<4xf32>, %arg1 : vector<2xf32> -> vector<3xf32>
- %0 = spirv.VectorShuffle [1: i32, 3: i32, 0xffffffff: i32] %vector1: vector<4xf32>, %vector2: vector<2xf32> -> vector<3xf32>
+ // CHECK: %{{.+}} = spirv.VectorShuffle [1 : i32, 3 : i32, -1 : i32] %{{.+}}, %arg1 : vector<4xf32>, vector<2xf32> -> vector<3xf32>
+ %0 = spirv.VectorShuffle [1: i32, 3: i32, 0xffffffff: i32] %vector1, %vector2 : vector<4xf32>, vector<2xf32> -> vector<3xf32>
spirv.ReturnValue %0: vector<3xf32>
}
}
diff --git a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp
index e28d88e046fc..2a20327c80b7 100644
--- a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp
+++ b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp
@@ -123,8 +123,7 @@ FOREVERY_BINOP(IMPL_BINOP_PATTERN)
class MergerTestBase : public ::testing::Test {
protected:
MergerTestBase(unsigned numTensors, unsigned numLoops)
- : merger(numTensors, numLoops, /*numFilterLoops=*/0,
- /*maxRank=*/numLoops) {
+ : merger(numTensors, numLoops, /*maxRank=*/numLoops) {
tensors.reserve(numTensors);
for (unsigned t = 0; t < numTensors; t++)
tensors.push_back(merger.addTensorExp(tid(t)));